# Neurotech Chatbot Using RAG

## Setup

In [None]:
!pip install openai langchain faiss-cpu PyMuPDF langchain-community tiktoken

In [2]:
import openai
from openai import OpenAI
import fitz
import os
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
import faiss
import numpy as np

In [3]:
# Optional: when running on Google Colab, uncomment the two lines below to
# mount Google Drive at /content/drive.
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Load the OpenAI API key from a local text file.
# strip() removes the trailing newline (and any stray whitespace) that text
# editors usually leave at the end of the file; without it the newline becomes
# part of the key and can make authenticated requests fail.
with open('./api_key.txt', 'r', encoding='utf-8') as f:
    my_api_key = f.read().strip()

In [5]:
# Create the OpenAI API client, authenticated with the key held in my_api_key.
client = OpenAI(
    api_key=my_api_key
)

In [None]:
# Sanity check for the client: print the id of every model returned by the
# API's model listing.
models = client.models.list()
for model in models:
    print(model.id)

## PDF Parsing

In [7]:
def parse_pdf(path):
    """Extract the plain text of every page in a PDF.

    Args:
        path: Filesystem path of the PDF file to read.

    Returns:
        str: The text of all pages concatenated in page order
        (an empty string if the document has no pages).
    """
    # Open the document as a context manager so it is closed even if text
    # extraction raises part-way through.
    with fitz.open(path) as doc:
        # join() assembles the result in one pass instead of repeated `+=`.
        return "".join(page.get_text() for page in doc)

In [8]:
pdf_dir = './articles/raw'

# Parse every PDF in pdf_dir into one text string per document.
# - sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
#   order of `texts` (and everything derived from it) reproducible across runs.
# - the .pdf filter skips stray entries such as .DS_Store or sub-directories
#   that parse_pdf cannot open.
texts = []
for f in sorted(os.listdir(pdf_dir)):
    if not f.lower().endswith('.pdf'):
        continue
    pdf_path = os.path.join(pdf_dir, f)
    texts.append(parse_pdf(pdf_path))

## Text Post-Processing

### Chunking

In [None]:
# Split each document's text into chunks (chunk_size=200, chunk_overlap=50)
# and collect them all in one flat list.
# NOTE(review): no `separator` is passed, so the splitter uses the library
# default — confirm that separator actually occurs in the extracted PDF text,
# otherwise chunks may come out much larger than chunk_size.
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=50)

chunks = []
for text in texts:
    chunks.extend(text_splitter.split_text(text))

### Embeddings

In [None]:
# Create the OpenAI embedding client and embed every chunk.
# `embeddings` is used below as one vector per chunk, in the same order as
# `chunks`.
embedding = OpenAIEmbeddings(api_key=my_api_key)
embeddings = embedding.embed_documents(chunks)

In [11]:
# Stack the chunk embeddings into a float32 matrix (the dtype FAISS works with).
embedding_matrix = np.array(embeddings).astype('float32')
# Build a flat L2-distance index sized to the embedding dimension
# (the matrix's second axis) and add every chunk vector to it.
# Row i of the index corresponds to chunks[i].
index = faiss.IndexFlatL2(embedding_matrix.shape[1])
index.add(embedding_matrix)

In [12]:
def retrieve_context(query, k=1):
    """Return the text of the k chunks nearest to the query in the FAISS index.

    Args:
        query: The user's question as a string.
        k: Maximum number of chunks to retrieve (default 1).

    Returns:
        str: The retrieved chunks joined with newlines, nearest first.
        Fewer than k chunks are returned when the index holds fewer than
        k vectors; an empty string when it holds none.
    """
    # FAISS searches a batch of queries, so shape the single embedding as a
    # (1, dim) float32 matrix.
    query_embedding = embedding.embed_query(query)
    query_embedding = np.asarray(query_embedding, dtype='float32').reshape(1, -1)

    _, indices = index.search(query_embedding, k)

    # FAISS pads the result with -1 when it finds fewer than k neighbours.
    # Skip those, otherwise chunks[-1] would silently return the last chunk.
    retrieved_texts = [chunks[idx] for idx in indices[0] if idx >= 0]
    return "\n".join(retrieved_texts)

In [13]:
def ask_chatbot(client, query, model="gpt-4o", k=1):
    """Answer a query with a chat model, using retrieved chunks as context.

    The context is always retrieved via retrieve_context() and placed ahead of
    the user's question in a single user message.

    Args:
        client: OpenAI client used to call the chat completions endpoint.
        query: The user's question as a string.
        model: Name of the chat model to use (default "gpt-4o").
        k: Number of chunks to retrieve as context (default 1).

    Returns:
        tuple: (context, answer) — the retrieved context string that was put
        in the prompt, and the content of the model's first reply choice.
    """
    context = retrieve_context(query, k=k)
    prompt = f"Context: {context}\n\nUser: {query}\nAI:"
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": prompt
            }
        ],
    )
    return context, response.choices[0].message.content


In [15]:
# Example run: ask one question and print the model's answer.
# `context` keeps the retrieved text that was placed in the prompt, so it can
# be inspected separately.
query = "What is neurotechnology?"
context, response = ask_chatbot(client, query)
print(response)

Neurotechnology involves the development and application of technologies that interact with the nervous system, specifically the brain. The main objective of neurotechnology is to understand the brain's functions better, address neurological disorders, and enhance or restore neural functions. Neurotechnology encompasses a wide range of tools and applications, including:

1. **Brain-Computer Interfaces (BCIs)**: These systems enable direct communication between the brain and external devices, often aimed at assisting individuals with significant physical impairments, such as those caused by ALS or spinal cord injuries, to control computers or prosthetic limbs using their thoughts.

2. **Neuroprosthetics**: These are artificial devices that replace or enhance the function of damaged or missing neural systems, such as cochlear implants for hearing loss or retinal implants for blindness.

3. **Deep Brain Stimulation (DBS)**: This involves implanting electrodes in specific parts of the brai