In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install langchain-community
!pip install langchain_openai
!pip install faiss-cpu

Collecting langchain-community
  Downloading langchain_community-0.3.24-py3-none-any.whl.metadata (2.5 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB

DOCUMENT LOADER

In [None]:
from langchain.document_loaders import CSVLoader
loader = CSVLoader(file_path='/content/drive/MyDrive/Final_Project/ai-medical-chatbot_500_Doctor.csv')
data = loader.load()

In [None]:
print(data)

Buffered data was truncated after reaching the output size limit.

In [None]:
loader_mediAI = CSVLoader(file_path='/content/drive/MyDrive/Final_Project/ai-medical-chatbot_500.csv', source_column="Doctor")

Doctor_data = loader_mediAI.load()

TEXT SPLITTER (RECURSIVELY SPLIT BY CHARACTER)


In [None]:
with open("/content/drive/MyDrive/Final_Project/ai-medical-chatbot_500_Doctor.csv") as f:
    Medical_Dr_Note_ORG = f.read()

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=300,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
)

In [None]:
Dr_texts = text_splitter.create_documents([Medical_Dr_Note_ORG])
#print(Dr_texts[0])
#print(Dr_texts[1])

In [None]:
print(Dr_texts[0])

page_content='Doctor
Hi. I have gone through your query with diligence and would like you to know that I am here to help you. For further information consult a neurologist online -->'


In [None]:
len(Dr_texts)

1099

EMBEDDINGS MODEL

In [None]:
from langchain.embeddings import CacheBackedEmbeddings

In [None]:
import os
from dotenv import load_dotenv

load_dotenv('/content/drive/MyDrive/Final_Project/OpenAI.env')

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

#print(f"OPENAI_API Key: {OPENAI_API_KEY}")

In [None]:
#embed the chunks of text
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate

underlying_embeddings = OpenAIEmbeddings()
store = LocalFileStore("./cache/")

#cached embeddings so we don't have to do it again
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embeddings,
    store,
    namespace=underlying_embeddings.model
)


In [None]:
list(store.yield_keys())

[]

VECTOR DOCUMENTS USING Facebook AI Similarity Search (FAISS) library

In [None]:
#!pip install faiss-gpu

#%%time
db = FAISS.from_documents(Dr_texts, cached_embedder)

In [None]:
#save faiss index
#Python pickle file that stores metadata about the indexed documents
# faiss_index binary file that stores the actual FAISS index, which means:
#The numerical vectors (embeddings)

#save faiss index
db.save_local('/content/drive/MyDrive/Final_Project/my_faiss_index')

In [None]:
query = "small cysts are concern"

docs = db.similarity_search(query, k=3)
print(docs[0])

page_content='"Hello. I understand your concern. The thing on the chest looks more of a sebaceous cyst, (attachment removed to protect patient identity) which comes and goes even if you pop out the contents as it needs to be excised completely to prevent the recurrence. For the toenail spot, I am not sure that'


In [None]:
### Gnerative question answering
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI # Import ChatOpenAI

llm_model = ChatOpenAI(openai_api_key= OPENAI_API_KEY, temperature=0.0)

query2 = "what type of small cysts are concern"
qa = RetrievalQA.from_chain_type(
    llm=llm_model,
    chain_type="stuff",
    retriever=db.as_retriever(),
    return_source_documents=True
)

  llm_model = ChatOpenAI(openai_api_key= OPENAI_API_KEY, temperature=0.0)


In [None]:
qa.invoke(query2)

{'query': 'small cysts are concern',
 'result': "Yes, small cysts can sometimes be a concern, especially if they are recurring or causing discomfort. It's important to have them evaluated by a healthcare professional to determine the best course of action.",
 'source_documents': [Document(id='b06cd1bd-a77d-4653-964b-902f3b2496ff', metadata={}, page_content='"Hello. I understand your concern. The thing on the chest looks more of a sebaceous cyst, (attachment removed to protect patient identity) which comes and goes even if you pop out the contents as it needs to be excised completely to prevent the recurrence. For the toenail spot, I am not sure that'),
  Document(id='f325c49c-462f-47f1-8fcf-e4683d45e9ca', metadata={}, page_content='"Hello. Small spot in center of forehead is seen in some pictures (attachment removed to protect patient identity). As you know skull is made up of 16 small bones, these bones are attached with cartilages and get ossified with times. Due to some gaping, this

##AGENT

In [None]:
#import dependencies for agent to work
from langchain.agents import AgentType, initialize_agent, load_tools
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI


In [None]:
def init_faiss():
    # Load or build your FAISS index
    vectorstore = FAISS.load_local("/content/drive/MyDrive/Final_Project/my_faiss_index/index.faiss", embeddings=cached_embbeder)
    return vectorstore

In [None]:
#chat model
chat_model = ChatOpenAI(model="gpt-3.5-turbo", api_key=OPENAI_API_KEY)

In [None]:
from langchain_core.prompts import ChatPromptTemplate

chat_template = ChatPromptTemplate.from_messages(
    [
        ("system", """You are a helpful Medical AI bot. Your name is MediChat. Please help them to answer
        any medical questions they have"""),
        ("human", "Hello, how are you doing?"),
        ("ai", "I'm doing well, thanks!"),
        ("human", "{user_input}"),
    ]
)