<a href="https://colab.research.google.com/github/UdaraChamidu/EyeDiseaseChatbot/blob/main/EyeBot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import important libraries

In [146]:
!pip install langchain -qU
!pip install langchain-community -qU
!pip install langchain-chroma -qU
!pip install langchain-openai -qU

In [126]:
import os
from google.colab import userdata

# Initialize LLM

In [147]:
from langchain_openai import ChatOpenAI

# Set Hugging Face token if required
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
# Initialize Hugging Face LLM
llm = ChatOpenAI(
    model = 'gpt-3.5-turbo',
    temperature=0
)

# Initialize Embedding Model

In [148]:
from langchain_openai import OpenAIEmbeddings
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")


# Load the pdf file from Google Drive

In [149]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [150]:
pdf_path = "/content/drive/My Drive/Eye_Disease.pdf"

In [151]:
!pip install pymupdf

from langchain_community.document_loaders.pdf import PyMuPDFLoader
loader = PyMuPDFLoader(pdf_path)
data = loader.load()



In [132]:
len(data)

897

In [133]:
data[45]

Document(metadata={'producer': '3-Heights(TM) PDF Producer 4.4.43.3 (http://www.pdf-tools.com); modified using iTextSharp 5.2.1 (c) 1T3XT BVBA', 'creator': 'Elsevier', 'creationdate': '2015-05-13T17:49:48+07:00', 'source': '/content/drive/My Drive/Eye_Disease.pdf', 'file_path': '/content/drive/My Drive/Eye_Disease.pdf', 'total_pages': 897, 'format': 'PDF 1.7', 'title': "Kanski's Clinical Ophthalmology, Eighth Edition (2016)", 'author': 'Brad Bowling', 'subject': "Kanski's Clinical Ophthalmology, Eighth Edition (2016) ii. doi:10.1016/B978-0-7020-5572-0.00025-8", 'keywords': '', 'moddate': '2015-07-05T16:31:24+09:30', 'trapped': '', 'modDate': "D:20150705163124+09'30'", 'creationDate': "D:20150513174948+07'00'", 'page': 45}, page_content='34\nBlepharitis\nTable 1.4\u2002 Summary of characteristics of chronic blepharitis\nFeature\nAnterior blepharitis\nPosterior blepharitis\nStaphylococcal\nSeborrhoeic\nLashes\nDeposit\nHard\nSoft\nLoss\n++\n+\nDistorted or trichiasis\n++\n+\nLid margin\n

# Split Documents into Chunks

In [152]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)

splits = text_splitter.split_documents(data)

In [135]:
len(splits)

6509

In [136]:
splits[1000]


Document(metadata={'producer': '3-Heights(TM) PDF Producer 4.4.43.3 (http://www.pdf-tools.com); modified using iTextSharp 5.2.1 (c) 1T3XT BVBA', 'creator': 'Elsevier', 'creationdate': '2015-05-13T17:49:48+07:00', 'source': '/content/drive/My Drive/Eye_Disease.pdf', 'file_path': '/content/drive/My Drive/Eye_Disease.pdf', 'total_pages': 897, 'format': 'PDF 1.7', 'title': "Kanski's Clinical Ophthalmology, Eighth Edition (2016)", 'author': 'Brad Bowling', 'subject': "Kanski's Clinical Ophthalmology, Eighth Edition (2016) ii. doi:10.1016/B978-0-7020-5572-0.00025-8", 'keywords': '', 'moddate': '2015-07-05T16:31:24+09:30', 'trapped': '', 'modDate': "D:20150705163124+09'30'", 'creationDate': "D:20150513174948+07'00'", 'page': 146}, page_content='Acute bacterial conjunctivitis\nAcute bacterial conjunctivitis is a common and usually self-\nlimiting condition caused by direct contact with infected secre-\ntions. The most common isolates are Streptococcus pneumoniae, \nStaphylococcus aureus, Haemo

In [193]:
!pip install --upgrade --force-reinstall numpy


Collecting numpy
  Using cached numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Using cached numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-chroma 0.2.2 requires numpy<2.0.0,>=1.22.4; python_version < "3.12", but you have numpy 2.2.4 which is incompatible.
tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, but you have numpy 2.2.4 which is incompatible.
numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.2.4 which is incompatible.[0m[31m
[0mSuccessfully installed numpy-2.2.4


# Create Vector Store and Retriever

Create Semantic Search Retriever


In [174]:
from langchain_chroma import Chroma

# Create a vector store from the document chunks
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

In [175]:
vectorstore_retreiver = vectorstore.as_retriever(search_kwargs={"k": 2})
# k = number of documents need to be retrieved

Create Keyword Search Retriever


In [179]:
!pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


In [180]:
from langchain.retrievers import BM25Retriever

keyword_retriever = BM25Retriever.from_documents(splits)

keyword_retriever.k =  2

Create Hybrid Search Retriever


In [181]:
from langchain.retrievers import EnsembleRetriever
ensemble_retriever = EnsembleRetriever(retrievers = [vectorstore_retreiver, keyword_retriever], weights = [0.5, 0.5])


# Define Prompt Template

In [182]:
from langchain_core.prompts import ChatPromptTemplate

# Define the system prompt
system_prompt = (
    "You are an intelligent chatbot built for eye disease identification. You have to answer the eye disease questions that user asks. Use the following context to answer the question. If you don't know the answer, just say that you don't know."
    "\n\n"
    "{context}"  # this context come from retriever
    # best chunks for the asked question from the vector space.
    # we can define number of chunks that need to come here from retriever
)

# Create the prompt template
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt), # 2 roles, system and human
        ("human", "{input}"), # user input
    ]
)

In [183]:
prompt

ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are an intelligent chatbot built for eye disease identification. You have to answer the eye disease questions that user asks. Use the following context to answer the question. If you don't know the answer, just say that you don't know.\n\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])

# Create RAG Chain

In [184]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Create the question-answering chain
qa_chain = create_stuff_documents_chain(llm, prompt) # create QA chain using llm and prompt
# prompt = system prompt + user input
# system prompt = instruction + context

# Create the RAG chain containing QA chain and retriever
rag_chain = create_retrieval_chain(ensemble_retriever, qa_chain)

# Invoke RAG Chain with Example Questions

In [158]:
response = rag_chain.invoke({"input": "what is gluacoma ?"}) # ask questions
response["answer"]

'Glaucoma is a group of eye conditions that damage the optic nerve, often due to increased pressure in the eye. It can lead to vision loss and blindness if not treated early.'

In [159]:
response = rag_chain.invoke({"input": "diagnosis of gluacoma ?"}) # ask questions
response["answer"]

'The diagnosis of glaucoma typically involves a combination of tests and examinations. Some of the key investigations include pachymetry for central corneal thickness (CCT), perimetry (visual field testing), and imaging of the optic disc, peripapillary retinal nerve fiber layer (RNFL), and/or ganglion cell complex using techniques such as OCT, red-free photography, stereo disc photography, confocal scanning laser ophthalmoscopy, or scanning laser polarimetry. Visual field defects are also an important indicator of glaucoma. It is important to consult an eye care professional for a comprehensive evaluation and diagnosis.'

# Create a retriever that aware history

In [185]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# Define the contextualize system prompt
contextualize_system_prompt = (
    "using chat history and the latest user question, just reformulate question if needed and otherwise return it as is"
)

# Create the contextualize prompt template
contextualize_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Create the history-aware retriever
history_aware_retriever = create_history_aware_retriever(
    llm, ensemble_retriever, contextualize_prompt
)

# Create History-Aware RAG Chain

In [186]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

system_prompt = (
    "You are an intelligent chatbot built for eye disease identification. Use the following context to answer the question. If you don't know the answer, just say that you don't know."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Manage Chat Session History

In [202]:
# different user has different history
# here we use session id. according to session id, store the history in a list

from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Initialize the store for session histories (a dictionary)
store = {}

# Function to get the session history for a given session ID
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

# Create the conversational RAG chain with session history
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [208]:
import joblib
joblib.dump(conversational_rag_chain, "eye_disease_chatbot.pkl")

PicklingError: Can't pickle <class 'langchain_core.runnables.base.RunnableParallel[dict[str, Any]]'>: it's not found as langchain_core.runnables.base.RunnableParallel[dict[str, Any]]


# Invoke Conversational RAG Chain

In [188]:
response = conversational_rag_chain.invoke(
    {"input": "what is diabitic retinopathy ?"},
    config={"configurable": {"session_id": "101"}},  # this is session id.
)   # each user have unique session id
response["answer"]

'Diabetic retinopathy (DR) is a complication of diabetes that affects the eyes. It is caused by damage to the blood vessels of the light-sensitive tissue at the back of the eye (retina). DR can lead to vision impairment or even blindness if left untreated. It is more common in individuals with type 1 diabetes and those who have had diabetes for a long duration.'

In [165]:
response = conversational_rag_chain.invoke(
    {"input": "what are the diognosis of it ?"},
    config={"configurable": {"session_id": "101"}},  # this is session id.
)   # each user have unique session id
response["answer"]

'The diagnosis of OIS (Ocular Ischemic Syndrome) is usually based on the symptoms and signs observed in the affected eye. OIS is typically unilateral in 80% of cases. The signs can be variable and subtle, which may lead to the condition being missed or misdiagnosed. Symptoms of OIS include gradual loss of vision over weeks or months, occasional sudden or intermittent vision loss (amaurosis fugax), and ocular or periocular pain in 40% of cases. Patients may also notice unusually persistent symptoms.'

In [166]:
response = conversational_rag_chain.invoke(
    {"input": "what are the diognosis of it ?"},
    config={"configurable": {"session_id": "99"}},  # this is session id.
)   # each user have unique session id
response["answer"]

'The diagnosis of OIS (Ocular Ischemic Syndrome) is usually based on the symptoms and signs observed in the affected eye. OIS is typically unilateral in 80% of cases. The signs can be variable and subtle, which may lead to the condition being missed or misdiagnosed. Symptoms of OIS include gradual loss of vision over weeks or months, occasional sudden or intermittent vision loss (amaurosis fugax), and ocular and periocular pain in 40% of cases. Patients may also notice unusually persistent symptoms.'

In [167]:
response = conversational_rag_chain.invoke(
    {"input": "what are you doing now?"},
    config={"configurable": {"session_id": "99"}},  # this is session id.
)   # each user have unique session id
response["answer"]

'I am an intelligent chatbot built for eye disease identification. I can help answer questions related to eye diseases and conditions. If you have any questions, feel free to ask!'

In [169]:
response = conversational_rag_chain.invoke(
    {"input": "what are the diognosis of diabetic retinopathy ?"},
    config={"configurable": {"session_id": "1000"}},  # this is session id.
)   # each user have unique session id
response["answer"]

'The diagnosis of diabetic retinopathy (DR) includes the following:\n1. Microaneurysms: These are generally the earliest signs of DR and persist as more advanced lesions appear.\n2. Diabetic maculopathy: This strictly refers to the presence of any retinopathy at the macula, but is commonly reserved for significant changes, particularly vision-threatening oedema and ischaemia.\n3. Preproliferative diabetic retinopathy (PPDR): This manifests with cotton wool spots, venous changes, intraretinal hemorrhages, and other signs of retinal damage.'

In [171]:
response = conversational_rag_chain.invoke(
    {"input": "explain breifly those things"},
    config={"configurable": {"session_id": "1000"}},  # this is session id.
)   # each user have unique session id
response["answer"]

"The cornea is a vital part of the eye that provides about three-quarters of the eye's optical power. It plays a protective role and is free of blood vessels. Nutrients are supplied to the cornea through the aqueous humor at the back and tears at the front, while metabolic waste is removed through the same routes."

In [172]:
response = conversational_rag_chain.invoke(
    {"input": "i am udara ?"},
    config={"configurable": {"session_id": "99"}},  # this is session id.
)   # each user have unique session id
response["answer"]

'Hello Udara! How can I assist you today?'

In [173]:
response = conversational_rag_chain.invoke(
    {"input": "what is my name ?"},
    config={"configurable": {"session_id": "99"}},  # this is session id.
)   # each user have unique session id
response["answer"]

"I'm sorry, but I don't have access to your name. If you have any questions related to eye diseases or the information provided in the context, feel free to ask!"

In [197]:
!pip install huggingface_hub




In [210]:
!git clone https://huggingface.co/spaces/UdaraChamidu/EyeBot


Cloning into 'EyeBot'...
remote: Enumerating objects: 4, done.[K
remote: Total 4 (delta 0), reused 0 (delta 0), pack-reused 4 (from 1)[K
Unpacking objects: 100% (4/4), 1.31 KiB | 1.31 MiB/s, done.


In [212]:
!pip install streamlit


Collecting streamlit
  Downloading streamlit-1.44.1-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.44.1-py3-none-any.whl (9.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m63.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m62.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [213]:
import streamlit as st

x = st.slider('Select a value')
st.write(x, 'squared is', x * x)

2025-04-13 10:57:20.063 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
