In [None]:
!pip install langchain==0.1.14
!pip install langchain-experimental==0.0.56
!pip install langchain-community==0.0.31
!pip install faiss-cpu==1.8.0
!pip install pdfplumber==0.11.0
!pip install gradio==4.25.0

In [2]:
%pip install --quiet --upgrade langchain langchain-community langchain-chroma

Note: you may need to restart the kernel to use updated packages.


In [2]:
from langchain_community.document_loaders import PDFPlumberLoader
loader = PDFPlumberLoader("data/rajasekar-krishnan-2021-mental-health-of-medical-practitioner.pdf")
docs = loader.load()

# Check the number of pages
print("Number of pages in the PDF:",len(docs))

# Load the random page content
docs[2].page_content

Number of pages in the PDF: 7


'Presidential Address\nmedical student population is 46.5% • Poor judgment in critical care and sur- of their mental capacity change, they\nwith it being three times more among gery. refuse to seek advice.20 In depression,\nmen than women.13 • Losing temper with caregivers. somatic symptoms and insomnia appear\n8. Stress of breaking bad • Losing clients. early. A physician unable to identify his\nMany stressors involved in the profes- depression feels incompetent. Seeking\nBurnout\nsion are unique to MPs such as they to recommend to another colleague is to\nare routinely faced with breaking bad This is defined as a pathological syn- admit failure when their problems are\nnews, frequent contact with illness, drome from an extended period of played down by the treating physician,\nanxiety, suffering, and death.14 They occupational stress. This is neither reinforcing his belief of wordlessness\nhave to face the patient’s emotional anxiety nor depression. The three main and helplessness an

In [None]:
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama.llms import OllamaLLM
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Cấu hình embedding model
def get_embedder():
    return HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")


# Tải và chia nhỏ tài liệu
def load_and_split_documents(file_path, chunk_size=1000, chunk_overlap=200):
    loader = TextLoader(file_path)
    docs = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return text_splitter.split_documents(docs)


# Tạo vector store từ các đoạn văn bản đã chia nhỏ và embedding
def create_vector_store(splits, embedder):
    return FAISS.from_documents(splits, embedder)


# Định dạng tài liệu thành văn bản chuỗi
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# Xây dựng và trả về chuỗi RAG
def create_rag_chain(file_path, model_name="llama3.2"):
    embedder = get_embedder()
    splits = load_and_split_documents(file_path)
    vectorstore = create_vector_store(splits, embedder)
    retriever = vectorstore.as_retriever()

    llm = OllamaLLM(model=model_name)
    prompt = """
Given the user query, extract the symptoms mentioned and identify the most likely disease categories they could be linked to.
The symptoms should be categorized into groups (e.g., cardiovascular, respiratory, neurological, digestive, etc.).
Also, suggest the potential diseases based on these symptoms. Use the following symptom-disease associations to guide your response:

1. **Cardiovascular diseases:**
   - Symptoms: Chest pain, shortness of breath, fatigue, dizziness, palpitations.
   - Diseases: Coronary artery disease, heart failure, arrhythmias, myocardial infarction (heart attack).

2. **Respiratory diseases:**
   - Symptoms: Cough, difficulty breathing, wheezing, chest pain, fever.
   - Diseases: Pneumonia, asthma, chronic obstructive pulmonary disease (COPD), bronchitis.

3. **Neurological diseases:**
   - Symptoms: Headache, dizziness, numbness, memory loss, muscle weakness.
   - Diseases: Stroke, migraine, epilepsy, Parkinson's disease, Alzheimer's disease.

4. **Digestive diseases:**
   - Symptoms: Abdominal pain, nausea, vomiting, diarrhea, loss of appetite.
   - Diseases: Gastritis, irritable bowel syndrome (IBS), Crohn’s disease, ulcerative colitis.

5. **Infectious diseases:**
   - Symptoms: Fever, chills, sore throat, body aches, cough.
   - Diseases: Flu, cold, COVID-19, bacterial infections, viral infections.

6. **Musculoskeletal diseases:**
   - Symptoms: Joint pain, swelling, stiffness, muscle weakness.
   - Diseases: Osteoarthritis, rheumatoid arthritis, gout, lupus.

**Task:**  
From the user's query, extract the symptoms they mentioned and categorize them into one or more of the disease groups. Then, provide a list of the most likely diseases related to these symptoms. Additionally, mention any warning signs or urgent symptoms that should be addressed immediately.

Example input: "I have been feeling very tired, I get headaches frequently, and I have trouble breathing sometimes. Could it be something serious?"

The output should include:
- The extracted symptoms
- Disease categories and associated diseases
- Warning signs to watch out for
- Don't hallucination

User query: I feel headache
"""

    # Tạo một hàm để xử lý đầu ra của retriever
    def retrieve_and_format(query):
        retrieved_docs = retriever.retrieve(query)
        return format_docs(retrieved_docs)

    rag_chain = (
            {"context": retrieve_and_format, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
    )
    return rag_chain


# Gọi RAG chain với câu hỏi và nhận câu trả lời
def ask_question(chain, question):
    return chain.invoke(question)


# Hàm chính
# if __name__ == "__main__":
file_path = 'C:/Users/7420/Desktop/Code/Python/RAG_Healthcare/data/cardiovascular/data.txt'

rag_chain = create_rag_chain(file_path)
question = "What is Task Decomposition?"
answer = ask_question(rag_chain, question)

print("Answer:", answer)


TypeError: unsupported operand type(s) for |: 'dict' and 'str'

In [6]:
rag_chain.invoke('''I haven’t been feeling well lately. Over the past few days, I’ve been experiencing some unusual symptoms, and I’m not sure what they could be related to. I often feel extremely tired and drained, even after doing simple tasks. Throughout the day, I feel dizzy, especially when standing up or bending down. I’ve also been having frequent headaches, mainly around my forehead, and there’s a feeling of heaviness in my neck. Sometimes, I also feel short of breath, and my heart rate seems to be faster than usual. On several occasions, my hands and feet feel numb or cold, especially in the mornings when I wake up. However, I don’t have a fever or a cough. I’m really worried about whether these symptoms could indicate something serious.''')

"I don't know how to provide a direct answer to your question about your symptoms, as the context seems to be focused on burnout among healthcare professionals rather than general medical advice. However, I can suggest that you consult a doctor or a healthcare professional for proper evaluation and guidance on your symptoms. They can help determine the cause of your symptoms and recommend appropriate treatment options."

In [3]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain.embeddings import HuggingFaceEmbeddings

text_splitter = SemanticChunker(HuggingFaceEmbeddings())
documents = text_splitter.split_documents(docs)

  from tqdm.autonotebook import tqdm, trange


In [4]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Instantiate the embedding model
embedder = HuggingFaceEmbeddings()

# Create the vector store
vector = FAISS.from_documents(documents, embedder)

In [5]:
# Input
retriever = vector.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retrieved_docs = retriever.invoke("How does plant respond to disease?")

In [None]:
import gradio as gr
# from langchain_community.llms.ollama import Ollama

# Khởi tạo mô hình LLM từ Ollama
# llm = OllamaLLM(model="llama3.2")

# Hàm để xử lý đầu vào từ người dùng và trả về kết quả từ mô hình
def chatbot_response(user_input, _):
    # Lấy phản hồi từ mô hình LLM
    response = rag_chain.invoke(user_input)
    return response

# Tạo giao diện chat với Gradio
gr.ChatInterface(
    chatbot_response,
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Ask me question related to Plants and their diseases", container=False, scale=7),
    title="Plant's Chatbot",
    examples=["What are different kinds of plant diseases", "What is Stewart’s wilt disease"],
    cache_examples=True,
    retry_btn=None,

).launch(share = True)

Caching examples at: 'C:\Users\7420\Desktop\Code\Python\RAG_Healthcare\gradio_cached_examples\91'
Caching example 1/2
IMPORTANT: You are using gradio version 4.25.0, however version 4.44.1 is available, please upgrade.
--------
Caching example 2/2
Running on local URL:  http://127.0.0.1:7864

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.




ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "c:\Users\7420\anaconda3\Lib\site-packages\pydantic\type_adapter.py", line 270, in _init_core_attrs
    self._core_schema = _getattr_no_parents(self._type, '__pydantic_core_schema__')
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\7420\anaconda3\Lib\site-packages\pydantic\type_adapter.py", line 112, in _getattr_no_parents
    raise AttributeError(attribute)
AttributeError: __pydantic_core_schema__

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\7420\anaconda3\Lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 401, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\7420\anaconda3\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
    return awa

In [23]:
llm('What can you do ?')

"I can be used in a variety of ways, from helping you plan a vacation to creating art. I'm here to assist you in finding the help or information you need. My strengths include answering questions, generating text and images and even just chatting with you."

In [16]:
print(qa("How does plant respond to disease?")["result"])



[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
1. Use the following pieces of context to answer the question at the end.
2. If you don't know the answer, just say that "I don't know" but don't make up an answer on your own.

3. Keep the answer crisp and limited to 3,4 sentences.

Context: Context:
content:Address for Correspondence: Ramakrishnan Krishnan, Athma Hospitals and Submitted: 9 Mar. 2021
Research, Tiruchirappalli, Tamil Nadu 620018, India. E-mail: drkrk@athmahospitals. Accepted: 10 Mar.
source:data/rajasekar-krishnan-2021-mental-health-of-medical-practitioner.pdf

Context:
content:Vonnegut M. Recovery and service: On
content/323/7315/728
3. Frank E, Biola H, and Burnett CA. 12.
source:data/rajasekar-krishnan-2021-mental-health-of-medical-practitioner.pdf

Context:
content:Personality traits (J. Bradley Harssal M.D.)
COVID-19 and Mental Illness
Certain personality traits of the
It wears you down q

ConnectionError: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/generate (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000286AC5F4450>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

In [None]:
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains import RetrievalQA
import gradio as gr

# Load the PDF
loader = PDFPlumberLoader("11pests1disease.pdf")
docs = loader.load()

# Split into chunks
text_splitter = SemanticChunker(HuggingFaceEmbeddings())
documents = text_splitter.split_documents(docs)


# Instantiate the embedding model
embedder = HuggingFaceEmbeddings()

# Create the vector store and fill it with embeddings
vector = FAISS.from_documents(documents, embedder)
retriever = vector.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Define llm
llm = Ollama(model="mistral")

# Define the prompt
prompt = """
1. Use the following pieces of context to answer the question at the end.
2. If you don't know the answer, just say that "I don't know" but don't make up an answer on your own.\n
3. Keep the answer crisp and limited to 3,4 sentences.

Context: {context}

Question: {question}

Helpful Answer:"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt)

llm_chain = LLMChain(
                  llm=llm,
                  prompt=QA_CHAIN_PROMPT,
                  callbacks=None,
                  verbose=True)

document_prompt = PromptTemplate(
    input_variables=["page_content", "source"],
    template="Context:\ncontent:{page_content}\nsource:{source}",
)

combine_documents_chain = StuffDocumentsChain(
                  llm_chain=llm_chain,
                  document_variable_name="context",
                  document_prompt=document_prompt,
                  callbacks=None)

qa = RetrievalQA(
                  combine_documents_chain=combine_documents_chain,
                  verbose=True,
                  retriever=retriever,
                  return_source_documents=True)

def respond(question,history):
    return qa(question)["result"]


gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Ask me question related to Plants and their diseases", container=False, scale=7),
    title="Plant's Chatbot",
    examples=["What are different kinds of plant diseases", "What is Stewart’s wilt disease"],
    cache_examples=True,
    retry_btn=None,

).launch(share = True)