# Install Required Packages

In [None]:
# Install all dependencies in a single resolver pass so pip can pick mutually
# compatible versions. `%pip` (instead of `!pip`) installs into the environment
# of the running kernel.
# TODO: pin versions (pkg==x.y.z) once a known-good set is identified.
%pip install --quiet langchain langchain-community cohere wikipedia chromadb

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.5/259.5 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m59.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for wikipedia (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[

# Importing Dependencies

In [None]:
import os
from google.colab import userdata
import langchain
from langchain.schema import Document
from langchain.llms import Cohere
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import CohereEmbeddings
from langchain.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers import MultiQueryRetriever
from langchain.chat_models import ChatCohere

# API Key

In [None]:
# Copy the Colab secret named 'Cohere' into the COHERE_API_KEY environment
# variable, so the key itself never appears in the notebook.
os.environ.update({'COHERE_API_KEY': userdata.get('Cohere')})

# Getting The Information From Wikipedia

In [None]:
# Wikipedia search terms; each entry is used as one loader query.
diseases = [
    "Diabetes",
    "Hypertension",
    "Asthma",
    "Tuberculosis",
    "Cancer",
    "Malaria",
]

In [None]:
# Fetch the Wikipedia pages matching each disease name.
#
# WikipediaLoader truncates every page to its first 4000 characters unless
# `doc_content_chars_max` is raised. The "== Treatment ==" heading that the
# next step searches for normally sits much further into an article, so the
# limit is lifted here to keep that section in the loaded text.
documents = []
for disease in diseases:
    loader = WikipediaLoader(
        query=disease,
        lang='en',
        doc_content_chars_max=1_000_000,
    )
    documents.extend(loader.load())


In [None]:
def _extract_treatment_section(content: str) -> str:
    """Return the first '== Treatment' section of a Wikipedia page.

    The section starts at the first occurrence of ``"== Treatment"`` and runs
    up to (not including) the next level-2 heading, i.e. the next line that
    starts with ``"== "``. Sub-headings (``"=== ... ==="``) inside the section
    are kept. An empty string is returned when the page has no such heading.

    The end is searched for *after* the heading line: searching for ``"=="``
    right after the start would match the heading's own closing ``==`` and
    return nothing but the heading text.
    """
    start = content.find("== Treatment")
    if start == -1:
        return ""

    heading_end = content.find("\n", start)
    if heading_end == -1:
        # The heading is the last line of the page; there is no body text.
        return content[start:].strip()

    next_heading = content.find("\n== ", heading_end)
    section = content[start:] if next_heading == -1 else content[start:next_heading]
    return section.strip()


# Build one Document per loaded page holding only its treatment section.
# Pages without a treatment section still get a (title-only) document, so the
# list is never shorter than `documents`.
treatment_docs = []
for doc in documents:
    title = doc.metadata.get("title", "Unknown Disease")
    treatment_section = _extract_treatment_section(doc.page_content)

    combined_content = f"Disease: {title}\n\n{treatment_section}"
    treatment_docs.append(Document(page_content=combined_content, metadata={"source": title}))


# Split the Dataset into Chunks

In [None]:
# Cut the treatment documents into pieces of at most 2000 characters, with a
# 200-character overlap between neighbouring pieces.
split_data = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,
)
chunk = split_data.split_documents(treatment_docs)

# Vector Embeddings & Database

In [None]:
# Embedding model used to vectorise the chunks; the API key is expected to be
# available through the COHERE_API_KEY environment variable set earlier.
embedding = CohereEmbeddings(
    user_agent='langchain',
)

In [None]:
# Embed every chunk and store the vectors in a Chroma collection that lives
# on disk under ./db.
# NOTE(review): re-running this cell against an existing ./db presumably adds
# the same chunks again — confirm, and clear the directory first if so.
db = Chroma.from_documents(
    documents=chunk,
    embedding=embedding,
    persist_directory='./db',
)

# Flush the collection to the persist directory.
db.persist()

# Save the Database

In [None]:
# Recursively archive the ./db directory into chroma_db.zip.
!zip -r chroma_db.zip db

  adding: db/ (stored 0%)
  adding: db/cc9529fd-a9cb-4d7f-8a16-71b8747ac9d8/ (stored 0%)
  adding: db/cc9529fd-a9cb-4d7f-8a16-71b8747ac9d8/link_lists.bin (stored 0%)
  adding: db/cc9529fd-a9cb-4d7f-8a16-71b8747ac9d8/header.bin (deflated 61%)
  adding: db/cc9529fd-a9cb-4d7f-8a16-71b8747ac9d8/data_level0.bin (deflated 100%)
  adding: db/cc9529fd-a9cb-4d7f-8a16-71b8747ac9d8/length.bin (deflated 100%)
  adding: db/chroma.sqlite3 (deflated 50%)


In [None]:
# Hand the zipped vector store to the browser for download (Colab only).
from google.colab import files

archive_name = "chroma_db.zip"
files.download(archive_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Set Up the Memory Module

In [None]:
# Conversation history buffer shared with the chain.
# - memory_key: name under which the history is exposed ('chat_history').
# - return_messages: keep the history as message objects, not one string.
# - output_key: which chain output is recorded as the assistant's reply.
memory = ConversationBufferMemory(
    output_key='answer',
    return_messages=True,
    memory_key='chat_history',
)

# Set Up MultiQueryRetriever

In [None]:
# Cohere LLM with default settings; reused by the conversational chain below.
llm = Cohere()

# Plain similarity retriever over the Chroma store.
retriever = db.as_retriever()

# Wrap the base retriever so the LLM is used when retrieving documents.
multi_query_retriver = MultiQueryRetriever.from_llm(retriever=retriever, llm=llm)

# Initialize Conversational Retrieval Chain

In [None]:
# Conversational QA chain: retrieves through the multi-query retriever, keeps
# the dialogue in `memory`, and returns the retrieved documents along with
# the answer.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=multi_query_retriver,
    return_source_documents=True,
    memory=memory,
)

# Interact with the Chatbot

In [None]:
# First turn of the conversation.
query = "What is Diabetes?"

# Calling the chain object directly (`chain({...})`) is deprecated in
# LangChain; `invoke` is the supported entry point and returns the same dict.
result = chain.invoke({"question": query})

print(result['answer'])

 Diabetes is a metabolic disease where the body cannot make or properly use insulin, resulting in elevated levels of glucose in the blood. 

There are two primary types of diabetes: type 1 and type 2. Type 1 diabetes occurs when the body cannot produce insulin, and type 2 diabetes occurs when the body cannot properly use insulin. Both types require ongoing medical care and management to prevent complications. 

Would you like me to go into more detail on the characteristics of each type of diabetes? 


In [None]:
# Follow-up question: it names no disease, so it relies on the chat history
# held by the chain's memory for context.
query = "what is the treatment ?"

# Calling the chain object directly (`chain({...})`) is deprecated in
# LangChain; `invoke` is the supported entry point and returns the same dict.
result = chain.invoke({"question": query})

print(result['answer'])

 I cannot provide a specific treatment for diabetes, as it varies significantly between different types of the disease. However, I can provide a brief overview of some common treatments. 

Type 1 diabetes is usually managed with insulin, and it is also important to closely monitor blood sugar levels and make healthy dietary and lifestyle choices. People with type 2 diabetes may be able to manage the condition with diet and exercise, however, many people with type 2 diabetes require oral medication or insulin injections to manage the disease. 

People with diabetes may also benefit from adding or adjusting dietary supplements, such as metformin, semaglutide, and thiazolidinediones, to their treatment plan. 

It is important to consult a healthcare professional for personalized advice and treatment recommendations. 


In [None]:
query = "what type of diabetes are there?"

# Calling the chain object directly (`chain({...})`) is deprecated in
# LangChain; `invoke` is the supported entry point and returns the same dict.
result = chain.invoke({"question": query})

# Only show the answer when retrieval produced at least one supporting
# document; otherwise report that nothing was found.
if result["source_documents"]:
    print(result["answer"])
else:
    print("Sorry, I couldn't find information about that.")

 Type 1 diabetes is when the pancreas produces no or very little insulin, so a person has to inject insulin regularly. Type 2 diabetes is when the body's cells stop responding to insulin normally, and the pancreas doesn't make enough insulin to compensate. 

Both types of diabetes are chronic conditions, but the causes, development, and treatment differ. Would you like me to go into more detail on any of these differences? 
