In [1]:
%pwd

'g:\\GKML\\genai_projects\\MedicalBot\\research'

In [2]:
import os

# Move from the notebook folder up to the project root so that relative paths
# such as "Data/" resolve. The guard keeps the cell idempotent: re-running it
# after the first move no longer climbs one more directory level.
if os.path.basename(os.getcwd()) == "research":
    os.chdir('../')

In [3]:
%pwd

'g:\\GKML\\genai_projects\\MedicalBot'

In [4]:
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Extract data from the PDF files

In [5]:
def load_pdf_file(data):
    """Load the PDF files matched by ``*.pdf`` under the ``data`` directory.

    Args:
        data: Directory path handed to ``DirectoryLoader`` as ``path``.

    Returns:
        The result of ``DirectoryLoader.load()``; in this notebook that is a
        list of ``Document`` objects.
    """
    pdf_loader = DirectoryLoader(path=data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [6]:
extracted_data = load_pdf_file(data="Data/" )

In [7]:
type(extracted_data)

list

In [8]:
len(extracted_data)

637

In [9]:
type(extracted_data[0])

langchain_core.documents.base.Document

# Split the data into chunks

In [10]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks for embedding.

    Args:
        extracted_data: Documents to split, as returned by ``load_pdf_file``.
        chunk_size: Value passed to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the value previously hard-coded.
        chunk_overlap: Value passed as ``chunk_overlap``. Defaults to 20, the
            value previously hard-coded.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [11]:
text_chunks = text_split(extracted_data)

In [12]:
print("Length : ",len(text_chunks))

Length :  5860


# Embedding Model

In [13]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [14]:
def get_gemini_embeddings():
    """Build the embedding client used throughout this notebook.

    Returns:
        A ``GoogleGenerativeAIEmbeddings`` instance configured with the
        ``models/embedding-001`` model.
    """
    return GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [15]:
embeddings = get_gemini_embeddings()

In [16]:
query_result = embeddings.embed_query("Hello")
len(query_result)

768

# Load environment variables

In [17]:
from dotenv import load_dotenv
load_dotenv()
import os

In [18]:
# Read the three service credentials from the process environment.
# Each name is bound to None when its variable is not defined.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

In [19]:
# Copy the keys back into os.environ. Assigning None to os.environ raises an
# opaque TypeError, so a missing key is reported by name before the assignment.
for _key_name, _key_value in (
    ("GROQ_API_KEY", GROQ_API_KEY),
    ("GOOGLE_API_KEY", GOOGLE_API_KEY),
    ("PINECONE_API_KEY", PINECONE_API_KEY),
):
    if _key_value is None:
        raise EnvironmentError(
            f"{_key_name} is not set; define it in the environment or the .env file"
        )
    os.environ[_key_name] = _key_value

# Vector Store / Vector Embeddings

- Pinecone

## Create the Pinecone index

In [20]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec


In [21]:
pc = Pinecone(api_key=PINECONE_API_KEY)

In [22]:
index_name = "medicalvdb"

In [None]:
index_list = pc.list_indexes().names()
index_list

['gkbot', 'medicalbot']

Settings needed to create a Pinecone index:

- name
- metric
- dimension
- cloud service provider
- region

In [25]:
import time

# Create the serverless index only when its name is absent from `index_list`.
if index_name not in index_list:
    pc.create_index(
        name=index_name,
        dimension=768,  # matches the embed_query vector length shown earlier
        metric='cosine',
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Wait until Pinecone reports the new index as ready, so the upsert cell
    # that follows does not write to an index that is still initialising.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

# Embed and upsert to Pinecone

In [26]:
from langchain_pinecone import PineconeVectorStore

In [None]:
# Embed the chunks with `embeddings` and store them in the `index_name` index.
# NOTE(review): presumably every run of this cell inserts the chunks again —
# confirm before re-running against an index that is already populated.
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    embedding=embeddings,
    index_name=index_name,
)

# Retriever

In [28]:
# Similarity retriever over the vector store, configured with k=2.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

In [29]:
response = retriever.invoke("what is Abscess")

In [30]:
response

[Document(id='fe66df0e-6800-4f1d-8ded-80532a1bc7ec', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 29.0, 'page_label': '30', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='Normal results\nMost abscesses heal after drainage alone; others\nrequire drainage and antibiotic drug treatment.\nResources\nBOOKS\nTurkington, Carol A., and Jeffrey S. Dover. Skin Deep. New\nYork: Facts on File, 1998.\nKEY TERMS\nWhite blood cells —Cells that protect the body\nagainst infection.\nORGANIZATIONS\nNational Institute of Arthritis and Musculoskeletal and Skin\nDiseases. 9000 Rockville Pike, Bldg. 31, Rm 9A04,\nBethesda, MD 20892.\nCarol A. Turkington\nAbuse\nDefinition'),
 Document(id='77b873af-8330-4401-bb37-7ea3b3588b30', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 28.0, 'page_label'

In [31]:
# Print the text of every retrieved document, each followed by an 80-dash rule.
separator = '-' * 80
for doc in response:
    print(doc.page_content)
    print(separator)

Normal results
Most abscesses heal after drainage alone; others
require drainage and antibiotic drug treatment.
Resources
BOOKS
Turkington, Carol A., and Jeffrey S. Dover. Skin Deep. New
York: Facts on File, 1998.
KEY TERMS
White blood cells —Cells that protect the body
against infection.
ORGANIZATIONS
National Institute of Arthritis and Musculoskeletal and Skin
Diseases. 9000 Rockville Pike, Bldg. 31, Rm 9A04,
Bethesda, MD 20892.
Carol A. Turkington
Abuse
Definition
--------------------------------------------------------------------------------
Because the lining of the abscess cavity tends to
interfere with the amount of the drug that can penetrate
the source of infection from the blood, the cavity itself
may require draining. Once an abscess has fully formed,
it often does not respond to antibiotics. Even if the
antibiotic does penetrate into the abscess, it doesn’t func-
tion as well in that environment.
Precautions
An abscess can usually be diagnosed visually, al-
though an imagi

In [32]:
retriever.invoke("what is hyper tension")

[Document(id='9ca4c7db-02a0-40fc-ab9c-579cf51255ec', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 117.0, 'page_label': '118', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='especially adults, have very strong tension habits associ-\nated with movement. Chronic misuse of the muscles is\ncommon. It may be caused by slouching in front of tele-\nvisions or video monitors, too much sitting or driving\nand too little walking, or by tension associated with past\ntraumas and injuries. Stiffening the neck after a\nwhiplashinjury or favoring a broken or sprained leg\nlong after it has healed are examples of habitual tension\ncaused by injury.'),
 Document(id='da3fae9d-6fa8-4ad9-9565-180773bb5d1e', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 118.0, 'page_label': '119', 'produce

In [34]:
retriever.invoke("what is llm")

[Document(id='4a827428-34e6-413d-9b7e-746cb064f3c4', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 566.0, 'page_label': '567', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='(T cells and B cells), lymph nodes, bone marrow,\nand the spleen. Abnormal cells (lymphocyte/leuko-\ncyte) multiply uncontrollably.\nMatch—How similar the HLA typing, out of a pos-\nsible six antigens, is between the donor and the\nrecipient.\nMixed lymphocyte culture (MLC) —Test that mea-\nsures level of reactivity between donor and recipi-\nent lymphocytes.\nNeuroblastoma —Solid tumor in children, may be\ntreated by BMT.\nPlatelets —Fragments of a large precursor cell, a'),
 Document(id='ef9f7f2b-0d04-475c-b032-67586d8c1165', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 565.0, 'page_label': '566'

# LLM

In [35]:
from langchain_groq import ChatGroq
# Chat model shared by both chains built later in this notebook.
# NOTE(review): confirm the "llama3-70b-8192" model ID is still served by Groq;
# hosted model IDs are retired over time.
llm = ChatGroq(model="llama3-70b-8192")

## Legacy chain creation

In [39]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [37]:
# System message for the chat prompt: the answering instructions, a blank
# line, then the "{context}" template slot.
system_prompt = "\n\n".join(
    [
        (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer "
            "the question. If you don't know the answer, say that you "
            "don't know. Use three sentences maximum and keep the "
            "answer concise."
        ),
        "{context}",
    ]
)

In [38]:
# Two-message chat template: the system instructions first, then the user's
# question supplied through the "{input}" variable.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [40]:
# Combine-documents chain built from the LLM and the chat prompt.
q_a_chain = create_stuff_documents_chain(llm,prompt)
# Retrieval chain wrapping the retriever and the chain above; invoking it with
# {"input": ...} returns a dict that includes 'input', 'context' and 'answer'.
rag_chain = create_retrieval_chain(retriever,q_a_chain)

In [41]:
response1 = rag_chain.invoke({"input":"what is Achalasia"})
response1

{'input': 'what is Achalasia',
 'context': [Document(id='43c5f9cb-7af7-4afb-be40-76e9d41d023e', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 33.0, 'page_label': '34', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='ation is unknown. Autoimmune disease or hidden infec-\ntion is suspected.\nSymptoms\nDysphagia, or difficulty swallowing, is the most com-\nmon symptom of achalasia. The person with achalasia usu-\nally has trouble swallowing both liquid and solid foods,\noften feeling that food “gets stuck” on the way down. The\nperson has chest pain that is often mistaken for angina\npectoris (cardiac pain). Heartburn and difficulty belching\nare common. Symptoms usually get steadily worse. Other'),
  Document(id='34babfb8-335c-4e79-8d26-0e12a593bb94', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T

In [42]:
response1['answer']

'Achalasia is a condition where the esophageal sphincter remains contracted, interrupting normal peristalsis and preventing food from entering the stomach. This causes symptoms such as dysphagia (difficulty swallowing), chest pain, heartburn, and difficulty belching. The ultimate cause of achalasia is the degeneration of nerve cells that normally signal the brain to relax the esophageal sphincter.'

In [43]:
print(response1['answer'])

Achalasia is a condition where the esophageal sphincter remains contracted, interrupting normal peristalsis and preventing food from entering the stomach. This causes symptoms such as dysphagia (difficulty swallowing), chest pain, heartburn, and difficulty belching. The ultimate cause of achalasia is the degeneration of nerve cells that normally signal the brain to relax the esophageal sphincter.


In [44]:
response1 = rag_chain.invoke({"input":"what is probability"})
response1['answer']

'I don\'t know. The provided context does not mention the term "probability" or provide any information related to it.'

## LCEL Chains

In [45]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [46]:
# NOTE(review): identical to the system_prompt assigned earlier in this
# notebook; this cell rebinds the same name to the same text.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

In [47]:
# NOTE(review): same construction as the `prompt` built earlier in this
# notebook (system message plus a human "{input}" message).
prompt = ChatPromptTemplate.from_messages(
    [
        ("system",system_prompt),
        ("human","{input}")
    ]
)

In [48]:
def _join_page_contents(docs):
    """Concatenate each document's page_content, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


# LCEL pipeline: the question is sent to the retriever, whose documents are
# flattened into one string for "{context}", and is also passed through
# unchanged as "{input}". The filled prompt goes to the LLM and the reply is
# run through StrOutputParser.
rag_chain1 = (
    {
        "context": retriever | _join_page_contents,
        "input": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [49]:
rag_chain1.invoke("what is Achalasia")

'Achalasia is a condition where the esophageal sphincter remains contracted, interrupting normal peristalsis and preventing food from entering the stomach.'

In [50]:
rag_chain1.invoke("what is probability")

'I don\'t know. The provided context does not mention the term "probability" or provide any information related to it.'

# Read the data from the existing index of Pinecone

In [51]:
from langchain_pinecone import PineconeVectorStore

In [52]:
index_name = "medicalvdb"

In [53]:
# Attach to the existing `index_name` index with the same embedding client.
vectorstore = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

In [55]:
# Rebinds the name `retriever` to a retriever over `vectorstore` (k=2).
# Chains constructed earlier were given the previous retriever object.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

In [57]:
resp1 = retriever.invoke("what is Achalasia")
resp1

[Document(id='43c5f9cb-7af7-4afb-be40-76e9d41d023e', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 33.0, 'page_label': '34', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'Data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='ation is unknown. Autoimmune disease or hidden infec-\ntion is suspected.\nSymptoms\nDysphagia, or difficulty swallowing, is the most com-\nmon symptom of achalasia. The person with achalasia usu-\nally has trouble swallowing both liquid and solid foods,\noften feeling that food “gets stuck” on the way down. The\nperson has chest pain that is often mistaken for angina\npectoris (cardiac pain). Heartburn and difficulty belching\nare common. Symptoms usually get steadily worse. Other'),
 Document(id='34babfb8-335c-4e79-8d26-0e12a593bb94', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 33.0, 'page_label':