In [1]:
import os

In [2]:
%pwd

'd:\\AI Personal Projects\\GenAI - AI Agents\\Medical-Agent\\research'

In [3]:
# Move from the `research/` notebook folder up to the project root so that
# relative paths such as "data" resolve. The guard keeps the cell idempotent:
# re-running it no longer climbs one directory higher each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir('../')

In [4]:
%pwd

'd:\\AI Personal Projects\\GenAI - AI Agents\\Medical-Agent'

In [5]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [6]:
def load_pdfs_from_directory(directory):
    """Load every PDF matched by ``**/*.pdf`` under ``directory``.

    Args:
        directory: Folder handed to ``DirectoryLoader`` as the search root.

    Returns:
        The result of ``DirectoryLoader.load()`` with ``PyPDFLoader`` as the
        per-file loader. In this notebook that is a list with one
        ``Document`` per PDF page.
    """
    return DirectoryLoader(
        directory,
        glob="**/*.pdf",
        loader_cls=PyPDFLoader,
    ).load()

In [7]:
extracted_docs = load_pdfs_from_directory("data")

In [15]:
extracted_docs[:2]

[Document(metadata={'producer': 'PDFlib+PDI 6.0.3 (SunOS)', 'creator': 'Adobe Acrobat 6.0', 'creationdate': '2006-10-16T20:19:33+02:00', 'moddate': '2006-10-16T22:03:45+02:00', 'source': 'data\\The-Gale-Encyclopedia-of-Medicine-3rd-Edition.pdf', 'total_pages': 4505, 'page': 0, 'page_label': 'i'}, page_content=''),
 Document(metadata={'producer': 'PDFlib+PDI 6.0.3 (SunOS)', 'creator': 'Adobe Acrobat 6.0', 'creationdate': '2006-10-16T20:19:33+02:00', 'moddate': '2006-10-16T22:03:45+02:00', 'source': 'data\\The-Gale-Encyclopedia-of-Medicine-3rd-Edition.pdf', 'total_pages': 4505, 'page': 1, 'page_label': 'ii'}, page_content='The GALE\nENCYCLOPEDIA of\nMEDICINE\nTHIRD EDITION')]

In [8]:
len(extracted_docs)

4505

In [10]:
from typing import List
from langchain.schema import Document

def filter_to_minimal_docs(docs: List[Document]) -> List[Document]:
    """Return copies of ``docs`` whose metadata keeps only the ``'source'`` key.

    Args:
        docs: Documents to slim down. They are read, not modified.

    Returns:
        A new list, in the same order as ``docs``, of ``Document`` objects
        carrying the original ``page_content`` and ``{'source': ...}`` as
        their only metadata. The value is ``None`` when the original
        metadata has no ``'source'`` entry.
    """
    return [
        Document(
            page_content=original.page_content,
            metadata={'source': original.metadata.get('source')},
        )
        for original in docs
    ]

In [11]:
minimal_docs = filter_to_minimal_docs(extracted_docs)

In [14]:
minimal_docs[:2]

[Document(metadata={'source': 'data\\The-Gale-Encyclopedia-of-Medicine-3rd-Edition.pdf'}, page_content=''),
 Document(metadata={'source': 'data\\The-Gale-Encyclopedia-of-Medicine-3rd-Edition.pdf'}, page_content='The GALE\nENCYCLOPEDIA of\nMEDICINE\nTHIRD EDITION')]

In [13]:
len(minimal_docs)

4505

In [17]:
def text_splitter(documents: List[Document], chunk_size: int = 500, chunk_overlap: int = 20) -> List[Document]:
    """Split ``documents`` into smaller chunks.

    Args:
        documents: Documents to split.
        chunk_size: Forwarded unchanged to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Forwarded unchanged to ``RecursiveCharacterTextSplitter``.

    Returns:
        The chunks produced by ``split_documents``.
    """
    # Named `splitter` so the local does not reuse this function's own name.
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return splitter.split_documents(documents)

In [18]:
text_chunks = text_splitter(minimal_docs)

In [24]:
text_chunks[:2]

[Document(metadata={'source': 'data\\The-Gale-Encyclopedia-of-Medicine-3rd-Edition.pdf'}, page_content='The GALE\nENCYCLOPEDIA of\nMEDICINE\nTHIRD EDITION'),
 Document(metadata={'source': 'data\\The-Gale-Encyclopedia-of-Medicine-3rd-Edition.pdf'}, page_content='The GALE\nENCYCLOPEDIA of\nMEDICINE\nTHIRD EDITION\nVOLUME\n\x81\n1\nA-B\nJACQUELINE L. LONGE, PROJECT EDITOR')]

In [23]:
print(text_chunks[0].page_content)

The GALE
ENCYCLOPEDIA of
MEDICINE
THIRD EDITION


In [19]:
len(text_chunks)

40000

In [90]:
from dotenv import load_dotenv
load_dotenv()

True

In [91]:
# Confirm the key was loaded without echoing the secret itself: printing the
# value stores it in the notebook's saved output.
# NOTE(review): the functions in this notebook read GEMINI_API_KEY, not
# GEMINI_API_KEY2 — confirm which variable is intended.
print("GEMINI_API_KEY2 is set:", os.getenv("GEMINI_API_KEY2") is not None)

[REDACTED]


In [111]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

def get_embeddings():
    """Build the Google Generative AI embedding client used by this notebook.

    Returns:
        A ``GoogleGenerativeAIEmbeddings`` instance for the
        ``models/embedding-001`` model. It is given the value of the
        ``GEMINI_API_KEY`` environment variable as its API key and a
        ``request_options`` dict with ``timeout`` set to 10.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    options = {"timeout": 10}
    return GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=api_key,
        request_options=options,
    )

In [112]:
embeddings = get_embeddings()

In [113]:
test_vectors = embeddings.embed_query("What is the purpose of this document?")

In [114]:
test_vectors[:5]  # Display first 5 dimensions of the embedding

[0.025289049372076988,
 -0.014773928560316563,
 -0.07498802989721298,
 -0.010782887227833271,
 0.05573681741952896]

In [115]:
len(test_vectors)

768

In [117]:
from pinecone import Pinecone, ServerlessSpec

def create_pinecone_index(
    index_name: str,
    dimension: int = 768,
    metric: str = "cosine",
    cloud: str = "aws",
    region: str = "us-east-1",
):
    """Return a handle to the Pinecone index, creating it first if missing.

    Args:
        index_name: Name of the index to open or create.
        dimension: Vector dimension used when the index is created. The
            default of 768 matches the length of the test embedding computed
            earlier in this notebook; change it if the embedding model changes.
        metric: Similarity metric used when the index is created.
        cloud: Cloud provider passed to ``ServerlessSpec``.
        region: Cloud region passed to ``ServerlessSpec``.

    Returns:
        The object returned by ``Pinecone.Index(index_name)``.

    The creation settings are only applied when the index does not exist yet;
    an existing index is opened as-is. The API key is read from the
    ``PINECONE_API_KEY`` environment variable.
    """
    client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

    if not client.has_index(index_name):
        client.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region),
        )

    return client.Index(index_name)

In [118]:
pinecone_index = create_pinecone_index("medical-agent")

In [119]:
from langchain_pinecone import PineconeVectorStore

def store_embeddings(index_name: str, documents=None, embedding=None):
    """Embed documents and store them in the named Pinecone index.

    Args:
        index_name: Name of the target Pinecone index.
        documents: Documents to embed and store. When omitted, the
            notebook-level ``text_chunks`` variable is used, so existing
            calls keep working.
        embedding: Embedding model to use. When omitted, the notebook-level
            ``embeddings`` variable is used.

    Returns:
        The ``PineconeVectorStore`` returned by ``from_documents``.

    NOTE(review): every call hands the full document list to
    ``from_documents`` again — confirm whether re-running the cell creates
    duplicate vectors in the index.
    """
    # Fall back to the notebook globals only when the caller passed nothing,
    # so an explicitly supplied empty list is respected.
    if documents is None:
        documents = text_chunks
    if embedding is None:
        embedding = embeddings

    return PineconeVectorStore.from_documents(
        documents=documents,
        index_name=index_name,
        embedding=embedding,
    )

In [120]:
vector_store = store_embeddings(index_name="medical-agent")

In [121]:
def load_existing_vector_store(index_name: str, embedding=None):
    """Open an already-populated Pinecone index as a vector store.

    Args:
        index_name: Name of the existing Pinecone index.
        embedding: Embedding model to attach to the store. When omitted, the
            notebook-level ``embeddings`` variable is used, so existing calls
            keep working.

    Returns:
        The ``PineconeVectorStore`` returned by ``from_existing_index``.
    """
    if embedding is None:
        embedding = embeddings

    return PineconeVectorStore.from_existing_index(
        index_name=index_name,
        embedding=embedding,
    )

In [122]:
existing_vector_store = load_existing_vector_store(index_name="medical-agent")

In [150]:
retriever = existing_vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 5})

In [151]:
retrived_docs = retriever.invoke("What is Acne?")
retrived_docs

[Document(id='1062baf3-fcf7-4e93-98ce-29d4dd961415', metadata={'source': 'data\\The-Gale-Encyclopedia-of-Medicine-3rd-Edition.pdf'}, page_content='Researchers, Inc. Reproduced by permission.)\n26 GALE ENCYCLOPEDIA OF MEDICINE\nAcne'),
 Document(id='e12f5c22-85bf-4c4f-a1eb-7659a102c5e7', metadata={'source': 'data\\The-Gale-Encyclopedia-of-Medicine-3rd-Edition.pdf'}, page_content='occurs when new skin cells are laid down to replace\ndamaged cells.\nThe most common sites of acne are the face, chest,\nshoulders, and back since these are the parts of the\nbody where the most sebaceous follicles are found.\nCauses and symptoms\nThe exact cause of acne is unknown. Several risk\nfactors have been identified:\n/C15Age. Due to the hormonal changes they experience,\nteenagers are more likely to develop acne.\n/C15Gender. Boys have more severe acne and develop it\nmore often than girls.'),
 Document(id='180c861b-19a4-46fc-adbd-9fd494afeaff', metadata={'source': 'data\\The-Gale-Encyclopedia-of-Medi

In [152]:
for doc in retrived_docs:
    print(doc.page_content)
    print("Source:", doc.metadata.get('source'))
    print("ID:", doc.id)
    print("\n---\n")

Researchers, Inc. Reproduced by permission.)
26 GALE ENCYCLOPEDIA OF MEDICINE
Acne
Source: data\The-Gale-Encyclopedia-of-Medicine-3rd-Edition.pdf
ID: 1062baf3-fcf7-4e93-98ce-29d4dd961415

---

occurs when new skin cells are laid down to replace
damaged cells.
The most common sites of acne are the face, chest,
shoulders, and back since these are the parts of the
body where the most sebaceous follicles are found.
Causes and symptoms
The exact cause of acne is unknown. Several risk
factors have been identified:
/C15Age. Due to the hormonal changes they experience,
teenagers are more likely to develop acne.
/C15Gender. Boys have more severe acne and develop it
more often than girls.
Source: data\The-Gale-Encyclopedia-of-Medicine-3rd-Edition.pdf
ID: e12f5c22-85bf-4c4f-a1eb-7659a102c5e7

---

/C15Diet. No foods cause acne, but certain foods may
cause flare-ups.
/C15Drugs. Acne can be a side effect of drugs including
tranquilizers, antidepressants, antibiotics,oral con-
traceptives, and anabo

In [176]:
def retrieve_documents(vector_store, k: int = 10, search_type: str = "similarity"):
    """Wrap ``vector_store`` in a retriever.

    Despite the name, this returns the retriever object itself rather than
    any documents; callers invoke the retriever to run a query.

    Args:
        vector_store: Object exposing ``as_retriever``.
        k: Value placed under ``"k"`` in ``search_kwargs``. Defaults to 10.
        search_type: Search type passed to ``as_retriever``. Defaults to
            ``"similarity"``.

    Returns:
        Whatever ``vector_store.as_retriever(...)`` returns.
    """
    return vector_store.as_retriever(search_type=search_type, search_kwargs={"k": k})

In [177]:
retriever = retrieve_documents(existing_vector_store)

In [178]:
from langchain_google_genai import ChatGoogleGenerativeAI

def get_chat_model():
    """Build the Gemini chat model used to answer questions.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance for ``models/gemini-2.5-flash``,
        given the value of the ``GEMINI_API_KEY`` environment variable as its
        API key.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    return ChatGoogleGenerativeAI(
        model="models/gemini-2.5-flash",
        google_api_key=api_key,
    )

In [179]:
llm = get_chat_model()

In [180]:
from langchain_core.prompts import ChatPromptTemplate


def get_prompt():
    """Build the chat prompt used by the question-answering chain.

    Returns:
        A ``ChatPromptTemplate`` with two messages: a system message holding
        the instructions followed by the ``{context}`` placeholder, and a
        human message holding the ``{input}`` placeholder.
    """
    # Adjacent literals are concatenated with no separator, so every fragment
    # that continues a sentence must end with its own trailing space.
    system_prompt = (
        "You are a medical expert. Use only the information from the retrieved documents from the Gale Encyclopedia of Medicine (3rd Edition) "
        "to answer the user's question. Provide clear, accurate, and concise responses, including definitions, causes, symptoms, diagnosis, treatment, "
        "or prevention as relevant. Avoid speculation and do not introduce external knowledge. Maintain a professional, neutral, and informative tone "
        "suitable for a general audience. "
        "If you do not have enough information to answer the question, say 'I don't know'. "
        "\n\n"
        "{context}"
    )

    return ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", "{input}"),
    ])

In [181]:
prompt = get_prompt()

In [182]:
from langchain.chains.combine_documents import create_stuff_documents_chain

def create_chain(llm, prompt):
    """Build the document-combining chain from a model and a prompt.

    Args:
        llm: Passed to ``create_stuff_documents_chain`` as ``llm``.
        prompt: Passed to ``create_stuff_documents_chain`` as ``prompt``.

    Returns:
        The chain returned by ``create_stuff_documents_chain``.
    """
    return create_stuff_documents_chain(llm=llm, prompt=prompt)

In [183]:
chain = create_chain(llm, prompt)


In [184]:
from langchain.chains import create_retrieval_chain

def build_retrieval_chain(retriever, chain):
    """Combine a retriever with a document-combining chain.

    Args:
        retriever: Passed to ``create_retrieval_chain`` as ``retriever``.
        chain: Passed to ``create_retrieval_chain`` as ``combine_docs_chain``.

    Returns:
        The chain returned by ``create_retrieval_chain``.
    """
    return create_retrieval_chain(retriever=retriever, combine_docs_chain=chain)

In [185]:
retrieval_chain = build_retrieval_chain(retriever, chain)

In [186]:
resposnse = retrieval_chain.invoke({
    "input": "What is Acne?"
})

In [187]:
print(resposnse['answer'])

Acne is a common skin disease characterized by pimples on the face, chest, and back. It is medically known as acne vulgaris.

It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria. The most common sites for acne are the face, chest, shoulders, and back, as these parts of the body contain the most sebaceous (oil) follicles.


In [188]:
resposnse = retrieval_chain.invoke({
    "input": "What is Acromegaly and gegantism? and how is it treated?"
})
print(resposnse['answer'])

Acromegaly is a disorder characterized by the abnormal release of growth hormone (GH) from the pituitary gland in the brain. This leads to increased growth in bone and soft tissue and can alter the body's ability to process nutrients like fats and sugars. When this abnormality occurs in children whose bony growth plates have not yet closed, it results in exceptional growth of long bones, causing unusual height, a condition known as gigantism. If the abnormality occurs after bone growth stops, the disorder is called acromegaly. Overproduction of pituitary growth hormone (GH) is responsible for acromegaly.

Regarding treatment for acromegaly and gigantism, most patients are treated in cancer clinics, even though the disease is not cancer. This is because cancer specialists historically treated it before its cause was known, and the treatment often involves drugs typically used for cancer. However, any cancer drugs used are usually given in smaller doses to reduce side effects. Radiation 

In [189]:
resposnse = retrieval_chain.invoke({
    "input": "What is Acne? and how is it treated?"
})
print(resposnse['answer'])

Acne is a skin condition that has a characteristic appearance. It occurs when new skin cells are laid down to replace damaged cells. The most common sites for acne are the face, chest, shoulders, and back, as these areas have the most sebaceous follicles. In teenagers, it often appears on the forehead, nose, and chin. As people age, acne may appear on the outer part of the face, and adult women may experience it on their chins and around their mouths. The elderly may develop whiteheads and blackheads on the upper cheeks and skin around the eyes. While usually not conspicuous, inflamed lesions can cause pain, tenderness, itching, or swelling, and may leave scars.

Acne treatment aims to reduce sebum production, remove dead skin cells, and kill bacteria using topical drugs and oral medications. The choice of treatment depends on whether the acne is mild, moderate, or severe.

**Treatment options include:**

*   **Drugs:**
    *   **Topical Drugs:** For mild noninflammatory acne, these in