In [1]:
import os
# Show the working directory before changing it.
%pwd
# Move one level up from the current directory.
# NOTE(review): the path is relative, so re-running this cell climbs one more
# level each time; run it exactly once per kernel session.
os.chdir("../")

# Last expression of the cell: displays the working directory after the change.
%pwd

'c:\\Users\\Utilisateur\\Desktop\\medical_chatbot\\medical_chatbot_RAG_system'

In [2]:
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def load_documents(data):
    """Load the PDF files found in a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for files matching the
        ``*.pdf`` glob, with ``PyPDFLoader`` used as the per-file loader class.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()


In [4]:
# Load the PDFs from the relative "data" directory via load_documents.
# NOTE(review): the saved output for this cell shows a KeyboardInterrupt, so
# that run did not finish; confirm `book` is populated before running the
# cells that depend on it.
book = load_documents("data")

KeyboardInterrupt: 

In [None]:
from langchain_core.documents import Document

def filter_metadata(documents):
    """Build new documents that keep only the ``'source'`` metadata entry.

    Args:
        documents: Iterable of objects exposing ``metadata`` (a mapping that
            contains a ``'source'`` key) and ``page_content``.

    Returns:
        A new list of ``Document`` objects, one per input and in the same
        order, each with metadata ``{'source': ...}`` and the original
        ``page_content``. The inputs are not modified.

    Raises:
        KeyError: If an input's metadata has no ``'source'`` key (the key is
            read by direct indexing).
    """
    return [
        Document(
            metadata={'source': original.metadata['source']},
            page_content=original.page_content,
        )
        for original in documents
    ]
# Reduce each loaded document to its 'source' metadata and text content.
# Requires `book` to have been loaded by an earlier cell.
filtred_book = filter_metadata(book)

In [None]:
# Chunking
from langchain_text_splitters import RecursiveCharacterTextSplitter
def text_splitter(document, chunk_size=500, chunk_overlap=20):
    """Split documents into overlapping chunks.

    Args:
        document: The documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 500,
            the value that used to be hard-coded here.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            20, the value that used to be hard-coded here.

    Returns:
        The chunks returned by ``split_documents``.
    """
    # Named `splitter` (not `text_splitter`) so the local variable no longer
    # shadows this function's own name inside its body.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(document)
# Split the metadata-filtered documents into chunks.
chunks = text_splitter(filtred_book)
# Last expression of the cell: displays how many chunks were produced.
len(chunks)


5859

In [None]:
from langchain_huggingface import HuggingFaceEndpointEmbeddings
import os
from dotenv import load_dotenv

# Load environment variables (the error below points users at a .env file)
# before the Hugging Face token is read.
load_dotenv()

api_key = os.environ.get('HUGGING_FACE_KEY')
if not api_key:
    # Fail fast with an actionable message when the token is missing or empty.
    raise ValueError("HUGGING_FACE_KEY not found in environment variables. Please set it in your .env file.")

# Embedding client used later when the vector store is created.
embeddings = HuggingFaceEndpointEmbeddings(
    huggingfacehub_api_token=api_key,
    model="sentence-transformers/all-MiniLM-L6-v2",
)


✅ Hugging Face embeddings initialized from environment variables.


In [6]:
from pinecone import Pinecone
from pinecone import ServerlessSpec
load_dotenv()

# Validate the key up front, mirroring the HUGGING_FACE_KEY check used for the
# embeddings, so a missing variable fails here with a clear message instead of
# surfacing later from inside the Pinecone client.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
if not pinecone_api_key:
    raise ValueError("PINECONE_API_KEY not found in environment variables. Please set it in your .env file.")

# Pinecone client used below to check for and create the index.
pc = Pinecone(api_key=pinecone_api_key)

In [7]:
# Create the Pinecone index on first run.
index_name = "medical-chatbot"


# Only create the index when the client reports that it does not exist yet,
# so re-running this cell does not attempt to create it twice.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        vector_type="dense",
        # NOTE(review): presumably the output size of the embedding model
        # configured earlier - confirm before switching models.
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        ),
    )


In [8]:
from langchain_pinecone import PineconeVectorStore
# Same index name as the one used when creating the index.
index_name = "medical-chatbot"
# Vector store handle bound to that index, using `embeddings` for queries.
# NOTE(review): no visible cell uploads `chunks` to the index; this appears to
# rely on the index having been populated in an earlier session - confirm.
vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embeddings,
)


In [9]:
# Using Google Gemini Free API (Better than HuggingFace for tool calling)
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
import os

# Make sure environment variables are loaded before the API key is read.
load_dotenv()

# Chat model handed to the agent; the key is read from GOOGLE_API_KEY.
model = ChatGoogleGenerativeAI(
    model="models/gemini-2.5-flash",
    temperature=0.2,
    timeout=60,
    google_api_key=os.getenv('GOOGLE_API_KEY'),
)

In [10]:
from langchain.agents import create_agent
from langchain.tools import tool

# Define retrieval as a tool
@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve medical information to help answer a query."""
    # The docstring above is kept verbatim: the @tool decorator may expose it
    # as the tool description - TODO confirm before rewording it.
    # Fetch the three closest matches for the query from the vector store.
    matches = vectorstore.similarity_search(query, k=3)

    # Render each match as a "Source / Content" block of text.
    rendered = []
    for match in matches:
        rendered.append(f"Source: {match.metadata}\nContent: {match.page_content}")

    # First element: blocks joined by blank lines; second: the raw matches.
    return "\n\n".join(rendered), matches

# Create the agent: the Gemini chat model plus the retrieval tool, steered by
# the system prompt below. The prompt is runtime text sent with the request,
# so its wording and whitespace are left exactly as written.
agent = create_agent(
    model,
    tools=[retrieve_context],
    system_prompt = """You are a knowledgeable and helpful Medical Information Assistant. Your role is to provide accurate, evidence-based medical information to users based on the medical documents available to you.

                        CORE RESPONSIBILITIES:
                        1. Use the retrieval tool to search for relevant medical information before answering any medical questions
                        2. Base your responses primarily on the retrieved context from medical documents
                        3. Provide clear, accurate, and well-structured medical information


                        IMPORTANT GUIDELINES:
                        - If the retrieved context doesn't contain enough information to answer the question, clearly state this limitation
                        - Never make up or fabricate medical information
                        - If uncertain, acknowledge the uncertainty and suggest consulting a healthcare professional
                        - Provide information in a clear, accessible manner while maintaining medical accuracy
                        - Include relevant warnings or precautions when discussing treatments or conditions

                        SAFETY REMINDERS:
                        - Always remind users that this information is educational and not a substitute for professional medical advice
                        - For urgent medical concerns, advise users to seek immediate medical attention
                        - Never provide specific diagnoses or treatment plans - these require a licensed healthcare provider

                        FORMAT:
                        - Structure your responses clearly with sections if needed
                        - Use bullet points for lists of symptoms, treatments, etc.
                        - Cite the source documents when providing specific information
                        - End medical advice with appropriate disclaimers

                        Remember: Your goal is to educate and inform, not to replace healthcare professionals."""   
                    )

# Use streaming to see real-time progress.
# The status emoji was stored mis-encoded (its UTF-8 bytes decoded with the
# wrong codec); it is restored to the intended magnifying-glass character.
print("🔍 Sending query to agent...")
print("="*60)

# Each streamed event is indexed by "messages"; pretty-print the latest
# message of every event, followed by a separator line.
for event in agent.stream(
    {"messages": [{"role": "user", "content": "what headachs"}]},
    stream_mode="values"
):
    event["messages"][-1].pretty_print()
    print("\n" + "="*60 + "\n")

🔍 Sending query to agent...

what headachs


Tool Calls:
  retrieve_context (00d73c24-d247-443c-9dc1-2354a7aa7c65)
 Call ID: 00d73c24-d247-443c-9dc1-2354a7aa7c65
  Args:
    query: what are headaches


Name: retrieve_context

Source: {'source': 'data\\Medical_book.pdf'}
Content: Purpose
Migraine headaches usually cause a throbbing pain
on one side of the head. Nausea, vomiting, dizziness ,
increased sensitivity to light and sound, and other symp-
toms may accompany the pain. The attacks may last for
several hours or for a day or more and may come as often
as several times a week. Some people who get migraine
begin, such as restlessness, tingling in an arm or leg, or

Source: {'source': 'data\\Medical_book.pdf'}
Content: flashing lights, that some people have 10-30 min-
utes before a migraine attack.
Inflammation —Pain, redness, swelling, and heat
that usually develop in response to injury or illness.
Treatment You Need. New York, NY: The Guilford Press,
1995.
ORGANIZATIONS
America