In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Point the PDF loader at the source book; the relative path is resolved
# against the notebook's working directory.
loader = PyPDFLoader('data/Medical_book.pdf')

In [None]:
# Read the PDF; the result is the collection of Document objects used by the cells below.
documents = loader.load()

In [None]:
# How many Document objects the loader returned.
len(documents)

In [None]:
from langchain.schema import Document

# Drop pages with no extractable text (whitespace only) and rebuild the rest.
# Unlike a bare Document(page_content=...), keep the "source" and "page"
# metadata entries when the loader supplied them, so every chunk derived from
# a page can still be traced back to where it came from.
clean_docs = [
    Document(
        page_content=doc.page_content,
        metadata={key: doc.metadata[key] for key in ("source", "page") if key in doc.metadata},
    )
    for doc in documents
    if doc.page_content.strip()
]

# Peek at one cleaned page (index 4 assumes at least five non-empty pages).
clean_docs[4]

In [None]:
# Cut every cleaned page into chunks of at most 800 units with a 100-unit
# overlap between neighbouring chunks (units are whatever the splitter's
# default length function counts).
split = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=800,
)
docs = split.split_documents(clean_docs)

In [None]:
# How many chunks the splitter produced.
len(docs)

In [None]:
from sentence_transformers import SentenceTransformer
# Import from langchain_community (already a dependency of this notebook):
# the `langchain.embeddings` path is only a deprecated re-export of this class.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model shared by the indexing and retrieval cells below.
embeddings_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# Embed every chunk once as a sanity check. NOTE: the vector-store cell passes
# `embeddings_model` (not this list), so these vectors are only inspected here.
embeddings = embeddings_model.embed_documents([doc.page_content for doc in docs])

In [None]:
# How many embedding vectors were computed in the previous cell.
len(embeddings)

In [None]:
from dotenv import load_dotenv
import os
# Load variables from a .env file into the process environment so the
# os.getenv(...) lookups in later cells can find the API keys.
load_dotenv()

In [None]:
# Read the Pinecone key from the environment (populated by load_dotenv() above).
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

# Fail fast with an actionable message. Previously a missing key surfaced as
# "TypeError: str expected, not NoneType" from the os.environ assignment.
# The value was just read from os.environ, so writing it back is unnecessary.
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [None]:
from pinecone import Pinecone

# Build the Pinecone client from the key read out of the environment earlier.
pinecone_api_key = PINECONE_API_KEY
pc = Pinecone(api_key=pinecone_api_key)

In [None]:
# Display the client object to confirm it was constructed.
pc

In [None]:
from pinecone import ServerlessSpec

index_name = "medical-chatbot"

# Create the serverless index only when it does not exist yet, so re-running
# this cell is harmless. The dimension must equal the size of the vectors the
# embedding model emits (all-MiniLM-L6-v2 produces 384-dimensional vectors).
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        spec=ServerlessSpec(region="us-east-1", cloud="aws"),
        metric="cosine",
        dimension=384,
    )

# Handle used for data-plane operations on the index.
index = pc.Index(index_name)

In [None]:
# Show the index configuration. Use `index_name` rather than repeating the
# literal so this cell always describes the index the notebook is working with.
desc = pc.describe_index(index_name)
print(desc)

In [None]:
from langchain_pinecone import PineconeVectorStore

# from_documents() embeds and upserts every chunk each time it is called, so
# re-running the notebook would re-embed the whole book and can store the same
# chunks again. Only ingest when the index is empty; otherwise attach to the
# vectors that are already there. To force a rebuild, delete the index and
# re-run the index-creation cell.
existing_vector_count = pc.Index(index_name).describe_index_stats().total_vector_count

if existing_vector_count:
    vector_store = PineconeVectorStore.from_existing_index(
        index_name=index_name,
        embedding=embeddings_model,
    )
else:
    vector_store = PineconeVectorStore.from_documents(
        documents=docs,
        embedding=embeddings_model,
        index_name=index_name,
    )

In [None]:
# Cleanup (destructive): uncomment to delete the Pinecone index and every vector stored in it.
# pc.delete_index(index_name)

In [None]:
# Similarity-search retriever that returns the 5 best-matching chunks per query.
retriever = vector_store.as_retriever(
    search_kwargs={"k": 5},
    search_type="similarity",
)


In [None]:
# Smoke-test the retriever on a sample question and display what it returns.
retrieved_docs = retriever.invoke('What are the symptoms of diabetes?')
retrieved_docs

In [None]:
import os

from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

# Re-load .env so this cell also works when run on its own, then read the
# Gemini key from the environment.
load_dotenv()
api_key = os.getenv('GOOGLE_GENAI_API_KEY')

# Chat model used by both the single-turn and the conversational chains below.
chatModel = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.3,
    api_key=api_key,
)

In [None]:
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain

In [None]:
# System prompt for the single-turn RAG chain. `{context}` is filled with the
# retrieved chunks by the stuff-documents chain; `{input}` (in the human
# message below) receives the user's question.
# Fixes relative to the earlier wording: the guideline "Don't mentions the
# document" is now grammatical, and the follow-up section is separated from
# the guideline list by a blank line so it does not read as part of that bullet.
system_prompt = """You are Medibot, an AI medical assistant.
Your job is to answer user queries based strictly on the provided context documents.
If the context does not contain the answer, say:
"I could not find that information in the documents. Please consult a licensed medical professional for accurate advice."

Guidelines:
- Use clear, professional, and empathetic language.
- Do NOT make up facts beyond the provided context.
- If a question sounds like a medical emergency, politely remind the user to seek immediate professional help.
- Keep responses informative and easy to understand for a general audience.
- Don't mention the documents.

After providing an answer from the documents:
- You may ask if the user wants more details about symptoms, causes, or prevention.
- Do NOT provide medical advice or prescriptions beyond the documents.

Context:
{context}
"""

# from_messages() is the documented constructor for (role, text) tuples and is
# available across langchain-core versions.
template = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [None]:
# Answering step: stuffs the retrieved chunks into the prompt's {context} slot
# and sends the result to the chat model.
qa_chain = create_stuff_documents_chain(llm=chatModel, prompt=template)

# Full pipeline: retrieve chunks for the "input" question, then answer with qa_chain.
final_chain = create_retrieval_chain(retriever=retriever, combine_docs_chain=qa_chain)

In [None]:
# Run the retrieval chain on a sample question and print only the generated answer.
response = final_chain.invoke({"input":'What are the symptoms of diabetes?'})
print(response['answer'])

In [None]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory

# Memory that keeps an LLM-written summary of the conversation and exposes it
# to the chain under the "chat_history" key.
# NOTE: the former `buffer_size=3` argument was removed. It is not a
# ConversationSummaryMemory field, so it either had no effect or failed
# validation, depending on the installed version.
# TODO(review): if a sliding window of recent turns was intended, a different
# memory class is needed.
memory = ConversationSummaryMemory(
    llm=chatModel,
    memory_key="chat_history",
    return_messages=True,
)

# Multi-turn chain: uses the summarised history together with the retriever,
# and returns only the answer (no source documents).
conv_chain = ConversationalRetrievalChain.from_llm(
    llm=chatModel,
    retriever=retriever,
    memory=memory,
    return_source_documents=False,
)

In [None]:
# Three-turn demo: the follow-up questions rely on the chain's memory to know
# that "the symptoms" and "it" refer to diabetes.
# Use .invoke(); calling a chain object directly is deprecated.
response1 = conv_chain.invoke({"question": "What is Diabetes?"})
print(response1["answer"])

response2 = conv_chain.invoke({"question": "What are the symptoms?"})
print(response2["answer"])

response3 = conv_chain.invoke({"question": "How is it treated?"})
print(response3["answer"])