In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Read the sample PDF from the notebook's static/ folder into LangChain documents.
loader = PyPDFLoader(file_path="static/example.pdf")
data = loader.load()

In [None]:
# Inspect the documents produced by loader.load() (a bare expression is rendered as the cell output).
data

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded documents into smaller pieces: chunk_size=1000 with
# chunk_overlap=200 shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(data)

# Report how many chunks the split produced.
print(f"Total number of documents: {len(docs)}")

In [None]:
# Inspect the chunks produced by text_splitter.split_documents(data).
docs

In [None]:
from langchain_chroma import Chroma

from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Read the Google API key from the environment rather than hardcoding it.
api_key = os.getenv("GOOGLE_API_KEY")

# Embedding client used later to vectorise the document chunks.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    api_key=api_key,
)

# Never echo the secret itself: notebook outputs are stored inside the .ipynb
# and are easily shared or committed. Report only whether a key was found.
print("GOOGLE_API_KEY found for Google Generative AI embeddings:", bool(api_key))


In [None]:
# Show the configured GoogleGenerativeAIEmbeddings object.
embeddings

In [None]:
# Build stable, unique ids for every chunk (source : page : position).
# The store is persisted to "db", so without explicit ids each re-run of this
# cell would add the same chunks again under fresh random ids and retrieval
# would start returning duplicates. With deterministic ids a re-run overwrites
# the existing entries instead.
# NOTE(review): a "db" directory created by earlier runs of the old cell still
# holds randomly-keyed copies; delete it once to start clean.
chunk_ids = [
    f"{doc.metadata.get('source', 'doc')}:{doc.metadata.get('page', 0)}:{idx}"
    for idx, doc in enumerate(docs)
]

# Embed the chunks and store them in a Chroma collection persisted under "db".
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="db",
)

In [None]:
# Similarity-search retriever over the vector store, configured with k=3.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs=dict(k=3))

# Smoke-test the retriever with a sample question.
retrieved_docs = retriever.invoke("What is COVID-19?")

In [None]:
# Number of documents returned for the sample query; the retriever was configured with k=3.
len(retrieved_docs)

In [None]:
# Inspect the documents retrieved for the sample query.
retrieved_docs

In [None]:
from langchain_google_genai import GoogleGenerativeAI

# Gemini model used to generate the final answers in the RAG chain.
llm = GoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.3,
    max_tokens=500,
)

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System message for the QA chain: four instruction sentences joined by single
# spaces, followed by a blank line and the {context} placeholder that the
# prompt template fills in.
system_prompt = " ".join(
    [
        "You are a helpful assistant for question answering tasks.",
        "Use the following pieces of context to answer the question.",
        "If you don't know the answer, say that you don't know.",
        "Use three sentences maximum and keep the answer concise.",
    ]
) + "\n\n{context}"


In [None]:
# Two-message chat prompt: the system instructions (which carry the {context}
# placeholder) followed by the user's question in {input}.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [None]:
# Chain that passes the documents it is given, via the prompt, to the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: the retriever supplies documents to the QA chain above.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
# Ask the RAG chain a question. The query text is embedded for similarity
# search, so it is spelled correctly ("symptoms") to avoid degrading the match.
response = rag_chain.invoke({"input": "symptoms of COVID-19?"})

# Print only the generated answer from the chain's result.
print(response["answer"])

In [None]:
# Inspect the full value returned by rag_chain.invoke(), not just its "answer" entry.
response