<a href="https://colab.research.google.com/github/Zimal-Fatemah/Langchain-Agents/blob/main/PDF_Chatbot(RAG).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install "langchain>=0.3.0,<0.4.0" "langchain-community>=0.3.0,<0.4.0" "langchain-core>=0.3.0,<0.4.0" "langchain-google-genai" "python-dotenv"


In [None]:
!pip install pypdf faiss-cpu langchain-text-splitters

In [None]:
import os
from google.colab import userdata

# Read the Gemini API key from Colab's secret storage and export it as an
# environment variable for the rest of the notebook session.
os.environ.update(GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY'))

#Importing required libraries
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA

# Chat model that writes the final answers. temperature=0 asks for the least
# random output, which suits answering questions from a fixed document.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

# Parse the PDF so its text can be chunked and indexed.
# NOTE: this notebook was written for Google Colab with the PDF uploaded to
# the session. Upload your own PDF and change the file name below to match.
loader = PyPDFLoader("ML_Week1 slides.pdf")
documents = loader.load()
# Fixed: the original message lacked a space and printed e.g. "Loaded12 pages".
print(f"Loaded {len(documents)} pages")

# Cut the loaded documents into chunks of at most 1000 units, with a 200-unit
# overlap so text that straddles a boundary appears in both neighbouring chunks.
# (Units are whatever the splitter's length function counts - presumably
# characters with the default settings; confirm against the library docs.)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)
print(f"created {len(texts)} chunks")

# Embed every chunk with the Google embedding model and index the resulting
# vectors in a FAISS vector store for similarity search.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
)
vectorstore = FAISS.from_documents(
    texts,
    embeddings,
)
print("Vector store created")

# Build the retrieval-augmented QA chain: for each question the retriever
# fetches matching chunks from the vector store and the LLM answers from them.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    # "stuff" places all retrieved chunks into a single prompt for the LLM.
    chain_type="stuff",
    # Retrieve the 10 most similar chunks per question.
    # Fixed: the original dict literal was missing the colon ({"k"10}), which
    # is a syntax error and prevented the whole cell from running.
    retriever=vectorstore.as_retriever(search_kwargs={"k": 10}),
    # Return the retrieved chunks alongside the answer so the source page can
    # be reported to the user.
    return_source_documents=True,
)

# The bot is ready: read questions from the user until they ask to leave.
print("\n PDF BOT READY, ASK QUESTIONS.\n")

while True:
    # Strip surrounding whitespace so inputs such as "exit " are still
    # recognised as an exit keyword.
    question = input("You: ").strip()
    if question.lower() in ['exit', 'quit', 'bye']:
        print("Goodbye")
        break
    if not question:
        # Nothing was typed; do not spend an LLM call on an empty query.
        continue

    result = qa_chain.invoke({"query": question})
    print(f"Bot: {result['result']}\n")

    # The retriever may return no documents; indexing [0] unconditionally
    # would raise IndexError, so fall back to 'unknown' in that case.
    # NOTE(review): the 'page' metadata written by the PDF loader is
    # presumably a 0-based index - confirm before showing it as a page number.
    sources = result.get('source_documents') or []
    page = sources[0].metadata.get('page', 'unknown') if sources else 'unknown'
    print(f"Source:page{page}\n")

