In [31]:
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [32]:
import os
from dotenv import load_dotenv

# Loading environment variables from .env file
load_dotenv()

# Fetching the API key
gemini_api_key = os.getenv("GEMINI_API_KEY")

# Verifing the key loaded
print("API Key loaded successfully!" if gemini_api_key else "API Key not found!")

API Key loaded successfully!


In [58]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Initializing Gemini
geminillm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash", 
    google_api_key=gemini_api_key,
    temperature=0.7
)

print("Gemini LLM initialized!")

Gemini LLM initialized!


In [34]:
from langchain_community.document_loaders import PyPDFLoader

pdf_path = "testPaper.pdf" 
loader = PyPDFLoader(pdf_path)

pages = loader.load_and_split()

print(f"Loaded {len(pages)} pages\n")
print(f"First page content preview:\n{pages[0].page_content[:500]}...")

Loaded 28 pages

First page content preview:
Retrieval-Augmented Generation for
Knowledge-Intensive NLP Tasks
Patrick Lewis†‡, Ethan Perez⋆,
Aleksandra Piktus†, Fabio Petroni†, Vladimir Karpukhin†, Naman Goyal†, Heinrich Küttler†,
Mike Lewis†, Wen-tau Yih†, Tim Rocktäschel†‡, Sebastian Riedel†‡, Douwe Kiela†
†Facebook AI Research;‡University College London;⋆New York University;
plewis@fb.com
Abstract
Large pre-trained language models have been shown to store factual knowledge
in their parameters, and achieve state-of-the-art results when ﬁ...


In [35]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Creating a text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,        # chars per chunk
    chunk_overlap=200,      # overlap between chunks
    length_function=len,
)

chunks = text_splitter.split_documents(pages)
print(f"total chunks - {len(chunks)}")

total chunks - 96


In [37]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Initializing embeddings model
HuggingFaceembeddingsModel = HuggingFaceEmbeddings(
       model_name="all-MiniLM-L6-v2"
   )

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=HuggingFaceembeddingsModel,
    persist_directory="./chroma_db"
)

print("embeddings created")

embeddings created


In [38]:
print(f"Number of embeddings created: {vectorstore._collection.count()}")

Number of embeddings created: 96


In [39]:
# Testing embeddings
print(chunks[0].page_content[:200])
sample_embedding = HuggingFaceembeddingsModel.embed_query(chunks[0].page_content)

print(f"\nEmbedding dimension: {len(sample_embedding)}")
print(f"First 5 values: {sample_embedding[:5]}")

Retrieval-Augmented Generation for
Knowledge-Intensive NLP Tasks
Patrick Lewis†‡, Ethan Perez⋆,
Aleksandra Piktus†, Fabio Petroni†, Vladimir Karpukhin†, Naman Goyal†, Heinrich Küttler†,
Mike Lewis†, W

Embedding dimension: 384
First 5 values: [-0.06688090413808823, -0.03467119485139847, -0.026010597124695778, 0.08180363476276398, 0.01612071320414543]


In [41]:
# Test similarity search
query = "What is this paper about?"

# Retrieve top 3 most relevant chunks
relevant_chunks = vectorstore.similarity_search(query, k=3)

print(f"Query: {query}\n")
print("=" * 80)

for i, chunk in enumerate(relevant_chunks, 1):
    print(f"\nChunk {i}:")
    print(chunk.page_content[:300])
    print("...")

Query: What is this paper about?


Chunk 1:
speciﬁc by a large margin. Table 3 shows typical generations from each model.
Jeopardy questions often contain two separate pieces of information, and RAG-Token may perform
best because it can generate responses that combine content from several documents. Figure 2 shows
an example. When generating 
...

Chunk 2:
https://www.aclweb.org/anthology/P17-1171.
[6] Eunsol Choi, Daniel Hewlett, Jakob Uszkoreit, Illia Polosukhin, Alexandre Lacoste, and
Jonathan Berant. Coarse-to-ﬁne question answering for long documents. In Proceedings of the
55th Annual Meeting of the Association for Computational Linguistics (Volu
...

Chunk 3:
ard of wikipedia: Knowledge-powered conversational agents. In International Conference on
Learning Representations, 2019. URL https://openreview.net/forum?id=r1l73iRqKm.
[10] Matthew Dunn, Levent Sagun, Mike Higgins, V . Ugur Guney, V olkan Cirik, and Kyunghyun
Cho. SearchQA: A New Q&A Dataset Augme
...


In [50]:
# Helper function to join documents
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [59]:
from langchain_classic import hub
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

prompt = hub.pull("rlm/rag-prompt")

retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | geminillm
    | StrOutputParser()
)

In [60]:
rag_chain.invoke("When was the research paper published?")

'The research paper "Coarse-to-fine question answering for long documents" by Choi et al. was published in July 2017. It appeared in the Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics.'