In [27]:
# Read the source PDF from the working directory; the loaded result is kept
# in `doc` for the splitting step.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader(file_path="attention.pdf")
doc = loader.load()


In [28]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# chunk_size    = 1000: each part/chunk is 1000 characters.
# chunk_overlap = 200: how much common (repeated) text two neighbouring chunks
#                 share, i.e. the next chunk begins with the last 200
#                 characters of the previous one, so context is not cut off
#                 at a chunk boundary.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
documents = text_splitter.split_documents(documents=doc)



In [None]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# FAISS: a vector database that helps with fast search.
# HuggingFaceEmbeddings: converts text into numbers (vectors) using a
# pre-trained model.
#
# "all-mpnet-base-v2" is a sentence-embedding model from HuggingFace.
# What it does: turns a text or sentence into a dense vector (numbers) that is
# later used for similarity search (e.g. RAG, search bots).

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device="cpu")  # run the embedding model on the CPU
encode_kwargs = dict(normalize_embeddings=False)  # embeddings are not normalized
hf = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)



In [4]:
# Build the FAISS store from the first 20 chunks only, embedded with `hf`.
# NOTE(review): chunks after the 20th are never indexed, so retrieval cannot
# return them -- confirm this cap is intentional.
vectorstore = FAISS.from_documents(
    documents=documents[:20],
    embedding=hf,
)
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x13d4c05cad0>

In [24]:
# Sanity check: search the vector store with a sentence taken from the paper
# and display the text of the top-ranked chunk.
query = "An attention function can be described as mapping  a query"
result = vectorstore.similarity_search(query=query)
result[0].page_content

'3.2 Attention\nAn attention function can be described as mapping a query and a set of key-value pairs to an output,\nwhere the query, keys, values, and output are all vectors. The output is computed as a weighted sum\n3'

In [12]:
from langchain_ollama import OllamaLLM

# Create the Ollama LLM object, selecting the "gemma3:1b" model.
llm = OllamaLLM(
    model="gemma3:1b",
)

In [13]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt template with two placeholders: {context} sits inside the <context>
# tags and {input} follows "Question:". The template text itself is unchanged.
prompt = ChatPromptTemplate.from_template(
    template="""
 Answer the following question based only on the provided context.
 Think step by step before providing a detailed answer.
 I will tip you $1000 if the user finds the answer helpful,
 <context>
 {context}
 </context>
 Question: {input}"""
)

In [14]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Build the document-combining chain from the LLM and the prompt template.
document_chain = create_stuff_documents_chain(
    prompt=prompt,
    llm=llm,
)


In [15]:
# Retrievers: a retriever is an interface that returns documents based on a
# query. It is more general than a vector store: a retriever does not need to
# be able to store documents, only to return (retrieve) them.

# Expose the FAISS vector store through the retriever interface so that it
# can be used to fetch documents for a query.
retriever = vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000013D4C05CAD0>, search_kwargs={})

In [16]:
# Retrieval chain: this chain takes in a user inquiry, which is then passed to
# the retriever to fetch relevant documents. Those documents (and the original
# inputs) are then passed to the LLM to generate a response.

from langchain.chains import create_retrieval_chain

# NOTE: the variable name keeps its original spelling ("retrievel") because
# other cells refer to it by that name.
retrievel_chain = create_retrieval_chain(retriever, document_chain)

In [25]:
# Ask a question through the retrieval chain and display only the generated
# answer text.
response = retrievel_chain.invoke(
    {"input": "What is attention"}
)
response["answer"]

'Attention is used in the model to map a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. It allows the model to jointly attend to information from different representation subspaces at different positions.'

In [22]:
# Display the full response mapping from the retrieval chain (the saved output
# shows 'input' and 'context' keys), not just the answer text.
# NOTE(review): the saved output of this cell (execution count 22) shows a
# different 'input' than the invoke cell above (execution count 25), so it is
# stale -- restart the kernel and run all cells to refresh it.
response

{'input': 'An attention function can be described as mapping  a query',
 'context': [Document(id='85719649-4d5d-4e2c-a4db-277ace2e90f7', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-08-03T00:07:29+00:00', 'author': '', 'keywords': '', 'moddate': '2023-08-03T00:07:29+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'attention.pdf', 'total_pages': 15, 'page': 2, 'page_label': '3'}, page_content='3.2 Attention\nAn attention function can be described as mapping a query and a set of key-value pairs to an output,\nwhere the query, keys, values, and output are all vectors. The output is computed as a weighted sum\n3'),
  Document(id='2f4d5f37-b133-4a68-9d29-0ee9e0fda870', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-08-03T00:07:29+00:00', 'author': '', 'keywords': '',