1. Importing Components

In [None]:
from langchain_ollama import OllamaLLM as Ollama
from langchain_community.document_loaders import PyPDFLoader as reader
from langchain.prompts import PromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from operator import itemgetter
from langchain_core.runnables import RunnableLambda

2. Model

In [2]:
# Name of the Ollama model; passed to the LLM client constructor further down.
model = "llama2"

In [27]:
# Prompt skeleton with two placeholders, {context} and {question}.
template = """
You are an assistant that provides answers to questions based on
a given context. 

Answer the question based on the context. If you can't answer the
question, reply "I don't know".

Be as concise as possible and go straight to the point.

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)

# Render the prompt once with placeholder values to eyeball the final text.
preview = prompt.format(
    context="Here is some context",
    question="Here is a question",
)
print(preview)


You are an assistant that provides answers to questions based on
a given context. 

Answer the question based on the context. If you can't answer the
question, reply "I don't know".

Be as concise as possible and go straight to the point.

Context: Here is some context

Question: Here is a question



In [3]:

# `model` starts out as the model-name string and is rebound to the LLM client
# here. Resolving the name first keeps this cell re-runnable: on a second run
# `model` is already the client, and passing it as the name would fail.
model_name = model if isinstance(model, str) else model.model
model = Ollama(model=model_name, temperature=0.1)
# Quick smoke test that the local model responds.
model.invoke("what is capital of india?")

'\nThe capital of India is New Delhi.'

3. Loading Document

In [4]:
# Read the PDF into Documents with the loader's plain `load()`. Chunking is
# done explicitly by the RecursiveCharacterTextSplitter later in this
# notebook, so the deprecated `load_and_split()` (which applies a default
# splitter of its own first) is not needed.
loader = reader('sample.pdf')
pages = loader.load()
len(pages)

40

In [36]:
# Peek at the first two pages only; displaying the whole list floods the output.
pages[:2]

[Document(metadata={'producer': 'PDFium', 'creator': 'PDFium', 'creationdate': 'D:20250623160401', 'source': 'sample.pdf', 'total_pages': 40, 'page': 0, 'page_label': '1'}, page_content='1\nA\tScientist\tin\tHollywood\n:\nTHE\tGENESIS\tOF\t\nINTERSTELLAR\nLynda\tObst,\tMy\tHollywood\tPartner\nT\nhe\tseed\tfor\t\nInterstellar\n\twas\ta\tfailed\tromance\tthat\twarped\tinto\ta\tcreative\nfriendship\tand\tpartnership.\nIn\tSeptember\t1980,\tmy\tfriend\tCarl\tSagan\tphoned\tme.\tHe\tknew\tI\twas\ta\nsingle\tfather,\traising\ta\tteenaged\tdaughter\t(or\ttrying\tto\tdo\tso;\tI\twasn’t\tvery\ngood\tat\tit),\tand\tliving\ta\tSouthern\tCalifornia\tsingle’s\tlife\t(I\twas\tonly\ta\tbit\nbetter\tat\tthat),\twhile\tpursuing\ta\ttheoretical\tphysics\tcareer\t(at\t\nthat\n\tI\twas\ta\nlot\tbetter).\nCarl\tcalled\tto\tpropose\ta\tblind\tdate.\tA\tdate\twith\tLynda\tObst\tto\tattend\tthe\nworld\tpremier\tof\tCarl’s\tforthcoming\ttelevision\tseries,\t\nCosmos\n.\nLynda,\ta\tbrilliant\tand\tbeautiful\tco

In [5]:
# Split the loaded pages with chunk_size=1000 and a 150 overlap between
# neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=150,
)
chunks = splitter.split_documents(pages)

In [6]:
# Report how many chunks the splitter produced.
print(len(chunks))

86


In [37]:
# Peek at the first two chunks only; displaying the whole list floods the output.
chunks[:2]

[Document(metadata={'producer': 'PDFium', 'creator': 'PDFium', 'creationdate': 'D:20250623160401', 'source': 'sample.pdf', 'total_pages': 40, 'page': 0, 'page_label': '1'}, page_content='1\nA\tScientist\tin\tHollywood\n:\nTHE\tGENESIS\tOF\t\nINTERSTELLAR\nLynda\tObst,\tMy\tHollywood\tPartner\nT\nhe\tseed\tfor\t\nInterstellar\n\twas\ta\tfailed\tromance\tthat\twarped\tinto\ta\tcreative\nfriendship\tand\tpartnership.\nIn\tSeptember\t1980,\tmy\tfriend\tCarl\tSagan\tphoned\tme.\tHe\tknew\tI\twas\ta\nsingle\tfather,\traising\ta\tteenaged\tdaughter\t(or\ttrying\tto\tdo\tso;\tI\twasn’t\tvery\ngood\tat\tit),\tand\tliving\ta\tSouthern\tCalifornia\tsingle’s\tlife\t(I\twas\tonly\ta\tbit\nbetter\tat\tthat),\twhile\tpursuing\ta\ttheoretical\tphysics\tcareer\t(at\t\nthat\n\tI\twas\ta\nlot\tbetter).\nCarl\tcalled\tto\tpropose\ta\tblind\tdate.\tA\tdate\twith\tLynda\tObst\tto\tattend\tthe\nworld\tpremier\tof\tCarl’s\tforthcoming\ttelevision\tseries,\t\nCosmos\n.\nLynda,\ta\tbrilliant\tand\tbeautiful\tco

4. Hybrid Retrieval

In [7]:
# Embedding model handed to the FAISS index when the vector store is built.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [8]:
# Index the chunks in FAISS and expose the index as a retriever configured
# with k=3. The variable name's spelling is kept as-is because the ensemble
# retriever cell refers to it.
vectorstore = FAISS.from_documents(documents=chunks, embedding=embedding_model)
vectorstore_retreiver = vectorstore.as_retriever(search_kwargs=dict(k=3))

In [9]:
# Keyword (BM25) retriever built over the same chunks as the vector store;
# it is combined with the vector retriever in the ensemble below.
keyword_retriever = BM25Retriever.from_documents(chunks)

In [10]:
# Hybrid retriever: combines the FAISS-backed and BM25 retrievers, giving
# both sources the same weight.
hybrid_sources = [vectorstore_retreiver, keyword_retriever]
ensemble_retriever = EnsembleRetriever(retrievers=hybrid_sources, weights=[0.5, 0.5])

In [11]:
# Sanity-check the hybrid retriever with a sample query and display the hits.
res = ensemble_retriever.invoke("How old are sun and earth?")
res

[Document(id='721b4cb4-f360-4f64-aabe-fcfee17c1b33', metadata={'producer': 'PDFium', 'creator': 'PDFium', 'creationdate': 'D:20250623160401', 'source': 'sample.pdf', 'total_pages': 40, 'page': 18, 'page_label': '19'}, page_content='Stellar\tDeath:\tWhite\tDwarfs,\tNeutron\tStars,\tand\tBlack\nHoles\nThe\tSun\tand\tEarth\tare\tabout\t4.5\tbillion\tyears\told,\tabout\ta\tthird\tthe\tage\tof\nthe\tuniverse.\tAfter\tanother\t6.5\tbillion\tyears\tor\tso,\tthe\tSun\twill\texhaust\tthe\nnuclear\tfuel\tin\tits\tcore,\tthe\tfuel\tthat\tkeeps\tit\thot.\tThe\tSun\tthen\twill\tshift\tto\nburning\tfuel\tin\ta\tshell\taround\tits\tcore,\tand\tits\tsurface\twill\texpand\tto\tengulf\nand\tfry\tthe\tEarth.\tWith\tthe\tshell’s\tfuel\tspent\tand\tthe\tEarth\tfried,\tthe\tSun\twill\nshrink\tto\tbecome\ta\twhite\tdwarf\tstar,\tabout\tthe\tsize\tof\tthe\tEarth\tbut\twith\ndensity\ta\tmillion\ttimes\thigher.\tThe\twhite\tdwarf\twill\tgradually\tcool,\tover\ntens\tof\tbillions\tof\tyears,\tto\tbecome\ta\tdens

5. Reranking Using Cohere

In [12]:
import os

import cohere
from dotenv import load_dotenv

# Load variables from a .env file (when one is found) into the environment,
# then read the Cohere key from there so it never appears in the notebook.
load_dotenv()
cohere_api_key = os.environ.get("COHERE_API_KEY")

In [13]:
# Cohere API client; the reranking helper below calls `co.rerank`.
co = cohere.Client(cohere_api_key)

In [None]:
def rerank_with_cohere(question, docs, top_n=3):
    """Rerank retrieved documents against the question and join the best ones.

    Args:
        question: The user query the documents are scored against.
        docs: Iterable of objects exposing a ``page_content`` string, such as
            the Documents returned by the retriever.
        top_n: Maximum number of reranked passages to keep. Defaults to 3,
            the value that used to be hard-coded.

    Returns:
        The kept passages, in the order the rerank results list them, joined
        with newlines. An empty string when ``docs`` is empty.
    """
    doc_texts = [doc.page_content for doc in docs]

    # Nothing to rank: skip the API call, which would otherwise be asked for
    # top_n=0 over an empty document list.
    if not doc_texts:
        return ""

    reranked = co.rerank(
        query=question,
        documents=doc_texts,
        top_n=min(len(doc_texts), top_n),
    )
    # Each result carries the index of its passage in the submitted list.
    return "\n".join(doc_texts[result.index] for result in reranked.results)

def rerank_context(inputs):
    """Chain step: retrieve documents for the question and rerank them.

    Args:
        inputs: Mapping with a "question" entry holding the user query.

    Returns:
        Whatever ``rerank_with_cohere`` returns for the retrieved documents.
    """
    query = inputs["question"]
    candidates = ensemble_retriever.invoke(query)
    reranked_context = rerank_with_cohere(query, candidates)
    return reranked_context

6. Chain

In [None]:
# Pipeline: build the prompt variables from the incoming dict ("context" via
# retrieval + reranking, "question" passed straight through), render the
# prompt, then send it to the LLM.
chain_inputs = {
    "context": RunnableLambda(rerank_context),
    "question": itemgetter("question"),
}
chain = chain_inputs | prompt | model

7. Results

In [29]:
# Run the full chain on a sample question; the cell displays the returned answer.
chain.invoke({"question":"What is age of sun and earth?"})

' Based on the context provided, the age of the Sun and Earth are approximately 4.5 billion years old.'

In [32]:
# Query spelling fixed ("drawfs" -> "dwarfs") so the keyword (BM25) retriever
# can match the term as it appears in the document.
print(chain.invoke({"question":"What are white dwarfs?"}))

 White dwarfs are small, dense stars that are formed when a star with a mass similar to that of the Sun runs out of fuel and collapses. They are about the size of the Earth but have a density a million times higher than the Sun. After exhausting their fuel, white dwarfs will gradually cool over tens of billions of years until they become dense, dark cinders.


In [33]:
# Ask the chain about neutron stars and print the answer text.
print(chain.invoke({"question":"What are neutron stars?"}))

 Neutron stars are incredibly dense celestial objects that are formed when a star undergoes a supernova explosion. They have masses ranging from one to three times that of the sun, circumferences of 75 to 100 kilometers (about the size of Chicago), and densities the same as the nucleus of an atom - a hundred trillion times more dense than rock and the Earth itself. Neutron stars are made up of almost pure nuclear matter, with atomic nuclei packed side by side. They have very strong magnetic fields whose force lines are donut-shaped, like the Earth's, and fast-moving particles trapped in these fields light up the force lines, producing blue rings in Figure 2.10. Some of these particles are liberated and stream out the field's surface. Black holes, on the other hand, are made solely from warped space and time (I'll explain this weird claim in Chapter 4). They contain no matter whatsoever but have surfaces called "event horizons" or "horizons," through which nothing can escape, not even l

In [34]:
# Ask the chain about black holes and print the answer text.
print(chain.invoke({"question":"What are blackholes?"}))

 Black holes are regions in space where the gravitational pull is so strong that nothing, including light, can escape once it gets too close to the event horizon. They are made up of warped space and time and have a circumference proportional to their mass. The heavier the black hole, the bigger its circumference. Black holes have no matter, but they have surfaces called "event horizons" or "horizons," through which nothing can escape. In contrast, neutron stars are made of almost pure nuclear matter and have densities the same as the nucleus of an atom. They are much smaller than black holes, with circumferences of about 75 to 100 kilometers.


In [35]:
# Ask the chain a broader question about the kinds of fields and print the answer text.
print(chain.invoke({"question":"What are different kind of field universe consist of?"}))

 Based on the context provided, the universe consists of various types of fields, including:

1. Magnetic fields: Collections of magnetic force lines that sweep around the sky above a neutron star, producing pulses of radiation as they pass over Earth.
2. Electric fields: Collections of electric force lines that drive electric current to flow through wires.
3. Gravitational fields: Collections of gravitational force lines that pull objects towards the Earth's surface.
4. Quantum fluctuations: Strong and intense in realms where gravity is also intense, such as the big bang birth of our universe, the cores of black holes, and backward time travel.
5. New laws of quantum gravity: Rise from the "fiery marriage" of relativistic and quantum laws in realms where they are incompatible.
