In [1]:
#<imports>
import os
from getpass import getpass

from langchain_community.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.llms import HuggingFaceHub

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
#<\imports>

In [2]:
#<get hf token>
# Reuse a token that is already exported in the environment so that
# "Restart Kernel -> Run All" does not block on an interactive prompt;
# otherwise ask for it without echoing. Surrounding whitespace (a common
# copy/paste artifact) is stripped in both cases.
HF_TOKEN = (
    os.environ.get('HUGGINGFACEHUB_API_TOKEN', '').strip()
    or getpass("Huggingface Token : ").strip()
)
if not HF_TOKEN:
    # Fail here with a clear message instead of at the first remote call.
    raise ValueError("A Hugging Face API token is required to run this notebook.")

# The LLM cell does not pass a token explicitly, so it is presumably picked up
# from this environment variable -- keep it exported.
os.environ['HUGGINGFACEHUB_API_TOKEN'] = HF_TOKEN
#<\get hf token>

In [3]:
#<load document>
# Read the sample PDF (path relative to the notebook) into `data`.
loader = PyPDFLoader(file_path="data/sample_data.pdf")
data = loader.load()
#<\load document>

In [4]:
#<make chunks>
# Break the loaded documents into overlapping chunks (size 512, overlap 100)
# so that each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=100,
)
chunks = text_splitter.split_documents(data)
#<\make chunks>

In [5]:
#<embeddings and chroma db>
# Embedding model accessed through the Hugging Face Inference API,
# authenticated with the token collected earlier.
embeddings = HuggingFaceInferenceAPIEmbeddings(
    model_name="thenlper/gte-large",
    api_key=HF_TOKEN,
)

# Embed every chunk and index it in a Chroma vector store.
db = Chroma.from_documents(documents=chunks, embedding=embeddings)

#<\embeddings and chroma db>

In [6]:
#<regular vector search retriever>
# Plain vector-search retriever over the store; `k` is the number of
# documents requested per query.
retriever = db.as_retriever(search_kwargs=dict(k=4))

# The retriever above returns 4 documents for a query, but it does not tell us
# which of them is the most relevant. Reranking is used to order them by relevance.

#<\regular vector search retriever>

In [None]:
#<cross encoders>
# Cross-encoder model used by the reranker. It gets its own name on purpose:
# the LLM cell assigns `model`, and sharing that name would make the object
# behind `model` depend on which cell happened to be executed last.
cross_encoder = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")

# Rerank the documents returned by the base retriever and keep the top 3.
compressor = CrossEncoderReranker(model=cross_encoder, top_n=3)
re_rank_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=retriever,
)
#<\cross encoders>

In [None]:
#<llm>
# Hosted Zephyr model that generates the final answer.
# Output length is bounded by `max_new_tokens` alone. `max_length` is
# deliberately not set: a value of 64 contradicts the 512-token generation
# budget, so only one of the two limits could ever have been in effect.
model = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-alpha",
    model_kwargs={
        "temperature": 0.5,
        "max_new_tokens": 512,
        # Return only the completion, not the prompt echoed back.
        "return_full_text": False,
    },
)
#<\llm>

In [9]:
# Sample question for the pipeline; the test cell at the bottom assigns the
# same value again right before invoking the chain.
query = "What is a dream?"

In [10]:
#<template>
# Zephyr-style prompt, assembled line by line. The empty first and last
# entries produce the leading and trailing newline of the final string.
# `{context}` and `{query}` are placeholders filled in by the prompt template.
template = "\n".join(
    [
        "",
        "<|system|>",
        "You are an AI Assistant that follows instructions extremely well.",
        "Please be truthful and give direct answers. Please tell 'I don't know' if user query is not in CONTEXT",
        "",
        "CONTEXT: {context}",
        "</s>",
        "<|user|>",
        "{query}",
        "</s>",
        "<|assistant|>",
        "",
    ]
)
#<\template>

In [11]:
#<prompt>
# Build a plain string prompt. `template` already contains the model's own
# chat markup (<|system|>, <|user|>, <|assistant|>) and is sent to a
# text-completion LLM, so it has to reach the model verbatim. A chat prompt
# template would wrap it in a human message, which is flattened to text with a
# "Human: " role prefix in front of the <|system|> tag.
prompt = PromptTemplate.from_template(template)
#<\prompt>

In [12]:
#<rag pipe>
def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: Iterable of retrieved documents; each one is expected to expose
            its text as `page_content`.

    Returns:
        The documents' texts separated by blank lines (an empty string when
        nothing was retrieved).
    """
    return "\n\n".join(doc.page_content for doc in docs)


output_parser = StrOutputParser()

# question -> {reranked context text, question} -> prompt -> LLM -> plain string.
# The retriever output is piped through `format_docs` so that `{context}`
# receives only the document text instead of the repr of a list of Document
# objects (which would also drag metadata into the prompt).
chain = (
    {"context": re_rank_retriever | format_docs, "query": RunnablePassthrough()}
    | prompt
    | model
    | output_parser
)
#<\rag pipe>

In [None]:
#<test rag>
# Run the whole pipeline on a sample question and show the generated answer.
query = "What is a dream?"
response = chain.invoke(input=query)
print(response)
#<\test rag>