Here I have implemented Agentic RAG using the ReAct agents from Transformers Agents (ReAct = Reason + Act). A ReAct agent is exceptionally good at handling reasoning-based queries. I have also compared and contrasted its output with that of a standard RAG model.

In [None]:
!pip install "git+https://github.com/huggingface/transformers.git#egg=transformers[agents]"
!pip install langchain 
!pip install langchain-community 
!pip install sentence-transformers 
!pip install faiss-cpu 
!pip install groq
!pip install -qU langchain-groq
!pip install unstructured
!pip install "unstructured[pdf]"
!pip install -U langchain-huggingface

In [None]:
import pandas as pd
import datasets
from transformers import AutoTokenizer
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.utils import DistanceStrategy
from tqdm import tqdm
from transformers.agents import Tool, ReactJsonAgent
from huggingface_hub import InferenceClient

In [None]:
!pip install pdfplumber

In [4]:
from langchain_community.document_loaders import PDFPlumberLoader

# Load the book PDF from the Kaggle input dataset into a list of documents.
loader = PDFPlumberLoader(
    "/kaggle/input/hp-deathly-hallows-book/"
    "J.K. Rowling - HP 7 - Harry Potter and the Deathly Hallows.pdf"
)
docs = loader.load()

In [5]:
# Build the chunker. It is constructed from the "thenlper/gte-small"
# tokenizer, the same checkpoint name used for the embedding model.
tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-small")
text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
    tokenizer=tokenizer,
    separators=["\n\n", "\n", ".", " ", ""],  # listed from coarsest to finest
    chunk_size=200,
    chunk_overlap=20,
    strip_whitespace=True,
    add_start_index=True,
)

tokenizer_config.json:   0%|          | 0.00/394 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [6]:
# Split every loaded document into chunks and keep only the first occurrence
# of each distinct chunk text, so identical passages are not indexed twice.
docs_processed = []
unique_texts = set()  # chunk texts already kept
for doc in tqdm(docs):
    for new_doc in text_splitter.split_documents([doc]):
        text = new_doc.page_content
        if text in unique_texts:
            continue
        unique_texts.add(text)
        docs_processed.append(new_doc)

100%|██████████| 781/781 [00:05<00:00, 130.82it/s]


In [7]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used both to index the chunks and to embed queries.
model_name = "thenlper/gte-small"
model_kwargs = {'device': 'cpu'}
# The vector store in this notebook is built with DistanceStrategy.COSINE.
# Unit-length embeddings make the nearest-neighbour ranking agree with cosine
# similarity whichever flat index the store uses internally, so request
# normalization explicitly instead of disabling it.
encode_kwargs = {'normalize_embeddings': True}
embedding_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

modules.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/68.1k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/66.7M [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [8]:
# Embed the de-duplicated chunks and index them in a FAISS vector store
# configured with the cosine distance strategy.
vectordb = FAISS.from_documents(
    docs_processed,
    embedding_model,
    distance_strategy=DistanceStrategy.COSINE,
)

In [9]:
class RetrieverTool(Tool):
    """Agent tool that fetches the knowledge-base chunks closest to a query.

    The tool wraps a vector store exposing ``similarity_search(query, k=...)``
    and returns the hits as one formatted string, each hit preceded by a
    ``===== Document <i> =====`` header.

    Args:
        vectordb: Vector store to search.
        k: Number of documents to retrieve per query (defaults to 7).
        **kwargs: Forwarded to ``Tool.__init__``.
    """

    name = "retriever"
    description = "Using semantic similarity, retrieves some documents from the knowledge base that have the closest embeddings to the input query."
    inputs = {
        "query": {
            "type": "string",  # declared as 'string' (previously 'text')
            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
        }
    }
    output_type = "string"  # forward() returns a plain string

    def __init__(self, vectordb, k=7, **kwargs):
        super().__init__(**kwargs)
        self.vectordb = vectordb
        self.k = k

    def forward(self, query: str) -> str:
        """Return the ``k`` most similar documents for ``query`` as text.

        Raises:
            AssertionError: If ``query`` is not a string.
        """
        assert isinstance(query, str), "Your search query must be a string"

        docs = self.vectordb.similarity_search(query, k=self.k)

        # Join with a newline so each header starts on its own line instead
        # of being glued to the last characters of the previous document.
        return "\nRetrieved documents:\n" + "\n".join(
            f"===== Document {i} =====\n{doc.page_content}"
            for i, doc in enumerate(docs)
        )


# Create an instance of the RetrieverTool
retriever_tool = RetrieverTool(vectordb)

In [10]:
from langchain_groq import ChatGroq
import os

# Only fall back to the placeholder when no key is configured, so a real
# GROQ_API_KEY already exported in the environment is never overwritten.
# Replace 'API-KEY' with your own key (or export the variable) before running.
os.environ.setdefault("GROQ_API_KEY", 'API-KEY')

# Chat model used by the standard (non-agentic) RAG baseline.
llm = ChatGroq(
    model="llama3-70b-8192",
    temperature=0,
    max_tokens=2048,
)

In [11]:
import os
from groq import Groq

from typing import List, Dict
from transformers.agents.llm_engine import MessageRole, get_clean_message_list
from huggingface_hub import InferenceClient

# Role mapping applied when the message list is cleaned: tool responses are
# re-labelled as user messages.
openai_role_conversions = {
    MessageRole.TOOL_RESPONSE: MessageRole.USER,
}


class OpenAIEngine:
    """LLM engine for the agent, backed by the Groq chat-completions client.

    Despite its name, requests are sent through ``groq.Groq``; the API key is
    read from the ``GROQ_API_KEY`` environment variable.

    Args:
        model_name: Identifier of the Groq-hosted model to query.
    """

    def __init__(self, model_name="llama3-groq-70b-8192-tool-use-preview"):
        self.model_name = model_name
        self.client = Groq(
            api_key=os.getenv("GROQ_API_KEY"),
        )

    def __call__(self, messages, stop_sequences=None):
        """Send ``messages`` to the model and return the reply text.

        Args:
            messages: Conversation messages; they are normalized with
                ``get_clean_message_list`` before being sent.
            stop_sequences: Optional list of stop strings. ``None`` (the
                default) is treated as an empty list.

        Returns:
            The content of the first completion choice.
        """
        # A fresh list per call avoids sharing one mutable default object
        # across invocations.
        if stop_sequences is None:
            stop_sequences = []

        messages = get_clean_message_list(messages, role_conversions=openai_role_conversions)

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            stop=stop_sequences,
            temperature=0.5,
            max_tokens=2048
        )
        return response.choices[0].message.content

In [12]:
# Instantiate the engine with its default model name.
llm_engine = OpenAIEngine()

In [13]:
# Create the ReAct JSON agent: the retriever is its only tool, llm_engine
# produces its completions, and max_iterations is set to 4.
agent = ReactJsonAgent(tools=[retriever_tool], llm_engine=llm_engine, max_iterations=4, verbose=2)

In [14]:
def run_agentic_rag(question: str) -> str:
    """Answer ``question`` with the agent and its 'retriever' tool.

    The question is appended to a fixed instruction preamble that tells the
    agent how to use the retriever; the combined prompt is handed to
    ``agent.run`` and its result is returned unchanged.
    """
    instructions = """Using the information contained in your knowledge base, which you can access with the 'retriever' tool,
give a comprehensive answer to the question below.
Respond only to the question asked, response should be concise and relevant to the question.
If you cannot find information, do not give up and try calling your retriever again with different arguments!
Make sure to have covered the question completely by calling the retriever tool several times with semantically different queries.
Your queries should not be questions but affirmative form sentences: e.g. rather than "How do I load a model from the Hub in bf16?", query should be "load a model from the Hub bf16 weights".

Question:
"""
    return agent.run(f"{instructions}{question}")

In [None]:
# Example usage: run the agentic pipeline on one question and keep the result
# in `answer` so it can be printed in the next cell.
question = "What was Dumbledore's original plan regarding the Elder wand and how did Harry ultimately become the master of it?"
answer = run_agentic_rag(question)

In [28]:
# Show the question and the agent's answer on consecutive lines.
print(f"Question: {question}", f"Answer: {answer}", sep="\n")

Question: What was Dumbledore's original plan regarding the Elder wand and how did Harry ultimately become the master of it?
Answer: Dumbledore's original plan regarding the Elder Wand was to break its power by having its last master, Severus Snape, die a natural death. However, the plan was foiled when Voldemort killed Snape, thinking he would become the master of the Elder Wand. Ultimately, Harry Potter became the master of the Elder Wand by defeating Draco Malfoy, who had unknowingly become the master after disarming Dumbledore. Harry then decided to return the Elder Wand to Dumbledore's tomb, breaking the Deathly Hallows' curse and making the wand powerless.


In [29]:
def run_standard_rag(question: str) -> str:
    """Answer ``question`` with a single retrieve-then-generate pass.

    The retriever tool is called once with the raw question, its output is
    placed below the question in the prompt, and the content of the chat
    model's reply is returned.
    """
    retrieved = retriever_tool(question)

    header = """Given the question and supporting documents below, give a comprehensive answer to the question.
Respond only to the question asked, response should be concise and relevant to the question.
Provide the number of the source document when relevant.

Question:
"""
    prompt = f"{header}{question}\n\n{retrieved}\n"

    reply = llm.invoke([{"role": "human", "content": prompt}])
    return reply.content

In [30]:
# Run the standard RAG baseline on the same question and print its answer.
standard_answer = run_standard_rag(question)
print(standard_answer)

Dumbledore's original plan regarding the Elder Wand was to have Severus Snape become its master after his death, as he believed Snape would be able to wield its power responsibly (Document 3). However, this plan did not work out as intended.

Ultimately, Harry became the master of the Elder Wand because he had unknowingly become its master when he defeated Draco Malfoy, who had previously become the wand's master after disarming Dumbledore (Documents 1 and 2). This meant that when Harry ultimately defeated Voldemort, the Elder Wand's allegiance was already with him, making him its true master.


We can easily see that Agentic RAG gives a better and more logically structured answer than standard RAG.