<a href="https://colab.research.google.com/github/banno-0720/Deep-Learning-Projects/blob/main/Agentic_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Agentic RAG

## 1. Install Required Dependencies

In [None]:
# Install (or upgrade) the packages imported by the cells below: the agent framework,
# LangChain text splitting / retrievers, the BM25 backend, and the datasets loader.
!pip install smolagents pandas langchain langchain-community sentence-transformers datasets python-dotenv rank_bm25 --upgrade

## 2. Prepare the Knowledge Base

In [None]:
import datasets
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.retrievers import BM25Retriever

# Fetch the train split of the Hugging Face documentation dataset from the Hub
knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train")

# Keep only the rows whose `source` path starts with the transformers repository prefix
knowledge_base = knowledge_base.filter(
    lambda row: row["source"].startswith("huggingface/transformers")
)

# Wrap every remaining row in a LangChain Document; the second "/"-separated
# segment of `source` is stored as the document's metadata
source_docs = [
    Document(
        page_content=row["text"],
        metadata={"source": row["source"].split("/")[1]},
    )
    for row in knowledge_base
]

# Chunk the documents so that each retrievable unit stays small
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " ", ""],  # Tried in this order when splitting
    chunk_size=500,  # Characters per chunk
    chunk_overlap=50,  # Characters shared by neighbouring chunks to keep context
    strip_whitespace=True,
    add_start_index=True,
)
docs_processed = text_splitter.split_documents(source_docs)

print(f"Knowledge base prepared with {len(docs_processed)} document chunks")

## 3. Create a Retriever Tool

In [None]:
from smolagents import Tool

class RetrieverTool(Tool):
    """Agent tool that fetches documentation chunks with a BM25 retriever.

    The class attributes below (`name`, `description`, `inputs`, `output_type`)
    are the metadata the agent sees when deciding how to call the tool, so they
    must describe the actual retrieval method: BM25 is a keyword-based ranking,
    not an embedding-based semantic search.
    """

    name = "retriever"
    description = "Uses BM25 keyword search to retrieve the parts of transformers documentation that could be most relevant to answer your query."
    inputs = {
        "query": {
            "type": "string",
            "description": "The query to perform. This should use keywords likely to appear in your target documents. Use the affirmative form rather than a question.",
        }
    }
    output_type = "string"

    def __init__(self, docs, **kwargs):
        """Build the BM25 index.

        Args:
            docs: Document chunks to index (the output of the text splitter).
            **kwargs: Forwarded unchanged to ``Tool.__init__``.
        """
        super().__init__(**kwargs)
        # Initialize the retriever with our processed documents
        self.retriever = BM25Retriever.from_documents(
            docs, k=10  # Return top 10 most relevant documents
        )

    def forward(self, query: str) -> str:
        """Retrieve the chunks matching ``query`` and format them as one string.

        Args:
            query: Search text supplied by the agent.

        Returns:
            A header line followed by one numbered section per retrieved chunk.

        Raises:
            TypeError: If ``query`` is not a string.
        """
        # Explicit check instead of `assert`, which is removed when Python runs with -O
        if not isinstance(query, str):
            raise TypeError("Your search query must be a string")

        # Retrieve relevant documents
        docs = self.retriever.invoke(query)

        # Format the retrieved documents for readability
        return "\nRetrieved documents:\n" + "".join(
            f"\n\n===== Document {i} =====\n{doc.page_content}"
            for i, doc in enumerate(docs)
        )

# Build the tool instance (and its BM25 index) from the chunked documents
retriever_tool = RetrieverTool(docs=docs_processed)

## 4. Create an Advanced Retrieval Agent

In [None]:
from smolagents import InferenceClientModel, CodeAgent

# Build a code-writing agent whose only tool is the retriever
agent = CodeAgent(
    model=InferenceClientModel(),  # Library default model ("Qwen/Qwen2.5-Coder-32B-Instruct" per the original note - verify for your smolagents version)
    tools=[retriever_tool],  # Tools the agent is allowed to call
    verbosity_level=2,  # Print detailed reasoning while running
    max_steps=4,  # Upper bound on reasoning steps
)

# To pin a specific model instead of the default, pass model_id, e.g.:
# model=InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct")

## 5. Run the Agent to Answer Questions

In [None]:
# A question the agent can only answer by consulting the documentation
question = "For a transformers model training, which is slower, the forward or the backward pass?"

# Let the agent work on the question
agent_output = agent.run(question)

# Show the heading and the answer on separate lines
print("\nFinal answer:", agent_output, sep="\n")

## 6. Practical Applications of Agentic RAG

Agentic RAG systems can be applied to various use cases:

1. **Technical Documentation Assistance:** Help users navigate complex technical documentation
2. **Research Paper Analysis:** Extract and synthesize information from scientific papers
3. **Legal Document Review:** Find relevant precedents and clauses in legal documents
4. **Customer Support:** Answer questions based on product documentation and knowledge bases
5. **Educational Tutoring:** Provide explanations based on textbooks and learning materials

## 7. Creating an Interactive Demo for Our Agentic RAG

In [None]:
from pathlib import Path

# Folder for the Agentic RAG demo (all files needed by the demo are written here)
demo_path = Path("..") / "demos" / "agentic_rag"

# Make sure the folder (and any missing parents) exists; no error if it already does
demo_path.mkdir(exist_ok=True, parents=True)

In [None]:
%%writefile ../demos/agentic_rag/app.py

"""
Gradio demo that exposes your agentic QA pipeline (uses smolagents CodeAgent + a BM25 retriever).
Intended for deployment to Hugging Face Spaces. Set HF_TOKEN in Space secrets or environment.
"""

import os
import traceback
import gradio as gr

# Basic ML / NLP libs used by your pipeline
import datasets
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.retrievers import BM25Retriever

# smolagents agent pieces
from smolagents import Tool, InferenceClientModel, CodeAgent

# -------------------------
# Document preparation
# -------------------------
def prepare_knowledge_base(cache_dir="/tmp/hf_kb_cache"):
    """
    Download and prepare the HF docs dataset, filter to transformers docs,
    chunk into smaller documents and return the processed doc list.

    The chunk list is cached as ``docs_processed.pkl`` inside ``cache_dir``.
    When that file exists and can be unpickled, its content is returned and
    the dataset is not loaded at all.

    Args:
        cache_dir: Directory holding the pickle cache. Created if missing.

    Returns:
        The chunked documents produced by the text splitter, or whatever
        object was previously pickled into the cache file.
    """
    import pickle

    cache_path = os.path.join(cache_dir, "docs_processed.pkl")
    os.makedirs(cache_dir, exist_ok=True)

    # If cached, load and return.
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file. Only point cache_dir at a location that untrusted users cannot
    # write to (the default lives under /tmp).
    if os.path.exists(cache_path):
        try:
            with open(cache_path, "rb") as f:
                return pickle.load(f)
        except Exception as exc:
            # Best effort: a corrupt or incompatible cache is rebuilt below,
            # but say so instead of hiding the failure.
            print(f"Warning: could not read cache {cache_path} ({exc!r}); rebuilding it.")

    knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train")
    # Keep only transformers docs (same filter as your original snippet)
    knowledge_base = knowledge_base.filter(lambda row: row["source"].startswith("huggingface/transformers"))
    source_docs = [
        Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]})
        for doc in knowledge_base
    ]

    # Split into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        add_start_index=True,
        strip_whitespace=True,
        separators=["\n\n", "\n", ".", " ", ""],
    )
    docs_processed = text_splitter.split_documents(source_docs)

    # Write to a sibling temp file and rename it into place, so an interrupted
    # write never leaves a truncated pickle at cache_path.
    tmp_path = cache_path + ".tmp"
    try:
        with open(tmp_path, "wb") as f:
            pickle.dump(docs_processed, f)
        os.replace(tmp_path, cache_path)
    except (OSError, pickle.PicklingError) as exc:
        # The cache is only an optimisation; the freshly built docs are still returned.
        print(f"Warning: could not write cache {cache_path} ({exc!r}).")

    return docs_processed

# -------------------------
# Retriever tool for agent
# -------------------------
class RetrieverTool(Tool):
    """Agent tool that returns documentation chunks ranked by a BM25 retriever.

    The class attributes are the tool metadata exposed to the agent.
    """

    name = "retriever"
    description = "Uses BM25 retrieval over transformers docs to fetch context relevant to a question."
    inputs = {
        "query": {
            "type": "string",
            "description": "A short query describing the information to retrieve (affirmative form).",
        }
    }
    output_type = "string"

    def __init__(self, docs, k=8, **kwargs):
        """Index ``docs`` with BM25.

        Args:
            docs: Document chunks to index.
            k: Passed to ``BM25Retriever.from_documents`` (number of results).
            **kwargs: Forwarded unchanged to ``Tool.__init__``.
        """
        super().__init__(**kwargs)
        # Build a BM25 retriever from the processed docs
        self.retriever = BM25Retriever.from_documents(docs, k=k)

    def forward(self, query: str) -> str:
        """Retrieve chunks for ``query`` and return them as one formatted string.

        Raises:
            TypeError: If ``query`` is not a string.
        """
        # Explicit check instead of `assert`, which is removed when Python runs with -O
        if not isinstance(query, str):
            raise TypeError("query must be a string")
        docs = self.retriever.invoke(query)
        # One numbered section per retrieved chunk, in retriever order
        return "\nRetrieved documents:\n" + "".join(
            f"\n\n===== Document {i} =====\n{doc.page_content}"
            for i, doc in enumerate(docs)
        )

# -------------------------
# Agent initialization
# -------------------------
# Load (or build and cache) the chunked documentation
DOCS = prepare_knowledge_base()

# Wrap the chunks in the BM25 tool the agent will call
retriever_tool = RetrieverTool(docs=DOCS)

# NOTE: On HF Spaces you can set environment variable HF_TOKEN in the UI (Settings -> Secrets).
# The value is only inspected here to print a warning; it is not passed to the model explicitly.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    print("Warning: HF_TOKEN not set. If your chosen model requires authentication, set HF_TOKEN in environment/secrets.")


# Library-default model; pass model_id=... to choose another one
model = InferenceClientModel()
agent = CodeAgent(
    model=model,
    tools=[retriever_tool],
    verbosity_level=1,
    max_steps=4,
)


# -------------------------
# Gradio interface
# -------------------------
def run_agent(question: str):
    """Run the agent and return the final answer (or a helpful error).

    Args:
        question: Text typed by the user in the Gradio textbox.

    Returns:
        The value returned by ``agent.run`` on success; otherwise a short
        message explaining why no answer is available (blank question,
        missing agent, or an exception raised while the agent was running).
    """
    # Reject missing, empty and whitespace-only input before doing any work
    if not question or not question.strip():
        return "Please enter a question."

    # If agent couldn't be created, return fallback info
    if agent is None:
        return "Agent not initialized in this environment. Check logs in the Space and ensure `smolagents` is installed and HF_TOKEN is configured."

    try:
        return agent.run(question)
    except Exception as exc:
        # UI boundary: keep the app responsive, put the full stack trace in the
        # logs, and show the user a short description of what went wrong.
        traceback.print_exc()
        return f"The agent failed while answering: {type(exc).__name__}: {exc}"


# Page layout: intro text, a question/answer row, then a row of action buttons
with gr.Blocks(title="Agentic RAG Demo") as demo:
    gr.Markdown(
        """
        # Transformers docs QA (Agent demo)
        Ask the agent a question about Hugging Face Transformers docs.
        Example: *For a transformers model training, which is slower, the forward or the backward pass?*
        """
    )
    with gr.Row():
        inp = gr.Textbox(label="Question", lines=2, placeholder="Write your question here...")
        out = gr.Textbox(lines=10, label="Agent answer")
    with gr.Row():
        run_btn = gr.Button("Ask")
        clear_btn = gr.Button("Clear")
    # "Ask" sends the question through the agent and shows the result
    run_btn.click(run_agent, inp, out)
    # "Clear" resets the question box to an empty string (the answer box is left as is)
    clear_btn.click(fn=lambda: "", inputs=None, outputs=inp)

# Start the Gradio server
demo.launch()

In [None]:
%%writefile ../demos/agentic_rag/requirements.txt

# Core
smolagents
gradio
datasets
langchain
langchain-community
sentence-transformers
rank_bm25
python-dotenv

# Extras
transformers
huggingface-hub
accelerate
torch
pandas

In [None]:
%%writefile ../demos/agentic_rag/README.md
---
title: Agentic RAG Demo
emoji: 🤖
app_file: app.py
sdk: gradio
sdk_version: 5.34.0
license: apache-2.0
colorFrom: indigo
colorTo: purple
colorMode: light
---

# 🤖 Agentic RAG Demo

A small Gradio Space demonstrating an **agentic RAG (Retrieval-Augmented Generation)** pipeline:
- a BM25 retriever over a Transformers documentation knowledge base,
- a `smolagents` `CodeAgent` (agentic reasoning + tools),
- a Gradio UI for asking natural-language questions and receiving agent answers.

---

## What this Space contains
- `app.py` — main Gradio app. (The `app_file` above points to this.)
- `requirements.txt` — Python dependencies used by the Space.
- `README.md` — this file (with YAML front matter used by Spaces).

---

In [None]:
# 1. Import the required methods for uploading to the Hugging Face Hub
from huggingface_hub import (
    create_repo,
    get_full_repo_name,
    upload_file, # for uploading a single file (if necessary)
    upload_folder # for uploading multiple files (in a folder)
)

# 2. Settings for the upload
LOCAL_DEMO_FOLDER_PATH_TO_UPLOAD = "../demos/agentic_rag"
HF_TARGET_SPACE_NAME = "agentic_rag_demo"
HF_REPO_TYPE = "space"  # the target repository is a Hugging Face Space
HF_SPACE_SDK = "gradio"
HF_TOKEN = ""  # optional: your Hugging Face token (prefer an environment variable over hard-coding it here)

# 3. Create the Space repository on the Hub (a no-op if it already exists)
print(f"[INFO] Creating repo on Hugging Face Hub with name: {HF_TARGET_SPACE_NAME}")
create_repo(
    repo_id=HF_TARGET_SPACE_NAME,
    repo_type=HF_REPO_TYPE,
    space_sdk=HF_SPACE_SDK,
    private=False,  # True keeps the Space hidden from other users
    exist_ok=True,  # False raises an error when repo_id already exists
    # token=HF_TOKEN,  # optional: pass the token explicitly instead of relying on the environment
)

# 4. Resolve the full repository name (e.g. {username}/{model_id} or {username}/{space_name})
full_hf_repo_name = get_full_repo_name(model_id=HF_TARGET_SPACE_NAME)
print(f"[INFO] Full Hugging Face Hub repo name: {full_hf_repo_name}")

# 5. Push the whole demo folder to the root of the Space
print(f"[INFO] Uploading {LOCAL_DEMO_FOLDER_PATH_TO_UPLOAD} to repo: {full_hf_repo_name}")
folder_upload_url = upload_folder(
    folder_path=LOCAL_DEMO_FOLDER_PATH_TO_UPLOAD,
    repo_id=full_hf_repo_name,
    repo_type=HF_REPO_TYPE,
    path_in_repo=".",  # "." is the repository root
    commit_message="Upload Agentic RAG demo (app, requirements, README)",
    # token=HF_TOKEN,  # optional: pass the token explicitly
)
print(f"[INFO] Demo folder successfully uploaded with commit URL: {folder_upload_url}")