<a href="https://colab.research.google.com/github/YashNigam65/gitfolder/blob/master/Agentic_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os
import math
from dataclasses import dataclass
from typing import List, Sequence

from google import genai
from google.genai import types

# -----------------------------
# Config
# -----------------------------
GENERATION_MODEL = "gemini-2.5-flash"     # or "gemini-1.5-flash" if 2.5 not available
EMBEDDING_MODEL = "text-embedding-004"    # embedding model used for both documents and queries

# SECURITY: never hard-code API keys in source files or notebooks. Any key that
# has been committed or shared must be treated as leaked: revoke it and issue a
# new one. The key is read from the environment instead; when neither variable
# is set, api_key is None and the SDK applies its own lookup / error reporting.
client = genai.Client(
    api_key=os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
)


# -----------------------------
# Simple in-memory vector store
# -----------------------------

@dataclass
class DocChunk:
    """One indexed passage of text paired with its embedding vector."""

    # Integer identifier of the chunk.
    id: int
    # The passage text exactly as it was indexed.
    text: str
    # Embedding vector of ``text`` as a list of floats.
    embedding: List[float]


def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    The dot product pairs elements with ``zip``, so only the overlapping
    prefix contributes when the lengths differ. If either vector has zero
    magnitude the similarity is defined here as ``0.0`` rather than dividing
    by zero.
    """
    norm_a = math.sqrt(sum(v * v for v in a))
    norm_b = math.sqrt(sum(v * v for v in b))

    # A zero norm means the angle is undefined; report "no similarity".
    if not (norm_a and norm_b):
        return 0.0

    inner = sum(p * q for p, q in zip(a, b))
    return inner / (norm_a * norm_b)


class InMemoryVectorStore:
    """Minimal in-memory vector index.

    Chunks live in a plain list; embeddings come from the module-level
    ``client`` using ``EMBEDDING_MODEL``, and search is a brute-force cosine
    similarity scan over every stored chunk.
    """

    def __init__(self):
        self.chunks: List[DocChunk] = []

    def add_texts(self, texts: Sequence[str]) -> None:
        """Embed ``texts`` in a single API call and append them as chunks.

        Chunk ids continue from the current number of stored chunks. Nothing
        happens (and no API call is made) when ``texts`` is empty.
        """
        # Materialize once: a one-shot iterable would otherwise be exhausted
        # by the API call and yield nothing when paired with its embeddings.
        texts = list(texts)
        if not texts:
            return

        result = client.models.embed_content(
            model=EMBEDDING_MODEL,
            contents=texts,
            # Task types recommended for RAG: RETRIEVAL_DOCUMENT / RETRIEVAL_QUERY
            config=types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT"),
        )

        start_id = len(self.chunks)
        self.chunks.extend(
            DocChunk(id=start_id + offset, text=text, embedding=list(emb.values))
            for offset, (text, emb) in enumerate(zip(texts, result.embeddings))
        )

    def similarity_search(self, query: str, k: int = 4) -> List[DocChunk]:
        """Return up to ``k`` chunks most similar to ``query``, best first.

        Returns an empty list, without calling the embedding API, when the
        store is empty or ``k`` is not positive.
        """
        # A negative k would make the final slice return "all but the last
        # |k|" results, so non-positive values are handled up front.
        if k <= 0 or not self.chunks:
            return []

        result = client.models.embed_content(
            model=EMBEDDING_MODEL,
            contents=[query],
            config=types.EmbedContentConfig(task_type="RETRIEVAL_QUERY"),
        )
        q_emb = list(result.embeddings[0].values)

        # sorted() is stable, so chunks with equal scores keep insertion order.
        ranked = sorted(
            self.chunks,
            key=lambda chunk: cosine_similarity(q_emb, chunk.embedding),
            reverse=True,
        )
        return ranked[:k]


# Single global store for this toy example. It is filled by the demo code
# under ``__main__`` and queried by the retrieval step inside agentic_rag().
vector_store = InMemoryVectorStore()


# -----------------------------
# Tool / function declaration
# -----------------------------

# Declaration of the retrieval function the model is allowed to call.
# Only "query" is mandatory; "k" is optional.
search_kb_function = types.FunctionDeclaration(
    name="search_knowledge_base",
    description=(
        "Search the local document knowledge base "
        "and return the most relevant text passages "
        "for answering the user's question."
    ),
    parameters_json_schema={
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Natural language query or rephrased question to search with.",
            },
            "k": {
                "type": "integer",
                "description": "Number of passages to retrieve (default 4).",
            },
        },
        "required": ["query"],
    },
)

# Wrap the declaration in a Tool so it can be attached to a request config.
rag_tool = types.Tool(function_declarations=[search_kb_function])

# Shared request config: a low temperature (not strictly deterministic) with
# the retrieval tool enabled.
BASE_CONFIG = types.GenerateContentConfig(
    temperature=0.2,
    tools=[rag_tool],
)


# -----------------------------
# Agentic RAG logic
# -----------------------------

def _run_search_tool(fn, question, default_k):
    """Execute one ``search_knowledge_base`` call and return its result payload.

    ``fn`` is the function-call object produced by the model. Missing or
    malformed arguments fall back to ``question`` / ``default_k`` instead of
    raising, because the arguments are model-generated.
    """
    args = fn.args or {}
    query = args.get("query") or question

    try:
        k = int(args.get("k", default_k))
    except (TypeError, ValueError):
        k = default_k
    if k <= 0:
        # Non-positive values from the model are meaningless; use the default.
        k = default_k

    # max(..., 0): a negative k must never reach the store's result slicing.
    docs = vector_store.similarity_search(query, k=max(k, 0))
    return {
        "matches": [
            {"rank": rank, "text": doc.text}
            for rank, doc in enumerate(docs, start=1)
        ]
    }


def agentic_rag(question: str, default_k: int = 4, max_tool_rounds: int = 3) -> str:
    """
    Ask the Gemini model a question. The model can:
      - answer directly, OR
      - call the search_knowledge_base function to fetch context, then answer.

    Args:
        question: The user's question.
        default_k: Number of passages to retrieve when the model does not
            supply a usable ``k``.
        max_tool_rounds: Maximum number of tool-call rounds to serve before
            returning whatever text the model has produced.

    Returns:
        The model's final text answer, or an empty string when the last
        response contains no text.
    """
    user_content = types.Content(
        role="user",
        parts=[types.Part(text=question)],
    )
    history = [user_content]

    # First call: let Gemini decide whether to call the tool.
    response = client.models.generate_content(
        model=GENERATION_MODEL,
        contents=history,
        config=BASE_CONFIG,
    )

    for _ in range(max_tool_rounds):
        function_calls = response.function_calls
        if not function_calls:
            break

        # Answer EVERY function call in the turn: the API expects one
        # function response per function call it emitted.
        response_parts = []
        for fn in function_calls:
            if fn.name == "search_knowledge_base":
                result = _run_search_tool(fn, question, default_k)
            else:
                # Tell the model the tool does not exist so it can recover.
                result = {"error": f"Unknown tool: {fn.name}"}
            response_parts.append(
                types.Part.from_function_response(name=fn.name, response=result)
            )

        # Replay the model's own function-call turn, then the tool output.
        history.append(response.candidates[0].content)
        history.append(types.Content(role="tool", parts=response_parts))

        response = client.models.generate_content(
            model=GENERATION_MODEL,
            contents=history,
            config=BASE_CONFIG,
        )

    # response.text can be None (e.g. the response holds only function calls).
    return response.text or ""


# -----------------------------
# Demo / usage
# -----------------------------
if __name__ == "__main__":
    # 1. Index some toy docs. In real RAG you'd chunk + load PDFs, DB rows, etc.
    # Each document is ONE list item: the parentheses group adjacent string
    # literals so they are concatenated. A comma between the two halves would
    # index every half-sentence as its own chunk.
    corpus = [
        (
            "Retrieval-Augmented Generation (RAG) combines a language model with "
            "a retriever that fetches relevant documents from a knowledge base."
        ),
        (
            "Gemini is a family of multimodal models from Google that can accept "
            "text, images, audio and more in a single prompt."
        ),
        (
            "In RAG, embeddings are used to represent both user queries and "
            "documents as vectors so that semantic similarity search can be done."
        ),
        (
            "Chunking long documents into smaller passages often improves RAG "
            "retrieval quality and reduces prompt token usage."
        ),
    ]
    vector_store.add_texts(corpus)

    print("Simple Agentic RAG demo with Gemini. Empty question to exit.\n")

    while True:
        try:
            q = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session without a traceback.
            print()
            break
        if not q:
            break
        ans = agentic_rag(q)
        print("\nAssistant:", ans, "\n" + "-" * 60 + "\n")

Simple Agentic RAG demo with Gemini. Empty question to exit.

You: how are you

Assistant: I'm doing well, thank you for asking! I am ready to assist you with any questions or tasks you have. How can I help you today?
 
------------------------------------------------------------

You: 
