# Agentic Rag


# =========================
# 1. Setup & Imports
# =========================

In [None]:

# If running locally, uncomment to install dependencies.
# !pip install --upgrade pip
!python -m pip install pinecone sentence-transformers langchain-pinecone langchain langchain-huggingface langchain-google-genai --quiet --upgrade

In [2]:
import json
import logging
from typing import List, Dict, Optional
import pinecone
from langgraph.graph import StateGraph, START, END
from pydantic import BaseModel
from google import genai
from google.genai import types

# Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("agentic_rag")

# =========================
# 2. Config
# =========================

In [3]:
from dotenv import load_dotenv
import os 
import json

load_dotenv()

LOCATION = "us-central1"
GEMINI_MODEL = "gemini-2.5-flash"   # For generation
GEMINI_EMBED_MODEL = "models/embedding-001"

INDEX_NAME = "tredence-aravind"

# Init clients
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    raise ValueError("Set PINECONE_API_KEY environment variable")

client = genai.Client()
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)

# =========================
# 3. Load KB & Build Index
# =========================

In [4]:
with open("self_critique_loop_dataset.json") as f:
    kb = json.load(f)

logger.info(f"Loaded {len(kb)} KB entries")

INFO:agentic_rag:Loaded 30 KB entries


# =========================
# 4. Embedding Function
# =========================

In [5]:
def embed_texts(texts: List[str]) -> List[List[float]]:
    resp = client.models.embed_content(model=GEMINI_EMBED_MODEL, contents=texts)
    # Each element has .values for embedding
    return [e.values for e in resp.embeddings]

# Check embedding dimension
sample_vec = embed_texts([kb[0]["answer_snippet"]])[0]
dim = len(sample_vec)
logger.info(f"Embedding dimension: {dim}")

INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:agentic_rag:Embedding dimension: 768


# =========================
# 5. Create Pinecone Index
# =========================

In [6]:
if not pc.has_index(INDEX_NAME):
    pc.create_index(name=INDEX_NAME, dimension=dim, metric="cosine",spec=pinecone.ServerlessSpec(cloud="aws", region="us-east-1"))

index = pc.Index(INDEX_NAME)

  from .autonotebook import tqdm as notebook_tqdm


# =========================
# 6. Upsert KB
# =========================

In [7]:
for entry in kb:
    vec = embed_texts([entry["answer_snippet"]])[0]
    index.upsert([(entry["doc_id"], vec, entry)])
logger.info("KB indexed successfully.")

INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://gen

# =========================
# 7. LangGraph Workflow
# =========================

In [25]:
class RAGState(BaseModel):
    question: str
    snippets: List[Dict]
    initial_answer: str = ""
    critique: str = ""
    final_answer: str = ""
    missing_keywords: Optional[List[str]] = []

def retrieve_kb(state: RAGState) -> RAGState:
    q_vec = embed_texts([state.question])[0]
    res = index.query(vector=q_vec, top_k=5, include_metadata=True)
    state.snippets = [m["metadata"] for m in res["matches"]]
    logger.info(f"Retrieved {len(state.snippets)} snippets: {[s['doc_id'] for s in state.snippets]}")
    return state

def generate_answer(state: RAGState) -> RAGState:
    context = "\n".join([f"[{s['doc_id']}] {s['answer_snippet']}" for s in state.snippets])
    prompt = f"Question: {state.question}\nContext:\n{context}\nAnswer with citations [KBxxx]:"

    resp = client.models.generate_content(
        model=GEMINI_MODEL,
        config=types.GenerateContentConfig(
            temperature=0
        ),
        contents=[prompt]
    )
    state.initial_answer = resp.text
    logger.info(f"Generated initial answer:\n{state.initial_answer}")
    return state

def critique_answer(state: RAGState) -> RAGState:
    # prompt = (
    #     f"Critique the following answer for completeness vs the KB snippets.\n\n"
    #     f"Answer: {state.initial_answer}\n\n"
    #     f"Context:\n" + "\n".join([f"[{s['doc_id']}] {s['answer_snippet']}" for s in state.snippets]) +
    #     "\n\nRespond with 'COMPLETE' if sufficient, or 'REFINE: <missing keywords>' if something is missing."
    # )
    prompt = (f"""
    You are a reviewer. 
    - Question: {state.question}
    - Initial Answer: {state.initial_answer}
    - Available KB Snippets: \n """ + "\n".join([f"[{s['doc_id']}] {s['answer_snippet']}" for s in state.snippets]) +
    
    """

    Check: 
    1. Does the answer fully address the user’s question? 
    2. Does it cover all relevant snippets?
    3. Is the user question relevant to the documents provided?

    If yes → respond with ONLY 'COMPLETE'.
    If no → respond with 'REFINE: <list of missing topics/keywords>'.
    """)

    resp = client.models.generate_content(
        model=GEMINI_MODEL,
        config=types.GenerateContentConfig(
            temperature=0
        ),
        contents=[prompt]
    )
    critique_text = resp.text.strip()
    state.critique = critique_text
    logger.info(f"Critique: {state.critique}")

    if critique_text.upper().startswith("REFINE:"):
        payload = critique_text.split("REFINE:")[1].strip()
        state.missing_keywords = [k.strip() for k in payload.split(",") if k.strip()]

    return state

def refine_answer(state: RAGState) -> RAGState:
    if not hasattr(state, "missing_keywords") or not state.missing_keywords:
        state.final_answer = state.initial_answer
        return state

    # Retrieve additional snippet for missing keyword
    keyword = state.missing_keywords[0]
    q_vec = embed_texts([keyword])[0]
    res = index.query(vector=q_vec, top_k=1, include_metadata=True)
    new_snippet = res["matches"][0]["metadata"]
    state.snippets.append(new_snippet)
    logger.info(f"Retrieved extra snippet [{new_snippet['doc_id']}] for refinement.")

    # Regenerate answer with additional snippet
    context = "\n".join([f"[{s['doc_id']}] {s['answer_snippet']}" for s in state.snippets])
    prompt = f"Question: {state.question}\nContext:\n{context}\nProvide a refined answer with citations [KBxxx]:"

    resp = client.models.generate_content(
        model=GEMINI_MODEL,
        config=types.GenerateContentConfig(
            temperature=0
        ),
        contents=[prompt]
    )
    state.final_answer = resp.text
    logger.info(f"Refined answer:\n{state.final_answer}")
    return state

In [26]:
# Build LangGraph
graph = StateGraph(RAGState)

# Add nodes
graph.add_node("retriever", retrieve_kb)
graph.add_node("generator", generate_answer)
graph.add_node("critic", critique_answer)
graph.add_node("refiner", refine_answer)

# Connect START → retriever
graph.add_edge(START, "retriever")

# Linear flow: retriever → generator → critic
graph.add_edge("retriever", "generator")
graph.add_edge("generator", "critic")

# Conditional edge from critic
graph.add_conditional_edges("critic", lambda state: "refiner" if "REFINE" in state.critique else END)

# After refinement, go to END
graph.add_edge("refiner", END)

# Compile the graph
app = graph.compile()

# =========================
# 8. Run Queries
# =========================

In [27]:
test_questions = [
    "What are best practices for caching?",
    "How should I set up CI/CD pipelines?",
    "What are performance tuning tips?",
    "How do I version my APIs?",
    "What should I consider for error handling?"
]

for q in test_questions:
    logger.info(f"Running pipeline for: {q}")
    state = app.invoke(RAGState(question=q, snippets=[]))
    print(f"\nQ: {q}\nAnswer:\n{state['initial_answer']}\nCritique: {state['critique']}\n")

INFO:agentic_rag:Running pipeline for: What are best practices for caching?
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:agentic_rag:Retrieved 5 snippets: ['KB003', 'KB023', 'KB013', 'KB012', 'KB002']
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
INFO:agentic_rag:Generated initial answer:
When addressing caching, it's important to follow well-defined patterns [KB003, KB023, KB013].
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
INFO:agentic_rag:Critique: REFINE: specific caching 


Q: What are best practices for caching?
Answer:
When addressing caching, it's important to follow well-defined patterns [KB003, KB023, KB013].
Critique: REFINE: specific caching best practices



INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:agentic_rag:Retrieved 5 snippets: ['KB007', 'KB027', 'KB017', 'KB016', 'KB006']
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
INFO:agentic_rag:Generated initial answer:
When setting up CI/CD pipelines, it's important to follow well-defined patterns [KB007, KB027, KB017].
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
INFO:agentic_rag:Critique: REFINE: specific patterns for CI/CD setup, steps to set up CI/CD pipelines, components of CI/


Q: How should I set up CI/CD pipelines?
Answer:
When setting up CI/CD pipelines, it's important to follow well-defined patterns [KB007, KB027, KB017].
Critique: REFINE: specific patterns for CI/CD setup, steps to set up CI/CD pipelines, components of CI/CD pipelines, best practices for CI/CD implementation



INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:agentic_rag:Retrieved 5 snippets: ['KB002', 'KB022', 'KB012', 'KB013', 'KB003']
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
INFO:agentic_rag:Generated initial answer:
When addressing performance tuning, it's important to follow well-defined patterns [KB002, KB022, KB012].
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
INFO:agentic_rag:Critique: REFINE: caching
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1


Q: What are performance tuning tips?
Answer:
When addressing performance tuning, it's important to follow well-defined patterns [KB002, KB022, KB012].
Critique: REFINE: caching



INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:agentic_rag:Retrieved 5 snippets: ['KB005', 'KB025', 'KB015', 'KB020', 'KB010']
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
INFO:agentic_rag:Generated initial answer:
When addressing API versioning, it's important to follow well-defined patterns [KB005, KB025, KB015].
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
INFO:agentic_rag:Critique: REFINE: API versioning methods/strategies
INFO:httpx:HTTP Request: POST https://generativelang


Q: How do I version my APIs?
Answer:
When addressing API versioning, it's important to follow well-defined patterns [KB005, KB025, KB015].
Critique: REFINE: API versioning methods/strategies



INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
INFO:agentic_rag:Retrieved 5 snippets: ['KB009', 'KB029', 'KB019', 'KB018', 'KB008']
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
INFO:agentic_rag:Generated initial answer:
When addressing error handling, it's important to follow well-defined patterns [KB009, KB029, KB019].
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC remote call 1 is done.
INFO:agentic_rag:Critique: COMPLETE



Q: What should I consider for error handling?
Answer:
When addressing error handling, it's important to follow well-defined patterns [KB009, KB029, KB019].
Critique: COMPLETE

