In [1]:
from typing import TypedDict
from pathlib import Path
import os

# LangChain + vector DB + embeddings
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# LangGraph
from langgraph.graph import StateGraph, END

# Gemini SDK
import google.generativeai as genai

import warnings
warnings.filterwarnings("ignore")

In [2]:
# os.environ["ANONYMIZED_TELEMETRY"] = "false"
# os.environ["CHROMA_TELEMETRY"] = "false"
# os.environ["OTEL_SDK_DISABLED"] = "true"
# os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "none"

In [3]:
# enable tracing

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "AI Agent using LangGraph" 

In [4]:
LANGSMITH_API_KEY=os.getenv("LANGSMITH_API_KEY")
os.environ["LANGSMITH_API_KEY"] = LANGSMITH_API_KEY

In [5]:
# STATE DEFINITION

class AgentState(TypedDict):
    question: str
    context: str
    answer: str
    reflection: str

In [6]:
# DOCUMENT LOADING

def load_documents(folder="data"):
    folder = Path(folder)

    docs = []
    for file in folder.glob("*.txt"):
        text = file.read_text(encoding="utf-8")
        print(f"[LOAD] Loaded {file.name}")
        docs.append(text)
    return docs

In [None]:
# BUILD VECTOR DATABASE

def build_vectordb(texts):
    splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=30)
    chunks = splitter.split_text("\n\n".join(texts))

    print(f"[VDB] Created {len(chunks)} text chunks")

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    vectordb = Chroma.from_texts(chunks, embedding=embeddings)
    return vectordb

In [None]:
# LOAD GEMINI
def load_gemini():
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("Please set GEMINI_API_KEY environment variable.")

    genai.configure(api_key=api_key)
    print("Using Gemini 2.0 Model")
    return genai.GenerativeModel("gemini-2.0-flash")

In [9]:
# Plan Node
def plan_node(state: AgentState):
    q = state["question"]
    qwords = ["what", "why", "how", "benefit", "explain", "Tell me about"]

    need = any(k in q.lower() for k in qwords)
    print("\n[PLAN] retrieval needed?", need)

    return {"context": "RETRIEVE" if need else ""}

In [10]:
# Retrieve Node
def retrieve_node(state: AgentState):
    print("[RETRIEVE] retrieving chunks from vector DB...")
    results = vectordb.similarity_search(state["question"], k=2)

    context = "\n\n---\n\n".join([r.page_content for r in results]) if results else ""
    return {"context": context}

In [None]:
# Answer Node
def answer_node(state: AgentState):
    q = state["question"]
    c = state["context"]

    prompt = f"""
Use ONLY the context to answer the question.

Context:
{c}

Question:
{q}

Answer:
"""

    response = GEMINI.generate_content(prompt)
    answer = response.text

    print("[ANSWER]", answer[:80], "...")
    return {"answer": answer}


In [12]:
# Reflect Node
def reflect_node(state: AgentState):
    q = state["question"]
    a = state["answer"]

    prompt = f"""
Rate the relevance of the answer from 0 to 10. Then explain briefly.

Question: {q}
Answer: {a}

Format:
Score: <0-10>
Reason: <one sentence>
"""

    response = GEMINI.generate_content(prompt)
    review = response.text

    print("[REFLECT]", review[:80].replace("\n", " "), "...")
    return {"reflection": review}

In [13]:
# BUILD LANGGRAPH
graph = StateGraph(AgentState)

graph.add_node("plan", plan_node)
graph.add_node("retrieve", retrieve_node)
graph.add_node("answer", answer_node)
graph.add_node("reflect", reflect_node)

graph.set_entry_point("plan")

graph.add_conditional_edges(
    "plan",
    lambda s: "retrieve" if s["context"] == "RETRIEVE" else "answer",
    {"retrieve": "retrieve", "answer": "answer"},
)

graph.add_edge("retrieve", "answer")
graph.add_edge("answer", "reflect")
graph.add_edge("reflect", END)

app = graph.compile()

In [14]:
# MAIN RAG FUNCTION
def ask(question: str):
    input_state = {
        "question": question,
        "context": "",
        "answer": "",
        "reflection": "",
    }

    result = app.invoke(input_state)
    print("\n===== FINAL ANSWER =====\n")
    print(result["answer"])

    print("\n===== REFLECTION =====\n")
    print(result["reflection"])

    return result

In [16]:
# RUN (interactive mode)
if __name__ == "__main__":
    GEMINI = load_gemini()
    docs = load_documents()
    vectordb = build_vectordb(docs)

    print("\nRAG Agent ready — type a question or 'exit' to quit.\n")
    while True:
        q = input("Ask: ").strip()
        if q.lower() in ["exit", "quit"]:
            break
        ask(q)

Created a chunk of size 741, which is longer than the specified 400
Created a chunk of size 799, which is longer than the specified 400
Created a chunk of size 785, which is longer than the specified 400
Created a chunk of size 753, which is longer than the specified 400


Using Gemini 2.0 Model
[LOAD] Loaded ai.txt
[LOAD] Loaded data_science.txt
[LOAD] Loaded deep_learning.txt
[LOAD] Loaded machine_learning.txt
[LOAD] Loaded python_programing.txt
[VDB] Created 5 text chunks

RAG Agent ready — type a question or 'exit' to quit.


[PLAN] retrieval needed? True
[RETRIEVE] retrieving chunks from vector DB...
[ANSWER] The development of machines capable of performing tasks that typically require h ...
[REFLECT] Score: 10 Reason: The answer provides a concise and accurate definition of AI.  ...

===== FINAL ANSWER =====

The development of machines capable of performing tasks that typically require human intelligence.


===== REFLECTION =====

Score: 10
Reason: The answer provides a concise and accurate definition of AI.




Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given

c:\Users\abhin\FSDS\task 3\ai_env\Lib\site-packages\pydantic\v1\main.py:1054: UserWarning: LangSmith now uses UUID v7 for run and trace identifiers. This warning appears when passing custom IDs. Please use: from langsmith import uuid7
            id = uuid7()
Future versions will require UUID v7.
  input_data = validator(cls_, input_data)