In [2]:
# Imports
from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import ToolNode, create_react_agent
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from IPython.display import Image, display
from typing import Literal, TypedDict, Annotated,List
import operator
import os
from typing import Literal
print("‚úÖ All imports successful")

‚úÖ All imports successful


In [3]:
from pydantic import BaseModel, Field

class QualityScore(BaseModel):
    """Rubric the critic fills in for one draft: three integer scores plus feedback.

    Validation fails if any numeric score falls outside the inclusive
    range 1..5 (enforced by the ``ge``/``le`` constraints below).
    """
    # How easy the draft is to follow (1 = worst, 5 = best).
    clarity: int = Field(ge=1, le=5)
    # How fully the draft covers the task (1 = worst, 5 = best).
    completeness: int = Field(ge=1, le=5)
    # How factually correct the draft is (1 = worst, 5 = best).
    accuracy: int = Field(ge=1, le=5)
    # Free-form critique text; fed back to the generator as `critique`.
    feedback: str


In [4]:
class ReflectionState(TypedDict):
    """Shared state passed between the generator, critic and finalizer nodes."""
    # The user's request; interpolated into both generator and critic prompts.
    task: str
    # Latest draft text produced by the generator.
    draft: str
    # Feedback text from the most recent critic pass (QualityScore.feedback).
    critique: str
    # Most recent critic scores; None until the critic has run at least once.
    scores: QualityScore | None
    # Every QualityScore produced so far, oldest first.
    score_history: List[QualityScore]
    # Number of completed critic passes; incremented by the critic node.
    iterations: int
    # Accepted draft, written by the finalizer node.
    final_output: str


In [6]:
# Load API key
# Pull variables from a local .env file (if present) into the environment.
load_dotenv()

# The notebook historically read the lowercase "openai_key" entry, so that
# name is still honoured first; the conventional OPENAI_API_KEY is accepted
# as a fallback so the error message below matches what is actually checked.
openai_api_key = os.getenv("openai_key") or os.getenv("OPENAI_API_KEY")

if not openai_api_key:
    raise ValueError(
        "OpenAI API key not found! Set 'openai_key' or 'OPENAI_API_KEY' "
        "in your environment or .env file."
    )

print("‚úÖ API key loaded")

‚úÖ API key loaded


In [7]:
# Initialize LLM
# One shared chat model for every node; temperature=0 minimises sampling
# randomness so repeated runs score drafts as consistently as possible.
llm = ChatOpenAI(api_key=openai_api_key, temperature=0, model="gpt-4o-mini")

print(f"‚úÖ LLM initialized: {llm.model_name}")

‚úÖ LLM initialized: gpt-4o-mini


In [8]:
import json

def generator(state: ReflectionState) -> dict:
    """Write the first draft, or revise the current one using the critique.

    When ``iterations`` is 0 the model is prompted with the task alone.
    On later passes the prompt also carries the current draft and the
    critic's feedback.

    Returns:
        A partial state update containing only the new ``draft`` text.
    """
    refining = state["iterations"] != 0

    if refining:
        # Revision pass: show the model its previous draft and the critique.
        prompt = f"""
        Task: {state['task']}

        Current Draft:
        {state['draft']}

        Critic Feedback:
        {state['critique']}

        Improve the draft addressing all weaknesses.
        """
        print(f"\n‚úçÔ∏è Refining (iteration {state['iterations']})...")
    else:
        # First pass: nothing to improve on yet, so send the task only.
        prompt = f"""
Task: {state['task']}

Create a clear, complete, and accurate response."""
        print("\n‚úçÔ∏è Generating initial draft...")

    reply = llm.invoke([HumanMessage(content=prompt)])
    return {"draft": reply.content}


def _parse_critic_json(raw: str) -> dict:
    """Decode the JSON object contained in the critic model's raw reply.

    The reply is sliced from its first "{" to its last "}" before decoding,
    so Markdown code fences or stray prose around the object do not break
    parsing.

    Raises:
        json.JSONDecodeError: if no object is present or it is malformed.
    """
    start, end = raw.find("{"), raw.rfind("}")
    if start == -1 or end < start:
        raise json.JSONDecodeError("No JSON object found in critic reply", raw, 0)
    return json.loads(raw[start:end + 1])


def critic(state: ReflectionState) -> dict:
    """Score the current draft and record the result.

    Asks the model to rate the draft for clarity, completeness and accuracy
    on a 1-5 scale, validates the reply as a ``QualityScore`` and bumps the
    iteration counter.

    Returns:
        A partial state update with ``scores``, ``critique`` (the feedback
        text), the extended ``score_history`` and the incremented
        ``iterations`` count.

    Raises:
        json.JSONDecodeError: if the reply contains no valid JSON object.
        pydantic.ValidationError: if the JSON does not satisfy QualityScore.
    """
    prompt = f"""
    Evaluate the response below using a 1-5 scale.

    Task:
    {state['task']}

    Response:
    {state['draft']}

    Score strictly using this JSON format:
    {{
      "clarity": int,
      "completeness": int,
      "accuracy": int,
      "feedback": "text"
    }}
    """

    print("Scoring draft...")
    response = llm.invoke([HumanMessage(content=prompt)])

    raw = response.content.strip()

    scores = QualityScore(**_parse_critic_json(raw))

    # Build a new list instead of appending to the one held in the incoming
    # state, so the node does not mutate its input as a side effect.
    history = [*(state.get("score_history") or []), scores]

    print(
        f"Iteration {state['iterations'] + 1}: "
        f"Clarity={scores.clarity}, "
        f"Completeness={scores.completeness}, "
        f"Accuracy={scores.accuracy}"
    )

    return {
        "scores": scores,
        "critique": scores.feedback,
        "score_history": history,
        "iterations": state["iterations"] + 1,
    }

def reflection_finalizer(state: ReflectionState) -> dict:
    """Promote the current draft to the final output.

    Returns:
        A partial state update whose only key is ``final_output``.
    """
    accepted_draft = state["draft"]
    return {"final_output": accepted_draft}



# Upper bound on critic passes before the draft is accepted as-is.
MAX_REFLECTIONS = 3
# Minimum score (on the critic's 1-5 scale) that every criterion must reach.
QUALITY_THRESHOLD = 4


def should_reflect_again(state: ReflectionState) -> Literal["generator", "finalizer"]:
    """Decide whether to refine the draft again or finish.

    Routes to ``"finalizer"`` when every score meets ``QUALITY_THRESHOLD`` or
    when ``iterations`` has reached ``MAX_REFLECTIONS``; otherwise routes
    back to ``"generator"``.

    Returns:
        The routing key for the next node: ``"generator"`` or ``"finalizer"``.
    """
    scores = state["scores"]

    # `scores` is None until the critic has run; treat that as "not good
    # enough yet" rather than failing on attribute access.
    thresholds_met = scores is not None and all(
        value >= QUALITY_THRESHOLD
        for value in (scores.clarity, scores.completeness, scores.accuracy)
    )
    if thresholds_met:
        print("‚úÖ All quality thresholds met")
        return "finalizer"

    if state["iterations"] >= MAX_REFLECTIONS:
        print("‚ö†Ô∏è Max iterations reached")
        return "finalizer"

    print("üîÅ Quality below threshold ‚Üí refining")
    return "generator"





In [9]:
from langgraph.graph import StateGraph, START, END

# Assemble the reflection loop: generator -> critic -> (generator | finalizer).
builder = StateGraph(ReflectionState)

# Register the three nodes under the names used by the edges below.
builder.add_node("generator", generator)
builder.add_node("critic", critic)
builder.add_node("finalizer", reflection_finalizer)

# Every run starts by drafting, and every draft is scored by the critic.
builder.add_edge(START, "generator")
builder.add_edge("generator", "critic")
# After scoring, should_reflect_again returns a routing key; the mapping
# translates that key into the name of the node to run next.
builder.add_conditional_edges(
    "critic",
    should_reflect_again,
    {
        "generator": "generator",
        "finalizer": "finalizer"
    }
)
builder.add_edge("finalizer", END)

# Compile the graph definition into the object that is invoked below.
reflection_agent = builder.compile()


In [10]:
# Seed every state field explicitly so each node can index the state
# without worrying about missing keys.
initial_state = {
    "task": "Explain how RAG works in large language models.",
    "draft": "",
    "critique": "",
    "scores": None,
    "score_history": [],
    "iterations": 0,
    "final_output": "",
}

result = reflection_agent.invoke(initial_state)



‚úçÔ∏è Generating initial draft...
Scoring draft...
Iteration 1: Clarity=5, Completeness=5, Accuracy=5
‚úÖ All quality thresholds met


In [11]:
# Summarise how the critic's scores evolved, one line per critic pass.
for round_no, score in enumerate(result["score_history"], start=1):
    summary = (
        f"Iteration {round_no}: "
        f"Clarity={score.clarity}, "
        f"Completeness={score.completeness}, "
        f"Accuracy={score.accuracy}"
    )
    print(summary)


Iteration 1: Clarity=5, Completeness=5, Accuracy=5
