<a href="https://colab.research.google.com/drive/1Gf_1mipiJe09PjiweSFBDi9cEMPkbS5N?usp=sharing" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>

### Reflexion Agentic Pattern

A self-improving agent that learns from execution feedback through:
- Action: Execute real tasks
- Evaluation: Observe actual outcomes
- Reflection: Analyze failures and successes
- Memory: Store insights for future attempts
- Retry: Improve using past reflections


In [None]:
!pip install -qU google-generativeai

In [None]:
import ast
import getpass
import json
import re
from datetime import datetime

import google.generativeai as genai

Get Google's Gemini API Key here: https://aistudio.google.com/app/apikey

In [None]:
# Prompt for the key with getpass so it is not echoed into the notebook output.
API_KEY = getpass.getpass("Enter your Google API key: ")

Enter your Google AI API key: ··········


In [None]:
# Configure API: hand the key entered above to the google.generativeai client.
genai.configure(api_key=API_KEY)

In [None]:
class ReflexionAgent:
    """Tool-using agent that retries a task and learns from its own reflections.

    Every attempt runs the same loop: look up related memories, ask the LLM
    for a plan, execute the single tool the plan names, ask the LLM to
    evaluate the outcome and, when the attempt did not succeed, ask it for a
    reflection that is stored in ``episodic_memory`` for later attempts.
    """

    def __init__(self, model_name="gemini-2.0-flash"):
        """Create an agent with no tools and an empty memory.

        Args:
            model_name: Model identifier handed to ``genai.GenerativeModel``.
        """
        self.model = genai.GenerativeModel(model_name)
        self.tools = {}  # tool name -> {"func": callable, "desc": str}
        self.episodic_memory = []  # Stores past attempts and reflections

    def add_tool(self, name, func, description):
        """Register ``func`` under ``name``; ``description`` is shown to the LLM."""
        self.tools[name] = {"func": func, "desc": description}

    def execute_action(self, tool_name, params):
        """Execute a tool and return result with success status.

        Args:
            tool_name: Name of a tool previously registered via ``add_tool``.
            params: Mapping of keyword arguments passed to the tool.

        Returns:
            A dict with the keys ``success`` (bool), ``result`` and ``error``.
            Any exception raised while looking up or calling the tool is
            reported through ``error`` instead of propagating.
        """
        try:
            if tool_name not in self.tools:
                return {"success": False, "result": f"Tool {tool_name} not found", "error": "Invalid tool"}

            result = self.tools[tool_name]["func"](**params)
            return {"success": True, "result": result, "error": None}
        except Exception as e:
            return {"success": False, "result": None, "error": str(e)}

    def evaluate_outcome(self, task, action, outcome):
        """Ask the LLM whether the action succeeded and why.

        Returns:
            The model's evaluation text with surrounding whitespace removed.
        """
        prompt = f"""Evaluate this task execution:

Task: {task}
Action Taken: {action}
Outcome: {outcome}

Analyze:
1. Did it succeed? (Yes/No)
2. If failed, what went wrong?
3. If succeeded, what worked well?

Provide a brief evaluation (2-3 sentences):"""

        response = self.model.generate_content(prompt).text
        return response.strip()

    def reflect(self, task, action, outcome, evaluation):
        """Ask the LLM for a verbal reflection on what to improve.

        Returns:
            The model's reflection text with surrounding whitespace removed.
        """
        prompt = f"""Reflect on this experience to learn and improve:

Task: {task}
Action: {action}
Outcome: {outcome}
Evaluation: {evaluation}

Reflection (answer these):
1. What should be done differently next time?
2. What specific mistakes to avoid?
3. What strategy would work better?

Provide actionable insights:"""

        response = self.model.generate_content(prompt).text
        return response.strip()

    def retrieve_relevant_memory(self, task):
        """Return a text summary of stored memories related to ``task``.

        A memory counts as relevant when any whitespace-separated word of
        ``task`` occurs (case-insensitively, as a substring) in the memory's
        task text. Only the three most recent relevant memories are included.

        Returns:
            The formatted memories, or a fixed sentence when the memory is
            empty or nothing relevant was found.
        """
        if not self.episodic_memory:
            return "No past experience with similar tasks."

        # Simple relevance matching; the task words are split once up front.
        task_words = task.lower().split()
        relevant = [
            memory
            for memory in self.episodic_memory
            if any(word in memory["task"].lower() for word in task_words)
        ]

        if not relevant:
            return "No directly relevant past experience."

        entries = []
        for m in relevant[-3:]:  # Last 3 relevant memories
            # Successful attempts must not be presented to the LLM as failures.
            # Entries without a "success" flag keep the historic label.
            label = "What Worked" if m.get("success") else "What Failed"
            entries.append(
                f"Past Attempt:\nTask: {m['task']}\n{label}: {m['outcome']}\nLesson Learned: {m['reflection']}"
            )
        return "\n\n".join(entries)

    def plan_action(self, task, past_memories):
        """Ask the LLM for a plan, given the tools and past reflections.

        Returns:
            The raw model response; it is expected to contain ``Tool:`` and
            ``Params:`` lines in the format requested by the prompt.
        """
        tools_desc = "\n".join([f"- {n}: {t['desc']}" for n, t in self.tools.items()])

        prompt = f"""You are a Reflexion agent that learns from experience.

Task: {task}

Available Tools:
{tools_desc}

Past Experience:
{past_memories}

Based on past failures and lessons, plan your action.
Respond in this format:
Tool: tool_name
Params: {{"param1": "value1", "param2": "value2"}}
Reasoning: Why this approach will work

Response:"""

        response = self.model.generate_content(prompt).text
        return response

    @staticmethod
    def _parse_params(params_str):
        """Parse the text after ``Params:`` into a dict without executing it.

        JSON is tried first (the format the prompt asks for), then a Python
        literal, so single-quoted dicts still work. Anything that is not a
        dict literal yields ``{}``.
        """
        for loader in (json.loads, ast.literal_eval):
            try:
                parsed = loader(params_str)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                continue
            if isinstance(parsed, dict):
                return parsed
        return {}

    @staticmethod
    def _parse_plan(plan):
        """Extract ``(tool_name, params)`` from the LLM's plan text.

        Lines are stripped before matching so indented responses are still
        recognised. ``tool_name`` is ``None`` when no non-empty ``Tool:`` line
        is present; ``params`` defaults to ``{}``.
        """
        tool_name = None
        params = {}
        for raw_line in plan.splitlines():
            line = raw_line.strip()
            if line.startswith("Tool:"):
                tool_name = line[len("Tool:"):].strip()
            elif line.startswith("Params:"):
                params = ReflexionAgent._parse_params(line[len("Params:"):].strip())
        return (tool_name or None), params

    def _remember(self, task, attempt, action, outcome, evaluation, reflection, success):
        """Append one attempt record to ``episodic_memory``."""
        self.episodic_memory.append({
            "task": task,
            "attempt": attempt,
            "action": action,
            "outcome": outcome,
            "evaluation": evaluation,
            "reflection": reflection,
            "success": success,
            "timestamp": datetime.now().isoformat()
        })

    def run(self, task, max_attempts=3):
        """Run task with reflexion loop.

        Args:
            task: Natural-language description of what to accomplish.
            max_attempts: Maximum number of plan/execute/evaluate rounds.

        Returns:
            The tool result of the first successful attempt, or the string
            ``"Task not completed successfully"`` when every attempt failed.
        """
        print(f"\n🎯 Task: {task}\n")

        for attempt in range(1, max_attempts + 1):
            print("=" * 60)
            print(f"ATTEMPT {attempt}/{max_attempts}")
            print("=" * 60 + "\n")

            # Step 1: Retrieve relevant memories
            past_memories = self.retrieve_relevant_memory(task)
            print(f"📚 Consulting Memory:\n{past_memories}\n")

            # Step 2: Plan action based on memories
            plan = self.plan_action(task, past_memories)
            print(f"🧠 Plan:\n{plan}\n")

            # Parse plan to extract tool and params. The plan is untrusted
            # model output, so it is parsed as data and never evaluated.
            tool_name, params = self._parse_plan(plan)

            if not tool_name:
                print("❌ Could not parse action plan\n")
                continue

            action = f"{tool_name}({params})"

            # Step 3: Execute action
            print(f"⚡ Executing: {tool_name} with {params}")
            outcome = self.execute_action(tool_name, params)
            print(f"📊 Outcome: {outcome}\n")

            # Step 4: Evaluate the outcome
            evaluation = self.evaluate_outcome(task, action, outcome)
            print(f"🔍 Evaluation:\n{evaluation}\n")

            # Step 5: Check if successful. "yes" must appear as a whole word so
            # that words such as "eyes" or "yesterday" do not count as a verdict.
            said_yes = re.search(r"\byes\b", evaluation, re.IGNORECASE) is not None
            if outcome["success"] and said_yes:
                print("✅ SUCCESS! Task completed.\n")
                print(f"Final Result: {outcome['result']}\n")

                # Store successful experience
                self._remember(
                    task, attempt, action, outcome["result"], evaluation,
                    "Success - approach worked well", True,
                )

                return outcome["result"]

            # Step 6: Reflect on failure
            reflection = self.reflect(task, action, outcome, evaluation)
            print(f"💭 Reflection:\n{reflection}\n")

            # Step 7: Store in episodic memory
            self._remember(task, attempt, action, outcome, evaluation, reflection, False)

            print("💾 Stored reflection in memory for next attempt\n")

        print(f"❌ Task failed after {max_attempts} attempts\n")
        return "Task not completed successfully"

In [None]:
# Demo tools: each one can succeed or fail so the agent has something to learn from
def run_code(code):
    """Evaluate ``code`` as a Python expression and describe what happened.

    Two failure modes are simulated before anything is evaluated: input that
    mentions "divide" together with a "0", and input containing
    "import unknown". Every exception, simulated or real, is converted into
    an ``"Error: <type>: <message>"`` string rather than raised.
    """
    try:
        # Simulated failures, checked before the expression is evaluated
        if "divide" in code.lower():
            if "0" in code:
                raise ZeroDivisionError("Cannot divide by zero")
        if "import unknown" in code.lower():
            raise ImportError("Module 'unknown' not found")

        # NOTE(security): eval() executes whatever text it is given. That is
        # acceptable for this demo only; do not feed it untrusted input.
        value = eval(code)
        return f"Code executed successfully. Result: {value}"
    except Exception as exc:
        error_kind = type(exc).__name__
        return f"Error: {error_kind}: {exc}"

def api_call(endpoint):
    """Return the canned payload for ``endpoint`` from a simulated API.

    "user" and "posts" answer with status 200, "invalid" is a canned 404 and
    every other endpoint falls back to a 500 response.

    Raises:
        Exception: When the simulated status is anything other than 200.
    """
    canned_responses = {
        "user": {"status": 200, "data": "User data retrieved"},
        "posts": {"status": 200, "data": "Posts list retrieved"},
        "invalid": {"status": 404, "data": "Endpoint not found"},
    }
    fallback = {"status": 500, "data": "Server error"}

    response = canned_responses.get(endpoint, fallback)
    status, payload = response["status"], response["data"]

    if status == 200:
        return payload

    raise Exception(f"API Error {status}: {payload}")

def search_docs(query):
    """Return the first canned documentation snippet whose topic is in ``query``.

    Topics are matched case-insensitively as substrings of the query, in the
    order they are listed. When no topic matches, a "not found" message that
    echoes the original query is returned.
    """
    knowledge_base = {
        "python list": "Lists are mutable sequences. Use append() to add items.",
        "error handling": "Use try-except blocks to handle exceptions.",
        "api": "APIs allow communication between applications.",
    }

    needle = query.lower()
    snippet = next(
        (text for topic, text in knowledge_base.items() if topic in needle),
        None,
    )
    if snippet is not None:
        return snippet

    return f"No documentation found for: {query}"

In [None]:
# Demo driver: build an agent, register the three tools, run two tasks and
# then print what ended up in the agent's episodic memory.
print("=" * 60, "REFLEXION AGENT DEMO", "=" * 60, sep="\n")

agent = ReflexionAgent()

# Register the tools the agent may choose from
agent.add_tool(name="run_code", func=run_code, description="Execute Python code")
agent.add_tool(name="api_call", func=api_call, description="Make API request to endpoint")
agent.add_tool(name="search_docs", func=search_docs, description="Search documentation")

# Example 1: a calculation carried out through the code tool
print("\n" + "=" * 60, "EXAMPLE 1: Code Execution with Error Recovery", "=" * 60, sep="\n")
result = agent.run("Calculate the result of 100 divided by 5")

# Example 2: an API request where the endpoint may need correcting
print("\n" + "=" * 60, "EXAMPLE 2: API Call with Endpoint Correction", "=" * 60, sep="\n")
result = agent.run("Get user information from the API")

# Dump every stored memory, truncating each reflection to 100 characters
print("\n" + "=" * 60, "EPISODIC MEMORY (What the agent learned)", "=" * 60, sep="\n")
for i, memory in enumerate(agent.episodic_memory, 1):
    print(
        f"\nMemory {i}:",
        f"Task: {memory['task']}",
        f"Attempt: {memory['attempt']}",
        f"Reflection: {memory['reflection'][:100]}...",
        sep="\n",
    )

REFLEXION AGENT DEMO

EXAMPLE 1: Code Execution with Error Recovery

🎯 Task: Calculate the result of 100 divided by 5

ATTEMPT 1/3

📚 Consulting Memory:
No past experience with similar tasks.

🧠 Plan:
Tool: run_code
Params: {"code": "print(100 / 5)"}
Reasoning: I can use the run_code tool to execute a simple Python expression that performs the division. This is a straightforward calculation and should be accurate.


⚡ Executing: run_code with {'code': 'print(100 / 5)'}
20.0
📊 Outcome: {'success': True, 'result': 'Code executed successfully. Result: None', 'error': None}

🔍 Evaluation:
1. Yes
2. N/A
3. The code executed successfully and performed the division as requested. However, the 'result' is None, indicating that the code printed the output to standard output but did not return a value that was captured by the execution environment.

Evaluation: The task was successfully executed in the sense that the calculation was likely performed and printed. Ideally, the code should have retu