# reflexion.py

Auto-generated implementation from the Agentic RL PhD codebase.

### Original Implementations & References
The following link points to an official or high-quality reference implementation of the paper covered in this notebook:

- https://github.com/noahshinn/reflexion

*Note: The code below is a simplified pedagogical implementation.*

In [None]:
from typing import List, Dict

# Paper: "Reflexion: Language Agents with Verbal Reinforcement Learning" (Shinn et al., 2023)
# Category: Agentic / Verbal RL

class ReflexionAgent:
    """Agent that acts through an LLM and learns from failure via self-critique.

    After a failed attempt the agent asks the same LLM to write a short
    critique of its recent steps. Stored critiques ("reflections") are
    injected into every later prompt, which plays the role that a weight
    update plays in conventional reinforcement learning.

    Attributes:
        llm: Client object; only its ``generate(prompt)`` method is used.
        system_prompt: Text placed at the top of every acting prompt.
        trajectory_window: Number of most recent steps shown to the LLM
            when reflecting.
        max_reflections: Upper bound on stored reflections, or ``None``
            for no bound.
        memory: Step records of the current attempt (short-term trajectory).
        reflections: Critiques produced by earlier failed attempts.
    """

    def __init__(self, llm_client, system_prompt: str, *,
                 trajectory_window: int = 5, max_reflections=None):
        """Create an agent with empty trajectory and reflection stores.

        Args:
            llm_client: Object exposing ``generate(prompt)``; the result is
                interpolated into text, so it is presumably a string.
            system_prompt: Text placed at the top of every acting prompt.
            trajectory_window: How many of the latest steps ``reflect``
                includes in its prompt. Must be at least 1.
            max_reflections: If given, only this many of the newest
                reflections are kept (and therefore shown in prompts).
                ``None`` keeps every reflection. Must be at least 1 when set.

        Raises:
            ValueError: If ``trajectory_window`` or ``max_reflections`` is
                smaller than 1.
        """
        # A window of 0 would make ``memory[-0:]`` return the *whole* list,
        # the opposite of what a caller asking for "0 steps" expects.
        if trajectory_window < 1:
            raise ValueError("trajectory_window must be >= 1")
        if max_reflections is not None and max_reflections < 1:
            raise ValueError("max_reflections must be >= 1 or None")

        self.llm = llm_client
        self.system_prompt = system_prompt
        self.trajectory_window = trajectory_window
        self.max_reflections = max_reflections
        self.memory: List[str] = []       # Short-term trajectory
        self.reflections: List[str] = []  # "Verbal" gradients/updates

    def act(self, observation: str) -> str:
        """Choose an action for ``observation`` and record the step.

        The prompt is the system prompt, any stored reflections, and the
        current observation (see ``_build_context``).

        Args:
            observation: Description of the current state.

        Returns:
            Whatever ``llm.generate`` returned for the built prompt.
        """
        action = self.llm.generate(self._build_context(observation))
        self.memory.append(f"Obs: {observation} -> Action: {action}")
        return action

    def reflect(self, success: bool, feedback: str) -> None:
        """Learn from a finished attempt (the "learning" step).

        Instead of backpropagation, the LLM is asked to criticise its own
        recent steps. On success nothing happens and the trajectory is left
        untouched. On failure the critique is stored and the trajectory is
        cleared so the next attempt starts fresh.

        Args:
            success: Whether the attempt succeeded.
            feedback: Failure description included in the reflection prompt.
        """
        if success:
            return  # No need to fix what isn't broken

        recent_steps = self.memory[-self.trajectory_window:]
        trajectory = "\n".join(recent_steps)

        # Whitespace is spelled out explicitly (leading indent, trailing
        # spaces) so the exact text the model receives is visible here.
        prompt = (
            "\n"
            "        You failed the task. \n"
            "        Trajectory:\n"
            f"        {trajectory}\n"
            "        \n"
            f"        Feedback: {feedback}\n"
            "        \n"
            "        Reflect on why you failed and devise a new plan. \n"
            "        Be concise.\n"
            "        "
        )

        critique = self.llm.generate(prompt)
        self.reflections.append(critique)
        if self.max_reflections is not None:
            # Drop the oldest entries so prompts cannot grow without bound.
            del self.reflections[:-self.max_reflections]

        # Clear the trajectory to try again with the new lesson.
        self.memory = []

    def _build_context(self, observation) -> str:
        """Assemble the acting prompt, injecting stored reflections.

        Injecting reflections into the context window is analogous to
        updating the policy weights.

        Args:
            observation: Description of the current state.

        Returns:
            The system prompt line, a numbered "Past Mistakes & Lessons"
            list when any reflections exist, then the current observation.
        """
        pieces = [f"System: {self.system_prompt}\n"]

        if self.reflections:
            pieces.append("Past Mistakes & Lessons:\n")
            pieces.extend(
                f"{number}. {lesson}\n"
                for number, lesson in enumerate(self.reflections, start=1)
            )

        pieces.append(f"\nCurrent Observation: {observation}")
        return "".join(pieces)

# Mock LLM for demonstration
class MockLLM:
    """Stand-in LLM client that answers every prompt with one fixed string."""

    # The single reply handed back regardless of input.
    _CANNED_REPLY = "Thinking... [Placeholder Action]"

    def generate(self, prompt):
        """Ignore ``prompt`` and return the fixed placeholder reply."""
        return self._CANNED_REPLY
