In [None]:
# Install the notebook's third-party dependencies into the kernel's environment.
# NOTE(review): the packages are unpinned, and the Agent(..., result_type=...) / .data
# calls used further down look version-dependent in pydantic-ai — consider pinning.
%pip install pydantic-ai python-dotenv nest_asyncio
# Clear the cell output right after installing (removes the pip log from the cell).
from IPython.display import clear_output ; clear_output()

# Load variables from a .env file; presumably this is what supplies AI_MODEL — confirm.
from dotenv import load_dotenv ; load_dotenv()
# Apply nest_asyncio's patch; presumably needed so the agent calls can run inside the
# notebook's already-running event loop — confirm.
import nest_asyncio ; nest_asyncio.apply()

In [2]:
import os
from pprint import pprint
from typing import List, Dict

from pydantic import BaseModel, Field
from pydantic_ai import Agent

# Model identifier handed to every Agent(...) constructed in this notebook.
# Read with os.environ[...], so a missing AI_MODEL variable raises KeyError here.
AI_MODEL = os.environ['AI_MODEL']

In [3]:
class GeneratorResponse(BaseModel):
    # Structured output requested from the generator agent (used as `result_type`
    # in generate()). Documented with comments rather than a docstring so the
    # schema pydantic derives from this class is left exactly as it was.

    # The model's reading of the task/feedback and its plan for this attempt.
    thoughts: str = Field(
        ...,
        description="Your understanding of the task and feedback and how you plan to improve",
    )

    # The candidate solution itself.
    response: str = Field(
        ...,
        description="The generated solution.",
    )


async def generate(prompt: str, task: str, context: str = "") -> tuple[str, str]:
    """Ask the generator agent for a solution to ``task``.

    Args:
        prompt: Instructions for the generator.
        task: The task description, appended after a ``Task:`` label.
        context: Optional extra text (e.g. earlier attempts and feedback) placed
            between the prompt and the task. Omitted entirely when empty.

    Returns:
        A ``(thoughts, response)`` tuple taken from the agent's GeneratorResponse.

    Side effects:
        Prints the thoughts and the generated solution between START/END markers.
    """
    if context:
        full_prompt = f"{prompt}\n{context}\nTask: {task}"
    else:
        full_prompt = f"{prompt}\nTask: {task}"

    # A fresh agent is built per call; its structured result is a GeneratorResponse.
    generator = Agent(AI_MODEL, result_type=GeneratorResponse)
    run_result = await generator.run(full_prompt)
    generated = run_result.data

    print("\n=== GENERATION START ===")
    print(f"Thoughts:\n{generated.thoughts}\n")
    print(f"Generated:\n{generated.response}")
    print("=== GENERATION END ===\n")

    return generated.thoughts, generated.response


class EvaluatorResponse(BaseModel):
    # Structured output requested from the evaluator agent (used as `result_type`
    # in evaluate()). Documented with comments rather than a docstring so the
    # schema pydantic derives from this class is left exactly as it was.

    # Verdict string; loop() compares it against "PASS" to decide whether to stop.
    evaluation: str = Field(
        ...,
        description="PASS, NEEDS_IMPROVEMENT, or FAIL",
    )

    # Explanation that loop() feeds back to the generator on the next round.
    feedback: str = Field(
        ...,
        description="What needs improvement and why.",
    )


async def evaluate(prompt: str, content: str, task: str) -> tuple[str, str]:
    """Ask the evaluator agent to judge ``content`` against ``task``.

    Args:
        prompt: Instructions for the evaluator.
        content: The candidate solution to be judged.
        task: The original task description, included for reference.

    Returns:
        An ``(evaluation, feedback)`` tuple taken from the agent's EvaluatorResponse.

    Side effects:
        Prints the status and feedback between START/END markers.
    """
    full_prompt = f"{prompt}\nOriginal task: {task}\nContent to evaluate: {content}"

    # A fresh agent is built per call; its structured result is an EvaluatorResponse.
    evaluator = Agent(AI_MODEL, result_type=EvaluatorResponse)
    verdict = (await evaluator.run(full_prompt)).data

    print("=== EVALUATION START ===")
    print(f"Status: {verdict.evaluation}")
    print(f"Feedback: {verdict.feedback}")
    print("=== EVALUATION END ===\n")

    return verdict.evaluation, verdict.feedback


async def loop(
    task: str,
    evaluator_prompt: str,
    generator_prompt: str,
    max_iterations: int = 10,
) -> tuple[str, list[dict]]:
    """Generate, evaluate and regenerate until the evaluator passes the solution.

    Each round calls ``generate`` and then ``evaluate``. When the verdict is not
    a pass, all previous attempts plus the latest feedback are handed to the
    next ``generate`` call as context.

    Args:
        task: The task description given to both generator and evaluator.
        evaluator_prompt: Instructions for the evaluator.
        generator_prompt: Instructions for the generator.
        max_iterations: Upper bound on generate/evaluate rounds. Guards against
            an evaluator that never answers "PASS", which would otherwise keep
            the loop (and the model calls) running indefinitely.

    Returns:
        ``(result, chain_of_thought)`` where ``result`` is the accepted solution
        (or the last attempt if the round limit was hit) and ``chain_of_thought``
        is a list of ``{"thoughts": ..., "result": ...}`` dicts, one per round.

    Raises:
        ValueError: If ``max_iterations`` is smaller than 1.
    """
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    memory = []
    chain_of_thought = []
    # Empty context on the first round makes generate() build the plain prompt.
    context = ""
    result = ""

    for _ in range(max_iterations):
        thoughts, result = await generate(generator_prompt, task, context)
        memory.append(result)
        chain_of_thought.append({"thoughts": thoughts, "result": result})

        evaluation, feedback = await evaluate(evaluator_prompt, result, task)
        # The verdict is free-form model text, so tolerate case/whitespace noise.
        if evaluation.strip().upper() == "PASS":
            return result, chain_of_thought

        context = "\n".join([
            "Previous attempts:",
            *[f"- {m}" for m in memory],
            f"\nFeedback: {feedback}"
        ])

    # Round limit reached: return the latest attempt as a best effort.
    print(f"=== STOPPED AFTER {max_iterations} ITERATIONS WITHOUT PASS ===\n")
    return result, chain_of_thought

In [4]:
# Instructions for the evaluator agent. loop() stops once the returned verdict is "PASS".
evaluator_prompt = """
Evaluate this following code implementation for:
1. code correctness
2. time complexity
3. style and best practices

You should be evaluating only and not attemping to solve the task.
Only output "PASS" if all criteria are met and you have no further suggestions for improvements."""

# Instructions for the generator agent. On later rounds loop() supplies the
# previous attempts and the evaluator's feedback as additional context.
generator_prompt = """
Your goal is to complete the task based on <user input>. If there are feedback 
from your previous generations, you should reflect on them to improve your solution."""

# The problem statement, wrapped in the <user input> tags that generator_prompt refers to.
task = """
<user input>
Implement a Stack with:
1. push(x)
2. pop()
3. getMin()
All operations should be O(1).
</user input>
"""

# Run the generate -> evaluate cycle (top-level await, as supported in notebook cells).
result, chain_of_thought = await loop(task, evaluator_prompt, generator_prompt)

# Show the accepted solution.
print("=== FINAL RESULT ===")
print(result)

# Show every round's thoughts and result, in order.
print("\n=== CHAIN OF THOUGHT ===")
pprint(chain_of_thought)


=== GENERATION START ===
Thoughts:
The task requires implementation of a stack with operations push, pop, and getMin, all in constant O(1) time complexity. This can be achieved by using an auxiliary stack to keep track of the minimum elements.

Generated:
To implement a stack with `push(x)`, `pop()`, and `getMin()` all in O(1) time complexity, you can use two stacks: one for the actual stack operations and another to keep track of the minimum elements.

### Implementation

```python
class MinStack:
    def __init__(self):
        self.stack = []      # Main stack to store elements
        self.min_stack = []  # Auxiliary stack to store minimum values

    def push(self, x: int):
        # Push element onto the main stack
        self.stack.append(x)
        # If the min stack is empty, or the current element is less than or equal to the top of the min stack,
        # push the current element onto the min stack
        if not self.min_stack or x <= self.min_stack[-1]:
            self