In [1]:
# Install the packages listed in requirements.txt using the %pip magic.
%pip install -r requirements.txt
# Clear this cell's output once the install has run. Note that this also hides
# the installer log, including any install errors -- drop this line when
# debugging a failed install.
from IPython.display import clear_output ; clear_output()

In [2]:
# Project-local helpers from the util module (not shown in this notebook).
from util import initialize, show
# The value returned by initialize() is passed as the model argument to every
# Agent created below.
# NOTE(review): the recorded output suggests initialize() also prints the list
# of available models and the one selected -- confirm in the util module.
AI_MODEL = initialize()

from typing import List, Dict

from pydantic import BaseModel, Field
from pydantic_ai import Agent

Available AI models:
['openai:gpt-4o',
 'openai:gpt-4o-mini',
 'gemini-1.5-pro',
 'gemini-2.0-flash-exp',
 'claude-3-5-haiku-latest',
 'claude-3-5-sonnet-latest']
Using AI model: openai:gpt-4o


In [3]:
# Structured reply requested from the generator agent (used as the Agent's
# result_type in generate()).
# Documented with comments rather than a class docstring on purpose: pydantic
# publishes a model docstring as the schema description, which would change
# what is sent to the model.
class GeneratorResponse(BaseModel):
    # Free-text reasoning the model gives before its answer (required field).
    thoughts: str = Field(
        description="Your understanding of the task and feedback and how you plan to improve",
    )
    # The candidate solution itself (required field).
    response: str = Field(description="The generated solution.")


async def generate(prompt: str, task: str, context: str = "") -> tuple[str, str]:
    """Ask the model for a solution to ``task`` and display what it returned.

    Args:
        prompt: Instructions placed at the start of the request.
        task: The task description, appended as ``Task: ...``.
        context: Optional extra text (e.g. earlier attempts and feedback)
            inserted between the instructions and the task. Omitted from the
            request entirely when empty.

    Returns:
        A ``(thoughts, response)`` tuple taken from the model's structured
        ``GeneratorResponse`` reply.
    """
    # Only add the context line when there is something to put on it.
    preamble = f"{prompt}\n{context}" if context else f"{prompt}"
    request = f"{preamble}\nTask: {task}"

    agent = Agent(AI_MODEL, result_type=GeneratorResponse)
    run_result = await agent.run(request)
    reply = run_result.data
    reasoning, solution = reply.thoughts, reply.response

    # Section banner first, then the two parts of the reply.
    for heading, body in (
        ("Generation", ""),
        ("Thoughts", reasoning),
        ("Generated", solution),
    ):
        show(body, title=heading)

    return reasoning, solution


# Structured reply requested from the evaluator agent (used as the Agent's
# result_type in evaluate()).
# Documented with comments rather than a class docstring on purpose: pydantic
# publishes a model docstring as the schema description, which would change
# what is sent to the model.
class EvaluatorResponse(BaseModel):
    # Verdict string; the field description asks for one of three labels, but
    # the type itself is an unconstrained str (required field).
    evaluation: str = Field(description="PASS, NEEDS_IMPROVEMENT, or FAIL")
    # Free-text critique explaining the verdict (required field).
    feedback: str = Field(description="What needs improvement and why.")


async def evaluate(prompt: str, content: str, task: str) -> tuple[str, str]:
    """Ask the model to judge ``content`` against ``task`` and display the verdict.

    Args:
        prompt: Evaluation instructions placed at the start of the request.
        content: The candidate solution to be judged.
        task: The original task the solution was written for.

    Returns:
        An ``(evaluation, feedback)`` tuple taken from the model's structured
        ``EvaluatorResponse`` reply.
    """
    request = (
        f"{prompt}\n"
        f"Original task: {task}\n"
        f"Content to evaluate: {content}"
    )

    judge = Agent(AI_MODEL, result_type=EvaluatorResponse)
    verdict = (await judge.run(request)).data
    status, notes = verdict.evaluation, verdict.feedback

    # Section banner first, then the two parts of the reply.
    for heading, body in (
        ("Evaluation", ""),
        ("Status", status),
        ("Feedback", notes),
    ):
        show(body, title=heading)

    return status, notes


async def loop(
    task: str,
    evaluator_prompt: str,
    generator_prompt: str,
    max_iterations: "int | None" = None,
) -> tuple[str, list[dict]]:
    """Generate, evaluate and regenerate until the evaluator returns PASS.

    Each round calls ``generate`` and then ``evaluate``. When the verdict is
    not PASS, every earlier attempt plus the latest feedback is passed back to
    ``generate`` as context for the next round.

    Args:
        task: The task description given to both generator and evaluator.
        evaluator_prompt: Instructions forwarded to ``evaluate``.
        generator_prompt: Instructions forwarded to ``generate``.
        max_iterations: Optional upper bound on the number of generation
            attempts. ``None`` (the default) retries until the evaluator
            passes a solution, however long that takes.

    Returns:
        ``(result, chain_of_thought)``: the accepted solution and a list with
        one ``{"thoughts": ..., "result": ...}`` dict per attempt, in order.

    Raises:
        ValueError: If ``max_iterations`` is given and is less than 1.
        RuntimeError: If ``max_iterations`` attempts were made without a PASS.
    """
    if max_iterations is not None and max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    memory = []
    chain_of_thought = []
    context = ""  # no feedback exists before the first attempt

    while True:
        thoughts, result = await generate(generator_prompt, task, context)
        memory.append(result)
        chain_of_thought.append({"thoughts": thoughts, "result": result})

        evaluation, feedback = await evaluate(evaluator_prompt, result, task)
        # The verdict is free-form model text: tolerate surrounding whitespace
        # and case differences so "pass" or "PASS\n" still ends the loop
        # instead of triggering endless regeneration.
        if evaluation.strip().upper() == "PASS":
            return result, chain_of_thought

        # Every entry in memory is one generation attempt already made.
        if max_iterations is not None and len(memory) >= max_iterations:
            raise RuntimeError(
                f"No PASS after {max_iterations} attempts; "
                f"last evaluation: {evaluation!r}"
            )

        context = "\n".join([
            "Previous attempts:",
            *[f"- {m}" for m in memory],
            f"\nFeedback: {feedback}"
        ])

In [4]:
# Instructions for the evaluator: the three review criteria, and the rule that
# PASS is reserved for solutions with nothing left to improve.
evaluator_prompt = """
Evaluate this following code implementation for:
1. code correctness
2. time complexity
3. style and best practices

You should be evaluating only and not attemping to solve the task.
Only output "PASS" if all criteria are met and you have no further suggestions for improvements."""

# Instructions for the generator: solve the task and take earlier feedback
# into account.
generator_prompt = """
Your goal is to complete the task based on <user input>. If there are feedback 
from your previous generations, you should reflect on them to improve your solution."""

# The task itself, wrapped in the <user input> tags the generator prompt refers to.
task = """
<user input>
Implement a Stack with:
1. push(x)
2. pop()
3. getMin()
All operations should be O(1).
</user input>
"""

# Run the generate/evaluate cycle. This is a top-level await, so it relies on
# the notebook kernel providing a running event loop.
result, chain_of_thought = await loop(task, evaluator_prompt, generator_prompt)

# Display the accepted solution and the per-attempt history.
show(result, title='Final Result')
show(chain_of_thought, title='Chain of Thought')


Generation
----------


Thoughts
--------

Implementing a stack with push, pop, and getMin operations in O(1) time requires careful management of auxiliary storage. Based on past implementations, attention should be given to optimal space usage and ensuring constant time complexity across all operations.


Generated
---------

```python
class MinStack:
    def __init__(self):
        self.stack = []  # main stack to store elements
        self.min_stack = []  # auxiliary stack to store minimum values

    def push(self, x: int) -> None:
        self.stack.append(x)
        # If min_stack is empty or x is less than or equal to the current minimum, push it onto min_stack
        if not self.min_stack or x <= self.min_stack[-1]:
            self.min_stack.append(x)

    def pop(self) -> None:
        if not self.stack:
            return None  # or raise exception if preferred
        popped = self.stack.pop()
        # If the popped element is the minimum, pop it from min_stack as well
