#### Generator - Evaluator and Optimizer - Agentic Design Pattern

In [None]:
import os

from llama_stack_client import LlamaStackClient, Agent, AgentEventLogger
from termcolor import colored
from dotenv import load_dotenv
from pydantic import BaseModel

import json
from rich.pretty import pprint
from uuid import uuid4

In [None]:
# Load key/value pairs from a local .env file into the process environment so
# the os.environ / os.getenv lookups in the following cell can find them.
load_dotenv()

In [None]:
# Both Together settings are mandatory: indexing os.environ raises KeyError
# immediately when either variable is missing.
host = os.environ["TOGETHER_URL"]
together_api_key = os.environ["TOGETHER_API_KEY"]

# The Tavily key is read with os.getenv, so it is simply None when unset.
client = LlamaStackClient(
    base_url=host,
    provider_data=dict(
        tavily_search_api_key=os.getenv("TAVILY_SEARCH_API_KEY"),
        together_api_key=together_api_key,
    ),
)

In [None]:
# Model identifier used by every agent defined in this notebook.
MODEL_ID = "meta-llama/Llama-3.3-70B-Instruct"

# Settings shared by all agents; the per-agent configs spread this mapping and
# then override "instructions" and add a "response_format".
base_agent_config = {
    "model": MODEL_ID,
    "instructions": "You are a helpful assistant.",
    "sampling_params": {
        "strategy": dict(type="top_p", temperature=1.0, top_p=0.9),
    },
}

In [None]:
# Structured-output schema for the generator agent's JSON reply; its
# model_json_schema() is what gets passed as the agent's response_format.
# NOTE: documented with comments on purpose -- pydantic copies a class
# docstring into the generated schema's "description", which would change the
# payload sent to the server.
class GeneratorOutputSchema(BaseModel):
    # The model's understanding of the task/feedback and its plan to improve.
    thoughts: str
    # The produced solution (the code implementation, per the generator prompt).
    response: str

In [None]:
# Generator agent configuration: starts from the shared base settings,
# replaces the system prompt, and constrains every reply to the JSON schema
# derived from GeneratorOutputSchema.
generator_agent_config = {
    **base_agent_config,
    # This is a plain string (not an f-string / str.format template), so braces
    # reach the model exactly as written. The JSON example therefore uses
    # single braces; doubled "{{ }}" would be shown to the model verbatim and
    # would not be valid JSON.
    "instructions": """Your goal is to complete the task based on <user input>. If there are feedback 
    from your previous generations, you should reflect on them to improve your solution

    Output your answer concisely in the following JSON format:
    {
        "thoughts": "<Your understanding of the task and feedback and how you plan to improve>",
        "response": "<Your code implementation here>"
    }
    """,
    # Ask the server for structured output matching the pydantic model.
    "response_format": {
        "type": "json_schema",
        "json_schema": GeneratorOutputSchema.model_json_schema()
    }
}

In [None]:
# Structured-output schema for the evaluator agent's JSON reply; its
# model_json_schema() is what gets passed as the agent's response_format.
# NOTE: documented with comments on purpose -- pydantic copies a class
# docstring into the generated schema's "description", which would change the
# payload sent to the server.
class EvaluatorOutputSchema(BaseModel):
    # Verdict string. The evaluator prompt in this file asks for one of
    # PASS / NEEDS_IMPROVEMENT / FAIL, but the schema itself accepts any string.
    evaluation: str
    # What needs improvement and why; fed back to the generator by the workflow.
    feedback: str


In [None]:
# Evaluator agent configuration: starts from the shared base settings,
# replaces the system prompt with the review rubric, and constrains every
# reply to the JSON schema derived from EvaluatorOutputSchema.
evaluator_agent_config = {
    **base_agent_config,
    # This is a plain string (not an f-string / str.format template), so braces
    # reach the model exactly as written. The JSON example therefore uses
    # single braces; doubled "{{ }}" would be shown to the model verbatim and
    # would not be valid JSON.
    "instructions": """Evaluate this following code implementation for:
    1. code correctness
    2. time complexity
    3. style and best practices

    You should be evaluating only and not attemping to solve the task.
    Only output "PASS" if all criteria are met and you have no further suggestions for improvements.
    Output your evaluation concisely in the following JSON format.
    {
        "evaluation": "<evaluation enum output>",
        "feedback": "What needs improvement and why."
    }

    The evaluation enum output should be one of the following:
    - PASS
    - NEEDS_IMPROVEMENT
    - FAIL
    """,
    # Ask the server for structured output matching the pydantic model.
    "response_format": {
        "type": "json_schema",
        "json_schema": EvaluatorOutputSchema.model_json_schema()
    }
}

In [None]:
# Build one agent per role from its configuration mapping.
generator_agent = Agent(client, **generator_agent_config)
evaluator_agent = Agent(client, **evaluator_agent_config)

# Open a dedicated session for each agent; a random UUID suffix keeps the
# session names distinct between runs.
generator_session_id = generator_agent.create_session(
    session_name="generator_agent_" + str(uuid4()),
)
evaluator_session_id = evaluator_agent.create_session(
    session_name="evaluator_agent_" + str(uuid4()),
)


In [None]:
def _run_json_turn(agent, session_id, content):
    """Send *content* as one non-streaming user turn and return the parsed JSON reply."""
    turn = agent.create_turn(
        messages=[{"role": "user", "content": content}],
        session_id=session_id,
        stream=False,
    )
    return json.loads(turn.output_message.content)


def generator_evaluator_workflow(user_input, max_iterations=10):
    """Iteratively generate a solution and refine it using evaluator feedback.

    The generator agent produces a candidate for ``user_input``; the evaluator
    agent reviews the candidate's ``"response"`` field. While the evaluator's
    verdict is not ``"PASS"``, its ``"feedback"`` is sent back to the generator
    (same session, so earlier turns remain in context) and the new candidate is
    evaluated again.

    Args:
        user_input: The task description sent to the generator as the first
            user message.
        max_iterations: Maximum number of generate/evaluate rounds before
            giving up. ``None`` disables the limit (loop until ``"PASS"``).

    Returns:
        The generator's parsed JSON reply (a dict with ``"thoughts"`` and
        ``"response"``) for the accepted candidate, or for the most recent
        candidate if ``max_iterations`` rounds elapsed without a ``"PASS"``
        (a ``RuntimeWarning`` is issued in that case).

    Raises:
        ValueError: If ``max_iterations`` is given and is smaller than 1.
        json.JSONDecodeError: If an agent reply is not valid JSON.
        KeyError: If a reply lacks the expected ``"response"``,
            ``"evaluation"`` or ``"feedback"`` key.
    """
    import warnings  # local import keeps this cell self-contained

    if max_iterations is not None and max_iterations < 1:
        raise ValueError("max_iterations must be at least 1 (or None for no limit)")

    # The first generator prompt is the task itself; later prompts are the
    # evaluator's feedback on the previous candidate.
    prompt = user_input
    rounds = 0
    while True:
        generator_result = _run_json_turn(
            generator_agent, generator_session_id, prompt
        )
        evaluator_result = _run_json_turn(
            evaluator_agent, evaluator_session_id, generator_result["response"]
        )

        if evaluator_result["evaluation"] == "PASS":
            return generator_result

        rounds += 1
        # Guard against an evaluator that never approves: without a bound the
        # loop would keep issuing remote calls indefinitely.
        if max_iterations is not None and rounds >= max_iterations:
            warnings.warn(
                f"Evaluator did not return PASS after {rounds} round(s); "
                "returning the latest candidate.",
                RuntimeWarning,
                stacklevel=2,
            )
            return generator_result

        prompt = evaluator_result["feedback"]

In [None]:
# Task handed to the generator; the leading and trailing newlines are part of
# the prompt text.
coding_task = (
    "\n"
    "Implement a Stack with:\n"
    "1. push(x)\n"
    "2. pop()\n"
    "3. getMin()\n"
    "All operations should be O(1).\n"
)

# Run the generate/evaluate loop and show the returned implementation.
output = generator_evaluator_workflow(coding_task)
print(output["response"])

In [None]:
# Retrieve the generator agent's session from the server and pretty-print it
# as a plain dict for inspection.
generator_agent_session = client.agents.session.retrieve(
    agent_id=generator_agent.agent_id,
    session_id=generator_session_id,
)
pprint(generator_agent_session.to_dict())


In [None]:

# Retrieve the evaluator agent's session from the server and pretty-print it
# as a plain dict for inspection.
evaluator_agent_session = client.agents.session.retrieve(
    agent_id=evaluator_agent.agent_id,
    session_id=evaluator_session_id,
)
pprint(evaluator_agent_session.to_dict())