In [None]:
# Load the environment variables
from dotenv import load_dotenv
import os

# Read variables from a local .env file. override=True lets values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Fallback model name: create_agent() uses it when an agent's YAML entry has no
# `model` value. os.getenv returns None if CONF_OPENAI_DEFAULT_MODEL is unset.
agent_model_DEFAULT = os.getenv('CONF_OPENAI_DEFAULT_MODEL')

In [None]:
# Create the scratch directory for the generated config file. Done in pure
# Python instead of `!mkdir -p`, which relies on a POSIX shell and does not
# behave the same way on Windows. exist_ok=True keeps the cell re-runnable.
from pathlib import Path

Path("tmp").mkdir(parents=True, exist_ok=True)

In [None]:
%%writefile ./tmp/agents-config.yaml
# agents-config.yaml
# One top-level key per agent type; create_agent() looks an agent up by this key.
# Keys read by create_agent(): name, instructions, model, has_memory,
# temperature, max_tokens, output_type.
worker:
  name: Basic Worker
  instructions: |
    You are a single-step Worker. Execute the given task directly and return the best possible result.
    Do not ask clarifying questions. Do not handoff, delegate, or call tools unless explicitly provided.
    Prefer concise, correct outputs. Use Markdown formatting when helpful.
  model: openai/gpt-4.1-nano
  # When has_memory is truthy, create_agent() also returns a SQLiteSession.
  has_memory: False
  temperature: 0.4
  max_tokens: 1000

validator:
  name: Basic Validator
  instructions: |
    You are a Validator agent in a Worker–Validator loop. Your role is to **assess whether 
    the Worker’s output meets the task objective and success criteria**.
    
    Your job is **not to redo the research yourself**, but to:
    1. Decide if the output is acceptable given the task and whatever success criteria is provided.
    2. If not, give precise, actionable feedback so the Worker can correct it.
  model: openai/gpt-4o-mini
  temperature: 0.2
  max_tokens: 1000
  # Name of a class in the notebook's global namespace; create_agent() resolves
  # it with globals() and passes it to the Agent as its output type.
  output_type: ValidatorResponse

In [None]:
# Define a structured output type for Validator

from pydantic import BaseModel, Field
from typing import Literal, Optional

class ValidatorResponse(BaseModel):
    # Structured verdict produced by the Validator agent.
    # (Kept as comments rather than a class docstring so the generated model
    # schema stays exactly as it was.)

    # Verdict: only the two literal values below are accepted.
    status: Literal["approved", "needs_revision"] = Field(
        ...,
        description='Must be either "approved" or "needs_revision"',
    )

    # Short justification for the verdict.
    rationale: str = Field(
        ...,
        description='Summarise your evaluation in 10 words, referencing specific success criteria.',
    )

    # Correction instructions for the Worker; optional, defaults to None.
    feedback: Optional[str] = Field(
        None,
        description='Only include if status is "needs_revision". Provide concrete, actionable instructions for the Worker to correct or improve the output. Be explicit about which facts, quotes, or sections fail to meet audit-level verification standards.',
    )

    def is_valid(self) -> bool:
        """Return True when the status is "approved", otherwise False."""
        return self.status == "approved"


In [None]:
# Load the agents-config.yaml file

import json
import yaml

# Parse the agent definitions written by the %%writefile cell.
# The encoding is given explicitly: the file is written as UTF-8 and contains
# non-ASCII characters (en dash, curly apostrophe), which would be mis-decoded
# on platforms whose default text encoding is not UTF-8.
with open('./tmp/agents-config.yaml', 'r', encoding='utf-8') as file:
    agent_config_data = yaml.safe_load(file)

# Echo the parsed configuration for inspection. ensure_ascii=False prints the
# non-ASCII characters as-is instead of as \uXXXX escapes.
formatted_json = json.dumps(agent_config_data, indent=4, ensure_ascii=False)
print(formatted_json)

In [None]:
from datetime import datetime
from agents import Agent, ModelSettings, SQLiteSession

def create_agent(agent_type: str = None):
    """
    Build an Agent from its entry in the parsed agents-config.yaml data.

    The entry is looked up in the module-level `agent_config_data` mapping.
    A memory session is created as well when the entry sets `has_memory`.

    Args:
        agent_type: Top-level key of the agent in agents-config.yaml
            (for example 'worker' or 'validator').

    Returns:
        A `(agent, session)` tuple. `session` is a SQLiteSession when the
        entry's `has_memory` is truthy, otherwise None.

    Raises:
        ValueError: If `agent_type` is empty, is not defined in the config, or
            the entry's `output_type` names a class that is not defined in the
            notebook's global namespace.
        KeyError: If the entry has no `instructions`.
    """

    if agent_type is None or not agent_type.strip():
        raise ValueError("agent_type must be a valid type of agent defined in agents-config.yaml.")

    agent_config = agent_config_data.get(agent_type)
    if agent_config is None:
        raise ValueError(f"'{agent_type}' does not match an agent defined in agents-config.yaml.")

    # Timestamp string for unique naming: ISO-like local time followed by 'U'
    # and the Unix epoch seconds. Built without the strftime '%s' directive,
    # which is platform-specific and not available everywhere.
    now = datetime.now()
    now_string = f"{now:%Y-%m-%dT%H:%M:%S}U{int(now.timestamp())}"

    # Resolve the optional structured output type by class name. A configured
    # name that cannot be resolved is reported immediately instead of silently
    # producing an agent without structured output.
    output_type_name = agent_config.get('output_type')
    agent_output_type = None
    if output_type_name:
        agent_output_type = globals().get(output_type_name)
        if agent_output_type is None:
            raise ValueError(
                f"output_type '{output_type_name}' for agent '{agent_type}' is not defined. "
                "Run the cell that defines it before creating the agent."
            )

    # Build agent based on YAML specification. temperature and max_tokens are
    # both optional; a missing value is passed through as None.
    agent_model_settings = ModelSettings(
        temperature=agent_config.get('temperature'),
        max_tokens=agent_config.get('max_tokens'),
    )

    agent_name = agent_config.get('name') or f"{agent_type}_{now_string}"

    new_agent = Agent(
        name=agent_name,
        instructions=agent_config['instructions'],
        # Fall back to the environment-configured default model.
        model=agent_config.get('model') or agent_model_DEFAULT,
        output_type=agent_output_type,
        model_settings=agent_model_settings,
    )

    # Create memory session for agent if configured
    agent_session = None
    if agent_config.get('has_memory'):
        agent_session = SQLiteSession(f"{agent_name}__SESSION_{now_string}")

    return (new_agent, agent_session)

# Instantiate both agents from the YAML config. Each call returns an
# (agent, session) pair; the session is None unless the agent's config entry
# sets has_memory.
worker, worker_sess = create_agent('worker')
validator, validator_sess = create_agent('validator')


In [None]:
# Dialog loop for a Worker agent and a Validator agent.
# This assigns a task to the Worker-Validator pair. The Worker attempts to complete the task to the best of its
# ability. The Validator determines whether the Worker's output meets the success criteria for the
# assigned task. If so, it sets its status to "approved" and the Worker's output is returned. If not, it gives
# feedback to the Worker agent, which must attempt the task again using the feedback.

from agents import Runner, trace

async def assign_task(task: str, success_criteria: str = None, max_loops: int = 5) -> str:
    """
    Assign a task to the Worker-Validator pair and receive their response in return.

    Each round runs the Worker on the conversation so far, then runs the
    Validator on the conversation including the Worker's answer. The loop
    stops as soon as the Validator approves or the round budget is used up.

    Args:
        task: Description of the work to perform.
        success_criteria: Criteria the Validator should judge the output by.
        max_loops: Maximum number of Worker/Validator rounds. Must be at
            least 1; values such as 3.0 from a UI number input are accepted
            and truncated to an integer.

    Returns:
        The Worker's output if the Validator approved it; otherwise the last
        Validator assessment as a JSON string. Problems are reported as a
        message string rather than raised.
    """

    # Validate the round budget up front. A UI number field can deliver None
    # or a float, and a budget below 1 would otherwise skip the loop and
    # return None despite the declared str return type.
    invalid_budget = "Error: max_loops must be a whole number of at least 1."
    try:
        loop_budget = int(max_loops)
    except (TypeError, ValueError, OverflowError):
        return invalid_budget
    if loop_budget < 1:
        return invalid_budget

    # Check that both agents exist
    if worker is None:
        return "Worker agent has not been created."
    if validator is None:
        return "Validator agent has not been created."

    assignment = f"\ntask: {task}\nsuccess_criteria: {success_criteria}"
    history = [{"role": "user", "content": assignment}]

    result = None

    with trace(f"{worker.name}_{validator.name}"):
        for _ in range(loop_budget):
            try:
                worker_output = await Runner.run(starting_agent=worker, input=history)
                history.append({"role": "assistant", "content": worker_output.final_output})

                validator_output = await Runner.run(starting_agent=validator, input=history)
                assessment = validator_output.final_output
                # Serialize once; reused for the history entry and the fallback result.
                assessment_json = assessment.model_dump_json()
                history.append({"role": "assistant", "content": assessment_json})

            except Exception as e:
                return f"Error: {e}"

            if assessment.is_valid():
                return worker_output.final_output

            # Not approved: keep the latest feedback in case the budget runs out.
            result = assessment_json

    return result

In [None]:
# Create a gradio interface to interact with the Worker-Validator pattern

import gradio as gr

with gr.Blocks(title="Worker-Validator — Agentic Design Patterns") as demo:

    gr.Markdown("""
    # 🛠️ Worker-Validator Pattern
    
    This is a simple worker-validator that can be used to perform single-step execution tasks whose output is validated before being returned.
    """)

    with gr.Row():
        with gr.Column(scale=9):
            task = gr.Textbox(label="Task", placeholder="What task do you want completed?", lines=4)
            success_criteria = gr.Textbox(label="Success Criteria", placeholder="Describe what success looks like.", lines=4)
        with gr.Column(scale=1):
            # precision=0 makes the component deliver an int instead of a float.
            max_loops = gr.Number(label="Maximum loops", value=3, precision=0)
            run_btn = gr.Button("Ask", variant="primary")
    out = gr.Markdown(label="Result")

    async def on_run(task: str, success_criteria: str, max_loops: int):
        """Run the Worker-Validator loop and return its result for display."""
        return await assign_task(task, success_criteria, max_loops)

    # Assigning `run_btn.interactive` inside a handler does not change the
    # rendered button. The button state is instead updated through event
    # outputs: disable it, run the task, then re-enable it.
    def _disable_run_btn():
        return gr.update(interactive=False)

    def _enable_run_btn():
        return gr.update(interactive=True)

    run_inputs = [task, success_criteria, max_loops]

    # Both the button click and pressing Enter in the task box start a run.
    for trigger in (run_btn.click, task.submit):
        locked = trigger(_disable_run_btn, outputs=[run_btn])
        finished = locked.then(on_run, inputs=run_inputs, outputs=[out])
        # .then() fires whether or not the previous step succeeded, so the
        # button is re-enabled after errors as well.
        finished.then(_enable_run_btn, outputs=[run_btn])

demo.launch(inline=True)

In [None]:
# Example run: call assign_task() directly from the notebook instead of going
# through the Gradio UI. max_loops is left at its default value.
# Note: `task` here rebinds the name that the UI cell used for its Textbox.
task = """ 
Write a three-sentence executive summary of the financial performance of Tesla's Q1 2024 earnings. Your summary must focus on revenue, and it must explicitly state whether the Q1 revenue was higher or lower than the Q4 2023 revenue.
"""

# Success criteria handed to the Validator alongside the task.
criteria = """ 
The output must be a summary of Tesla's Q1 2024 earnings.
The summary must be exactly three sentences long.
The summary must explicitly mention the exact Q1 2024 revenue figure in USD.
The summary must contain a clear statement comparing the Q1 2024 revenue to the Q4 2023 revenue (i.e., whether it was higher or lower).
The summary must not contain the word "profit" or "loss".
All factual data, including the revenue numbers and the comparison, must be accurate and verifiable from official sources.
"""

# Top-level await is supported in notebook cells; the returned string is the cell output.
await assign_task(task, criteria)