In [None]:
from __future__ import annotations

from dataclasses import dataclass
from typing import Literal
from openai import AsyncAzureOpenAI
from agents import Agent, ItemHelpers, Runner, TResponseInputItem, trace, OpenAIChatCompletionsModel
import os
from dotenv import load_dotenv
load_dotenv()

"""
This example shows the LLM as a judge pattern. The first agent generates an outline for a story.
The second agent judges the outline and provides feedback. We loop until the judge is satisfied
with the outline.
"""

# Shared asynchronous Azure OpenAI client used by the agents defined below.
# Every connection setting is read from the process environment, which
# load_dotenv() above may have populated from a local .env file.
client = AsyncAzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_version=os.environ.get("AZURE_OPENAI_API_VERSION"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
)

# Generator agent: drafts the story outline and, when feedback is present in
# the conversation, revises it.
story_outline_generator = Agent(
    name="story_outline_generator",
    model=OpenAIChatCompletionsModel(
        # Azure deployment name, taken from the environment.
        model=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
        openai_client=client,
    ),
    # Adjacent string literals are joined verbatim, so the first sentence
    # carries a trailing space; without it the prompt reads "input.If there".
    instructions=(
        "You generate a very short story outline based on the user's input. "
        "If there is any feedback provided, use it to improve the outline."
    ),
)


# Structured verdict produced by the evaluator agent (it is passed as that
# agent's output_type). Documented with comments rather than a docstring so
# the class's __doc__ stays the dataclass-generated default.
@dataclass
class EvaluationFeedback:
    # Free-text critique; forwarded to the generator when the score is not "pass".
    feedback: str
    # Overall verdict; the refinement loop stops only on "pass".
    score: Literal["pass", "needs_improvement", "fail"]


# Judge agent: scores the generator's outline and returns an
# EvaluationFeedback instance as its structured output.
evaluator = Agent[None](
    name="evaluator",
    model=OpenAIChatCompletionsModel(
        # Azure deployment name, taken from the environment.
        model=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
        openai_client=client,
    ),
    # Adjacent string literals are joined verbatim, so each sentence but the
    # last ends with a space; otherwise the prompt runs the sentences together
    # ("good enough.If it's not ... improved.Never give ...").
    instructions=(
        "You evaluate a story outline and decide if it's good enough. "
        "If it's not good enough, you provide feedback on what needs to be improved. "
        "Never give it a pass on the first try."
    ),
    output_type=EvaluationFeedback,
)


async def main() -> None:
    """Run the generate-then-judge loop until the evaluator returns "pass".

    Prompts the user for a story idea, then alternates between
    ``story_outline_generator`` and ``evaluator``. Each non-passing verdict's
    feedback is appended to the conversation as a new user message before the
    generator runs again. The most recent outline is printed at the end.
    """
    topic = input("What kind of story would you like to hear? ")
    input_items: list[TResponseInputItem] = [{"content": topic, "role": "user"}]
    latest_outline: str | None = None

    # A single trace spans every generator/evaluator round.
    with trace("LLM as a judge"):
        approved = False
        while not approved:
            generation = await Runner.run(story_outline_generator, input_items)

            # Continue the conversation from the generator's full transcript
            # so the evaluator sees the outline that was just produced.
            input_items = generation.to_input_list()
            latest_outline = ItemHelpers.text_message_outputs(generation.new_items)
            print("Story outline generated")

            judgement = await Runner.run(evaluator, input_items)
            verdict: EvaluationFeedback = judgement.final_output
            print(f"Evaluator score: {verdict.score}")

            approved = verdict.score == "pass"
            if approved:
                print("Story outline is good enough, exiting.")
            else:
                print("Re-running with feedback")
                input_items.append(
                    {"content": f"Feedback: {verdict.feedback}", "role": "user"}
                )

    print(f"Final story outline: {latest_outline}")


# Entry point. A bare top-level `await` is only accepted by hosts that support
# it (e.g. an IPython/Jupyter cell); a plain `python file.py` run rejects it
# as a syntax error.
# NOTE(review): this looks like an exported notebook cell — confirm before
# reusing it as a standalone script, where asyncio.run(main()) would be needed.
if __name__ == "__main__":
  await main()

Story outline generated
Evaluator score: needs_improvement
Re-running with feedback
Story outline generated
Evaluator score: needs_improvement
Re-running with feedback
Story outline generated
Evaluator score: pass
Story outline is good enough, exiting.
Final story outline: Title: "The Last Move"

Outline:  
Elena, once a celebrated chess prodigy whose promising career ended after a traumatic injury, is challenged to a historic match against ORION—an AI designed not only to master chess but to simulate human intuition and emotions. As the match unfolds in a high-stakes stadium filled with global spectators and live broadcasts, Elena and ORION engage in more than a game; subtle exchanges reveal ORION’s emerging emotional responses and its attempt to connect with Elena on an empathetic level. Parallel to the match, society grapples with polarized views: some hail the AI’s victory as inevitable progress, others fear a future where human creativity is eclipsed. Post-match, Elena confronts h