# LLM vs Greedy playground

Run end-to-end games between the LLM agent (via OpenRouter) and a baseline Greedy agent.

Requirements:
- Set `OPENROUTER_API_KEY` in your environment.
- Network access enabled.

You can adjust seeds, budgets, and model names as needed.

In [None]:
import os
from azul_engine import GameEngine, LLMAgent, GreedyFillAgent, play_game


# Configure agents.
# Fail fast with an explicit exception instead of `assert`: assertions are
# stripped when Python runs with -O, which would let a missing key slip
# through this check. Checking first also avoids building agents needlessly.
if not os.getenv("OPENROUTER_API_KEY"):
    raise RuntimeError("Set OPENROUTER_API_KEY before running")

# Both LLM agents are given the same provider preference order.
_PROVIDER_PRIORITY = ("fireworks", "together", "novita/fp4")

llm_agent = LLMAgent(model="openai/gpt-oss-120b", provider_priority=_PROVIDER_PRIORITY)
llm_agent2 = LLMAgent(model="openai/gpt-oss-20b", provider_priority=_PROVIDER_PRIORITY)
greedy_agent = GreedyFillAgent()

In [None]:
def play_llm_vs_greedy(seed: int = 0, agents=None, *, verbose: bool = True):
    """Play one game to completion and return the terminal state.

    Args:
        seed: Seed passed to ``GameEngine``.
        agents: Optional sequence of agents, indexed by
            ``state.current_player``. Defaults to ``[llm_agent, llm_agent2]``,
            which is the line-up that used to be hard-coded here.
        verbose: When true (the default), print the new state, the players'
            scores, the acting player with its agent, and the chosen action
            after every step.

    Returns:
        The first state for which ``state.is_terminal()`` is true.
    """
    # NOTE(review): despite the function name, the default line-up pits the
    # two LLM agents against each other. Pass
    # ``agents=[llm_agent, greedy_agent]`` for an actual LLM-vs-greedy game,
    # and confirm which default is intended.
    if agents is None:
        agents = [llm_agent, llm_agent2]

    engine = GameEngine(seed=seed)
    state = engine.reset()
    while not state.is_terminal():
        # Remember who acted before stepping, so the log line below names the
        # player that made the move rather than the one to move next.
        current = state.current_player
        agent = agents[current]
        action = agent.select_action(state)
        state = engine.step(action)
        if verbose:
            print(state)
            print([p.score for p in state.players])
            print(current, agent)
            print(action)
    return state

# Play one game with a fixed seed, then collect every player's score from the
# returned state. The trailing bare name makes the notebook display the list.
state = play_llm_vs_greedy(seed=0)
scores = [player.score for player in state.players]
scores

In [None]:
# Inspect the last LLM reasoning/raw output after a game.
# The bare tuple is the cell's final expression, so the notebook displays both
# `llm_agent` attributes together as a tuple.
llm_agent.last_reasoning, llm_agent.last_raw


In [None]:
# Display the `last_error` attribute stored on `llm_agent`.
# NOTE(review): presumably empty/None when the most recent call succeeded —
# confirm against the LLMAgent implementation.
llm_agent.last_error

In [None]:
from evals.arena import run_arena
import os 
# Models entered into the arena.
_arena_models = [
    "openai/gpt-oss-120b",
    "google/gemini-3-flash-preview",
    "x-ai/grok-4.1-fast",
    "openai/gpt-5-mini",
]
# One provider entry per model.
# NOTE(review): presumably matched to the models by position, with None
# meaning "no provider preference" — confirm against run_arena.
_arena_providers = [["fireworks", "together"], None, None, None]

# Kept as the cell's final expression so the notebook still shows the return
# value of run_arena.
run_arena(
    _arena_models,
    games_per_pair=16,
    parallel=24,
    out_dir="runs_final_new",
    providers=_arena_providers,
    progress=True,
    swap_sides=True,
)


In [None]:
#!uv pip install tqdm
from tqdm import tqdm

In [None]:
  # Summarize what is stored under "runs_test" (presumably a run output
  # directory); the call returns a pair that is unpacked into `results` and
  # `matchups`.
  # NOTE(review): the arena cell in this notebook writes to "runs_final_new",
  # not "runs_test" — confirm this is the directory meant to be analysed.
  # NOTE: the leading indentation of this cell is tolerated by IPython but is
  # an IndentationError if these lines are run as a plain Python script.
  from analysis import summarize
  results, matchups = summarize("runs_test")

