In [12]:
import sys
from pathlib import Path

# Make the project's src/ directory importable from this notebook.
# NOTE: "../src" is resolved against the kernel's current working directory,
# so the notebook must be started from its own folder for this to point at src/.
SRC_DIR = Path("../src").resolve()

# Guard the append so that re-running this cell does not pile up duplicate
# sys.path entries in a long-lived kernel.
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

In [13]:
from config import GAME_DATA_FILE, AGENT_SYSTEM_PROMPT_FILE
from loaders import load_text, build_agent_prompt_text, safe_json_load, parse_council_df

from dotenv import load_dotenv

from lm_council import LanguageModelCouncil
from lm_council.judging import PRESET_EVAL_CONFIGS

from aiolimiter import AsyncLimiter

import json
import yaml
import pandas as pd
from json_repair import repair_json

In [14]:
# Read variables from a .env file into the process environment; the value shown
# as this cell's output is load_dotenv()'s return value.
# NOTE(review): presumably this supplies the API key(s) the council needs — confirm.
load_dotenv()

True

In [15]:
# Council members: every model listed here is asked to complete the prompt.
COUNCIL_MODELS = [
    "x-ai/grok-code-fast-1",
    "anthropic/claude-sonnet-4.5",
    "google/gemini-2.5-flash",
    "openai/gpt-5-mini",
    "deepseek/deepseek-chat-v3-0324",
    "minimax/minimax-m2",
    "z-ai/glm-4.6",
    "qwen/qwen3-235b-a22b-2507",
]

# Judges: models that score the council members' completions.
# NOTE(review): the only judge is also a council member, so it ends up scoring
# its own completion — confirm this is intended.
JUDGE_MODELS = ["x-ai/grok-code-fast-1"]

# Build the council using the library's preset "default_rubric" evaluation config.
lmc = LanguageModelCouncil(
    models=COUNCIL_MODELS,
    judge_models=JUDGE_MODELS,
    eval_config=PRESET_EVAL_CONFIGS["default_rubric"],
)

In [16]:
# Load both text files
# Read the two prompt ingredients from the paths declared in config
# (game data first, then the agent system prompt).
game_data_text, agent_system_prompt = (
    load_text(GAME_DATA_FILE),
    load_text(AGENT_SYSTEM_PROMPT_FILE),
)

# Combine the system prompt and the game data into the prompt handed to the council.
full_prompt = build_agent_prompt_text(
    agent_system_prompt,
    game_data_text,
)

In [17]:
lmc._limiter = AsyncLimiter(5, 30)

council_raw, council_judgment = await lmc.execute(full_prompt)

Generated 8 completion tasks.


100%|██████████| 8/8 [00:36<00:00,  4.52s/it]


Generated 8 judging tasks.


100%|██████████| 8/8 [00:32<00:00,  4.12s/it]


In [18]:
# Inspect the raw completions: one row per council model, with the completion
# text and token counts.
council_raw

Unnamed: 0,user_prompt,model,completion_text,completion_tokens,prompt_tokens,total_tokens
0,You are one member of an independent LLM Predi...,google/gemini-2.5-flash,"{\n ""winner_team_id"": 2,\n ""winner_team_name...",421,2810,3231
1,You are one member of an independent LLM Predi...,z-ai/glm-4.6,"\n{\n ""winner_team_id"": 1,\n ""winner_team_na...",1338,2394,3732
2,You are one member of an independent LLM Predi...,x-ai/grok-code-fast-1,"{\n ""winner_team_id"": 2,\n ""winner_team_name...",1003,2552,3555
3,You are one member of an independent LLM Predi...,deepseek/deepseek-chat-v3-0324,"{\n ""winner_team_id"": 1,\n ""winner_team_name...",283,2492,2775
4,You are one member of an independent LLM Predi...,minimax/minimax-m2,"\n\n{\n ""winner_team_id"": 1,\n ""winner_team_...",964,2430,3394
5,You are one member of an independent LLM Predi...,qwen/qwen3-235b-a22b-2507,"{\n ""winner_team_id"": 2,\n ""winner_team_name...",381,2485,2866
6,You are one member of an independent LLM Predi...,anthropic/claude-sonnet-4.5,"```json\n{\n ""winner_team_id"": 2,\n ""winner_...",526,2850,3376
7,You are one member of an independent LLM Predi...,openai/gpt-5-mini,"{\n ""winner_team_id"": 1,\n ""winner_team_name...",2221,2382,4603


In [19]:

# Convert the raw completions frame into a structured predictions table using the
# project helper from loaders.
# NOTE(review): presumably parses each completion_text as JSON (the result has a
# valid_json column) — confirm in loaders.parse_council_df.
parsed = parse_council_df(council_raw)

In [20]:
# Render the parsed predictions table (one row per model).
display(parsed)

Unnamed: 0,model,valid_json,winner_team_id,winner_team_name,score_team_1,score_team_2,confidence_winner,confidence_score_band,key_factors,risk_factors
0,google/gemini-2.5-flash,True,2,Texas A&M Aggies,26,29,0.55,medium,[Texas A&M has a significantly better overall ...,"[Texas has a significant home-field advantage,..."
1,z-ai/glm-4.6,True,1,Texas Longhorns,27,24,0.55,medium,[Texas home field advantage at DKR with high c...,[TAMU's superior offensive production (37.1 pp...
2,x-ai/grok-code-fast-1,True,2,Texas A&M Aggies,24,28,0.55,medium,[Texas A&M's superior offensive production wit...,[Texas A&M has multiple key injuries including...
3,deepseek/deepseek-chat-v3-0324,True,1,Texas Longhorns,28,24,0.55,medium,[Texas has a strong home-field advantage with ...,[Texas A&M has a superior overall record (11-0...
4,minimax/minimax-m2,True,1,Texas Longhorns,27,24,0.58,medium,[Texas has significant home field advantage at...,[TAMU has superior offensive production (37.1 ...
5,qwen/qwen3-235b-a22b-2507,True,2,Texas A&M Aggies,26,29,0.62,medium,[Texas A&M has a superior points-per-play offe...,[Texas has a strong home-field advantage at DK...
6,anthropic/claude-sonnet-4.5,True,2,Texas A&M Aggies,24,28,0.54,low,[Texas A&M's superior offensive efficiency wit...,[Texas A&M missing two starting safeties (Bryc...
7,openai/gpt-5-mini,True,1,Texas Longhorns,28,24,0.58,medium,[Home-field edge: GAME_DATA.context.home_field...,[TAMU offensive dominance: TAMU stats.points_s...


In [21]:
# Inspect the judging results: one row per (judge_model, model_being_judged) pair
# with the rubric scores.
council_judgment

Unnamed: 0,user_prompt,judge_model,model_being_judged,temperature,completion_tokens,prompt_tokens,total_tokens,Coherence,Relevance,Overall
0,You are one member of an independent LLM Predi...,x-ai/grok-code-fast-1,qwen/qwen3-235b-a22b-2507,0.0,271,3202,3473,5,5,5.0
1,You are one member of an independent LLM Predi...,x-ai/grok-code-fast-1,google/gemini-2.5-flash,0.0,355,3204,3559,5,5,5.0
2,You are one member of an independent LLM Predi...,x-ai/grok-code-fast-1,x-ai/grok-code-fast-1,0.0,543,3164,3707,5,5,5.0
3,You are one member of an independent LLM Predi...,x-ai/grok-code-fast-1,minimax/minimax-m2,0.0,405,3139,3544,5,5,5.0
4,You are one member of an independent LLM Predi...,x-ai/grok-code-fast-1,deepseek/deepseek-chat-v3-0324,0.0,293,3115,3408,5,5,5.0
5,You are one member of an independent LLM Predi...,x-ai/grok-code-fast-1,z-ai/glm-4.6,0.0,349,3105,3454,5,5,5.0
6,You are one member of an independent LLM Predi...,x-ai/grok-code-fast-1,anthropic/claude-sonnet-4.5,0.0,403,3293,3696,5,5,5.0
7,You are one member of an independent LLM Predi...,x-ai/grok-code-fast-1,openai/gpt-5-mini,0.0,384,3320,3704,5,5,5.0


In [22]:
# Name under which this run is persisted; defined once so that save and load
# cannot drift apart.
RUN_NAME = "run_3"

# Persist the run, then load it back from the same name.
lmc.save(RUN_NAME)

# load() returns a council object (its repr is this cell's output). Bind it to a
# name instead of discarding it so the reloaded run can be inspected afterwards.
lmc_reloaded = lmc.load(RUN_NAME)
lmc_reloaded


<lm_council.council.LanguageModelCouncil at 0x1659345f0>