In [9]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
import pandas as pd

# Team table for the 2024 bracket; later cells pass it to Tournament as `teams`.
teams_csv = "../data/march_madness_2024.csv"
teams = pd.read_csv(teams_csv)

In [12]:
from march_madness.tournament import Tournament

# Smoke test: play an entire bracket where team1 wins every game, printing
# how many games each round contains.
tournament = Tournament(teams=teams)

while True:
    # Fetch the pending games exactly once per round and base the stop
    # decision on *this* round's list. (Previously the `while` condition
    # inspected the list left over from the prior round, so the final was
    # only played by accident and a bracket that starts with a single game
    # would never have been played at all.)
    unplayed_games = tournament.get_unplayed_games()
    if len(unplayed_games) == 0:
        break  # nothing left to play

    print(f"Round {tournament.current_round}: {len(unplayed_games)} games")
    for game in unplayed_games:
        tournament.update_game_result(game, game.team1)

    # Same termination rule as before: a round consisting of a single game
    # is treated as the last one, and it has just been played.
    if len(unplayed_games) == 1:
        break

Round 1: 32 games
Round 2: 16 games
Round 3: 8 games
Round 4: 4 games
Round 5: 2 games
Round 6: 1 games


In [13]:
from march_madness.prediction import random_prediction, team1_always_wins
from march_madness.tournament import Tournament, TournamentSimulator

# Baseline run: every game is decided by `random_prediction`.
# Results are written to the CSV named in `result_path`.
random_run_config = {
    "num_trials": 1000,
    "tournament_class": Tournament,
    "tournament_params": {"teams": teams},
    "prediction_strategy": random_prediction,
    "result_path": "../results/random-42.csv",
    "seed": 42,
}
simulator = TournamentSimulator(**random_run_config)

simulator.run(verbose=True)

  0%|          | 0/63000 [00:00<?, ?it/s, game_winner=None, tournament_winner=None, trial=1]

In [None]:
from march_madness.prediction import llm_prediction
from march_madness.tournament import Tournament, TournamentSimulator

# LLM-backed run: games are decided by `llm_prediction` using the model named
# in `prediction_strategy_params`. `resume=True` is passed to `run`
# (presumably to continue from an existing results file -- confirm against
# TournamentSimulator.run).
llm_run_config = {
    "num_trials": 100,
    "tournament_class": Tournament,
    "tournament_params": {"teams": teams},
    "prediction_strategy": llm_prediction,
    "prediction_strategy_params": {"model_name": "llama3.2:1b"},
    "result_path": "../results/llama3_2-1b.csv",
    "seed": 42,
}
simulator = TournamentSimulator(**llm_run_config)

simulator.run(resume=True, verbose=True)

  0%|          | 0/6300 [00:00<?, ?it/s, game_winner=None, tournament_winner=None, trial=1]

Unnamed: 0,trial,trial_seed,round,team1,team2,game_winner,prediction_confidence,game_id,tournament_winner,prediction_reasoning,prediction_details,team1_details,team2_details,winner_details,tournament_state
0,1,424638473,1,North Carolina,Wagner,Wagner,0.9,"{'round': 1, 'region': 'West', 'game': 1}",,North Carolina's strong defense and offense co...,"{'confidence': 0.9, 'reasoning': 'North Caroli...","{'team': 'North Carolina', 'seed': 1, 'region'...","{'team': 'Wagner', 'seed': 16, 'region': 'West'}","{'team': 'Wagner', 'seed': 16, 'region': 'West'}","[{'game_id': {'round': 1, 'region': 'West', 'g..."
1,1,424638473,1,Mississippi State,Michigan State,Michigan State,0.8,"{'round': 1, 'region': 'West', 'game': 2}",,Mississippi State's strong defense against Mic...,"{'confidence': 0.8, 'reasoning': 'Mississippi ...","{'team': 'Mississippi State', 'seed': 8, 'regi...","{'team': 'Michigan State', 'seed': 9, 'region'...","{'team': 'Michigan State', 'seed': 9, 'region'...","[{'game_id': {'round': 1, 'region': 'West', 'g..."
2,1,424638473,1,Saint Mary's,Grand Canyon,Saint Mary's,0.9,"{'round': 1, 'region': 'West', 'game': 3}",,Saint Mary's' balanced roster and Grand Canyon...,"{'confidence': 0.9, 'reasoning': 'Saint Mary's...","{'team': 'Saint Mary's', 'seed': 5, 'region': ...","{'team': 'Grand Canyon', 'seed': 12, 'region':...","{'team': 'Saint Mary's', 'seed': 5, 'region': ...","[{'game_id': {'round': 1, 'region': 'West', 'g..."
3,1,424638473,1,Alabama,Charleston,Alabama,0.85,"{'round': 1, 'region': 'West', 'game': 4}",,Alabama's strong defense and Charleston's poor...,"{'confidence': 0.85, 'reasoning': 'Alabama's s...","{'team': 'Alabama', 'seed': 4, 'region': 'West'}","{'team': 'Charleston', 'seed': 13, 'region': '...","{'team': 'Alabama', 'seed': 4, 'region': 'West'}","[{'game_id': {'round': 1, 'region': 'West', 'g..."
4,1,424638473,1,Clemson,New Mexico,Clemson,0.9,"{'round': 1, 'region': 'West', 'game': 5}",,Clemson's high-powered offense and stifling de...,"{'confidence': 0.9, 'reasoning': 'Clemson's hi...","{'team': 'Clemson', 'seed': 6, 'region': 'West'}","{'team': 'New Mexico', 'seed': 11, 'region': '...","{'team': 'Clemson', 'seed': 6, 'region': 'West'}","[{'game_id': {'round': 1, 'region': 'West', 'g..."


In [48]:
import ast

# Per-game results written by the random-prediction simulation.
df = pd.read_csv("../results/random-42.csv")

# Simple metric: how often each team won each unique game slot (not matchup).
team_counts = (
    df.groupby(["game_id", "game_winner"])
    .size()
    .reset_index(name="count")
)
wins_per_game = team_counts.groupby("game_id")["count"].transform("sum")
team_counts["probability"] = team_counts["count"] / wins_per_game

# game_id is read back as text; parse it into a Python object so it can be
# indexed by "region" below.
team_counts["game_id"] = team_counts["game_id"].apply(ast.literal_eval)

# Keep only the rows whose game slot is the championship, most frequent first.
is_title_game = team_counts["game_id"].apply(lambda x: x["region"] == "championship")
champions = team_counts[is_title_game].sort_values("probability", ascending=False)
print(champions[["game_winner", "count", "probability"]].to_string(index=False))

       game_winner  count  probability
            Nevada     27        0.027
      Grand Canyon     24        0.024
            Baylor     24        0.024
               UAB     23        0.023
           Colgate     22        0.022
             Drake     21        0.021
    South Carolina     21        0.021
             Texas     20        0.020
          Illinois     20        0.020
      Northwestern     19        0.019
  Long Beach State     19        0.019
             Akron     19        0.019
        Charleston     18        0.018
        Texas Tech     18        0.018
   Grambling State     18        0.018
           McNeese     18        0.018
     James Madison     18        0.018
           Vermont     18        0.018
    Morehead State     17        0.017
  Florida Atlantic     17        0.017
            Wagner     17        0.017
     Saint Peter's     17        0.017
           Houston     17        0.017
           Stetson     17        0.017
         Tennessee     16