In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
TEAMS_DATA = "../data/march_madness_2024.csv"

In [None]:
from march_madness.tournament import Tournament

tournament = Tournament(team_data=TEAMS_DATA)

unplayed_games = tournament.get_unplayed_games()

while len(unplayed_games) > 1:
    unplayed_games = tournament.get_unplayed_games()
    print(f"Round {tournament.current_round}: {len(unplayed_games)} games")
    for game in unplayed_games:
        tournament.update_game_result(game, game.team1)
        # print(f"{game.team1_details['seed']}-{game.team2_details['seed']}: {game.game_id}")

Round 1: 32 games
Round 2: 16 games
Round 3: 8 games
Round 4: 4 games
Round 5: 2 games
Round 6: 1 games


In [None]:
from march_madness.prediction import random_prediction, team1_always_wins
from march_madness.tournament import Tournament, TournamentSimulator

simulator = TournamentSimulator(num_trials=10000,
                                tournament_class=Tournament,
                                tournament_params={"teams_data": TEAMS_DATA},
                                prediction_strategy=random_prediction,
                                prediction_strategy_params={"p_team1": 0.7},
                                result_path="../results/random-42-70.csv",
                                seed=42)

simulator.run(verbose=True, pbar_level="trial")

  0%|          | 0/630000 [00:00<?, ?it/s, game_winner=None, tournament_winner=None, trial=1]

In [None]:
from march_madness.prediction import llm_prediction
from march_madness.tournament import Tournament, TournamentSimulator

simulator = TournamentSimulator(num_trials=100,
                                tournament_class=Tournament,
                                tournament_params={"teams_data": TEAMS_DATA},
                                prediction_strategy=llm_prediction,
                                prediction_strategy_params={
                                    "model_name": "llama3.2:1b"
                                },
                                result_path="../results/llama3_2-1b.csv",
                                seed=42)

simulator.run(resume=True, verbose=True)

In [76]:
import pandas as pd
# Read in 100 trials
df = pd.read_csv("../results/random-42-70.csv", nrows=6300)
df.head()

Unnamed: 0,trial,trial_seed,game_id,round,team1,team2,game_winner,tournament_winner,prediction_confidence,prediction_reasoning,team1_seed,team1_region,team2_seed,team2_region,winner_seed,winner_region,tournament_state
0,1,3187113985,1,1,UConn,Stetson,UConn,,0.7,Randomly selected UConn as the winner.,1,East,16,East,1,East,"[{'game_id': 1, 'winner': 'UConn'}]"
1,1,3187113985,2,1,Florida Atlantic,Northwestern,Florida Atlantic,,0.7,Randomly selected Florida Atlantic as the winner.,8,East,9,East,8,East,"[{'game_id': 1, 'winner': 'UConn'}, {'game_id'..."
2,1,3187113985,3,1,San Diego State,UAB,San Diego State,,0.7,Randomly selected San Diego State as the winner.,5,East,12,East,5,East,"[{'game_id': 1, 'winner': 'UConn'}, {'game_id'..."
3,1,3187113985,4,1,Auburn,Yale,Auburn,,0.7,Randomly selected Auburn as the winner.,4,East,13,East,4,East,"[{'game_id': 1, 'winner': 'UConn'}, {'game_id'..."
4,1,3187113985,5,1,BYU,Duquesne,BYU,,0.7,Randomly selected BYU as the winner.,6,East,11,East,6,East,"[{'game_id': 1, 'winner': 'UConn'}, {'game_id'..."


In [None]:
df_results = df.copy()

# Frequency & probability of a given team winning a certain game in the bracket
df_results["count"] = df_results.groupby(["game_id", "game_winner"])["game_winner"].transform("count")
df_results["probability"] = df_results["count"] / df_results.groupby("game_id")["count"].transform("sum")

# Weight probabilities by round and scale up by number of trials
df_results["scoring_weight"] = 2 ** (df_results["round"] - 1)   # Higher rounds have exponentially more possibilities
df_results["game_score"] = df_results["probability"] * df_results["scoring_weight"] * len(df_results["trial"].unique())

# Evaluate brackets by summing game scores
df_results["bracket_score"] = df_results.groupby("trial")["game_score"].transform("sum")

df_results["bracket_score"].describe()

count    6300.000000
mean      192.000000
std        48.350374
min       129.519125
25%       152.528176
50%       179.816865
75%       220.498088
max       314.938265
Name: bracket_score, dtype: float64

In [None]:
# Visualize the results
REGION_ORDER = ["East", "Midwest", "South", "West"]
TOP_N = 5
METRIC = "game_score"

max_bracket_score_idx = df_results['bracket_score'].idxmax()
max_bracket_score_trial = df_results.iloc[max_bracket_score_idx]["trial"]

for game_id in df_results["game_id"].unique():    
    round = df_results[df_results['game_id'] == game_id]['round'].values[0]
    if round == df['round'].max() - 1:
        region1 = df_results[df_results['game_id'] == game_id]['team1_region'].values[0]
        region2 = df_results[df_results['game_id'] == game_id]['team2_region'].values[0]
        region = f"{region1}-{region2}"
    elif round == df['round'].max():
        region = "Championship"
    else:
        region = df_results[df_results['game_id'] == game_id]['team1_region'].values[0]
    print(f"Game ID: {game_id}, Round: {round}, Region: {region}")
    
    game = df_results[df_results['game_id'] == game_id]
    winners = game.sort_values("probability", ascending=False).drop_duplicates(subset=["game_winner"]).head(TOP_N)
    for _, winner in winners.iterrows():
        if winner["trial"] == max_bracket_score_trial:
            best_bracket = ", Best Bracket" #TODO: need to do this before dropping dupes... should try to label the top 5 best brackets and always include them in the lists...
        else:
            best_bracket = ""
        print(f"  Winner: {winner['game_winner']}, {METRIC}: {winner[METRIC]:.4f}{best_bracket}")   # printing trial doesnt make sense here... that team can be in that spot multiple times in different trials
    print()

Game ID: 1, Round: 1, Region: East
  Winner: UConn, game_score: 1.1927
  Winner: Stetson, game_score: 0.6422

Game ID: 2, Round: 1, Region: East
  Winner: Florida Atlantic, game_score: 1.2064
  Winner: Northwestern, game_score: 0.4692

Game ID: 3, Round: 1, Region: East
  Winner: San Diego State, game_score: 1.2069
  Winner: UAB, game_score: 0.5172, Best Bracket

Game ID: 4, Round: 1, Region: East
  Winner: Auburn, game_score: 1.1765
  Winner: Yale, game_score: 0.2941

Game ID: 5, Round: 1, Region: East
  Winner: BYU, game_score: 1.1725
  Winner: Duquesne, game_score: 0.7186

Game ID: 6, Round: 1, Region: East
  Winner: Illinois, game_score: 1.2064
  Winner: Morehead State, game_score: 0.4692

Game ID: 7, Round: 1, Region: East
  Winner: Washington State, game_score: 1.2050
  Winner: Drake, game_score: 0.4457

Game ID: 8, Round: 1, Region: East
  Winner: Iowa State, game_score: 1.1802
  Winner: South Dakota State, game_score: 0.6931

Game ID: 9, Round: 1, Region: West
  Winner: North C