# March Madness 2025 XGBoost 

This notebook demonstrates how to build a machine learning model to predict the winner of the 2025 NCAA Division I Men's Basketball Tournament (March Madness). Using historical college basketball data and advanced feature engineering techniques, we develop a fine-tuned XGBoost regressor capable of predicting individual game outcomes throughout the entire tournament bracket.

In [1]:
pip install cbbd

Note: you may need to restart the kernel to use updated packages.


In [4]:
import cbbd
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

configuration = cbbd.Configuration(
    access_token = 'Your-Token'
)

In [5]:
games = []
with cbbd.ApiClient(configuration) as api_client:
    games_api = cbbd.GamesApi(api_client)
    for season in range(2024, 2013, -1):
        results = games_api.get_games(season=season, tournament='NCAA')
        games += results
len(games)

678

In [6]:
games[0]

GameInfo(id=12010, source_id='401638579', season_label='20232024', season=2024, season_type=<SeasonType.POSTSEASON: 'postseason'>, start_date=datetime.datetime(2024, 3, 19, 18, 40, tzinfo=datetime.timezone.utc), start_time_tbd=False, neutral_site=True, conference_game=False, game_type='TRNMNT', tournament='NCAA', game_notes="Men's Basketball Championship - West Region - First Four", status=<GameStatus.FINAL: 'final'>, attendance=0, home_team_id=114, home_team='Howard', home_conference_id=18, home_conference='MEAC', home_seed=16, home_points=68, home_period_points=[27, 41], home_winner=False, away_team_id=341, away_team='Wagner', away_conference_id=21, away_conference='NEC', away_seed=16, away_points=71, away_period_points=[38, 33], away_winner=True, excitement=4.7, venue_id=76, venue='UD Arena', city='Dayton', state='OH')

In [7]:
stats = []
with cbbd.ApiClient(configuration) as api_client:
    stats_api = cbbd.StatsApi(api_client)
    for season in range(2024, 2005, -1):
        results = stats_api.get_team_season_stats(season=season, season_type='regular')
        stats += results
len(stats)

11939

In [10]:
records = []
for game in games:
    record = game.to_dict()
    home_stats = [stat for stat in stats if stat.team_id == game.home_team_id and stat.season == game.season][0]
    away_stats = [stat for stat in stats if stat.team_id == game.away_team_id and stat.season == game.season][0]
    
    record['home_pace'] = home_stats.pace
    record['home_o_rating'] = home_stats.team_stats.rating
    record['home_d_rating'] = home_stats.opponent_stats.rating
    record['home_free_throw_rate'] = home_stats.team_stats.four_factors.free_throw_rate
    record['home_offensive_rebound_rate'] = home_stats.team_stats.four_factors.offensive_rebound_pct
    record['home_turnover_ratio'] = home_stats.team_stats.four_factors.turnover_ratio
    record['home_efg'] = home_stats.team_stats.four_factors.effective_field_goal_pct
    record['home_free_throw_rate_allowed'] = home_stats.opponent_stats.four_factors.free_throw_rate
    record['home_offensive_rebound_rate_allowed'] = home_stats.opponent_stats.four_factors.offensive_rebound_pct
    record['home_turnover_ratio_forced'] = home_stats.opponent_stats.four_factors.turnover_ratio
    record['home_efg_allowed'] = home_stats.opponent_stats.four_factors.effective_field_goal_pct
    record['away_pace'] = away_stats.pace
    record['away_o_rating'] = away_stats.team_stats.rating
    record['away_d_rating'] = away_stats.opponent_stats.rating
    record['away_free_throw_rate'] = away_stats.team_stats.four_factors.free_throw_rate
    record['away_offensive_rebound_rate'] = away_stats.team_stats.four_factors.offensive_rebound_pct
    record['away_turnover_ratio'] = away_stats.team_stats.four_factors.turnover_ratio
    record['away_efg'] = away_stats.team_stats.four_factors.effective_field_goal_pct
    record['away_free_throw_rate_allowed'] = away_stats.opponent_stats.four_factors.free_throw_rate
    record['away_offensive_rebound_rate_allowed'] = away_stats.opponent_stats.four_factors.offensive_rebound_pct
    record['away_turnover_ratio_forced'] = away_stats.opponent_stats.four_factors.turnover_ratio
    record['away_efg_allowed'] = away_stats.opponent_stats.four_factors.effective_field_goal_pct
    record['home_true_shooting'] = home_stats.team_stats.true_shooting
    record['home_fast_break_points'] = home_stats.team_stats.points.fast_break
    record['home_two_point_field_goal_pct'] = home_stats.team_stats.two_point_field_goals.pct
    record['home_three_point_field_goal_pct'] = home_stats.team_stats.three_point_field_goals.pct
    record['home_free_throw_pct'] = home_stats.team_stats.free_throws.pct
    record['home_points_off_turnover'] = home_stats.team_stats.points.off_turnovers
    record['home_points_in_the_paint'] = home_stats.team_stats.points.in_paint
    record['away_true_shooting'] = away_stats.team_stats.true_shooting
    record['away_fast_break_points'] = away_stats.team_stats.points.fast_break
    record['away_two_point_field_goal_pct'] = away_stats.team_stats.two_point_field_goals.pct
    record['away_three_point_field_goal_pct'] = away_stats.team_stats.three_point_field_goals.pct
    record['away_free_throw_pct'] = away_stats.team_stats.free_throws.pct
    record['away_points_off_turnover'] = away_stats.team_stats.points.off_turnovers
    record['away_points_in_the_paint'] = away_stats.team_stats.points.in_paint

    records.append(record)

len(records)


678

In [11]:
df = pd.DataFrame(records)
df['margin'] = df.homePoints - df.awayPoints
df.head()

Unnamed: 0,id,sourceId,seasonLabel,season,seasonType,startDate,startTimeTbd,neutralSite,conferenceGame,gameType,...,home_points_off_turnover,home_points_in_the_paint,away_true_shooting,away_fast_break_points,away_two_point_field_goal_pct,away_three_point_field_goal_pct,away_free_throw_pct,away_points_off_turnover,away_points_in_the_paint,margin
0,12010,401638579,20232024,2024,SeasonType.POSTSEASON,2024-03-19 18:40:00+00:00,False,True,False,TRNMNT,...,452,1000,49.1,335,43.6,32.2,71.0,407,812,-3
1,12009,401638580,20232024,2024,SeasonType.POSTSEASON,2024-03-19 21:10:00+00:00,False,True,False,TRNMNT,...,407,916,59.2,422,58.2,34.1,75.4,453,1116,-25
2,12023,401638581,20232024,2024,SeasonType.POSTSEASON,2024-03-20 18:40:00+00:00,False,True,False,TRNMNT,...,538,984,53.5,365,48.1,33.9,70.6,493,1072,-7
3,12022,401638582,20232024,2024,SeasonType.POSTSEASON,2024-03-20 21:28:00+00:00,False,True,False,TRNMNT,...,381,1052,59.7,425,53.6,39.4,77.8,480,1302,-7
4,12049,401638585,20232024,2024,SeasonType.POSTSEASON,2024-03-21 12:15:00+00:00,False,True,False,TRNMNT,...,465,1184,54.8,474,50.4,35.9,70.6,488,1108,-18


In [21]:
print(home_stats.to_dict())

{'season': 2014, 'seasonLabel': '20132014', 'teamId': 314, 'team': 'UConn', 'conference': 'American', 'games': 34, 'wins': 26, 'losses': 8, 'totalMinutes': 1375, 'pace': 61.7, 'teamStats': {'fieldGoals': {'pct': 44.8, 'attempted': 1849, 'made': 829}, 'twoPointFieldGoals': {'pct': 48, 'attempted': 1224, 'made': 587}, 'threePointFieldGoals': {'pct': 38.7, 'attempted': 625, 'made': 242}, 'freeThrows': {'pct': 76.1, 'attempted': 714, 'made': 543}, 'rebounds': {'total': 1203, 'defensive': 860, 'offensive': 343}, 'turnovers': {'teamTotal': 9, 'total': 397}, 'fouls': {'flagrant': 0, 'technical': 5, 'total': 621}, 'points': {'fastBreak': 12, 'offTurnovers': 79, 'inPaint': 82, 'total': 2443}, 'fourFactors': {'freeThrowRate': 38.6, 'offensiveReboundPct': 28.5, 'turnoverRatio': 0.2, 'effectiveFieldGoalPct': 51.4}, 'assists': 428, 'blocks': 211, 'steals': 237, 'possessions': 2121, 'rating': 115.2, 'trueShooting': 56.5}, 'opponentStats': {'fieldGoals': {'pct': 38.7, 'attempted': 1926, 'made': 746},

In [12]:
# First, create the new interaction columns in your DataFrame.
# (Assuming 'df' already contains all the columns listed in your features list)
df['home_pace_rating'] = df['home_pace'] * df['home_o_rating']
df['away_pace_rating'] = df['away_pace'] * df['away_o_rating']


# Update your features list to include the new interaction features.
features = [
    'home_o_rating',
    'home_d_rating',
    'home_pace',
    'home_free_throw_rate',
    'home_offensive_rebound_rate',
    'home_turnover_ratio',
    'home_efg',
    'home_free_throw_rate_allowed',
    'home_offensive_rebound_rate_allowed',
    'home_turnover_ratio_forced',
    'home_efg_allowed',
    'away_o_rating',
    'away_d_rating',
    'away_pace',
    'away_free_throw_rate',
    'away_offensive_rebound_rate',
    'away_turnover_ratio',
    'away_efg',
    'away_free_throw_rate_allowed',
    'away_offensive_rebound_rate_allowed',
    'away_turnover_ratio_forced',
    'away_efg_allowed',
    'home_true_shooting',
    'home_fast_break_points',
    'home_two_point_field_goal_pct',
    'home_three_point_field_goal_pct',
    'home_free_throw_pct',
    'home_points_off_turnover',
    'home_points_in_the_paint',
    'away_true_shooting',
    'away_fast_break_points',
    'away_two_point_field_goal_pct',
    'away_three_point_field_goal_pct',
    'away_free_throw_pct',
    'away_points_off_turnover',
    'away_points_in_the_paint',
    'homeSeed',
    'awaySeed',
    # New interaction features:
    'home_pace_rating',
    'away_pace_rating'
]

outputs = ['margin']

# Now, selecting these columns from the DataFrame
df_subset = df[features + outputs]
df_subset.head()


Unnamed: 0,home_o_rating,home_d_rating,home_pace,home_free_throw_rate,home_offensive_rebound_rate,home_turnover_ratio,home_efg,home_free_throw_rate_allowed,home_offensive_rebound_rate_allowed,home_turnover_ratio_forced,...,away_two_point_field_goal_pct,away_three_point_field_goal_pct,away_free_throw_pct,away_points_off_turnover,away_points_in_the_paint,homeSeed,awaySeed,home_pace_rating,away_pace_rating,margin
0,107.8,106.2,67.4,41.9,31.0,0.2,52.4,39.2,33.5,0.2,...,43.6,32.2,71.0,407,812,16,16,7265.72,6130.5,-3
1,103.6,96.8,59.4,25.1,26.9,0.1,49.3,25.7,27.2,0.2,...,58.2,34.1,75.4,453,1116,10,10,6153.84,7486.65,-25
2,111.7,109.8,65.2,29.7,22.2,0.2,54.5,35.9,26.5,0.2,...,48.1,33.9,70.6,493,1072,16,16,7282.84,6664.68,-7
3,113.6,101.3,65.2,36.8,30.7,0.2,52.2,31.9,24.8,0.2,...,53.6,39.4,77.8,480,1302,10,10,7406.72,7749.0,-7
4,109.1,100.7,67.8,37.9,32.4,0.2,51.7,32.4,30.3,0.2,...,50.4,35.9,70.6,488,1108,8,9,7396.98,7224.16,-18


In [13]:
training = df.query("season != 2024").copy()
testing = df.query("season == 2024").copy()

In [14]:
X_train, X_valid, y_train, y_valid = train_test_split(training[features], training[outputs], train_size=0.8, test_size=0.2, random_state=0)

In [15]:
model = XGBRegressor(random_state=0)
model.fit(X_train, y_train)

In [16]:
predictions = model.predict(X_valid)
predictions

array([  4.376003  ,   2.4297347 ,   6.1747146 ,   1.3147918 ,
        10.161909  ,   4.6511235 ,   9.090392  ,   7.9410725 ,
        17.612316  ,   2.188236  ,  -1.1080462 ,   9.76305   ,
         6.78485   ,  21.10234   ,   6.59934   ,  -4.096568  ,
        -0.56090105,   5.3775353 ,  -2.6809542 ,  10.64159   ,
         0.45834264,  12.536751  ,   6.377199  ,  15.387018  ,
        17.904732  ,  15.549259  ,  -1.3148749 ,   2.3854034 ,
         4.395057  ,   2.1150708 ,   6.0370464 ,  16.074059  ,
        -5.400235  ,  -3.9602625 ,   7.4699063 ,  19.527964  ,
         2.011003  ,   3.8663266 ,   6.7903676 ,   2.9146438 ,
       -16.197645  ,   8.073695  ,  11.786324  ,   7.800673  ,
        -1.8691713 ,  -2.9243498 ,   9.246671  ,  16.136606  ,
        13.557553  ,   0.31765974,  12.831795  ,  16.164726  ,
        15.617769  ,   3.690291  ,  14.536832  ,  15.82031   ,
         1.0706881 ,  22.359161  ,   2.2440667 ,   5.6430845 ,
        -2.1995537 ,  -7.5844793 ,  -7.283776  ,   3.58

In [17]:
mae = mean_absolute_error(predictions, y_valid)
mae

9.17762279510498

In [18]:
pip install optuna

Collecting optuna
  Using cached optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Using cached colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Using cached optuna-4.2.1-py3-none-any.whl (383 kB)
Using cached colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.9.0 optuna-4.2.1
Note: you may need to restart the kernel to use updated packages.


In [19]:
import optuna
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

def objective(trial):
    # Suggest hyperparameters for tuning
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 2000),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.3, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'n_jobs': 4,
        'random_state': 42
    }
    
    # Initialize and train the model with the suggested parameters
    model = XGBRegressor(**params)
    model.fit(X_train, y_train)
    
    # Make predictions and compute MAE on the validation set
    predictions = model.predict(X_valid)
    mae = mean_absolute_error(y_valid, predictions)
    
    return mae

# Create an Optuna study to minimize MAE
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=200)  # You can adjust the number of trials

print("Best trial:")
trial = study.best_trial
print("  MAE: ", trial.value)
print("  Best Parameters: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


[I 2025-03-19 14:23:02,981] A new study created in memory with name: no-name-a5e91a6a-2a37-444e-9375-d6c3d068f48d
[I 2025-03-19 14:23:05,154] Trial 0 finished with value: 9.176671028137207 and parameters: {'n_estimators': 500, 'learning_rate': 0.0027134151290016555, 'max_depth': 12, 'min_child_weight': 1, 'subsample': 0.5646455932542286, 'colsample_bytree': 0.7695360572990518}. Best is trial 0 with value: 9.176671028137207.
[I 2025-03-19 14:23:05,540] Trial 1 finished with value: 8.997589111328125 and parameters: {'n_estimators': 228, 'learning_rate': 0.030116348936077043, 'max_depth': 10, 'min_child_weight': 4, 'subsample': 0.6657279012635469, 'colsample_bytree': 0.6096978131332143}. Best is trial 1 with value: 8.997589111328125.
[I 2025-03-19 14:23:06,031] Trial 2 finished with value: 9.436636924743652 and parameters: {'n_estimators': 302, 'learning_rate': 0.0018675005558107544, 'max_depth': 12, 'min_child_weight': 10, 'subsample': 0.9092204168990579, 'colsample_bytree': 0.9364597231

Best trial:
  MAE:  8.840404510498047
  Best Parameters: 
    n_estimators: 296
    learning_rate: 0.013265750735192563
    max_depth: 9
    min_child_weight: 8
    subsample: 0.9701757538878738
    colsample_bytree: 0.8614069935054632


In [20]:
best_params = study.best_trial.params
best_params.update({'n_jobs': 4, 'random_state': 42})
model = XGBRegressor(**best_params)
model.fit(X_train, y_train)

predictions = model.predict(X_valid)
mae = mean_absolute_error(y_valid, predictions)
print("MAE on validation set:", mae)

MAE on validation set: 8.840404510498047


In [22]:
predictions = model.predict(testing[features])
testing['prediction'] = predictions
testing[['homeSeed', 'homeTeam', 'awaySeed', 'awayTeam', 'margin', 'prediction']]

Unnamed: 0,homeSeed,homeTeam,awaySeed,awayTeam,margin,prediction
0,16,Howard,16,Wagner,-3,5.689422
1,10,Virginia,10,Colorado State,-25,5.247204
2,16,Montana State,16,Grambling,-7,1.472572
3,10,Boise State,10,Colorado,-7,6.298865
4,8,Mississippi State,9,Michigan State,-18,6.299580
...,...,...,...,...,...,...
62,1,Purdue,2,Tennessee,6,-1.043233
63,4,Duke,11,NC State,-12,8.243624
64,1,Purdue,11,NC State,13,11.249630
65,1,UConn,4,Alabama,14,4.865191


In [23]:
testing.query("(margin < 0 and prediction < 0) or (margin > 0 and prediction > 0)").shape[0] / testing.shape[0]

0.6119402985074627

In [24]:
testing[testing['gameNotes'].str.contains('1st')].query("(margin < 0 and prediction < 0) or (margin > 0 and prediction > 0)").shape[0] / testing[testing['gameNotes'].str.contains('1st')].shape[0]

0.65625

In [25]:
for s in stats:
    print(s.team)

Abilene Christian
Air Force
Akron
Alabama A&M
Alabama
Alabama State
Alcorn State
American University
App State
Arizona State
Arizona
Arkansas
Arkansas State
Arkansas-Pine Bluff
Army
Auburn
Austin Peay
BYU
Ball State
Baylor
Bellarmine
Belmont
Bethune-Cookman
Binghamton
Boise State
Boston College
Boston University
Bowling Green
Bradley
Brown
Bryant
Bucknell
Buffalo
Butler
Cal Poly
Cal State Bakersfield
Cal State Fullerton
Cal State Northridge
California Baptist
California
Campbell
Canisius
Central Arkansas
Central Connecticut
Central Michigan
Charleston
Charleston Southern
Charlotte
Chattanooga
Chicago State
Cincinnati
Clemson
Cleveland State
Coastal Carolina
Colgate
Colorado
Colorado State
Columbia
Coppin State
Cornell
Creighton
Dartmouth
Davidson
Dayton
DePaul
Delaware
Delaware State
Denver
Detroit Mercy
Drake
Drexel
Duke
Duquesne
East Carolina
East Tennessee State
East Texas A&M
Eastern Illinois
Eastern Kentucky
Eastern Michigan
Eastern Washington
Elon
Evansville
Fairfield
Fairleigh D

In [26]:
stats = stats_api.get_team_season_stats(season=2025, season_type='regular')

def predict_game(model, stats, projected_home_seed, home_team, projected_away_seed, away_team):
    home_stats = [stat for stat in stats if stat.team == home_team][0]
    away_stats = [stat for stat in stats if stat.team == away_team][0]
    record = {
    'home_o_rating': home_stats.team_stats.rating,
    'home_d_rating': home_stats.opponent_stats.rating,
    'home_pace': home_stats.pace,
    'home_free_throw_rate': home_stats.team_stats.four_factors.free_throw_rate,
    'home_offensive_rebound_rate': home_stats.team_stats.four_factors.offensive_rebound_pct,
    'home_turnover_ratio': home_stats.team_stats.four_factors.turnover_ratio,
    'home_efg': home_stats.team_stats.four_factors.effective_field_goal_pct,
    'home_free_throw_rate_allowed': home_stats.opponent_stats.four_factors.free_throw_rate,
    'home_offensive_rebound_rate_allowed': home_stats.opponent_stats.four_factors.offensive_rebound_pct,
    'home_turnover_ratio_forced': home_stats.opponent_stats.four_factors.turnover_ratio,
    'home_efg_allowed': home_stats.opponent_stats.four_factors.effective_field_goal_pct,
    'away_o_rating': away_stats.team_stats.rating,
    'away_d_rating': away_stats.opponent_stats.rating,
    'away_pace': away_stats.pace,
    'away_free_throw_rate': away_stats.team_stats.four_factors.free_throw_rate,
    'away_offensive_rebound_rate': away_stats.team_stats.four_factors.offensive_rebound_pct,
    'away_turnover_ratio': away_stats.team_stats.four_factors.turnover_ratio,
    'away_efg': away_stats.team_stats.four_factors.effective_field_goal_pct,
    'away_free_throw_rate_allowed': away_stats.opponent_stats.four_factors.free_throw_rate,
    'away_offensive_rebound_rate_allowed': away_stats.opponent_stats.four_factors.offensive_rebound_pct,
    'away_turnover_ratio_forced': away_stats.opponent_stats.four_factors.turnover_ratio,
    'away_efg_allowed': away_stats.opponent_stats.four_factors.effective_field_goal_pct,
    'home_true_shooting': home_stats.team_stats.true_shooting,
    'home_fast_break_points': home_stats.team_stats.points.fast_break,
    'home_two_point_field_goal_pct': home_stats.team_stats.two_point_field_goals.pct,
    'home_three_point_field_goal_pct': home_stats.team_stats.three_point_field_goals.pct,
    'home_free_throw_pct': home_stats.team_stats.free_throws.pct,
    'home_points_off_turnover': home_stats.team_stats.points.off_turnovers,
    'home_points_in_the_paint': home_stats.team_stats.points.in_paint,
    'away_true_shooting': away_stats.team_stats.true_shooting,
    'away_fast_break_points': away_stats.team_stats.points.fast_break,
    'away_two_point_field_goal_pct': away_stats.team_stats.two_point_field_goals.pct,
    'away_three_point_field_goal_pct': away_stats.team_stats.three_point_field_goals.pct,
    'away_free_throw_pct': away_stats.team_stats.free_throws.pct,
    'away_points_off_turnover': away_stats.team_stats.points.off_turnovers,
    'away_points_in_the_paint': away_stats.team_stats.points.in_paint,
    'homeSeed': projected_home_seed,
    'awaySeed': projected_away_seed,
    'home_pace_rating': home_stats.pace * home_stats.team_stats.rating,
    'away_pace_rating': away_stats.pace * away_stats.team_stats.rating
}

    return model.predict(pd.DataFrame([record]))[0]

def print_game_result(pred, home_team, away_team):
    if pred > 0:
        print(f"Predicted Winner: {home_team} (Margin: {pred:.2f})")
    elif pred < 0:
        print(f"Predicted Winner: {away_team} (Margin: {abs(pred):.2f})")
    else:
        print("Predicted: Tie")

# For each game, get the prediction and then print the result.
result = predict_game(model, stats, 11, 'San Diego State', 11, "North Carolina")
print_game_result(result, 'San Diego State', "North Carolina")

result = predict_game(model, stats, 11, 'Texas', 11, "Xavier")
print_game_result(result, 'Texas', "Xavier")

result = predict_game(model, stats, 16, 'American University', 16, "Mount St. Mary's")
print_game_result(result, 'American University', "Mount St. Mary's")

result = predict_game(model, stats, 16, 'Alabama State', 16, "St. Francis (PA)")
print_game_result(result, 'Alabama State', "St. Francis (PA)")

Predicted Winner: North Carolina (Margin: 0.02)
Predicted Winner: Texas (Margin: 0.78)
Predicted Winner: American University (Margin: 6.24)
Predicted Winner: St. Francis (PA) (Margin: 1.67)


In [27]:
bracket_matches = []

def print_game_result(pred, home_team, away_team):
    """
    Print the result of a game including who beat whom.
    Converts the prediction to a scalar float if necessary.
    """
    if hasattr(pred, 'item'):
        pred = pred.item()
    if pred > 0:
        print(f"Predicted Winner: {home_team} (Margin: {pred:.2f}) beats {away_team}")
    elif pred < 0:
        print(f"Predicted Winner: {away_team} (Margin: {abs(pred):.2f}) beats {home_team}")
    else:
        print("Tie game")

def play_match(model, stats, team1, team2, region="", round_name="", game_number=0):
    """
    Simulate a matchup between team1 and team2.
    Each team is represented as a tuple: (seed, team_name).
    Records the match result (including margin) in bracket_matches.
    Returns the winning team tuple.
    """
    seed1, name1 = team1
    seed2, name2 = team2
    pred = predict_game(model, stats, seed1, name1, seed2, name2)
    print_game_result(pred, name1, name2)
    
    # Ensure pred is a float
    if hasattr(pred, 'item'):
        pred_value = pred.item()
    else:
        pred_value = pred
    margin = abs(pred_value)
    
    if pred_value > 0:
        winner = team1
    else:
        winner = team2
        
    bracket_matches.append({
        "region": region,
        "round": round_name,
        "game": game_number,
        "team1": name1,
        "team2": name2,
        "winner": winner[1],
        "margin": margin
    })
    return winner

def play_round(model, stats, teams, round_name="", region=""):
    """
    Plays one round of matchups.
    teams: list of team tuples.
    Returns a list of winning teams.
    """
    winners = []
    print(f"--- {region} {round_name} ---")
    game_number = 1
    for i in range(0, len(teams), 2):
        winner = play_match(model, stats, teams[i], teams[i+1],
                            region=region, round_name=round_name, game_number=game_number)
        winners.append(winner)
        game_number += 1
    return winners

def run_region(model, stats, region_name, teams):
    """
    Iterates through rounds for a given region until a champion is determined.
    """
    print(f"Starting tournament for {region_name}")
    current_round = teams
    round_num = 1
    while len(current_round) > 1:
        current_round = play_round(model, stats, current_round, round_name=f"Round {round_num}", region=region_name)
        round_num += 1
    champion = current_round[0]
    print(f"{region_name} Champion: {champion[1]}\n")
    return champion

south_teams = [
    (1, 'Auburn'),
    (16, "Alabama State"),
    (8, 'Louisville'),
    (9, "Creighton"),
    (5, "Michigan"),
    (12, "UC San Diego"),
    (4, "Texas A&M"),
    (13, "Yale"),
    (6, "Ole Miss"),
    (11, "North Carolina"),
    (3, "Iowa State"),
    (14, "Lipscomb"),
    (7, "Marquette"),
    (10, "New Mexico"),
    (2, "Michigan State"),
    (15, "Bryant")
]

west_teams = [
    (1, "Florida"),
    (16, "Norfolk State"),
    (8, "UConn"),
    (9, "Oklahoma"),
    (5, "Memphis"),
    (11, "Colorado State"),
    (4, "Maryland"),
    (13, "Grand Canyon"),
    (6, "Missouri"),
    (11, "Drake"),  
    (3, "Texas Tech"),
    (14, "UNC Wilmington"),
    (7, "Kansas"),
    (10, "Arkansas"),
    (2, "St. John's"),
    (15, "Omaha")
]

east_teams = [
    (1, "Duke"),
    (16, "American University"),
    (8, "Mississippi State"),
    (9, "Baylor"),
    (5, "Oregon"),
    (12, "Liberty"),
    (4, "Arizona"),
    (13, "Akron"),
    (6, "BYU"),
    (11, "VCU"),
    (3, "Wisconsin"),
    (14, "Montana"),
    (7, "Saint Mary's"),
    (10, "Vanderbilt"),
    (2, "Alabama"),
    (15, "Robert Morris")
]

midwest_teams = [
    (1, "Houston"),
    (16, "SIU Edwardsville"),
    (8, "Gonzaga"),
    (9, "Georgia"),
    (5, "Clemson"),
    (12, "McNeese"),
    (4, "Purdue"),
    (13, "High Point"),
    (6, "Illinois"),
    (11, "Texas"),
    (3, "Kentucky"),
    (14, "Troy"),
    (7, "UCLA"),
    (10, "Utah State"),
    (2, "Tennessee"),
    (15, "Wofford")
]


champion_south = run_region(model, stats, "South", south_teams)
champion_west  = run_region(model, stats, "West", west_teams)
champion_east  = run_region(model, stats, "East", east_teams)
champion_midwest = run_region(model, stats, "Midwest", midwest_teams)


final_four_teams = [champion_south, champion_west, champion_east, champion_midwest]
print("=== Final Four ===")
final_four_round = play_round(model, stats, final_four_teams, round_name="Final Four", region="Final Four")
print("=== Championship Game ===")
championship = play_match(model, stats, final_four_round[0], final_four_round[1], region="Championship", round_name="Championship", game_number=1)
print(f"Champion: {championship[1]}")


low_margin_games = [game for game in bracket_matches if game.get("margin", float("inf")) < 3]
high_margin_games = [game for game in bracket_matches if game.get("margin", 0) > 10]

print("\nGames with margins below 3:")
for game in low_margin_games:
    print(f"{game['region']} {game['round']} Game {game['game']}: "
          f"{game['team1']} vs {game['team2']} - Winner: {game['winner']} (Margin: {game['margin']:.2f})")

print("\nGames with margins greater than 10:")
for game in high_margin_games:
    print(f"{game['region']} {game['round']} Game {game['game']}: "
          f"{game['team1']} vs {game['team2']} - Winner: {game['winner']} (Margin: {game['margin']:.2f})")


Starting tournament for South
--- South Round 1 ---
Predicted Winner: Auburn (Margin: 16.41) beats Alabama State
Predicted Winner: Louisville (Margin: 0.32) beats Creighton
Predicted Winner: UC San Diego (Margin: 0.17) beats Michigan
Predicted Winner: Texas A&M (Margin: 7.84) beats Yale
Predicted Winner: Ole Miss (Margin: 0.13) beats North Carolina
Predicted Winner: Iowa State (Margin: 9.28) beats Lipscomb
Predicted Winner: Marquette (Margin: 5.70) beats New Mexico
Predicted Winner: Michigan State (Margin: 12.46) beats Bryant
--- South Round 2 ---
Predicted Winner: Auburn (Margin: 1.16) beats Louisville
Predicted Winner: UC San Diego (Margin: 3.25) beats Texas A&M
Predicted Winner: Iowa State (Margin: 2.47) beats Ole Miss
Predicted Winner: Marquette (Margin: 2.62) beats Michigan State
--- South Round 3 ---
Predicted Winner: UC San Diego (Margin: 1.31) beats Auburn
Predicted Winner: Iowa State (Margin: 2.11) beats Marquette
--- South Round 4 ---
Predicted Winner: UC San Diego (Margin: 0