In [2]:
import numpy as np
import pandas as pd
import itertools
from numpy.random import default_rng
from tqdm import tqdm
from xgboost import XGBRegressor, XGBClassifier
from sklearn.preprocessing import StandardScaler
import joblib

# Load data and models

In [3]:
# numpy random generator
rng = default_rng()

In [4]:
# Read in the kaggle data
seeds = pd.read_csv("data/2023/kaggle_data/MNCAATourneySeeds.csv",dtype={'TeamID':str})
slots = pd.read_csv("data/2023/kaggle_data/MNCAATourneySlots.csv")
games = pd.read_csv("data/2023/kaggle_data/MNCAATourneyCompactResults.csv",dtype={'WTeamID':str,'LTeamID':str})
teams = pd.read_csv("data/2023/kaggle_data/MTeams.csv",dtype={'TeamID':str})

In [5]:
# Read in predictors 
# pred_df = pd.read_csv('data/2023/model_data/matchup_features.csv',dtype={'TeamID_1':str,'TeamID_2':str})
pred_df = pd.read_csv('data/2023/bracket_data/bracket_data.csv',dtype={'TeamID_1':str,'TeamID_2':str})

In [6]:
pred_df.columns

Index(['Season', 'TeamID_1', 'TeamID_2', 'Seed_1', 'Seed_2',
       'diff_total_games', 'diff_win_pct', 'diff_avg_points_for',
       'diff_avg_points_against', 'diff_avg_game_margin',
       'diff_std_game_margin', 'diff_non_home_wins', 'diff_home_losses',
       'diff_close_wins', 'diff_close_losses', 'diff_3FGA_pg', 'diff_3_pct',
       'diff_FTA_pg', 'diff_FT_pct', 'diff_TOV_pg', 'diff_avg_pace',
       'diff_off_eff', 'diff_def_eff', 'diff_pythag_win_pct', 'diff_luck',
       'diff_wins_vs_tourney_teams', 'diff_away_wins_vs_tourney_teams',
       'diff_losses_vs_tourney_teams', 'diff_losses_vs_non_tourney_teams',
       'diff_games_vs_tourney_teams', 'diff_final_net', 'diff_net_improvement',
       'diff_final_pom', 'diff_pom_improvement', 'diff_pi_i',
       'diff_coach_WL_car', 'diff_tourneys_car', 'diff_sw16_car',
       'diff_ff_car', 'diff_champ_car', 'diff_returning_min_pct',
       'diff_returning_score_pct', 'diff_preseason_pts'],
      dtype='object')

In [7]:
reg_model_pipeline = joblib.load('models/2023/xgb_pipeline_reg_test_v01.joblib')

In [8]:
model_pipeline = joblib.load('models/2023/xgb_pipeline_test_v01.joblib')
# X = pred_df[[x for x in pred_df.columns if x != 't1_score_diff']]
# y = pred_df['t1_score_diff'].map(lambda x: int(x>0))
# model_pipeline.score(X,y)

In [9]:
pred_df['pred_score_diff'] = reg_model_pipeline.predict(pred_df)

In [10]:
pred_df[['team2_win_prob','team1_win_prob']] = model_pipeline.predict_proba(pred_df)

In [11]:
def sorted_ids(r,c1,c2):
    return ','.join([x for x in sorted([str(r[c1]),str(r[c2])])])

# need for 2021 game where VCU couldn't play due to COVID protocals
def fix_actual_winner(r):
    if r['Season'] == 2021 and r['sorted_ids'] == '1332,1433' :
        return '1332'
    return r['actual_winner']

In [12]:
games['sorted_ids'] = games.apply(lambda r: sorted_ids(r,'WTeamID','LTeamID'),axis=1)
slots['Round'] = slots['Slot'].apply(lambda x:  int(x[1]) if x[0] == 'R' else 0)

# Make dataframe with predictions for every possible matchup in each tournament

In [13]:
# precomputed dataframe
pred_df['team1_seed_num'] = pred_df['Seed_1'].str.extract('(\d+)', expand=False).map(int)
pred_df['team2_seed_num'] = pred_df['Seed_2'].str.extract('(\d+)', expand=False).map(int)

pred_df['model_winner'] = pred_df.apply(lambda x: x['TeamID_1'] if x['team1_win_prob'] > 0.5 else x['TeamID_2'], axis=1)
pred_df['seed_winner'] = pred_df.apply(lambda x: x['TeamID_1'] if x['team1_seed_num'] <= x['team2_seed_num'] else x['TeamID_2'], axis=1)
pred_df['model_winner_prob'] = pred_df.apply(lambda x: x['team1_win_prob'] if x['model_winner']==x['TeamID_1'] else x['team2_win_prob'], axis=1)
pred_df['seed_winner_prob'] = pred_df.apply(lambda x: x['team1_win_prob'] if x['seed_winner']==x['TeamID_1'] else x['team2_win_prob'], axis=1)
pred_df['actual_winner_prob'] = 1
pred_df['sorted_ids'] = pred_df.apply(lambda r: sorted_ids(r,'TeamID_1','TeamID_2'),axis=1)
pred_df['actual_winner'] = pred_df.merge(games,how='left',on=['Season','sorted_ids'])['WTeamID'].fillna('NA')
pred_df['actual_winner'] = pred_df.apply(lambda r: fix_actual_winner(r),axis=1)

In [239]:
# add simulation columns? may not be worth it since past work showed very similar to greedy

In [240]:
def run_sim(r):
    return np.random.choice([r['TeamID_1'],r['TeamID_2']],p=[r['team1_win_prob'],1-r['team1_win_prob']])

In [241]:
for i in tqdm(range(5)):
    pred_df['simulation_'+str(i)] = pred_df.apply(lambda r: run_sim(r),axis=1)

100%|███████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 27.10it/s]


In [None]:
pred_df.to_csv()

# Bracket class - may want to add something 

In [14]:
class Bracket:
    def __init__(self, season, seeds, slots, teams, prediction_df):
        self.season = season
        self.teams= teams
        self.prediction_df = prediction_df[prediction_df.Season==season].copy()
        self.seeds = seeds[seeds.Season==season].copy()
        self.slots = slots[slots.Season==season].copy()
        self.slots['Round'] = self.slots['Slot'].apply(lambda x:  int(x[1]) if x[0] == 'R' else 0)

        self.r0 = self.slots[self.slots['Round']==0].reset_index(drop=True)
        self.r1 = self.slots[self.slots['Round']==1].reset_index(drop=True)
        self.r2 = self.slots[self.slots['Round']==2].reset_index(drop=True)
        self.r3 = self.slots[self.slots['Round']==3].reset_index(drop=True)
        self.r4 = self.slots[self.slots['Round']==4].reset_index(drop=True)
        self.r5 = self.slots[self.slots['Round']==5].reset_index(drop=True)
        self.r6 = self.slots[self.slots['Round']==6].reset_index(drop=True)
        self.bracket = self.slots.copy()
        
    def fill_bracket(self,predictor):
        
        # playin round
        self.r0['TeamID_1'] = pd.merge(self.r0,self.seeds,how='left',left_on=['StrongSeed'],right_on=['Seed'])['TeamID']
        self.r0['team1_prob'] = 1
        self.r0['TeamID_2'] = pd.merge(self.r0,self.seeds,how='left',left_on=['WeakSeed'],right_on=['Seed'])['TeamID']
        self.r0['team2_prob'] = 1
        self.r0['sorted_ids'] = self.r0.apply(lambda r: sorted_ids(r,'TeamID_1','TeamID_2'),axis=1)
        self.r0[['Winner','curr_win_prob']] = pd.merge(self.r0,self.prediction_df,how='left',on=['sorted_ids'])[[predictor,predictor+'_prob']]
        self.r0['pre_win_prob'] = 1
        self.r0['cumm_win_prob'] = self.r0['curr_win_prob']
        
        playin_seeds = self.r0[['Season','Slot','Winner']].copy()
        playin_seeds.columns = ['Season','Seed','TeamID']
        self.new_seeds = pd.concat([self.seeds,playin_seeds],ignore_index=True)
                               
        self.r1['TeamID_1'] = pd.merge(self.r1,self.new_seeds,how='left',left_on=['StrongSeed'],right_on=['Seed'])['TeamID']
        self.r1['team1_prob'] = 1
        self.r1['TeamID_2'] = pd.merge(self.r1,self.new_seeds,how='left',left_on=['WeakSeed'],right_on=['Seed'])['TeamID']
        self.r1['team2_prob'] = 1
        self.r1['sorted_ids'] = self.r1.apply(lambda r: sorted_ids(r,'TeamID_1','TeamID_2'),axis=1)
        self.r1[['Winner','curr_win_prob']] = pd.merge(self.r1,self.prediction_df,how='left',on=['sorted_ids'])[[predictor,predictor+'_prob']]
        self.r1['pre_win_prob'] = 1
        self.r1['cumm_win_prob'] = self.r1['curr_win_prob']
        
        # can probably condense to for loop for subsequent rounds
        self.r2[['TeamID_1','team1_prob']] = pd.merge(self.r2,self.r1,how='left',left_on=['StrongSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]
        self.r2[['TeamID_2','team2_prob']] = pd.merge(self.r2,self.r1,how='left',left_on=['WeakSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]
        self.r2['sorted_ids'] = self.r2.apply(lambda r: sorted_ids(r,'TeamID_1','TeamID_2'),axis=1)
        self.r2[['Winner','curr_win_prob']] = pd.merge(self.r2,self.prediction_df,how='left',on=['sorted_ids'])[[predictor,predictor+'_prob']]
        self.r2['pre_win_prob'] = self.r2.apply(lambda r: r['team1_prob'] if r['Winner']==r['TeamID_1'] else r['team2_prob'],axis=1)
        self.r2['cumm_win_prob'] = self.r2['pre_win_prob']*self.r2['curr_win_prob']
        
        self.r3[['TeamID_1','team1_prob']] = pd.merge(self.r3,self.r2,how='left',left_on=['StrongSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]
        self.r3[['TeamID_2','team2_prob']] = pd.merge(self.r3,self.r2,how='left',left_on=['WeakSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]
        self.r3['sorted_ids'] = self.r3.apply(lambda r: sorted_ids(r,'TeamID_1','TeamID_2'),axis=1)
        self.r3[['Winner','curr_win_prob']] = pd.merge(self.r3,self.prediction_df,how='left',on=['sorted_ids'])[[predictor,predictor+'_prob']]
        self.r3['pre_win_prob'] = self.r3.apply(lambda r: r['team1_prob'] if r['Winner']==r['TeamID_1'] else r['team2_prob'],axis=1)
        self.r3['cumm_win_prob'] = self.r3['pre_win_prob']*self.r3['curr_win_prob']
        
        self.r4[['TeamID_1','team1_prob']] = pd.merge(self.r4,self.r3,how='left',left_on=['StrongSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]
        self.r4[['TeamID_2','team2_prob']] = pd.merge(self.r4,self.r3,how='left',left_on=['WeakSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]
        self.r4['sorted_ids'] = self.r4.apply(lambda r: sorted_ids(r,'TeamID_1','TeamID_2'),axis=1)
        self.r4[['Winner','curr_win_prob']] = pd.merge(self.r4,self.prediction_df,how='left',on=['sorted_ids'])[[predictor,predictor+'_prob']]
        self.r4['pre_win_prob'] = self.r4.apply(lambda r: r['team1_prob'] if r['Winner']==r['TeamID_1'] else r['team2_prob'],axis=1)
        self.r4['cumm_win_prob'] = self.r4['pre_win_prob']*self.r4['curr_win_prob']
        
        self.r5[['TeamID_1','team1_prob']] = pd.merge(self.r5,self.r4,how='left',left_on=['StrongSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]
        self.r5[['TeamID_2','team2_prob']] = pd.merge(self.r5,self.r4,how='left',left_on=['WeakSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]
        self.r5['sorted_ids'] = self.r5.apply(lambda r: sorted_ids(r,'TeamID_1','TeamID_2'),axis=1)
        self.r5[['Winner','curr_win_prob']] = pd.merge(self.r5,self.prediction_df,how='left',on=['sorted_ids'])[[predictor,predictor+'_prob']]
        self.r5['pre_win_prob'] = self.r5.apply(lambda r: r['team1_prob'] if r['Winner']==r['TeamID_1'] else r['team2_prob'],axis=1)
        self.r5['cumm_win_prob'] = self.r5['pre_win_prob']*self.r5['curr_win_prob']
        
        self.r6[['TeamID_1','team1_prob']] = pd.merge(self.r6,self.r5,how='left',left_on=['StrongSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]
        self.r6[['TeamID_2','team2_prob']] = pd.merge(self.r6,self.r5,how='left',left_on=['WeakSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]
        self.r6['sorted_ids'] = self.r6.apply(lambda r: sorted_ids(r,'TeamID_1','TeamID_2'),axis=1)
        self.r6[['Winner','curr_win_prob']] = pd.merge(self.r6,self.prediction_df,how='left',on=['sorted_ids'])[[predictor,predictor+'_prob']]
        self.r6['pre_win_prob'] = self.r6.apply(lambda r: r['team1_prob'] if r['Winner']==r['TeamID_1'] else r['team2_prob'],axis=1)
        self.r6['cumm_win_prob'] = self.r6['pre_win_prob']*self.r6['curr_win_prob']
        
        self.bracket = pd.concat([self.r0,self.r1,self.r2,self.r3,self.r4,self.r5,self.r6],ignore_index=True)
        self.bracket['TeamName_1'] = pd.merge(self.bracket,self.teams,how='left',left_on=['TeamID_1'],right_on=['TeamID'])['TeamName']
        self.bracket['TeamName_2'] = pd.merge(self.bracket,self.teams,how='left',left_on=['TeamID_2'],right_on=['TeamID'])['TeamName']
        self.bracket['WinnerName'] = pd.merge(self.bracket,self.teams,how='left',left_on=['Winner'],right_on=['TeamID'])['TeamName']
        self.bracket['potential_pts'] = self.bracket['Round'].apply(lambda x: 0 if x == 0 else 320/(64/(2**x)))
        self.bracket['expected_pts'] = self.bracket['cumm_win_prob']*self.bracket['potential_pts']

        return
    
    def score(self,actual):
        self.bracket[['Winner_actual','WinnerName_actual']] = pd.merge(self.bracket,actual.bracket[['Season','Slot','Winner','WinnerName']],how='left',on=['Season','Slot'],suffixes=['_predicted','_actual'])[['Winner_actual','WinnerName_actual']]
        self.bracket['correct'] = self.bracket.apply(lambda r: int(r['Winner']==r['Winner_actual']),axis=1)
        self.bracket['pts'] = self.bracket['potential_pts']*self.bracket['correct']
        return
    
    def get_team_scores(self):
        return self.bracket.groupby('Winner',as_index=False)['pts'].sum()
    
        

# Test out different strategies - need to incorporate people's bracket / who picked whom

In [15]:
a = Bracket(2023,seeds,slots,teams,pred_df)

In [16]:
a.fill_bracket('model_winner')

In [None]:
a.bracket

In [248]:
a.bracket

Unnamed: 0,Season,Slot,StrongSeed,WeakSeed,Round,TeamID_1,team1_prob,TeamID_2,team2_prob,sorted_ids,Winner,curr_win_prob,pre_win_prob,cumm_win_prob,TeamName_1,TeamName_2,WinnerName,potential_pts,expected_pts
0,2023,W16,W16a,W16b,0,1192,1.0,1411,1.0,11921411,1411,0.568255,1.0,0.568255,F Dickinson,TX Southern,TX Southern,0.0,0.0
1,2023,X16,X16a,X16b,0,1369,1.0,1394,1.0,13691394,1394,0.63171,1.0,0.63171,SE Missouri St,TAM C. Christi,TAM C. Christi,0.0,0.0
2,2023,Y11,Y11a,Y11b,0,1280,1.0,1338,1.0,12801338,1280,0.52988,1.0,0.52988,Mississippi St,Pittsburgh,Mississippi St,0.0,0.0
3,2023,Z11,Z11a,Z11b,0,1113,1.0,1305,1.0,11131305,1305,0.628844,1.0,0.628844,Arizona St,Nevada,Nevada,0.0,0.0
4,2023,R1W1,W01,W16,1,1345,1.0,1411,1.0,13451411,1345,0.831127,1.0,0.831127,Purdue,TX Southern,Purdue,10.0,8.311267
5,2023,R1W2,W02,W15,1,1266,1.0,1436,1.0,12661436,1266,0.798979,1.0,0.798979,Marquette,Vermont,Marquette,10.0,7.989788
6,2023,R1W3,W03,W14,1,1243,1.0,1286,1.0,12431286,1243,0.684215,1.0,0.684215,Kansas St,Montana St,Kansas St,10.0,6.842147
7,2023,R1W4,W04,W13,1,1397,1.0,1418,1.0,13971418,1397,0.84809,1.0,0.84809,Tennessee,Louisiana,Tennessee,10.0,8.480903
8,2023,R1W5,W05,W12,1,1181,1.0,1331,1.0,11811331,1181,0.644427,1.0,0.644427,Duke,Oral Roberts,Duke,10.0,6.444274
9,2023,R1W6,W06,W11,1,1246,1.0,1344,1.0,12461344,1246,0.722877,1.0,0.722877,Kentucky,Providence,Kentucky,10.0,7.228771


In [249]:
a.bracket.head(50)

Unnamed: 0,Season,Slot,StrongSeed,WeakSeed,Round,TeamID_1,team1_prob,TeamID_2,team2_prob,sorted_ids,Winner,curr_win_prob,pre_win_prob,cumm_win_prob,TeamName_1,TeamName_2,WinnerName,potential_pts,expected_pts
0,2023,W16,W16a,W16b,0,1192,1.0,1411,1.0,11921411,1411,0.568255,1.0,0.568255,F Dickinson,TX Southern,TX Southern,0.0,0.0
1,2023,X16,X16a,X16b,0,1369,1.0,1394,1.0,13691394,1394,0.63171,1.0,0.63171,SE Missouri St,TAM C. Christi,TAM C. Christi,0.0,0.0
2,2023,Y11,Y11a,Y11b,0,1280,1.0,1338,1.0,12801338,1280,0.52988,1.0,0.52988,Mississippi St,Pittsburgh,Mississippi St,0.0,0.0
3,2023,Z11,Z11a,Z11b,0,1113,1.0,1305,1.0,11131305,1305,0.628844,1.0,0.628844,Arizona St,Nevada,Nevada,0.0,0.0
4,2023,R1W1,W01,W16,1,1345,1.0,1411,1.0,13451411,1345,0.831127,1.0,0.831127,Purdue,TX Southern,Purdue,10.0,8.311267
5,2023,R1W2,W02,W15,1,1266,1.0,1436,1.0,12661436,1266,0.798979,1.0,0.798979,Marquette,Vermont,Marquette,10.0,7.989788
6,2023,R1W3,W03,W14,1,1243,1.0,1286,1.0,12431286,1243,0.684215,1.0,0.684215,Kansas St,Montana St,Kansas St,10.0,6.842147
7,2023,R1W4,W04,W13,1,1397,1.0,1418,1.0,13971418,1397,0.84809,1.0,0.84809,Tennessee,Louisiana,Tennessee,10.0,8.480903
8,2023,R1W5,W05,W12,1,1181,1.0,1331,1.0,11811331,1181,0.644427,1.0,0.644427,Duke,Oral Roberts,Duke,10.0,6.444274
9,2023,R1W6,W06,W11,1,1246,1.0,1344,1.0,12461344,1246,0.722877,1.0,0.722877,Kentucky,Providence,Kentucky,10.0,7.228771


In [212]:
a.bracket['expected_pts'].sum()

511.34765867458907

In [150]:
b = Bracket(2022,seeds,slots,teams,pred_df)

In [151]:
b.fill_bracket('model_winner')

In [156]:
b.bracket.head(50)

Unnamed: 0,Season,Slot,StrongSeed,WeakSeed,Round,TeamID_1,team1_prob,TeamID_2,team2_prob,sorted_ids,Winner,curr_win_prob,pre_win_prob,cumm_win_prob,TeamName_1,TeamName_2,WinnerName,potential_pts,expected_pts
0,2022,W12,W12a,W12b,0,1231,1.0,1461,1.0,12311461,1231.0,0.580749,1.0,0.580749,Indiana,Wyoming,Indiana,0.0,0.0
1,2022,X11,X11a,X11b,0,1323,1.0,1353,1.0,13231353,1323.0,0.67816,1.0,0.67816,Notre Dame,Rutgers,Notre Dame,0.0,0.0
2,2022,Y16,Y16a,Y16b,0,1394,1.0,1411,1.0,13941411,1411.0,0.588396,1.0,0.588396,TAM C. Christi,TX Southern,TX Southern,0.0,0.0
3,2022,Z16,Z16a,Z16b,0,1136,1.0,1460,1.0,11361460,1460.0,0.618647,1.0,0.618647,Bryant,Wright St,Wright St,0.0,0.0
4,2022,R1W1,W01,W16,1,1124,1.0,1313,1.0,11241313,1124.0,0.818957,1.0,0.818957,Baylor,Norfolk St,Baylor,10.0,8.189568
5,2022,R1W2,W02,W15,1,1246,1.0,1389,1.0,12461389,1246.0,0.818957,1.0,0.818957,Kentucky,St Peter's,Kentucky,10.0,8.189568
6,2022,R1W3,W03,W14,1,1345,1.0,1463,1.0,13451463,1345.0,0.831127,1.0,0.831127,Purdue,Yale,Purdue,10.0,8.311267
7,2022,R1W4,W04,W13,1,1417,1.0,1103,1.0,11031417,1417.0,0.831127,1.0,0.831127,UCLA,Akron,UCLA,10.0,8.311267
8,2022,R1W5,W05,W12,1,1388,1.0,1231,1.0,12311388,1388.0,0.628844,1.0,0.628844,St Mary's CA,Indiana,St Mary's CA,10.0,6.288437
9,2022,R1W6,W06,W11,1,1400,1.0,1439,1.0,14001439,1400.0,0.724525,1.0,0.724525,Texas,Virginia Tech,Texas,10.0,7.245253


In [158]:
b.bracket['expected_pts'].sum()

344.50487282790846

In [42]:
def run_tourney(season,predictor,actual):
    b = Bracket(season,seeds,slots,teams,pred_df)
    b.fill_bracket(predictor)
    b.score(actual)
    round_score_df = b.bracket.groupby('Round')['pts'].sum()
    round_scores = {x:round_score_df[x] for x in range(1,7)}
    round_scores['total'] = b.bracket['pts'].sum()
    return round_scores

In [43]:
scores = {}
for season in tqdm(seeds[seeds.Season>=2003].Season.unique()):
    scores[season] = {}
    actual = Bracket(season,seeds,slots,teams,pred_df)
    actual.fill_bracket('actual_winner')
    predictors = ['seed_winner','model_winner']
    for p in predictors:
        scores[season][p] = run_tourney(season,p,actual)

100%|█████████████████████████████████████████████████████████████████████| 20/20 [00:02<00:00,  9.69it/s]


In [44]:
scores

{2003: {'seed_winner': {1: 230.0,
   2: 80.0,
   3: 0.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 310.0},
  'model_winner': {1: 260.0,
   2: 140.0,
   3: 0.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 400.0}},
 2004: {'seed_winner': {1: 270.0,
   2: 140.0,
   3: 40.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 450.0},
  'model_winner': {1: 250.0,
   2: 120.0,
   3: 80.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 450.0}},
 2005: {'seed_winner': {1: 240.0,
   2: 80.0,
   3: 0.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 320.0},
  'model_winner': {1: 250.0,
   2: 100.0,
   3: 0.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 350.0}},
 2006: {'seed_winner': {1: 230.0,
   2: 80.0,
   3: 0.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 310.0},
  'model_winner': {1: 230.0,
   2: 80.0,
   3: 0.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 310.0}},
 2007: {'seed_winner': {1: 260.0,
   2: 100.0,
   3: 40.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 400.0},
  'model_winner': 

In [45]:
results = pd.DataFrame([x for x in range(2009,2022)],columns=['Season'])
predictors = ['seed_winner','model_winner']
for p in predictors:
    results[p] = results['Season'].apply(lambda x: scores[x][p]['total'])

KeyError: 2020

In [None]:
# make some visualization of points for each strategy in each year

# Create 2023 Bracket

In [17]:
bracket_2023 = Bracket(2023,seeds,slots,teams,pred_df)
bracket_2023.fill_bracket('model_winner')

In [21]:
pd.set_option('display.max_rows', None)
bracket_2023.bracket

Unnamed: 0,Season,Slot,StrongSeed,WeakSeed,Round,TeamID_1,team1_prob,TeamID_2,team2_prob,sorted_ids,Winner,curr_win_prob,pre_win_prob,cumm_win_prob,TeamName_1,TeamName_2,WinnerName,potential_pts,expected_pts
0,2023,W16,W16a,W16b,0,1192,1.0,1411,1.0,11921411,1411,0.518134,1.0,0.518134,F Dickinson,TX Southern,TX Southern,0.0,0.0
1,2023,X16,X16a,X16b,0,1369,1.0,1394,1.0,13691394,1394,0.712386,1.0,0.712386,SE Missouri St,TAM C. Christi,TAM C. Christi,0.0,0.0
2,2023,Y11,Y11a,Y11b,0,1280,1.0,1338,1.0,12801338,1338,0.543733,1.0,0.543733,Mississippi St,Pittsburgh,Pittsburgh,0.0,0.0
3,2023,Z11,Z11a,Z11b,0,1113,1.0,1305,1.0,11131305,1305,0.5677,1.0,0.5677,Arizona St,Nevada,Nevada,0.0,0.0
4,2023,R1W1,W01,W16,1,1345,1.0,1411,1.0,13451411,1345,0.899843,1.0,0.899843,Purdue,TX Southern,Purdue,10.0,8.998425
5,2023,R1W2,W02,W15,1,1266,1.0,1436,1.0,12661436,1266,0.902565,1.0,0.902565,Marquette,Vermont,Marquette,10.0,9.025651
6,2023,R1W3,W03,W14,1,1243,1.0,1286,1.0,12431286,1243,0.73781,1.0,0.73781,Kansas St,Montana St,Kansas St,10.0,7.378103
7,2023,R1W4,W04,W13,1,1397,1.0,1418,1.0,13971418,1397,0.900459,1.0,0.900459,Tennessee,Louisiana,Tennessee,10.0,9.004586
8,2023,R1W5,W05,W12,1,1181,1.0,1331,1.0,11811331,1181,0.570996,1.0,0.570996,Duke,Oral Roberts,Duke,10.0,5.709963
9,2023,R1W6,W06,W11,1,1246,1.0,1344,1.0,12461344,1246,0.676211,1.0,0.676211,Kentucky,Providence,Kentucky,10.0,6.762112


In [19]:
bracket_2023.bracket['expected_pts'].sum()

721.258787214477

In [251]:
# Look at bracket
bracket_2023.bracket

Unnamed: 0,Season,Slot,StrongSeed,WeakSeed,Round,TeamID_1,team1_prob,TeamID_2,team2_prob,sorted_ids,Winner,curr_win_prob,pre_win_prob,cumm_win_prob,TeamName_1,TeamName_2,WinnerName,potential_pts,expected_pts
0,2023,W16,W16a,W16b,0,1192,1.0,1411,1.0,11921411,1411,0.568255,1.0,0.568255,F Dickinson,TX Southern,TX Southern,0.0,0.0
1,2023,X16,X16a,X16b,0,1369,1.0,1394,1.0,13691394,1394,0.63171,1.0,0.63171,SE Missouri St,TAM C. Christi,TAM C. Christi,0.0,0.0
2,2023,Y11,Y11a,Y11b,0,1280,1.0,1338,1.0,12801338,1280,0.52988,1.0,0.52988,Mississippi St,Pittsburgh,Mississippi St,0.0,0.0
3,2023,Z11,Z11a,Z11b,0,1113,1.0,1305,1.0,11131305,1305,0.628844,1.0,0.628844,Arizona St,Nevada,Nevada,0.0,0.0
4,2023,R1W1,W01,W16,1,1345,1.0,1411,1.0,13451411,1345,0.831127,1.0,0.831127,Purdue,TX Southern,Purdue,10.0,8.311267
5,2023,R1W2,W02,W15,1,1266,1.0,1436,1.0,12661436,1266,0.798979,1.0,0.798979,Marquette,Vermont,Marquette,10.0,7.989788
6,2023,R1W3,W03,W14,1,1243,1.0,1286,1.0,12431286,1243,0.684215,1.0,0.684215,Kansas St,Montana St,Kansas St,10.0,6.842147
7,2023,R1W4,W04,W13,1,1397,1.0,1418,1.0,13971418,1397,0.84809,1.0,0.84809,Tennessee,Louisiana,Tennessee,10.0,8.480903
8,2023,R1W5,W05,W12,1,1181,1.0,1331,1.0,11811331,1181,0.644427,1.0,0.644427,Duke,Oral Roberts,Duke,10.0,6.444274
9,2023,R1W6,W06,W11,1,1246,1.0,1344,1.0,12461344,1246,0.722877,1.0,0.722877,Kentucky,Providence,Kentucky,10.0,7.228771


In [252]:
bracket_2023.bracket['expected_pts'].sum()

661.6888621255096

In [253]:
bracket_2023.bracket.to_csv('brackets/2023_bracket_v1.csv',index=False)

In [254]:
pd.options.display.max_rows = 100

In [255]:
bracket_2023.bracket[['Round','StrongSeed','WeakSeed','TeamName_1','TeamName_2','WinnerName','curr_win_prob']]

Unnamed: 0,Round,StrongSeed,WeakSeed,TeamName_1,TeamName_2,WinnerName,curr_win_prob
0,0,W16a,W16b,F Dickinson,TX Southern,TX Southern,0.568255
1,0,X16a,X16b,SE Missouri St,TAM C. Christi,TAM C. Christi,0.63171
2,0,Y11a,Y11b,Mississippi St,Pittsburgh,Mississippi St,0.52988
3,0,Z11a,Z11b,Arizona St,Nevada,Nevada,0.628844
4,1,W01,W16,Purdue,TX Southern,Purdue,0.831127
5,1,W02,W15,Marquette,Vermont,Marquette,0.798979
6,1,W03,W14,Kansas St,Montana St,Kansas St,0.684215
7,1,W04,W13,Tennessee,Louisiana,Tennessee,0.84809
8,1,W05,W12,Duke,Oral Roberts,Duke,0.644427
9,1,W06,W11,Kentucky,Providence,Kentucky,0.722877
