In [427]:
import numpy as np
import pandas as pd
import itertools
from numpy.random import default_rng
from tqdm import tqdm
from xgboost import XGBRegressor, XGBClassifier
from sklearn.preprocessing import StandardScaler

# Load data and models

In [428]:
# Seeded numpy random generator, so any stochastic step that draws from it
# gives the same numbers after Restart Kernel -> Run All.
SEED = 2022
rng = default_rng(SEED)

In [429]:
# Kaggle tournament tables. Team id columns are forced to str so they can be
# compared and joined as text further down.
seeds = pd.read_csv(
    "data/kaggle_data/MNCAATourneySeeds.csv",
    dtype={'TeamID': str},
)
slots = pd.read_csv("data/kaggle_data/MNCAATourneySlots.csv")
games = pd.read_csv(
    "data/kaggle_data/MNCAATourneyCompactResults.csv",
    dtype={'WTeamID': str, 'LTeamID': str},
)
teams = pd.read_csv(
    "data/kaggle_data/MTeams.csv",
    dtype={'TeamID': str},
)

In [430]:
# Matchup-level predictor table (one row per pair of teams); both team id
# columns are read as str, like the Kaggle tables.
pred_df = pd.read_csv(
    'data/model_data/matchup_features_full.csv',
    dtype={'TeamID_1': str, 'TeamID_2': str},
)

In [431]:
# Feature columns fed to the classifier.
# NOTE(review): presumably this must match the columns (and order) used when
# the model was trained -- confirm against the modelling notebook.
model_features = [
    'diff_final_pom',
    'diff_pi_i',
    'diff_tourneys_car',
    'diff_sw16_car',
    'diff_returning_score_pct',
]

# Load the saved classifier.
# TODO: modify the model notebook so that it saves the final model.
xgb_clf = XGBClassifier()
xgb_clf.load_model('models/xgb_final_v2.json')

In [432]:
# 1 when team 1 outscored team 2, 0 otherwise; NaN where no score difference is recorded.
pred_df['t1_win'] = pred_df['t1_score_diff'].map(
    lambda diff: int(diff > 0) if not np.isnan(diff) else np.nan
)

In [433]:
# make sure loaded model performs well on actual games
# (only rows with a recorded score difference are scored)
has_result = pred_df['t1_score_diff'].notnull()
played_X = pred_df.loc[has_result, model_features]
scaler = StandardScaler()
pred_X = pd.DataFrame(scaler.fit_transform(played_X), columns=played_X.columns)
pred_y = pred_df.loc[has_result, 't1_win']
print(xgb_clf.score(pred_X, pred_y))

0.7349703640982218


In [434]:
# get predictions on all games
# Select the model's feature columns for every matchup row, played or not.
pred_X = pred_df[model_features]
# NOTE(review): this re-fits the scaler on ALL matchups, whereas the accuracy
# check above fitted it only on rows with a recorded result, so the two cells
# feed the model differently scaled inputs. Neither fit is necessarily the
# scaling used at training time -- TODO confirm and ideally load the scaler
# that was fitted alongside the model.
scaler.fit(pred_X)
# Rebuild a DataFrame so the scaled array keeps the feature column names.
pred_X = pd.DataFrame(scaler.transform(pred_X), columns=pred_X.columns)

In [435]:
# predict_proba returns one column per class: the first column is stored as
# team 2's win probability and the second as team 1's.
# NOTE(review): assumes the model was trained on the 0/1 `t1_win` label, so
# that class 0 means "team 2 wins" -- confirm against the modelling notebook.
class_probs = xgb_clf.predict_proba(pred_X)
pred_df['team2_win_prob'] = class_probs[:, 0]
pred_df['team1_win_prob'] = class_probs[:, 1]

In [436]:
def sorted_ids(r,c1,c2):
    """Build an order-independent key for a matchup.

    Reads ``r[c1]`` and ``r[c2]``, converts both to ``str``, sorts them as
    text and joins them with a comma, so the same two teams always give the
    same key regardless of which column each one is in.
    """
    first, second = sorted((str(r[c1]), str(r[c2])))
    return first + ',' + second

# Needed for the 2021 game that VCU couldn't play due to COVID protocols
def fix_actual_winner(r):
    """Return the winner recorded for a matchup row, with one manual override.

    For the 2021 matchup whose ``sorted_ids`` key is '1332,1433' the winner is
    forced to '1332'; every other row keeps its existing ``actual_winner``.
    """
    is_covid_walkover = r['Season'] == 2021 and r['sorted_ids'] == '1332,1433'
    return '1332' if is_covid_walkover else r['actual_winner']

In [437]:
# Matchup key for every played tournament game (winner/loser order does not matter).
games['sorted_ids'] = games.apply(sorted_ids, axis=1, args=('WTeamID', 'LTeamID'))
# Slots starting with 'R' carry their round number as the second character;
# every other slot is treated as round 0.
slots['Round'] = slots['Slot'].map(lambda slot: int(slot[1]) if slot[0] == 'R' else 0)

# Make dataframe with predictions for every possible matchup in each tournament

In [438]:
# precomputed dataframe
# Seed strings look like 'Y12': keep only the digits. The pattern is a raw
# string because '\d' in a plain string literal is an invalid escape sequence.
pred_df['team1_seed_num'] = pred_df['Seed_1'].str.extract(r'(\d+)', expand=False).map(int)
pred_df['team2_seed_num'] = pred_df['Seed_2'].str.extract(r'(\d+)', expand=False).map(int)

#pred_df['team1_win_prob'] = pred_df.apply(lambda x: rng.random(), axis=1)
#pred_df['team2_win_prob'] = 1-pred_df['team1_win_prob']
#pred_df['team1_score_diff'] = pred_df['team1_win_prob'].apply(lambda x: (x-0.5)*30)

# Picks are computed column-wise (np.where) rather than with a row-wise apply.
# model_winner: team 1 only when its win probability is strictly above 0.5.
pred_df['model_winner'] = np.where(
    pred_df['team1_win_prob'] > 0.5, pred_df['TeamID_1'], pred_df['TeamID_2']
)
# seed_winner: the lower seed number wins; equal seed numbers go to team 1.
pred_df['seed_winner'] = np.where(
    pred_df['team1_seed_num'] <= pred_df['team2_seed_num'], pred_df['TeamID_1'], pred_df['TeamID_2']
)
# Probability attached to each pick. astype(float) keeps these columns float64
# whatever dtype the probability columns have, matching the previous
# row-by-row version.
pred_df['model_winner_prob'] = np.where(
    pred_df['model_winner'] == pred_df['TeamID_1'], pred_df['team1_win_prob'], pred_df['team2_win_prob']
).astype(float)
pred_df['seed_winner_prob'] = np.where(
    pred_df['seed_winner'] == pred_df['TeamID_1'], pred_df['team1_win_prob'], pred_df['team2_win_prob']
).astype(float)
# The real outcome is treated as certain.
pred_df['actual_winner_prob'] = 1
pred_df['sorted_ids'] = pred_df.apply(lambda r: sorted_ids(r,'TeamID_1','TeamID_2'),axis=1)
# Look up the real winner of each matchup; matchups with no game on record get 'NA'.
# The merged column is assigned back by index, which lines up because pred_df
# keeps the default integer index from read_csv and each (Season, sorted_ids)
# pair is expected to match at most one game.
pred_df['actual_winner'] = pred_df.merge(games,how='left',on=['Season','sorted_ids'])['WTeamID'].fillna('NA')
pred_df['actual_winner'] = pred_df.apply(fix_actual_winner, axis=1)

In [439]:
# add simulation columns? may not be worth it since past work showed very similar to greedy

In [214]:
def run_sim(r, generator=None):
    """Draw one simulated winner for a matchup row.

    r: mapping/row with 'TeamID_1', 'TeamID_2' and 'team1_win_prob'.
    generator: optional ``numpy.random.Generator`` (e.g. a seeded
        ``default_rng``) to draw from, which makes the simulation
        reproducible. When omitted the global ``np.random`` state is used,
        as before.

    Returns 'TeamID_1' with probability ``team1_win_prob`` and 'TeamID_2'
    otherwise.
    """
    source = np.random if generator is None else generator
    p_team1 = r['team1_win_prob']
    return source.choice([r['TeamID_1'], r['TeamID_2']], p=[p_team1, 1 - p_team1])

In [17]:
# Five independent simulated outcomes per matchup, stored as simulation_0 .. simulation_4.
for sim_idx in tqdm(range(5)):
    pred_df[f'simulation_{sim_idx}'] = pred_df.apply(run_sim, axis=1)

100%|██████████| 5/5 [00:10<00:00,  2.19s/it]


# Bracket class - may want to add something 

In [135]:
# Expected points are computed in Bracket.fill_bracket (the `expected_pts` column).

In [440]:
class Bracket:
    """One season's tournament bracket, filled in from a column of picks.

    The bracket is built round by round. Play-in games (round 0) and round 1
    take their teams from the seed table; every later round takes the winners
    of the two slots that feed it. After ``fill_bracket`` the combined result
    is in ``self.bracket`` (one row per slot) and the per-round frames remain
    available as ``self.r0`` ... ``self.r6``.

    Relies on the module-level ``sorted_ids`` helper to build the matchup key
    that is joined against ``prediction_df['sorted_ids']``.
    """

    def __init__(self, season, seeds, slots, teams, prediction_df):
        """Keep this season's rows of each input table.

        season: value matched against the ``Season`` column of the tables.
        seeds: frame with ``Season``, ``Seed`` and ``TeamID`` columns.
        slots: frame with ``Season``, ``Slot``, ``StrongSeed``, ``WeakSeed``.
        teams: frame with ``TeamID`` and ``TeamName`` columns.
        prediction_df: one row per matchup with ``Season``, ``sorted_ids``
            and, for each predictor name later passed to ``fill_bracket``,
            a ``<predictor>`` and a ``<predictor>_prob`` column.
        """
        self.season = season
        self.teams = teams
        self.prediction_df = prediction_df[prediction_df.Season==season].copy()
        self.seeds = seeds[seeds.Season==season].copy()
        self.slots = slots[slots.Season==season].copy()
        # Slots named 'R<n>...' belong to round n; anything else is round 0.
        self.slots['Round'] = self.slots['Slot'].apply(lambda x:  int(x[1]) if x[0] == 'R' else 0)

        self._reset_rounds()
        self.bracket = self.slots.copy()

    def _slots_for_round(self, round_number):
        """Return a fresh, re-indexed copy of the slots of one round."""
        return self.slots[self.slots['Round']==round_number].reset_index(drop=True)

    def _reset_rounds(self):
        """(Re)create the per-round frames with only the original slot columns."""
        self.r0 = self._slots_for_round(0)
        self.r1 = self._slots_for_round(1)
        self.r2 = self._slots_for_round(2)
        self.r3 = self._slots_for_round(3)
        self.r4 = self._slots_for_round(4)
        self.r5 = self._slots_for_round(5)
        self.r6 = self._slots_for_round(6)

    def _seat_from_seeds(self, round_df, seed_table):
        """Fill both team columns of ``round_df`` by looking seeds up in ``seed_table``.

        Teams seated this way are treated as certain to be there (probability 1).
        """
        round_df['TeamID_1'] = pd.merge(round_df,seed_table,how='left',left_on=['StrongSeed'],right_on=['Seed'])['TeamID']
        round_df['team1_prob'] = 1
        round_df['TeamID_2'] = pd.merge(round_df,seed_table,how='left',left_on=['WeakSeed'],right_on=['Seed'])['TeamID']
        round_df['team2_prob'] = 1

    def _seat_from_previous(self, round_df, previous_df):
        """Fill both team columns of ``round_df`` with the winners of the feeding slots.

        Each team carries the cumulative probability it had in the previous round.
        """
        round_df[['TeamID_1','team1_prob']] = pd.merge(round_df,previous_df,how='left',left_on=['StrongSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]
        round_df[['TeamID_2','team2_prob']] = pd.merge(round_df,previous_df,how='left',left_on=['WeakSeed'],right_on=['Slot'])[['Winner','cumm_win_prob']]

    def _pick_winners(self, round_df, predictor):
        """Add the matchup key, the picked winner and that pick's probability."""
        # Built with a comprehension rather than DataFrame.apply(axis=1) so a
        # round with no games (e.g. a season without play-ins) still yields an
        # empty column instead of failing.
        round_df['sorted_ids'] = [
            sorted_ids({'TeamID_1': team1, 'TeamID_2': team2}, 'TeamID_1', 'TeamID_2')
            for team1, team2 in zip(round_df['TeamID_1'], round_df['TeamID_2'])
        ]
        round_df[['Winner','curr_win_prob']] = pd.merge(round_df,self.prediction_df,how='left',on=['sorted_ids'])[[predictor,predictor+'_prob']]

    def _team_names(self, id_column):
        """Return the team name for every id in ``self.bracket[id_column]``."""
        return pd.merge(self.bracket,self.teams,how='left',left_on=[id_column],right_on=['TeamID'])['TeamName']

    def fill_bracket(self,predictor):
        """Fill every slot of the bracket using the picks in ``predictor``.

        predictor: name of a column in ``prediction_df`` holding the picked
            winner of each matchup; ``<predictor>_prob`` must hold the
            probability of that pick.

        Rebuilds ``self.r0`` ... ``self.r6``, ``self.new_seeds`` and
        ``self.bracket``. Can be called repeatedly on the same instance.
        """
        # Start from clean slot tables: columns left over from an earlier call
        # would otherwise collide in the merges below.
        self._reset_rounds()

        # playin round
        self._seat_from_seeds(self.r0, self.seeds)
        self._pick_winners(self.r0, predictor)
        self.r0['pre_win_prob'] = 1
        self.r0['cumm_win_prob'] = self.r0['curr_win_prob']

        # Play-in winners become additional "seeds", keyed by their slot name.
        playin_seeds = self.r0[['Season','Slot','Winner']].copy()
        playin_seeds.columns = ['Season','Seed','TeamID']
        self.new_seeds = pd.concat([self.seeds,playin_seeds],ignore_index=True)

        # Round 1: every team, including a play-in winner, is treated as
        # certain to be present (team probabilities and pre_win_prob are 1).
        self._seat_from_seeds(self.r1, self.new_seeds)
        self._pick_winners(self.r1, predictor)
        self.r1['pre_win_prob'] = 1
        self.r1['cumm_win_prob'] = self.r1['curr_win_prob']

        # Rounds 2-6 all follow the same recipe, each fed by the round before it.
        rounds = [self.r0,self.r1,self.r2,self.r3,self.r4,self.r5,self.r6]
        for previous_df, round_df in zip(rounds[1:], rounds[2:]):
            self._seat_from_previous(round_df, previous_df)
            self._pick_winners(round_df, predictor)
            # Probability that the picked winner reached this game at all.
            round_df['pre_win_prob'] = round_df['team1_prob'].where(
                round_df['Winner'] == round_df['TeamID_1'], round_df['team2_prob']
            )
            round_df['cumm_win_prob'] = round_df['pre_win_prob']*round_df['curr_win_prob']

        self.bracket = pd.concat(rounds,ignore_index=True)
        self.bracket['TeamName_1'] = self._team_names('TeamID_1')
        self.bracket['TeamName_2'] = self._team_names('TeamID_2')
        self.bracket['WinnerName'] = self._team_names('Winner')
        # Play-in games score 0; round 1 is worth 10 per game and the value
        # doubles every round up to 320 for round 6.
        self.bracket['potential_pts'] = self.bracket['Round'].apply(lambda x: 0 if x == 0 else 320/(64/(2**x)))
        self.bracket['expected_pts'] = self.bracket['cumm_win_prob']*self.bracket['potential_pts']

        return

    def score(self,actual):
        """Score this bracket against ``actual``, another filled ``Bracket``.

        Adds ``Winner_actual``, ``WinnerName_actual``, ``correct`` (1 when the
        pick matches the actual winner of the slot, else 0) and ``pts`` to
        ``self.bracket``. Can be called repeatedly.
        """
        truth = actual.bracket[['Season','Slot','Winner','WinnerName']]
        # Drop the columns written by an earlier score() call so the '_actual'
        # suffix below cannot collide with them.
        picks = self.bracket.drop(columns=['Winner_actual','WinnerName_actual'], errors='ignore')
        matched = pd.merge(picks,truth,how='left',on=['Season','Slot'],suffixes=('_predicted','_actual'))
        self.bracket[['Winner_actual','WinnerName_actual']] = matched[['Winner_actual','WinnerName_actual']]
        self.bracket['correct'] = (self.bracket['Winner'] == self.bracket['Winner_actual']).astype('int64')
        self.bracket['pts'] = self.bracket['potential_pts']*self.bracket['correct']
        return

    def get_team_scores(self):
        """Return the total ``pts`` earned per picked winner (requires ``score`` first)."""
        return self.bracket.groupby('Winner',as_index=False)['pts'].sum()
    
        

# Test out different strategies - need to incorporate people's bracket / who picked whom

In [441]:
# 2021 bracket that will be filled with the real results.
a = Bracket(season=2021, seeds=seeds, slots=slots, teams=teams, prediction_df=pred_df)

In [442]:
# Fill every slot with the team that actually won.
a.fill_bracket(predictor='actual_winner')

In [443]:
a.bracket

Unnamed: 0,Season,Slot,StrongSeed,WeakSeed,Round,TeamID_1,team1_prob,TeamID_2,team2_prob,sorted_ids,Winner,curr_win_prob,pre_win_prob,cumm_win_prob,TeamName_1,TeamName_2,WinnerName,potential_pts,expected_pts
0,2021,W11,W11a,W11b,0,1277,1,1417,1,12771417,1417,1,1,1,Michigan St,UCLA,UCLA,0.0,0.0
1,2021,W16,W16a,W16b,0,1291,1,1411,1,12911411,1411,1,1,1,Mt St Mary's,TX Southern,TX Southern,0.0,0.0
2,2021,X11,X11a,X11b,0,1179,1,1455,1,11791455,1179,1,1,1,Drake,Wichita St,Drake,0.0,0.0
3,2021,X16,X16a,X16b,0,1111,1,1313,1,11111313,1313,1,1,1,Appalachian St,Norfolk St,Norfolk St,0.0,0.0
4,2021,R1W1,W01,W16,1,1276,1,1411,1,12761411,1276,1,1,1,Michigan,TX Southern,Michigan,10.0,10.0
5,2021,R1W2,W02,W15,1,1104,1,1233,1,11041233,1104,1,1,1,Alabama,Iona,Alabama,10.0,10.0
6,2021,R1W3,W03,W14,1,1400,1,1101,1,11011400,1101,1,1,1,Texas,Abilene Chr,Abilene Chr,10.0,10.0
7,2021,R1W4,W04,W13,1,1199,1,1422,1,11991422,1199,1,1,1,Florida St,UNC Greensboro,Florida St,10.0,10.0
8,2021,R1W5,W05,W12,1,1160,1,1207,1,11601207,1160,1,1,1,Colorado,Georgetown,Colorado,10.0,10.0
9,2021,R1W6,W06,W11,1,1140,1,1417,1,11401417,1417,1,1,1,BYU,UCLA,UCLA,10.0,10.0


In [444]:
a.bracket['expected_pts'].sum()

1920.0

In [445]:
# 2021 bracket that will be filled with the model's picks.
b = Bracket(season=2021, seeds=seeds, slots=slots, teams=teams, prediction_df=pred_df)

In [446]:
# Fill every slot with the model's predicted winner.
b.fill_bracket(predictor='model_winner')

In [447]:
b.bracket

Unnamed: 0,Season,Slot,StrongSeed,WeakSeed,Round,TeamID_1,team1_prob,TeamID_2,team2_prob,sorted_ids,Winner,curr_win_prob,pre_win_prob,cumm_win_prob,TeamName_1,TeamName_2,WinnerName,potential_pts,expected_pts
0,2021,W11,W11a,W11b,0,1277,1.0,1417,1.0,12771417,1277,0.600717,1.0,0.600717,Michigan St,UCLA,Michigan St,0.0,0.0
1,2021,W16,W16a,W16b,0,1291,1.0,1411,1.0,12911411,1291,0.595181,1.0,0.595181,Mt St Mary's,TX Southern,Mt St Mary's,0.0,0.0
2,2021,X11,X11a,X11b,0,1179,1.0,1455,1.0,11791455,1179,0.704643,1.0,0.704643,Drake,Wichita St,Drake,0.0,0.0
3,2021,X16,X16a,X16b,0,1111,1.0,1313,1.0,11111313,1313,0.519789,1.0,0.519789,Appalachian St,Norfolk St,Norfolk St,0.0,0.0
4,2021,R1W1,W01,W16,1,1276,1.0,1291,1.0,12761291,1276,0.923416,1.0,0.923416,Michigan,Mt St Mary's,Michigan,10.0,9.234161
5,2021,R1W2,W02,W15,1,1104,1.0,1233,1.0,11041233,1104,0.848225,1.0,0.848225,Alabama,Iona,Alabama,10.0,8.482248
6,2021,R1W3,W03,W14,1,1400,1.0,1101,1.0,11011400,1400,0.611986,1.0,0.611986,Texas,Abilene Chr,Texas,10.0,6.119858
7,2021,R1W4,W04,W13,1,1199,1.0,1422,1.0,11991422,1199,0.872252,1.0,0.872252,Florida St,UNC Greensboro,Florida St,10.0,8.722521
8,2021,R1W5,W05,W12,1,1160,1.0,1207,1.0,11601207,1160,0.745947,1.0,0.745947,Colorado,Georgetown,Colorado,10.0,7.459469
9,2021,R1W6,W06,W11,1,1140,1.0,1277,1.0,11401277,1277,0.511276,1.0,0.511276,BYU,Michigan St,Michigan St,10.0,5.112761


In [448]:
b.bracket['expected_pts'].sum()

852.3074881265667

In [449]:
def run_tourney(season,predictor,actual):
    """Fill and score one season's bracket for a single pick strategy.

    season: season to build the bracket for.
    predictor: pick column passed to ``Bracket.fill_bracket``.
    actual: an already filled ``Bracket`` holding the real results.

    Returns a dict mapping each round 1..6 to the points earned in that
    round, plus a 'total' entry with the points over the whole bracket.
    Uses the notebook-level ``seeds``, ``slots``, ``teams`` and ``pred_df``.
    """
    predicted = Bracket(season, seeds, slots, teams, pred_df)
    predicted.fill_bracket(predictor)
    predicted.score(actual)

    pts_by_round = predicted.bracket.groupby('Round')['pts'].sum()
    round_scores = {}
    for round_number in range(1, 7):
        round_scores[round_number] = pts_by_round[round_number]
    round_scores['total'] = predicted.bracket['pts'].sum()
    return round_scores

In [450]:
# Score both pick strategies against the real results of every season from 2003 on.
# scores[season][predictor] is the dict returned by run_tourney.
scores = {}
predictors = ['seed_winner','model_winner']
for season in tqdm(seeds[seeds.Season>=2003].Season.unique()):
    actual = Bracket(season, seeds, slots, teams, pred_df)
    actual.fill_bracket('actual_winner')
    scores[season] = {p: run_tourney(season, p, actual) for p in predictors}

100%|██████████| 19/19 [00:09<00:00,  1.99it/s]


In [451]:
scores

{2003: {'seed_winner': {1: 240.0,
   2: 180.0,
   3: 200.0,
   4: 80.0,
   5: 0.0,
   6: 0.0,
   'total': 700.0},
  'model_winner': {1: 230.0,
   2: 220.0,
   3: 120.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 570.0}},
 2004: {'seed_winner': {1: 280.0,
   2: 180.0,
   3: 160.0,
   4: 80.0,
   5: 0.0,
   6: 0.0,
   'total': 700.0},
  'model_winner': {1: 280.0,
   2: 160.0,
   3: 160.0,
   4: 240.0,
   5: 0.0,
   6: 0.0,
   'total': 840.0}},
 2005: {'seed_winner': {1: 240.0,
   2: 160.0,
   3: 120.0,
   4: 160.0,
   5: 0.0,
   6: 0.0,
   'total': 680.0},
  'model_winner': {1: 250.0,
   2: 200.0,
   3: 160.0,
   4: 240.0,
   5: 320.0,
   6: 320.0,
   'total': 1490.0}},
 2006: {'seed_winner': {1: 230.0,
   2: 200.0,
   3: 200.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 630.0},
  'model_winner': {1: 230.0,
   2: 240.0,
   3: 200.0,
   4: 0.0,
   5: 0.0,
   6: 0.0,
   'total': 670.0}},
 2007: {'seed_winner': {1: 270.0,
   2: 220.0,
   3: 280.0,
   4: 160.0,
   5: 0.0,
   6: 0.0,
   '

In [493]:
# Total points per strategy for each scored season in 2009-2021.
# Only seasons that are actually keys of `scores` are used: a year in the range
# for which no bracket was scored (the scoring loop skips seasons missing from
# the seeds table) would otherwise raise a KeyError in the lookup below.
scored_seasons = [season for season in range(2009, 2022) if season in scores]
results = pd.DataFrame(scored_seasons, columns=['Season'])
predictors = ['seed_winner','model_winner']
for p in predictors:
    results[p] = results['Season'].apply(lambda x: scores[x][p]['total'])

In [None]:
# make some visualization of points for each strategy in each year

# Create 2022 Bracket

In [452]:
# Build the 2022 bracket from the model's picks.
bracket_2022 = Bracket(season=2022, seeds=seeds, slots=slots, teams=teams, prediction_df=pred_df)
bracket_2022.fill_bracket(predictor='model_winner')

In [453]:
# Look at bracket
bracket_2022.bracket

Unnamed: 0,Season,Slot,StrongSeed,WeakSeed,Round,TeamID_1,team1_prob,TeamID_2,team2_prob,sorted_ids,Winner,curr_win_prob,pre_win_prob,cumm_win_prob,TeamName_1,TeamName_2,WinnerName,potential_pts,expected_pts
0,2022,W12,W12a,W12b,0,1231,1.0,1461,1.0,12311461,1231,0.635628,1.0,0.635628,Indiana,Wyoming,Indiana,0.0,0.0
1,2022,X11,X11a,X11b,0,1323,1.0,1353,1.0,13231353,1323,0.665964,1.0,0.665964,Notre Dame,Rutgers,Notre Dame,0.0,0.0
2,2022,Y16,Y16a,Y16b,0,1394,1.0,1411,1.0,13941411,1411,0.559844,1.0,0.559844,TAM C. Christi,TX Southern,TX Southern,0.0,0.0
3,2022,Z16,Z16a,Z16b,0,1136,1.0,1460,1.0,11361460,1460,0.71183,1.0,0.71183,Bryant,Wright St,Wright St,0.0,0.0
4,2022,R1W1,W01,W16,1,1124,1.0,1313,1.0,11241313,1124,0.923416,1.0,0.923416,Baylor,Norfolk St,Baylor,10.0,9.234161
5,2022,R1W2,W02,W15,1,1246,1.0,1389,1.0,12461389,1246,0.92788,1.0,0.92788,Kentucky,St Peter's,Kentucky,10.0,9.278796
6,2022,R1W3,W03,W14,1,1345,1.0,1463,1.0,13451463,1345,0.92788,1.0,0.92788,Purdue,Yale,Purdue,10.0,9.278796
7,2022,R1W4,W04,W13,1,1417,1.0,1103,1.0,11031417,1417,0.91054,1.0,0.91054,UCLA,Akron,UCLA,10.0,9.105397
8,2022,R1W5,W05,W12,1,1388,1.0,1231,1.0,12311388,1388,0.611986,1.0,0.611986,St Mary's CA,Indiana,St Mary's CA,10.0,6.119858
9,2022,R1W6,W06,W11,1,1400,1.0,1439,1.0,14001439,1400,0.672385,1.0,0.672385,Texas,Virginia Tech,Texas,10.0,6.723847


In [454]:
bracket_2022.bracket['expected_pts'].sum()

795.8558394216421

In [458]:
# Save the filled 2022 bracket (one row per slot), without the index column.
bracket_2022.bracket.to_csv(
    'brackets/2022_bracket_v2.csv',
    index=False,
)

In [456]:
# Show up to 100 rows so the whole bracket is visible when displayed.
pd.set_option('display.max_rows', 100)

In [457]:
bracket_2022.bracket[['Round','StrongSeed','WeakSeed','TeamName_1','TeamName_2','WinnerName','curr_win_prob']]

Unnamed: 0,Round,StrongSeed,WeakSeed,TeamName_1,TeamName_2,WinnerName,curr_win_prob
0,0,W12a,W12b,Indiana,Wyoming,Indiana,0.635628
1,0,X11a,X11b,Notre Dame,Rutgers,Notre Dame,0.665964
2,0,Y16a,Y16b,TAM C. Christi,TX Southern,TX Southern,0.559844
3,0,Z16a,Z16b,Bryant,Wright St,Wright St,0.71183
4,1,W01,W16,Baylor,Norfolk St,Baylor,0.923416
5,1,W02,W15,Kentucky,St Peter's,Kentucky,0.92788
6,1,W03,W14,Purdue,Yale,Purdue,0.92788
7,1,W04,W13,UCLA,Akron,UCLA,0.91054
8,1,W05,W12,St Mary's CA,Indiana,St Mary's CA,0.611986
9,1,W06,W11,Texas,Virginia Tech,Texas,0.672385


In [393]:
pred_df[pred_df.sorted_ids=='1234,1350']

Unnamed: 0,Season,TeamID_1,TeamID_2,Seed_1,Seed_2,t1_score_diff,diff_total_games,diff_win_pct,diff_avg_points_for,diff_avg_points_against,...,team1_win_prob,team1_seed_num,team2_seed_num,model_winner,seed_winner,model_winner_prob,seed_winner_prob,actual_winner_prob,sorted_ids,actual_winner
41269,2022,1350,1234,Y12,Y05,,0,-0.085714,-12.057143,-3.0,...,0.725785,12,5,1350,1234,0.725785,0.274215,1,12341350,
