In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))
        


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/march-machine-learning-mania-2024/Conferences.csv
/kaggle/input/march-machine-learning-mania-2024/sample_submission.csv
/kaggle/input/march-machine-learning-mania-2024/WNCAATourneyDetailedResults.csv
/kaggle/input/march-machine-learning-mania-2024/WRegularSeasonCompactResults.csv
/kaggle/input/march-machine-learning-mania-2024/MNCAATourneySeedRoundSlots.csv
/kaggle/input/march-machine-learning-mania-2024/MRegularSeasonDetailedResults.csv
/kaggle/input/march-machine-learning-mania-2024/MNCAATourneyCompactResults.csv
/kaggle/input/march-machine-learning-mania-2024/MGameCities.csv
/kaggle/input/march-machine-learning-mania-2024/WGameCities.csv
/kaggle/input/march-machine-learning-mania-2024/MSeasons.csv
/kaggle/input/march-machine-learning-mania-2024/WNCAATourneySlots.csv
/kaggle/input/march-machine-learning-mania-2024/MSecondaryTourneyTeams.csv
/kaggle/input/march-machine-learning-mania-2024/2024_tourney_seeds.csv
/kaggle/input/march-machine-learning-mania-2024/Cities.csv
/

In [2]:
import datetime as dt
from itertools import repeat
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

In [3]:
# Competition data directory. File names are appended by plain string
# concatenation, so the trailing slash is required.
path = '/kaggle/input/march-machine-learning-mania-2024/'
df = pd.read_csv(path + '2024_tourney_seeds.csv')

# Split the seed table by the value of its 'Tournament' column.
dfM, dfW = (df[df['Tournament'] == code] for code in ('M', 'W'))

In [4]:
# Formatting Helper Functions
def remove_region(column):
    """Convert seed labels such as ``'W01'`` or ``'X16'`` to integer seed numbers.

    The first run of digits in each label is extracted and cast to ``int``, so
    the region letter, leading zeros and any trailing play-in suffix
    (e.g. ``'W16a'``) are all ignored.

    Parameters
    ----------
    column : pandas.Series of str
        Seed labels.

    Returns
    -------
    pandas.Series of int
        Seed numbers, on the same index as ``column``.

    Raises
    ------
    ValueError
        If a label contains no digits (the extracted value is missing and
        cannot be cast to ``int``).
    """
    # A single explicit regex extraction avoids depending on the pandas-version
    # specific default of ``str.replace(..., regex=...)`` and tolerates
    # characters other than W/X/Y/Z around the number.
    seed_number = column.str.extract(r'(\d+)', expand=False)
    return seed_number.astype(int)
    
def remove_loc(column):
    """Recode location letters to digit strings and return a categorical Series.

    Substitutions are applied in this order: ``'H'`` -> ``'1'``,
    ``'N'`` -> ``'0'``, ``'A'`` -> ``'2'``.
    """
    substitutions = (('H', '1'), ('N', '0'), ('A', '2'))
    for letter, code in substitutions:
        column = column.str.replace(letter, code)
    return column.astype('category')

In [5]:
def compile_dataset(tourney = 'M'):
    """Build the per-game modelling table from the regular-season detailed results.

    Every game in ``<tourney>RegularSeasonDetailedResults.csv`` is emitted twice:
    once with the file's first team block labelled as team 1 (``y == 1``) and
    once with the two team blocks swapped (``y == 2``), so the label is
    balanced between the two positions.
    NOTE(review): this assumes the raw file lists the winner's columns before
    the loser's, as the positional renaming implies - confirm against the data
    description.

    Shooting counts are converted to percentages (missing values, e.g. from
    0/0, become 0) and the raw counts, scores, ``DayNum``, ``Loc`` and ``NumOT``
    are dropped. Only games in which both teams appear in
    ``2024_tourney_seeds.csv`` for ``tourney`` are kept (inner merges); the 2024
    seed number is attached to rows of every season.

    Parameters
    ----------
    tourney : str
        Prefix of the results file and value matched against the ``Tournament``
        column of the seed file (the notebook uses ``'M'`` and ``'W'``).

    Returns
    -------
    pandas.DataFrame
        Columns sorted alphabetically (which places ``y`` last - the notebook
        slices the label with ``iloc[:, -1]``), rows sorted by ``Season``, with
        a fresh 0..n-1 index.
    """
    box_stats = ['FGM', 'FGA', 'FGM3', 'FGA3', 'FTM', 'FTA', 'OR', 'DR',
                 'Ast', 'TO', 'Stl', 'Blk', 'PF']

    def _layout(first, second):
        # Column names for the raw file when its first team block is numbered
        # `first` and its second team block is numbered `second`.
        return (['Season', 'DayNum', f'TeamID{first}', f'Score{first}',
                 f'TeamID{second}', f'Score{second}', 'Loc', 'NumOT']
                + [f'{stat}{first}' for stat in box_stats]
                + [f'{stat}{second}' for stat in box_stats])

    # Read the file once; the swapped view is built from a copy.
    raw = pd.read_csv(path+tourney+'RegularSeasonDetailedResults.csv')

    season_df_1 = raw.copy()
    season_df_1.columns = _layout(1, 2)
    season_df_1['y'] = 1

    season_df_2 = raw.copy()
    season_df_2.columns = _layout(2, 1)
    season_df_2['y'] = 2

    season_df = pd.concat([season_df_1, season_df_2])

    # Shooting percentages for both teams.
    for side in ('1', '2'):
        season_df['FG%' + side] = season_df['FGM' + side] / season_df['FGA' + side]
        season_df['FT%' + side] = season_df['FTM' + side] / season_df['FTA' + side]
        season_df['FG3%' + side] = season_df['FGM3' + side] / season_df['FGA3' + side]

    season_df = season_df.fillna(0)

    season_df = season_df.drop(
        ['DayNum', 'Loc', 'NumOT', 'FGM1', 'FGA1', 'FTM1', 'FTA1', 'FGM31', 'FGA31', 'Score1',
         'FGM2', 'FGA2', 'FTM2', 'FTA2', 'FGM32', 'FGA32', 'Score2'], axis=1)

    # Add Seed
    seed_df = pd.read_csv(path+'2024_tourney_seeds.csv')
    seed_df = seed_df[seed_df['Tournament'] == tourney].drop('Tournament', axis=1)

    # Merge seed with season_df (inner joins: games involving an unseeded team are dropped):
    merged = season_df.merge(seed_df, left_on='TeamID1', right_on='TeamID')
    merged = merged.merge(seed_df, left_on='TeamID2', right_on='TeamID')
    merged = merged.rename(columns={'Seed_x': 'Seed1', 'Seed_y': 'Seed2'})

    merged['Seed1'] = remove_region(merged['Seed1'])
    merged['Seed2'] = remove_region(merged['Seed2'])

    merged = merged.drop(['TeamID_x', 'TeamID_y'], axis=1)
    merged = merged.reindex(sorted(merged.columns), axis=1)

    return merged.sort_values('Season').reset_index(drop=True)

In [6]:
# Build the modelling table from the 'M' files and preview the first rows.
data = compile_dataset(tourney='M')
display(data.head())

Unnamed: 0,Ast1,Ast2,Blk1,Blk2,DR1,DR2,FG%1,FG%2,FG3%1,FG3%2,...,Season,Seed1,Seed2,Stl1,Stl2,TO1,TO2,TeamID1,TeamID2,y
0,13,10,3,3,32,18,0.458333,0.323077,0.416667,0.153846,...,2003,10,8,5,15,24,16,1161,1194,1
1,7,14,1,3,19,25,0.357143,0.45283,0.428571,0.466667,...,2003,2,7,3,5,17,13,1235,1400,2
2,15,12,2,5,19,21,0.482143,0.531915,0.368421,0.4375,...,2003,9,1,3,6,16,11,1321,1345,2
3,12,6,5,6,23,28,0.375,0.357143,0.2,0.352941,...,2003,10,10,5,9,16,17,1129,1305,2
4,11,16,1,4,16,24,0.383333,0.488889,0.214286,0.230769,...,2003,6,3,4,7,19,22,1376,1246,2


### Make Categorical

In [7]:
# Store both seed columns with the pandas 'category' dtype.
for seed_col in ('Seed1', 'Seed2'):
    data[seed_col] = data[seed_col].astype('category')

In [8]:
def check_multicolinearity(train_df, vif_threshold = 20):
    """Return the variance inflation factor (VIF) of every column of ``train_df``.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Feature matrix; each column is passed to ``variance_inflation_factor``
        against all the others.
    vif_threshold : float, optional
        Kept for backward compatibility only. The full VIF table is returned
        regardless of this value; filter the result yourself if needed, e.g.
        ``vif[vif['VIF'] <= 20]``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``feature`` and ``VIF``, one row per input column, in the
        input column order.
    """
    # Materialise the matrix once instead of once per column.
    design = train_df.values
    return pd.DataFrame({
        "feature": train_df.columns,
        "VIF": [variance_inflation_factor(design, i) for i in range(design.shape[1])],
    })

In [9]:
# Seasons before 2024 are used for fitting; the label is the last column.
train = data.loc[data['Season'] < 2024]
X_train, y_train = train.iloc[:, :-1], train.iloc[:, -1]

display(check_multicolinearity(X_train))

# Season 2024 (and later, if present) is held out for testing.
test = data.loc[data['Season'] >= 2024]
X_test, y_test = test.iloc[:, :-1], test.iloc[:, -1]

print(train.shape, test.shape)

Unnamed: 0,feature,VIF
0,Ast1,19.038512
1,Ast2,19.038512
2,Blk1,4.167913
3,Blk2,4.167913
4,DR1,118.086087
5,DR2,118.086087
6,FG%1,192.352379
7,FG%2,192.352379
8,FG3%1,14.661752
9,FG3%2,14.661752


(9900, 26) (566, 26)


### Build Classification ML Algorithm - Simple Logistic Regression

In [10]:
# Logistic regression baseline (liblinear solver).
lr_clf = LogisticRegression(solver='liblinear', max_iter=5000)
lr_clf.fit(X_train, y_train)

In [11]:
# Logistic regression: accuracy (in percent) and confusion matrix on the test split.
pred_test = lr_clf.predict(X_test)
lr_accuracy_pct = lr_clf.score(X_test, y_test) * 100
print("Testing Accuracy")
print(lr_accuracy_pct)
display(confusion_matrix(y_test, pred_test))

Testing Accuracy
92.57950530035336


array([[262,  21],
       [ 21, 262]])

In [12]:
# Fit SVM kernels:
#for k in ['rbf', 'poly', 'sigmoid', 'linear']:
#    for c in [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
#        svm_clf = svm.SVC(C = c, kernel=k).fit(X_train, y_train)
#        pred_test_SVM = svm_clf.predict(X_test)
#        print(svm_clf.score(X_test, y_test)*100, "C=", c)

In [13]:
# Linear-kernel support vector classifier.
svm_clf = svm.SVC(C=1.0, kernel='linear')
svm_clf.fit(X_train, y_train)

In [14]:
# SVM: accuracy (in percent) and confusion matrix on the test split.
pred_test_SVM = svm_clf.predict(X_test)
svm_accuracy_pct = svm_clf.score(X_test, y_test) * 100
print("Testing Accuracy SVM")
print(svm_accuracy_pct)
display(confusion_matrix(y_test, pred_test_SVM))

Testing Accuracy SVM
88.51590106007066


array([[258,  25],
       [ 40, 243]])

In [15]:
#Fit Random Forest:
#for k in ['gini', 'entropy', 'log_loss']:
#    for d in [8,10, 12, 14]:
#        for m in [2,3,4,5, 6,8]:
#            rf_clf = RandomForestClassifier(criterion = k, max_depth=d, min_samples_split = m, random_state=0).fit(X_train, y_train)
#            pred_test_RF = rf_clf.predict(X_test)
#            print(rf_clf.score(X_test, y_test)*100, "k=", k, "depth=", d, 'm=', m)

In [16]:
# Random forest. A fixed random_state makes "Restart & Run All" reproduce the
# same forest; the metrics printed below were produced without a seed, so they
# may shift slightly on the first re-run.
rf_clf = RandomForestClassifier(criterion = 'log_loss', max_depth=14, min_samples_split=2,
                                random_state=0).fit(X_train, y_train)

In [17]:
# Random forest: accuracy (in percent) and confusion matrix on the test split.
pred_test_RF = rf_clf.predict(X_test)
rf_accuracy_pct = rf_clf.score(X_test, y_test) * 100
print("Testing Accuracy Random Forest")
print(rf_accuracy_pct)
display(confusion_matrix(y_test, pred_test_RF))

Testing Accuracy Random Forest
89.39929328621908


array([[252,  31],
       [ 29, 254]])

In [18]:
#print ("Testing Accuracy AdaBoost")
#for n in [140, 150, 160, 170, 180, 190]:
#    for l in [1.5, 1.51, 1.52, 1.53, 1.54, 1.55, 1.56, 1.57, 1.58, 1.59, 1.6, 1.61, 1.62, 1.63, 1.64, 1.65, 1.66, 1.67, 1.68, 1.69, 1.70]:
#        boost_clf = AdaBoostClassifier(n_estimators=n, algorithm="SAMME", learning_rate = l).fit(X_train, y_train)
#        pred_test_ADA = boost_clf.predict(X_test)
#        print(boost_clf.score(X_test, y_test)*100, "n=", n, "l=", l)

In [19]:
# AdaBoost. A fixed random_state keeps the fitted ensemble reproducible across
# kernel restarts; the metrics printed below were produced without a seed.
boost_clf = AdaBoostClassifier(n_estimators=150, algorithm="SAMME", learning_rate = 1.6,
                               random_state=0).fit(X_train, y_train)

In [20]:
# AdaBoost: accuracy (in percent) and confusion matrix on the test split.
pred_test_ADA = boost_clf.predict(X_test)
ada_accuracy_pct = boost_clf.score(X_test, y_test) * 100
print("Testing Accuracy AdaBoost")
print(ada_accuracy_pct)
display(confusion_matrix(y_test, pred_test_ADA))

Testing Accuracy AdaBoost
93.28621908127208


array([[261,  22],
       [ 16, 267]])

In [21]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [22]:
# Standardise the features, then fit a linear classifier with SGD.
# random_state pins the shuffling SGD performs between epochs so the fitted
# model (and every bracket simulated from it) is reproducible; the metrics
# printed below were produced without a seed.
sgd_clf = make_pipeline(
    StandardScaler(),
    SGDClassifier(loss='squared_hinge', max_iter=1000, tol=1e-3, random_state=0),
)
sgd_clf.fit(X_train, y_train)
pred_test_sgd = sgd_clf.predict(X_test)

print(sgd_clf.score(X_test, y_test)*100)
display(confusion_matrix(y_test, pred_test_sgd))

92.57950530035336


array([[262,  21],
       [ 21, 262]])

### Now the fun stuff - create matchups and predict winners

In [23]:
# Keep only the 2024 regular-season rows; the simulated matchups are built from these.
data_live_M = compile_dataset(tourney='M')
data_live_M = data_live_M.loc[data_live_M['Season'] == 2024]
display(data_live_M.head())

data_live_W = compile_dataset(tourney='W')
data_live_W = data_live_W.loc[data_live_W['Season'] == 2024]
display(data_live_W.head())

Unnamed: 0,Ast1,Ast2,Blk1,Blk2,DR1,DR2,FG%1,FG%2,FG3%1,FG3%2,...,Season,Seed1,Seed2,Stl1,Stl2,TO1,TO2,TeamID1,TeamID2,y
9900,18,18,5,0,30,18,0.45098,0.432432,0.3125,0.21875,...,2024,1,5,3,8,15,5,1345,1458,2
9901,13,11,8,5,26,26,0.357143,0.375,0.2,0.3,...,2024,5,11,3,7,10,3,1361,1307,2
9902,17,13,2,4,25,30,0.373134,0.54902,0.266667,0.428571,...,2024,1,11,6,6,8,10,1314,1301,2
9903,7,17,2,3,23,28,0.267857,0.5,0.181818,0.409091,...,2024,1,2,6,9,11,7,1222,1235,2
9904,14,18,1,3,23,26,0.463768,0.463768,0.296296,0.371429,...,2024,8,3,5,3,8,9,1304,1228,2


Unnamed: 0,Ast1,Ast2,Blk1,Blk2,DR1,DR2,FG%1,FG%2,FG3%1,FG3%2,...,Season,Seed1,Seed2,Stl1,Stl2,TO1,TO2,TeamID1,TeamID2,y
7508,7,10,1,2,16,23,0.372093,0.470588,0.0,0.3,...,2024,16,12,2,8,15,10,3180,3195,2
7509,10,20,2,4,12,26,0.454545,0.627451,0.214286,0.473684,...,2024,11,4,12,6,12,20,3112,3211,2
7510,10,10,2,6,21,27,0.285714,0.436364,0.208333,0.285714,...,2024,6,8,9,7,12,15,3304,3242,2
7511,17,16,6,0,22,26,0.457627,0.463768,0.354839,0.384615,...,2024,10,9,6,8,17,12,3350,3277,2
7512,16,19,3,9,27,23,0.396226,0.377049,0.333333,0.266667,...,2024,12,7,3,8,16,6,3355,3166,2


In [24]:
# Team-1 columns taken from a team's rows when it plays as TeamID1.
# 'Season' is listed here only (not in cols2), so a cross join of a cols1
# frame with a cols2 frame carries exactly one Season column.
cols1 = ['Ast1', 'Blk1', 'DR1',  'FG%1', 'FG3%1',
        'FT%1', 'OR1', 'PF1', 'Season', 
       'Seed1', 'Stl1', 'TO1', 'TeamID1']

# Team-2 columns taken from a team's rows when it plays as TeamID2.
# Together, cols1 + cols2 cover the 25 feature columns of the training frame
# (every column of compile_dataset's output except 'y').
cols2 = ['Ast2', 'Blk2', 'DR2', 'FG%2', 
       'FG3%2', 'FT%2', 'OR2', 'PF2',
       'Seed2',  'Stl2', 'TO2', 'TeamID2']

In [25]:
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and convergence warnings from the libraries used here.
warnings.filterwarnings('ignore')

def create_matchup (teamID_a, teamID_b):
    """Predict the winner of ``teamID_a`` vs ``teamID_b`` with ``lr_clf``.

    Every 2024 row where team A appears as ``TeamID1`` is cross-joined with
    every 2024 row where team B appears as ``TeamID2``; the classifier labels
    each combined row and the most frequent label wins.

    Parameters
    ----------
    teamID_a, teamID_b : int-like or tuple
        A team ID (any integer type), or a tuple whose first element is the
        team ID, such as the ``(TeamID, region, seed)`` tuples produced by the
        round simulators. IDs in 1000-1999 use ``data_live_M``; IDs in
        3000-3999 use ``data_live_W`` (chosen from team A's ID).

    Returns
    -------
    int
        ``1`` if team A is predicted to win, ``2`` if team B is. Ties go to
        ``1``, and ``1`` is also returned when either team has no rows.

    Raises
    ------
    ValueError
        If team A's ID is in neither ID range.
    """
    # Accept tuples and any integer type; the original exact
    # ``type(x) == np.int64`` check left plain Python ints unhandled.
    id_a = teamID_a[0] if isinstance(teamID_a, tuple) else teamID_a
    id_b = teamID_b[0] if isinstance(teamID_b, tuple) else teamID_b

    if 1000 <= id_a < 2000:
        df_sim = data_live_M
    elif 3000 <= id_a < 4000:
        df_sim = data_live_W
    else:
        raise ValueError(f"Team ID {id_a!r} is outside the 1000-1999 and 3000-3999 ranges")

    team_a = df_sim.loc[df_sim['TeamID1'] == id_a, cols1]
    team_b = df_sim.loc[df_sim['TeamID2'] == id_b, cols2]

    # No data for one side: default to team A, as before.
    if team_a.empty or team_b.empty:
        return 1

    match = team_a.join(team_b, how='cross')
    # Same alphabetical column order as the training frame.
    match = match.reindex(sorted(match.columns), axis=1)

    # Majority vote over all simulated pairings.
    pred = lr_clf.predict(match)
    return np.argmax(np.bincount(pred))

In [26]:
def create_matchup_SVM (teamID_a, teamID_b):
    """Predict the winner of ``teamID_a`` vs ``teamID_b`` with ``svm_clf``.

    Every 2024 row where team A appears as ``TeamID1`` is cross-joined with
    every 2024 row where team B appears as ``TeamID2``; the classifier labels
    each combined row and the most frequent label wins.

    Parameters
    ----------
    teamID_a, teamID_b : int-like or tuple
        A team ID (any integer type), or a tuple whose first element is the
        team ID. IDs in 1000-1999 use ``data_live_M``; IDs in 3000-3999 use
        ``data_live_W`` (chosen from team A's ID).

    Returns
    -------
    int
        ``1`` if team A is predicted to win, ``2`` if team B is. Ties go to
        ``1``, and ``1`` is also returned when either team has no rows.

    Raises
    ------
    ValueError
        If team A's ID is in neither ID range.
    """
    # Accept tuples and any integer type, not only exact np.int64.
    id_a = teamID_a[0] if isinstance(teamID_a, tuple) else teamID_a
    id_b = teamID_b[0] if isinstance(teamID_b, tuple) else teamID_b

    if 1000 <= id_a < 2000:
        df_sim = data_live_M
    elif 3000 <= id_a < 4000:
        df_sim = data_live_W
    else:
        raise ValueError(f"Team ID {id_a!r} is outside the 1000-1999 and 3000-3999 ranges")

    team_a = df_sim.loc[df_sim['TeamID1'] == id_a, cols1]
    team_b = df_sim.loc[df_sim['TeamID2'] == id_b, cols2]

    # No data for one side: default to team A, as before.
    if team_a.empty or team_b.empty:
        return 1

    match = team_a.join(team_b, how='cross')
    # Same alphabetical column order as the training frame.
    match = match.reindex(sorted(match.columns), axis=1)

    # Majority vote over all simulated pairings.
    pred = svm_clf.predict(match)
    return np.argmax(np.bincount(pred))

In [27]:
def create_matchup_RF (teamID_a, teamID_b):
    """Predict the winner of ``teamID_a`` vs ``teamID_b`` with ``rf_clf``.

    Every 2024 row where team A appears as ``TeamID1`` is cross-joined with
    every 2024 row where team B appears as ``TeamID2``; the classifier labels
    each combined row and the most frequent label wins.

    Parameters
    ----------
    teamID_a, teamID_b : int-like or tuple
        A team ID (any integer type), or a tuple whose first element is the
        team ID. IDs in 1000-1999 use ``data_live_M``; IDs in 3000-3999 use
        ``data_live_W`` (chosen from team A's ID).

    Returns
    -------
    int
        ``1`` if team A is predicted to win, ``2`` if team B is. Ties go to
        ``1``, and ``1`` is also returned when either team has no rows.

    Raises
    ------
    ValueError
        If team A's ID is in neither ID range.
    """
    # Accept tuples and any integer type, not only exact np.int64.
    id_a = teamID_a[0] if isinstance(teamID_a, tuple) else teamID_a
    id_b = teamID_b[0] if isinstance(teamID_b, tuple) else teamID_b

    if 1000 <= id_a < 2000:
        df_sim = data_live_M
    elif 3000 <= id_a < 4000:
        df_sim = data_live_W
    else:
        raise ValueError(f"Team ID {id_a!r} is outside the 1000-1999 and 3000-3999 ranges")

    team_a = df_sim.loc[df_sim['TeamID1'] == id_a, cols1]
    team_b = df_sim.loc[df_sim['TeamID2'] == id_b, cols2]

    # No data for one side: default to team A, as before.
    if team_a.empty or team_b.empty:
        return 1

    match = team_a.join(team_b, how='cross')
    # Same alphabetical column order as the training frame.
    match = match.reindex(sorted(match.columns), axis=1)

    # Majority vote over all simulated pairings.
    pred = rf_clf.predict(match)
    return np.argmax(np.bincount(pred))

In [28]:
def create_matchup_ADA (teamID_a, teamID_b):
    """Predict the winner of ``teamID_a`` vs ``teamID_b`` with ``boost_clf``.

    Every 2024 row where team A appears as ``TeamID1`` is cross-joined with
    every 2024 row where team B appears as ``TeamID2``; the classifier labels
    each combined row and the most frequent label wins.

    Parameters
    ----------
    teamID_a, teamID_b : int-like or tuple
        A team ID (any integer type), or a tuple whose first element is the
        team ID. IDs in 1000-1999 use ``data_live_M``; IDs in 3000-3999 use
        ``data_live_W`` (chosen from team A's ID).

    Returns
    -------
    int
        ``1`` if team A is predicted to win, ``2`` if team B is. Ties go to
        ``1``, and ``1`` is also returned when either team has no rows.

    Raises
    ------
    ValueError
        If team A's ID is in neither ID range.
    """
    # Accept tuples and any integer type, not only exact np.int64.
    id_a = teamID_a[0] if isinstance(teamID_a, tuple) else teamID_a
    id_b = teamID_b[0] if isinstance(teamID_b, tuple) else teamID_b

    if 1000 <= id_a < 2000:
        df_sim = data_live_M
    elif 3000 <= id_a < 4000:
        df_sim = data_live_W
    else:
        raise ValueError(f"Team ID {id_a!r} is outside the 1000-1999 and 3000-3999 ranges")

    team_a = df_sim.loc[df_sim['TeamID1'] == id_a, cols1]
    team_b = df_sim.loc[df_sim['TeamID2'] == id_b, cols2]

    # No data for one side: default to team A, as before.
    if team_a.empty or team_b.empty:
        return 1

    match = team_a.join(team_b, how='cross')
    # Same alphabetical column order as the training frame.
    match = match.reindex(sorted(match.columns), axis=1)

    # Majority vote over all simulated pairings.
    pred = boost_clf.predict(match)
    return np.argmax(np.bincount(pred))

In [29]:
def create_matchup_SGD (teamID_a, teamID_b):
    """Predict the winner of ``teamID_a`` vs ``teamID_b`` with ``sgd_clf``.

    Every 2024 row where team A appears as ``TeamID1`` is cross-joined with
    every 2024 row where team B appears as ``TeamID2``; the classifier labels
    each combined row and the most frequent label wins.

    Parameters
    ----------
    teamID_a, teamID_b : int-like or tuple
        A team ID (any integer type), or a tuple whose first element is the
        team ID. IDs in 1000-1999 use ``data_live_M``; IDs in 3000-3999 use
        ``data_live_W`` (chosen from team A's ID).

    Returns
    -------
    int
        ``1`` if team A is predicted to win, ``2`` if team B is. Ties go to
        ``1``, and ``1`` is also returned when either team has no rows.

    Raises
    ------
    ValueError
        If team A's ID is in neither ID range.
    """
    # Accept tuples and any integer type, not only exact np.int64.
    id_a = teamID_a[0] if isinstance(teamID_a, tuple) else teamID_a
    id_b = teamID_b[0] if isinstance(teamID_b, tuple) else teamID_b

    if 1000 <= id_a < 2000:
        df_sim = data_live_M
    elif 3000 <= id_a < 4000:
        df_sim = data_live_W
    else:
        raise ValueError(f"Team ID {id_a!r} is outside the 1000-1999 and 3000-3999 ranges")

    team_a = df_sim.loc[df_sim['TeamID1'] == id_a, cols1]
    team_b = df_sim.loc[df_sim['TeamID2'] == id_b, cols2]

    # No data for one side: default to team A, as before.
    if team_a.empty or team_b.empty:
        return 1

    match = team_a.join(team_b, how='cross')
    # Same alphabetical column order as the training frame.
    match = match.reindex(sorted(match.columns), axis=1)

    # Majority vote over all simulated pairings.
    pred = sgd_clf.predict(match)
    return np.argmax(np.bincount(pred))

## Simulate Round 1

In [30]:
# Total number of brackets; simulate_round_1 compares `bracket` against
# num_brackets/3 and num_brackets - num_brackets/3 to choose a model.
num_brackets = 1000
# Matchup predictor for each third of the brackets (first, middle, last).
# All three currently point at the SGD-based predictor.
model1 = create_matchup_SGD
model2 = create_matchup_SGD
model3 = create_matchup_SGD

In [31]:
def simulate_round_1 (df, tourney = 'M', bracket=1):
    """Simulate the 32 first-round games of one bracket.

    For each region (W, X, Y, Z) the seed labels containing the region letter
    are sorted and the first 16 are treated as seeds 1..16. The games are
    played in the order 1v16, 8v9, 5v12, 4v13, 6v11, 3v14, 7v10, 2v15.

    The predictor depends on ``bracket``: ``model1`` when
    ``bracket <= num_brackets/3``, ``model2`` up to
    ``num_brackets - num_brackets/3``, and ``model3`` above that.

    Parameters
    ----------
    df : pandas.DataFrame
        Seed table with ``Seed`` and ``TeamID`` columns for one tournament.
    tourney : str
        Tournament code written to the ``Tournament`` column of the result.
    bracket : int
        Bracket number; written to the ``Bracket`` column and used to select
        the predictor.

    Returns
    -------
    tuple
        ``(round_1, regionW_winners, regionX_winners, regionY_winners,
        regionZ_winners)``. ``round_1`` is a DataFrame with columns
        ``Tournament``, ``Bracket``, ``Slot`` (``'R1' + region + favourite
        seed number``) and ``Team`` (winner's label, e.g. ``'W01'``). Each
        winners list holds eight ``(TeamID, region, label)`` tuples in game
        order.
    """
    regions = ['W', 'X',  'Y', 'Z']
    # (favourite seed, underdog seed) in the order games are recorded; the slot
    # is named after the favourite's seed number.
    pairings = [(1, 16), (8, 9), (5, 12), (4, 13), (6, 11), (3, 14), (7, 10), (2, 15)]
    games_per_region = len(pairings)
    n_games = len(regions) * games_per_region

    # Exactly one predictor applies to a bracket, so choose it once instead of
    # repeating the eight games under three separate conditions.
    if bracket <= num_brackets/3:
        predict_winner = model1
    elif bracket <= (num_brackets - num_brackets/3):
        predict_winner = model2
    else:
        predict_winner = model3

    starting_teams = list(set(df['Seed']))
    surviving_teams = []
    # [tourney] * n rather than list(tourney * n) so that multi-character
    # codes still yield one entry per game.
    round_1 = {'Tournament': [tourney] * n_games, 'Bracket': [bracket] * n_games, 'Slot': [], 'Team': []}

    for region in regions:
        regional_teams = sorted(seed for seed in starting_teams if region in seed)

        # Team ID of seeds 1..16 for this region, taken positionally from the
        # sorted labels (index 0 -> seed 1). `.values[0]` keeps the numpy
        # integer type that the matchup predictors receive.
        team_ids = [df.loc[df['Seed'] == seed]['TeamID'].values[0]
                    for seed in regional_teams[:16]]

        ## Play the games!
        for favourite, underdog in pairings:
            if predict_winner(team_ids[favourite - 1], team_ids[underdog - 1]) == 1:
                winner_seed = favourite
            else:
                winner_seed = underdog

            winner_label = region + str(winner_seed).zfill(2)
            surviving_teams.append((team_ids[winner_seed - 1], region, winner_label))
            round_1['Slot'].append('R1' + region + str(favourite))
            round_1['Team'].append(winner_label)

    regionW_winners = surviving_teams[0:8]
    regionX_winners = surviving_teams[8:16]
    regionY_winners = surviving_teams[16:24]
    regionZ_winners = surviving_teams[24:32]

    return pd.DataFrame(round_1), regionW_winners, regionX_winners, regionY_winners, regionZ_winners

In [32]:
def simulate_round_2 (regionW_winners, regionX_winners, regionY_winners, regionZ_winners, tourney = 'M', bracket = 1):
    """Simulate the 16 second-round games of one bracket.

    Parameters
    ----------
    regionW_winners, regionX_winners, regionY_winners, regionZ_winners : list
        The eight round-1 survivors of each region, in the order round 1
        produced them.  Each entry is indexed as ``entry[0]`` (the value handed
        to the model), ``entry[1]`` (region letter used in the slot name) and
        ``entry[2]`` (the value written to the ``Team`` column).
    tourney : str
        Tournament code written to every row of the ``Tournament`` column.
    bracket : int
        Bracket number; written to the ``Bracket`` column and used to choose
        which model plays the games.

    Returns
    -------
    tuple
        ``(round_2_df, regionW_winners, regionX_winners, regionY_winners,
        regionZ_winners)`` where the data frame has the columns
        ``Tournament``, ``Bracket``, ``Slot``, ``Team`` (16 rows) and each
        winners list holds the four surviving ``(team, region, seed)`` tuples
        of that region.
    """
    # The first third of the brackets is played by model1, the middle third by
    # model2 and the rest by model3 (thresholds come from the global num_brackets).
    if bracket <= num_brackets/3:
        predict = model1
    elif bracket <= (num_brackets - num_brackets/3):
        predict = model2
    else:
        predict = model3

    # (index of first team, index of second team, slot suffix), in play order.
    # Adjacent round-1 survivors meet; the suffix gives slots R2?1, R2?4, R2?3, R2?2.
    matchups = ((0, 1, '1'), (2, 3, '4'), (4, 5, '3'), (6, 7, '2'))

    # [tourney]*16 (not list(tourney*16)) so multi-character codes still give 16 rows.
    round_2 = {'Tournament': [tourney]*16, 'Bracket': [bracket]*16, 'Slot': [], 'Team': []}
    surviving_teams = []

    for region in (regionW_winners, regionX_winners, regionY_winners, regionZ_winners):
        for first, second, game in matchups:
            # A model result of 1 means the first team advances; anything else, the second.
            if predict(region[first][0], region[second][0]) == 1:
                advancing = region[first]
            else:
                advancing = region[second]
            surviving_teams.append((advancing[0], advancing[1], advancing[2]))
            round_2['Slot'].append('R2' + advancing[1] + game)
            round_2['Team'].append(advancing[2])

    # Four winners per region, appended in region order W, X, Y, Z.
    regionW_winners = surviving_teams[0:4]
    regionX_winners = surviving_teams[4:8]
    regionY_winners = surviving_teams[8:12]
    regionZ_winners = surviving_teams[12:16]

    return pd.DataFrame(round_2), regionW_winners, regionX_winners, regionY_winners, regionZ_winners

In [33]:
def simulate_sweet_16 (regionW_winners, regionX_winners, regionY_winners, regionZ_winners, tourney = 'M', bracket = 1):
    """Simulate the 8 Sweet-16 games of one bracket.

    Parameters
    ----------
    regionW_winners, regionX_winners, regionY_winners, regionZ_winners : list
        The four round-2 survivors of each region, in the order round 2
        produced them.  Each entry is indexed as ``entry[0]`` (the value handed
        to the model), ``entry[1]`` (region letter used in the slot name) and
        ``entry[2]`` (the value written to the ``Team`` column).
    tourney : str
        Tournament code written to every row of the ``Tournament`` column.
    bracket : int
        Bracket number; written to the ``Bracket`` column and used to choose
        which model plays the games.

    Returns
    -------
    tuple
        ``(sweet_16_df, regionW_winners, regionX_winners, regionY_winners,
        regionZ_winners)`` where the data frame has the columns
        ``Tournament``, ``Bracket``, ``Slot``, ``Team`` (8 rows) and each
        winners list holds the two surviving tuples of that region.
    """
    # The first third of the brackets is played by model1, the middle third by
    # model2 and the rest by model3 (thresholds come from the global num_brackets).
    if bracket <= num_brackets/3:
        predict = model1
    elif bracket <= (num_brackets - num_brackets/3):
        predict = model2
    else:
        predict = model3

    # (index of first team, index of second team, slot suffix), in play order.
    matchups = ((0, 1, '1'), (2, 3, '2'))

    # [tourney]*8 (not list(tourney*8)) so multi-character codes still give 8 rows.
    sweet_16 = {'Tournament': [tourney]*8, 'Bracket': [bracket]*8, 'Slot': [], 'Team': []}
    surviving_teams = []

    for region in (regionW_winners, regionX_winners, regionY_winners, regionZ_winners):
        for first, second, game in matchups:
            # Always hand the model element 0 of each entry.  The second game
            # used to pass the whole tuples, unlike every other matchup.
            if predict(region[first][0], region[second][0]) == 1:
                advancing = region[first]
            else:
                advancing = region[second]
            surviving_teams.append((advancing[0], advancing[1], advancing[2]))
            sweet_16['Slot'].append('R3' + advancing[1] + game)
            sweet_16['Team'].append(advancing[2])

    # Two winners per region, appended in region order W, X, Y, Z.
    regionW_winners = surviving_teams[0:2]
    regionX_winners = surviving_teams[2:4]
    regionY_winners = surviving_teams[4:6]
    regionZ_winners = surviving_teams[6:8]

    return pd.DataFrame(sweet_16), regionW_winners, regionX_winners, regionY_winners, regionZ_winners

In [34]:
def simulate_elite_8 (regionW_winners, regionX_winners, regionY_winners, regionZ_winners, tourney = 'M', bracket = 1):
    """Simulate the 4 Elite-8 games (one regional final per region) of one bracket.

    Parameters
    ----------
    regionW_winners, regionX_winners, regionY_winners, regionZ_winners : list
        The two Sweet-16 survivors of each region.  Each entry is indexed as
        ``entry[0]`` (the value handed to the model), ``entry[1]`` (region
        letter used in the slot name) and ``entry[2]`` (the value written to
        the ``Team`` column).
    tourney : str
        Tournament code written to every row of the ``Tournament`` column.
    bracket : int
        Bracket number; written to the ``Bracket`` column and used to choose
        which model plays the games.

    Returns
    -------
    tuple
        ``(elite_8_df, regionW_winner, regionX_winner, regionY_winner,
        regionZ_winner)`` where the data frame has the columns ``Tournament``,
        ``Bracket``, ``Slot``, ``Team`` (4 rows) and each winner is a single
        ``(team, region, seed)`` tuple.
    """
    # The first third of the brackets is played by model1, the middle third by
    # model2 and the rest by model3 (thresholds come from the global num_brackets).
    if bracket <= num_brackets/3:
        predict = model1
    elif bracket <= (num_brackets - num_brackets/3):
        predict = model2
    else:
        predict = model3

    # [tourney]*4 (not list(tourney*4)) so multi-character codes still give 4 rows.
    elite_8 = {'Tournament': [tourney]*4, 'Bracket': [bracket]*4, 'Slot': [], 'Team': []}
    surviving_teams = []

    for region in (regionW_winners, regionX_winners, regionY_winners, regionZ_winners):
        # A model result of 1 means the first team advances; anything else, the second.
        if predict(region[0][0], region[1][0]) == 1:
            advancing = region[0]
        else:
            advancing = region[1]
        surviving_teams.append((advancing[0], advancing[1], advancing[2]))
        elite_8['Slot'].append('R4' + advancing[1] + '1')
        elite_8['Team'].append(advancing[2])

    # One champion per region, appended in region order W, X, Y, Z.
    regionW_winner = surviving_teams[0]
    regionX_winner = surviving_teams[1]
    regionY_winner = surviving_teams[2]
    regionZ_winner = surviving_teams[3]

    return pd.DataFrame(elite_8), regionW_winner, regionX_winner, regionY_winner, regionZ_winner

In [35]:
def simulate_final_4 (regionW_winner, regionX_winner, regionY_winner, regionZ_winner, tourney = 'M', bracket = 1):
    """Simulate the two national semifinals of one bracket.

    Region W's champion plays region X's (slot ``R5WX``) and region Y's plays
    region Z's (slot ``R5YZ``), in that order.

    Parameters
    ----------
    regionW_winner, regionX_winner, regionY_winner, regionZ_winner : tuple
        Regional champions.  Element 0 is handed to the model and element 2 is
        written to the ``Team`` column.
    tourney : str
        Tournament code written to every row of the ``Tournament`` column.
    bracket : int
        Bracket number; written to the ``Bracket`` column and used to choose
        which model plays the games.

    Returns
    -------
    tuple
        ``(final_4_df, surviving_teams)`` where the data frame has the columns
        ``Tournament``, ``Bracket``, ``Slot``, ``Team`` (2 rows) and
        ``surviving_teams`` is ``[WX winner, YZ winner]`` (the input tuples
        themselves).
    """
    # The first third of the brackets is played by model1, the middle third by
    # model2 and the rest by model3 (thresholds come from the global num_brackets).
    if bracket <= num_brackets/3:
        predict = model1
    elif bracket <= (num_brackets - num_brackets/3):
        predict = model2
    else:
        predict = model3

    # [tourney]*2 (not list(tourney*2)) so multi-character codes still give 2 rows.
    final_4 = {'Tournament': [tourney]*2, 'Bracket': [bracket]*2, 'Slot': [], 'Team': []}
    surviving_teams = []

    semifinals = (
        (regionW_winner, regionX_winner, 'R5WX'),
        (regionY_winner, regionZ_winner, 'R5YZ'),
    )
    for first_team, second_team, slot in semifinals:
        # A model result of 1 means the first team advances; anything else, the second.
        if predict(first_team[0], second_team[0]) == 1:
            advancing = first_team
        else:
            advancing = second_team
        surviving_teams.append(advancing)
        final_4['Slot'].append(slot)
        final_4['Team'].append(advancing[2])

    return pd.DataFrame(final_4), surviving_teams

In [36]:
def simulate_championship (ch_team_1, ch_team_2, tourney = 'M', bracket = 1):
    """Simulate the championship game of one bracket.

    Parameters
    ----------
    ch_team_1, ch_team_2 : tuple
        The two finalists.  Element 0 is handed to the model and element 2 is
        written to the ``Team`` column.
    tourney : str
        Tournament code written to the ``Tournament`` column.
    bracket : int
        Bracket number; written to the ``Bracket`` column and used to choose
        which model plays the game.

    Returns
    -------
    tuple
        ``(championship_df, winner)`` where the data frame is a single row
        with the columns ``Tournament``, ``Bracket``, ``Slot`` (``'R6CH'``),
        ``Team`` and ``winner`` is the winning finalist tuple.
    """
    # The first third of the brackets is played by model1, the middle third by
    # model2 and the rest by model3 (thresholds come from the global num_brackets).
    if bracket <= num_brackets/3:
        predict = model1
    elif bracket <= (num_brackets - num_brackets/3):
        predict = model2
    else:
        predict = model3

    # As in the earlier rounds, a result of 1 means the first team wins and any
    # other value means the second team wins.  Requiring exactly 2 would leave
    # Slot/Team empty and make the DataFrame construction fail.
    if predict(ch_team_1[0], ch_team_2[0]) == 1:
        winner = ch_team_1
    else:
        winner = ch_team_2

    # One-element lists so every column has the same length for any tourney code.
    championship = {
        'Tournament': [tourney],
        'Bracket': [bracket],
        'Slot': ['R6'+ 'CH'],
        'Team': [winner[2]],
    }

    return pd.DataFrame(championship), winner

## Run the Tournament!


In [37]:
path = '/kaggle/input/march-machine-learning-mania-2024/'
df = pd.read_csv(path+'2024_tourney_seeds.csv')

# Split the seed table by tournament code: 'M' rows drive the men's
# simulation, 'W' rows the women's.
dfM = df[df['Tournament'] == 'M']
dfW = df[df['Tournament'] == 'W']

# Collect every per-round frame and concatenate once after the loop.
# Concatenating onto a growing frame inside the loop re-copies all earlier
# rows on each of the 1000 iterations.
bracket_frames = []

# NOTE(review): the simulate_* functions choose their model by comparing the
# bracket number with the global num_brackets; confirm it matches the 1000
# brackets generated here, otherwise the three-way model split is uneven.
for bracket_num in range(1, 1001):

    ## Run the Mens Tournament:
    round_1_df, regW_32, regX_32, regY_32, regZ_32 = simulate_round_1(dfM, tourney = 'M', bracket = bracket_num)
    round_2_df, regW_16, regX_16, regY_16, regZ_16 = simulate_round_2(regW_32, regX_32, regY_32, regZ_32, tourney = 'M', bracket = bracket_num)
    sweet_16_df, regW_8, regX_8, regY_8, regZ_8 = simulate_sweet_16(regW_16, regX_16, regY_16, regZ_16, tourney = 'M', bracket = bracket_num)
    elite_8_df, regW_4, regX_4, regY_4, regZ_4 = simulate_elite_8(regW_8, regX_8, regY_8, regZ_8, tourney = 'M', bracket = bracket_num)
    final_4_df, finals = simulate_final_4(regW_4, regX_4, regY_4, regZ_4, tourney = 'M', bracket = bracket_num)
    champ_df, winner = simulate_championship(finals[0], finals[1], tourney = 'M', bracket = bracket_num)

    ## Run the Womens Tournament (same bracket number, women's seeds):
    round_1W_df, regW_32, regX_32, regY_32, regZ_32 = simulate_round_1(dfW, tourney = 'W', bracket = bracket_num)
    round_2W_df, regW_16, regX_16, regY_16, regZ_16 = simulate_round_2(regW_32, regX_32, regY_32, regZ_32, tourney = 'W', bracket = bracket_num)
    sweet_16W_df, regW_8, regX_8, regY_8, regZ_8 = simulate_sweet_16(regW_16, regX_16, regY_16, regZ_16, tourney = 'W', bracket = bracket_num)
    elite_8W_df, regW_4, regX_4, regY_4, regZ_4 = simulate_elite_8(regW_8, regX_8, regY_8, regZ_8, tourney = 'W', bracket = bracket_num)
    final_4W_df, finals = simulate_final_4(regW_4, regX_4, regY_4, regZ_4, tourney = 'W', bracket = bracket_num)
    champW_df, winner = simulate_championship(finals[0], finals[1], tourney = 'W', bracket = bracket_num)

    # Same row order as before: all men's rounds, then all women's rounds.
    bracket_frames.extend([round_1_df, round_2_df, sweet_16_df, elite_8_df, final_4_df, champ_df,
                           round_1W_df, round_2W_df, sweet_16W_df, elite_8W_df, final_4W_df, champW_df])


# Format Submission DF:

submission = pd.concat(bracket_frames, axis=0)

# RowId is a 1-based running row number and becomes the CSV's first column.
submission['RowId'] = range(1, submission.shape[0]+1)
submission = submission[['RowId', 'Tournament', 'Bracket', 'Slot', 'Team']]
submission = submission.set_index('RowId')
submission.to_csv('submission.csv')
