# Compare to Other Models

- Robert Shaw
- Data Driven March Madness

In this file, we test code written in march_madness_classes. The code allows us to compare the results of different brackets in order to see which one is better, or to see the similiarity between the two.

In [1]:
import march_madness_classes as mmc
import march_madness_games as mmg
import march_madness_models as mmm
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression as LogReg

In [3]:
# read in the dataset
seeds = pd.read_csv("datasets/kaggle_data_2021/MNCAATourneySeeds.csv")
slots = pd.read_csv("datasets/kaggle_data_2021/MNCAATourneySlots.csv")
games = pd.read_csv("datasets/kaggle_data_2021/MNCAATourneyCompactResults.csv")

seeds_arr = mmg.filter_into_seasons(seeds)
slots_arr = mmg.filter_into_seasons(slots)
games_arr = mmg.filter_into_seasons(games)

In [4]:
# extract predictors
markov_data = pd.read_csv("datasets/our_data/stationary", index_col=0)
consistency = pd.read_csv("datasets/our_data/consistency", index_col=0)
dominance = pd.read_csv("datasets/our_data/dominance", index_col=0)
past_resul = pd.read_csv("datasets/our_data/past_results", index_col=0)
rpi = pd.read_csv("datasets/our_data/rpi", index_col=0)
bad_losses = pd.read_csv("datasets/our_data/bad_losses", index_col=0)
tough_wins = pd.read_csv("datasets/our_data/tough_wins", index_col=0)
close_games= pd.read_csv("datasets/our_data/close_games", index_col=0)
close_wins = pd.read_csv("datasets/our_data/close_wins",index_col=0)
close_wins_perc = pd.read_csv("datasets/our_data/close_wins_perc", index_col=0)
momentum = pd.read_csv("datasets/our_data/momentum", index_col=0)
weighted_wins = pd.read_csv("datasets/our_data/weighted_wins", index_col=0)
seed_matrix_df = pd.read_csv("datasets/our_data/team_summary_data/seeds_matrix", index_col=0)

In [5]:
# get data into correct format
predictor_names = ["min_index_id", "max_index_id", "markov", "dominance", "rpi", "bad_losses", "tough_wins", "close_wins", "close_wins_perc", "weighted_wins", "past_resul", "momentum"] 
predictor_dfs = [markov_data, dominance, rpi, bad_losses, tough_wins, close_wins, close_wins_perc, weighted_wins, past_resul, momentum]                                           

In [6]:
pred, resp = mmg.generate_multiple_years_of_games(range(1987, 2001), 
                                                  seeds_arr, 
                                                  slots_arr, 
                                                  games_arr, 
                                                  ["min_index_id", "max_index_id", "markov", "dominance", "rpi", "bad_losses", "tough_wins", "close_wins", "close_wins_perc", "weighted_wins", "past_resul", "momentum"], 
                                                  [markov_data, dominance, rpi, bad_losses, tough_wins, close_wins, close_wins_perc, weighted_wins, past_resul, momentum],
                                                  scoring_dif = False)


Season        1987
Slot          R1W1
StrongSeed     W01
WeakSeed       W16
Name: 126, dtype: object
Season        1987
Slot          R1W2
StrongSeed     W02
WeakSeed       W15
Name: 127, dtype: object
Season        1987
Slot          R1W3
StrongSeed     W03
WeakSeed       W14
Name: 128, dtype: object
Season        1987
Slot          R1W4
StrongSeed     W04
WeakSeed       W13
Name: 129, dtype: object
Season        1987
Slot          R1W5
StrongSeed     W05
WeakSeed       W12
Name: 130, dtype: object
Season        1987
Slot          R1W6
StrongSeed     W06
WeakSeed       W11
Name: 131, dtype: object
Season        1987
Slot          R1W7
StrongSeed     W07
WeakSeed       W10
Name: 132, dtype: object
Season        1987
Slot          R1W8
StrongSeed     W08
WeakSeed       W09
Name: 133, dtype: object
Season        1987
Slot          R1X1
StrongSeed     X01
WeakSeed       X16
Name: 134, dtype: object
Season        1987
Slot          R1X2
StrongSeed     X02
WeakSeed       X15
Name: 135, dtyp

Season        1990
Slot          R1W1
StrongSeed     W01
WeakSeed       W16
Name: 315, dtype: object
Season        1990
Slot          R1W2
StrongSeed     W02
WeakSeed       W15
Name: 316, dtype: object
Season        1990
Slot          R1W3
StrongSeed     W03
WeakSeed       W14
Name: 317, dtype: object
Season        1990
Slot          R1W4
StrongSeed     W04
WeakSeed       W13
Name: 318, dtype: object
Season        1990
Slot          R1W5
StrongSeed     W05
WeakSeed       W12
Name: 319, dtype: object
Season        1990
Slot          R1W6
StrongSeed     W06
WeakSeed       W11
Name: 320, dtype: object
Season        1990
Slot          R1W7
StrongSeed     W07
WeakSeed       W10
Name: 321, dtype: object
Season        1990
Slot          R1W8
StrongSeed     W08
WeakSeed       W09
Name: 322, dtype: object
Season        1990
Slot          R1X1
StrongSeed     X01
WeakSeed       X16
Name: 323, dtype: object
Season        1990
Slot          R1X2
StrongSeed     X02
WeakSeed       X15
Name: 324, dtyp

Season        1993
Slot          R1W1
StrongSeed     W01
WeakSeed       W16
Name: 504, dtype: object
Season        1993
Slot          R1W2
StrongSeed     W02
WeakSeed       W15
Name: 505, dtype: object
Season        1993
Slot          R1W3
StrongSeed     W03
WeakSeed       W14
Name: 506, dtype: object
Season        1993
Slot          R1W4
StrongSeed     W04
WeakSeed       W13
Name: 507, dtype: object
Season        1993
Slot          R1W5
StrongSeed     W05
WeakSeed       W12
Name: 508, dtype: object
Season        1993
Slot          R1W6
StrongSeed     W06
WeakSeed       W11
Name: 509, dtype: object
Season        1993
Slot          R1W7
StrongSeed     W07
WeakSeed       W10
Name: 510, dtype: object
Season        1993
Slot          R1W8
StrongSeed     W08
WeakSeed       W09
Name: 511, dtype: object
Season        1993
Slot          R1X1
StrongSeed     X01
WeakSeed       X16
Name: 512, dtype: object
Season        1993
Slot          R1X2
StrongSeed     X02
WeakSeed       X15
Name: 513, dtyp

Season        1996
Slot          R1W1
StrongSeed     W01
WeakSeed       W16
Name: 693, dtype: object
Season        1996
Slot          R1W2
StrongSeed     W02
WeakSeed       W15
Name: 694, dtype: object
Season        1996
Slot          R1W3
StrongSeed     W03
WeakSeed       W14
Name: 695, dtype: object
Season        1996
Slot          R1W4
StrongSeed     W04
WeakSeed       W13
Name: 696, dtype: object
Season        1996
Slot          R1W5
StrongSeed     W05
WeakSeed       W12
Name: 697, dtype: object
Season        1996
Slot          R1W6
StrongSeed     W06
WeakSeed       W11
Name: 698, dtype: object
Season        1996
Slot          R1W7
StrongSeed     W07
WeakSeed       W10
Name: 699, dtype: object
Season        1996
Slot          R1W8
StrongSeed     W08
WeakSeed       W09
Name: 700, dtype: object
Season        1996
Slot          R1X1
StrongSeed     X01
WeakSeed       X16
Name: 701, dtype: object
Season        1996
Slot          R1X2
StrongSeed     X02
WeakSeed       X15
Name: 702, dtyp

Season        1999
Slot          R1W1
StrongSeed     W01
WeakSeed       W16
Name: 882, dtype: object
Season        1999
Slot          R1W2
StrongSeed     W02
WeakSeed       W15
Name: 883, dtype: object
Season        1999
Slot          R1W3
StrongSeed     W03
WeakSeed       W14
Name: 884, dtype: object
Season        1999
Slot          R1W4
StrongSeed     W04
WeakSeed       W13
Name: 885, dtype: object
Season        1999
Slot          R1W5
StrongSeed     W05
WeakSeed       W12
Name: 886, dtype: object
Season        1999
Slot          R1W6
StrongSeed     W06
WeakSeed       W11
Name: 887, dtype: object
Season        1999
Slot          R1W7
StrongSeed     W07
WeakSeed       W10
Name: 888, dtype: object
Season        1999
Slot          R1W8
StrongSeed     W08
WeakSeed       W09
Name: 889, dtype: object
Season        1999
Slot          R1X1
StrongSeed     X01
WeakSeed       X16
Name: 890, dtype: object
Season        1999
Slot          R1X2
StrongSeed     X02
WeakSeed       X15
Name: 891, dtyp

In [7]:
scaler = StandardScaler().fit(pred.iloc[:, 2:])

log_reg_model = LogReg(C = 10)
log_reg_model.fit(scaler.transform(pred.iloc[:, 2:]), resp.values.T[0])



LogisticRegression(C=10, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

#### Test on 2002

In [8]:
reload(mmc)
reload(mmm)

NameError: name 'reload' is not defined

In [9]:
year = 2005
seeds = seeds_arr[year - 1985]
slots = slots_arr[year - 1985]
resul = games_arr[year - 1985]

# biased model ---> predicts upsets with proability p + .05 (where p comes from log reg model)
head_to_head_biased_model = mmm.ModelPredictor(log_reg_model, 
                                               scaler,
                                               predictor_dfs, 
                                               year, 
                                               simulation=False, 
                                               higher_seed_bias=True, 
                                               seeds_df=seeds, 
                                               higher_seed_bias_delta=.05)


# biased model with cooling ----> predicts upsets with proability p + .05 * cooling_factor (where p comes from log reg model)
head_to_head_biased_model_cooling = mmm.ModelPredictor(log_reg_model, 
                                                       scaler, 
                                                       predictor_dfs, 
                                                       year, 
                                                       seeds_df=seeds, 
                                                       simulation=False, 
                                                       higher_seed_bias=True, 
                                                       higher_seed_bias_delta=.01,
                                                       cooling = {6:10, 5:10, 4:10, 3:10, 2:-5, 1:-5}
                                                      )

# unbiased model ----> predicts based on head to head log reg model
head_to_head_unbiased_model = mmm.ModelPredictor(log_reg_model, scaler, predictor_dfs, year, simulation=False, higher_seed_bias=False, seeds_df=seeds)


# for comparison
tourney_actual = mmc.Tournament(seeds, slots, mmm.ActualTournament(resul))
tourney_top_seed = mmc.Tournament(seeds, slots, mmm.BasicPredictor())

# predict tournament
tourney_biased_model = mmc.Tournament(seeds, slots, head_to_head_biased_model)
tourney_biased_model_cooling = mmc.Tournament(seeds, slots, head_to_head_biased_model_cooling)
tourney_unbiased_model = mmc.Tournament(seeds, slots, head_to_head_unbiased_model)

Season        2005
Slot          R1W1
StrongSeed     W01
WeakSeed       W16
Name: 1264, dtype: object
Season        2005
Slot          R1W2
StrongSeed     W02
WeakSeed       W15
Name: 1265, dtype: object
Season        2005
Slot          R1W3
StrongSeed     W03
WeakSeed       W14
Name: 1266, dtype: object
Season        2005
Slot          R1W4
StrongSeed     W04
WeakSeed       W13
Name: 1267, dtype: object
Season        2005
Slot          R1W5
StrongSeed     W05
WeakSeed       W12
Name: 1268, dtype: object
Season        2005
Slot          R1W6
StrongSeed     W06
WeakSeed       W11
Name: 1269, dtype: object
Season        2005
Slot          R1W7
StrongSeed     W07
WeakSeed       W10
Name: 1270, dtype: object
Season        2005
Slot          R1W8
StrongSeed     W08
WeakSeed       W09
Name: 1271, dtype: object
Season        2005
Slot          R1X1
StrongSeed     X01
WeakSeed       X16
Name: 1272, dtype: object
Season        2005
Slot          R1X2
StrongSeed     X02
WeakSeed       X15
Name: 

Season        2005
Slot          R1W1
StrongSeed     W01
WeakSeed       W16
Name: 1264, dtype: object
Season        2005
Slot          R1W2
StrongSeed     W02
WeakSeed       W15
Name: 1265, dtype: object
Season        2005
Slot          R1W3
StrongSeed     W03
WeakSeed       W14
Name: 1266, dtype: object
Season        2005
Slot          R1W4
StrongSeed     W04
WeakSeed       W13
Name: 1267, dtype: object
Season        2005
Slot          R1W5
StrongSeed     W05
WeakSeed       W12
Name: 1268, dtype: object
Season        2005
Slot          R1W6
StrongSeed     W06
WeakSeed       W11
Name: 1269, dtype: object
Season        2005
Slot          R1W7
StrongSeed     W07
WeakSeed       W10
Name: 1270, dtype: object
Season        2005
Slot          R1W8
StrongSeed     W08
WeakSeed       W09
Name: 1271, dtype: object
Season        2005
Slot          R1X1
StrongSeed     X01
WeakSeed       X16
Name: 1272, dtype: object
Season        2005
Slot          R1X2
StrongSeed     X02
WeakSeed       X15
Name: 

In [10]:
tourney_biased_model.compare_to_dif_tournament(tourney_actual, tourney_top_seed, print_res=True)

Number Correct Our Model     : 10, Number Correct Dif Model : 3
R1: Number Correct Our Model : 3, Number Correct Dif Model : 2
R2: Number Correct Our Model : 2, Number Correct Dif Model : 0
R3: Number Correct Our Model : 1, Number Correct Dif Model : 1
R4: Number Correct Our Model : 1, Number Correct Dif Model : 0
R5: Number Correct Our Model : 2, Number Correct Dif Model : 0
R6: Number Correct Our Model : 1, Number Correct Dif Model : 0


(15, 10, 3)