# Setup

In [1]:
import pdb
import re

import numpy as np
import pandas as pd

from sklearn.impute import SimpleImputer

In [2]:
# Show (up to 999) columns when a wide frame is rendered instead of eliding the middle ones.
pd.set_option('display.max_columns', 999)

In [3]:
def display_df(df, n=1, tail=False, title=None):
    """Render a short preview of a DataFrame in the notebook output.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to preview.
    n : int, default 1
        Number of rows taken from the head (and from the tail when requested).
    tail : bool, default False
        When truthy, the last ``n`` rows are shown in addition to the first ``n``.
    title : str, optional
        Heading printed (followed by a colon) before the preview; skipped when falsy.

    Returns
    -------
    None
        The function only prints / displays.
    """
    if title:
        print(title + ':')

    # Head first, then the optional tail, and always the shape last.
    preview = [df.head(n)]
    if tail:
        preview.append(df.tail(n))
    preview.append(df.shape)
    display(*preview)

In [4]:
# Directory prefixes, relative to the notebook's working directory. They are joined to file
# names with plain string concatenation further down, so the trailing slash is required.
data_raw_dir = '../data/raw/'  # input CSVs are read from here
data_interim_dir = '../data/interim/'  # processed datasets are written here

# Read and Process Datasets

In [5]:
def process_seeds(df):
    """Convert raw tournament seed strings into a numeric seed plus a play-in flag.

    Characters 1-2 of each ``Seed`` string are parsed as the integer seed, and a value
    ending in ``'a'`` or ``'b'`` is flagged as a play-in entry.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string ``Seed`` column.

    Returns
    -------
    pandas.DataFrame
        New frame in which ``Seed`` holds integers and a boolean ``IsPlayIn`` column
        has been appended. ``df`` itself is not modified.
    """
    def _seed_number(seed):
        return int(seed[1:3])

    def _is_play_in(seed):
        return str(seed).endswith(('a', 'b'))

    # Both new columns are derived from the *raw* seed strings.
    raw_seed = df.Seed
    return df.assign(
        Seed=raw_seed.map(_seed_number),
        IsPlayIn=raw_seed.map(_is_play_in),
    )


def process_games(df):
    """Turn winner/loser game rows into order-independent match-ups with a label.

    Each game is re-keyed so that ``TeamOneID`` is the smaller and ``TeamTwoID`` the larger
    of ``WTeamID`` / ``LTeamID``. ``Label`` is 1 when team one is the winner and 0 otherwise.
    The computation is vectorised (no row-wise ``apply``), so the new columns keep an
    integer dtype and an empty input frame is handled.

    Parameters
    ----------
    df : pandas.DataFrame
        Game results containing at least ``WTeamID``, ``LTeamID``, ``DayNum``, ``WScore``,
        ``LScore``, ``WLoc`` and ``NumOT``.

    Returns
    -------
    pandas.DataFrame
        New frame with ``TeamOneID``, ``TeamTwoID`` and ``Label`` added and the
        winner/loser specific columns listed above removed. ``df`` is not modified.

    Raises
    ------
    KeyError
        If one of the columns to drop is missing from ``df``.
    """
    team_one = np.minimum(df.WTeamID, df.LTeamID)
    team_two = np.maximum(df.WTeamID, df.LTeamID)

    return (
        df
        .assign(
            TeamOneID=team_one,
            TeamTwoID=team_two,
            # 1 -> the lower-id team won, 0 -> the higher-id team won
            Label=(team_one == df.WTeamID).astype(int),
        )
        .drop(['WTeamID', 'LTeamID', 'DayNum', 'WScore', 'LScore', 'WLoc', 'NumOT'], axis=1)
    )


def process_submission_games(df):
    """Expand the submission file's ``ID`` strings into season and team-id columns.

    ``ID`` is split on ``'_'``; the three pieces are cast to int and stored as ``Season``,
    ``TeamOneID`` and ``TeamTwoID`` (in that order).

    Parameters
    ----------
    df : pandas.DataFrame
        Submission template with ``ID`` and ``Pred`` columns.

    Returns
    -------
    pandas.DataFrame
        New frame holding the three parsed columns; ``ID`` and ``Pred`` are dropped.
    """
    id_parts = df.ID.str.split('_', expand=True)

    parsed = df.assign(
        Season=id_parts[0].astype(int),
        TeamOneID=id_parts[1].astype(int),
        TeamTwoID=id_parts[2].astype(int),
    )
    return parsed.drop(['ID', 'Pred'], axis=1)

def process_regular_season_detailed_results(df):
    """Reshape winner/loser game rows into one row per team per game.

    The frame is renamed twice and the two results are stacked:

    * winner's view - the leading ``W`` is stripped from column names, and columns that
      start with ``L`` lose that letter and gain an ``_opp`` suffix;
    * loser's view - the same with the roles of ``W`` and ``L`` swapped.

    Columns starting with neither letter are kept as they are. A column that exists for
    only one side in the input (the preview output shows this for ``Loc`` / ``Loc_opp``)
    ends up populated for only one of the two views.

    Parameters
    ----------
    df : pandas.DataFrame
        Game-level results with ``W``/``L`` prefixed column names.

    Returns
    -------
    pandas.DataFrame
        Concatenation of both views; the original index labels are kept, so each label
        appears twice.
    """
    def _perspective_renamer(own_prefix, opp_prefix):
        """Build the column renamer for the side whose columns start with ``own_prefix``."""
        def _rename(column):
            if column.startswith(own_prefix):
                return column[len(own_prefix):]
            if column.startswith(opp_prefix):
                return column[len(opp_prefix):] + '_opp'
            return column
        return _rename

    perspectives = [
        df.rename(columns=_perspective_renamer('W', 'L')),  # winner's view
        df.rename(columns=_perspective_renamer('L', 'W')),  # loser's view
    ]
    return pd.concat(perspectives)

In [6]:
# Every raw CSV is loaded and handed straight to its matching processing function.

# Tournament seeds
df_seeds = process_seeds(
    pd.read_csv(data_raw_dir + 'DataFiles/NCAATourneySeeds.csv')
)

# Tournament games (compact results)
df_games = process_games(
    pd.read_csv(data_raw_dir + 'DataFiles/NCAATourneyCompactResults.csv')
)

# Match-ups listed in the sample submission
df_sub = process_submission_games(
    pd.read_csv(data_raw_dir + 'SampleSubmissionStage1.csv')
)

# Regular season detailed results, one row per team per game
df_rs_d_res = process_regular_season_detailed_results(
    pd.read_csv(data_raw_dir + 'DataFiles/RegularSeasonDetailedResults.csv')
)

In [7]:
# One-row preview (plus shape) of each processed dataset.
display_df(df=df_seeds, title="Seeds")
display_df(df=df_games, title="Games")
display_df(df=df_sub, title="SubmissionGames")
display_df(df=df_rs_d_res, title="Regular Season Detailed Results")

Seeds:


Unnamed: 0,Season,Seed,TeamID,IsPlayIn
0,1985,1,1207,False


(2218, 4)

Games:


Unnamed: 0,Season,Label,TeamOneID,TeamTwoID
0,1985,1,1116,1234


(2184, 4)

SubmissionGames:


Unnamed: 0,Season,TeamOneID,TeamTwoID
0,2014,1107,1110


(11390, 3)

Regular Season Detailed Results:


Unnamed: 0,Ast,Ast_opp,Blk,Blk_opp,DR,DR_opp,DayNum,FGA,FGA3,FGA3_opp,FGA_opp,FGM,FGM3,FGM3_opp,FGM_opp,FTA,FTA_opp,FTM,FTM_opp,Loc,Loc_opp,NumOT,OR,OR_opp,PF,PF_opp,Score,Score_opp,Season,Stl,Stl_opp,TO,TO_opp,TeamID,TeamID_opp
0,13,8,1,2,24,22,10,58,14,10,53,27,3,2,22,18,22,11,16,N,,0,14,10,22,20,68,62,2003,7,9,23,18,1104,1328


(164082, 35)

# Aggregate Datasets

In [8]:
# Season-level profile per team: the mean of every numeric column over all rows that share
# a (TeamID, Season) pair. df_rs_d_res holds one row per team per game, so wins and losses
# both contribute.
df_rs_d_res_agg = (
    df_rs_d_res
    .groupby(['TeamID', 'Season'])
    # The frame still carries the string columns Loc / Loc_opp. Ask for numeric columns
    # explicitly: newer pandas raises a TypeError on them instead of dropping them silently.
    .mean(numeric_only=True)
    # Averages of an opponent id or a day number carry no meaning.
    .drop(['TeamID_opp', 'DayNum'], axis=1)
    .reset_index()
)

In [9]:
# One-row preview (plus shape) of the per-team, per-season averages.
display_df(
    df=df_rs_d_res_agg,
    title="Average Regular Season Detailed Results",
)

Average Regular Season Detailed Results:


Unnamed: 0,TeamID,Season,Ast,Ast_opp,Blk,Blk_opp,DR,DR_opp,FGA,FGA3,FGA3_opp,FGA_opp,FGM,FGM3,FGM3_opp,FGM_opp,FTA,FTA_opp,FTM,FTM_opp,NumOT,OR,OR_opp,PF,PF_opp,Score,Score_opp,Stl,Stl_opp,TO,TO_opp
0,1101,2014,10.0,15.571429,1.47619,5.0,20.333333,24.095238,50.142857,17.857143,16.190476,53.428571,20.333333,6.666667,6.0,27.142857,21.190476,25.809524,15.809524,18.333333,0.142857,8.0,10.380952,21.571429,18.666667,63.142857,78.619048,5.761905,7.0,15.0,12.142857


(5481, 31)

# Merge Datasets

In [10]:
def merge_seed_dataset(df, df_seeds, submission_file):
    """Attach both teams' tournament seeds to a match-up frame.

    ``df_seeds`` is inner-joined twice on ``Season`` plus the team id, once for team one
    and once for team two, so match-ups without a seed for either team are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Match-ups with ``Season``, ``TeamOneID`` and ``TeamTwoID`` columns.
    df_seeds : pandas.DataFrame
        Seeds with ``Season``, ``TeamID``, ``Seed`` and ``IsPlayIn`` columns.
    submission_file : bool
        When falsy, rows in which *both* teams are flagged as play-in entries are removed.

    Returns
    -------
    pandas.DataFrame
        ``df`` plus ``TeamOne_Seed`` and ``TeamTwo_Seed``; the helper play-in columns are
        dropped again. Index labels are strings.
    """
    def _attach_seed(games, side):
        """Inner-join the seed table onto ``games`` for ``side`` ('TeamOne' / 'TeamTwo')."""
        id_col = side + 'ID'
        seeds_for_side = df_seeds.rename(columns={'TeamID': id_col})
        joined = games.merge(seeds_for_side, on=['Season', id_col])
        return joined.rename(columns={'Seed': side + '_Seed', 'IsPlayIn': side + 'IsPlayIn'})

    # The string index labels of the result are kept as part of the existing contract.
    df_out = _attach_seed(_attach_seed(df, 'TeamOne'), 'TeamTwo').rename(index=str)

    if not submission_file:
        both_play_in = df_out.TeamOneIsPlayIn & df_out.TeamTwoIsPlayIn
        df_out = df_out.loc[~both_play_in]  # remove play-in games

    return df_out.drop(['TeamOneIsPlayIn', 'TeamTwoIsPlayIn'], axis=1)


def merge_aggregated_regular_season_detailed_results(df, df_rs_d_res_agg):
    """Attach both teams' aggregated regular-season statistics to a match-up frame.

    The aggregate table is left-joined twice on ``Season`` plus the team id, so match-ups
    without statistics are kept and filled with NaN. The second join disambiguates the
    overlapping statistic names with ``_TeamOne`` / ``_TeamTwo`` suffixes, which are then
    moved to the front (``Ast_TeamOne`` -> ``TeamOne_Ast``).

    Parameters
    ----------
    df : pandas.DataFrame
        Match-ups with ``Season``, ``TeamOneID`` and ``TeamTwoID`` columns.
    df_rs_d_res_agg : pandas.DataFrame
        Per-team, per-season statistics keyed by ``TeamID`` and ``Season``.

    Returns
    -------
    pandas.DataFrame
        ``df`` plus one ``TeamOne_<stat>`` and one ``TeamTwo_<stat>`` column per statistic.
    """
    def _suffix_to_prefix(old_name):
        for side in ('TeamOne', 'TeamTwo'):
            suffix = '_' + side
            if old_name.endswith(suffix):
                return '{}_{}'.format(side, old_name.replace(suffix, ''))
        return old_name

    stats_team_one = df_rs_d_res_agg.rename(columns={'TeamID': 'TeamOneID'})
    stats_team_two = df_rs_d_res_agg.rename(columns={'TeamID': 'TeamTwoID'})

    # First join: no column names clash yet, so the statistics come in unsuffixed.
    with_team_one = df.merge(stats_team_one, on=['Season', 'TeamOneID'], how='left')
    # Second join: every statistic now clashes and picks up its side suffix.
    with_both_teams = with_team_one.merge(
        stats_team_two,
        on=['Season', 'TeamTwoID'],
        how='left',
        suffixes=('_TeamOne', '_TeamTwo'),
    )
    return with_both_teams.rename(columns=_suffix_to_prefix)

In [24]:
# Both datasets go through the same two joins: seeds first, then season averages.

# Historical tournament games (modeling set); games between two play-in entries are removed
df_interim_mdl = merge_aggregated_regular_season_detailed_results(
    merge_seed_dataset(df_games, df_seeds=df_seeds, submission_file=False),
    df_rs_d_res_agg=df_rs_d_res_agg,
)

# Submission match-ups; every row is kept
df_interim_sub = merge_aggregated_regular_season_detailed_results(
    merge_seed_dataset(df_sub, df_seeds=df_seeds, submission_file=True),
    df_rs_d_res_agg=df_rs_d_res_agg,
)

In [25]:
# Modeling set: first and last five rows. Submission set: first two rows.
display_df(df=df_interim_mdl, n=5, tail=True)
display_df(df=df_interim_sub, n=2)

Unnamed: 0,Season,Label,TeamOneID,TeamTwoID,TeamOne_Seed,TeamTwo_Seed,TeamOne_Ast,TeamOne_Ast_opp,TeamOne_Blk,TeamOne_Blk_opp,TeamOne_DR,TeamOne_DR_opp,TeamOne_FGA,TeamOne_FGA3,TeamOne_FGA3_opp,TeamOne_FGA_opp,TeamOne_FGM,TeamOne_FGM3,TeamOne_FGM3_opp,TeamOne_FGM_opp,TeamOne_FTA,TeamOne_FTA_opp,TeamOne_FTM,TeamOne_FTM_opp,TeamOne_NumOT,TeamOne_OR,TeamOne_OR_opp,TeamOne_PF,TeamOne_PF_opp,TeamOne_Score,TeamOne_Score_opp,TeamOne_Stl,TeamOne_Stl_opp,TeamOne_TO,TeamOne_TO_opp,TeamTwo_Ast,TeamTwo_Ast_opp,TeamTwo_Blk,TeamTwo_Blk_opp,TeamTwo_DR,TeamTwo_DR_opp,TeamTwo_FGA,TeamTwo_FGA3,TeamTwo_FGA3_opp,TeamTwo_FGA_opp,TeamTwo_FGM,TeamTwo_FGM3,TeamTwo_FGM3_opp,TeamTwo_FGM_opp,TeamTwo_FTA,TeamTwo_FTA_opp,TeamTwo_FTM,TeamTwo_FTM_opp,TeamTwo_NumOT,TeamTwo_OR,TeamTwo_OR_opp,TeamTwo_PF,TeamTwo_PF_opp,TeamTwo_Score,TeamTwo_Score_opp,TeamTwo_Stl,TeamTwo_Stl_opp,TeamTwo_TO,TeamTwo_TO_opp
0,1985,1,1116,1234,9,8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1985,0,1116,1385,9,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1985,1,1207,1385,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1985,0,1246,1385,12,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1985,0,1380,1385,16,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,Season,Label,TeamOneID,TeamTwoID,TeamOne_Seed,TeamTwo_Seed,TeamOne_Ast,TeamOne_Ast_opp,TeamOne_Blk,TeamOne_Blk_opp,TeamOne_DR,TeamOne_DR_opp,TeamOne_FGA,TeamOne_FGA3,TeamOne_FGA3_opp,TeamOne_FGA_opp,TeamOne_FGM,TeamOne_FGM3,TeamOne_FGM3_opp,TeamOne_FGM_opp,TeamOne_FTA,TeamOne_FTA_opp,TeamOne_FTM,TeamOne_FTM_opp,TeamOne_NumOT,TeamOne_OR,TeamOne_OR_opp,TeamOne_PF,TeamOne_PF_opp,TeamOne_Score,TeamOne_Score_opp,TeamOne_Stl,TeamOne_Stl_opp,TeamOne_TO,TeamOne_TO_opp,TeamTwo_Ast,TeamTwo_Ast_opp,TeamTwo_Blk,TeamTwo_Blk_opp,TeamTwo_DR,TeamTwo_DR_opp,TeamTwo_FGA,TeamTwo_FGA3,TeamTwo_FGA3_opp,TeamTwo_FGA_opp,TeamTwo_FGM,TeamTwo_FGM3,TeamTwo_FGM3_opp,TeamTwo_FGM_opp,TeamTwo_FTA,TeamTwo_FTA_opp,TeamTwo_FTM,TeamTwo_FTM_opp,TeamTwo_NumOT,TeamTwo_OR,TeamTwo_OR_opp,TeamTwo_PF,TeamTwo_PF_opp,TeamTwo_Score,TeamTwo_Score_opp,TeamTwo_Stl,TeamTwo_Stl_opp,TeamTwo_TO,TeamTwo_TO_opp
2137,2018,0,1139,1345,10,2,14.0,12.69697,2.939394,3.363636,24.787879,24.575758,61.848485,23.0,21.0,55.939394,29.212121,8.181818,7.848485,25.181818,16.242424,19.636364,12.515152,14.575758,0.181818,9.212121,8.121212,18.242424,16.878788,79.121212,72.787879,6.727273,5.151515,11.181818,14.151515,16.705882,12.029412,5.029412,2.382353,26.588235,22.029412,56.705882,22.823529,20.588235,59.794118,28.176471,9.588235,6.941176,24.529412,20.470588,14.205882,15.205882,9.647059,0.029412,8.441176,10.441176,15.529412,18.647059,81.147059,65.647059,5.823529,5.205882,10.676471,12.088235
2138,2018,1,1393,1395,11,6,10.969697,16.181818,5.575758,3.181818,25.333333,22.393939,54.424242,18.181818,24.181818,55.272727,22.757576,5.848485,7.939394,21.878788,21.848485,17.30303,16.181818,12.848485,0.121212,12.090909,10.484848,16.30303,18.393939,67.545455,64.545455,7.242424,6.30303,12.575758,12.848485,18.78125,15.71875,3.59375,4.25,25.1875,21.3125,60.15625,21.15625,21.34375,59.40625,30.0,8.46875,8.09375,27.28125,20.53125,18.21875,14.53125,13.28125,0.15625,11.25,8.8125,16.90625,18.625,83.0,75.9375,6.59375,6.25,12.6875,12.59375
2139,2018,1,1420,1438,16,1,14.870968,13.580645,2.548387,2.806452,24.354839,27.129032,58.451613,25.774194,23.16129,56.612903,25.612903,9.83871,8.322581,25.322581,17.580645,17.580645,11.419355,12.0,0.0,9.709677,9.935484,16.516129,18.774194,72.483871,70.967742,7.387097,6.258065,11.677419,14.322581,13.727273,8.848485,3.727273,2.636364,24.272727,22.212121,54.181818,18.878788,20.30303,50.151515,25.0,7.363636,6.151515,18.818182,13.424242,13.121212,10.181818,9.606061,0.030303,8.393939,8.393939,14.060606,15.060606,67.545455,53.393939,6.787879,3.878788,8.575758,12.727273
2140,2018,1,1243,1420,9,16,14.333333,12.666667,3.0,2.454545,22.393939,23.060606,54.878788,20.181818,21.363636,53.787879,25.878788,6.939394,7.090909,23.090909,18.393939,19.69697,13.666667,14.606061,0.030303,8.181818,10.69697,18.393939,18.242424,72.363636,67.878788,7.787879,5.575758,11.454545,14.333333,14.870968,13.580645,2.548387,2.806452,24.354839,27.129032,58.451613,25.774194,23.16129,56.612903,25.612903,9.83871,8.322581,25.322581,17.580645,17.580645,11.419355,12.0,0.0,9.709677,9.935484,16.516129,18.774194,72.483871,70.967742,7.387097,6.258065,11.677419,14.322581
2141,2018,0,1243,1260,9,11,14.333333,12.666667,3.0,2.454545,22.393939,23.060606,54.878788,20.181818,21.363636,53.787879,25.878788,6.939394,7.090909,23.090909,18.393939,19.69697,13.666667,14.606061,0.030303,8.181818,10.69697,18.393939,18.242424,72.363636,67.878788,7.787879,5.575758,11.454545,14.333333,15.4375,11.625,2.375,3.03125,25.71875,21.71875,51.34375,18.34375,20.0625,55.125,26.0,7.34375,6.53125,22.78125,17.03125,14.1875,12.34375,9.9375,0.0,6.15625,8.96875,14.0,16.875,71.6875,62.03125,6.65625,6.4375,12.375,13.1875


(2142, 64)

Unnamed: 0,Season,TeamOneID,TeamTwoID,TeamOne_Seed,TeamTwo_Seed,TeamOne_Ast,TeamOne_Ast_opp,TeamOne_Blk,TeamOne_Blk_opp,TeamOne_DR,TeamOne_DR_opp,TeamOne_FGA,TeamOne_FGA3,TeamOne_FGA3_opp,TeamOne_FGA_opp,TeamOne_FGM,TeamOne_FGM3,TeamOne_FGM3_opp,TeamOne_FGM_opp,TeamOne_FTA,TeamOne_FTA_opp,TeamOne_FTM,TeamOne_FTM_opp,TeamOne_NumOT,TeamOne_OR,TeamOne_OR_opp,TeamOne_PF,TeamOne_PF_opp,TeamOne_Score,TeamOne_Score_opp,TeamOne_Stl,TeamOne_Stl_opp,TeamOne_TO,TeamOne_TO_opp,TeamTwo_Ast,TeamTwo_Ast_opp,TeamTwo_Blk,TeamTwo_Blk_opp,TeamTwo_DR,TeamTwo_DR_opp,TeamTwo_FGA,TeamTwo_FGA3,TeamTwo_FGA3_opp,TeamTwo_FGA_opp,TeamTwo_FGM,TeamTwo_FGM3,TeamTwo_FGM3_opp,TeamTwo_FGM_opp,TeamTwo_FTA,TeamTwo_FTA_opp,TeamTwo_FTM,TeamTwo_FTM_opp,TeamTwo_NumOT,TeamTwo_OR,TeamTwo_OR_opp,TeamTwo_PF,TeamTwo_PF_opp,TeamTwo_Score,TeamTwo_Score_opp,TeamTwo_Stl,TeamTwo_Stl_opp,TeamTwo_TO,TeamTwo_TO_opp
0,2014,1107,1110,16,15,11.0625,12.34375,3.0625,3.03125,24.21875,21.59375,51.09375,13.46875,19.59375,54.34375,22.4375,4.90625,6.90625,22.75,21.71875,15.84375,16.25,11.4375,0.09375,10.34375,9.9375,17.25,19.25,66.03125,63.84375,5.53125,5.84375,12.625,11.96875,15.125,10.09375,3.4375,2.4375,22.9375,19.3125,46.5625,16.59375,15.0625,49.03125,23.03125,6.34375,4.71875,20.15625,16.25,20.1875,11.53125,13.59375,0.03125,7.125,9.78125,17.21875,17.625,63.9375,58.625,6.03125,7.03125,13.96875,12.125
1,2014,1107,1112,16,1,11.0625,12.34375,3.0625,3.03125,24.21875,21.59375,51.09375,13.46875,19.59375,54.34375,22.4375,4.90625,6.90625,22.75,21.71875,15.84375,16.25,11.4375,0.09375,10.34375,9.9375,17.25,19.25,66.03125,63.84375,5.53125,5.84375,12.625,11.96875,15.088235,9.176471,4.235294,3.176471,26.294118,21.205882,56.0,14.852941,14.029412,53.5,26.235294,5.294118,4.411765,20.382353,23.352941,18.441176,15.294118,12.970588,0.088235,12.588235,9.235294,16.441176,18.617647,73.058824,58.147059,5.882353,4.764706,10.441176,12.411765


(11390, 63)

# Create Features

In [30]:
def create_diff_feats(df):
    """Add ``Diff_<feat>`` columns holding team one's value minus team two's value.

    For each box-score statistic both the team's own value and its ``_opp`` counterpart
    are differenced; ``Seed`` and ``NumOT`` have no ``_opp`` variant and yield a single
    column each. Columns are addressed by name rather than through ``eval``.

    Parameters
    ----------
    df : pandas.DataFrame
        Match-ups containing ``TeamOne_<feat>`` / ``TeamTwo_<feat>`` columns for every
        feature listed below.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the ``Diff_*`` columns appended. The input frame is left
        untouched, so re-running a notebook cell does not alter earlier variables.

    Raises
    ------
    KeyError
        If an expected ``TeamOne_*`` / ``TeamTwo_*`` column is missing.
    """
    feats = ['Ast', 'Blk', 'DR', 'FGA', 'FGA3', 'FGM', 'FGM3', 'FTA', 'FTM', 'OR', 'PF', 'Score', 'Stl', 'TO']
    feats_no_opp = ['Seed', 'NumOT']

    df_out = df.copy()
    for feat in feats + feats_no_opp:
        # Own value first, then the opponent-side value where one exists.
        variants = [feat] if feat in feats_no_opp else [feat, feat + '_opp']
        for name in variants:
            df_out['Diff_' + name] = df['TeamOne_' + name] - df['TeamTwo_' + name]

    return df_out

def impute_missing_values(df, imputer):
    """Fit ``imputer`` on ``df`` and return the imputed values as a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose values are passed to the imputer as a single array.
    imputer : object
        Anything exposing ``fit_transform(values)`` that returns a 2-D array
        (the notebook passes a scikit-learn ``SimpleImputer``).

    Returns
    -------
    pandas.DataFrame
        Imputed values carrying the column labels *and* the index of ``df``.

    Raises
    ------
    ValueError
        If the imputer returns a different number of columns than ``df`` has, so the
        original labels can no longer be matched to the output.
    """
    imputed = imputer.fit_transform(df.values)

    if imputed.shape[1] != len(df.columns):
        raise ValueError(
            'imputer returned {} columns for a frame with {} columns; '
            'columns were probably dropped during imputation'.format(imputed.shape[1], len(df.columns))
        )

    # Keep the caller's index so the result still lines up with the input rows.
    return pd.DataFrame(imputed, columns=df.columns, index=df.index)

In [31]:
# Modeling dataset: add the difference features, then fill remaining gaps with column medians.
# NOTE(review): the Diff_* columns are imputed independently of the TeamOne_* / TeamTwo_*
# columns, so an imputed row need not satisfy Diff = TeamOne - TeamTwo - confirm this is intended.
df_interim_mdl = impute_missing_values(
    create_diff_feats(df_interim_mdl),
    imputer=SimpleImputer(missing_values=np.nan, strategy='median'),
)

# Submission dataset: difference features only, no imputation step
df_interim_sub = create_diff_feats(df_interim_sub)

In [32]:
# Modeling set: first and last five rows. Submission set: first two rows.
display_df(df=df_interim_mdl, n=5, tail=True, title="Games")
display_df(df=df_interim_sub, n=2, title="Submission Games")

Games:


Unnamed: 0,Season,Label,TeamOneID,TeamTwoID,TeamOne_Seed,TeamTwo_Seed,TeamOne_Ast,TeamOne_Ast_opp,TeamOne_Blk,TeamOne_Blk_opp,TeamOne_DR,TeamOne_DR_opp,TeamOne_FGA,TeamOne_FGA3,TeamOne_FGA3_opp,TeamOne_FGA_opp,TeamOne_FGM,TeamOne_FGM3,TeamOne_FGM3_opp,TeamOne_FGM_opp,TeamOne_FTA,TeamOne_FTA_opp,TeamOne_FTM,TeamOne_FTM_opp,TeamOne_NumOT,TeamOne_OR,TeamOne_OR_opp,TeamOne_PF,TeamOne_PF_opp,TeamOne_Score,TeamOne_Score_opp,TeamOne_Stl,TeamOne_Stl_opp,TeamOne_TO,TeamOne_TO_opp,TeamTwo_Ast,TeamTwo_Ast_opp,TeamTwo_Blk,TeamTwo_Blk_opp,TeamTwo_DR,TeamTwo_DR_opp,TeamTwo_FGA,TeamTwo_FGA3,TeamTwo_FGA3_opp,TeamTwo_FGA_opp,TeamTwo_FGM,TeamTwo_FGM3,TeamTwo_FGM3_opp,TeamTwo_FGM_opp,TeamTwo_FTA,TeamTwo_FTA_opp,TeamTwo_FTM,TeamTwo_FTM_opp,TeamTwo_NumOT,TeamTwo_OR,TeamTwo_OR_opp,TeamTwo_PF,TeamTwo_PF_opp,TeamTwo_Score,TeamTwo_Score_opp,TeamTwo_Stl,TeamTwo_Stl_opp,TeamTwo_TO,TeamTwo_TO_opp,Diff_Ast,Diff_Ast_opp,Diff_Blk,Diff_Blk_opp,Diff_DR,Diff_DR_opp,Diff_FGA,Diff_FGA_opp,Diff_FGA3,Diff_FGA3_opp,Diff_FGM,Diff_FGM_opp,Diff_FGM3,Diff_FGM3_opp,Diff_FTA,Diff_FTA_opp,Diff_FTM,Diff_FTM_opp,Diff_OR,Diff_OR_opp,Diff_PF,Diff_PF_opp,Diff_Score,Diff_Score_opp,Diff_Stl,Diff_Stl_opp,Diff_TO,Diff_TO_opp,Diff_Seed,Diff_NumOT
0,1985.0,1.0,1116.0,1234.0,9.0,8.0,14.774194,11.939394,4.0,3.135632,24.8,21.863971,56.716578,18.741935,18.727273,56.298574,26.412121,6.864368,6.151515,23.096774,21.571429,18.439338,15.298574,12.612903,0.058824,11.65625,11.112395,17.636364,19.063508,74.911765,65.272727,6.9375,5.970588,12.777722,13.875,14.537054,11.934409,3.823529,3.137931,24.790998,22.140394,56.867965,18.758098,18.587198,55.8125,25.969697,6.8,6.107143,22.891176,21.368519,18.5,15.149287,12.666667,0.057143,11.735294,10.909091,17.550056,19.16129,73.809091,65.129032,6.882353,6.014706,12.545455,13.88057,0.148522,0.006061,0.102083,0.017793,0.142525,-0.092708,0.210446,0.621473,-0.124777,-0.065236,0.364468,0.298574,0.061581,0.001927,0.319073,0.036706,0.266544,-0.010959,-0.018939,0.266043,0.109927,0.053883,0.959197,0.494656,0.097232,0.134241,0.192761,0.03457,1.0,0.0
1,1985.0,0.0,1116.0,1385.0,9.0,1.0,14.774194,11.939394,4.0,3.135632,24.8,21.863971,56.716578,18.741935,18.727273,56.298574,26.412121,6.864368,6.151515,23.096774,21.571429,18.439338,15.298574,12.612903,0.058824,11.65625,11.112395,17.636364,19.063508,74.911765,65.272727,6.9375,5.970588,12.777722,13.875,14.537054,11.934409,3.823529,3.137931,24.790998,22.140394,56.867965,18.758098,18.587198,55.8125,25.969697,6.8,6.107143,22.891176,21.368519,18.5,15.149287,12.666667,0.057143,11.735294,10.909091,17.550056,19.16129,73.809091,65.129032,6.882353,6.014706,12.545455,13.88057,0.148522,0.006061,0.102083,0.017793,0.142525,-0.092708,0.210446,0.621473,-0.124777,-0.065236,0.364468,0.298574,0.061581,0.001927,0.319073,0.036706,0.266544,-0.010959,-0.018939,0.266043,0.109927,0.053883,0.959197,0.494656,0.097232,0.134241,0.192761,0.03457,8.0,0.0
2,1985.0,1.0,1207.0,1385.0,1.0,1.0,14.774194,11.939394,4.0,3.135632,24.8,21.863971,56.716578,18.741935,18.727273,56.298574,26.412121,6.864368,6.151515,23.096774,21.571429,18.439338,15.298574,12.612903,0.058824,11.65625,11.112395,17.636364,19.063508,74.911765,65.272727,6.9375,5.970588,12.777722,13.875,14.537054,11.934409,3.823529,3.137931,24.790998,22.140394,56.867965,18.758098,18.587198,55.8125,25.969697,6.8,6.107143,22.891176,21.368519,18.5,15.149287,12.666667,0.057143,11.735294,10.909091,17.550056,19.16129,73.809091,65.129032,6.882353,6.014706,12.545455,13.88057,0.148522,0.006061,0.102083,0.017793,0.142525,-0.092708,0.210446,0.621473,-0.124777,-0.065236,0.364468,0.298574,0.061581,0.001927,0.319073,0.036706,0.266544,-0.010959,-0.018939,0.266043,0.109927,0.053883,0.959197,0.494656,0.097232,0.134241,0.192761,0.03457,0.0,0.0
3,1985.0,0.0,1246.0,1385.0,12.0,1.0,14.774194,11.939394,4.0,3.135632,24.8,21.863971,56.716578,18.741935,18.727273,56.298574,26.412121,6.864368,6.151515,23.096774,21.571429,18.439338,15.298574,12.612903,0.058824,11.65625,11.112395,17.636364,19.063508,74.911765,65.272727,6.9375,5.970588,12.777722,13.875,14.537054,11.934409,3.823529,3.137931,24.790998,22.140394,56.867965,18.758098,18.587198,55.8125,25.969697,6.8,6.107143,22.891176,21.368519,18.5,15.149287,12.666667,0.057143,11.735294,10.909091,17.550056,19.16129,73.809091,65.129032,6.882353,6.014706,12.545455,13.88057,0.148522,0.006061,0.102083,0.017793,0.142525,-0.092708,0.210446,0.621473,-0.124777,-0.065236,0.364468,0.298574,0.061581,0.001927,0.319073,0.036706,0.266544,-0.010959,-0.018939,0.266043,0.109927,0.053883,0.959197,0.494656,0.097232,0.134241,0.192761,0.03457,11.0,0.0
4,1985.0,0.0,1380.0,1385.0,16.0,1.0,14.774194,11.939394,4.0,3.135632,24.8,21.863971,56.716578,18.741935,18.727273,56.298574,26.412121,6.864368,6.151515,23.096774,21.571429,18.439338,15.298574,12.612903,0.058824,11.65625,11.112395,17.636364,19.063508,74.911765,65.272727,6.9375,5.970588,12.777722,13.875,14.537054,11.934409,3.823529,3.137931,24.790998,22.140394,56.867965,18.758098,18.587198,55.8125,25.969697,6.8,6.107143,22.891176,21.368519,18.5,15.149287,12.666667,0.057143,11.735294,10.909091,17.550056,19.16129,73.809091,65.129032,6.882353,6.014706,12.545455,13.88057,0.148522,0.006061,0.102083,0.017793,0.142525,-0.092708,0.210446,0.621473,-0.124777,-0.065236,0.364468,0.298574,0.061581,0.001927,0.319073,0.036706,0.266544,-0.010959,-0.018939,0.266043,0.109927,0.053883,0.959197,0.494656,0.097232,0.134241,0.192761,0.03457,15.0,0.0


Unnamed: 0,Season,Label,TeamOneID,TeamTwoID,TeamOne_Seed,TeamTwo_Seed,TeamOne_Ast,TeamOne_Ast_opp,TeamOne_Blk,TeamOne_Blk_opp,TeamOne_DR,TeamOne_DR_opp,TeamOne_FGA,TeamOne_FGA3,TeamOne_FGA3_opp,TeamOne_FGA_opp,TeamOne_FGM,TeamOne_FGM3,TeamOne_FGM3_opp,TeamOne_FGM_opp,TeamOne_FTA,TeamOne_FTA_opp,TeamOne_FTM,TeamOne_FTM_opp,TeamOne_NumOT,TeamOne_OR,TeamOne_OR_opp,TeamOne_PF,TeamOne_PF_opp,TeamOne_Score,TeamOne_Score_opp,TeamOne_Stl,TeamOne_Stl_opp,TeamOne_TO,TeamOne_TO_opp,TeamTwo_Ast,TeamTwo_Ast_opp,TeamTwo_Blk,TeamTwo_Blk_opp,TeamTwo_DR,TeamTwo_DR_opp,TeamTwo_FGA,TeamTwo_FGA3,TeamTwo_FGA3_opp,TeamTwo_FGA_opp,TeamTwo_FGM,TeamTwo_FGM3,TeamTwo_FGM3_opp,TeamTwo_FGM_opp,TeamTwo_FTA,TeamTwo_FTA_opp,TeamTwo_FTM,TeamTwo_FTM_opp,TeamTwo_NumOT,TeamTwo_OR,TeamTwo_OR_opp,TeamTwo_PF,TeamTwo_PF_opp,TeamTwo_Score,TeamTwo_Score_opp,TeamTwo_Stl,TeamTwo_Stl_opp,TeamTwo_TO,TeamTwo_TO_opp,Diff_Ast,Diff_Ast_opp,Diff_Blk,Diff_Blk_opp,Diff_DR,Diff_DR_opp,Diff_FGA,Diff_FGA_opp,Diff_FGA3,Diff_FGA3_opp,Diff_FGM,Diff_FGM_opp,Diff_FGM3,Diff_FGM3_opp,Diff_FTA,Diff_FTA_opp,Diff_FTM,Diff_FTM_opp,Diff_OR,Diff_OR_opp,Diff_PF,Diff_PF_opp,Diff_Score,Diff_Score_opp,Diff_Stl,Diff_Stl_opp,Diff_TO,Diff_TO_opp,Diff_Seed,Diff_NumOT
2137,2018.0,0.0,1139.0,1345.0,10.0,2.0,14.0,12.69697,2.939394,3.363636,24.787879,24.575758,61.848485,23.0,21.0,55.939394,29.212121,8.181818,7.848485,25.181818,16.242424,19.636364,12.515152,14.575758,0.181818,9.212121,8.121212,18.242424,16.878788,79.121212,72.787879,6.727273,5.151515,11.181818,14.151515,16.705882,12.029412,5.029412,2.382353,26.588235,22.029412,56.705882,22.823529,20.588235,59.794118,28.176471,9.588235,6.941176,24.529412,20.470588,14.205882,15.205882,9.647059,0.029412,8.441176,10.441176,15.529412,18.647059,81.147059,65.647059,5.823529,5.205882,10.676471,12.088235,-2.705882,0.667558,-2.090018,0.981283,-1.800357,2.546346,5.142602,-3.854724,0.176471,0.411765,1.035651,0.652406,-1.406417,0.907308,-4.228164,5.430481,-2.690731,4.928699,0.770945,-2.319964,2.713012,-1.768271,-2.025847,7.14082,0.903743,-0.054367,0.505348,2.06328,8.0,0.152406
2138,2018.0,1.0,1393.0,1395.0,11.0,6.0,10.969697,16.181818,5.575758,3.181818,25.333333,22.393939,54.424242,18.181818,24.181818,55.272727,22.757576,5.848485,7.939394,21.878788,21.848485,17.30303,16.181818,12.848485,0.121212,12.090909,10.484848,16.30303,18.393939,67.545455,64.545455,7.242424,6.30303,12.575758,12.848485,18.78125,15.71875,3.59375,4.25,25.1875,21.3125,60.15625,21.15625,21.34375,59.40625,30.0,8.46875,8.09375,27.28125,20.53125,18.21875,14.53125,13.28125,0.15625,11.25,8.8125,16.90625,18.625,83.0,75.9375,6.59375,6.25,12.6875,12.59375,-7.811553,0.463068,1.982008,-1.068182,0.145833,1.081439,-5.732008,-4.133523,-2.974432,2.838068,-7.242424,-5.402462,-2.620265,-0.154356,1.317235,-0.91572,1.650568,-0.432765,0.840909,1.672348,-0.60322,-0.231061,-15.454545,-11.392045,0.648674,0.05303,-0.111742,0.254735,5.0,-0.035038
2139,2018.0,1.0,1420.0,1438.0,16.0,1.0,14.870968,13.580645,2.548387,2.806452,24.354839,27.129032,58.451613,25.774194,23.16129,56.612903,25.612903,9.83871,8.322581,25.322581,17.580645,17.580645,11.419355,12.0,0.0,9.709677,9.935484,16.516129,18.774194,72.483871,70.967742,7.387097,6.258065,11.677419,14.322581,13.727273,8.848485,3.727273,2.636364,24.272727,22.212121,54.181818,18.878788,20.30303,50.151515,25.0,7.363636,6.151515,18.818182,13.424242,13.121212,10.181818,9.606061,0.030303,8.393939,8.393939,14.060606,15.060606,67.545455,53.393939,6.787879,3.878788,8.575758,12.727273,1.143695,4.73216,-1.178886,0.170088,0.082111,4.916911,4.269795,6.461388,6.895406,2.85826,0.612903,6.504399,2.475073,2.171065,4.156403,4.459433,1.237537,2.393939,1.315738,1.541544,2.455523,3.713587,4.938416,17.573803,0.599218,2.379277,3.101662,1.595308,15.0,-0.030303
2140,2018.0,1.0,1243.0,1420.0,9.0,16.0,14.333333,12.666667,3.0,2.454545,22.393939,23.060606,54.878788,20.181818,21.363636,53.787879,25.878788,6.939394,7.090909,23.090909,18.393939,19.69697,13.666667,14.606061,0.030303,8.181818,10.69697,18.393939,18.242424,72.363636,67.878788,7.787879,5.575758,11.454545,14.333333,14.870968,13.580645,2.548387,2.806452,24.354839,27.129032,58.451613,25.774194,23.16129,56.612903,25.612903,9.83871,8.322581,25.322581,17.580645,17.580645,11.419355,12.0,0.0,9.709677,9.935484,16.516129,18.774194,72.483871,70.967742,7.387097,6.258065,11.677419,14.322581,-0.537634,-0.913978,0.451613,-0.351906,-1.960899,-4.068426,-3.572825,-2.825024,-5.592375,-1.797654,0.265885,-2.231672,-2.899316,-1.231672,0.813294,2.116325,2.247312,2.606061,-1.527859,0.761486,1.87781,-0.531769,-0.120235,-3.088954,0.400782,-0.682307,-0.222874,0.010753,-7.0,0.030303
2141,2018.0,0.0,1243.0,1260.0,9.0,11.0,14.333333,12.666667,3.0,2.454545,22.393939,23.060606,54.878788,20.181818,21.363636,53.787879,25.878788,6.939394,7.090909,23.090909,18.393939,19.69697,13.666667,14.606061,0.030303,8.181818,10.69697,18.393939,18.242424,72.363636,67.878788,7.787879,5.575758,11.454545,14.333333,15.4375,11.625,2.375,3.03125,25.71875,21.71875,51.34375,18.34375,20.0625,55.125,26.0,7.34375,6.53125,22.78125,17.03125,14.1875,12.34375,9.9375,0.0,6.15625,8.96875,14.0,16.875,71.6875,62.03125,6.65625,6.4375,12.375,13.1875,-1.104167,1.041667,0.625,-0.576705,-3.324811,1.341856,3.535038,-1.337121,1.838068,1.301136,-0.121212,0.309659,-0.404356,0.559659,1.362689,5.50947,1.322917,4.668561,2.025568,1.72822,4.393939,1.367424,0.676136,5.847538,1.131629,-0.861742,-0.920455,1.145833,-2.0,0.030303


(2142, 94)

Submission Games:


Unnamed: 0,Season,TeamOneID,TeamTwoID,TeamOne_Seed,TeamTwo_Seed,TeamOne_Ast,TeamOne_Ast_opp,TeamOne_Blk,TeamOne_Blk_opp,TeamOne_DR,TeamOne_DR_opp,TeamOne_FGA,TeamOne_FGA3,TeamOne_FGA3_opp,TeamOne_FGA_opp,TeamOne_FGM,TeamOne_FGM3,TeamOne_FGM3_opp,TeamOne_FGM_opp,TeamOne_FTA,TeamOne_FTA_opp,TeamOne_FTM,TeamOne_FTM_opp,TeamOne_NumOT,TeamOne_OR,TeamOne_OR_opp,TeamOne_PF,TeamOne_PF_opp,TeamOne_Score,TeamOne_Score_opp,TeamOne_Stl,TeamOne_Stl_opp,TeamOne_TO,TeamOne_TO_opp,TeamTwo_Ast,TeamTwo_Ast_opp,TeamTwo_Blk,TeamTwo_Blk_opp,TeamTwo_DR,TeamTwo_DR_opp,TeamTwo_FGA,TeamTwo_FGA3,TeamTwo_FGA3_opp,TeamTwo_FGA_opp,TeamTwo_FGM,TeamTwo_FGM3,TeamTwo_FGM3_opp,TeamTwo_FGM_opp,TeamTwo_FTA,TeamTwo_FTA_opp,TeamTwo_FTM,TeamTwo_FTM_opp,TeamTwo_NumOT,TeamTwo_OR,TeamTwo_OR_opp,TeamTwo_PF,TeamTwo_PF_opp,TeamTwo_Score,TeamTwo_Score_opp,TeamTwo_Stl,TeamTwo_Stl_opp,TeamTwo_TO,TeamTwo_TO_opp,Diff_Ast,Diff_Ast_opp,Diff_Blk,Diff_Blk_opp,Diff_DR,Diff_DR_opp,Diff_FGA,Diff_FGA_opp,Diff_FGA3,Diff_FGA3_opp,Diff_FGM,Diff_FGM_opp,Diff_FGM3,Diff_FGM3_opp,Diff_FTA,Diff_FTA_opp,Diff_FTM,Diff_FTM_opp,Diff_OR,Diff_OR_opp,Diff_PF,Diff_PF_opp,Diff_Score,Diff_Score_opp,Diff_Stl,Diff_Stl_opp,Diff_TO,Diff_TO_opp,Diff_Seed,Diff_NumOT
0,2014,1107,1110,16,15,11.0625,12.34375,3.0625,3.03125,24.21875,21.59375,51.09375,13.46875,19.59375,54.34375,22.4375,4.90625,6.90625,22.75,21.71875,15.84375,16.25,11.4375,0.09375,10.34375,9.9375,17.25,19.25,66.03125,63.84375,5.53125,5.84375,12.625,11.96875,15.125,10.09375,3.4375,2.4375,22.9375,19.3125,46.5625,16.59375,15.0625,49.03125,23.03125,6.34375,4.71875,20.15625,16.25,20.1875,11.53125,13.59375,0.03125,7.125,9.78125,17.21875,17.625,63.9375,58.625,6.03125,7.03125,13.96875,12.125,-4.0625,2.25,-0.375,0.59375,1.28125,2.28125,4.53125,5.3125,-3.125,4.53125,-0.59375,2.59375,-1.4375,2.1875,5.46875,-4.34375,4.71875,-2.15625,3.21875,0.15625,0.03125,1.625,2.09375,5.21875,-0.5,-1.1875,-1.34375,-0.15625,1,0.0625
1,2014,1107,1112,16,1,11.0625,12.34375,3.0625,3.03125,24.21875,21.59375,51.09375,13.46875,19.59375,54.34375,22.4375,4.90625,6.90625,22.75,21.71875,15.84375,16.25,11.4375,0.09375,10.34375,9.9375,17.25,19.25,66.03125,63.84375,5.53125,5.84375,12.625,11.96875,15.088235,9.176471,4.235294,3.176471,26.294118,21.205882,56.0,14.852941,14.029412,53.5,26.235294,5.294118,4.411765,20.382353,23.352941,18.441176,15.294118,12.970588,0.088235,12.588235,9.235294,16.441176,18.617647,73.058824,58.147059,5.882353,4.764706,10.441176,12.411765,-4.025735,3.167279,-1.172794,-0.145221,-2.075368,0.387868,-4.90625,0.84375,-1.384191,5.564338,-3.797794,2.367647,-0.387868,2.494485,-1.634191,-2.597426,0.955882,-1.533088,-2.244485,0.702206,0.808824,0.632353,-7.027574,5.696691,-0.351103,1.079044,2.183824,-0.443015,15,0.005515


(11390, 93)

# Save Datasets

In [33]:
# Write both interim datasets as CSV, leaving out the index column.
df_interim_mdl.to_csv(
    path_or_buf=data_interim_dir + 'model_dataset.csv',
    index=False,
)
df_interim_sub.to_csv(
    path_or_buf=data_interim_dir + 'submission_dataset.csv',
    index=False,
)