# Setup

In [86]:
import pdb
import re

import numpy as np
import pandas as pd

In [28]:
# Let wide frames render (up to 999 columns) instead of being truncated.
pd.set_option('display.max_columns', 999)

In [62]:
def display_df(df, n=1, tail=False, title=None):
    """Show a compact preview of a DataFrame in the notebook output.

    Args:
        df: DataFrame to preview.
        n: number of rows taken from the head (and from the tail when requested).
        tail: when truthy, the last ``n`` rows are displayed after the head.
        title: optional heading; when truthy it is printed followed by a colon.

    The frame's shape is always displayed last. Nothing is returned.
    """
    if title:
        print(title + ':')

    # Collect everything to render, then hand it to display() in one call.
    pieces = [df.head(n)]
    if tail:
        pieces.append(df.tail(n))
    pieces.append(df.shape)
    display(*pieces)

In [63]:
# Data folders, relative to the notebook's working directory.
# Both keep a trailing slash because file names are appended by string concatenation.
_data_root = '../data/'
data_raw_dir = _data_root + 'raw/'
data_interim_dir = _data_root + 'interim/'

# Read and Process Datasets

In [49]:
def process_seeds(df):
    """Turn the raw tournament seed strings into numeric seeds plus a play-in flag.

    Args:
        df: DataFrame with a ``Seed`` column of seed strings; characters 1-2 of
            each string are parsed as the integer seed.

    Returns:
        A new DataFrame where ``Seed`` holds the parsed integer and a new
        ``IsPlayIn`` column is True when the original seed string ends in
        'a' or 'b'. The input frame is not modified.
    """
    raw_seed = df.Seed
    seed_number = raw_seed.map(lambda code: int(code[1:3]))
    play_in_flag = raw_seed.map(lambda code: str(code).endswith(('a', 'b')))
    return df.assign(Seed=seed_number, IsPlayIn=play_in_flag)


def process_games(df):
    """Convert winner/loser game results into an order-independent matchup table.

    Each game is re-expressed so that the lower team ID is always ``TeamOneID``
    and the higher one ``TeamTwoID``; ``Label`` records whether team one won.

    Args:
        df: DataFrame with at least the columns ``WTeamID``, ``LTeamID``,
            ``DayNum``, ``WScore``, ``LScore``, ``WLoc`` and ``NumOT``.

    Returns:
        A new DataFrame with the seven columns above removed and three columns
        appended: ``TeamOneID`` (smaller ID), ``TeamTwoID`` (larger ID) and
        ``Label`` (1 when ``TeamOneID`` equals the winner's ID, else 0).
        The input frame is not modified.

    Raises:
        KeyError: if any of the columns to drop is missing (raised by ``drop``).
    """
    winner_id = df.WTeamID
    loser_id = df.LTeamID

    # Element-wise min/max replaces the former row-by-row apply: same result,
    # no per-row Python calls and no NaN placeholder columns.
    team_one = np.minimum(winner_id, loser_id)
    team_two = np.maximum(winner_id, loser_id)

    return (
        df
        .assign(
            TeamOneID=team_one,
            TeamTwoID=team_two,
            Label=(team_one == winner_id).astype(int),
        )
        .drop(['WTeamID', 'LTeamID', 'DayNum', 'WScore', 'LScore', 'WLoc', 'NumOT'], axis=1)
    )


def process_submission_games(df):
    """Expand the submission file's composite ``ID`` into separate key columns.

    Args:
        df: DataFrame with an ``ID`` column of underscore-separated strings
            (three integer fields are read, in order) and a ``Pred`` column.

    Returns:
        A new DataFrame without ``ID`` and ``Pred`` and with integer columns
        ``Season``, ``TeamOneID`` and ``TeamTwoID`` taken from the first,
        second and third field of ``ID`` respectively.
    """
    # Split once and reuse the pieces instead of re-splitting for every column.
    id_parts = df.ID.str.split('_', expand=True)
    return (
        df
        .assign(
            Season=id_parts[0].astype(int),
            TeamOneID=id_parts[1].astype(int),
            TeamTwoID=id_parts[2].astype(int),
        )
        .drop(['ID', 'Pred'], axis=1)
    )

def process_regular_season_detailed_results(df):
    """Reshape per-game results into one row per team per game.

    Every game appears twice in the result: once from the winner's point of
    view and once from the loser's. For a given point of view the team's own
    columns lose their 'W'/'L' prefix, while the other team's columns lose
    their prefix and gain an ``_opp`` suffix. Columns that start with neither
    letter are kept unchanged.

    A column that exists for only one side (e.g. one starting with 'W' with no
    'L' counterpart) ends up under its bare name in the winner rows and under
    the ``_opp`` name in the loser rows, leaving the other half missing.

    Args:
        df: DataFrame whose team-specific columns are prefixed with 'W' or 'L'.

    Returns:
        Winner-view rows followed by loser-view rows; the original index labels
        are kept, so each label occurs twice.
    """
    def _perspective_renamer(own_prefix, other_prefix):
        """Build a column-renaming function for one side of the game."""
        def _rename(column):
            if column.startswith(own_prefix):
                return column[len(own_prefix):]
            if column.startswith(other_prefix):
                return column[len(other_prefix):] + '_opp'
            return column
        return _rename

    perspectives = [
        df.rename(columns=_perspective_renamer('W', 'L')),  # winner's view
        df.rename(columns=_perspective_renamer('L', 'W')),  # loser's view
    ]
    return pd.concat(perspectives)

In [50]:
# Tournament seeds
df_seeds = process_seeds(
    pd.read_csv(data_raw_dir + 'DataFiles/NCAATourneySeeds.csv')
)

# Tournament games (compact results)
df_games = process_games(
    pd.read_csv(data_raw_dir + 'DataFiles/NCAATourneyCompactResults.csv')
)

# Matchups listed in the sample submission file
df_sub = process_submission_games(
    pd.read_csv(data_raw_dir + 'SampleSubmissionStage1.csv')
)

# Regular season detailed results, one row per team per game
df_rs_d_res = process_regular_season_detailed_results(
    pd.read_csv(data_raw_dir + 'DataFiles/RegularSeasonDetailedResults.csv')
)

In [108]:
# Preview the first row and the shape of each processed dataset.
for _preview_df, _preview_title in (
    (df_seeds, "Seeds"),
    (df_games, "Games"),
    (df_sub, "SubmissionGames"),
    (df_rs_d_res, "Regular Season Detailed Results"),
):
    display_df(_preview_df, title=_preview_title)

Seeds:


Unnamed: 0,Season,Seed,TeamID,IsPlayIn
0,1985,1,1207,False


(2218, 4)

Games:


Unnamed: 0,Season,Label,TeamOneID,TeamTwoID
0,1985,1,1116,1234


(2184, 4)

SubmissionGames:


Unnamed: 0,Season,TeamOneID,TeamTwoID
0,2014,1107,1110


(11390, 3)

Regular Season Detailed Results:


Unnamed: 0,Ast,Ast_opp,Blk,Blk_opp,DR,DR_opp,DayNum,FGA,FGA3,FGA3_opp,FGA_opp,FGM,FGM3,FGM3_opp,FGM_opp,FTA,FTA_opp,FTM,FTM_opp,Loc,Loc_opp,NumOT,OR,OR_opp,PF,PF_opp,Score,Score_opp,Season,Stl,Stl_opp,TO,TO_opp,TeamID,TeamID_opp
0,13,8,1,2,24,22,10,58,14,10,53,27,3,2,22,18,22,11,16,N,,0,14,10,22,20,68,62,2003,7,9,23,18,1104,1328


(164082, 35)

# Aggregate Datasets

In [75]:
# Average every numeric column per (TeamID, Season).
# numeric_only=True is explicit because the frame still carries the text
# columns Loc / Loc_opp: newer pandas versions raise on them instead of
# silently leaving them out of the mean.
df_rs_d_res_agg = (
    df_rs_d_res
    .groupby(['TeamID', 'Season'])
    .mean(numeric_only=True)
    .drop(['TeamID_opp', 'DayNum'], axis=1)  # averaging opponent IDs / day numbers is meaningless
    .reset_index()
)

In [76]:
# Preview the first row and the shape of the per-(TeamID, Season) averages.
display_df(df_rs_d_res_agg, title="Average Regular Season Detailed Results")

Average Regular Season Detailed Results:


Unnamed: 0,TeamID,Season,Ast,Ast_opp,Blk,Blk_opp,DR,DR_opp,FGA,FGA3,FGA3_opp,FGA_opp,FGM,FGM3,FGM3_opp,FGM_opp,FTA,FTA_opp,FTM,FTM_opp,NumOT,OR,OR_opp,PF,PF_opp,Score,Score_opp,Stl,Stl_opp,TO,TO_opp
0,1101,2014,10.0,15.571429,1.47619,5.0,20.333333,24.095238,50.142857,17.857143,16.190476,53.428571,20.333333,6.666667,6.0,27.142857,21.190476,25.809524,15.809524,18.333333,0.142857,8.0,10.380952,21.571429,18.666667,63.142857,78.619048,5.761905,7.0,15.0,12.142857


(5481, 31)

# Merge Datasets

In [116]:
def merge_seed_dataset(df, df_seeds, submission_file):
    """Attach both teams' tournament seeds to a matchup table.

    Args:
        df: matchup DataFrame with ``Season``, ``TeamOneID`` and ``TeamTwoID``.
        df_seeds: seed DataFrame with ``Season``, ``TeamID``, ``Seed`` and
            ``IsPlayIn`` columns.
        submission_file: when falsy, rows where *both* teams are flagged as
            play-in teams are removed; when truthy, all merged rows are kept.

    Returns:
        ``df`` inner-joined with the seeds of each side, with ``TeamOneSeed``
        and ``TeamTwoSeed`` appended. Because the joins are inner joins,
        matchups where either team has no seed for that season are dropped.
        The play-in flags are used only for filtering and are not returned.
    """
    def _with_seed(frame, side):
        """Inner-join ``frame`` with the seed columns renamed for ``side``."""
        # Only the columns are renamed; the row index is left untouched
        # (it was previously cast to strings for no benefit).
        side_seeds = df_seeds.rename(columns={
            'TeamID': side + 'ID',
            'Seed': side + 'Seed',
            'IsPlayIn': side + 'IsPlayIn',
        })
        return frame.merge(side_seeds, on=['Season', side + 'ID'])

    df_out = _with_seed(_with_seed(df, 'TeamOne'), 'TeamTwo')

    if not submission_file:
        both_play_in = df_out.TeamOneIsPlayIn & df_out.TeamTwoIsPlayIn
        df_out = df_out.loc[~both_play_in]  # remove play-in games

    return df_out.drop(['TeamOneIsPlayIn', 'TeamTwoIsPlayIn'], axis=1)


def merge_aggregated_regular_season_detailed_results(df, df_rs_d_res_agg):
    """Attach each team's aggregated regular-season statistics to a matchup table.

    Args:
        df: matchup DataFrame with ``Season``, ``TeamOneID`` and ``TeamTwoID``.
        df_rs_d_res_agg: per-team, per-season statistics with ``TeamID`` and
            ``Season`` key columns; every other column is treated as a statistic.

    Returns:
        ``df`` left-joined twice with the statistics: first for team one (each
        statistic suffixed ``_TeamOne``), then for team two (suffixed
        ``_TeamTwo``). Rows without matching statistics are kept with missing
        values in the statistic columns.
    """
    def _stats_for(side):
        """Return the statistics keyed and suffixed for ``side``."""
        def _relabel(column):
            if column == 'Season':
                return column
            if column == 'TeamID':
                return side + 'ID'
            return column + '_' + side
        return df_rs_d_res_agg.rename(columns=_relabel)

    # Suffix explicitly before merging so the final column names do not depend
    # on which columns happen to collide between the two joins.
    return (
        df
        .merge(_stats_for('TeamOne'), on=['Season', 'TeamOneID'], how='left')  # team one data
        .merge(_stats_for('TeamTwo'), on=['Season', 'TeamTwoID'], how='left')  # team two data
    )

In [119]:
# Modeling dataset: tournament games + seeds (play-in games removed) + season averages
df_interim_mdl = merge_aggregated_regular_season_detailed_results(
    merge_seed_dataset(df_games, df_seeds=df_seeds, submission_file=False),
    df_rs_d_res_agg=df_rs_d_res_agg,
)

# Submission dataset: submission matchups + seeds (nothing filtered) + season averages
df_interim_sub = merge_aggregated_regular_season_detailed_results(
    merge_seed_dataset(df_sub, df_seeds=df_seeds, submission_file=True),
    df_rs_d_res_agg=df_rs_d_res_agg,
)

In [120]:
# Modeling dataset: tail=True makes display_df show the first and the last 5 rows, then the shape.
display_df(df_interim_mdl, tail=True, n=5)
# Submission dataset: default preview (first row, then the shape).
display_df(df_interim_sub)

Unnamed: 0,Season,Label,TeamOneID,TeamTwoID,TeamOneSeed,TeamTwoSeed,Ast_TeamOne,Ast_opp_TeamOne,Blk_TeamOne,Blk_opp_TeamOne,DR_TeamOne,DR_opp_TeamOne,FGA_TeamOne,FGA3_TeamOne,FGA3_opp_TeamOne,FGA_opp_TeamOne,FGM_TeamOne,FGM3_TeamOne,FGM3_opp_TeamOne,FGM_opp_TeamOne,FTA_TeamOne,FTA_opp_TeamOne,FTM_TeamOne,FTM_opp_TeamOne,NumOT_TeamOne,OR_TeamOne,OR_opp_TeamOne,PF_TeamOne,PF_opp_TeamOne,Score_TeamOne,Score_opp_TeamOne,Stl_TeamOne,Stl_opp_TeamOne,TO_TeamOne,TO_opp_TeamOne,Ast_TeamTwo,Ast_opp_TeamTwo,Blk_TeamTwo,Blk_opp_TeamTwo,DR_TeamTwo,DR_opp_TeamTwo,FGA_TeamTwo,FGA3_TeamTwo,FGA3_opp_TeamTwo,FGA_opp_TeamTwo,FGM_TeamTwo,FGM3_TeamTwo,FGM3_opp_TeamTwo,FGM_opp_TeamTwo,FTA_TeamTwo,FTA_opp_TeamTwo,FTM_TeamTwo,FTM_opp_TeamTwo,NumOT_TeamTwo,OR_TeamTwo,OR_opp_TeamTwo,PF_TeamTwo,PF_opp_TeamTwo,Score_TeamTwo,Score_opp_TeamTwo,Stl_TeamTwo,Stl_opp_TeamTwo,TO_TeamTwo,TO_opp_TeamTwo
0,1985,1,1116,1234,9,8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1985,0,1116,1385,9,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1985,1,1207,1385,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1985,0,1246,1385,12,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1985,0,1380,1385,16,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,Season,Label,TeamOneID,TeamTwoID,TeamOneSeed,TeamTwoSeed,Ast_TeamOne,Ast_opp_TeamOne,Blk_TeamOne,Blk_opp_TeamOne,DR_TeamOne,DR_opp_TeamOne,FGA_TeamOne,FGA3_TeamOne,FGA3_opp_TeamOne,FGA_opp_TeamOne,FGM_TeamOne,FGM3_TeamOne,FGM3_opp_TeamOne,FGM_opp_TeamOne,FTA_TeamOne,FTA_opp_TeamOne,FTM_TeamOne,FTM_opp_TeamOne,NumOT_TeamOne,OR_TeamOne,OR_opp_TeamOne,PF_TeamOne,PF_opp_TeamOne,Score_TeamOne,Score_opp_TeamOne,Stl_TeamOne,Stl_opp_TeamOne,TO_TeamOne,TO_opp_TeamOne,Ast_TeamTwo,Ast_opp_TeamTwo,Blk_TeamTwo,Blk_opp_TeamTwo,DR_TeamTwo,DR_opp_TeamTwo,FGA_TeamTwo,FGA3_TeamTwo,FGA3_opp_TeamTwo,FGA_opp_TeamTwo,FGM_TeamTwo,FGM3_TeamTwo,FGM3_opp_TeamTwo,FGM_opp_TeamTwo,FTA_TeamTwo,FTA_opp_TeamTwo,FTM_TeamTwo,FTM_opp_TeamTwo,NumOT_TeamTwo,OR_TeamTwo,OR_opp_TeamTwo,PF_TeamTwo,PF_opp_TeamTwo,Score_TeamTwo,Score_opp_TeamTwo,Stl_TeamTwo,Stl_opp_TeamTwo,TO_TeamTwo,TO_opp_TeamTwo
2137,2018,0,1139,1345,10,2,14.0,12.69697,2.939394,3.363636,24.787879,24.575758,61.848485,23.0,21.0,55.939394,29.212121,8.181818,7.848485,25.181818,16.242424,19.636364,12.515152,14.575758,0.181818,9.212121,8.121212,18.242424,16.878788,79.121212,72.787879,6.727273,5.151515,11.181818,14.151515,16.705882,12.029412,5.029412,2.382353,26.588235,22.029412,56.705882,22.823529,20.588235,59.794118,28.176471,9.588235,6.941176,24.529412,20.470588,14.205882,15.205882,9.647059,0.029412,8.441176,10.441176,15.529412,18.647059,81.147059,65.647059,5.823529,5.205882,10.676471,12.088235
2138,2018,1,1393,1395,11,6,10.969697,16.181818,5.575758,3.181818,25.333333,22.393939,54.424242,18.181818,24.181818,55.272727,22.757576,5.848485,7.939394,21.878788,21.848485,17.30303,16.181818,12.848485,0.121212,12.090909,10.484848,16.30303,18.393939,67.545455,64.545455,7.242424,6.30303,12.575758,12.848485,18.78125,15.71875,3.59375,4.25,25.1875,21.3125,60.15625,21.15625,21.34375,59.40625,30.0,8.46875,8.09375,27.28125,20.53125,18.21875,14.53125,13.28125,0.15625,11.25,8.8125,16.90625,18.625,83.0,75.9375,6.59375,6.25,12.6875,12.59375
2139,2018,1,1420,1438,16,1,14.870968,13.580645,2.548387,2.806452,24.354839,27.129032,58.451613,25.774194,23.16129,56.612903,25.612903,9.83871,8.322581,25.322581,17.580645,17.580645,11.419355,12.0,0.0,9.709677,9.935484,16.516129,18.774194,72.483871,70.967742,7.387097,6.258065,11.677419,14.322581,13.727273,8.848485,3.727273,2.636364,24.272727,22.212121,54.181818,18.878788,20.30303,50.151515,25.0,7.363636,6.151515,18.818182,13.424242,13.121212,10.181818,9.606061,0.030303,8.393939,8.393939,14.060606,15.060606,67.545455,53.393939,6.787879,3.878788,8.575758,12.727273
2140,2018,1,1243,1420,9,16,14.333333,12.666667,3.0,2.454545,22.393939,23.060606,54.878788,20.181818,21.363636,53.787879,25.878788,6.939394,7.090909,23.090909,18.393939,19.69697,13.666667,14.606061,0.030303,8.181818,10.69697,18.393939,18.242424,72.363636,67.878788,7.787879,5.575758,11.454545,14.333333,14.870968,13.580645,2.548387,2.806452,24.354839,27.129032,58.451613,25.774194,23.16129,56.612903,25.612903,9.83871,8.322581,25.322581,17.580645,17.580645,11.419355,12.0,0.0,9.709677,9.935484,16.516129,18.774194,72.483871,70.967742,7.387097,6.258065,11.677419,14.322581
2141,2018,0,1243,1260,9,11,14.333333,12.666667,3.0,2.454545,22.393939,23.060606,54.878788,20.181818,21.363636,53.787879,25.878788,6.939394,7.090909,23.090909,18.393939,19.69697,13.666667,14.606061,0.030303,8.181818,10.69697,18.393939,18.242424,72.363636,67.878788,7.787879,5.575758,11.454545,14.333333,15.4375,11.625,2.375,3.03125,25.71875,21.71875,51.34375,18.34375,20.0625,55.125,26.0,7.34375,6.53125,22.78125,17.03125,14.1875,12.34375,9.9375,0.0,6.15625,8.96875,14.0,16.875,71.6875,62.03125,6.65625,6.4375,12.375,13.1875


(2142, 64)

Unnamed: 0,Season,TeamOneID,TeamTwoID,TeamOneSeed,TeamTwoSeed,Ast_TeamOne,Ast_opp_TeamOne,Blk_TeamOne,Blk_opp_TeamOne,DR_TeamOne,DR_opp_TeamOne,FGA_TeamOne,FGA3_TeamOne,FGA3_opp_TeamOne,FGA_opp_TeamOne,FGM_TeamOne,FGM3_TeamOne,FGM3_opp_TeamOne,FGM_opp_TeamOne,FTA_TeamOne,FTA_opp_TeamOne,FTM_TeamOne,FTM_opp_TeamOne,NumOT_TeamOne,OR_TeamOne,OR_opp_TeamOne,PF_TeamOne,PF_opp_TeamOne,Score_TeamOne,Score_opp_TeamOne,Stl_TeamOne,Stl_opp_TeamOne,TO_TeamOne,TO_opp_TeamOne,Ast_TeamTwo,Ast_opp_TeamTwo,Blk_TeamTwo,Blk_opp_TeamTwo,DR_TeamTwo,DR_opp_TeamTwo,FGA_TeamTwo,FGA3_TeamTwo,FGA3_opp_TeamTwo,FGA_opp_TeamTwo,FGM_TeamTwo,FGM3_TeamTwo,FGM3_opp_TeamTwo,FGM_opp_TeamTwo,FTA_TeamTwo,FTA_opp_TeamTwo,FTM_TeamTwo,FTM_opp_TeamTwo,NumOT_TeamTwo,OR_TeamTwo,OR_opp_TeamTwo,PF_TeamTwo,PF_opp_TeamTwo,Score_TeamTwo,Score_opp_TeamTwo,Stl_TeamTwo,Stl_opp_TeamTwo,TO_TeamTwo,TO_opp_TeamTwo
0,2014,1107,1110,16,15,11.0625,12.34375,3.0625,3.03125,24.21875,21.59375,51.09375,13.46875,19.59375,54.34375,22.4375,4.90625,6.90625,22.75,21.71875,15.84375,16.25,11.4375,0.09375,10.34375,9.9375,17.25,19.25,66.03125,63.84375,5.53125,5.84375,12.625,11.96875,15.125,10.09375,3.4375,2.4375,22.9375,19.3125,46.5625,16.59375,15.0625,49.03125,23.03125,6.34375,4.71875,20.15625,16.25,20.1875,11.53125,13.59375,0.03125,7.125,9.78125,17.21875,17.625,63.9375,58.625,6.03125,7.03125,13.96875,12.125


(11390, 63)

# Create Features

In [121]:
def create_seedDiff_feat(df):
    """Add the seed difference between the two teams as a feature.

    Args:
        df: DataFrame with ``TeamOneSeed`` and ``TeamTwoSeed`` columns.

    Returns:
        A new DataFrame with an extra ``SeedDiff`` column equal to
        ``TeamOneSeed - TeamTwoSeed``; the input frame is not modified.
    """
    seed_gap = df['TeamOneSeed'] - df['TeamTwoSeed']
    return df.assign(SeedDiff=seed_gap)

In [122]:
# Add engineered features to the modeling dataset
df_interim_mdl = create_seedDiff_feat(df_interim_mdl)

# Add the same features to the submission dataset
df_interim_sub = create_seedDiff_feat(df_interim_sub)

In [123]:
# Preview the first two rows and the shape of each dataset after feature creation.
display_df(df_interim_mdl, n=2, title="Games")
display_df(df_interim_sub, n=2, title="Submission Games")  # title typo ("Submissin") fixed

Games:


Unnamed: 0,Season,Label,TeamOneID,TeamTwoID,TeamOneSeed,TeamTwoSeed,Ast_TeamOne,Ast_opp_TeamOne,Blk_TeamOne,Blk_opp_TeamOne,DR_TeamOne,DR_opp_TeamOne,FGA_TeamOne,FGA3_TeamOne,FGA3_opp_TeamOne,FGA_opp_TeamOne,FGM_TeamOne,FGM3_TeamOne,FGM3_opp_TeamOne,FGM_opp_TeamOne,FTA_TeamOne,FTA_opp_TeamOne,FTM_TeamOne,FTM_opp_TeamOne,NumOT_TeamOne,OR_TeamOne,OR_opp_TeamOne,PF_TeamOne,PF_opp_TeamOne,Score_TeamOne,Score_opp_TeamOne,Stl_TeamOne,Stl_opp_TeamOne,TO_TeamOne,TO_opp_TeamOne,Ast_TeamTwo,Ast_opp_TeamTwo,Blk_TeamTwo,Blk_opp_TeamTwo,DR_TeamTwo,DR_opp_TeamTwo,FGA_TeamTwo,FGA3_TeamTwo,FGA3_opp_TeamTwo,FGA_opp_TeamTwo,FGM_TeamTwo,FGM3_TeamTwo,FGM3_opp_TeamTwo,FGM_opp_TeamTwo,FTA_TeamTwo,FTA_opp_TeamTwo,FTM_TeamTwo,FTM_opp_TeamTwo,NumOT_TeamTwo,OR_TeamTwo,OR_opp_TeamTwo,PF_TeamTwo,PF_opp_TeamTwo,Score_TeamTwo,Score_opp_TeamTwo,Stl_TeamTwo,Stl_opp_TeamTwo,TO_TeamTwo,TO_opp_TeamTwo,SeedDiff
0,1985,1,1116,1234,9,8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
1,1985,0,1116,1385,9,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8


(2142, 65)

Submissin Games:


Unnamed: 0,Season,TeamOneID,TeamTwoID,TeamOneSeed,TeamTwoSeed,Ast_TeamOne,Ast_opp_TeamOne,Blk_TeamOne,Blk_opp_TeamOne,DR_TeamOne,DR_opp_TeamOne,FGA_TeamOne,FGA3_TeamOne,FGA3_opp_TeamOne,FGA_opp_TeamOne,FGM_TeamOne,FGM3_TeamOne,FGM3_opp_TeamOne,FGM_opp_TeamOne,FTA_TeamOne,FTA_opp_TeamOne,FTM_TeamOne,FTM_opp_TeamOne,NumOT_TeamOne,OR_TeamOne,OR_opp_TeamOne,PF_TeamOne,PF_opp_TeamOne,Score_TeamOne,Score_opp_TeamOne,Stl_TeamOne,Stl_opp_TeamOne,TO_TeamOne,TO_opp_TeamOne,Ast_TeamTwo,Ast_opp_TeamTwo,Blk_TeamTwo,Blk_opp_TeamTwo,DR_TeamTwo,DR_opp_TeamTwo,FGA_TeamTwo,FGA3_TeamTwo,FGA3_opp_TeamTwo,FGA_opp_TeamTwo,FGM_TeamTwo,FGM3_TeamTwo,FGM3_opp_TeamTwo,FGM_opp_TeamTwo,FTA_TeamTwo,FTA_opp_TeamTwo,FTM_TeamTwo,FTM_opp_TeamTwo,NumOT_TeamTwo,OR_TeamTwo,OR_opp_TeamTwo,PF_TeamTwo,PF_opp_TeamTwo,Score_TeamTwo,Score_opp_TeamTwo,Stl_TeamTwo,Stl_opp_TeamTwo,TO_TeamTwo,TO_opp_TeamTwo,SeedDiff
0,2014,1107,1110,16,15,11.0625,12.34375,3.0625,3.03125,24.21875,21.59375,51.09375,13.46875,19.59375,54.34375,22.4375,4.90625,6.90625,22.75,21.71875,15.84375,16.25,11.4375,0.09375,10.34375,9.9375,17.25,19.25,66.03125,63.84375,5.53125,5.84375,12.625,11.96875,15.125,10.09375,3.4375,2.4375,22.9375,19.3125,46.5625,16.59375,15.0625,49.03125,23.03125,6.34375,4.71875,20.15625,16.25,20.1875,11.53125,13.59375,0.03125,7.125,9.78125,17.21875,17.625,63.9375,58.625,6.03125,7.03125,13.96875,12.125,1
1,2014,1107,1112,16,1,11.0625,12.34375,3.0625,3.03125,24.21875,21.59375,51.09375,13.46875,19.59375,54.34375,22.4375,4.90625,6.90625,22.75,21.71875,15.84375,16.25,11.4375,0.09375,10.34375,9.9375,17.25,19.25,66.03125,63.84375,5.53125,5.84375,12.625,11.96875,15.088235,9.176471,4.235294,3.176471,26.294118,21.205882,56.0,14.852941,14.029412,53.5,26.235294,5.294118,4.411765,20.382353,23.352941,18.441176,15.294118,12.970588,0.088235,12.588235,9.235294,16.441176,18.617647,73.058824,58.147059,5.882353,4.764706,10.441176,12.411765,15


(11390, 64)

In [124]:
# Write both interim datasets as CSV files (without the row index) to the interim data folder.
for _frame_to_save, _file_name in (
    (df_interim_mdl, 'model_dataset.csv'),
    (df_interim_sub, 'submission_dataset.csv'),
):
    _frame_to_save.to_csv(data_interim_dir + _file_name, index=False)