In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from IPython.display import clear_output

from tqdm import tqdm
from nba_api.stats.static import players, teams
from nba_api.stats.library.parameters import SeasonAll
from nba_api.stats.endpoints import leaguegamelog

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, ElasticNet, Ridge, Lasso
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from sklearn.pipeline import Pipeline

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import learning_curve
from sklearn.metrics import mean_absolute_error, mean_squared_error


pd.options.display.max_columns=200

In C:\Users\Jordan Nishimura\Anaconda3\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: 
The text.latex.preview rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In C:\Users\Jordan Nishimura\Anaconda3\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: 
The mathtext.fallback_to_cm rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In C:\Users\Jordan Nishimura\Anaconda3\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: Support for setting the 'mathtext.fallback_to_cm' rcParam is deprecated since 3.3 and will be removed two minor releases later; use 'mathtext.fallback : 'cm' instead.
In C:\Users\Jordan Nishimura\Anaconda3\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: 
The validate_bool_maybe_none function was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In C:\Users\Jordan Nishimura\Anaconda3\lib\site-

In [185]:
seasons = ["20{}-{}".format(x, x+1) for x in range(13, 21)]
print(seasons)
def gather_data(seasons = seasons):
    full_player_data = []
    for season in seasons:
        player_gls = pd.read_csv('./data/player_gamelogs_{}.csv'.format(season), dtype={'GAME_ID':'object'})
        player_gls_adv = pd.read_csv('./data/player_advanced_boxscores_{}.csv'.format(season), dtype={'GAME_ID':'object'})
        player_gls_scoring = pd.read_csv('./data/player_scoring_boxscores_{}.csv'.format(season), dtype={'GAME_ID':'object'})
        player_gls_tracking = pd.read_csv('./data/player_tracking_boxscores_{}.csv'.format(season), dtype={'GAME_ID':'object'})
        
        player_gls = player_gls[['SEASON_YEAR', 'PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID',
       'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP',
       'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM',
       'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK',
       'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS']]

        player_full = pd.merge(player_gls, player_gls_adv, how='left', on=['GAME_ID', 'PLAYER_ID'], suffixes=['', '_x'])
        player_full = pd.merge(player_full, player_gls_scoring, how='left', on=['GAME_ID', 'PLAYER_ID'], suffixes=['', '_x'])
        player_full = pd.merge(player_full, player_gls_tracking, how='left', on=['GAME_ID', 'PLAYER_ID'], suffixes=['', '_x'])
    
        # drop unnecessary columns
        player_full = player_full.drop(columns = ['TEAM_ID', 'TEAM_NAME', 'TEAM_ID_x',
                         'TEAM_ABBREVIATION_x','TEAM_CITY','PLAYER_NAME_x','START_POSITION',
                          'COMMENT','MIN_x','TEAM_ID_x', 'TEAM_ABBREVIATION_x','TEAM_CITY_x', 
                          'PLAYER_NAME_x', 'START_POSITION_x', 'COMMENT_x', 'MIN_x', 
                         'TEAM_ID_x', 'TEAM_ABBREVIATION_x', 'TEAM_CITY_x', 'PLAYER_NAME_x', 
                          'START_POSITION_x', 'COMMENT_x', 'MIN_x', 'AST_x', 'FG_PCT_x',
                         'FG_PCT', 'FG3_PCT', 'FT_PCT', 'E_NET_RATING', 'NET_RATING', 
                         'AST_PCT', 'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'EFG_PCT', 'TS_PCT', 
                         'PCT_FGA_2PT', 'PCT_FGA_3PT', 'PCT_PTS_FT', 'CFG_PCT',
                        'UFG_PCT', 'DFG_PCT'], 
                       errors= 'ignore')

        # Convert date to datetime
        player_full['GAME_DATE'] = pd.to_datetime(player_full['GAME_DATE'])
        
        # Convert W/L to a binary 1/0 for win/loss
        player_full['WL'] = (player_full['WL'].str[0] == 'W').astype(int)
        
        # Add a binary home_game feature
        player_full['HOME_GAME'] = 0 
        player_full['HOME_GAME'] = (player_full['MATCHUP'].str[:6] == player_full['TEAM_ABBREVIATION'].str[:] + ' vs').astype(int)
        
        # convert the percentages into raw numbers (because we will be averaging them, we don't want to average percentages)
        # for example, percentage of points from midrange will be points scored from midrange
        player_full['FG2M'] = player_full['FGM'] - player_full['FG3M']
        player_full['FG2A'] = player_full['FGA'] - player_full['FG3A']
        player_full['PTS_2PT'] = (player_full['PTS'] * player_full['PCT_PTS_2PT']).astype('int8')
        player_full['PTS_2PT_MR'] = (player_full['PTS'] * player_full['PCT_PTS_2PT_MR']).astype('int8')
        player_full['PTS_3PT'] = (player_full['PTS'] * player_full['PCT_PTS_3PT']).astype('int8')
        player_full['PTS_FB'] = (player_full['PTS'] * player_full['PCT_PTS_FB']).astype('int8')
        player_full['PTS_OFF_TOV'] = (player_full['PTS'] * player_full['PCT_PTS_OFF_TOV']).astype('int8')
        player_full['PTS_PAINT'] = (player_full['PTS'] * player_full['PCT_PTS_PAINT']).astype('int8')
        player_full['AST_2PM'] = (player_full['FG2M'] * player_full['PCT_AST_2PM']).astype('int8')
        player_full['UAST_2PM'] = (player_full['FG2M'] * player_full['PCT_UAST_2PM']).astype('int8')
        player_full['AST_3PM'] = (player_full['FG3M'] * player_full['PCT_AST_3PM']).astype('int8')
        player_full['UAST_3PM'] = (player_full['FG3M'] * player_full['PCT_UAST_3PM']).astype('int8')



        player_full = player_full.drop(columns = ['PCT_PTS_2PT', 'PCT_PTS_2PT_MR', 'PCT_PTS_3PT', 
                                                  'PCT_PTS_FB', 'PCT_PTS_OFF_TOV', 'PCT_PTS_PAINT',
                                                  'PCT_AST_2PM', 'PCT_UAST_2PM', 'PCT_AST_3PM',
                                                  'PCT_UAST_3PM', 'PCT_AST_FGM', 'PCT_UAST_FGM', 
                                                  'FGM', 'FGA', 'AST_TOV', 'USG_PCT', 'PACE',
                                                  'PACE_PER40', ], errors='ignore')
        
        
        full_player_data.append(player_full)
        
        
    
    player_df = pd.concat(full_player_data)
    
    
    player_df['home_team_abbr'] = player_df.apply(lambda row: row['TEAM_ABBREVIATION'] if row['HOME_GAME'] == 1 else row['MATCHUP'][-3:], axis=1)
    player_df['away_team_abbr'] = player_df.apply(lambda row: row['TEAM_ABBREVIATION'] if row['HOME_GAME'] == 0 else row['MATCHUP'][-3:], axis=1)
    
    player_df[['home_team_abbr', 'away_team_abbr']] = player_df[['home_team_abbr', 'away_team_abbr']].replace({'NOH':'NOP',
                                                                                                               'NJN':'BKN'})
    
    # Reorder columns
    player_df = player_df[['SEASON_YEAR', 'TEAM_ABBREVIATION', 'PLAYER_NAME', 'PLAYER_ID', 
                           'home_team_abbr', 'away_team_abbr', 'GAME_ID', 
                           'GAME_DATE', 'MATCHUP', 'WL', 'HOME_GAME', 'MIN',
                           'FG3M', 'FG3A', 'FTM', 'FTA', 'OREB', 'DREB',
                           'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD',
                           'PTS', 'PLUS_MINUS', 'E_OFF_RATING', 'E_DEF_RATING',
                           'AST_RATIO', 'TM_TOV_PCT', 'E_USG_PCT',
                           'E_PACE', 'POSS', 'PIE', 'SPD', 'DIST', 'ORBC', 
                           'DRBC', 'RBC', 'TCHS', 'SAST', 'FTAST', 'PASS', 
                           'CFGM', 'CFGA', 'UFGM', 'UFGA', 'DFGM', 'DFGA', 
                           'FG2M', 'FG2A', 'PTS_2PT_MR', 'PTS_FB', 
                           'PTS_OFF_TOV', 'PTS_PAINT', 'AST_2PM',
                           'UAST_2PM', 'AST_3PM', 'UAST_3PM']]
    
    return player_df

player_df = gather_data(seasons)

['2013-14', '2014-15', '2015-16', '2016-17', '2017-18', '2018-19', '2019-20', '2020-21']


In [187]:
def aggregate_player_stats(df = player_df):

    seasons = player_df['SEASON_YEAR'].unique()

    df = df.sort_values('GAME_DATE')

    for season in seasons:
        season_df = df.loc[df['SEASON_YEAR'] == season]
        player_ids = season_df['PLAYER_ID'].unique()
        for p_id in tqdm(player_ids, desc='Progress'):
            player_log = season_df.loc[season_df['PLAYER_ID'] == p_id]

            avg_player_data = player_log.iloc[:, 11:].shift(1).rolling(10, min_periods=5).mean()

            cols = avg_player_data.columns

            df.loc[(df['SEASON_YEAR'] == season) & (df['PLAYER_ID'] == p_id), cols] = avg_player_data

    df.to_csv('player_avg_last10.csv', index=False)
        
    return df
    
player_stats = aggregate_player_stats(df=player_df)

Progress: 100%|██████████| 482/482 [00:47<00:00, 10.16it/s]
Progress: 100%|██████████| 492/492 [00:49<00:00,  9.85it/s]
Progress: 100%|██████████| 476/476 [00:44<00:00, 10.70it/s]
Progress: 100%|██████████| 486/486 [00:45<00:00, 10.74it/s]
Progress: 100%|██████████| 540/540 [00:49<00:00, 10.98it/s]
Progress: 100%|██████████| 530/530 [00:50<00:00, 10.45it/s]
Progress: 100%|██████████| 529/529 [00:48<00:00, 10.88it/s]
Progress: 100%|██████████| 474/474 [00:44<00:00, 10.66it/s]


In [192]:
player_stats

Unnamed: 0,SEASON_YEAR,TEAM_ABBREVIATION,PLAYER_NAME,PLAYER_ID,home_team_abbr,away_team_abbr,GAME_ID,GAME_DATE,MATCHUP,WL,HOME_GAME,MIN,FG3M,FG3A,FTM,FTA,OREB,DREB,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,E_OFF_RATING,E_DEF_RATING,AST_RATIO,TM_TOV_PCT,E_USG_PCT,E_PACE,POSS,PIE,SPD,DIST,ORBC,DRBC,RBC,TCHS,SAST,FTAST,PASS,CFGM,CFGA,UFGM,UFGA,DFGM,DFGA,FG2M,FG2A,PTS_2PT_MR,PTS_FB,PTS_OFF_TOV,PTS_PAINT,AST_2PM,UAST_2PM,AST_3PM,UAST_3PM
25570,2013-14,IND,C.J. Watson,201228,IND,ORL,0021300001,2013-10-29,IND vs. ORL,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25585,2013-14,LAL,Jodie Meeks,201975,LAL,LAC,0021300003,2013-10-29,LAL vs. LAC,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25584,2013-14,CHI,Tony Snell,203503,MIA,CHI,0021300002,2013-10-29,CHI @ MIA,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25583,2013-14,ORL,Andrew Nicholson,203094,IND,ORL,0021300001,2013-10-29,ORL @ IND,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25582,2013-14,LAC,Darren Collison,201954,LAL,LAC,0021300003,2013-10-29,LAC @ LAL,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,2020-21,UTA,Jordan Clarkson,203903,ATL,UTA,0022000338,2021-02-04,UTA @ ATL,1,0,26.405333,3.200000,9.400000,2.1,2.100000,0.600000,3.600000,2.0,1.300000,0.900000,0.300000,0.400000,1.400000,2.100000,17.700000,3.500000,119.960000,111.760000,10.370000,7.140000,0.268200,101.229000,55.300000,0.120200,4.396000,2.030000,1.000000,5.100000,6.100000,40.200000,0.2,0.400000,22.200000,2.500000,5.600000,3.700000,8.700000,0.600000,1.000000,3.000000,5.2,0.200000,2.400000,3.400000,5.400000,0.600000,2.200000,2.100000,1.100000
85,2020-21,ATL,Danilo Gallinari,201568,ATL,UTA,0022000338,2021-02-04,ATL vs. UTA,0,1,16.450185,1.444444,3.777778,3.0,3.111111,0.222222,1.777778,1.0,1.333333,0.111111,0.111111,0.444444,1.333333,1.888889,10.444444,2.111111,128.833333,127.933333,6.877778,12.933333,0.262111,98.663333,34.888889,0.096667,3.906667,1.156667,0.666667,3.222222,3.777778,30.555556,0.0,0.111111,20.777778,0.666667,1.888889,2.333333,4.888889,1.222222,2.111111,1.555556,3.0,1.444444,0.222222,0.666667,1.444444,0.666667,0.666667,1.333333,0.111111
86,2020-21,GSW,Brad Wanamaker,202954,DAL,GSW,0022000339,2021-02-04,GSW @ DAL,1,0,16.988167,0.300000,1.800000,0.7,0.800000,0.100000,1.100000,2.8,0.800000,0.600000,0.100000,0.500000,1.600000,1.400000,4.800000,-1.200000,103.950000,110.540000,29.540000,7.910000,0.145300,98.559000,35.300000,0.064400,4.286000,1.213000,0.400000,1.900000,2.300000,34.700000,0.3,0.100000,27.800000,1.000000,1.700000,0.800000,2.500000,1.000000,1.100000,1.600000,2.8,0.900000,1.000000,0.800000,2.100000,1.100000,0.500000,0.200000,0.100000
74,2020-21,DAL,Jalen Brunson,1628973,DAL,GSW,0022000339,2021-02-04,DAL vs. GSW,0,1,27.394167,1.100000,2.400000,2.1,2.300000,0.300000,2.700000,3.3,1.100000,0.400000,0.000000,0.200000,1.800000,3.000000,11.800000,-5.000000,103.130000,114.200000,24.460000,8.060000,0.167500,100.050000,57.300000,0.106800,3.915000,1.908000,1.000000,5.000000,5.900000,42.000000,0.3,0.400000,31.400000,1.700000,3.300000,2.200000,4.400000,1.200000,1.700000,3.200000,5.7,1.000000,1.200000,0.900000,4.700000,0.600000,2.500000,0.900000,0.100000


In [214]:
test2 = player_stats.loc[player_stats['GAME_ID'] == '0021501223']
home = test2.loc[test2['HOME_GAME'] == 1].sort_values('MIN', ascending=False)
away = test2.loc[test2['HOME_GAME'] == 0].sort_values('MIN', ascending=False)

home_cols = home.columns[:11]
away_cols = away.columns[:11]

home_matchup_info = home[home_cols].iloc[0].values.reshape(1, -1)


home_stats = home.iloc[:8, 11:].values.reshape(1, -1)
away_stats = away.iloc[:8, 11:].values.reshape(1, -1)

print(home_stats.shape, away_stats.shape)



(1, 392) (1, 392)


(1, 392)

In [205]:
player_stats.iloc[:, 11:]

Unnamed: 0,MIN,FG3M,FG3A,FTM,FTA,OREB,DREB,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,E_OFF_RATING,E_DEF_RATING,AST_RATIO,TM_TOV_PCT,E_USG_PCT,E_PACE,POSS,PIE,SPD,DIST,ORBC,DRBC,RBC,TCHS,SAST,FTAST,PASS,CFGM,CFGA,UFGM,UFGA,DFGM,DFGA,FG2M,FG2A,PTS_2PT_MR,PTS_FB,PTS_OFF_TOV,PTS_PAINT,AST_2PM,UAST_2PM,AST_3PM,UAST_3PM
25570,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25585,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25584,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25583,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25582,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,26.405333,3.200000,9.400000,2.1,2.100000,0.600000,3.600000,2.0,1.300000,0.900000,0.300000,0.400000,1.400000,2.100000,17.700000,3.500000,119.960000,111.760000,10.370000,7.140000,0.268200,101.229000,55.300000,0.120200,4.396000,2.030000,1.000000,5.100000,6.100000,40.200000,0.2,0.400000,22.200000,2.500000,5.600000,3.700000,8.700000,0.600000,1.000000,3.000000,5.2,0.200000,2.400000,3.400000,5.400000,0.600000,2.200000,2.100000,1.100000
85,16.450185,1.444444,3.777778,3.0,3.111111,0.222222,1.777778,1.0,1.333333,0.111111,0.111111,0.444444,1.333333,1.888889,10.444444,2.111111,128.833333,127.933333,6.877778,12.933333,0.262111,98.663333,34.888889,0.096667,3.906667,1.156667,0.666667,3.222222,3.777778,30.555556,0.0,0.111111,20.777778,0.666667,1.888889,2.333333,4.888889,1.222222,2.111111,1.555556,3.0,1.444444,0.222222,0.666667,1.444444,0.666667,0.666667,1.333333,0.111111
86,16.988167,0.300000,1.800000,0.7,0.800000,0.100000,1.100000,2.8,0.800000,0.600000,0.100000,0.500000,1.600000,1.400000,4.800000,-1.200000,103.950000,110.540000,29.540000,7.910000,0.145300,98.559000,35.300000,0.064400,4.286000,1.213000,0.400000,1.900000,2.300000,34.700000,0.3,0.100000,27.800000,1.000000,1.700000,0.800000,2.500000,1.000000,1.100000,1.600000,2.8,0.900000,1.000000,0.800000,2.100000,1.100000,0.500000,0.200000,0.100000
74,27.394167,1.100000,2.400000,2.1,2.300000,0.300000,2.700000,3.3,1.100000,0.400000,0.000000,0.200000,1.800000,3.000000,11.800000,-5.000000,103.130000,114.200000,24.460000,8.060000,0.167500,100.050000,57.300000,0.106800,3.915000,1.908000,1.000000,5.000000,5.900000,42.000000,0.3,0.400000,31.400000,1.700000,3.300000,2.200000,4.400000,1.200000,1.700000,3.200000,5.7,1.000000,1.200000,0.900000,4.700000,0.600000,2.500000,0.900000,0.100000


In [201]:
49 * 8 + 8 

392

In [218]:
def long_to_wide(df = player_stats):
    
    
    new_cols = ['SEASON_YEAR', 'home_team_abbr',
                'away_team_abbr', 'GAME_ID', 'GAME_DATE', 
                'MATCHUP', 'WL', 'HOME_GAME']
    

    # Number of columns in player_stats minus the 11 matchup info columns
    num_stat_cols = player_stats.shape[1] - 11
    
    matchup_data = np.empty((0, len(new_cols) + 2*num_stat_cols*8))
       
    
    # All game_ids
    game_ids = df['GAME_ID'].unique()
     
    for game_id in tqdm(game_ids, desc="Progress"):
        
        # Select all the players boxscores from one game
        game_df = df.loc[df['GAME_ID'] == game_id]
               
        # Select the home team players, sort by minutes
        home_team = game_df.loc[game_df['HOME_GAME'] == 1].sort_values('MIN', ascending=False)
        
        # Select matchup info

        home_matchup_info = home_team[new_cols].iloc[0].values.reshape(1, -1)
        # unroll the home players' stats up to player 12 into one long row
        home_stats_row = home_team.iloc[:8, 11:].values.reshape(1, -1)
        
        # if 
        if home_stats_row.shape[1] < 8*num_stat_cols:
            missing_stats = 8*num_stat_cols - home_stats_row.shape[1]
            padding = np.zeros((1, missing_stats))
            home_stats_row = np.append(home_stats_row, padding, axis=1)      
        
        home_stats_row = np.append(home_matchup_info, home_stats_row, axis=1)
        
        away_team = game_df.loc[game_df['HOME_GAME'] == 0].sort_values('MIN', ascending=False)
        
        
        away_stats_row = away_team.iloc[:8, 11:].values.reshape(1, -1)

        if away_stats_row.shape[1] < 8*num_stat_cols:
            missing_stats = 8*num_stat_cols - away_stats_row.shape[1]
            padding = np.zeros((1, missing_stats))
            away_stats_row = np.append(away_stats_row, padding, axis=1)        
    
        matchup_row = np.append(home_stats_row, away_stats_row, axis=1)    
        matchup_data = np.append(matchup_data, matchup_row, axis=0)
        

    for i in range(1, 9):
        home_player_cols = ['home_P{}_'.format(i) + x for x in df.columns[11:]]
        new_cols.extend(home_player_cols)
    
    for i in range(1, 9):
        away_player_cols = ['away_P{}_'.format(i) + x for x in df.columns[11:]]
        new_cols.extend(away_player_cols)
        
    matchup_df = pd.DataFrame(matchup_data, columns = new_cols)
    
    return matchup_df


matchup_df = long_to_wide(df=player_stats)
matchup_df.to_csv("matchup_boxscores_wide.csv", index=False)
matchup_df
    

Progress: 100%|██████████| 8758/8758 [09:09<00:00, 15.94it/s]


Unnamed: 0,SEASON_YEAR,home_team_abbr,away_team_abbr,GAME_ID,GAME_DATE,MATCHUP,WL,HOME_GAME,home_P1_MIN,home_P1_FG3M,home_P1_FG3A,home_P1_FTM,home_P1_FTA,home_P1_OREB,home_P1_DREB,home_P1_AST,home_P1_TOV,home_P1_STL,home_P1_BLK,home_P1_BLKA,home_P1_PF,home_P1_PFD,home_P1_PTS,home_P1_PLUS_MINUS,home_P1_E_OFF_RATING,home_P1_E_DEF_RATING,home_P1_AST_RATIO,home_P1_TM_TOV_PCT,home_P1_E_USG_PCT,home_P1_E_PACE,home_P1_POSS,home_P1_PIE,home_P1_SPD,home_P1_DIST,home_P1_ORBC,home_P1_DRBC,home_P1_RBC,home_P1_TCHS,home_P1_SAST,home_P1_FTAST,home_P1_PASS,home_P1_CFGM,home_P1_CFGA,home_P1_UFGM,home_P1_UFGA,home_P1_DFGM,home_P1_DFGA,home_P1_FG2M,home_P1_FG2A,home_P1_PTS_2PT_MR,home_P1_PTS_FB,home_P1_PTS_OFF_TOV,home_P1_PTS_PAINT,home_P1_AST_2PM,home_P1_UAST_2PM,home_P1_AST_3PM,home_P1_UAST_3PM,home_P2_MIN,home_P2_FG3M,home_P2_FG3A,home_P2_FTM,home_P2_FTA,home_P2_OREB,home_P2_DREB,home_P2_AST,home_P2_TOV,home_P2_STL,home_P2_BLK,home_P2_BLKA,home_P2_PF,home_P2_PFD,home_P2_PTS,home_P2_PLUS_MINUS,home_P2_E_OFF_RATING,home_P2_E_DEF_RATING,home_P2_AST_RATIO,home_P2_TM_TOV_PCT,home_P2_E_USG_PCT,home_P2_E_PACE,home_P2_POSS,home_P2_PIE,home_P2_SPD,home_P2_DIST,home_P2_ORBC,home_P2_DRBC,home_P2_RBC,home_P2_TCHS,home_P2_SAST,home_P2_FTAST,home_P2_PASS,home_P2_CFGM,home_P2_CFGA,home_P2_UFGM,home_P2_UFGA,home_P2_DFGM,home_P2_DFGA,home_P2_FG2M,home_P2_FG2A,home_P2_PTS_2PT_MR,home_P2_PTS_FB,...,away_P6_AST_3PM,away_P6_UAST_3PM,away_P7_MIN,away_P7_FG3M,away_P7_FG3A,away_P7_FTM,away_P7_FTA,away_P7_OREB,away_P7_DREB,away_P7_AST,away_P7_TOV,away_P7_STL,away_P7_BLK,away_P7_BLKA,away_P7_PF,away_P7_PFD,away_P7_PTS,away_P7_PLUS_MINUS,away_P7_E_OFF_RATING,away_P7_E_DEF_RATING,away_P7_AST_RATIO,away_P7_TM_TOV_PCT,away_P7_E_USG_PCT,away_P7_E_PACE,away_P7_POSS,away_P7_PIE,away_P7_SPD,away_P7_DIST,away_P7_ORBC,away_P7_DRBC,away_P7_RBC,away_P7_TCHS,away_P7_SAST,away_P7_FTAST,away_P7_PASS,away_P7_CFGM,away_P7_CFGA,away_P7_UFGM,away_P7_UFGA,away_P7_DFGM,away_P7_DFGA,away_P7_FG2M,away_P7_FG2A,away_P7_PTS_2PT_MR,away_P7_PTS_FB,away_P7_PTS_OFF_TOV,away_P7_PTS_PAINT,away_P7_AST_2PM,away_P7_UAST_2PM,away_P7_AST_3PM,away_P7_UAST_3PM,away_P8_MIN,away_P8_FG3M,away_P8_FG3A,away_P8_FTM,away_P8_FTA,away_P8_OREB,away_P8_DREB,away_P8_AST,away_P8_TOV,away_P8_STL,away_P8_BLK,away_P8_BLKA,away_P8_PF,away_P8_PFD,away_P8_PTS,away_P8_PLUS_MINUS,away_P8_E_OFF_RATING,away_P8_E_DEF_RATING,away_P8_AST_RATIO,away_P8_TM_TOV_PCT,away_P8_E_USG_PCT,away_P8_E_PACE,away_P8_POSS,away_P8_PIE,away_P8_SPD,away_P8_DIST,away_P8_ORBC,away_P8_DRBC,away_P8_RBC,away_P8_TCHS,away_P8_SAST,away_P8_FTAST,away_P8_PASS,away_P8_CFGM,away_P8_CFGA,away_P8_UFGM,away_P8_UFGA,away_P8_DFGM,away_P8_DFGA,away_P8_FG2M,away_P8_FG2A,away_P8_PTS_2PT_MR,away_P8_PTS_FB,away_P8_PTS_OFF_TOV,away_P8_PTS_PAINT,away_P8_AST_2PM,away_P8_UAST_2PM,away_P8_AST_3PM,away_P8_UAST_3PM
0,2013-14,IND,ORL,0021300001,2013-10-29,IND vs. ORL,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2013-14,LAL,LAC,0021300003,2013-10-29,LAL vs. LAC,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2013-14,MIA,CHI,0021300002,2013-10-29,MIA vs. CHI,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2013-14,NYK,MIL,0021300008,2013-10-30,NYK vs. MIL,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2013-14,CLE,BKN,0021300004,2013-10-30,CLE vs. BKN,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8753,2020-21,ATL,UTA,0022000338,2021-02-04,ATL vs. UTA,0,1,36.061,2.3,5.3,0.6,0.8,0.8,3.5,4.3,1.3,1.2,0.2,0.6,2.4,0.8,11.9,0.8,111.09,108,23.96,7.41,0.1494,101.766,75.7,0.0687,4.194,2.686,1.5,5.9,7.4,51,0.1,0.2,36.7,1,3.7,3.5,7.3,2.1,3.1,2.2,5.7,0.7,1.3,2.1,3.5,1.3,0.8,1.8,0.3,35.5072,1.7,3.5,1.3,1.6,1.9,6.5,1.6,1.4,0.1,1.4,1,3.5,2.3,19.4,4.6,111.19,103.89,10.2,7.83,0.1958,100.509,74,0.1173,4.243,2.671,4.6,11.9,16.3,65.3,0.5,0.1,47.8,3.4,6.6,4.8,7.6,4,6.9,6.5,10.7,2.1,1.1,...,2.1,1.1,24.217,2.9,5.7,0.5,0.6,0.3,2.2,4.8,1.6,0.3,0.2,0.1,1.7,2,11.4,4.2,121.69,110.58,32.87,10.09,0.1677,101.033,50.5,0.1141,4.288,1.826,0.6,3.8,4.3,51.7,0.8,0.8,40.7,0.8,1.5,3.2,6.2,1.5,1.5,1.1,2,0,1.4,1,2,0.5,0.6,2.1,0.6,16.3397,0,0.1,1.1,1.4,2.1,3.8,0.6,0.5,0.5,1.1,0.3,2.6,1,5.1,2,117.52,110.02,16.25,8.92,0.1257,100.522,33.8,0.0934,4.198,1.188,4,5.9,9.7,16.8,0.2,0,12.1,1.1,2,0.9,1.6,3,5.6,2,3.5,0.2,0.2,0.7,3.5,1.4,0.5,0,0
8754,2020-21,MEM,HOU,0022000340,2021-02-04,MEM vs. HOU,0,1,29.3755,0.7,2.1,1.1,1.5,1.9,4.4,2.1,0.5,1.6,0.7,0.8,1.7,1.9,15,-3.3,106.56,108.93,12.88,2.84,0.1996,102.432,61.6,0.1194,4.405,2.283,3.7,7.3,10.7,40.9,0.6,0,26.3,3.5,6.4,3.1,6.3,2.1,3.7,5.9,10.7,0.8,2.2,2.8,10.6,4.8,0.8,0.7,0,27.9746,0.75,2.625,4.375,5.25,0.5,1.5,7.375,3.375,1.125,0.5,1.625,1.75,4.125,19.375,-0.125,113.062,110.537,27.1125,14.225,0.295,106.757,62.125,0.11525,4.3725,2.1925,2.25,3.625,5.625,65.375,0.125,0.375,43.375,4.25,8,2.875,6,1.625,2.625,6.375,11.375,0.75,2.875,...,0.8,0,21.565,1.9,5.8,2.2,3,1.5,6.4,3.2,2.2,0.9,0.5,0.5,3.7,2.2,10.3,-2.9,98.7,113.19,21.35,14.62,0.2407,105.324,46.7,0.0941,3.668,1.221,2.9,8.2,10.7,42.6,0.2,0.1,31.2,0.4,2,2.5,5.9,2.6,3.5,1.2,3.4,0,0.1,1.2,2.2,0.5,0.7,1.8,0.1,21.456,1.5,3.8,0.5,0.5,0.9,2.9,2,0.9,1.5,0.5,0.7,2,0.7,8,3.8,110.79,101.16,20.32,9.81,0.1568,103.801,45.8,0.1024,4.241,1.463,1.9,4.3,6,25.9,0,0.1,17.4,1,2.6,1.8,4.1,1.6,2.3,1.5,3.4,0,1.3,1.8,3,0.8,0.7,1.5,0
8755,2020-21,LAL,DEN,0022000341,2021-02-04,LAL vs. DEN,1,1,35.0242,3.4,7.4,4.5,6.1,0.5,6.6,7.6,3.4,1,0.5,0.6,1.8,4.7,26.1,6.7,110.63,100.55,24.18,10.71,0.3107,98.414,71.4,0.1994,3.536,2.198,2.5,9.7,11.5,92.9,1.3,0.8,65.3,2.5,4.9,6.6,13.5,2.3,3.3,5.7,11,1.5,3.6,4,9.1,1.9,3.3,1.5,1.7,33.1683,0.4,1.9,4.8,6.9,2.2,6.7,3.4,1.5,1.3,2.2,0.9,2,4.9,21.8,5.7,112.28,102.01,13.83,5.79,0.2707,101.755,68.5,0.1704,3.659,2.153,3.9,10.3,14.1,60.9,0.4,0.2,38.8,4.7,7.8,3.6,8.7,5.2,7.9,7.9,14.6,2.4,0.9,...,0.8,0.1,21.5145,1.9,4.3,1.7,1.9,1.9,4.9,1,0.7,0.6,0.2,0.6,2.4,1.3,10.2,5.6,115.45,107.25,10.97,7.25,0.1842,99.639,44.7,0.1076,3.691,1.356,3.7,4.9,8,26.7,0.2,0,18.7,1.1,2.7,1.7,4.1,1.3,1.9,1.4,3.6,0.1,0.3,1.4,2.2,1,0.4,1.9,0,12.6665,0.8,2.3,0.3,0.4,0.3,0.2,1.9,0.7,0.8,0.1,0,2,1,3.1,2.8,114.1,103.6,37.95,11.34,0.1347,99.417,26.9,0.0178,3.592,0.855,0.1,0.4,0.5,20.4,0.2,0.1,15.8,0.1,0.5,0.9,2.6,0.2,0.3,0.2,0.9,0.2,0.3,0.6,0.2,0.1,0.1,0.6,0.2
8756,2020-21,DAL,GSW,0022000339,2021-02-04,DAL vs. GSW,0,1,35.305,1.7,6.1,6.4,7.7,0.5,7,9.8,3.4,0.9,0.6,0.8,1.7,5.4,26.1,-3.3,109.59,116.84,27.03,9.52,0.3316,101.026,73.1,0.1808,3.521,2.165,1.8,9.3,10.8,77.1,0.7,0.9,50,2.6,5.9,5.3,11.4,2.1,3.6,7.3,13.6,2.5,2.8,2.6,11.6,0.6,6.2,0.2,1.4,32.5825,1.1,5,2.4,2.6,1.1,1.8,1.8,1.4,0.9,0.4,0.3,3.1,2.4,11.9,-2.6,106.37,110.55,11.09,8.64,0.1851,100.186,66.6,0.0407,4.263,2.474,2.1,3.7,5.8,44.4,0.8,0.1,29.3,1.5,3.7,2.5,7.3,0.4,0.8,3.1,6.2,1.4,1.8,...,0.2,0.1,15.6342,0.75,1.375,0.5,0.75,0.25,2.625,1.625,0.625,0.375,0.625,0.125,1.875,1.125,4.75,-2.625,94.95,106.762,35.3625,8.0375,0.085375,111.045,34.875,0.11575,4.62125,1.29,1.125,4.5,5.5,32.75,0,0.125,28.875,0.625,0.875,1.125,1.875,1.375,2,1,1.375,0,0.875,0.625,2,0.875,0.125,0.75,0,14.0382,0.8,1.8,1,1.2,0.4,2.5,1.2,1.1,1,0.5,0,2.4,1.6,5.4,1,106.45,104.63,19.01,22.28,0.1556,103.879,30.2,0.0929,4.449,1.047,0.8,4.2,4.8,20,0.1,0,14.5,0.5,0.7,1.2,2.5,0.7,1.2,1,1.7,0.2,1.6,1.2,1.7,0.3,0.6,0.8,0


## Matchup Data - Run from here to avoid having to run prior functions

In [2]:


matchup_df = pd.read_csv('matchup_boxscores_wide.csv', parse_dates=['GAME_DATE'])
matchup_df

Unnamed: 0,SEASON_YEAR,home_team_abbr,away_team_abbr,GAME_ID,GAME_DATE,MATCHUP,WL,HOME_GAME,home_P1_MIN,home_P1_FG3M,home_P1_FG3A,home_P1_FTM,home_P1_FTA,home_P1_OREB,home_P1_DREB,home_P1_AST,home_P1_TOV,home_P1_STL,home_P1_BLK,home_P1_BLKA,home_P1_PF,home_P1_PFD,home_P1_PTS,home_P1_PLUS_MINUS,home_P1_E_OFF_RATING,home_P1_E_DEF_RATING,home_P1_AST_RATIO,home_P1_TM_TOV_PCT,home_P1_E_USG_PCT,home_P1_E_PACE,home_P1_POSS,home_P1_PIE,home_P1_SPD,home_P1_DIST,home_P1_ORBC,home_P1_DRBC,home_P1_RBC,home_P1_TCHS,home_P1_SAST,home_P1_FTAST,home_P1_PASS,home_P1_CFGM,home_P1_CFGA,home_P1_UFGM,home_P1_UFGA,home_P1_DFGM,home_P1_DFGA,home_P1_FG2M,home_P1_FG2A,home_P1_PTS_2PT_MR,home_P1_PTS_FB,home_P1_PTS_OFF_TOV,home_P1_PTS_PAINT,home_P1_AST_2PM,home_P1_UAST_2PM,home_P1_AST_3PM,home_P1_UAST_3PM,home_P2_MIN,home_P2_FG3M,home_P2_FG3A,home_P2_FTM,home_P2_FTA,home_P2_OREB,home_P2_DREB,home_P2_AST,home_P2_TOV,home_P2_STL,home_P2_BLK,home_P2_BLKA,home_P2_PF,home_P2_PFD,home_P2_PTS,home_P2_PLUS_MINUS,home_P2_E_OFF_RATING,home_P2_E_DEF_RATING,home_P2_AST_RATIO,home_P2_TM_TOV_PCT,home_P2_E_USG_PCT,home_P2_E_PACE,home_P2_POSS,home_P2_PIE,home_P2_SPD,home_P2_DIST,home_P2_ORBC,home_P2_DRBC,home_P2_RBC,home_P2_TCHS,home_P2_SAST,home_P2_FTAST,home_P2_PASS,home_P2_CFGM,home_P2_CFGA,home_P2_UFGM,home_P2_UFGA,home_P2_DFGM,home_P2_DFGA,home_P2_FG2M,home_P2_FG2A,home_P2_PTS_2PT_MR,home_P2_PTS_FB,...,away_P6_AST_3PM,away_P6_UAST_3PM,away_P7_MIN,away_P7_FG3M,away_P7_FG3A,away_P7_FTM,away_P7_FTA,away_P7_OREB,away_P7_DREB,away_P7_AST,away_P7_TOV,away_P7_STL,away_P7_BLK,away_P7_BLKA,away_P7_PF,away_P7_PFD,away_P7_PTS,away_P7_PLUS_MINUS,away_P7_E_OFF_RATING,away_P7_E_DEF_RATING,away_P7_AST_RATIO,away_P7_TM_TOV_PCT,away_P7_E_USG_PCT,away_P7_E_PACE,away_P7_POSS,away_P7_PIE,away_P7_SPD,away_P7_DIST,away_P7_ORBC,away_P7_DRBC,away_P7_RBC,away_P7_TCHS,away_P7_SAST,away_P7_FTAST,away_P7_PASS,away_P7_CFGM,away_P7_CFGA,away_P7_UFGM,away_P7_UFGA,away_P7_DFGM,away_P7_DFGA,away_P7_FG2M,away_P7_FG2A,away_P7_PTS_2PT_MR,away_P7_PTS_FB,away_P7_PTS_OFF_TOV,away_P7_PTS_PAINT,away_P7_AST_2PM,away_P7_UAST_2PM,away_P7_AST_3PM,away_P7_UAST_3PM,away_P8_MIN,away_P8_FG3M,away_P8_FG3A,away_P8_FTM,away_P8_FTA,away_P8_OREB,away_P8_DREB,away_P8_AST,away_P8_TOV,away_P8_STL,away_P8_BLK,away_P8_BLKA,away_P8_PF,away_P8_PFD,away_P8_PTS,away_P8_PLUS_MINUS,away_P8_E_OFF_RATING,away_P8_E_DEF_RATING,away_P8_AST_RATIO,away_P8_TM_TOV_PCT,away_P8_E_USG_PCT,away_P8_E_PACE,away_P8_POSS,away_P8_PIE,away_P8_SPD,away_P8_DIST,away_P8_ORBC,away_P8_DRBC,away_P8_RBC,away_P8_TCHS,away_P8_SAST,away_P8_FTAST,away_P8_PASS,away_P8_CFGM,away_P8_CFGA,away_P8_UFGM,away_P8_UFGA,away_P8_DFGM,away_P8_DFGA,away_P8_FG2M,away_P8_FG2A,away_P8_PTS_2PT_MR,away_P8_PTS_FB,away_P8_PTS_OFF_TOV,away_P8_PTS_PAINT,away_P8_AST_2PM,away_P8_UAST_2PM,away_P8_AST_3PM,away_P8_UAST_3PM
0,2013-14,IND,ORL,21300001,2013-10-29,IND vs. ORL,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2013-14,LAL,LAC,21300003,2013-10-29,LAL vs. LAC,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2013-14,MIA,CHI,21300002,2013-10-29,MIA vs. CHI,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2013-14,NYK,MIL,21300008,2013-10-30,NYK vs. MIL,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2013-14,CLE,BKN,21300004,2013-10-30,CLE vs. BKN,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8753,2020-21,ATL,UTA,22000338,2021-02-04,ATL vs. UTA,0,1,36.061000,2.3,5.3,0.6,0.8,0.8,3.5,4.3,1.3,1.2,0.2,0.6,2.4,0.8,11.9,0.8,111.09,108.00,23.96,7.41,0.1494,101.766,75.7,0.0687,4.194,2.686,1.5,5.9,7.4,51.0,0.1,0.2,36.7,1.0,3.7,3.5,7.3,2.1,3.1,2.2,5.7,0.7,1.3,2.1,3.5,1.3,0.8,1.8,0.3,35.507167,1.70,3.500,1.300,1.60,1.9,6.5,1.600,1.400,0.100,1.4,1.000,3.50,2.300,19.400,4.600,111.1900,103.8900,10.2000,7.830,0.1958,100.5090,74.000,0.11730,4.2430,2.6710,4.60,11.900,16.300,65.300,0.500,0.100,47.800,3.40,6.6,4.800,7.6,4.000,6.900,6.500,10.700,2.10,1.100,...,2.1,1.1,24.217000,2.90,5.700,0.5,0.60,0.30,2.200,4.800,1.600,0.300,0.200,0.100,1.700,2.000,11.40,4.200,121.69,110.5800,32.8700,10.0900,0.167700,101.033,50.500,0.11410,4.28800,1.826,0.600,3.8,4.3,51.70,0.8,0.800,40.700,0.800,1.500,3.200,6.200,1.500,1.5,1.1,2.000,0.0,1.400,1.000,2.0,0.500,0.600,2.10,0.6,16.339667,0.000,0.1,1.10,1.400,2.100,3.8,0.60,0.50,0.500,1.100,0.300,2.6,1.000,5.100,2.00,117.5200,110.0200,16.2500,8.92,0.125700,100.5220,33.8,0.09340,4.19800,1.18800,4.00,5.90,9.7,16.800,0.2,0.0,12.100,1.10,2.0,0.900,1.600,3.000,5.600,2.0,3.500,0.2,0.2,0.7,3.5,1.400,0.500,0.00,0.000
8754,2020-21,MEM,HOU,22000340,2021-02-04,MEM vs. HOU,0,1,29.375500,0.7,2.1,1.1,1.5,1.9,4.4,2.1,0.5,1.6,0.7,0.8,1.7,1.9,15.0,-3.3,106.56,108.93,12.88,2.84,0.1996,102.432,61.6,0.1194,4.405,2.283,3.7,7.3,10.7,40.9,0.6,0.0,26.3,3.5,6.4,3.1,6.3,2.1,3.7,5.9,10.7,0.8,2.2,2.8,10.6,4.8,0.8,0.7,0.0,27.974583,0.75,2.625,4.375,5.25,0.5,1.5,7.375,3.375,1.125,0.5,1.625,1.75,4.125,19.375,-0.125,113.0625,110.5375,27.1125,14.225,0.2950,106.7575,62.125,0.11525,4.3725,2.1925,2.25,3.625,5.625,65.375,0.125,0.375,43.375,4.25,8.0,2.875,6.0,1.625,2.625,6.375,11.375,0.75,2.875,...,0.8,0.0,21.565000,1.90,5.800,2.2,3.00,1.50,6.400,3.200,2.200,0.900,0.500,0.500,3.700,2.200,10.30,-2.900,98.70,113.1900,21.3500,14.6200,0.240700,105.324,46.700,0.09410,3.66800,1.221,2.900,8.2,10.7,42.60,0.2,0.100,31.200,0.400,2.000,2.500,5.900,2.600,3.5,1.2,3.400,0.0,0.100,1.200,2.2,0.500,0.700,1.80,0.1,21.456000,1.500,3.8,0.50,0.500,0.900,2.9,2.00,0.90,1.500,0.500,0.700,2.0,0.700,8.000,3.80,110.7900,101.1600,20.3200,9.81,0.156800,103.8010,45.8,0.10240,4.24100,1.46300,1.90,4.30,6.0,25.900,0.0,0.1,17.400,1.00,2.6,1.800,4.100,1.600,2.300,1.5,3.400,0.0,1.3,1.8,3.0,0.800,0.700,1.50,0.000
8755,2020-21,LAL,DEN,22000341,2021-02-04,LAL vs. DEN,1,1,35.024167,3.4,7.4,4.5,6.1,0.5,6.6,7.6,3.4,1.0,0.5,0.6,1.8,4.7,26.1,6.7,110.63,100.55,24.18,10.71,0.3107,98.414,71.4,0.1994,3.536,2.198,2.5,9.7,11.5,92.9,1.3,0.8,65.3,2.5,4.9,6.6,13.5,2.3,3.3,5.7,11.0,1.5,3.6,4.0,9.1,1.9,3.3,1.5,1.7,33.168333,0.40,1.900,4.800,6.90,2.2,6.7,3.400,1.500,1.300,2.2,0.900,2.00,4.900,21.800,5.700,112.2800,102.0100,13.8300,5.790,0.2707,101.7550,68.500,0.17040,3.6590,2.1530,3.90,10.300,14.100,60.900,0.400,0.200,38.800,4.70,7.8,3.600,8.7,5.200,7.900,7.900,14.600,2.40,0.900,...,0.8,0.1,21.514500,1.90,4.300,1.7,1.90,1.90,4.900,1.000,0.700,0.600,0.200,0.600,2.400,1.300,10.20,5.600,115.45,107.2500,10.9700,7.2500,0.184200,99.639,44.700,0.10760,3.69100,1.356,3.700,4.9,8.0,26.70,0.2,0.000,18.700,1.100,2.700,1.700,4.100,1.300,1.9,1.4,3.600,0.1,0.300,1.400,2.2,1.000,0.400,1.90,0.0,12.666500,0.800,2.3,0.30,0.400,0.300,0.2,1.90,0.70,0.800,0.100,0.000,2.0,1.000,3.100,2.80,114.1000,103.6000,37.9500,11.34,0.134700,99.4170,26.9,0.01780,3.59200,0.85500,0.10,0.40,0.5,20.400,0.2,0.1,15.800,0.10,0.5,0.900,2.600,0.200,0.300,0.2,0.900,0.2,0.3,0.6,0.2,0.100,0.100,0.60,0.200
8756,2020-21,DAL,GSW,22000339,2021-02-04,DAL vs. GSW,0,1,35.305000,1.7,6.1,6.4,7.7,0.5,7.0,9.8,3.4,0.9,0.6,0.8,1.7,5.4,26.1,-3.3,109.59,116.84,27.03,9.52,0.3316,101.026,73.1,0.1808,3.521,2.165,1.8,9.3,10.8,77.1,0.7,0.9,50.0,2.6,5.9,5.3,11.4,2.1,3.6,7.3,13.6,2.5,2.8,2.6,11.6,0.6,6.2,0.2,1.4,32.582500,1.10,5.000,2.400,2.60,1.1,1.8,1.800,1.400,0.900,0.4,0.300,3.10,2.400,11.900,-2.600,106.3700,110.5500,11.0900,8.640,0.1851,100.1860,66.600,0.04070,4.2630,2.4740,2.10,3.700,5.800,44.400,0.800,0.100,29.300,1.50,3.7,2.500,7.3,0.400,0.800,3.100,6.200,1.40,1.800,...,0.2,0.1,15.634167,0.75,1.375,0.5,0.75,0.25,2.625,1.625,0.625,0.375,0.625,0.125,1.875,1.125,4.75,-2.625,94.95,106.7625,35.3625,8.0375,0.085375,111.045,34.875,0.11575,4.62125,1.290,1.125,4.5,5.5,32.75,0.0,0.125,28.875,0.625,0.875,1.125,1.875,1.375,2.0,1.0,1.375,0.0,0.875,0.625,2.0,0.875,0.125,0.75,0.0,14.038167,0.800,1.8,1.00,1.200,0.400,2.5,1.20,1.10,1.000,0.500,0.000,2.4,1.600,5.400,1.00,106.4500,104.6300,19.0100,22.28,0.155600,103.8790,30.2,0.09290,4.44900,1.04700,0.80,4.20,4.8,20.000,0.1,0.0,14.500,0.50,0.7,1.200,2.500,0.700,1.200,1.0,1.700,0.2,1.6,1.2,1.7,0.300,0.600,0.80,0.000


In [3]:
# read betting data
betting_data = pd.read_csv('./data/nba_betting_data_2010_present.csv')

betting_data = betting_data.dropna()

In [4]:
def clean_bet_data(df = betting_data):
    df['date'] = pd.to_datetime(df['date'])
    df['home_team_abbr'] = df['home_team_abbr'].replace({'NY':'NYK',
                                                                            'GS':'GSW',
                                                                            'SA':'SAS',
                                                                            'BK':'BKN',
                                                                            'NO':'NOP',
                                                                            'PHO':'PHX'
                                                                                        }
                                                                             )
    df['away_team_abbr'] = df['away_team_abbr'].replace({'NY':'NYK',
                                                                            'GS':'GSW',
                                                                            'SA':'SAS',
                                                                            'BK':'BKN',
                                                                            'NO':'NOP',
                                                                            'PHO':'PHX'}
                                                                           )

    teams = df['home_team_abbr'].unique()

    df = df.sort_values(['date'])
    df['rest'] = np.nan
    for team in teams:
        team_data = df.loc[(df['home_team_abbr'] == team) | (df['away_team_abbr'] == team)]
        team_data['rest'] = (team_data['date'] - team_data['date'].shift(1)) / np.timedelta64(1, 'D')
        df.loc[(df['home_team_abbr'] == team) | (df['away_team_abbr'] == team), 'rest'] = team_data['rest']


    df['point_diff'] = df['home_score'] - df['away_score']
    df['point_total'] = df['home_score'] + df['away_score']
    df['covered_spread'] = (df['home_score'] + df['spread'] > df['away_score']).astype(int)
    df['over'] = (df['point_total'] > df['total']).astype(int)
    
    relevant_betting = df[['date', 'home_team_abbr',  'away_team_abbr',
                           'home_score', 'away_score', 'point_diff', 
                           'rest', 'point_total', 'spread', 'total',
                          'covered_spread', 'over', ]]

    
    return relevant_betting

bet_data_clean = clean_bet_data()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [5]:
full_df = pd.merge(bet_data_clean, matchup_df, left_on=['date', 'home_team_abbr', 'away_team_abbr'], right_on=['GAME_DATE', 'home_team_abbr', 'away_team_abbr'])
full_df

Unnamed: 0,date,home_team_abbr,away_team_abbr,home_score,away_score,point_diff,rest,point_total,spread,total,covered_spread,over,SEASON_YEAR,GAME_ID,GAME_DATE,MATCHUP,WL,HOME_GAME,home_P1_MIN,home_P1_FG3M,home_P1_FG3A,home_P1_FTM,home_P1_FTA,home_P1_OREB,home_P1_DREB,home_P1_AST,home_P1_TOV,home_P1_STL,home_P1_BLK,home_P1_BLKA,home_P1_PF,home_P1_PFD,home_P1_PTS,home_P1_PLUS_MINUS,home_P1_E_OFF_RATING,home_P1_E_DEF_RATING,home_P1_AST_RATIO,home_P1_TM_TOV_PCT,home_P1_E_USG_PCT,home_P1_E_PACE,home_P1_POSS,home_P1_PIE,home_P1_SPD,home_P1_DIST,home_P1_ORBC,home_P1_DRBC,home_P1_RBC,home_P1_TCHS,home_P1_SAST,home_P1_FTAST,home_P1_PASS,home_P1_CFGM,home_P1_CFGA,home_P1_UFGM,home_P1_UFGA,home_P1_DFGM,home_P1_DFGA,home_P1_FG2M,home_P1_FG2A,home_P1_PTS_2PT_MR,home_P1_PTS_FB,home_P1_PTS_OFF_TOV,home_P1_PTS_PAINT,home_P1_AST_2PM,home_P1_UAST_2PM,home_P1_AST_3PM,home_P1_UAST_3PM,home_P2_MIN,home_P2_FG3M,home_P2_FG3A,home_P2_FTM,home_P2_FTA,home_P2_OREB,home_P2_DREB,home_P2_AST,home_P2_TOV,home_P2_STL,home_P2_BLK,home_P2_BLKA,home_P2_PF,home_P2_PFD,home_P2_PTS,home_P2_PLUS_MINUS,home_P2_E_OFF_RATING,home_P2_E_DEF_RATING,home_P2_AST_RATIO,home_P2_TM_TOV_PCT,home_P2_E_USG_PCT,home_P2_E_PACE,home_P2_POSS,home_P2_PIE,home_P2_SPD,home_P2_DIST,home_P2_ORBC,home_P2_DRBC,home_P2_RBC,home_P2_TCHS,home_P2_SAST,home_P2_FTAST,home_P2_PASS,...,away_P6_AST_3PM,away_P6_UAST_3PM,away_P7_MIN,away_P7_FG3M,away_P7_FG3A,away_P7_FTM,away_P7_FTA,away_P7_OREB,away_P7_DREB,away_P7_AST,away_P7_TOV,away_P7_STL,away_P7_BLK,away_P7_BLKA,away_P7_PF,away_P7_PFD,away_P7_PTS,away_P7_PLUS_MINUS,away_P7_E_OFF_RATING,away_P7_E_DEF_RATING,away_P7_AST_RATIO,away_P7_TM_TOV_PCT,away_P7_E_USG_PCT,away_P7_E_PACE,away_P7_POSS,away_P7_PIE,away_P7_SPD,away_P7_DIST,away_P7_ORBC,away_P7_DRBC,away_P7_RBC,away_P7_TCHS,away_P7_SAST,away_P7_FTAST,away_P7_PASS,away_P7_CFGM,away_P7_CFGA,away_P7_UFGM,away_P7_UFGA,away_P7_DFGM,away_P7_DFGA,away_P7_FG2M,away_P7_FG2A,away_P7_PTS_2PT_MR,away_P7_PTS_FB,away_P7_PTS_OFF_TOV,away_P7_PTS_PAINT,away_P7_AST_2PM,away_P7_UAST_2PM,away_P7_AST_3PM,away_P7_UAST_3PM,away_P8_MIN,away_P8_FG3M,away_P8_FG3A,away_P8_FTM,away_P8_FTA,away_P8_OREB,away_P8_DREB,away_P8_AST,away_P8_TOV,away_P8_STL,away_P8_BLK,away_P8_BLKA,away_P8_PF,away_P8_PFD,away_P8_PTS,away_P8_PLUS_MINUS,away_P8_E_OFF_RATING,away_P8_E_DEF_RATING,away_P8_AST_RATIO,away_P8_TM_TOV_PCT,away_P8_E_USG_PCT,away_P8_E_PACE,away_P8_POSS,away_P8_PIE,away_P8_SPD,away_P8_DIST,away_P8_ORBC,away_P8_DRBC,away_P8_RBC,away_P8_TCHS,away_P8_SAST,away_P8_FTAST,away_P8_PASS,away_P8_CFGM,away_P8_CFGA,away_P8_UFGM,away_P8_UFGA,away_P8_DFGM,away_P8_DFGA,away_P8_FG2M,away_P8_FG2A,away_P8_PTS_2PT_MR,away_P8_PTS_FB,away_P8_PTS_OFF_TOV,away_P8_PTS_PAINT,away_P8_AST_2PM,away_P8_UAST_2PM,away_P8_AST_3PM,away_P8_UAST_3PM
0,2013-10-29,LAL,LAC,116,103,13,195.0,219,9.5,202.5,1,1,2013-14,21300003,2013-10-29,LAL vs. LAC,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2013-10-29,MIA,CHI,107,95,12,195.0,202,-5.0,188.5,1,1,2013-14,21300002,2013-10-29,MIA vs. CHI,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2013-10-29,IND,ORL,97,87,10,195.0,184,-12.0,189.5,0,0,2013-14,21300001,2013-10-29,IND vs. ORL,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2013-10-30,DAL,ATL,118,109,9,196.0,227,-6.0,196.0,1,1,2013-14,21300012,2013-10-30,DAL vs. ATL,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2013-10-30,GSW,LAL,125,94,31,196.0,219,-12.0,206.0,1,1,2013-14,21300017,2013-10-30,GSW vs. LAL,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8727,2021-02-04,LAL,DEN,114,93,21,4.0,207,-5.0,217.0,1,0,2020-21,22000341,2021-02-04,LAL vs. DEN,1,1,35.024167,3.4,7.4,4.5,6.1,0.5,6.6,7.6,3.4,1.0,0.5,0.6,1.8,4.7,26.1,6.7,110.63,100.55,24.18,10.71,0.3107,98.414,71.4,0.1994,3.536,2.198,2.5,9.7,11.5,92.9,1.3,0.8,65.3,2.5,4.9,6.6,13.5,2.3,3.3,5.7,11.0,1.5,3.6,4.0,9.1,1.9,3.3,1.5,1.7,33.168333,0.40,1.900,4.800,6.90,2.2,6.7,3.400,1.500,1.300,2.2,0.900,2.00,4.900,21.800,5.700,112.2800,102.0100,13.8300,5.790,0.2707,101.7550,68.500,0.17040,3.6590,2.1530,3.90,10.300,14.100,60.900,0.400,0.200,38.800,...,0.8,0.1,21.514500,1.90,4.300,1.7,1.90,1.90,4.900,1.000,0.700,0.600,0.200,0.600,2.400,1.300,10.20,5.600,115.45,107.2500,10.9700,7.2500,0.184200,99.639,44.700,0.10760,3.69100,1.356,3.700,4.9,8.0,26.70,0.2,0.000,18.700,1.100,2.700,1.700,4.100,1.300,1.9,1.4,3.600,0.1,0.300,1.400,2.2,1.000,0.400,1.90,0.0,12.666500,0.800,2.3,0.30,0.400,0.300,0.2,1.90,0.70,0.800,0.100,0.000,2.0,1.000,3.100,2.80,114.1000,103.6000,37.9500,11.34,0.134700,99.4170,26.9,0.01780,3.59200,0.85500,0.10,0.40,0.5,20.400,0.2,0.1,15.800,0.10,0.5,0.900,2.600,0.200,0.300,0.2,0.900,0.2,0.3,0.6,0.2,0.100,0.100,0.60,0.200
8728,2021-02-04,MEM,HOU,103,115,-12,1.0,218,-3.5,226.0,0,0,2020-21,22000340,2021-02-04,MEM vs. HOU,0,1,29.375500,0.7,2.1,1.1,1.5,1.9,4.4,2.1,0.5,1.6,0.7,0.8,1.7,1.9,15.0,-3.3,106.56,108.93,12.88,2.84,0.1996,102.432,61.6,0.1194,4.405,2.283,3.7,7.3,10.7,40.9,0.6,0.0,26.3,3.5,6.4,3.1,6.3,2.1,3.7,5.9,10.7,0.8,2.2,2.8,10.6,4.8,0.8,0.7,0.0,27.974583,0.75,2.625,4.375,5.25,0.5,1.5,7.375,3.375,1.125,0.5,1.625,1.75,4.125,19.375,-0.125,113.0625,110.5375,27.1125,14.225,0.2950,106.7575,62.125,0.11525,4.3725,2.1925,2.25,3.625,5.625,65.375,0.125,0.375,43.375,...,0.8,0.0,21.565000,1.90,5.800,2.2,3.00,1.50,6.400,3.200,2.200,0.900,0.500,0.500,3.700,2.200,10.30,-2.900,98.70,113.1900,21.3500,14.6200,0.240700,105.324,46.700,0.09410,3.66800,1.221,2.900,8.2,10.7,42.60,0.2,0.100,31.200,0.400,2.000,2.500,5.900,2.600,3.5,1.2,3.400,0.0,0.100,1.200,2.2,0.500,0.700,1.80,0.1,21.456000,1.500,3.8,0.50,0.500,0.900,2.9,2.00,0.90,1.500,0.500,0.700,2.0,0.700,8.000,3.80,110.7900,101.1600,20.3200,9.81,0.156800,103.8010,45.8,0.10240,4.24100,1.46300,1.90,4.30,6.0,25.900,0.0,0.1,17.400,1.00,2.6,1.800,4.100,1.600,2.300,1.5,3.400,0.0,1.3,1.8,3.0,0.800,0.700,1.50,0.000
8729,2021-02-04,PHI,POR,105,121,-16,1.0,226,-10.0,223.0,0,1,2020-21,22000337,2021-02-04,PHI vs. POR,0,1,34.650833,1.9,4.1,3.5,4.0,0.9,5.2,2.4,2.4,0.6,0.9,1.0,1.8,2.7,22.0,7.1,110.51,102.77,10.88,10.54,0.2442,103.555,73.3,0.1394,4.091,2.517,3.3,8.5,11.4,64.7,0.5,0.1,43.6,3.4,6.4,4.9,9.5,1.9,2.7,6.4,11.8,2.7,4.3,3.6,9.3,2.4,3.5,1.4,0.3,31.187333,1.30,2.900,10.500,12.40,1.6,9.1,2.400,3.400,1.200,0.9,0.300,2.90,8.300,31.000,4.700,116.3000,108.4300,8.5500,12.760,0.3554,105.1360,66.800,0.21190,3.8740,2.1280,4.20,14.200,17.700,72.700,0.300,0.200,45.700,...,1.5,0.8,13.417333,0.10,0.500,0.5,0.60,1.10,3.800,1.100,0.300,0.300,0.200,0.100,1.400,0.600,3.40,-5.400,104.45,119.1400,19.2800,5.9300,0.130700,100.176,28.000,0.10450,4.28100,1.005,2.900,5.3,7.8,20.50,0.0,0.100,16.100,0.900,2.200,0.500,1.300,2.000,2.6,1.3,3.000,0.2,0.400,0.800,2.3,0.600,0.700,0.10,0.0,5.458333,0.375,1.0,0.25,0.375,0.125,0.5,0.25,0.25,0.125,0.125,0.125,1.0,0.375,2.375,-0.25,90.6625,122.4375,16.6625,18.75,0.180875,109.9225,12.5,-0.02525,4.37125,0.42625,0.25,0.75,1.0,6.875,0.0,0.0,4.875,0.25,0.5,0.625,1.125,0.375,0.625,0.5,0.625,0.5,0.0,0.0,0.5,0.125,0.375,0.25,0.125
8730,2021-02-04,DAL,GSW,116,147,-31,2.0,263,-4.0,229.5,0,1,2020-21,22000339,2021-02-04,DAL vs. GSW,0,1,35.305000,1.7,6.1,6.4,7.7,0.5,7.0,9.8,3.4,0.9,0.6,0.8,1.7,5.4,26.1,-3.3,109.59,116.84,27.03,9.52,0.3316,101.026,73.1,0.1808,3.521,2.165,1.8,9.3,10.8,77.1,0.7,0.9,50.0,2.6,5.9,5.3,11.4,2.1,3.6,7.3,13.6,2.5,2.8,2.6,11.6,0.6,6.2,0.2,1.4,32.582500,1.10,5.000,2.400,2.60,1.1,1.8,1.800,1.400,0.900,0.4,0.300,3.10,2.400,11.900,-2.600,106.3700,110.5500,11.0900,8.640,0.1851,100.1860,66.600,0.04070,4.2630,2.4740,2.10,3.700,5.800,44.400,0.800,0.100,29.300,...,0.2,0.1,15.634167,0.75,1.375,0.5,0.75,0.25,2.625,1.625,0.625,0.375,0.625,0.125,1.875,1.125,4.75,-2.625,94.95,106.7625,35.3625,8.0375,0.085375,111.045,34.875,0.11575,4.62125,1.290,1.125,4.5,5.5,32.75,0.0,0.125,28.875,0.625,0.875,1.125,1.875,1.375,2.0,1.0,1.375,0.0,0.875,0.625,2.0,0.875,0.125,0.75,0.0,14.038167,0.800,1.8,1.00,1.200,0.400,2.5,1.20,1.10,1.000,0.500,0.000,2.4,1.600,5.400,1.00,106.4500,104.6300,19.0100,22.28,0.155600,103.8790,30.2,0.09290,4.44900,1.04700,0.80,4.20,4.8,20.000,0.1,0.0,14.500,0.50,0.7,1.200,2.500,0.700,1.200,1.0,1.700,0.2,1.6,1.2,1.7,0.300,0.600,0.80,0.000


## Get ELO Rating

In [39]:
from time import sleep

# Gather data from 2000 to present
seasons = ["200{}-0{}".format(x, x+1) if x!=9 else "200{}-{}".format(x, x+1) for x in range(0, 10)]
seasons2 = ["20{}-{}".format(x, x+1) for x in range(10, 21)]
seasons.extend(seasons2)
print(seasons)

gamelogs_2000_present = [] 
for s in tqdm(seasons, desc='Progress:'):
    for season_type in ['Regular Season', 'Playoffs']:
        log = leaguegamelog.LeagueGameLog(season=s, season_type_all_star=season_type).get_data_frames()[0]
        gamelogs_2000_present.append(log)
        sleep(5)
    sleep(10)
    
    
gamelogs_df = pd.concat(gamelogs_2000_present)
    
    
    

Progress::   0%|                                                                                | 0/21 [00:00<?, ?it/s]

['2000-01', '2001-02', '2002-03', '2003-04', '2004-05', '2005-06', '2006-07', '2007-08', '2008-09', '2009-10', '2010-11', '2011-12', '2012-13', '2013-14', '2014-15', '2015-16', '2016-17', '2017-18', '2018-19', '2019-20', '2020-21']


Progress:: 100%|███████████████████████████████████████████████████████████████████████| 21/21 [07:09<00:00, 20.44s/it]


In [43]:
# Use the current franchines abbreviation if it has changed from the past
abbr_mapping = {'NJN':'BKN',
               'CHH':'CHA',
               'VAN':'MEM',
               'NOH':'NOP',
               'NOK':'NOP',
               'SEA':'OKC'}

gamelogs_df['TEAM_ABBREVIATION'] = gamelogs_df['TEAM_ABBREVIATION'].replace(abbr_mapping)
gamelogs_df['MATCHUP'] = gamelogs_df['MATCHUP'].str.replace('NJN', 'BKN')
gamelogs_df['MATCHUP'] = gamelogs_df['MATCHUP'].str.replace('CHH', 'CHA')
gamelogs_df['MATCHUP'] = gamelogs_df['MATCHUP'].str.replace('VAN', 'MEM')
gamelogs_df['MATCHUP'] = gamelogs_df['MATCHUP'].str.replace('NOH', 'NOP')
gamelogs_df['MATCHUP'] = gamelogs_df['MATCHUP'].str.replace('NOK', 'NOP')
gamelogs_df['MATCHUP'] = gamelogs_df['MATCHUP'].str.replace('SEA', 'OKC')

# Add binary HOME_GAME feature

gamelogs_df['HOME_GAME'] = 0
gamelogs_df['HOME_GAME'] = (gamelogs_df['MATCHUP'].str.contains('vs')).astype(int)

gamelogs_df['WL'] = (gamelogs_df['WL'] == 'W').astype(int)

print("num_unique_team_abbr:", gamelogs_df['TEAM_ABBREVIATION'].nunique())

num_unique_team_abbr: 30


In [168]:
# Separate home_games and away_games 

home_games = gamelogs_df.loc[gamelogs_df['HOME_GAME'] == 1, ['SEASON_ID', 'TEAM_ABBREVIATION', 'GAME_ID', 'GAME_DATE', 'WL', 'PTS']]
away_games = gamelogs_df.loc[gamelogs_df['HOME_GAME'] == 0, ['TEAM_ABBREVIATION', 'GAME_ID', 'PTS']]

print('home_shape:', home_games.shape, "away_games:", away_games.shape)

# Merge so that each row shows a matchup (home vs away)
elo_df = pd.merge(home_games, away_games, on='GAME_ID', suffixes = ['_home', '_away'])

elo_df = elo_df.rename(columns = {'GAME_DATE_home':'GAME_DATE'})

# Create MOV column, which is the PTS_home - PTS_away
elo_df['MOV'] = elo_df['PTS_home'] - elo_df['PTS_away']


home_shape: (26073, 6) away_games: (26073, 3)


(26073, 8)

In [170]:
# Helper functions go generate ELO rating

def get_K(MOV, elo_diff):
    """This K multiplier """
    K_0 = 20    
    # If the home team wins
    if MOV > 0:
        multiplier = (MOV+3)**(0.8)/(7.5+0.006*(elo_diff))
    else:
        multiplier = (-MOV+3)**(0.8)/(7.5+0.006*(-elo_diff))
        
    return K_0*multiplier, K_0*multiplier

def get_S(home_score, away_score):
    """S is the 1 if the team wins, and 0 if the team loses"""
    S_home, S_away = 0, 0
    if home_score > away_score:
        S_home = 1
    else:
        S_away = 1
    return S_home, S_away


def elo_prediction(home_rating, away_rating):
    """Generate the probability of a home victory based on the teams' elo ratings"""
    E_home = 1.0/(1 + 10 ** ((away_rating - home_rating) / (400.0)))
    return E_home


def elo_update(home_score, away_score, home_rating, away_rating):
    # Add 100 to the home_rating for home court advantage
    HOME_AD = 100
    home_rating += HOME_AD
    
    
    E_home = elo_prediction(home_rating, away_rating)
    E_away = 1.0 - E_home
    
    MOV = home_score - away_score
    
    if MOV > 0:
        elo_diff = home_rating - away_rating
    else:
        elo_diff = away_rating - home_rating
        
    
    S_home, S_away = get_S(home_score, away_score)
    
    K_home, K_away = get_K(MOV, elo_diff)
        
#     print("expected_win_prob: ", E_home, E_away)
#     print("elo_diff:", elo_diff)
#     print("S_home:", S_home, "S_away:", S_away)
#     print("K_home:", K_home, "K_away:", K_away)
#     print(K_home*(S_home-E_home), K_away*(S_away-E_away))
    return K_home*(S_home-E_home), K_away*(S_away-E_away)
    



def season_reset(rating):
    new_rating = 0.75*rating + 0.25*1505
    return new_rating
    

In [180]:
def get_elo_ratings(elo_df = elo_df):
    """This function will generate elo_ratings in the elo_df
    """
    
    # Initialize the following columns

    elo_df['home_elo_pred'] = np.nan

    elo_df['home_rating_i'] = np.nan
    elo_df['home_rating_n'] = np.nan

    elo_df['away_rating_i'] = np.nan
    elo_df['away_rating_n'] = np.nan

    elo_dic = {}
    for idx, row in elo_df.iterrows():
        if (idx > 2) and (row['SEASON_ID'] != elo_df.loc[idx-1, 'SEASON_ID']):
            for team in elo_dic:
                elo_dic[team] = season_reset(elo_dic[team])

        if row['TEAM_ABBREVIATION_home'] not in elo_dic:
            elo_dic[row['TEAM_ABBREVIATION_home']] = 1300
            elo_df.loc[idx, 'home_rating_i'] = 1300
        else:
            elo_df.loc[idx, 'home_rating_i'] = elo_dic[row['TEAM_ABBREVIATION_home']]

        if row['TEAM_ABBREVIATION_away'] not in elo_dic:
            elo_dic[row['TEAM_ABBREVIATION_away']] = 1300
            elo_df.loc[idx, 'away_rating_i'] = 1300
        else:
            elo_df.loc[idx, 'away_rating_i'] = elo_dic[row['TEAM_ABBREVIATION_away']]

        home_score = row['PTS_home']
        away_score = row['PTS_away']
        home_rating = elo_dic[row['TEAM_ABBREVIATION_home']]
        away_rating = elo_dic[row['TEAM_ABBREVIATION_away']]

        home_update, away_update = elo_update(home_score, away_score, home_rating, away_rating)

        elo_df.loc[idx, 'home_rating_n'] = home_rating + home_update
        elo_df.loc[idx, 'away_rating_n'] = away_rating + away_update
        elo_df.loc[idx, 'home_elo_pred'] = elo_prediction(home_rating+100, away_rating)

        elo_dic[row['TEAM_ABBREVIATION_home']] = elo_df.loc[idx, 'home_rating_n']
        elo_dic[row['TEAM_ABBREVIATION_away']] = elo_df.loc[idx, 'away_rating_n']

    elo_df['elo_MOV_prediction'] = round((elo_df['home_rating_i']+100 - elo_df['away_rating_i'])/28, 2)

    return elo_df

elo_df = get_elo_ratings(elo_df = elo_df)

Unnamed: 0,SEASON_ID,TEAM_ABBREVIATION_home,GAME_ID,GAME_DATE,WL,PTS_home,TEAM_ABBREVIATION_away,PTS_away,MOV,home_elo_pred,home_rating_i,home_rating_n,away_rating_i,away_rating_n
0,22000,HOU,0020000008,2000-10-31,0,98,MIN,106,-8,0.640065,1300.000000,1289.238242,1300.000000,1310.761758
1,22000,BKN,0020000002,2000-10-31,0,82,CLE,86,-4,0.640065,1300.000000,1292.503691,1300.000000,1307.496309
2,22000,SAS,0020000009,2000-10-31,1,98,IND,85,13,0.640065,1300.000000,1308.167047,1300.000000,1291.832953
3,22000,DAL,0020000007,2000-10-31,1,97,MIL,93,4,0.640065,1300.000000,1304.215484,1300.000000,1295.784516
4,22000,UTA,0020000010,2000-10-31,1,107,LAC,94,13,0.640065,1300.000000,1308.167047,1300.000000,1291.832953
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26068,22020,OKC,0022000415,2021-02-14,1,114,MIL,109,5,0.381796,1462.174947,1471.500612,1645.895143,1636.569478
26069,22020,PHX,0022000417,2021-02-14,1,109,ORL,90,19,0.827584,1560.246217,1564.721682,1387.752084,1383.276619
26070,22020,LAC,0022000419,2021-02-14,1,128,CLE,111,17,0.914602,1656.437414,1658.319076,1344.523496,1342.641834
26071,22020,SAC,0022000418,2021-02-14,0,110,MEM,124,-14,0.518366,1438.288859,1425.089558,1525.521077,1538.720379


In [186]:
elo_df = elo_df.rename(columns={'elo_spread_prediction':'elo_MOV_pred'})
elo_df

Unnamed: 0,SEASON_ID,TEAM_ABBREVIATION_home,GAME_ID,GAME_DATE,WL,PTS_home,TEAM_ABBREVIATION_away,PTS_away,MOV,home_elo_pred,home_rating_i,home_rating_n,away_rating_i,away_rating_n,elo_MOV_pred
0,22000,HOU,0020000008,2000-10-31,0,98,MIN,106,-8,0.640065,1300.000000,1289.238242,1300.000000,1310.761758,3.57
1,22000,BKN,0020000002,2000-10-31,0,82,CLE,86,-4,0.640065,1300.000000,1292.503691,1300.000000,1307.496309,3.57
2,22000,SAS,0020000009,2000-10-31,1,98,IND,85,13,0.640065,1300.000000,1308.167047,1300.000000,1291.832953,3.57
3,22000,DAL,0020000007,2000-10-31,1,97,MIL,93,4,0.640065,1300.000000,1304.215484,1300.000000,1295.784516,3.57
4,22000,UTA,0020000010,2000-10-31,1,107,LAC,94,13,0.640065,1300.000000,1308.167047,1300.000000,1291.832953,3.57
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26068,22020,OKC,0022000415,2021-02-14,1,114,MIL,109,5,0.381796,1462.174947,1471.500612,1645.895143,1636.569478,-2.99
26069,22020,PHX,0022000417,2021-02-14,1,109,ORL,90,19,0.827584,1560.246217,1564.721682,1387.752084,1383.276619,9.73
26070,22020,LAC,0022000419,2021-02-14,1,128,CLE,111,17,0.914602,1656.437414,1658.319076,1344.523496,1342.641834,14.71
26071,22020,SAC,0022000418,2021-02-14,0,110,MEM,124,-14,0.518366,1438.288859,1425.089558,1525.521077,1538.720379,0.46


In [None]:
full_df['prev_cover'] = full_df['covered_spread'].shift(1)
full_df['prev2_cover'] = full_df['covered_spread'].shift(2)

print("original shape", full_df.shape)
full_df = full_df.dropna()
print("dropped_na", full_df.shape)
features = full_df.drop(columns=['date', 'home_team_abbr', 'away_team_abbr', 
                                 'home_score', 'away_score', 'point_diff', 
                                 'rest', 'point_total', 'covered_spread', 
                                 'over', 'SEASON_YEAR', 'GAME_ID', 'GAME_DATE',
                                 'MATCHUP', 'HOME_GAME', 'WL'])

features.shape

In [224]:
features = features.astype(float)

In [225]:
def remove_correlated_features(features, threshold=1):
    """Removes correlated features from dataset
    INPUTS:
    final_merged_df
    threshold: float between 0 and 1 indicating how highly correlated features
    must be to be removed
    OUTPUT: final_merged_df with the correlated feature columns removed
    """
    corr = features.select_dtypes('number').corr()

    correlated_features = set()
    for i in range(len(corr.columns)):
        for j in range(i):
            if abs(corr.iloc[i, j]) >= threshold:
                colname = corr.columns[i]
                correlated_features.add(colname)
    print("Correlated Features:", list(correlated_features))
    print("Num Features Removed ", len(correlated_features))
    new_features = features.drop(columns=list(correlated_features))
    return new_features


features_removed_corr = remove_correlated_features(features, threshold=0.9)
features_removed_corr

Correlated Features: ['away_P3_DRBC', 'home_P6_FTA', 'away_P4_FG3A', 'home_P7_DFGA', 'home_P4_PFD', 'away_P4_FG2A', 'home_P4_FG3A', 'home_P6_FG3A', 'away_P1_FG2A', 'away_P2_CFGA', 'away_P3_E_PACE', 'home_P6_DIST', 'away_P5_RBC', 'home_P3_E_PACE', 'away_P5_PASS', 'away_P8_RBC', 'away_P7_FTA', 'home_P1_CFGA', 'away_P8_POSS', 'away_P8_PTS_PAINT', 'home_P7_POSS', 'away_P2_FG2A', 'away_P7_CFGA', 'home_P8_DFGA', 'away_P3_POSS', 'home_P8_FG3A', 'home_P5_FG2A', 'home_P7_PTS_PAINT', 'home_P1_PFD', 'home_P4_CFGA', 'away_P8_DIST', 'home_P5_AST_3PM', 'home_P5_FG3A', 'home_P7_FG3A', 'home_P1_RBC', 'away_P8_DRBC', 'home_P2_FG3A', 'away_P2_RBC', 'away_P2_FG3A', 'away_P2_ORBC', 'home_P2_E_PACE', 'home_P6_RBC', 'home_P2_ORBC', 'away_P1_RBC', 'home_P3_FG3A', 'home_P3_FTA', 'home_P1_DRBC', 'home_P5_RBC', 'away_P8_PASS', 'home_P6_CFGA', 'home_P7_CFGA', 'home_P2_RBC', 'home_P2_PASS', 'away_P5_PTS_PAINT', 'away_P4_PASS', 'away_P5_AST_3PM', 'home_P4_FTA', 'home_P5_POSS', 'home_P8_FTA', 'away_P2_DRBC', 'away_

Unnamed: 0,spread,total,home_P1_MIN,home_P1_FG3M,home_P1_FTM,home_P1_OREB,home_P1_DREB,home_P1_AST,home_P1_TOV,home_P1_STL,home_P1_BLK,home_P1_BLKA,home_P1_PF,home_P1_PTS,home_P1_PLUS_MINUS,home_P1_E_OFF_RATING,home_P1_E_DEF_RATING,home_P1_AST_RATIO,home_P1_TM_TOV_PCT,home_P1_E_USG_PCT,home_P1_E_PACE,home_P1_POSS,home_P1_PIE,home_P1_SPD,home_P1_DIST,home_P1_ORBC,home_P1_TCHS,home_P1_SAST,home_P1_FTAST,home_P1_CFGM,home_P1_UFGM,home_P1_UFGA,home_P1_DFGM,home_P1_FG2M,home_P1_PTS_2PT_MR,home_P1_PTS_FB,home_P1_PTS_OFF_TOV,home_P1_PTS_PAINT,home_P1_AST_2PM,home_P1_UAST_2PM,home_P1_AST_3PM,home_P1_UAST_3PM,home_P2_MIN,home_P2_FG3M,home_P2_FTM,home_P2_OREB,home_P2_DREB,home_P2_AST,home_P2_TOV,home_P2_STL,home_P2_BLK,home_P2_BLKA,home_P2_PF,home_P2_PTS,home_P2_PLUS_MINUS,home_P2_E_OFF_RATING,home_P2_E_DEF_RATING,home_P2_AST_RATIO,home_P2_TM_TOV_PCT,home_P2_E_USG_PCT,home_P2_POSS,home_P2_PIE,home_P2_SPD,home_P2_DIST,home_P2_TCHS,home_P2_SAST,home_P2_FTAST,home_P2_CFGM,home_P2_UFGM,home_P2_DFGM,home_P2_FG2M,home_P2_PTS_2PT_MR,home_P2_PTS_FB,home_P2_PTS_OFF_TOV,home_P2_PTS_PAINT,home_P2_AST_2PM,home_P2_UAST_2PM,home_P2_AST_3PM,home_P2_UAST_3PM,home_P3_MIN,home_P3_FG3M,home_P3_FTM,home_P3_OREB,home_P3_DREB,home_P3_AST,home_P3_TOV,home_P3_STL,home_P3_BLK,home_P3_BLKA,home_P3_PF,home_P3_PTS,home_P3_PLUS_MINUS,home_P3_E_OFF_RATING,home_P3_E_DEF_RATING,home_P3_AST_RATIO,home_P3_TM_TOV_PCT,home_P3_E_USG_PCT,home_P3_PIE,home_P3_SPD,home_P3_DIST,...,away_P6_BLKA,away_P6_PF,away_P6_PFD,away_P6_PTS,away_P6_PLUS_MINUS,away_P6_E_OFF_RATING,away_P6_E_DEF_RATING,away_P6_AST_RATIO,away_P6_TM_TOV_PCT,away_P6_E_USG_PCT,away_P6_E_PACE,away_P6_PIE,away_P6_SPD,away_P6_TCHS,away_P6_SAST,away_P6_FTAST,away_P6_CFGM,away_P6_UFGM,away_P6_UFGA,away_P6_DFGM,away_P6_PTS_2PT_MR,away_P6_PTS_FB,away_P6_PTS_OFF_TOV,away_P6_AST_2PM,away_P6_UAST_2PM,away_P6_UAST_3PM,away_P7_MIN,away_P7_FG3M,away_P7_FTM,away_P7_OREB,away_P7_DREB,away_P7_AST,away_P7_TOV,away_P7_STL,away_P7_BLK,away_P7_BLKA,away_P7_PF,away_P7_PFD,away_P7_PTS,away_P7_PLUS_MINUS,away_P7_E_OFF_RATING,away_P7_E_DEF_RATING,away_P7_AST_RATIO,away_P7_TM_TOV_PCT,away_P7_E_USG_PCT,away_P7_E_PACE,away_P7_PIE,away_P7_SPD,away_P7_TCHS,away_P7_SAST,away_P7_FTAST,away_P7_CFGM,away_P7_UFGM,away_P7_UFGA,away_P7_DFGM,away_P7_FG2M,away_P7_PTS_2PT_MR,away_P7_PTS_FB,away_P7_PTS_OFF_TOV,away_P7_AST_2PM,away_P7_UAST_2PM,away_P7_UAST_3PM,away_P8_MIN,away_P8_FG3M,away_P8_FTM,away_P8_OREB,away_P8_DREB,away_P8_AST,away_P8_TOV,away_P8_STL,away_P8_BLK,away_P8_BLKA,away_P8_PF,away_P8_PFD,away_P8_PTS,away_P8_PLUS_MINUS,away_P8_E_OFF_RATING,away_P8_E_DEF_RATING,away_P8_AST_RATIO,away_P8_TM_TOV_PCT,away_P8_E_USG_PCT,away_P8_E_PACE,away_P8_PIE,away_P8_SPD,away_P8_TCHS,away_P8_SAST,away_P8_FTAST,away_P8_CFGM,away_P8_UFGM,away_P8_UFGA,away_P8_DFGM,away_P8_FG2M,away_P8_PTS_2PT_MR,away_P8_PTS_FB,away_P8_PTS_OFF_TOV,away_P8_AST_2PM,away_P8_UAST_2PM,away_P8_UAST_3PM,prev_cover,prev2_cover
75,-9.5,202.5,37.846333,0.0,6.8,4.2,7.2,1.4,1.4,2.2,4.0,0.8,3.2,21.2,2.2,102.82,99.92,6.22,5.88,0.2466,96.900,75.8,0.1752,4.150,2.614,9.0,54.0,0.2,0.0,4.4,2.8,6.0,2.0,7.2,4.8,3.4,5.0,8.8,5.0,2.0,0.0,0.0,31.661667,1.40,2.000,0.4,2.6,6.000,4.000,1.600,0.4,1.000,3.00,13.800,1.000,102.7400,101.1000,24.7000,17.160,0.2530,63.600,0.08460,4.4260,2.3320,70.400,0.200,0.800,2.00,3.200,1.000,3.800,3.40,1.400,1.4,3.600,0.000,3.600,1.00,0.4,30.215000,1.8,3.6,1.0,1.8,2.4,1.4,1.8,0.6,1.2,1.6,16.6,1.6,105.18,101.28,12.64,8.40,0.2436,0.1054,4.226,2.114,...,0.333333,2.0,1.5,10.333333,-0.333333,103.566667,102.333333,29.05,16.90,0.252667,103.256667,0.111833,4.595,57.333333,0.5,0.333333,0.833333,3.166667,7.0,1.5,1.833333,1.666667,0.666667,0.5,2.166667,0.5,21.539167,0.833333,3.5,0.333333,1.833333,1.333333,1.333333,1.166667,0.333333,0.333333,2.333333,2.833333,11.666667,-3.000,93.416667,99.883333,9.7000,10.183333,0.222500,104.706667,0.077167,4.311667,27.166667,0.333333,0.000,1.333333,2.333333,5.500,0.666667,2.833333,2.666667,2.166667,1.333333,1.333333,1.333333,0.166667,17.159444,0.000,0.166667,2.000,4.333333,0.50,2.00,0.500,1.000,0.333333,2.333333,0.500,8.500,0.666667,95.266667,92.6000,6.516667,21.066667,0.212500,104.143333,0.147167,4.378333,35.333333,0.166667,0.0,1.50,2.666667,4.500,1.500,4.166667,4.333333,0.333333,1.833333,3.000,1.000,0.000,0.0,1.0
76,-4.5,210.5,36.793333,1.8,7.2,3.6,11.0,4.4,2.4,0.8,0.4,0.4,1.8,26.2,12.8,105.86,93.34,14.56,7.90,0.2722,106.036,80.4,0.2204,3.934,2.372,9.8,89.8,0.0,0.4,4.2,4.4,9.8,4.2,6.8,2.4,2.8,5.6,10.6,3.8,2.8,1.6,0.2,35.046667,2.80,5.600,0.4,2.6,2.600,1.600,1.200,0.0,1.200,1.00,21.600,9.600,101.9000,92.0000,12.5400,7.580,0.2138,76.800,0.13860,4.0220,2.3100,51.000,0.400,0.400,3.00,3.600,0.800,3.800,2.20,3.600,4.2,5.200,2.600,1.200,2.60,0.2,34.217000,0.0,2.6,3.0,6.6,1.0,2.6,0.4,0.2,1.8,2.4,11.0,2.4,96.90,97.44,5.76,16.08,0.1838,0.0716,3.964,2.236,...,1.000000,3.0,1.8,9.200000,-2.000000,111.480000,115.860000,7.00,11.48,0.210200,99.924000,0.109800,4.082,32.800000,0.0,0.200000,2.800000,1.400000,2.4,1.8,0.000000,0.000000,0.000000,2.6,1.400000,0.0,17.283000,2.600000,0.4,0.600000,1.800000,0.600000,0.400000,0.400000,0.400000,0.000000,1.600000,1.000000,10.600000,-0.200,111.580000,110.540000,17.0000,7.000000,0.144200,99.196000,0.150600,4.410000,28.600000,0.000000,0.000,0.600000,3.200000,4.800,0.600000,1.200000,0.400000,1.200000,1.600000,0.600000,0.600000,0.200000,15.161333,0.400,0.400000,0.200,1.800000,3.60,1.60,0.200,0.200,0.000000,1.200000,0.200,4.400,-1.800000,103.160000,106.1000,36.300000,19.600000,0.175600,100.542000,0.041000,4.630000,40.800000,0.400000,0.2,0.40,1.400000,2.800,0.200,1.400000,1.200000,0.600000,0.200000,0.400,0.800,0.000,1.0,0.0
78,-5.5,191.0,36.465667,2.2,3.0,0.2,5.4,4.6,1.8,1.0,0.0,0.8,1.2,18.8,4.8,101.84,98.14,18.34,7.10,0.2326,100.460,75.4,0.1214,4.250,2.550,3.2,66.6,0.6,0.0,2.8,4.0,7.2,1.0,4.6,4.2,2.2,2.2,4.4,0.8,3.8,2.2,0.0,35.510333,0.00,3.000,4.6,8.6,2.600,2.600,1.200,1.2,0.800,2.80,17.800,6.600,100.6400,93.2600,14.3400,11.880,0.2058,72.400,0.16440,4.1020,2.3760,73.800,0.800,0.000,3.80,3.600,3.200,7.400,4.80,0.000,1.4,9.400,3.400,3.400,0.00,0.0,32.730667,2.0,0.8,0.4,3.6,6.6,2.8,1.0,0.0,1.0,2.6,12.4,5.6,106.56,96.08,30.52,12.40,0.2038,0.0968,4.122,2.236,...,0.400000,3.4,2.6,8.200000,-4.000000,93.660000,113.720000,5.86,22.52,0.229200,97.926000,0.023200,4.336,39.600000,0.0,0.200000,2.400000,1.000000,3.8,2.4,0.400000,0.200000,1.400000,2.4,0.600000,0.0,20.540667,1.000000,1.8,0.400000,1.600000,3.000000,1.000000,0.800000,0.400000,0.000000,1.000000,1.800000,9.200000,-3.400,102.180000,116.420000,27.7600,7.360000,0.176600,97.672000,0.166000,4.138000,48.600000,0.600000,0.600,1.000000,2.200000,4.400,1.000000,2.200000,1.200000,1.400000,0.800000,0.000000,2.200000,0.800000,18.271333,0.600,0.400000,0.000,2.400000,1.00,1.40,1.000,0.000,0.200000,1.400000,0.600,7.000,-3.800000,96.000000,106.8200,9.740000,16.560000,0.176200,93.128000,0.095600,4.392000,25.400000,0.200000,0.2,0.80,2.200000,4.400,0.400,2.400000,1.200000,0.600000,1.400000,0.600,1.800,0.000,1.0,1.0
80,3.0,205.5,35.430000,2.0,4.2,1.2,3.8,7.6,3.0,3.0,0.2,1.0,2.0,19.8,-3.0,102.10,105.74,25.42,9.52,0.2604,107.124,77.8,0.1144,4.656,2.746,4.8,85.4,0.6,0.0,3.0,3.8,8.8,1.4,4.8,1.8,3.2,2.8,6.8,1.0,3.4,1.4,0.6,34.852667,0.20,4.800,1.6,3.8,3.400,3.000,1.600,0.0,1.200,1.60,22.200,-7.800,97.8600,108.3800,13.6000,12.540,0.2558,75.800,0.14240,4.3440,2.5100,58.600,0.800,0.400,5.20,3.400,3.600,8.400,4.40,4.800,4.0,11.600,4.000,4.000,0.20,0.0,31.564333,1.8,1.8,2.4,8.6,3.4,2.8,0.6,1.4,0.4,3.2,17.2,-5.2,102.76,109.16,17.64,14.14,0.2068,0.1388,4.434,2.320,...,0.000000,2.4,1.0,5.200000,-6.800000,92.940000,98.600000,17.50,15.50,0.099000,101.294000,0.078200,4.586,28.000000,0.0,0.000000,0.600000,1.000000,2.2,1.0,0.400000,1.800000,1.600000,0.6,0.200000,0.0,21.222000,2.400000,1.0,0.800000,2.200000,1.200000,0.800000,1.400000,0.400000,0.200000,2.000000,1.400000,14.600000,0.600,96.840000,94.680000,8.6200,7.460000,0.249000,98.592000,0.219800,4.168000,28.200000,0.000000,0.000,2.600000,3.000000,7.200,0.400000,3.200000,1.000000,1.200000,3.000000,1.800000,1.200000,0.000000,17.163333,0.200,0.600000,0.600,3.400000,0.40,1.00,0.800,0.000,0.200000,1.800000,0.400,4.000,-1.400000,92.920000,98.4600,4.880000,14.600000,0.149800,99.556000,0.043400,4.726000,20.400000,0.000000,0.0,0.60,1.000000,2.600,0.800,1.400000,1.000000,1.000000,0.400000,1.200,0.200,0.000,0.0,0.0
81,-8.5,187.5,37.172000,3.2,5.4,0.8,7.4,4.0,3.2,1.8,0.6,0.6,1.2,25.8,13.6,101.04,82.12,14.28,11.76,0.2940,98.242,74.0,0.2520,4.286,2.648,5.4,73.2,0.4,0.0,3.2,5.4,10.0,0.6,5.4,4.4,3.0,5.8,5.6,1.8,3.6,2.6,0.4,35.848333,2.80,1.200,1.4,4.8,4.600,2.600,0.200,0.2,0.600,2.60,16.400,7.800,100.2200,86.1600,21.5000,11.680,0.2160,71.400,0.12960,3.9040,2.3220,58.800,0.400,0.000,2.40,3.800,0.800,3.400,0.40,2.400,5.2,6.400,0.800,2.400,2.80,0.0,30.794333,0.0,2.8,1.0,6.2,2.8,1.8,1.4,2.0,2.0,2.4,11.6,10.4,102.56,84.98,17.18,8.54,0.2060,0.1390,3.716,1.902,...,0.400000,2.8,1.8,6.400000,0.400000,103.360000,102.800000,21.28,3.56,0.133600,91.616000,0.108400,3.504,28.400000,0.0,0.200000,1.200000,0.800000,1.4,0.8,0.000000,0.400000,0.800000,1.0,1.400000,0.0,19.686667,0.000000,2.6,2.600000,4.000000,0.000000,1.200000,0.000000,0.800000,0.200000,2.000000,2.800000,7.000000,-1.200,99.260000,102.860000,0.0000,17.340000,0.148400,95.146000,0.138400,3.332000,22.400000,0.000000,0.000,1.400000,0.400000,1.200,1.400000,2.200000,0.200000,0.400000,0.600000,1.000000,1.200000,0.000000,16.703667,1.400,1.000000,0.000,2.000000,0.60,1.00,0.600,0.000,0.200000,1.600000,1.000,6.000,3.200000,111.360000,95.5600,5.560000,20.100000,0.175800,93.794000,0.062600,3.488000,11.800000,0.200000,0.0,0.20,1.200000,2.200,0.400,0.400000,0.200000,0.000000,1.400000,0.200,0.200,0.000,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8727,-5.0,217.0,35.024167,3.4,4.5,0.5,6.6,7.6,3.4,1.0,0.5,0.6,1.8,26.1,6.7,110.63,100.55,24.18,10.71,0.3107,98.414,71.4,0.1994,3.536,2.198,2.5,92.9,1.3,0.8,2.5,6.6,13.5,2.3,5.7,1.5,3.6,4.0,9.1,1.9,3.3,1.5,1.7,33.168333,0.40,4.800,2.2,6.7,3.400,1.500,1.300,2.2,0.900,2.00,21.800,5.700,112.2800,102.0100,13.8300,5.790,0.2707,68.500,0.17040,3.6590,2.1530,60.900,0.400,0.200,4.70,3.600,5.200,7.900,2.40,0.900,2.3,12.600,4.000,3.600,0.40,0.0,30.324167,0.7,3.5,0.9,2.3,3.5,1.4,1.1,0.0,0.5,2.7,12.8,4.6,113.12,106.05,19.73,7.92,0.1984,0.0712,4.350,2.335,...,0.200000,1.7,1.7,8.400000,1.900000,114.750000,111.610000,18.12,6.98,0.147600,98.589000,0.101600,3.712,24.100000,0.3,0.000000,1.400000,1.300000,2.9,1.1,0.000000,0.600000,0.500000,1.0,1.000000,0.1,21.514500,1.900000,1.7,1.900000,4.900000,1.000000,0.700000,0.600000,0.200000,0.600000,2.400000,1.300000,10.200000,5.600,115.450000,107.250000,10.9700,7.250000,0.184200,99.639000,0.107600,3.691000,26.700000,0.200000,0.000,1.100000,1.700000,4.100,1.300000,1.400000,0.100000,0.300000,1.400000,1.000000,0.400000,0.000000,12.666500,0.800,0.300000,0.300,0.200000,1.90,0.70,0.800,0.100,0.000000,2.000000,1.000,3.100,2.800000,114.100000,103.6000,37.950000,11.340000,0.134700,99.417000,0.017800,3.592000,20.400000,0.200000,0.1,0.10,0.900000,2.600,0.200,0.200000,0.200000,0.300000,0.600000,0.100,0.100,0.200,1.0,1.0
8728,-3.5,226.0,29.375500,0.7,1.1,1.9,4.4,2.1,0.5,1.6,0.7,0.8,1.7,15.0,-3.3,106.56,108.93,12.88,2.84,0.1996,102.432,61.6,0.1194,4.405,2.283,3.7,40.9,0.6,0.0,3.5,3.1,6.3,2.1,5.9,0.8,2.2,2.8,10.6,4.8,0.8,0.7,0.0,27.974583,0.75,4.375,0.5,1.5,7.375,3.375,1.125,0.5,1.625,1.75,19.375,-0.125,113.0625,110.5375,27.1125,14.225,0.2950,62.125,0.11525,4.3725,2.1925,65.375,0.125,0.375,4.25,2.875,1.625,6.375,0.75,2.875,3.5,11.625,1.625,4.375,0.75,0.0,27.407833,1.6,2.0,0.8,2.3,2.5,1.7,1.2,0.5,0.6,2.2,14.4,-1.0,105.87,106.21,12.87,8.10,0.2662,0.0589,4.282,2.077,...,0.700000,2.6,1.3,8.100000,0.800000,101.180000,100.640000,8.41,17.25,0.132200,105.666000,0.070300,4.258,26.100000,0.1,0.200000,1.700000,1.300000,2.7,1.3,0.200000,0.600000,1.000000,0.8,1.400000,0.0,21.565000,1.900000,2.2,1.500000,6.400000,3.200000,2.200000,0.900000,0.500000,0.500000,3.700000,2.200000,10.300000,-2.900,98.700000,113.190000,21.3500,14.620000,0.240700,105.324000,0.094100,3.668000,42.600000,0.200000,0.100,0.400000,2.500000,5.900,2.600000,1.200000,0.000000,0.100000,1.200000,0.500000,0.700000,0.100000,21.456000,1.500,0.500000,0.900,2.900000,2.00,0.90,1.500,0.500,0.700000,2.000000,0.700,8.000,3.800000,110.790000,101.1600,20.320000,9.810000,0.156800,103.801000,0.102400,4.241000,25.900000,0.000000,0.1,1.00,1.800000,4.100,1.600,1.500000,0.000000,1.300000,1.800000,0.800,0.700,0.000,1.0,1.0
8729,-10.0,223.0,34.650833,1.9,3.5,0.9,5.2,2.4,2.4,0.6,0.9,1.0,1.8,22.0,7.1,110.51,102.77,10.88,10.54,0.2442,103.555,73.3,0.1394,4.091,2.517,3.3,64.7,0.5,0.1,3.4,4.9,9.5,1.9,6.4,2.7,4.3,3.6,9.3,2.4,3.5,1.4,0.3,31.187333,1.30,10.500,1.6,9.1,2.400,3.400,1.200,0.9,0.300,2.90,31.000,4.700,116.3000,108.4300,8.5500,12.760,0.3554,66.800,0.21190,3.8740,2.1280,72.700,0.300,0.200,3.70,5.900,4.500,8.300,4.50,1.200,2.6,11.300,3.100,4.600,1.20,0.1,28.213667,1.8,1.1,0.1,1.3,2.3,1.2,0.8,0.4,0.4,1.8,11.9,8.9,119.29,107.03,18.63,9.60,0.1613,0.0641,4.097,2.049,...,0.600000,1.6,0.4,9.400000,-7.200000,101.810000,124.250000,11.90,6.37,0.215000,98.339000,0.049800,4.386,36.200000,0.1,0.000000,0.900000,2.500000,5.9,1.7,0.500000,0.500000,1.100000,0.2,0.800000,0.8,13.417333,0.100000,0.5,1.100000,3.800000,1.100000,0.300000,0.300000,0.200000,0.100000,1.400000,0.600000,3.400000,-5.400,104.450000,119.140000,19.2800,5.930000,0.130700,100.176000,0.104500,4.281000,20.500000,0.000000,0.100,0.900000,0.500000,1.300,2.000000,1.300000,0.200000,0.400000,0.800000,0.600000,0.700000,0.000000,5.458333,0.375,0.250000,0.125,0.500000,0.25,0.25,0.125,0.125,0.125000,1.000000,0.375,2.375,-0.250000,90.662500,122.4375,16.662500,18.750000,0.180875,109.922500,-0.025250,4.371250,6.875000,0.000000,0.0,0.25,0.625000,1.125,0.375,0.500000,0.500000,0.000000,0.000000,0.125,0.375,0.125,0.0,1.0
8730,-4.0,229.5,35.305000,1.7,6.4,0.5,7.0,9.8,3.4,0.9,0.6,0.8,1.7,26.1,-3.3,109.59,116.84,27.03,9.52,0.3316,101.026,73.1,0.1808,3.521,2.165,1.8,77.1,0.7,0.9,2.6,5.3,11.4,2.1,7.3,2.5,2.8,2.6,11.6,0.6,6.2,0.2,1.4,32.582500,1.10,2.400,1.1,1.8,1.800,1.400,0.900,0.4,0.300,3.10,11.900,-2.600,106.3700,110.5500,11.0900,8.640,0.1851,66.600,0.04070,4.2630,2.4740,44.400,0.800,0.100,1.50,2.500,0.400,3.100,1.40,1.800,1.6,4.100,1.200,1.500,1.00,0.0,32.401167,2.3,2.3,0.5,3.0,1.5,1.0,0.7,0.1,0.7,1.7,15.4,-3.7,112.51,120.70,9.59,6.52,0.2054,0.0650,3.655,2.063,...,0.500000,1.6,1.4,4.800000,-1.200000,103.950000,110.540000,29.54,7.91,0.145300,98.559000,0.064400,4.286,34.700000,0.3,0.100000,1.000000,0.800000,2.5,1.0,0.900000,1.000000,0.800000,1.1,0.500000,0.1,15.634167,0.750000,0.5,0.250000,2.625000,1.625000,0.625000,0.375000,0.625000,0.125000,1.875000,1.125000,4.750000,-2.625,94.950000,106.762500,35.3625,8.037500,0.085375,111.045000,0.115750,4.621250,32.750000,0.000000,0.125,0.625000,1.125000,1.875,1.375000,1.000000,0.000000,0.875000,0.625000,0.875000,0.125000,0.000000,14.038167,0.800,1.000000,0.400,2.500000,1.20,1.10,1.000,0.500,0.000000,2.400000,1.600,5.400,1.000000,106.450000,104.6300,19.010000,22.280000,0.155600,103.879000,0.092900,4.449000,20.000000,0.100000,0.0,0.50,1.200000,2.500,0.700,1.000000,0.200000,1.600000,1.200000,0.300,0.600,0.000,0.0,0.0


## Building Linear Models

### Feature Selection

In [11]:
import statsmodels.api as sm
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectFromModel
from sklearn.pipeline import Pipeline

In [12]:
point_diff = full_df['point_diff']
win = full_df['WL']
total = full_df['point_total']

X_train, X_test, point_diff_train, point_diff_test = train_test_split(features, point_diff, test_size=0.2, shuffle=False)
X_train, X_test, win_train, win_test = train_test_split(features, win, test_size=0.2, shuffle=False)
X_train, X_test, total_train, total_test = train_test_split(features, total, test_size=0.2, shuffle=False)

In [13]:
X_train.shape, X_test.shape

((6316, 788), (1579, 788))

In [313]:
np.arange(10, 461, 50)

array([ 10,  60, 110, 160, 210, 260, 310, 360, 410, 460])

In [232]:
en = ElasticNet()

rfe = RFE(estimator = en, n_features_to_select=100, step=2)

rfe.fit(X_train, point_diff_train)

feature_ranking = rfe.ranking_


In [245]:
rfe_features_train = X_train.iloc[:, rfe.support_]
rfe_features_test = X_test.iloc[:, rfe.support_]

In [242]:
X_train.shape, rfe_features.shape

((6316, 788), (7895, 100))

[50, 100, 150, 200, 250, 300, 350, 400, 450, 500]

In [256]:
number_features = list(range(50, 151, 10))
number_features

parameters = []
train_maes = []
test_maes = []
train_rmses = []
test_rmses = []

for num_feat in number_features:
    
    rfe = RFE(estimator = ElasticNet(alpha = 2.5, l1_ratio=0.9),
              n_features_to_select=num_feat, step=1)

    rfe.fit(X_train, point_diff_train)

    rfe_features_train = X_train.iloc[:, rfe.support_]
    rfe_features_test = X_test.iloc[:, rfe.support_]
    print(num_feat, rfe_features_train.shape, rfe_features_test.shape)
    best_en = ElasticNet(alpha = 2.5,
                   l1_ratio = 0.9)

#     params = {'alpha': np.logspace(-2, 2, 6),
#              'l1_ratio':np.linspace(0.1, 0.9, 9)}

#     en_gs = GridSearchCV(estimator=ElasticNet(), param_grid=params, 
#                         scoring='neg_mean_squared_error', n_jobs=-1)


#     en_gs.fit(rfe_features_train, point_diff_train)

#     best_en = en_gs.best_estimator_
    
#     best_params = en_gs.best_params_
#     parameters.append(best_params)
    best_en.fit(rfe_features_train, point_diff_train)

    en_preds_train = best_en.predict(rfe_features_train)
    en_preds_test = best_en.predict(rfe_features_test)

    train_rmse = mean_squared_error(point_diff_train, en_preds_train, squared=False)
    test_rmse = mean_squared_error(point_diff_test, en_preds_test, squared=False)

    train_mae = mean_absolute_error(point_diff_train, en_preds_train)
    test_mae = mean_absolute_error(point_diff_test, en_preds_test)

    train_rmses.append(train_rmse)
    test_rmses.append(test_rmse)
    train_maes.append(train_mae)
    test_maes.append(test_mae)
    


50 (6316, 50) (1579, 50)
60 (6316, 60) (1579, 60)
70 (6316, 70) (1579, 70)
80 (6316, 80) (1579, 80)
90 (6316, 90) (1579, 90)
100 (6316, 100) (1579, 100)
110 (6316, 110) (1579, 110)
120 (6316, 120) (1579, 120)
130 (6316, 130) (1579, 130)
140 (6316, 140) (1579, 140)
150 (6316, 150) (1579, 150)


In [264]:
rfe = RFE(estimator = ElasticNet(alpha = 2.5, l1_ratio=0.9),
          n_features_to_select=600, step=1)

rfe.fit(X_train, point_diff_train)

rfe_features_train = X_train.iloc[:, rfe.support_]
rfe_features_test = X_test.iloc[:, rfe.support_]
print(num_feat, rfe_features_train.shape, rfe_features_test.shape)
best_en = ElasticNet(alpha = 2.5,
               l1_ratio = 0.9)

best_en.fit(rfe_features_train, point_diff_train)

en_preds_train = best_en.predict(rfe_features_train)
en_preds_test = best_en.predict(rfe_features_test)

train_rmse = mean_squared_error(point_diff_train, en_preds_train, squared=False)
test_rmse = mean_squared_error(point_diff_test, en_preds_test, squared=False)

train_mae = mean_absolute_error(point_diff_train, en_preds_train)
test_mae = mean_absolute_error(point_diff_test, en_preds_test)

print(train_rmse, test_rmse)
print(train_mae, test_mae)

150 (6316, 600) (1579, 600)
12.08190210233766 12.826496274041986
9.428876061814746 10.023365360254827


In [262]:
rfe_features_train

Unnamed: 0,spread,home_P1_PLUS_MINUS,home_P2_E_OFF_RATING,home_P7_E_OFF_RATING,away_P1_TCHS,away_P1_CFGA,away_P1_AST_2PM,away_P2_TCHS,away_P7_RBC,away_P7_TCHS
75,-9.5,2.2,102.74,101.36,51.166667,1.5,0.0,66.0,5.333333,27.166667
76,-4.5,12.8,101.90,87.32,73.200000,7.8,1.2,60.8,5.200000,28.600000
78,-5.5,4.8,100.64,101.60,51.000000,7.0,1.2,49.0,3.600000,48.600000
80,3.0,-3.0,97.86,92.54,61.600000,5.2,2.2,77.6,6.600000,28.200000
81,-8.5,13.6,100.22,87.20,47.800000,8.2,2.6,44.0,10.200000,22.400000
...,...,...,...,...,...,...,...,...,...,...
6917,-7.0,-1.3,109.76,109.04,82.500000,5.3,1.1,37.5,6.500000,28.500000
6918,2.0,-1.8,107.50,103.16,96.400000,10.2,2.6,29.6,5.400000,32.600000
6919,7.5,3.4,108.26,99.19,76.300000,9.8,3.3,50.5,5.600000,25.100000
6920,3.5,-5.0,108.22,103.82,40.300000,5.9,3.3,48.2,5.100000,30.100000


In [258]:
results = pd.DataFrame({'num_features':number_features,
                        'train_rmses':train_rmses, 'test_rmses':test_rmses,
                       'train_maes':train_maes, 'test_maes':test_maes})

results

Unnamed: 0,num_features,train_rmses,test_rmses,train_maes,test_maes
0,50,12.081902,12.826496,9.428876,10.023365
1,60,12.081902,12.826496,9.428876,10.023365
2,70,12.081902,12.826496,9.428876,10.023365
3,80,12.081902,12.826496,9.428876,10.023365
4,90,12.081902,12.826496,9.428876,10.023365
5,100,12.081902,12.826496,9.428876,10.023365
6,110,12.081902,12.826496,9.428876,10.023365
7,120,12.081902,12.826496,9.428876,10.023365
8,130,12.081902,12.826496,9.428876,10.023365
9,140,12.081902,12.826496,9.428876,10.023365


In [300]:
X_train

Unnamed: 0,spread,total,home_P1_MIN,home_P1_FG3M,home_P1_FG3A,home_P1_FTM,home_P1_FTA,home_P1_OREB,home_P1_DREB,home_P1_AST,home_P1_TOV,home_P1_STL,home_P1_BLK,home_P1_BLKA,home_P1_PF,home_P1_PFD,home_P1_PTS,home_P1_PLUS_MINUS,home_P1_E_OFF_RATING,home_P1_E_DEF_RATING,home_P1_AST_RATIO,home_P1_TM_TOV_PCT,home_P1_E_USG_PCT,home_P1_E_PACE,home_P1_POSS,home_P1_PIE,home_P1_SPD,home_P1_DIST,home_P1_ORBC,home_P1_DRBC,home_P1_RBC,home_P1_TCHS,home_P1_SAST,home_P1_FTAST,home_P1_PASS,home_P1_CFGM,home_P1_CFGA,home_P1_UFGM,home_P1_UFGA,home_P1_DFGM,home_P1_DFGA,home_P1_FG2M,home_P1_FG2A,home_P1_PTS_2PT_MR,home_P1_PTS_FB,home_P1_PTS_OFF_TOV,home_P1_PTS_PAINT,home_P1_AST_2PM,home_P1_UAST_2PM,home_P1_AST_3PM,home_P1_UAST_3PM,home_P2_MIN,home_P2_FG3M,home_P2_FG3A,home_P2_FTM,home_P2_FTA,home_P2_OREB,home_P2_DREB,home_P2_AST,home_P2_TOV,home_P2_STL,home_P2_BLK,home_P2_BLKA,home_P2_PF,home_P2_PFD,home_P2_PTS,home_P2_PLUS_MINUS,home_P2_E_OFF_RATING,home_P2_E_DEF_RATING,home_P2_AST_RATIO,home_P2_TM_TOV_PCT,home_P2_E_USG_PCT,home_P2_E_PACE,home_P2_POSS,home_P2_PIE,home_P2_SPD,home_P2_DIST,home_P2_ORBC,home_P2_DRBC,home_P2_RBC,home_P2_TCHS,home_P2_SAST,home_P2_FTAST,home_P2_PASS,home_P2_CFGM,home_P2_CFGA,home_P2_UFGM,home_P2_UFGA,home_P2_DFGM,home_P2_DFGA,home_P2_FG2M,home_P2_FG2A,home_P2_PTS_2PT_MR,home_P2_PTS_FB,home_P2_PTS_OFF_TOV,home_P2_PTS_PAINT,home_P2_AST_2PM,home_P2_UAST_2PM,home_P2_AST_3PM,home_P2_UAST_3PM,...,away_P7_MIN,away_P7_FG3M,away_P7_FG3A,away_P7_FTM,away_P7_FTA,away_P7_OREB,away_P7_DREB,away_P7_AST,away_P7_TOV,away_P7_STL,away_P7_BLK,away_P7_BLKA,away_P7_PF,away_P7_PFD,away_P7_PTS,away_P7_PLUS_MINUS,away_P7_E_OFF_RATING,away_P7_E_DEF_RATING,away_P7_AST_RATIO,away_P7_TM_TOV_PCT,away_P7_E_USG_PCT,away_P7_E_PACE,away_P7_POSS,away_P7_PIE,away_P7_SPD,away_P7_DIST,away_P7_ORBC,away_P7_DRBC,away_P7_RBC,away_P7_TCHS,away_P7_SAST,away_P7_FTAST,away_P7_PASS,away_P7_CFGM,away_P7_CFGA,away_P7_UFGM,away_P7_UFGA,away_P7_DFGM,away_P7_DFGA,away_P7_FG2M,away_P7_FG2A,away_P7_PTS_2PT_MR,away_P7_PTS_FB,away_P7_PTS_OFF_TOV,away_P7_PTS_PAINT,away_P7_AST_2PM,away_P7_UAST_2PM,away_P7_AST_3PM,away_P7_UAST_3PM,away_P8_MIN,away_P8_FG3M,away_P8_FG3A,away_P8_FTM,away_P8_FTA,away_P8_OREB,away_P8_DREB,away_P8_AST,away_P8_TOV,away_P8_STL,away_P8_BLK,away_P8_BLKA,away_P8_PF,away_P8_PFD,away_P8_PTS,away_P8_PLUS_MINUS,away_P8_E_OFF_RATING,away_P8_E_DEF_RATING,away_P8_AST_RATIO,away_P8_TM_TOV_PCT,away_P8_E_USG_PCT,away_P8_E_PACE,away_P8_POSS,away_P8_PIE,away_P8_SPD,away_P8_DIST,away_P8_ORBC,away_P8_DRBC,away_P8_RBC,away_P8_TCHS,away_P8_SAST,away_P8_FTAST,away_P8_PASS,away_P8_CFGM,away_P8_CFGA,away_P8_UFGM,away_P8_UFGA,away_P8_DFGM,away_P8_DFGA,away_P8_FG2M,away_P8_FG2A,away_P8_PTS_2PT_MR,away_P8_PTS_FB,away_P8_PTS_OFF_TOV,away_P8_PTS_PAINT,away_P8_AST_2PM,away_P8_UAST_2PM,away_P8_AST_3PM,away_P8_UAST_3PM,prev_cover,prev2_cover
75,-9.5,202.5,37.846333,0.0,0.0,6.8,7.4,4.2,7.2,1.4,1.4,2.2,4.0,0.8,3.2,6.4,21.2,2.2,102.82,99.92,6.22,5.88,0.2466,96.900,75.8,0.1752,4.150,2.614,9.0,10.6,18.6,54.0,0.2,0.0,31.8,4.4,10.8,2.8,6.0,2.0,5.0,7.2,16.8,4.8,3.4,5.0,8.8,5.0,2.0,0.0,0.0,31.661667,1.4,2.6,2.0,2.0,0.4,2.6,6.0,4.0,1.6,0.4,1.0,3.0,1.6,13.8,1.0,102.74,101.10,24.70,17.16,0.2530,96.796,63.6,0.0846,4.426,2.332,2.0,3.0,4.6,70.4,0.2,0.8,49.2,2.0,6.2,3.2,7.4,1.0,1.2,3.8,11.0,3.4,1.4,1.4,3.6,0.0,3.6,1.0,0.4,...,21.539167,0.833333,2.833333,3.5,4.5,0.333333,1.833333,1.333333,1.333333,1.166667,0.333333,0.333333,2.333333,2.833333,11.666667,-3.0,93.416667,99.883333,9.70,10.183333,0.2225,104.706667,46.166667,0.077167,4.311667,1.548333,2.0,3.333333,5.333333,27.166667,0.333333,0.0,15.166667,1.333333,3.166667,2.333333,5.5,0.666667,1.0,2.833333,5.833333,2.666667,2.166667,1.333333,2.666667,1.333333,1.333333,0.666667,0.166667,17.159444,0.0,0.0,0.166667,0.333333,2.0,4.333333,0.5,2.0,0.5,1.0,0.333333,2.333333,0.5,8.5,0.666667,95.266667,92.60,6.516667,21.066667,0.2125,104.143333,37.666667,0.147167,4.378333,1.255,5.0,9.0,13.5,35.333333,0.166667,0.0,26.166667,1.5,2.333333,2.666667,4.5,1.5,3.833333,4.166667,6.833333,4.333333,0.333333,1.833333,3.833333,3.0,1.0,0.0,0.0,0.0,1.0
76,-4.5,210.5,36.793333,1.8,6.4,7.2,8.8,3.6,11.0,4.4,2.4,0.8,0.4,0.4,1.8,7.4,26.2,12.8,105.86,93.34,14.56,7.90,0.2722,106.036,80.4,0.2204,3.934,2.372,9.8,16.8,25.4,89.8,0.0,0.4,63.6,4.2,8.8,4.4,9.8,4.2,5.8,6.8,12.6,2.4,2.8,5.6,10.6,3.8,2.8,1.6,0.2,35.046667,2.8,5.0,5.6,6.2,0.4,2.6,2.6,1.6,1.2,0.0,1.2,1.0,4.4,21.6,9.6,101.90,92.00,12.54,7.58,0.2138,105.960,76.8,0.1386,4.022,2.310,3.4,4.2,7.4,51.0,0.4,0.4,30.8,3.0,6.2,3.6,8.4,0.8,1.4,3.8,9.6,2.2,3.6,4.2,5.2,2.6,1.2,2.6,0.2,...,17.283000,2.600000,3.800000,0.4,0.6,0.600000,1.800000,0.600000,0.400000,0.400000,0.400000,0.000000,1.600000,1.000000,10.600000,-0.2,111.580000,110.540000,17.00,7.000000,0.1442,99.196000,36.000000,0.150600,4.410000,1.260000,1.0,4.200000,5.200000,28.600000,0.000000,0.0,21.600000,0.600000,0.800000,3.200000,4.8,0.600000,1.0,1.200000,1.800000,0.400000,1.200000,1.600000,1.600000,0.600000,0.600000,2.400000,0.200000,15.161333,0.4,1.4,0.400000,0.400000,0.2,1.800000,3.6,1.6,0.2,0.2,0.000000,1.200000,0.2,4.4,-1.800000,103.160000,106.10,36.300000,19.600000,0.1756,100.542000,32.800000,0.041000,4.630000,1.168,1.4,3.2,4.6,40.800000,0.400000,0.2,34.000000,0.4,1.400000,1.400000,2.8,0.2,0.200000,1.400000,2.800000,1.200000,0.600000,0.200000,1.000000,0.4,0.8,0.4,0.0,1.0,0.0
78,-5.5,191.0,36.465667,2.2,4.6,3.0,4.2,0.2,5.4,4.6,1.8,1.0,0.0,0.8,1.2,3.0,18.8,4.8,101.84,98.14,18.34,7.10,0.2326,100.460,75.4,0.1214,4.250,2.550,3.2,9.8,12.8,66.6,0.6,0.0,44.6,2.8,8.8,4.0,7.2,1.0,1.8,4.6,11.4,4.2,2.2,2.2,4.4,0.8,3.8,2.2,0.0,35.510333,0.0,0.0,3.0,4.0,4.6,8.6,2.6,2.6,1.2,1.2,0.8,2.8,3.4,17.8,6.6,100.64,93.26,14.34,11.88,0.2058,100.772,72.4,0.1644,4.102,2.376,9.4,14.0,22.2,73.8,0.8,0.0,54.4,3.8,6.2,3.6,6.8,3.2,5.2,7.4,13.0,4.8,0.0,1.4,9.4,3.4,3.4,0.0,0.0,...,20.540667,1.000000,2.400000,1.8,1.8,0.400000,1.600000,3.000000,1.000000,0.800000,0.400000,0.000000,1.000000,1.800000,9.200000,-3.4,102.180000,116.420000,27.76,7.360000,0.1766,97.672000,40.800000,0.166000,4.138000,1.416000,0.8,2.800000,3.600000,48.600000,0.600000,0.6,38.800000,1.000000,2.000000,2.200000,4.4,1.000000,1.2,2.200000,4.000000,1.200000,1.400000,0.800000,2.800000,0.000000,2.200000,0.200000,0.800000,18.271333,0.6,2.0,0.400000,0.400000,0.0,2.400000,1.0,1.4,1.0,0.0,0.200000,1.400000,0.6,7.0,-3.800000,96.000000,106.82,9.740000,16.560000,0.1762,93.128000,36.000000,0.095600,4.392000,1.330,1.6,4.0,5.6,25.400000,0.200000,0.2,17.400000,0.8,1.200000,2.200000,4.4,0.4,0.800000,2.400000,3.600000,1.200000,0.600000,1.400000,2.600000,0.6,1.8,0.6,0.0,1.0,1.0
80,3.0,205.5,35.430000,2.0,5.6,4.2,6.2,1.2,3.8,7.6,3.0,3.0,0.2,1.0,2.0,4.2,19.8,-3.0,102.10,105.74,25.42,9.52,0.2604,107.124,77.8,0.1144,4.656,2.746,4.8,7.6,12.0,85.4,0.6,0.0,59.6,3.0,8.2,3.8,8.8,1.4,1.6,4.8,11.4,1.8,3.2,2.8,6.8,1.0,3.4,1.4,0.6,34.852667,0.2,1.8,4.8,5.6,1.6,3.8,3.4,3.0,1.6,0.0,1.2,1.6,4.2,22.2,-7.8,97.86,108.38,13.60,12.54,0.2558,105.426,75.8,0.1424,4.344,2.510,5.0,8.2,12.4,58.6,0.8,0.4,34.8,5.2,8.6,3.4,7.0,3.6,5.0,8.4,13.8,4.4,4.8,4.0,11.6,4.0,4.0,0.2,0.0,...,21.222000,2.400000,6.200000,1.0,1.0,0.800000,2.200000,1.200000,0.800000,1.400000,0.400000,0.200000,2.000000,1.400000,14.600000,0.6,96.840000,94.680000,8.62,7.460000,0.2490,98.592000,43.400000,0.219800,4.168000,1.468000,1.8,5.200000,6.600000,28.200000,0.000000,0.0,15.400000,2.600000,3.800000,3.000000,7.2,0.400000,0.4,3.200000,4.800000,1.000000,1.200000,3.000000,5.000000,1.800000,1.200000,2.200000,0.000000,17.163333,0.2,0.6,0.600000,0.800000,0.6,3.400000,0.4,1.0,0.8,0.0,0.200000,1.800000,0.4,4.0,-1.400000,92.920000,98.46,4.880000,14.600000,0.1498,99.556000,34.400000,0.043400,4.726000,1.336,2.8,4.8,7.6,20.400000,0.000000,0.0,13.600000,0.6,2.200000,1.000000,2.6,0.8,1.200000,1.400000,4.200000,1.000000,1.000000,0.400000,1.400000,1.2,0.2,0.2,0.0,0.0,0.0
81,-8.5,187.5,37.172000,3.2,7.2,5.4,6.4,0.8,7.4,4.0,3.2,1.8,0.6,0.6,1.2,5.8,25.8,13.6,101.04,82.12,14.28,11.76,0.2940,98.242,74.0,0.2520,4.286,2.648,5.4,12.8,17.4,73.2,0.4,0.0,47.6,3.2,7.8,5.4,10.0,0.6,1.4,5.4,10.6,4.4,3.0,5.8,5.6,1.8,3.6,2.6,0.4,35.848333,2.8,5.2,1.2,2.6,1.4,4.8,4.6,2.6,0.2,0.2,0.6,2.6,2.4,16.4,7.8,100.22,86.16,21.50,11.68,0.2160,97.832,71.4,0.1296,3.904,2.322,3.6,8.8,12.4,58.8,0.4,0.0,40.6,2.4,5.8,3.8,7.4,0.8,0.8,3.4,8.0,0.4,2.4,5.2,6.4,0.8,2.4,2.8,0.0,...,19.686667,0.000000,0.200000,2.6,3.2,2.600000,4.000000,0.000000,1.200000,0.000000,0.800000,0.200000,2.000000,2.800000,7.000000,-1.2,99.260000,102.860000,0.00,17.340000,0.1484,95.146000,38.600000,0.138400,3.332000,1.142000,4.8,6.600000,10.200000,22.400000,0.000000,0.0,17.000000,1.400000,1.800000,0.400000,1.2,1.400000,2.0,2.200000,3.400000,0.200000,0.400000,0.600000,3.600000,1.000000,1.200000,0.000000,0.000000,16.703667,1.4,3.2,1.000000,1.600000,0.0,2.000000,0.6,1.0,0.6,0.0,0.200000,1.600000,1.0,6.0,3.200000,111.360000,95.56,5.560000,20.100000,0.1758,93.794000,32.400000,0.062600,3.488000,0.884,0.8,1.8,2.4,11.800000,0.200000,0.0,6.800000,0.2,1.400000,1.200000,2.2,0.4,0.400000,0.400000,1.400000,0.200000,0.000000,1.400000,0.200000,0.2,0.2,1.2,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6917,-7.0,230.0,38.850333,2.7,7.2,5.0,6.2,1.5,4.5,6.3,3.0,1.7,0.7,0.9,2.9,4.8,26.7,-1.3,110.62,110.17,18.97,9.22,0.2983,100.513,80.8,0.1357,3.759,2.590,2.9,7.8,10.7,72.3,0.7,0.4,47.5,3.1,7.2,5.4,10.7,2.2,3.8,6.8,13.6,3.0,4.4,3.7,10.0,2.2,4.0,1.7,0.8,36.547167,2.8,7.7,2.7,3.8,0.8,5.0,4.3,1.4,1.1,0.2,0.3,2.5,2.6,16.5,0.0,109.76,106.35,21.11,6.88,0.1939,101.071,76.1,0.1033,3.578,2.306,2.1,7.2,8.9,48.0,0.8,0.4,33.4,1.5,3.2,3.7,8.2,2.2,2.5,2.7,4.8,1.0,2.9,3.6,4.2,1.6,1.0,2.6,0.1,...,24.777333,1.200000,3.600000,1.2,1.5,0.800000,2.800000,1.100000,0.900000,0.900000,0.400000,0.100000,2.600000,1.500000,9.600000,-3.2,109.460000,115.600000,10.08,9.590000,0.1691,104.553000,53.500000,0.071000,4.148000,1.824000,2.0,4.500000,6.500000,28.500000,0.300000,0.0,17.700000,0.900000,2.300000,2.700000,6.0,1.600000,2.0,2.400000,4.700000,1.300000,1.100000,0.800000,2.600000,1.300000,1.000000,0.700000,0.500000,20.773000,0.6,2.4,3.300000,3.800000,0.4,1.300000,3.6,2.2,0.3,0.1,0.500000,1.500000,4.2,9.9,-4.300000,99.380000,107.86,22.100000,15.340000,0.2186,104.212000,44.400000,0.101800,4.313000,1.621,1.3,3.0,3.9,46.200000,0.200000,0.1,32.000000,1.1,2.900000,1.900000,4.6,0.9,1.300000,2.400000,5.100000,0.800000,0.700000,1.300000,3.600000,0.3,2.1,0.4,0.2,1.0,1.0
6918,2.0,210.0,38.550667,2.7,7.9,6.3,8.2,1.5,6.3,5.7,3.8,0.8,0.3,1.3,3.1,8.4,28.6,-1.8,102.96,104.72,16.50,11.17,0.3389,96.734,77.1,0.1612,3.689,2.528,4.8,10.2,14.0,100.9,0.9,0.1,68.4,6.2,11.7,3.6,9.8,1.7,2.8,7.1,13.6,0.5,1.1,2.3,13.1,2.9,3.8,1.4,0.9,32.286333,2.7,7.5,1.2,1.4,0.4,3.0,2.9,1.6,0.3,0.2,0.2,2.2,1.3,11.9,3.0,107.50,101.72,18.12,11.12,0.1686,97.062,64.6,0.0720,4.231,2.443,1.1,4.4,5.2,46.2,0.4,0.0,33.2,0.8,2.1,3.2,8.0,1.4,2.3,1.3,2.6,0.6,0.6,0.8,1.9,1.1,0.2,2.5,0.2,...,16.919167,0.800000,2.900000,0.5,1.0,0.900000,2.800000,1.500000,0.400000,0.400000,0.100000,0.200000,1.300000,1.400000,8.900000,0.6,109.110000,108.080000,14.52,4.630000,0.2472,97.834000,34.600000,0.087500,3.994000,1.205000,1.4,4.100000,5.400000,32.600000,0.200000,0.2,22.300000,1.700000,3.300000,2.100000,5.2,1.500000,2.0,3.000000,5.600000,0.900000,0.800000,1.200000,4.800000,1.300000,1.400000,0.700000,0.100000,12.074833,0.4,1.2,0.000000,0.000000,0.5,2.100000,0.6,0.3,0.5,0.3,0.300000,0.500000,0.1,2.6,-0.600000,110.090000,112.18,15.610000,6.940000,0.0900,99.721000,25.100000,0.069300,4.500000,0.962,1.2,3.0,4.2,14.600000,0.000000,0.1,11.600000,0.7,1.400000,0.400000,1.2,1.1,2.000000,0.700000,1.500000,0.000000,0.100000,0.200000,1.300000,0.3,0.3,0.4,0.0,0.0,1.0
6919,7.5,230.0,31.190833,4.0,9.3,2.1,2.6,0.8,2.7,6.9,2.8,1.3,0.4,1.0,1.9,2.5,25.5,3.4,105.73,104.62,22.54,9.42,0.3142,106.557,68.3,0.1606,4.149,2.306,2.2,5.1,7.3,82.6,1.0,0.6,54.9,3.9,7.9,5.8,12.5,1.1,1.8,5.7,11.1,2.9,2.8,4.7,8.2,1.0,4.5,1.5,2.3,28.693833,1.4,4.7,3.3,5.0,1.2,4.4,1.4,1.0,0.6,0.2,0.7,1.6,3.4,12.5,3.2,108.26,105.21,9.58,7.10,0.1881,106.854,63.8,0.0788,4.064,2.076,2.8,7.3,9.7,39.0,0.1,0.1,24.4,1.3,3.7,2.5,6.4,1.4,2.2,2.5,5.5,0.5,1.4,1.4,3.9,1.8,0.6,1.3,0.0,...,20.167000,1.300000,2.900000,0.7,0.9,0.700000,2.200000,1.800000,1.400000,0.500000,0.200000,0.400000,1.300000,0.400000,7.000000,-4.3,102.870000,112.270000,16.55,17.750000,0.1440,106.029000,44.500000,0.064400,4.382000,1.576000,1.6,4.000000,5.600000,25.100000,0.100000,0.0,17.400000,0.700000,1.900000,1.800000,3.3,0.700000,0.9,1.200000,2.300000,0.200000,1.700000,1.800000,2.100000,0.600000,0.500000,1.200000,0.000000,18.468667,1.2,2.6,0.100000,0.500000,0.7,3.400000,1.1,0.6,0.3,1.0,0.400000,2.000000,0.7,6.7,2.700000,114.890000,107.95,21.040000,6.950000,0.1364,103.573000,40.000000,0.077300,4.538000,1.491,2.5,5.3,7.7,30.000000,0.200000,0.0,23.200000,0.6,1.400000,2.100000,3.8,1.5,3.000000,1.500000,2.600000,0.700000,0.100000,1.700000,2.100000,1.1,0.2,1.1,0.0,1.0,0.0
6920,3.5,219.0,36.916333,1.9,5.4,3.2,4.7,1.2,5.2,6.8,2.2,1.3,1.1,0.5,1.3,3.4,22.1,-5.0,104.64,113.34,22.85,7.49,0.2557,105.082,80.1,0.1333,4.337,2.834,3.1,7.6,10.6,85.3,0.4,0.5,59.4,4.2,7.9,4.3,10.4,3.0,3.4,6.6,12.9,2.0,2.3,1.0,10.6,2.0,4.2,1.0,0.7,31.904333,2.5,7.0,1.1,1.2,0.0,2.6,1.8,0.6,0.9,0.8,0.2,2.4,1.0,10.4,-3.7,108.22,113.35,13.70,6.29,0.1357,103.861,69.0,0.0454,3.924,2.223,0.4,5.0,5.3,36.4,0.0,0.2,25.0,0.5,2.0,2.9,7.3,1.7,2.7,0.9,2.3,0.4,1.1,1.1,1.4,0.6,0.2,2.3,0.1,...,19.111333,1.100000,3.000000,1.9,2.3,0.300000,2.200000,2.200000,1.200000,0.500000,0.100000,0.700000,1.400000,1.700000,11.800000,0.2,106.140000,106.160000,13.77,8.780000,0.2895,101.095000,39.900000,0.121800,3.937000,1.353000,1.8,3.400000,5.100000,30.100000,0.600000,0.1,16.900000,2.000000,4.400000,2.400000,5.8,0.700000,0.7,3.300000,7.200000,0.800000,2.500000,1.800000,5.200000,1.600000,1.700000,0.900000,0.200000,9.908833,1.0,2.6,0.400000,0.400000,0.1,0.800000,1.2,0.6,0.2,0.3,0.200000,1.000000,0.9,4.2,-3.900000,93.100000,138.32,15.520000,6.700000,0.2925,103.198000,21.400000,0.059400,4.054000,0.775,0.2,1.1,1.3,18.700000,0.300000,0.0,12.400000,0.2,0.800000,1.200000,3.7,0.8,0.900000,0.400000,1.900000,0.000000,0.200000,0.800000,0.700000,0.2,0.2,0.7,0.3,0.0,1.0


## Against the Spread

In [299]:
test_idx = X_test.index

betting_results = full_df.loc[test_idx, ['date', 'spread']]
betting_results['actual_point_diff'] = point_diff_test
betting_results['prediction'] = en_preds_test
betting_results['bet_home_team'] = (betting_results['prediction'] + betting_results['spread'] > 0).astype(int)

betting_results['home_covered'] = (betting_results['actual_point_diff'] + betting_results['spread'] > 0).astype(int)
betting_results['profit'] = betting_results.apply(lambda row: 100 if row['bet_home_team'] == row['home_covered'] else -110, axis=1)
betting_results['cume_profit'] = betting_results['profit'].expanding().sum()

# betting_results['random_profit'] = np.random.choice([100, -110], betting_results.shape[0])
# betting_results['random_cume_profit'] = betting_results['random_profit'].expanding().sum()
# betting_results['prediction+spread'] = round(betting_results['prediction'] + betting_results['spread'], 2)
betting_results

Unnamed: 0,date,spread,actual_point_diff,prediction,bet_home_team,home_covered,profit,cume_profit
6922,2019-02-05,-8.5,10,8.120279,0,1,-110,-110.0
6923,2019-02-05,-7.5,-10,7.057131,0,0,100,-10.0
6924,2019-02-05,4.0,2,-3.651695,1,1,100,90.0
6925,2019-02-05,-2.5,-12,2.707693,1,0,-110,-20.0
6926,2019-02-05,-3.0,-2,2.983347,0,0,100,80.0
...,...,...,...,...,...,...,...,...
8727,2021-02-04,-5.0,21,4.887767,0,1,-110,-7350.0
8728,2021-02-04,-3.5,-12,3.636002,1,0,-110,-7460.0
8729,2021-02-04,-10.0,-16,9.339269,0,0,100,-7360.0
8730,2021-02-04,-4.0,-31,4.022723,1,0,-110,-7470.0
