In [47]:
import pandas as pd
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
# Notebook-wide display settings: never truncate columns of wide frames,
# and cap the number of rows rendered for long frames at 30.
pd.options.display.max_columns = None
pd.options.display.max_rows = 30

In [89]:
def combine_team_games(df, keep_method='home'):
    '''Combine a TEAM_ID-GAME_ID unique table into rows by game. Slow.

        The input is self-joined on SEASON_ID, GAME_ID and GAME_DATE, so every
        other column appears twice in the output: once with the suffix
        ``_Home`` (left side of the join) and once with ``_Away`` (right side).
        The suffixes are fixed; they only literally mean home/away when
        ``keep_method='home'``. For the other methods, "the ``_Home`` team"
        below simply means the team whose columns carry the ``_Home`` suffix.

        Parameters
        ----------
        df : Input DataFrame. Must contain the columns SEASON_ID, GAME_ID,
            GAME_DATE, TEAM_ID, MATCHUP and WL.
        keep_method : {'home', 'away', 'winner', 'loser', ``None``}, default 'home'
            Case-insensitive.
            - 'home' : Keep rows where the ``_Home`` team's MATCHUP contains ' vs. '.
            - 'away' : Keep rows where the ``_Home`` team's MATCHUP contains ' @ '.
            - 'winner' : Keep rows where the ``_Home`` team's WL is 'W'.
            - 'loser' : Keep rows where the ``_Home`` team's WL is 'L'.
            - ``None`` : Keep all rows (both orderings of each pairing). For an
                input with exactly two rows per game this is the same length
                as the input DataFrame.

        Returns
        -------
        result : DataFrame

        Raises
        ------
        ValueError
            If keep_method is not one of the values listed above.
    '''
    # Join every row to all others with the same game ID.
    joined = pd.merge(df, df, suffixes=('_Home', '_Away'),
                      on=['SEASON_ID', 'GAME_ID', 'GAME_DATE'])
    # Filter out any row that is joined to itself.
    result = joined[joined['TEAM_ID_Home'] != joined['TEAM_ID_Away']]

    # Take action based on the keep_method flag.
    if keep_method is None:
        # Return all the rows.
        return result
    if not isinstance(keep_method, str):
        # Report bad types the same way as bad strings instead of leaking
        # an AttributeError from .lower().
        raise ValueError(f'Invalid keep_method: {keep_method}')

    method = keep_method.lower()
    if method == 'home':
        # regex=False: the '.' in ' vs. ' must match literally.
        # na=False: a missing MATCHUP is treated as "not a match".
        mask = result['MATCHUP_Home'].str.contains(' vs. ', regex=False, na=False)
    elif method == 'away':
        mask = result['MATCHUP_Home'].str.contains(' @ ', regex=False, na=False)
    elif method == 'winner':
        mask = result['WL_Home'] == 'W'
    elif method == 'loser':
        mask = result['WL_Home'] == 'L'
    else:
        raise ValueError(f'Invalid keep_method: {keep_method}')
    return result[mask]


In [3]:
# Load the three pickled tables used below, in the order
# advanced box score, four factors, season game log.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted source.
pickle_paths = (
    'data/pickles/boxscoreadv21.p',
    'data/pickles/fourfactors21.p',
    'data/pickles/season21.p',
)
loaded_tables = []
for pickle_path in pickle_paths:
    with open(pickle_path, 'rb') as readfile:
        loaded_tables.append(pickle.load(readfile))
bs21, ff21, season21 = loaded_tables

In [27]:
# Replace the existing index with a fresh 0..n-1 range and discard the old one.
bs21 = bs21.reset_index(drop=True)

In [92]:
from nba_api.stats.endpoints import boxscorefourfactorsv2

# Re-download the four-factors box score for a single game; the result is used
# to patch ff21 in a separate cell.
# NOTE(review): this performs a request through nba_api every time the cell runs.
fix = boxscorefourfactorsv2.BoxScoreFourFactorsV2(game_id='0022001069')
fix_frames = fix.get_data_frames()
# Second returned frame — presumably the team-level table; confirm against nba_api.
fixdf = fix_frames[1]


In [32]:
# Replace the existing index with a fresh 0..n-1 range and discard the old one.
ff21 = ff21.reset_index(drop=True)

# Overwrite the rows labelled 0 and 1 (.loc slices include both endpoints)
# with the re-fetched frame.
# NOTE(review): assumes rows 0-1 of ff21 are the rows for the game held in
# fixdf — confirm before relying on this.
ff21.loc[0:1] = fixdf

In [26]:
# Replace the existing index with a fresh 0..n-1 range and discard the old one.
season21 = season21.reset_index(drop=True)

In [118]:
# Step 1: attach the advanced box score to the season game log, one row per
# (GAME_ID, TEAM_ID), then drop the duplicated identity columns and MIN_x.
merge1 = (
    season21
    .merge(bs21, on=['GAME_ID', 'TEAM_ID'])
    .drop(columns=['TEAM_NAME_y', 'TEAM_ABBREVIATION_y', 'MIN_x', 'TEAM_CITY'])
)

# Step 2: attach the four-factors table and drop its duplicated columns.
df21 = (
    merge1
    .merge(ff21, on=['GAME_ID', 'TEAM_ID'])
    .drop(columns=['TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'MIN',
                   'EFG_PCT_y', 'TM_TOV_PCT_y'])
)

# Step 3: collapse the two team rows of each game into a single row.
df21 = combine_team_games(df21, keep_method='home')

df21 = df21.reset_index(drop=True)

# Step 4: index by the parsed game date (the GAME_DATE column itself is kept,
# because a Series rather than a column name is passed) and order by date.
df21 = df21.set_index(pd.to_datetime(df21['GAME_DATE']), drop=True)

df21 = df21.sort_index()

In [109]:
def combine_dfs(season_df, bs_df, ff_df):
    '''Merge three per-team tables and collapse them to one row per game.

        Parameters
        ----------
        season_df, bs_df, ff_df : DataFrames that each carry GAME_ID and
            TEAM_ID columns. They are merged on those two keys in the order
            season_df -> bs_df -> ff_df.

        Returns
        -------
        result : DataFrame
            Output of ``combine_team_games(..., keep_method='home')``, indexed
            by the parsed GAME_DATE values and sorted by that index.
    '''
    team_keys = ['GAME_ID', 'TEAM_ID']

    # NOTE(review): this drops 'MIN_y', whereas the ad-hoc df21 cell in this
    # notebook drops 'MIN_x' at the same step — confirm which is intended.
    season_bs = season_df.merge(bs_df, on=team_keys)
    season_bs = season_bs.drop(
        columns=['TEAM_NAME_y', 'TEAM_ABBREVIATION_y', 'MIN_y', 'TEAM_CITY'])

    with_ff = season_bs.merge(ff_df, on=team_keys)
    with_ff = with_ff.drop(
        columns=['TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'MIN',
                 'EFG_PCT_y', 'TM_TOV_PCT_y'])

    games = combine_team_games(with_ff, keep_method='home')
    # A Series (not a column name) is passed, so the GAME_DATE column is kept.
    games = games.set_index(pd.to_datetime(games['GAME_DATE']), drop=True)
    return games.sort_index()
    

In [119]:
# Display the combined one-row-per-game table (rendering is limited by the
# pandas display options set in the first cell).
df21

Unnamed: 0_level_0,SEASON_ID,TEAM_ID_Home,TEAM_ABBREVIATION_x_Home,TEAM_NAME_x_Home,GAME_ID,GAME_DATE,MATCHUP_Home,WL_Home,PTS_Home,FGM_Home,FGA_Home,FG_PCT_Home,FG3M_Home,FG3A_Home,FG3_PCT_Home,FTM_Home,FTA_Home,FT_PCT_Home,OREB_Home,DREB_Home,REB_Home,AST_Home,STL_Home,BLK_Home,TOV_Home,PF_Home,PLUS_MINUS_Home,MIN_y_Home,E_OFF_RATING_Home,OFF_RATING_Home,E_DEF_RATING_Home,DEF_RATING_Home,E_NET_RATING_Home,NET_RATING_Home,AST_PCT_Home,AST_TOV_Home,AST_RATIO_Home,OREB_PCT_x_Home,DREB_PCT_Home,REB_PCT_Home,E_TM_TOV_PCT_Home,TM_TOV_PCT_x_Home,EFG_PCT_x_Home,TS_PCT_Home,USG_PCT_Home,E_USG_PCT_Home,E_PACE_Home,PACE_Home,PACE_PER40_Home,POSS_Home,PIE_Home,FTA_RATE_Home,OREB_PCT_y_Home,OPP_EFG_PCT_Home,OPP_FTA_RATE_Home,OPP_TOV_PCT_Home,OPP_OREB_PCT_Home,TEAM_ID_Away,TEAM_ABBREVIATION_x_Away,TEAM_NAME_x_Away,MATCHUP_Away,WL_Away,PTS_Away,FGM_Away,FGA_Away,FG_PCT_Away,FG3M_Away,FG3A_Away,FG3_PCT_Away,FTM_Away,FTA_Away,FT_PCT_Away,OREB_Away,DREB_Away,REB_Away,AST_Away,STL_Away,BLK_Away,TOV_Away,PF_Away,PLUS_MINUS_Away,MIN_y_Away,E_OFF_RATING_Away,OFF_RATING_Away,E_DEF_RATING_Away,DEF_RATING_Away,E_NET_RATING_Away,NET_RATING_Away,AST_PCT_Away,AST_TOV_Away,AST_RATIO_Away,OREB_PCT_x_Away,DREB_PCT_Away,REB_PCT_Away,E_TM_TOV_PCT_Away,TM_TOV_PCT_x_Away,EFG_PCT_x_Away,TS_PCT_Away,USG_PCT_Away,E_USG_PCT_Away,E_PACE_Away,PACE_Away,PACE_PER40_Away,POSS_Away,PIE_Away,FTA_RATE_Away,OREB_PCT_y_Away,OPP_EFG_PCT_Away,OPP_FTA_RATE_Away,OPP_TOV_PCT_Away,OPP_OREB_PCT_Away
GAME_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1
2020-12-22,22020,1610612747,LAL,Los Angeles Lakers,0022000002,2020-12-22,LAL vs. LAC,L,109,38,81,0.469,9,29,0.310,24,31,0.774,8,37,45,22,4,2,19,20,-7.0,240:00,103.2,104.8,109.1,111.5,-5.9,-6.7,0.579,1.16,16.2,0.250,0.731,0.510,17.986,18.3,0.525,0.576,1.0,0.199,106.00,104.0,86.67,104,0.509,0.383,0.182,0.548,0.204,0.150,0.269,1610612746,LAC,LA Clippers,LAC @ LAL,W,116,44,93,0.473,14,40,0.350,14,19,0.737,11,29,40,22,10,3,16,29,7.0,240:00,109.1,111.5,103.2,104.8,5.9,6.7,0.500,1.38,15.8,0.269,0.750,0.490,15.043,15.4,0.548,0.572,1.0,0.198,106.00,104.0,86.67,104,0.491,0.204,0.212,0.525,0.383,0.180,0.250
2020-12-22,22020,1610612751,BKN,Brooklyn Nets,0022000001,2020-12-22,BKN vs. GSW,W,125,42,92,0.457,15,35,0.429,26,32,0.813,13,44,57,24,11,7,20,22,26.0,240:00,110.5,111.6,86.8,88.4,23.8,23.2,0.571,1.20,16.0,0.315,0.719,0.534,17.687,17.9,0.538,0.589,1.0,0.201,113.60,112.0,93.33,112,0.650,0.348,0.241,0.424,0.232,0.158,0.281,1610612744,GSW,Golden State Warriors,GSW @ BKN,L,99,37,99,0.374,10,33,0.303,15,23,0.652,13,34,47,26,6,6,18,24,-26.0,240:00,86.8,88.4,110.5,111.6,-23.8,-23.2,0.703,1.44,17.0,0.281,0.685,0.466,15.773,16.1,0.424,0.454,1.0,0.201,113.60,112.0,93.33,112,0.350,0.232,0.203,0.538,0.348,0.177,0.315
2020-12-23,22020,1610612761,TOR,Toronto Raptors,0022000014,2020-12-23,TOR vs. NOP,L,99,38,89,0.427,14,46,0.304,9,12,0.750,7,28,35,26,10,5,20,22,-14.0,240:00,91.4,94.3,104.8,108.7,-13.4,-14.4,0.684,1.24,18.4,0.275,0.698,0.468,19.394,20.0,0.506,0.525,1.0,0.200,108.04,104.5,87.08,105,0.399,0.135,0.137,0.644,0.250,0.250,0.302,1610612740,NOP,New Orleans Pelicans,NOP @ TOR,W,113,42,80,0.525,19,42,0.452,10,20,0.500,8,37,45,30,13,7,24,17,14.0,240:00,104.8,108.7,91.4,94.3,13.4,14.4,0.714,1.11,20.6,0.302,0.725,0.532,25.046,26.0,0.644,0.636,1.0,0.195,108.04,104.5,87.08,104,0.601,0.250,0.186,0.506,0.135,0.194,0.275
2020-12-23,22020,1610612741,CHI,Chicago Bulls,0022000015,2020-12-23,CHI vs. ATL,L,104,36,87,0.414,8,35,0.229,24,28,0.857,8,29,37,20,10,3,15,22,-20.0,240:00,96.0,97.2,115.1,117.0,-19.1,-19.8,0.556,1.18,14.7,0.250,0.769,0.473,15.694,15.9,0.460,0.524,1.0,0.198,108.04,106.5,88.75,107,0.419,0.322,0.154,0.625,0.363,0.213,0.231,1610612737,ATL,Atlanta Hawks,ATL @ CHI,W,124,43,80,0.538,14,35,0.400,24,29,0.828,8,38,46,24,6,3,21,29,20.0,240:00,115.1,117.0,96.0,97.2,19.1,19.8,0.558,1.04,17.2,0.231,0.750,0.527,21.344,21.7,0.625,0.668,1.0,0.194,108.04,106.5,88.75,106,0.581,0.363,0.205,0.460,0.322,0.157,0.250
2020-12-23,22020,1610612757,POR,Portland Trail Blazers,0022000020,2020-12-23,POR vs. UTA,L,100,37,92,0.402,12,33,0.364,14,17,0.824,6,34,40,23,6,3,6,18,-20.0,240:00,100.5,101.0,118.4,120.0,-17.9,-19.0,0.622,3.83,17.9,0.145,0.717,0.426,6.031,6.1,0.467,0.503,1.0,0.201,100.42,99.5,82.92,99,0.417,0.185,0.109,0.559,0.202,0.109,0.283,1610612762,UTA,Utah Jazz,UTA @ POR,W,120,43,94,0.457,19,50,0.380,15,19,0.789,12,47,59,24,3,3,11,16,20.0,240:00,118.4,120.0,100.5,101.0,17.9,19.0,0.558,2.18,17.5,0.283,0.855,0.574,10.852,11.0,0.559,0.586,1.0,0.198,100.42,99.5,82.92,100,0.583,0.202,0.226,0.467,0.185,0.060,0.145
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-16,22020,1610612764,WAS,Washington Wizards,0022001080,2021-05-16,WAS vs. CHA,W,115,44,91,0.484,10,31,0.323,17,21,0.810,10,43,53,22,4,3,14,20,5.0,240:00,110.3,112.7,102.3,105.8,8.1,7.0,0.500,1.57,16.1,0.271,0.746,0.533,13.431,13.7,0.538,0.574,1.0,0.200,105.90,103.0,85.83,102,0.525,0.231,0.208,0.468,0.255,0.121,0.254,1610612766,CHA,Charlotte Hornets,CHA @ WAS,L,110,37,94,0.394,14,45,0.311,22,24,0.917,10,32,42,25,8,5,11,15,-5.0,240:00,102.3,105.8,110.3,112.7,-8.1,-7.0,0.676,1.92,17.5,0.254,0.729,0.467,12.086,12.5,0.468,0.526,1.0,0.192,105.90,103.0,85.83,104,0.475,0.255,0.169,0.538,0.231,0.134,0.271
2021-05-16,22020,1610612759,SAS,San Antonio Spurs,0022001078,2021-05-16,SAS vs. PHX,L,121,50,98,0.510,7,24,0.292,14,19,0.737,11,35,46,30,5,5,10,8,-2.0,240:00,114.8,117.5,118.5,118.3,-3.7,-0.8,0.600,3.00,20.5,0.255,0.745,0.500,9.491,9.7,0.546,0.569,1.0,0.198,104.56,103.5,86.25,103,0.479,0.194,0.216,0.572,0.038,0.077,0.255,1610612756,PHX,Phoenix Suns,PHX @ SAS,W,123,53,104,0.510,13,35,0.371,4,4,1.000,10,38,48,36,7,5,8,13,2.0,240:00,118.5,118.3,114.8,117.5,3.7,0.8,0.679,4.50,24.0,0.255,0.745,0.500,7.710,7.7,0.572,0.582,1.0,0.200,104.56,103.5,86.25,104,0.521,0.038,0.196,0.546,0.194,0.095,0.255
2021-05-16,22020,1610612744,GSW,Golden State Warriors,0022001070,2021-05-16,GSW vs. MEM,W,113,41,83,0.494,15,39,0.385,16,18,0.889,12,34,46,26,9,4,19,19,12.0,240:00,114.2,114.1,99.6,101.0,14.6,13.1,0.634,1.30,19.0,0.333,0.679,0.526,20.218,20.2,0.584,0.621,1.0,0.198,100.14,99.5,82.92,99,0.562,0.217,0.286,0.467,0.207,0.128,0.321,1610612763,MEM,Memphis Grizzlies,MEM @ GSW,L,101,40,92,0.435,6,25,0.240,15,19,0.789,12,27,39,25,12,5,12,21,-12.0,240:00,99.6,101.0,114.2,114.1,-14.6,-13.1,0.625,1.92,18.1,0.321,0.667,0.474,12.826,13.0,0.467,0.503,1.0,0.199,100.14,99.5,82.92,100,0.438,0.207,0.226,0.584,0.217,0.202,0.333
2021-05-16,22020,1610612757,POR,Portland Trail Blazers,0022001076,2021-05-16,POR vs. DEN,W,132,46,87,0.529,18,43,0.419,22,26,0.846,11,40,51,24,3,6,13,16,16.0,240:00,130.1,136.1,115.3,118.4,14.8,17.7,0.522,1.71,17.6,0.341,0.745,0.566,13.801,14.4,0.632,0.670,1.0,0.196,101.02,97.5,81.25,97,0.582,0.299,0.250,0.520,0.153,0.060,0.255,1610612743,DEN,Denver Nuggets,DEN @ POR,L,116,44,98,0.449,14,37,0.378,14,15,0.933,10,26,36,20,8,2,6,20,-16.0,240:00,115.3,118.4,130.1,136.1,-14.8,-17.7,0.455,3.33,15.3,0.255,0.659,0.434,5.964,6.1,0.520,0.554,1.0,0.197,101.02,97.5,81.25,98,0.418,0.153,0.182,0.632,0.299,0.138,0.341


In [117]:
# Rank columns by their correlation with the PTS_Home column and show the
# 30 largest values.
# Only numeric/boolean columns are correlated: df21 also holds string columns
# (team names, matchup, W/L, ...), and DataFrame.corr() stopped dropping those
# silently in pandas 2.0, where it raises instead.
df21.select_dtypes(include=['number', 'bool']).corr()['PTS_Home'].sort_values(ascending=False).head(30)

PTS_Home             1.000000
OFF_RATING_Home      0.879720
DEF_RATING_Away      0.879720
E_OFF_RATING_Home    0.875869
E_DEF_RATING_Away    0.875869
FGM_Home             0.856241
TS_PCT_Home          0.791837
OPP_EFG_PCT_Away     0.761754
EFG_PCT_x_Home       0.761754
FG_PCT_Home          0.741394
PLUS_MINUS_Home      0.603460
NET_RATING_Home      0.600128
E_NET_RATING_Home    0.591379
FG3_PCT_Home         0.588903
PIE_Home             0.561728
AST_Home             0.551766
FG3M_Home            0.493129
AST_RATIO_Home       0.446917
POSS_Home            0.412795
POSS_Away            0.402780
PACE_Home            0.349653
PACE_Away            0.349653
PACE_PER40_Home      0.349633
PACE_PER40_Away      0.349633
E_PACE_Home          0.333526
E_PACE_Away          0.333526
REB_PCT_Home         0.321434
FGA_Home             0.283557
FTM_Away             0.267994
FTA_Away             0.265492
Name: PTS_Home, dtype: float64