In [1]:
import pandas as pd
import numpy as np

In [88]:
nba_games_df = pd.read_csv("nba_games_processed.csv")
nba_games_df["GAME_DATE"] = pd.to_datetime(nba_games_df["GAME_DATE"])
nba_games_df.head(20)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22019,1610613000.0,ATL,Atlanta Hawks,21900431,2019-12-21,ATL @ BKN,L,240,112,39,105,0.371,11,48,0.229,23,27,0.852,14,38,52,17,7,5,10,22,-10.0
1,22019,1610613000.0,WAS,Washington Wizards,21900433,2019-12-21,WAS @ PHI,L,239,108,36,88,0.409,7,31,0.226,29,34,0.853,7,31,38,18,7,3,11,21,-17.0
2,22019,1610613000.0,LAC,LA Clippers,21900436,2019-12-21,LAC @ SAS,W,240,134,54,99,0.545,12,36,0.333,14,19,0.737,13,30,43,32,11,2,11,19,25.0
3,22019,1610613000.0,MIL,Milwaukee Bucks,21900434,2019-12-21,MIL @ NYK,W,241,123,41,85,0.482,18,43,0.419,23,35,0.657,8,44,52,26,9,14,19,16,21.0
4,22019,1610613000.0,DEN,Denver Nuggets,21900443,2019-12-22,DEN @ LAL,W,241,128,45,92,0.489,11,32,0.344,27,30,0.9,17,27,44,31,13,2,8,19,24.0
5,22019,1610613000.0,IND,Indiana Pacers,21900441,2019-12-22,IND @ MIL,L,240,89,37,100,0.37,12,40,0.3,3,5,0.6,10,38,48,27,7,5,13,15,-28.0
6,22019,1610613000.0,LAC,LA Clippers,21900442,2019-12-22,LAC @ OKC,L,239,112,40,86,0.465,11,27,0.407,21,24,0.875,10,32,42,22,7,2,15,23,-6.0
7,22019,1610613000.0,LAL,Los Angeles Lakers,21900443,2019-12-22,LAL vs. DEN,L,240,104,39,83,0.47,10,29,0.345,16,22,0.727,18,27,45,18,7,9,19,25,-24.0
8,22019,1610613000.0,CHA,Charlotte Hornets,21900440,2019-12-22,CHA @ BOS,L,240,93,33,82,0.402,10,34,0.294,17,23,0.739,2,25,27,26,3,5,3,17,-26.0
9,22019,1610613000.0,MIL,Milwaukee Bucks,21900441,2019-12-22,MIL vs. IND,W,242,117,44,104,0.423,15,44,0.341,14,17,0.824,16,45,61,31,10,9,9,7,28.0


In [89]:
# Function from the NBA docs
def combine_team_games(df, keep_method='home'):
    '''Combine a TEAM_ID-GAME_ID unique table into rows by game. Slow.

        Parameters
        ----------
        df : Input DataFrame.
        keep_method : {'home', 'away', 'winner', 'loser', ``None``}, default 'home'
            - 'home' : Keep rows where TEAM_A is the home team.
            - 'away' : Keep rows where TEAM_A is the away team.
            - 'winner' : Keep rows where TEAM_A is the losing team.
            - 'loser' : Keep rows where TEAM_A is the winning team.
            - ``None`` : Keep all rows. Will result in an output DataFrame the same
                length as the input DataFrame.
                
        Returns
        -------
        result : DataFrame
    '''
    # Join every row to all others with the same game ID.
    joined = pd.merge(df, df, suffixes=['_A', '_B'],
                      on=['SEASON_ID', 'GAME_ID', 'GAME_DATE'])
    # Filter out any row that is joined to itself.
    result = joined[joined.TEAM_ID_A != joined.TEAM_ID_B]
    # Take action based on the keep_method flag.
    if keep_method is None:
        # Return all the rows.
        pass
    elif keep_method.lower() == 'home':
        # Keep rows where TEAM_A is the home team.
        result = result[result.MATCHUP_A.str.contains(' vs. ')]
    elif keep_method.lower() == 'away':
        # Keep rows where TEAM_A is the away team.
        result = result[result.MATCHUP_A.str.contains(' @ ')]
    elif keep_method.lower() == 'winner':
        result = result[result.WL_A == 'W']
    elif keep_method.lower() == 'loser':
        result = result[result.WL_A == 'L']
    else:
        raise ValueError(f'Invalid keep_method: {keep_method}')
    return result

In [90]:
nba_games_joined = combine_team_games(nba_games_df)

In [91]:
nba_games_joined.info(), nba_games_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7121 entries, 10 to 28505
Data columns (total 53 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   SEASON_ID            7121 non-null   int64         
 1   TEAM_ID_A            7121 non-null   float64       
 2   TEAM_ABBREVIATION_A  7121 non-null   object        
 3   TEAM_NAME_A          7121 non-null   object        
 4   GAME_ID              7121 non-null   int64         
 5   GAME_DATE            7121 non-null   datetime64[ns]
 6   MATCHUP_A            7121 non-null   object        
 7   WL_A                 7121 non-null   object        
 8   MIN_A                7121 non-null   int64         
 9   PTS_A                7121 non-null   int64         
 10  FGM_A                7121 non-null   int64         
 11  FGA_A                7121 non-null   int64         
 12  FG_PCT_A             7121 non-null   float64       
 13  FG3M_A               7121 non-null  

(None, None)

In [93]:
nba_games_df[(nba_games_df["TEAM_ABBREVIATION"] == "ATL") & (nba_games_df["GAME_DATE"] < pd.Timestamp('2023-3-08'))].iloc[-5:]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
8206,22022,1610613000.0,ATL,Atlanta Hawks,22200911,2023-02-26,ATL vs. BKN,W,240,129,45,90,0.5,16,30,0.533,23,28,0.821,13,33,46,20,7,2,15,18,2.0
8237,22022,1610613000.0,ATL,Atlanta Hawks,22200923,2023-02-28,ATL vs. WAS,L,239,116,43,93,0.462,13,35,0.371,17,19,0.895,13,29,42,21,4,7,9,17,-3.0
8270,22022,1610613000.0,ATL,Atlanta Hawks,22200947,2023-03-03,ATL vs. POR,W,239,129,52,91,0.571,12,26,0.462,13,16,0.813,10,33,43,30,10,5,17,19,18.0
8288,22022,1610613000.0,ATL,Atlanta Hawks,22200958,2023-03-04,ATL @ MIA,L,239,109,37,79,0.468,9,30,0.3,26,30,0.867,10,27,37,22,12,1,16,23,-8.0
8319,22022,1610613000.0,ATL,Atlanta Hawks,22200974,2023-03-06,ATL @ MIA,L,239,128,51,92,0.554,9,29,0.31,17,23,0.739,12,25,37,28,5,0,10,25,-2.0


In [94]:
def get_season_to_date_stats(df, date, team_a, team_b):
    '''
    Returns a dataframe of games with the season to date rolling averages for each team
    df - nba games dataframe that is NOT joined per game
    date - string in the format of 2023-02-26
    '''
    out_df = pd.DataFrame()
    
    team_a_prev_games = df[(df["TEAM_ABBREVIATION"] == team_a) & (df["GAME_DATE"] < date)].iloc[-5:]
    team_b_prev_games = df[(df["TEAM_ABBREVIATION"] == team_b) & (df["GAME_DATE"] < date)].iloc[-5:]

    # poss = FGA + 0.44*FTA + TOV - OREB
    out_df["poss_a"] = (team_a_prev_games["FGA"] + (0.44 * team_a_prev_games["FTA"]) + team_a_prev_games["TOV"] - team_a_prev_games["OREB"]).mean()
    out_df["poss_b"] = (team_b_prev_games["FGA"] + (0.44 * team_b_prev_games["FTA"]) + team_b_prev_games["TOV"] - team_b_prev_games["OREB"]).mean()

    return out_df

In [95]:
get_season_to_date_stats(nba_games_df, "2023-02-26", "NYK", "BKN")

Unnamed: 0,poss_a,poss_b


In [96]:
nba_games_joined.head(100)

Unnamed: 0,SEASON_ID,TEAM_ID_A,TEAM_ABBREVIATION_A,TEAM_NAME_A,GAME_ID,GAME_DATE,MATCHUP_A,WL_A,MIN_A,PTS_A,FGM_A,FGA_A,FG_PCT_A,FG3M_A,FG3A_A,FG3_PCT_A,FTM_A,FTA_A,FT_PCT_A,OREB_A,DREB_A,REB_A,AST_A,STL_A,BLK_A,TOV_A,PF_A,PLUS_MINUS_A,TEAM_ID_B,TEAM_ABBREVIATION_B,TEAM_NAME_B,MATCHUP_B,WL_B,MIN_B,PTS_B,FGM_B,FGA_B,FG_PCT_B,FG3M_B,FG3A_B,FG3_PCT_B,FTM_B,FTA_B,FT_PCT_B,OREB_B,DREB_B,REB_B,AST_B,STL_B,BLK_B,TOV_B,PF_B,PLUS_MINUS_B
10,22019,1610613000.0,LAL,Los Angeles Lakers,21900443,2019-12-22,LAL vs. DEN,L,240,104,39,83,0.47,10,29,0.345,16,22,0.727,18,27,45,18,7,9,19,25,-24.0,1610613000.0,DEN,Denver Nuggets,DEN @ LAL,W,241,128,45,92,0.489,11,32,0.344,27,30,0.9,17,27,44,31,13,2,8,19,24.0
14,22019,1610613000.0,MIL,Milwaukee Bucks,21900441,2019-12-22,MIL vs. IND,W,242,117,44,104,0.423,15,44,0.341,14,17,0.824,16,45,61,31,10,9,9,7,28.0,1610613000.0,IND,Indiana Pacers,IND @ MIL,L,240,89,37,100,0.37,12,40,0.3,3,5,0.6,10,38,48,27,7,5,13,15,-28.0
16,22019,1610613000.0,BOS,Boston Celtics,21900440,2019-12-22,BOS vs. CHA,W,239,119,46,88,0.523,14,32,0.438,13,16,0.813,10,47,57,25,2,9,11,24,26.0,1610613000.0,CHA,Charlotte Hornets,CHA @ BOS,L,240,93,33,82,0.402,10,34,0.294,17,23,0.739,2,25,27,26,3,5,3,17,-26.0
18,22019,1610613000.0,OKC,Oklahoma City Thunder,21900442,2019-12-22,OKC vs. LAC,W,241,118,46,97,0.474,9,27,0.333,17,20,0.85,16,33,49,24,10,4,12,20,6.0,1610613000.0,LAC,LA Clippers,LAC @ OKC,L,239,112,40,86,0.465,11,27,0.407,21,24,0.875,10,32,42,22,7,2,15,23,-6.0
21,22019,1610613000.0,TOR,Toronto Raptors,21900439,2019-12-22,TOR vs. DAL,W,242,110,37,94,0.394,10,34,0.294,26,31,0.839,14,37,51,21,12,5,10,21,3.0,1610613000.0,DAL,Dallas Mavericks,DAL @ TOR,L,240,107,35,90,0.389,15,46,0.326,22,27,0.815,14,39,53,27,6,4,17,22,-3.0
25,22019,1610613000.0,PHX,Phoenix Suns,21900451,2019-12-23,PHX vs. DEN,L,240,111,42,87,0.483,13,37,0.351,14,18,0.778,9,29,38,29,10,0,17,19,-2.0,1610613000.0,DEN,Denver Nuggets,DEN @ PHX,W,239,113,43,78,0.551,15,28,0.536,12,15,0.8,6,33,39,31,10,1,21,24,2.0
29,22019,1610613000.0,SAC,Sacramento Kings,21900453,2019-12-23,SAC vs. HOU,L,240,104,41,87,0.471,7,30,0.233,15,23,0.652,11,38,49,20,4,4,17,24,-9.0,1610613000.0,HOU,Houston Rockets,HOU @ SAC,W,241,113,39,87,0.448,15,39,0.385,20,23,0.87,6,33,39,18,10,3,11,22,9.0
30,22019,1610613000.0,DET,Detroit Pistons,21900445,2019-12-23,DET vs. PHI,L,239,109,43,84,0.512,10,30,0.333,13,18,0.722,6,24,30,27,9,1,13,14,-16.0,1610613000.0,PHI,Philadelphia 76ers,PHI @ DET,W,240,125,52,95,0.547,12,30,0.4,9,11,0.818,17,31,48,33,8,8,12,21,16.0
43,22019,1610613000.0,IND,Indiana Pacers,21900446,2019-12-23,IND vs. TOR,W,265,120,45,95,0.474,18,42,0.429,12,20,0.6,13,32,45,34,7,8,12,22,5.0,1610613000.0,TOR,Toronto Raptors,TOR @ IND,L,266,115,43,94,0.457,11,40,0.275,18,24,0.75,12,38,50,30,6,8,16,22,-5.0
49,22019,1610613000.0,MEM,Memphis Grizzlies,21900450,2019-12-23,MEM vs. SAS,L,241,115,45,96,0.469,15,37,0.405,10,16,0.625,11,24,35,28,7,0,13,19,-30.0,1610613000.0,SAS,San Antonio Spurs,SAS @ MEM,W,242,145,60,89,0.674,15,24,0.625,10,14,0.714,4,42,46,36,9,4,12,14,30.0


In [100]:
def append_calculated_stats_to_joined_df(df):
    df['poss_A'] = df['FGA_A'] + 0.44*df['FTA_A'] + df['TOV_A'] - df['OREB_A']
    df['poss_B'] = df['FGA_B'] + 0.44*df['FTA_B'] + df['TOV_B'] - df['OREB_B']
    
    df['ortg_A'] = 100 * df['PTS_A'] / df['poss_A']
    df['ortg_B'] = 100 * df['PTS_B'] / df['poss_B']
    
    df['drtg_A'] = 100 * df['PTS_B'] / df['poss_B']
    df['drtg_B'] = 100 * df['PTS_A'] / df['poss_A']
    
    df['eFG_A']  = (df['FGM_A'] + 0.5*df['FG3M_A']) / df['FGA_A']
    df['eFG_B']  = (df['FGM_B'] + 0.5*df['FG3M_B']) / df['FGA_B']
    
    df['tovr_A'] = df['TOV_A'] / df['poss_A']
    df['tovr_B'] = df['TOV_B'] / df['poss_B']
    
    
    df['orb%_A'] = df['OREB_A'] / (df['OREB_A'] + df['DREB_B'])
    df['orb%_B'] = df['OREB_B'] / (df['OREB_B'] + df['DREB_A'])
    
    df['ftr_A']  = df['FTA_A'] / df['FGA_A']
    df['ftr_B']  = df['FTA_B'] / df['FGA_B']
    
    df['pace_A'] = 48 * (df['poss_A'] / (df['MIN_A']/5))
    df['pace_B'] = 48 * (df['poss_B'] / (df['MIN_B']/5))


In [99]:
# append_calculated_stats_to_joined_df(nba_games_joined)
# nba_games_joined.head()
nba_games_joined.head().to_csv("sample_joined_w_stats.csv", index=False)

In [None]:
nba_games_joined.shift(1).rolling(window=5).

In [None]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
nba_games_joined.head(80)

you want a dataframe that each row is a game and each column is a feature. most of which are rolling averages from the previous games.
we will take the joined df and add the relevant stats for each game for each team. then we can compute the rolling averages by going through each game and 

In [116]:
# nba_games_joined.rename(columns={'orb%__B':'orb%_B'}, inplace=True)
# nba_games_joined[nba_games_joined["TEAM_ID_A"] == 1610612749.0]

In [134]:
nba_games_joined[nba_games_joined["GAME_ID"] == 21900447]
# nba_games_joined.head(40)

Unnamed: 0,SEASON_ID,TEAM_ID_A,TEAM_ABBREVIATION_A,TEAM_NAME_A,GAME_ID,GAME_DATE,MATCHUP_A,WL_A,MIN_A,PTS_A,FGM_A,FGA_A,FG_PCT_A,FG3M_A,FG3A_A,FG3_PCT_A,FTM_A,FTA_A,FT_PCT_A,OREB_A,DREB_A,REB_A,AST_A,STL_A,BLK_A,TOV_A,PF_A,PLUS_MINUS_A,TEAM_ID_B,TEAM_ABBREVIATION_B,TEAM_NAME_B,MATCHUP_B,WL_B,MIN_B,PTS_B,FGM_B,FGA_B,FG_PCT_B,FG3M_B,FG3A_B,FG3_PCT_B,FTM_B,FTA_B,FT_PCT_B,OREB_B,DREB_B,REB_B,AST_B,STL_B,BLK_B,TOV_B,PF_B,PLUS_MINUS_B,poss_A,poss_B,ortg_A,ortg_B,drtg_A,drtg_B,eFG_A,eFG_B,tovr_A,tovr_B,orb%_A,orb%_B,ftr_A,ftr_B,pace_A,pace_B
66,22019,1610613000.0,NYK,New York Knicks,21900447,2019-12-23,NYK vs. WAS,L,241,115,44,93,0.473,15,36,0.417,12,15,0.8,11,33,44,29,4,10,14,25,-6.0,1610613000.0,WAS,Washington Wizards,WAS @ NYK,W,240,121,47,100,0.47,10,30,0.333,17,30,0.567,16,32,48,27,12,7,8,12,6.0,102.6,105.2,112.08577,115.019011,115.019011,112.08577,0.553763,0.52,0.136452,0.076046,0.255814,0.326531,0.16129,0.3,102.174274,105.2


In [135]:
nba_games_df[nba_games_df["GAME_ID"] == 21900447]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
20,22019,1610613000.0,WAS,Washington Wizards,21900447,2019-12-23,WAS @ NYK,W,240,121,47,100,0.47,10,30,0.333,17,30,0.567,16,32,48,27,12,7,8,12,6.0
35,22019,1610613000.0,NYK,New York Knicks,21900447,2019-12-23,NYK vs. WAS,L,241,115,44,93,0.473,15,36,0.417,12,15,0.8,11,33,44,29,4,10,14,25,-6.0


In [152]:
stats = ['poss','ortg','drtg','eFG','tovr','orb%','ftr','pace']
a_subset = ["GAME_ID", "TEAM_ABBREVIATION_A"] + [f'{s}_A' for s in stats]
nba_joined_subset_a = nba_games_joined[a_subset]
games_by_home_w_stats = nba_games_df.merge(nba_joined_subset_a, left_on=["GAME_ID", "TEAM_ABBREVIATION"], right_on=["GAME_ID", "TEAM_ABBREVIATION_A"])

In [153]:
b_subset = ["GAME_ID", "TEAM_ABBREVIATION_B"] + [f'{s}_B' for s in stats]
nba_joined_subset_b = nba_games_joined[b_subset]
games_by_away_w_stats = nba_games_df.merge(nba_joined_subset_b, left_on=["GAME_ID", "TEAM_ABBREVIATION"], right_on=["GAME_ID", "TEAM_ABBREVIATION_B"])

In [139]:
nba_games_df[nba_games_df["GAME_ID"] == 21900443]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
4,22019,1610613000.0,DEN,Denver Nuggets,21900443,2019-12-22,DEN @ LAL,W,241,128,45,92,0.489,11,32,0.344,27,30,0.9,17,27,44,31,13,2,8,19,24.0
7,22019,1610613000.0,LAL,Los Angeles Lakers,21900443,2019-12-22,LAL vs. DEN,L,240,104,39,83,0.47,10,29,0.345,16,22,0.727,18,27,45,18,7,9,19,25,-24.0


In [140]:
nba_games_joined[nba_games_joined["GAME_ID"] == 21900443]

Unnamed: 0,SEASON_ID,TEAM_ID_A,TEAM_ABBREVIATION_A,TEAM_NAME_A,GAME_ID,GAME_DATE,MATCHUP_A,WL_A,MIN_A,PTS_A,FGM_A,FGA_A,FG_PCT_A,FG3M_A,FG3A_A,FG3_PCT_A,FTM_A,FTA_A,FT_PCT_A,OREB_A,DREB_A,REB_A,AST_A,STL_A,BLK_A,TOV_A,PF_A,PLUS_MINUS_A,TEAM_ID_B,TEAM_ABBREVIATION_B,TEAM_NAME_B,MATCHUP_B,WL_B,MIN_B,PTS_B,FGM_B,FGA_B,FG_PCT_B,FG3M_B,FG3A_B,FG3_PCT_B,FTM_B,FTA_B,FT_PCT_B,OREB_B,DREB_B,REB_B,AST_B,STL_B,BLK_B,TOV_B,PF_B,PLUS_MINUS_B,poss_A,poss_B,ortg_A,ortg_B,drtg_A,drtg_B,eFG_A,eFG_B,tovr_A,tovr_B,orb%_A,orb%_B,ftr_A,ftr_B,pace_A,pace_B
10,22019,1610613000.0,LAL,Los Angeles Lakers,21900443,2019-12-22,LAL vs. DEN,L,240,104,39,83,0.47,10,29,0.345,16,22,0.727,18,27,45,18,7,9,19,25,-24.0,1610613000.0,DEN,Denver Nuggets,DEN @ LAL,W,241,128,45,92,0.489,11,32,0.344,27,30,0.9,17,27,44,31,13,2,8,19,24.0,93.68,96.2,111.016225,133.056133,133.056133,111.016225,0.53012,0.548913,0.202818,0.08316,0.4,0.386364,0.26506,0.326087,93.68,95.80083


In [158]:
games_by_home_w_stats.head(10)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,TEAM_ABBREVIATION_A,poss_A,ortg_A,drtg_A,eFG_A,tovr_A,orb%_A,ftr_A,pace_A
0,22019,1610613000.0,LAL,Los Angeles Lakers,21900443,2019-12-22,LAL vs. DEN,L,240,104,39,83,0.47,10,29,0.345,16,22,0.727,18,27,45,18,7,9,19,25,-24.0,LAL,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68
1,22019,1610613000.0,MIL,Milwaukee Bucks,21900441,2019-12-22,MIL vs. IND,W,242,117,44,104,0.423,15,44,0.341,14,17,0.824,16,45,61,31,10,9,9,7,28.0,MIL,104.48,111.983155,84.60076,0.495192,0.086141,0.296296,0.163462,103.616529
2,22019,1610613000.0,BOS,Boston Celtics,21900440,2019-12-22,BOS vs. CHA,W,239,119,46,88,0.523,14,32,0.438,13,16,0.813,10,47,57,25,2,9,11,24,26.0,BOS,96.04,123.906706,99.871134,0.602273,0.114536,0.285714,0.181818,96.441841
3,22019,1610613000.0,OKC,Oklahoma City Thunder,21900442,2019-12-22,OKC vs. LAC,W,241,118,46,97,0.474,9,27,0.333,17,20,0.85,16,33,49,24,10,4,12,20,6.0,OKC,101.8,115.913556,110.279638,0.520619,0.117878,0.333333,0.206186,101.377593
4,22019,1610613000.0,TOR,Toronto Raptors,21900439,2019-12-22,TOR vs. DAL,W,242,110,37,94,0.394,10,34,0.294,26,31,0.839,14,37,51,21,12,5,10,21,3.0,TOR,103.64,106.136627,102.021358,0.446809,0.096488,0.264151,0.329787,102.783471
5,22019,1610613000.0,PHX,Phoenix Suns,21900451,2019-12-23,PHX vs. DEN,L,240,111,42,87,0.483,13,37,0.351,14,18,0.778,9,29,38,29,10,0,17,19,-2.0,PHX,102.92,107.850758,113.453815,0.557471,0.165177,0.214286,0.206897,102.92
6,22019,1610613000.0,SAC,Sacramento Kings,21900453,2019-12-23,SAC vs. HOU,L,240,104,41,87,0.471,7,30,0.233,15,23,0.652,11,38,49,20,4,4,17,24,-9.0,SAC,103.12,100.853375,110.654132,0.511494,0.164856,0.25,0.264368,103.12
7,22019,1610613000.0,DET,Detroit Pistons,21900445,2019-12-23,DET vs. PHI,L,239,109,43,84,0.512,10,30,0.333,13,18,0.722,6,24,30,27,9,1,13,14,-16.0,DET,98.92,110.190053,131.800928,0.571429,0.131419,0.162162,0.214286,99.333891
8,22019,1610613000.0,IND,Indiana Pacers,21900446,2019-12-23,IND vs. TOR,W,265,120,45,95,0.474,18,42,0.429,12,20,0.6,13,32,45,34,7,8,12,22,5.0,IND,102.8,116.731518,105.932203,0.568421,0.116732,0.254902,0.210526,93.101887
9,22019,1610613000.0,MEM,Memphis Grizzlies,21900450,2019-12-23,MEM vs. SAS,L,241,115,45,96,0.469,15,37,0.405,10,16,0.625,11,24,35,28,7,0,13,19,-30.0,MEM,105.04,109.482102,140.558356,0.546875,0.123762,0.207547,0.166667,104.604149


In [159]:
games_by_away_w_stats.head(10)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,TEAM_ABBREVIATION_B,poss_B,ortg_B,drtg_B,eFG_B,tovr_B,orb%_B,ftr_B,pace_B
0,22019,1610613000.0,DEN,Denver Nuggets,21900443,2019-12-22,DEN @ LAL,W,241,128,45,92,0.489,11,32,0.344,27,30,0.9,17,27,44,31,13,2,8,19,24.0,DEN,96.2,133.056133,111.016225,0.548913,0.08316,0.386364,0.326087,95.80083
1,22019,1610613000.0,IND,Indiana Pacers,21900441,2019-12-22,IND @ MIL,L,240,89,37,100,0.37,12,40,0.3,3,5,0.6,10,38,48,27,7,5,13,15,-28.0,IND,105.2,84.60076,111.983155,0.43,0.123574,0.181818,0.05,105.2
2,22019,1610613000.0,LAC,LA Clippers,21900442,2019-12-22,LAC @ OKC,L,239,112,40,86,0.465,11,27,0.407,21,24,0.875,10,32,42,22,7,2,15,23,-6.0,LAC,101.56,110.279638,115.913556,0.52907,0.147696,0.232558,0.27907,101.984937
3,22019,1610613000.0,CHA,Charlotte Hornets,21900440,2019-12-22,CHA @ BOS,L,240,93,33,82,0.402,10,34,0.294,17,23,0.739,2,25,27,26,3,5,3,17,-26.0,CHA,93.12,99.871134,123.906706,0.463415,0.032216,0.040816,0.280488,93.12
4,22019,1610613000.0,DAL,Dallas Mavericks,21900439,2019-12-22,DAL @ TOR,L,240,107,35,90,0.389,15,46,0.326,22,27,0.815,14,39,53,27,6,4,17,22,-3.0,DAL,104.88,102.021358,106.136627,0.472222,0.16209,0.27451,0.3,104.88
5,22019,1610613000.0,PHI,Philadelphia 76ers,21900445,2019-12-23,PHI @ DET,W,240,125,52,95,0.547,12,30,0.4,9,11,0.818,17,31,48,33,8,8,12,21,16.0,PHI,94.84,131.800928,110.190053,0.610526,0.126529,0.414634,0.115789,94.84
6,22019,1610613000.0,CHI,Chicago Bulls,21900448,2019-12-23,CHI @ ORL,L,239,95,35,88,0.398,13,37,0.351,12,15,0.8,11,36,47,20,7,5,15,17,-8.0,CHI,98.6,96.348884,104.973502,0.471591,0.15213,0.215686,0.170455,99.012552
7,22019,1610613000.0,HOU,Houston Rockets,21900453,2019-12-23,HOU @ SAC,W,241,113,39,87,0.448,15,39,0.385,20,23,0.87,6,33,39,18,10,3,11,22,9.0,HOU,102.12,110.654132,100.853375,0.534483,0.107716,0.136364,0.264368,101.696266
8,22019,1610613000.0,WAS,Washington Wizards,21900447,2019-12-23,WAS @ NYK,W,240,121,47,100,0.47,10,30,0.333,17,30,0.567,16,32,48,27,12,7,8,12,6.0,WAS,105.2,115.019011,112.08577,0.52,0.076046,0.326531,0.3,105.2
9,22019,1610613000.0,ATL,Atlanta Hawks,21900444,2019-12-23,ATL @ CLE,L,239,118,45,95,0.474,15,42,0.357,13,17,0.765,5,32,37,23,11,7,16,20,-3.0,ATL,113.48,103.983081,103.171896,0.552632,0.140994,0.121951,0.178947,113.954812


In [161]:
games_by_home_w_stats.merge(games_by_away_w_stats[b_subset], on="GAME_ID").head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,TEAM_ABBREVIATION_A,poss_A,ortg_A,drtg_A,eFG_A,tovr_A,orb%_A,ftr_A,pace_A,TEAM_ABBREVIATION_B,poss_B,ortg_B,drtg_B,eFG_B,tovr_B,orb%_B,ftr_B,pace_B
0,22019,1610613000.0,LAL,Los Angeles Lakers,21900443,2019-12-22,LAL vs. DEN,L,240,104,39,83,0.47,10,29,0.345,16,22,0.727,18,27,45,18,7,9,19,25,-24.0,LAL,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68,DEN,96.2,133.056133,111.016225,0.548913,0.08316,0.386364,0.326087,95.80083
1,22019,1610613000.0,MIL,Milwaukee Bucks,21900441,2019-12-22,MIL vs. IND,W,242,117,44,104,0.423,15,44,0.341,14,17,0.824,16,45,61,31,10,9,9,7,28.0,MIL,104.48,111.983155,84.60076,0.495192,0.086141,0.296296,0.163462,103.616529,IND,105.2,84.60076,111.983155,0.43,0.123574,0.181818,0.05,105.2
2,22019,1610613000.0,BOS,Boston Celtics,21900440,2019-12-22,BOS vs. CHA,W,239,119,46,88,0.523,14,32,0.438,13,16,0.813,10,47,57,25,2,9,11,24,26.0,BOS,96.04,123.906706,99.871134,0.602273,0.114536,0.285714,0.181818,96.441841,CHA,93.12,99.871134,123.906706,0.463415,0.032216,0.040816,0.280488,93.12
3,22019,1610613000.0,OKC,Oklahoma City Thunder,21900442,2019-12-22,OKC vs. LAC,W,241,118,46,97,0.474,9,27,0.333,17,20,0.85,16,33,49,24,10,4,12,20,6.0,OKC,101.8,115.913556,110.279638,0.520619,0.117878,0.333333,0.206186,101.377593,LAC,101.56,110.279638,115.913556,0.52907,0.147696,0.232558,0.27907,101.984937
4,22019,1610613000.0,TOR,Toronto Raptors,21900439,2019-12-22,TOR vs. DAL,W,242,110,37,94,0.394,10,34,0.294,26,31,0.839,14,37,51,21,12,5,10,21,3.0,TOR,103.64,106.136627,102.021358,0.446809,0.096488,0.264151,0.329787,102.783471,DAL,104.88,102.021358,106.136627,0.472222,0.16209,0.27451,0.3,104.88


We now have all games by the home team with calculated stats and all games by away team with calculated stats
now we have to: 
- concatenate them so we have all games by all teams so we can compute rolling averages (group by (sort?) by team name and date or game id so we can go shift().rolling and get means for the stats)
- once we do that, we join again so we have joined dataframe of all games with rolling averages for both teams in game

In [163]:
# must rename both columns stats so we can concatenate
games_by_home_w_stats.columns

Index(['SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT',
       'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB',
       'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS',
       'TEAM_ABBREVIATION_A', 'poss_A', 'ortg_A', 'drtg_A', 'eFG_A', 'tovr_A',
       'orb%_A', 'ftr_A', 'pace_A'],
      dtype='object')

In [166]:
games_by_home_w_stats = games_by_home_w_stats.drop(columns=["TEAM_ABBREVIATION_A"])

In [169]:
a_to_rename = [f'{s}_A' for s in stats]
games_by_home_w_stats = games_by_home_w_stats.rename(columns=dict(zip(a_to_rename, stats)))
games_by_home_w_stats.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,poss,ortg,drtg,eFG,tovr,orb%,ftr,pace
0,22019,1610613000.0,LAL,Los Angeles Lakers,21900443,2019-12-22,LAL vs. DEN,L,240,104,39,83,0.47,10,29,0.345,16,22,0.727,18,27,45,18,7,9,19,25,-24.0,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68
1,22019,1610613000.0,MIL,Milwaukee Bucks,21900441,2019-12-22,MIL vs. IND,W,242,117,44,104,0.423,15,44,0.341,14,17,0.824,16,45,61,31,10,9,9,7,28.0,104.48,111.983155,84.60076,0.495192,0.086141,0.296296,0.163462,103.616529
2,22019,1610613000.0,BOS,Boston Celtics,21900440,2019-12-22,BOS vs. CHA,W,239,119,46,88,0.523,14,32,0.438,13,16,0.813,10,47,57,25,2,9,11,24,26.0,96.04,123.906706,99.871134,0.602273,0.114536,0.285714,0.181818,96.441841
3,22019,1610613000.0,OKC,Oklahoma City Thunder,21900442,2019-12-22,OKC vs. LAC,W,241,118,46,97,0.474,9,27,0.333,17,20,0.85,16,33,49,24,10,4,12,20,6.0,101.8,115.913556,110.279638,0.520619,0.117878,0.333333,0.206186,101.377593
4,22019,1610613000.0,TOR,Toronto Raptors,21900439,2019-12-22,TOR vs. DAL,W,242,110,37,94,0.394,10,34,0.294,26,31,0.839,14,37,51,21,12,5,10,21,3.0,103.64,106.136627,102.021358,0.446809,0.096488,0.264151,0.329787,102.783471


In [170]:
games_by_away_w_stats = games_by_away_w_stats.drop(columns=["TEAM_ABBREVIATION_B"])

In [172]:
games_by_away_w_stats = games_by_away_w_stats.rename(columns=dict(zip([f'{s}_B' for s in stats], stats)))

In [175]:
all_games_w_stats = pd.concat([games_by_home_w_stats, games_by_away_w_stats])

In [176]:
all_games_w_stats[all_games_w_stats["GAME_ID"] == 21900443]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,poss,ortg,drtg,eFG,tovr,orb%,ftr,pace
0,22019,1610613000.0,LAL,Los Angeles Lakers,21900443,2019-12-22,LAL vs. DEN,L,240,104,39,83,0.47,10,29,0.345,16,22,0.727,18,27,45,18,7,9,19,25,-24.0,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68
0,22019,1610613000.0,DEN,Denver Nuggets,21900443,2019-12-22,DEN @ LAL,W,241,128,45,92,0.489,11,32,0.344,27,30,0.9,17,27,44,31,13,2,8,19,24.0,96.2,133.056133,111.016225,0.548913,0.08316,0.386364,0.326087,95.80083


In [None]:
all_games_w_stats[all_games_w_stats["TEAM_ABBREVIATION"] == "NYK"]

In [183]:
all_games_w_stats[all_games_w_stats["TEAM_ABBREVIATION"] == "NYK"].sort_values(["GAME_DATE"]).head(20)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,poss,ortg,drtg,eFG,tovr,orb%,ftr,pace
15,22019,1610613000.0,NYK,New York Knicks,21900447,2019-12-23,NYK vs. WAS,L,241,115,44,93,0.473,15,36,0.417,12,15,0.8,11,33,44,29,4,10,14,25,-6.0,102.6,112.08577,115.019011,0.553763,0.136452,0.255814,0.16129,102.174274
25,22019,1610613000.0,NYK,New York Knicks,21900461,2019-12-26,NYK @ BKN,W,239,94,37,92,0.402,9,29,0.31,11,15,0.733,13,47,60,16,6,2,13,27,12.0,98.6,95.334686,83.810303,0.451087,0.131846,0.240741,0.163043,99.012552
37,22019,1610613000.0,NYK,New York Knicks,21900476,2019-12-28,NYK @ WAS,W,240,107,38,85,0.447,11,31,0.355,20,26,0.769,11,45,56,25,5,5,21,25,7.0,106.44,100.526118,93.773443,0.511765,0.197294,0.23913,0.305882,106.44
65,22019,1610613000.0,NYK,New York Knicks,21900505,2020-01-01,NYK vs. POR,W,239,117,47,98,0.48,14,36,0.389,9,14,0.643,9,46,55,29,3,6,7,14,24.0,102.16,114.526233,94.320487,0.55102,0.06852,0.195652,0.142857,102.587448
79,22019,1610613000.0,NYK,New York Knicks,21900521,2020-01-03,NYK @ PHX,L,241,112,41,92,0.446,10,37,0.27,20,30,0.667,13,39,52,19,7,7,15,26,-8.0,107.2,104.477612,113.722517,0.5,0.139925,0.25,0.326087,106.755187
99,22019,1610613000.0,NYK,New York Knicks,21900534,2020-01-05,NYK @ LAC,L,241,132,48,84,0.571,12,23,0.522,24,37,0.649,8,29,37,22,11,4,16,26,-3.0,108.28,121.906169,126.736763,0.642857,0.147765,0.2,0.440476,107.830705
111,22019,1610613000.0,NYK,New York Knicks,21900553,2020-01-07,NYK @ LAL,L,241,87,37,95,0.389,4,20,0.2,9,16,0.563,15,32,47,15,10,3,16,20,-30.0,103.04,84.43323,114.43662,0.410526,0.15528,0.263158,0.168421,102.612448
123,22019,1610613000.0,NYK,New York Knicks,21900561,2020-01-08,NYK @ UTA,L,240,104,42,97,0.433,9,25,0.36,11,11,1.0,14,27,41,18,3,1,9,17,-24.0,96.84,107.393639,134.566863,0.479381,0.092937,0.269231,0.113402,96.84
130,22019,1610613000.0,NYK,New York Knicks,21900569,2020-01-10,NYK vs. NOP,L,241,111,46,100,0.46,5,27,0.185,14,20,0.7,14,32,46,26,13,1,13,16,-12.0,107.8,102.96846,115.514651,0.485,0.120594,0.27451,0.2,107.352697
151,22019,1610613000.0,NYK,New York Knicks,21900584,2020-01-12,NYK vs. MIA,W,240,124,46,90,0.511,9,31,0.29,23,28,0.821,10,29,39,24,11,3,6,23,3.0,98.32,126.118796,125.622924,0.561111,0.061025,0.243902,0.311111,98.32


In [181]:
all_games_w_stats[all_games_w_stats["TEAM_ABBREVIATION"] == "NYK"].sort_values(["GAME_DATE"]).poss.shift(1).rolling(5).mean().head(20)

15         NaN
25         NaN
37         NaN
65         NaN
79         NaN
99     103.400
111    104.536
123    105.424
130    103.504
151    104.632
159    102.856
179    103.688
197    104.792
201    105.008
222    102.544
234    102.424
252     98.784
263     96.416
273     96.000
291     98.656
Name: poss, dtype: float64

In [184]:
games_by_home_w_stats.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,poss,ortg,drtg,eFG,tovr,orb%,ftr,pace
0,22019,1610613000.0,LAL,Los Angeles Lakers,21900443,2019-12-22,LAL vs. DEN,L,240,104,39,83,0.47,10,29,0.345,16,22,0.727,18,27,45,18,7,9,19,25,-24.0,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68
1,22019,1610613000.0,MIL,Milwaukee Bucks,21900441,2019-12-22,MIL vs. IND,W,242,117,44,104,0.423,15,44,0.341,14,17,0.824,16,45,61,31,10,9,9,7,28.0,104.48,111.983155,84.60076,0.495192,0.086141,0.296296,0.163462,103.616529
2,22019,1610613000.0,BOS,Boston Celtics,21900440,2019-12-22,BOS vs. CHA,W,239,119,46,88,0.523,14,32,0.438,13,16,0.813,10,47,57,25,2,9,11,24,26.0,96.04,123.906706,99.871134,0.602273,0.114536,0.285714,0.181818,96.441841
3,22019,1610613000.0,OKC,Oklahoma City Thunder,21900442,2019-12-22,OKC vs. LAC,W,241,118,46,97,0.474,9,27,0.333,17,20,0.85,16,33,49,24,10,4,12,20,6.0,101.8,115.913556,110.279638,0.520619,0.117878,0.333333,0.206186,101.377593
4,22019,1610613000.0,TOR,Toronto Raptors,21900439,2019-12-22,TOR vs. DAL,W,242,110,37,94,0.394,10,34,0.294,26,31,0.839,14,37,51,21,12,5,10,21,3.0,103.64,106.136627,102.021358,0.446809,0.096488,0.264151,0.329787,102.783471


In [None]:
all_games_w_stats['TEAM_ID'] = all_games_w_stats['TEAM_ID'].astype('int64')
all_games_w_stats.groupby(['TEAM_ID','SEASON_ID'], sort=False).head()

In [188]:
all_games_copy = all_games_w_stats.sort_values(['TEAM_ID','SEASON_ID','GAME_DATE']).copy()
all_games_copy['TEAM_ID'] = all_games_copy['TEAM_ID'].astype('int64')
for s in stats:
    all_games_copy[f'{s}_S2D'] = (all_games_copy.groupby(['TEAM_ID','SEASON_ID'], sort=False)[s].transform(lambda x: x.shift(1).expanding().mean()))

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,poss,ortg,drtg,eFG,tovr,orb%,ftr,pace,poss_S2D,ortg_S2D,drtg_S2D,eFG_S2D,tovr_S2D,orb%_S2D,ftr_S2D,pace_S2D
9,22019,1610612737,ATL,Atlanta Hawks,21900444,2019-12-23,ATL @ CLE,L,239,118,45,95,0.474,15,42,0.357,13,17,0.765,5,32,37,23,11,7,16,20,-3.0,113.48,103.983081,103.171896,0.552632,0.140994,0.121951,0.178947,113.954812,,,,,,,,
27,22019,1610612737,ATL,Atlanta Hawks,21900469,2019-12-27,ATL vs. MIL,L,239,86,33,91,0.363,12,41,0.293,8,14,0.571,8,38,46,20,10,8,18,18,-26.0,107.16,80.253826,103.016924,0.428571,0.167973,0.137931,0.153846,107.608368,113.48,103.983081,103.171896,0.552632,0.140994,0.121951,0.178947,113.954812
35,22019,1610612737,ATL,Atlanta Hawks,21900477,2019-12-28,ATL @ CHI,L,239,81,32,86,0.372,9,34,0.265,8,11,0.727,9,30,39,24,8,5,19,16,-35.0,100.84,80.325268,116.747182,0.424419,0.188417,0.191489,0.127907,101.261925,110.32,92.118453,103.09441,0.490602,0.154484,0.129941,0.166397,110.78159
55,22019,1610612737,ATL,Atlanta Hawks,21900491,2019-12-30,ATL @ ORL,W,240,101,39,81,0.481,9,29,0.31,14,17,0.824,11,41,52,21,8,4,20,20,8.0,97.48,103.610997,95.325953,0.537037,0.20517,0.275,0.209877,97.48,107.16,88.187392,107.645334,0.468541,0.165795,0.150457,0.153567,107.608368
83,22019,1610612737,ATL,Atlanta Hawks,21900517,2020-01-03,ATL @ BOS,L,239,106,39,93,0.419,16,45,0.356,12,15,0.8,6,36,42,26,6,5,14,24,-3.0,107.6,98.513011,103.06354,0.505376,0.130112,0.125,0.16129,108.050209,104.74,92.043293,104.565489,0.485665,0.175639,0.181593,0.167644,105.076276


In [None]:
all_games_copy["game_day_delta"] = (all_games_copy.groupby(['TEAM_ID','SEASON_ID'], sort=False)["GAME_DATE"].transform(lambda x: x.shift(1).expanding().mean()))

In [None]:
all_games_copy[all_games_copy["TEAM_ABBREVIATION"] == "NYK"]

In [None]:
all_games_copy.sample(20)

In [211]:
all_games_copy[all_games_copy["GAME_ID"] == 21900443]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,poss,ortg,drtg,eFG,tovr,orb%,ftr,pace,poss_S2D,ortg_S2D,drtg_S2D,eFG_S2D,tovr_S2D,orb%_S2D,ftr_S2D,pace_S2D
0,22019,1610612743,DEN,Denver Nuggets,21900443,2019-12-22,DEN @ LAL,W,241,128,45,92,0.489,11,32,0.344,27,30,0.9,17,27,44,31,13,2,8,19,24.0,96.2,133.056133,111.016225,0.548913,0.08316,0.386364,0.326087,95.80083,,,,,,,,
0,22019,1610612747,LAL,Los Angeles Lakers,21900443,2019-12-22,LAL vs. DEN,L,240,104,39,83,0.47,10,29,0.345,16,22,0.727,18,27,45,18,7,9,19,25,-24.0,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68,,,,,,,,


In [212]:
games_by_home_w_stats[games_by_home_w_stats["GAME_ID"] == 21900443]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,poss,ortg,drtg,eFG,tovr,orb%,ftr,pace
0,22019,1610613000.0,LAL,Los Angeles Lakers,21900443,2019-12-22,LAL vs. DEN,L,240,104,39,83,0.47,10,29,0.345,16,22,0.727,18,27,45,18,7,9,19,25,-24.0,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68


In [217]:
reduced_all_games_copy = all_games_copy[["GAME_ID", "TEAM_ABBREVIATION", "MATCHUP"] + [f'{s}_S2D' for s in stats]]

In [215]:
games_joined_w_s2d_stats = games_by_home_w_stats.merge(all_games_copy[["GAME_ID", "TEAM_ABBREVIATION", "MATCHUP"] + [f'{s}_S2D' for s in stats]], how="inner", on=["GAME_ID", "TEAM_ABBREVIATION", "MATCHUP"])

In [223]:
games_joined_w_s2d_stats.sample(5)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,poss,ortg,drtg,eFG,tovr,orb%,ftr,pace,poss_S2D,ortg_S2D,drtg_S2D,eFG_S2D,tovr_S2D,orb%_S2D,ftr_S2D,pace_S2D
2365,22021,1610613000.0,CHA,Charlotte Hornets,22100501,2021-12-27,CHA vs. HOU,W,240,123,47,88,0.534,18,41,0.439,11,14,0.786,7,32,39,33,13,4,16,18,24.0,103.16,119.232261,96.191216,0.636364,0.155099,0.184211,0.159091,103.16,103.564706,111.136101,112.814772,0.537597,0.119655,0.2238,0.224928,102.350254
300,22019,1610613000.0,DET,Detroit Pistons,21900737,2020-02-02,DET vs. DEN,W,264,128,48,94,0.511,14,31,0.452,18,24,0.75,11,32,43,33,4,2,11,22,5.0,104.56,122.417751,122.169249,0.585106,0.105203,0.25,0.255319,95.054545,101.854,107.93052,112.002824,0.529648,0.138149,0.217156,0.299156,100.324053
234,22019,1610613000.0,NYK,New York Knicks,21900672,2020-01-24,NYK vs. TOR,L,240,112,42,85,0.494,13,36,0.361,15,21,0.714,14,32,46,26,5,4,14,21,-6.0,94.24,118.845501,126.718213,0.570588,0.148557,0.35,0.247059,94.24,102.893333,102.984734,109.432295,0.496509,0.12913,0.234978,0.227094,102.807298
4072,22022,1610613000.0,TOR,Toronto Raptors,22200890,2023-02-23,TOR vs. NOP,W,240,115,43,92,0.467,9,31,0.29,20,25,0.8,11,37,48,17,10,5,11,20,5.0,103.0,111.650485,107.843137,0.516304,0.106796,0.229167,0.271739,103.0,100.529492,112.951347,112.905182,0.517958,0.113129,0.275274,0.278058,99.800231
3360,22022,1610613000.0,MIA,Miami Heat,22200174,2022-11-10,MIA vs. CHA,W,266,117,39,90,0.433,10,36,0.278,29,36,0.806,8,30,38,21,9,1,10,25,5.0,107.84,108.494065,102.941176,0.488889,0.09273,0.16,0.4,97.299248,99.447273,108.715823,109.580159,0.520244,0.133086,0.205998,0.26211,99.368048


In [224]:
away_games_w_s2d = reduced_all_games_copy[(reduced_all_games_copy["MATCHUP"].str.contains("@"))]

In [225]:
away_games_w_s2d.head()

Unnamed: 0,GAME_ID,TEAM_ABBREVIATION,MATCHUP,poss_S2D,ortg_S2D,drtg_S2D,eFG_S2D,tovr_S2D,orb%_S2D,ftr_S2D,pace_S2D
9,21900444,ATL,ATL @ CLE,,,,,,,,
35,21900477,ATL,ATL @ CHI,110.32,92.118453,103.09441,0.490602,0.154484,0.129941,0.166397,110.78159
55,21900491,ATL,ATL @ ORL,107.16,88.187392,107.645334,0.468541,0.165795,0.150457,0.153567,107.608368
83,21900517,ATL,ATL @ BOS,104.74,92.043293,104.565489,0.485665,0.175639,0.181593,0.167644,105.076276
135,21900567,ATL,ATL @ WAS,104.76,100.155185,108.473299,0.502735,0.145141,0.170144,0.224356,104.976641


In [230]:
games_joined_w_s2d_stats = games_joined_w_s2d_stats.merge(reduced_all_games_copy[(reduced_all_games_copy["MATCHUP"].str.contains("@"))], on="GAME_ID", suffixes=("_A", "_B"))

In [231]:
games_joined_w_s2d_stats[games_joined_w_s2d_stats["GAME_ID"] == 21900672]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION_A,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP_A,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,poss,ortg,drtg,eFG,tovr,orb%,ftr,pace,poss_S2D_A,ortg_S2D_A,drtg_S2D_A,eFG_S2D_A,tovr_S2D_A,orb%_S2D_A,ftr_S2D_A,pace_S2D_A,TEAM_ABBREVIATION_B,MATCHUP_B,poss_S2D_B,ortg_S2D_B,drtg_S2D_B,eFG_S2D_B,tovr_S2D_B,orb%_S2D_B,ftr_S2D_B,pace_S2D_B
234,22019,1610613000.0,NYK,New York Knicks,21900672,2020-01-24,NYK vs. TOR,L,240,112,42,85,0.494,13,36,0.361,15,21,0.714,14,32,46,26,5,4,14,21,-6.0,94.24,118.845501,126.718213,0.570588,0.148557,0.35,0.247059,94.24,102.893333,102.984734,109.432295,0.496509,0.12913,0.234978,0.227094,102.807298,TOR,TOR @ NYK,101.92,109.363489,103.247451,0.532291,0.133501,0.233237,0.235019,100.548082


In [232]:
all_games_copy[all_games_copy["GAME_ID"] == 21900672]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,poss,ortg,drtg,eFG,tovr,orb%,ftr,pace,poss_S2D,ortg_S2D,drtg_S2D,eFG_S2D,tovr_S2D,orb%_S2D,ftr_S2D,pace_S2D
234,22019,1610612752,NYK,New York Knicks,21900672,2020-01-24,NYK vs. TOR,L,240,112,42,85,0.494,13,36,0.361,15,21,0.714,14,32,46,26,5,4,14,21,-6.0,94.24,118.845501,126.718213,0.570588,0.148557,0.35,0.247059,94.24,102.893333,102.984734,109.432295,0.496509,0.12913,0.234978,0.227094,102.807298
233,22019,1610612761,TOR,Toronto Raptors,21900672,2020-01-24,TOR @ NYK,W,240,118,41,81,0.506,17,35,0.486,19,23,0.826,5,26,31,25,10,6,7,20,6.0,93.12,126.718213,118.845501,0.611111,0.075172,0.135135,0.283951,93.12,101.92,109.363489,103.247451,0.532291,0.133501,0.233237,0.235019,100.548082


In [248]:
feature_set = games_joined_w_s2d_stats[["SEASON_ID", "GAME_DATE", "GAME_ID", "TEAM_NAME", "TEAM_ABBREVIATION_A", "TEAM_ABBREVIATION_B", "MATCHUP_A", "MATCHUP_B", "WL"] + [f'{s}_S2D_A' for s in stats] + [f'{s}_S2D_B' for s in stats]]

In [249]:
feature_set = feature_set.dropna()

In [251]:
feature_set.head()

Unnamed: 0,SEASON_ID,GAME_DATE,GAME_ID,TEAM_NAME,TEAM_ABBREVIATION_A,TEAM_ABBREVIATION_B,MATCHUP_A,MATCHUP_B,WL,poss_S2D_A,ortg_S2D_A,drtg_S2D_A,eFG_S2D_A,tovr_S2D_A,orb%_S2D_A,ftr_S2D_A,pace_S2D_A,poss_S2D_B,ortg_S2D_B,drtg_S2D_B,eFG_S2D_B,tovr_S2D_B,orb%_S2D_B,ftr_S2D_B,pace_S2D_B
8,22019,2019-12-23,21900446,Indiana Pacers,IND,TOR,IND vs. TOR,TOR @ IND,W,105.2,84.60076,111.983155,0.43,0.123574,0.181818,0.05,105.2,103.64,106.136627,102.021358,0.446809,0.096488,0.264151,0.329787,102.783471
16,22019,2019-12-25,21900458,Los Angeles Lakers,LAL,LAC,LAL vs. LAC,LAC @ LAL,L,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68,101.56,110.279638,115.913556,0.52907,0.147696,0.232558,0.27907,101.984937
17,22019,2019-12-25,21900457,Golden State Warriors,GSW,HOU,GSW vs. HOU,HOU @ GSW,W,106.2,106.403013,97.414762,0.505747,0.178908,0.295455,0.344828,106.644351,102.12,110.654132,100.853375,0.534483,0.107716,0.136364,0.264368,101.696266
18,22019,2019-12-25,21900456,Philadelphia 76ers,PHI,MIL,PHI vs. MIL,MIL @ PHI,W,94.84,131.800928,110.190053,0.610526,0.126529,0.414634,0.115789,94.84,104.48,111.983155,84.60076,0.495192,0.086141,0.296296,0.163462,103.616529
19,22019,2019-12-25,21900459,Denver Nuggets,DEN,NOP,DEN vs. NOP,NOP @ DEN,L,97.9,123.254974,109.433492,0.598174,0.147002,0.278896,0.259197,97.908783,99.04,102.988691,92.229199,0.489247,0.121163,0.245283,0.172043,99.454393


In [253]:
feature_set[[f'{s}_S2D_A' for s in stats] + [f'{s}_S2D_B' for s in stats]].to_numpy()

array([[105.2       ,  84.60076046, 111.98315467, ...,   0.26415094,
          0.32978723, 102.78347107],
       [ 93.68      , 111.01622545, 133.05613306, ...,   0.23255814,
          0.27906977, 101.98493724],
       [106.2       , 106.40301318,  97.41476208, ...,   0.13636364,
          0.26436782, 101.69626556],
       ...,
       [101.77      , 114.21357583, 104.56379824, ...,   0.18557022,
          0.28748131, 100.09038357],
       [100.98666667, 114.44211302, 111.9533269 , ...,   0.26001184,
          0.28951213, 101.29625684],
       [101.72363636, 113.31854912, 104.86533409, ...,   0.19771191,
          0.29032814, 100.11800241]], shape=(6978, 16))

In [259]:
training_set = feature_set[feature_set["GAME_DATE"] < "2023"]

In [260]:
training_set.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3644 entries, 8 to 3731
Data columns (total 25 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   SEASON_ID            3644 non-null   int64         
 1   GAME_DATE            3644 non-null   datetime64[ns]
 2   GAME_ID              3644 non-null   int64         
 3   TEAM_NAME            3644 non-null   object        
 4   TEAM_ABBREVIATION_A  3644 non-null   object        
 5   TEAM_ABBREVIATION_B  3644 non-null   object        
 6   MATCHUP_A            3644 non-null   object        
 7   MATCHUP_B            3644 non-null   object        
 8   WL                   3644 non-null   object        
 9   poss_S2D_A           3644 non-null   float64       
 10  ortg_S2D_A           3644 non-null   float64       
 11  drtg_S2D_A           3644 non-null   float64       
 12  eFG_S2D_A            3644 non-null   float64       
 13  tovr_S2D_A           3644 non-null   f

In [263]:
training_set.head()

Unnamed: 0,SEASON_ID,GAME_DATE,GAME_ID,TEAM_NAME,TEAM_ABBREVIATION_A,TEAM_ABBREVIATION_B,MATCHUP_A,MATCHUP_B,WL,poss_S2D_A,ortg_S2D_A,drtg_S2D_A,eFG_S2D_A,tovr_S2D_A,orb%_S2D_A,ftr_S2D_A,pace_S2D_A,poss_S2D_B,ortg_S2D_B,drtg_S2D_B,eFG_S2D_B,tovr_S2D_B,orb%_S2D_B,ftr_S2D_B,pace_S2D_B
8,22019,2019-12-23,21900446,Indiana Pacers,IND,TOR,IND vs. TOR,TOR @ IND,W,105.2,84.60076,111.983155,0.43,0.123574,0.181818,0.05,105.2,103.64,106.136627,102.021358,0.446809,0.096488,0.264151,0.329787,102.783471
16,22019,2019-12-25,21900458,Los Angeles Lakers,LAL,LAC,LAL vs. LAC,LAC @ LAL,L,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68,101.56,110.279638,115.913556,0.52907,0.147696,0.232558,0.27907,101.984937
17,22019,2019-12-25,21900457,Golden State Warriors,GSW,HOU,GSW vs. HOU,HOU @ GSW,W,106.2,106.403013,97.414762,0.505747,0.178908,0.295455,0.344828,106.644351,102.12,110.654132,100.853375,0.534483,0.107716,0.136364,0.264368,101.696266
18,22019,2019-12-25,21900456,Philadelphia 76ers,PHI,MIL,PHI vs. MIL,MIL @ PHI,W,94.84,131.800928,110.190053,0.610526,0.126529,0.414634,0.115789,94.84,104.48,111.983155,84.60076,0.495192,0.086141,0.296296,0.163462,103.616529
19,22019,2019-12-25,21900459,Denver Nuggets,DEN,NOP,DEN vs. NOP,NOP @ DEN,L,97.9,123.254974,109.433492,0.598174,0.147002,0.278896,0.259197,97.908783,99.04,102.988691,92.229199,0.489247,0.121163,0.245283,0.172043,99.454393


In [264]:
training_set['win_int'] = (training_set['WL'] == 'W').astype(int)
training_set.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  training_set['win_int'] = (training_set['WL'] == 'W').astype(int)


Unnamed: 0,SEASON_ID,GAME_DATE,GAME_ID,TEAM_NAME,TEAM_ABBREVIATION_A,TEAM_ABBREVIATION_B,MATCHUP_A,MATCHUP_B,WL,poss_S2D_A,ortg_S2D_A,drtg_S2D_A,eFG_S2D_A,tovr_S2D_A,orb%_S2D_A,ftr_S2D_A,pace_S2D_A,poss_S2D_B,ortg_S2D_B,drtg_S2D_B,eFG_S2D_B,tovr_S2D_B,orb%_S2D_B,ftr_S2D_B,pace_S2D_B,win_int
8,22019,2019-12-23,21900446,Indiana Pacers,IND,TOR,IND vs. TOR,TOR @ IND,W,105.2,84.60076,111.983155,0.43,0.123574,0.181818,0.05,105.2,103.64,106.136627,102.021358,0.446809,0.096488,0.264151,0.329787,102.783471,1
16,22019,2019-12-25,21900458,Los Angeles Lakers,LAL,LAC,LAL vs. LAC,LAC @ LAL,L,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68,101.56,110.279638,115.913556,0.52907,0.147696,0.232558,0.27907,101.984937,0
17,22019,2019-12-25,21900457,Golden State Warriors,GSW,HOU,GSW vs. HOU,HOU @ GSW,W,106.2,106.403013,97.414762,0.505747,0.178908,0.295455,0.344828,106.644351,102.12,110.654132,100.853375,0.534483,0.107716,0.136364,0.264368,101.696266,1
18,22019,2019-12-25,21900456,Philadelphia 76ers,PHI,MIL,PHI vs. MIL,MIL @ PHI,W,94.84,131.800928,110.190053,0.610526,0.126529,0.414634,0.115789,94.84,104.48,111.983155,84.60076,0.495192,0.086141,0.296296,0.163462,103.616529,1
19,22019,2019-12-25,21900459,Denver Nuggets,DEN,NOP,DEN vs. NOP,NOP @ DEN,L,97.9,123.254974,109.433492,0.598174,0.147002,0.278896,0.259197,97.908783,99.04,102.988691,92.229199,0.489247,0.121163,0.245283,0.172043,99.454393,0


In [262]:
final_x_train = training_set[[f'{s}_S2D_A' for s in stats] + [f'{s}_S2D_B' for s in stats]]

In [265]:
final_y_train = training_set["win_int"]

In [266]:
final_x_train.head()

Unnamed: 0,poss_S2D_A,ortg_S2D_A,drtg_S2D_A,eFG_S2D_A,tovr_S2D_A,orb%_S2D_A,ftr_S2D_A,pace_S2D_A,poss_S2D_B,ortg_S2D_B,drtg_S2D_B,eFG_S2D_B,tovr_S2D_B,orb%_S2D_B,ftr_S2D_B,pace_S2D_B
8,105.2,84.60076,111.983155,0.43,0.123574,0.181818,0.05,105.2,103.64,106.136627,102.021358,0.446809,0.096488,0.264151,0.329787,102.783471
16,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68,101.56,110.279638,115.913556,0.52907,0.147696,0.232558,0.27907,101.984937
17,106.2,106.403013,97.414762,0.505747,0.178908,0.295455,0.344828,106.644351,102.12,110.654132,100.853375,0.534483,0.107716,0.136364,0.264368,101.696266
18,94.84,131.800928,110.190053,0.610526,0.126529,0.414634,0.115789,94.84,104.48,111.983155,84.60076,0.495192,0.086141,0.296296,0.163462,103.616529
19,97.9,123.254974,109.433492,0.598174,0.147002,0.278896,0.259197,97.908783,99.04,102.988691,92.229199,0.489247,0.121163,0.245283,0.172043,99.454393


In [267]:
final_y_train.head()

8     1
16    0
17    1
18    1
19    0
Name: win_int, dtype: int64

In [268]:
final_x_train.to_csv("x_train.csv", index=False)
final_y_train.to_csv("y_train.csv", index=False)

In [275]:
val_set = feature_set[(feature_set["GAME_DATE"] > "2023") & (feature_set["GAME_DATE"] < "2024")]

In [277]:
val_set['win_int'] = (val_set['WL'] == 'W').astype(int)
val_set.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  val_set['win_int'] = (val_set['WL'] == 'W').astype(int)


Unnamed: 0,SEASON_ID,GAME_DATE,GAME_ID,TEAM_NAME,TEAM_ABBREVIATION_A,TEAM_ABBREVIATION_B,MATCHUP_A,MATCHUP_B,WL,poss_S2D_A,ortg_S2D_A,drtg_S2D_A,eFG_S2D_A,tovr_S2D_A,orb%_S2D_A,ftr_S2D_A,pace_S2D_A,poss_S2D_B,ortg_S2D_B,drtg_S2D_B,eFG_S2D_B,tovr_S2D_B,orb%_S2D_B,ftr_S2D_B,pace_S2D_B,win_int
3735,22022,2023-01-02,22200551,Charlotte Hornets,CHA,LAL,CHA vs. LAL,LAL @ CHA,L,104.367568,106.650756,113.143758,0.503541,0.129814,0.263795,0.251614,102.736492,104.706667,110.949946,112.77643,0.541162,0.135685,0.213276,0.284582,103.745366,0
3736,22022,2023-01-02,22200552,Cleveland Cavaliers,CLE,CHI,CLE vs. CHI,CHI @ CLE,W,98.761081,112.788529,107.571886,0.552899,0.138709,0.238632,0.278573,97.019862,102.125556,111.497903,111.807346,0.546542,0.132665,0.200449,0.258178,100.92387,1
3737,22022,2023-01-02,22200557,Minnesota Timberwolves,MIN,DEN,MIN vs. DEN,DEN @ MIN,W,103.674595,110.40077,111.294967,0.555235,0.14934,0.213035,0.277334,103.349111,100.805556,115.708652,113.478566,0.581218,0.144647,0.237261,0.267618,100.16112,1
3738,22022,2023-01-02,22200553,Indiana Pacers,IND,TOR,IND vs. TOR,TOR @ IND,W,103.934054,111.163394,113.241497,0.547383,0.13829,0.220091,0.260135,103.79462,100.268889,111.076204,112.23987,0.511093,0.119262,0.270276,0.28117,99.680484,1
3739,22022,2023-01-02,22200556,Houston Rockets,HOU,DAL,HOU vs. DAL,DAL @ HOU,L,102.291111,106.929396,114.261313,0.510105,0.15988,0.302569,0.291661,101.605292,98.967568,113.826442,111.99462,0.563363,0.120754,0.18818,0.309092,97.604704,0


In [278]:
x_val = val_set[[f'{s}_S2D_A' for s in stats] + [f'{s}_S2D_B' for s in stats]]
y_val = val_set["win_int"]

In [281]:
x_val.to_csv("x_val_set.csv", index=False)
y_val.to_csv("y_val_set.csv", index=False)

In [282]:
feature_set.head()

Unnamed: 0,SEASON_ID,GAME_DATE,GAME_ID,TEAM_NAME,TEAM_ABBREVIATION_A,TEAM_ABBREVIATION_B,MATCHUP_A,MATCHUP_B,WL,poss_S2D_A,ortg_S2D_A,drtg_S2D_A,eFG_S2D_A,tovr_S2D_A,orb%_S2D_A,ftr_S2D_A,pace_S2D_A,poss_S2D_B,ortg_S2D_B,drtg_S2D_B,eFG_S2D_B,tovr_S2D_B,orb%_S2D_B,ftr_S2D_B,pace_S2D_B
8,22019,2019-12-23,21900446,Indiana Pacers,IND,TOR,IND vs. TOR,TOR @ IND,W,105.2,84.60076,111.983155,0.43,0.123574,0.181818,0.05,105.2,103.64,106.136627,102.021358,0.446809,0.096488,0.264151,0.329787,102.783471
16,22019,2019-12-25,21900458,Los Angeles Lakers,LAL,LAC,LAL vs. LAC,LAC @ LAL,L,93.68,111.016225,133.056133,0.53012,0.202818,0.4,0.26506,93.68,101.56,110.279638,115.913556,0.52907,0.147696,0.232558,0.27907,101.984937
17,22019,2019-12-25,21900457,Golden State Warriors,GSW,HOU,GSW vs. HOU,HOU @ GSW,W,106.2,106.403013,97.414762,0.505747,0.178908,0.295455,0.344828,106.644351,102.12,110.654132,100.853375,0.534483,0.107716,0.136364,0.264368,101.696266
18,22019,2019-12-25,21900456,Philadelphia 76ers,PHI,MIL,PHI vs. MIL,MIL @ PHI,W,94.84,131.800928,110.190053,0.610526,0.126529,0.414634,0.115789,94.84,104.48,111.983155,84.60076,0.495192,0.086141,0.296296,0.163462,103.616529
19,22019,2019-12-25,21900459,Denver Nuggets,DEN,NOP,DEN vs. NOP,NOP @ DEN,L,97.9,123.254974,109.433492,0.598174,0.147002,0.278896,0.259197,97.908783,99.04,102.988691,92.229199,0.489247,0.121163,0.245283,0.172043,99.454393


In [295]:
# Create delta feature set where stats for each team are subtracted from each other
stat_delta_training_set = training_set[[f'{s}_S2D_A' for s in stats]].subtract(training_set[[f'{s}_S2D_B' for s in stats]].rename(columns=dict(zip([f'{s}_S2D_B' for s in stats], [f'{s}_S2D_A' for s in stats]))), fill_value=0)

In [299]:
stat_delta_val_set = val_set[[f'{s}_S2D_A' for s in stats]].subtract(val_set[[f'{s}_S2D_B' for s in stats]].rename(columns=dict(zip([f'{s}_S2D_B' for s in stats], [f'{s}_S2D_A' for s in stats]))), fill_value=0)

In [297]:
stat_delta_training_set.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3644 entries, 8 to 3731
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   poss_S2D_A  3644 non-null   float64
 1   ortg_S2D_A  3644 non-null   float64
 2   drtg_S2D_A  3644 non-null   float64
 3   eFG_S2D_A   3644 non-null   float64
 4   tovr_S2D_A  3644 non-null   float64
 5   orb%_S2D_A  3644 non-null   float64
 6   ftr_S2D_A   3644 non-null   float64
 7   pace_S2D_A  3644 non-null   float64
dtypes: float64(8)
memory usage: 256.2 KB


In [298]:
final_y_train.info()

<class 'pandas.core.series.Series'>
Index: 3644 entries, 8 to 3731
Series name: win_int
Non-Null Count  Dtype
--------------  -----
3644 non-null   int64
dtypes: int64(1)
memory usage: 56.9 KB


In [300]:
stat_delta_val_set.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1222 entries, 3735 to 4979
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   poss_S2D_A  1222 non-null   float64
 1   ortg_S2D_A  1222 non-null   float64
 2   drtg_S2D_A  1222 non-null   float64
 3   eFG_S2D_A   1222 non-null   float64
 4   tovr_S2D_A  1222 non-null   float64
 5   orb%_S2D_A  1222 non-null   float64
 6   ftr_S2D_A   1222 non-null   float64
 7   pace_S2D_A  1222 non-null   float64
dtypes: float64(8)
memory usage: 85.9 KB


In [301]:
stat_delta_training_set.to_csv("stat_delta_x_train.csv", index=False)
stat_delta_val_set.to_csv("stat_delta_x_val.csv", index=False)