In [4]:
import pandas as pd
import numpy as np
import json
# import matplotlib.pyplot as plt
# from nba_api.live.nba.endpoints import scoreboard
from nba_api.stats.endpoints import playercareerstats, teamgamelogs, playergamelogs
from nba_api.stats.static import players, teams

To Do:
[] 'hustle stats' game matching: https://github.com/swar/nba_api/blob/master/src/nba_api/stats/endpoints/hustlestatsboxscore.py

## Get Team Gamelogs

In [5]:
def get_team_gamelog_data(team_id, n_games, season):
    # https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/teamgamelogs.md
    team_gamelog_data = teamgamelogs.TeamGameLogs(team_id_nullable=team_id,
                                                  season_nullable=season,
                                                  last_n_games_nullable=n_games,
                                                 ).get_json()
    
    data_dict = json.loads(team_gamelog_data)
    df = pd.DataFrame(data_dict['resultSets'][0]['rowSet'], 
                      columns=data_dict['resultSets'][0]['headers'])
    
    df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'])
    df = df.set_index(['GAME_DATE', 'GAME_ID', 'TEAM_ABBREVIATION'])
    
    
    return df

In [6]:
teams_dict = teams.get_teams()
n_games = 82
seasons = ['2012-13',
           '2013-14',
           '2014-15',
           '2015-16',
           '2016-17',
           '2017-18',
           '2018-19',
           '2019-20',
           '2020-21',
           '2021-22',
           '2022-23',
           '2023-24']

df = pd.DataFrame()
for season in seasons:
    print(season)
    for i in range(0,len(teams_dict)):
        df = pd.concat([df, get_team_gamelog_data(i, n_games, season)], axis=0)

2012-13
2013-14
2014-15
2015-16
2016-17
2017-18
2018-19
2019-20
2020-21
2021-22
2022-23
2023-24


In [7]:
# SAVE...use sql in future...
df.to_csv('historical_stats_raw.csv')

## Processing

In [8]:
# factor encoding
df['Home'] = df['MATCHUP'].apply(lambda x: 0 if '@' in x else 1)
df['Win'] = df['WL'].map({'W': 1, 'L': 0})

# calculate per-48 stats, corrects for overtime games
cols = ['FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 
        'FTA','OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 
        'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS']

new_cols = []
for col in cols:
    new_col_name = col + '_per48'
    df[new_col_name] = df[col]/df['MIN']*48 #np.round((df[col]/df['MIN'])*48, 2)
    new_cols.append(new_col_name)

df[new_cols] = np.round(df[new_cols], 2)

### Stats Against

In [9]:
cols = [      'Win',              'FGM_per48',        'FGA_per48',
              'FG3M_per48',       'FG3A_per48',
              'FTM_per48',        'FTA_per48',
              'OREB_per48',       'DREB_per48',
              'REB_per48',        'AST_per48',        'TOV_per48',
              'STL_per48',        'BLK_per48',        'BLKA_per48',
              'PF_per48',         'PFD_per48',        'PTS_per48',
              'FG_PCT',           'FG3_PCT']

for game_id in df.index.get_level_values('GAME_ID').unique():
    
    team_abvs = df.loc[:,game_id,:].index.get_level_values('TEAM_ABBREVIATION')
    if len(team_abvs) < 2:
        print('no opponent match')
    else:
        for col in cols:
            col_name = col + '_against'

            df.loc[(slice(None), game_id, team_abvs[0]), 
                            col_name] = df.loc[(slice(None), 
                                                     game_id, team_abvs[1])][col].iloc[0]
               
            df.loc[(slice(None), game_id, team_abvs[1]), 
                            col_name] = df.loc[(slice(None), 
                                                     game_id, team_abvs[0])][col].iloc[0]
               
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SEASON_YEAR,TEAM_ID,TEAM_NAME,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,...,AST_per48_against,TOV_per48_against,STL_per48_against,BLK_per48_against,BLKA_per48_against,PF_per48_against,PFD_per48_against,PTS_per48_against,FG_PCT_against,FG3_PCT_against
GAME_DATE,GAME_ID,TEAM_ABBREVIATION,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2013-04-17,0021201229,LAC,2012-13,1610612746,Los Angeles Clippers,LAC @ SAC,W,48.0,44,88,0.500,12,...,23.0,11.0,5.0,4.0,3.0,20.0,20.0,108.0,0.449,0.346
2013-04-17,0021201218,ATL,2012-13,1610612737,Atlanta Hawks,ATL @ NYK,L,48.0,37,90,0.411,2,...,21.0,12.0,8.0,3.0,2.0,16.0,15.0,98.0,0.429,0.375
2013-04-17,0021201230,GSW,2012-13,1610612744,Golden State Warriors,GSW @ POR,W,48.0,40,93,0.430,9,...,18.0,14.0,2.0,5.0,3.0,13.0,17.0,88.0,0.427,0.500
2013-04-17,0021201222,CHI,2012-13,1610612741,Chicago Bulls,CHI vs. WAS,W,48.0,37,78,0.474,7,...,17.0,9.0,7.0,5.0,2.0,17.0,22.0,92.0,0.424,0.438
2013-04-17,0021201216,BOS,2012-13,1610612738,Boston Celtics,BOS @ TOR,L,48.0,37,84,0.440,7,...,26.0,11.0,5.0,5.0,2.0,14.0,22.0,114.0,0.500,0.571
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-25,0022300071,MEM,2023-24,1610612763,Memphis Grizzlies,MEM vs. NOP,L,48.0,38,91,0.418,12,...,22.0,21.0,8.0,5.0,7.0,21.0,19.0,111.0,0.471,0.438
2023-10-24,0022300062,GSW,2023-24,1610612744,Golden State Warriors,GSW vs. PHX,L,48.0,36,101,0.356,10,...,23.0,19.0,5.0,7.0,6.0,22.0,23.0,108.0,0.442,0.333
2023-10-24,0022300062,PHX,2023-24,1610612756,Phoenix Suns,PHX @ GSW,W,48.0,42,95,0.442,11,...,19.0,11.0,11.0,6.0,7.0,23.0,22.0,104.0,0.356,0.233
2023-10-24,0022300061,LAL,2023-24,1610612747,Los Angeles Lakers,LAL @ DEN,L,48.0,41,90,0.456,10,...,29.0,12.0,9.0,6.0,4.0,15.0,18.0,119.0,0.527,0.412


In [10]:
# SAVE for Feature Prep
df.to_csv('historical_stats.csv')