In [1]:
!pip install nba_api
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import requests
from bs4 import BeautifulSoup
import time

You should consider upgrading via the '/opt/anaconda3/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

<h1><ins>Team Box Scores 2000-Present:</ins></h1><br>

The code below downloads the team box-score stats (traditional and advanced) for every game in a season, regular season and playoffs, and saves them in the "data-raw" folder as one feather file per season (repeated for every season from 2000-01 to the present).

https://www.nba.com/stats/teams/boxscores-traditional/?Season=2020-21&SeasonType=Regular%20Season

In [2]:
### This function returns a single DataFrame of the boxscore statistics for all games in a given season
#      --(i.e. combines all Traditional/Advanceds stats for all --
#      -- games in the specified season, RegSeason AND Playoffs)--

def season_team_boxscore(season):
    tic = time.time()
    season_types = ['Regular+Season', 'Playoffs']
    measure_types = ['Base', 'Advanced']
    dfs = []
    
    for season_type in season_types:
        dfs_temp = []
        for measure_type in measure_types:
            url = 'https://stats.nba.com/stats/teamgamelogs?DateFrom=&DateTo=&GameSegment=&LastNGames=0' \
                  '&LeagueID=00&Location=&MeasureType='+measure_type+'&Month=0&OpponentTeamID=0&Outcome=&PORound=0' \
                  '&PaceAdjust=N&PerMode=Totals&Period=0&PlusMinus=N&Rank=N&Season='+season+ \
                  '&SeasonSegment=&SeasonType='+season_type+'&ShotClockRange=&VsConference=&VsDivision='

            # The headers below ensure that when we make a request to the nba_api, it doesn't time out
            headers  = {'Connection': 'keep-alive', 'Accept': 'application/json, text/plain, */*', 
                        'x-nba-stats-token': 'true', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36', 
                        'x-nba-stats-origin': 'stats', 'Sec-Fetch-Site': 'same-origin', 'Sec-Fetch-Mode': 'cors', 
                        'Referer': 'https://stats.nba.com/', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US,en;q=0.9', }
            
            # Traditional 2020-21 Regular Season Team Boxscores #
            response = requests.get(url=url, headers=headers).json()

            # List of Column Names and List of all Rows of Data #
            df_columns = response['resultSets'][0]['headers']
            keep_cols = [col for col in df_columns if not('RANK' in col)]
            df_rows = response['resultSets'][0]['rowSet']
            df = pd.DataFrame(df_rows, columns=df_columns)
            df = df[keep_cols].sort_values(['GAME_DATE', 'GAME_ID'], ascending=True).reset_index(drop=True)
            
            ## Cleaning
            df['game_date'] = df['GAME_DATE'].apply(lambda x: x[5:7]+'/'+x[8:10]+'/'+x[:4])
            df['matchup'] = df['MATCHUP'].apply(lambda x: x[:3]+'-'+x[8:11] if 'vs' in x else x[6:9]+'-'+x[:3] if '@' in x else x[8:11])
            df['season'] = [season]*len(df)
            df['playoff'] = [1 if season_type == 'Playoffs' else 0]*len(df)
            df['game_id'] = [m[0]+'-'+m[1] for m in df[['matchup', 'game_date']].values]
            df['team'] = df['TEAM_ABBREVIATION']
            df['result'] = df['WL']
            
            columns = ['game_id', 'team', 'game_date', 'season', 'result', 'playoff']+df.columns[9:-7].to_list()
            df = df[columns]
            dfs_temp.append(df) 
        dfs.append(pd.merge(dfs_temp[0], dfs_temp[1], on=['game_id', 'team', 'game_date', 'season', 'result', 'playoff']))
    df = pd.concat([dfs[0],dfs[1]])
    print(f'{season} stats complete: {np.round(time.time()-tic, 2)}s elapsed')
    return df

In [3]:
tic = time.time()
seasons = ['2000-01','2001-02','2002-03','2003-04','2004-05', '2005-06','2006-07',
           '2007-08','2008-09','2009-10','2010-11','2011-12','2012-13','2013-14',
           '2014-15','2015-16','2016-17','2017-18','2018-19','2019-20','2020-21', '2021-22']
for season in seasons:
    df_x = season_team_boxscore(season)
    df_x.reset_index().to_feather('data-raw/team-'+season+'.feather')
print(f'Elapsed Time: {time.time()-tic} sec')
df_x

2000-01 stats complete: 12.99s elapsed
2001-02 stats complete: 10.21s elapsed
2002-03 stats complete: 13.69s elapsed
2003-04 stats complete: 11.59s elapsed
2004-05 stats complete: 9.98s elapsed
2005-06 stats complete: 10.48s elapsed
2006-07 stats complete: 10.4s elapsed
2007-08 stats complete: 10.72s elapsed
2008-09 stats complete: 10.99s elapsed
2009-10 stats complete: 13.11s elapsed
2010-11 stats complete: 11.43s elapsed
2011-12 stats complete: 11.24s elapsed
2012-13 stats complete: 10.26s elapsed
2013-14 stats complete: 12.04s elapsed
2014-15 stats complete: 10.37s elapsed
2015-16 stats complete: 10.18s elapsed
2016-17 stats complete: 11.35s elapsed
2017-18 stats complete: 11.42s elapsed
2018-19 stats complete: 11.14s elapsed
2019-20 stats complete: 11.23s elapsed
2020-21 stats complete: 11.21s elapsed
2021-22 stats complete: 7.85s elapsed
Elapsed Time: 244.23234295845032 sec


Unnamed: 0,game_id,team,game_date,season,result,playoff,FGM,FGA,FG_PCT,FG3M,...,DREB_PCT,REB_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,MIL-BKN-10/19/2021,BKN,10/19/2021,2021-22,L,0.0,37,84,0.440,17,...,0.707,0.459,0.127,0.542,0.552,105.0,102.0,85.00,102,0.407
1,MIL-BKN-10/19/2021,MIL,10/19/2021,2021-22,W,0.0,48,105,0.457,17,...,0.824,0.541,0.078,0.538,0.562,105.0,102.0,85.00,102,0.593
2,LAL-GSW-10/19/2021,LAL,10/19/2021,2021-22,L,0.0,45,95,0.474,15,...,0.759,0.477,0.161,0.553,0.551,115.3,112.5,93.75,112,0.422
3,LAL-GSW-10/19/2021,GSW,10/19/2021,2021-22,W,0.0,41,93,0.441,14,...,0.811,0.523,0.150,0.516,0.570,115.3,112.5,93.75,113,0.578
4,CHA-IND-10/20/2021,IND,10/20/2021,2021-22,L,0.0,42,90,0.467,17,...,0.667,0.487,0.159,0.561,0.607,112.2,106.5,88.75,107,0.520
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,PHI-DET-04/10/2022,PHI,04/10/2022,2021-22,W,0.0,46,88,0.523,5,...,0.627,0.479,0.113,0.551,0.601,99.9,96.5,80.42,97,0.591
2456,PHX-SAC-04/10/2022,PHX,04/10/2022,2021-22,L,0.0,42,103,0.408,14,...,0.821,0.539,0.111,0.476,0.497,102.4,99.0,82.50,99,0.432
2457,PHX-SAC-04/10/2022,SAC,04/10/2022,2021-22,W,0.0,40,76,0.526,14,...,0.635,0.461,0.152,0.618,0.650,102.4,99.0,82.50,99,0.568
2458,POR-UTA-04/10/2022,UTA,04/10/2022,2021-22,W,0.0,37,82,0.451,9,...,0.868,0.650,0.172,0.506,0.562,100.0,98.5,82.08,99,0.697


In [154]:
###
measure_type = 'Base'
season = '2018-19'
season_type = 'Regular+Season'

url = 'https://stats.nba.com/stats/teamgamelogs?DateFrom=&DateTo=&GameSegment=&LastNGames=0' \
      '&LeagueID=00&Location=&MeasureType='+measure_type+'&Month=0&OpponentTeamID=0&Outcome=&PORound=0' \
      '&PaceAdjust=N&PerMode=Totals&Period=0&PlusMinus=N&Rank=N&Season='+season+ \
      '&SeasonSegment=&SeasonType='+season_type+'&ShotClockRange=&VsConference=&VsDivision='

headers  = {'Connection': 'keep-alive', 'Accept': 'application/json, text/plain, */*', 
            'x-nba-stats-token': 'true', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36', 
            'x-nba-stats-origin': 'stats', 'Sec-Fetch-Site': 'same-origin', 'Sec-Fetch-Mode': 'cors', 
            'Referer': 'https://stats.nba.com/', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US,en;q=0.9', }


# Traditional 2020-21 Regular Season Team Boxscores #
response = requests.get(url=url, headers=headers).json()

# List of Column Names and List of all Rows of Data #
df_columns = response['resultSets'][0]['headers']
# keep_cols = [col for col in df_columns if not('RANK' in col)]
df_rows = response['resultSets'][0]['rowSet']

df = pd.DataFrame(df_rows, columns=df_columns)
df = df.sort_values(['GAME_DATE', 'GAME_ID'], ascending=True).reset_index(drop=True)

df

Unnamed: 0,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK
0,2018-19,1610612755,PHI,Philadelphia 76ers,0021800001,2018-10-16T00:00:00,PHI @ BOS,L,48.0,34,...,893,2185,1614,870,917,1133,940,1295,2398,2188
1,2018-19,1610612738,BOS,Boston Celtics,0021800001,2018-10-16T00:00:00,BOS vs. PHI,W,48.0,42,...,155,1752,1390,1203,917,1133,940,1295,1659,251
2,2018-19,1610612744,GSW,Golden State Warriors,0021800002,2018-10-16T00:00:00,GSW vs. OKC,W,48.0,42,...,63,535,2319,1203,371,1545,2347,1044,1439,674
3,2018-19,1610612760,OKC,Oklahoma City Thunder,0021800002,2018-10-16T00:00:00,OKC @ GSW,L,48.0,33,...,1165,1752,1390,149,603,1859,1167,73,1961,1711
4,2018-19,1610612766,CHA,Charlotte Hornets,0021800003,2018-10-17T00:00:00,CHA vs. MIL,L,48.0,41,...,1720,1752,444,870,112,731,737,363,1123,1231
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,2018-19,1610612743,DEN,Denver Nuggets,0021801228,2019-04-10T00:00:00,DEN vs. MIN,W,48.0,39,...,267,1394,890,1543,1329,1,23,823,2012,977
2456,2018-19,1610612746,LAC,LA Clippers,0021801229,2019-04-10T00:00:00,LAC vs. UTA,W,53.0,54,...,340,84,636,1203,917,2409,2208,486,26,831
2457,2018-19,1610612762,UTA,Utah Jazz,0021801229,2019-04-10T00:00:00,UTA @ LAC,L,53.0,47,...,83,232,1809,870,20,1133,1822,171,53,1566
2458,2018-19,1610612757,POR,Portland Trail Blazers,0021801230,2019-04-10T00:00:00,POR vs. SAC,W,48.0,53,...,1283,2083,890,1855,2069,42,376,1928,57,896
