In [9]:
!pip install nba_api
import os
import time
import warnings

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

warnings.filterwarnings('ignore')

You should consider upgrading via the '/opt/anaconda3/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

<h1><ins>Team Box Scores 2000-Present:</ins></h1><br>

The code below downloads the team box-score stats (Base and Advanced measures, regular season and playoffs) for every game in a season and saves them in the "data-raw" folder as a feather file, one file per season, for the 2000-01 through 2020-21 seasons.

https://www.nba.com/stats/teams/boxscores-traditional/?Season=2020-21&SeasonType=Regular%20Season

In [10]:
### Returns DataFrame for Entire Season (combines Base/Advanced stats & RegSeason/Playoffs)

# Browser-like headers sent with every stats.nba.com request. Per the original
# author's note, without them the request to the stats endpoint times out.
_STATS_HEADERS = {'Connection': 'keep-alive', 'Accept': 'application/json, text/plain, */*',
                  'x-nba-stats-token': 'true', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
                  'x-nba-stats-origin': 'stats', 'Sec-Fetch-Site': 'same-origin', 'Sec-Fetch-Mode': 'cors',
                  'Referer': 'https://stats.nba.com/', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US,en;q=0.9', }

# Number of leading identifier columns (SEASON_YEAR ... MIN) that precede the
# stat columns once the *_RANK columns have been removed.
_N_META_COLS = 9


def _matchup_key(matchup):
    """Turn an API matchup string into a 'HOME-AWAY' key shared by both teams' rows."""
    if 'vs' in matchup:
        # "HOM vs. AWY": this row belongs to the home team.
        return matchup[:3] + '-' + matchup[8:11]
    if '@' in matchup:
        # "AWY @ HOM": this row belongs to the away team.
        return matchup[6:9] + '-' + matchup[:3]
    # Unrecognised layout: same fallback slice as the original implementation.
    return matchup[8:11]


def _format_game_date(iso_date):
    """Convert a 'YYYY-MM-DD...' date string to 'MM/DD/YYYY'."""
    return iso_date[5:7] + '/' + iso_date[8:10] + '/' + iso_date[:4]


def season_team_boxscore(season, timeout=30):
    """Download one season of per-game team box scores from stats.nba.com.

    For each season type (regular season, playoffs) the Base and Advanced
    game logs are requested, stripped of their *_RANK columns, keyed by a
    derived ``game_id`` and merged side by side. The regular-season rows are
    then stacked on top of the playoff rows.

    Parameters
    ----------
    season : str
        Season label inserted verbatim into the request URL, e.g. '2020-21'.
    timeout : float or tuple, optional
        Timeout in seconds handed to ``requests.get`` so that a stalled
        connection raises instead of blocking forever. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        One row per team per game with the columns ``game_id``, ``team``,
        ``game_date`` (MM/DD/YYYY), ``season``, ``result``, ``playoff``
        (1 for playoff games, otherwise 0) followed by the Base and then the
        Advanced stat columns. The index is not reset after concatenation, so
        it restarts at 0 for the playoff rows.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    season_types = ['Regular+Season', 'Playoffs']
    measure_types = ['Base', 'Advanced']
    merge_keys = ['game_id', 'team', 'game_date', 'season', 'result', 'playoff']

    dfs = []

    for season_type in season_types:
        dfs_temp = []
        for measure_type in measure_types:
            url = ('https://stats.nba.com/stats/teamgamelogs?DateFrom=&DateTo=&GameSegment=&LastNGames=0'
                   '&LeagueID=00&Location=&MeasureType=' + measure_type + '&Month=0&OpponentTeamID=0&Outcome=&PORound=0'
                   '&PaceAdjust=N&PerMode=Totals&Period=0&PlusMinus=N&Rank=N&Season=' + season +
                   '&SeasonSegment=&SeasonType=' + season_type + '&ShotClockRange=&VsConference=&VsDivision=')

            # Fail fast on a hung connection or an HTTP error instead of
            # blocking indefinitely or trying to parse an error page as JSON.
            http_response = requests.get(url=url, headers=_STATS_HEADERS, timeout=timeout)
            http_response.raise_for_status()
            result_set = http_response.json()['resultSets'][0]

            # Column names and rows of the first (only used) result set.
            df_columns = result_set['headers']
            keep_cols = [col for col in df_columns if 'RANK' not in col]
            # Determined before the derived columns are appended, so the
            # selection does not depend on how many of them are added below.
            stat_cols = keep_cols[_N_META_COLS:]

            df = pd.DataFrame(result_set['rowSet'], columns=df_columns)
            df = df[keep_cols].sort_values(['GAME_DATE', 'GAME_ID'], ascending=True).reset_index(drop=True)

            ## Cleaning
            df['game_date'] = df['GAME_DATE'].apply(_format_game_date)
            df['matchup'] = df['MATCHUP'].apply(_matchup_key)
            df['season'] = season
            df['playoff'] = 1 if season_type == 'Playoffs' else 0
            # Both teams of a game share the same matchup key and date.
            df['game_id'] = df['matchup'] + '-' + df['game_date']
            df['team'] = df['TEAM_ABBREVIATION']
            df['result'] = df['WL']

            dfs_temp.append(df[merge_keys + stat_cols])

        # Put the Base and Advanced stats of each team-game on one row.
        dfs.append(pd.merge(dfs_temp[0], dfs_temp[1], on=merge_keys))

    return pd.concat(dfs)

In [11]:
# Download every season from 2000-01 through 2020-21 and write one feather
# file per season into the "data-raw" folder.
tic = time.time()

# Season labels in the 'YYYY-YY' form used in the request URL: '2000-01' ... '2020-21'.
seasons = [f'{start_year}-{(start_year + 1) % 100:02d}' for start_year in range(2000, 2021)]

# to_feather does not create missing directories, so make sure the target exists
# before spending minutes on downloads.
os.makedirs('data-raw', exist_ok=True)

for season in seasons:
    df_x = season_team_boxscore(season)
    # Feather needs a default index; the concatenated frame's index restarts for
    # the playoff rows, so it is reset (and kept as an 'index' column) first.
    df_x.reset_index().to_feather('data-raw/team-'+season+'.feather')
print(f'Elapsed Time: {time.time()-tic} sec')

# Show the last season that was downloaded.
df_x

Elapsed Time: 217.46378588676453 sec


Unnamed: 0,game_id,team,game_date,season,result,playoff,FGM,FGA,FG_PCT,FG3M,...,DREB_PCT,REB_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,BKN-GSW-12/22/2020,GSW,12/22/2020,2020-21,L,0,37,99,0.374,10,...,0.685,0.466,0.161,0.424,0.454,113.6,112.0,93.33,112,0.350
1,BKN-GSW-12/22/2020,BKN,12/22/2020,2020-21,W,0,42,92,0.457,15,...,0.719,0.534,0.179,0.538,0.589,113.6,112.0,93.33,112,0.650
2,LAL-LAC-12/22/2020,LAC,12/22/2020,2020-21,W,0,44,93,0.473,14,...,0.750,0.490,0.154,0.548,0.572,106.0,104.0,86.67,104,0.491
3,LAL-LAC-12/22/2020,LAL,12/22/2020,2020-21,L,0,38,81,0.469,9,...,0.731,0.510,0.183,0.525,0.576,106.0,104.0,86.67,104,0.509
4,BOS-MIL-12/23/2020,BOS,12/23/2020,2020-21,W,0,48,101,0.475,18,...,0.667,0.430,0.070,0.564,0.576,102.9,100.5,83.75,100,0.494
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,MIL-PHX-07/14/2021,MIL,07/14/2021,2020-21,W,1,39,97,0.402,7,...,0.795,0.520,0.052,0.438,0.497,98.1,97.0,80.83,97,0.536
166,PHX-MIL-07/17/2021,PHX,07/17/2021,2020-21,L,1,48,87,0.552,13,...,0.643,0.476,0.098,0.626,0.648,94.2,91.5,76.25,92,0.503
167,PHX-MIL-07/17/2021,MIL,07/17/2021,2020-21,W,1,50,87,0.575,14,...,0.700,0.524,0.132,0.655,0.651,94.2,91.5,76.25,91,0.497
168,MIL-PHX-07/20/2021,PHX,07/20/2021,2020-21,L,1,38,86,0.442,6,...,0.681,0.406,0.147,0.477,0.519,103.1,102.5,85.42,102,0.417


In [154]:
###
# Ad-hoc look at one raw teamgamelogs response. Unlike season_team_boxscore,
# the *_RANK columns are kept and no derived columns are added.
measure_type = 'Base'
season = '2018-19'
season_type = 'Regular+Season'

url = 'https://stats.nba.com/stats/teamgamelogs?DateFrom=&DateTo=&GameSegment=&LastNGames=0' \
      '&LeagueID=00&Location=&MeasureType='+measure_type+'&Month=0&OpponentTeamID=0&Outcome=&PORound=0' \
      '&PaceAdjust=N&PerMode=Totals&Period=0&PlusMinus=N&Rank=N&Season='+season+ \
      '&SeasonSegment=&SeasonType='+season_type+'&ShotClockRange=&VsConference=&VsDivision='

# Browser-like request headers (same set as used in season_team_boxscore).
headers  = {'Connection': 'keep-alive', 'Accept': 'application/json, text/plain, */*', 
            'x-nba-stats-token': 'true', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36', 
            'x-nba-stats-origin': 'stats', 'Sec-Fetch-Site': 'same-origin', 'Sec-Fetch-Mode': 'cors', 
            'Referer': 'https://stats.nba.com/', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US,en;q=0.9', }


# Team box scores for the measure type / season / season type selected above.
# A timeout and a status check make a stalled or rejected request fail loudly
# instead of hanging the kernel or failing later while parsing JSON.
http_response = requests.get(url=url, headers=headers, timeout=30)
http_response.raise_for_status()
response = http_response.json()

# List of Column Names and List of all Rows of Data #
df_columns = response['resultSets'][0]['headers']
df_rows = response['resultSets'][0]['rowSet']

df = pd.DataFrame(df_rows, columns=df_columns)
df = df.sort_values(['GAME_DATE', 'GAME_ID'], ascending=True).reset_index(drop=True)

df

Unnamed: 0,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK
0,2018-19,1610612755,PHI,Philadelphia 76ers,0021800001,2018-10-16T00:00:00,PHI @ BOS,L,48.0,34,...,893,2185,1614,870,917,1133,940,1295,2398,2188
1,2018-19,1610612738,BOS,Boston Celtics,0021800001,2018-10-16T00:00:00,BOS vs. PHI,W,48.0,42,...,155,1752,1390,1203,917,1133,940,1295,1659,251
2,2018-19,1610612744,GSW,Golden State Warriors,0021800002,2018-10-16T00:00:00,GSW vs. OKC,W,48.0,42,...,63,535,2319,1203,371,1545,2347,1044,1439,674
3,2018-19,1610612760,OKC,Oklahoma City Thunder,0021800002,2018-10-16T00:00:00,OKC @ GSW,L,48.0,33,...,1165,1752,1390,149,603,1859,1167,73,1961,1711
4,2018-19,1610612766,CHA,Charlotte Hornets,0021800003,2018-10-17T00:00:00,CHA vs. MIL,L,48.0,41,...,1720,1752,444,870,112,731,737,363,1123,1231
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,2018-19,1610612743,DEN,Denver Nuggets,0021801228,2019-04-10T00:00:00,DEN vs. MIN,W,48.0,39,...,267,1394,890,1543,1329,1,23,823,2012,977
2456,2018-19,1610612746,LAC,LA Clippers,0021801229,2019-04-10T00:00:00,LAC vs. UTA,W,53.0,54,...,340,84,636,1203,917,2409,2208,486,26,831
2457,2018-19,1610612762,UTA,Utah Jazz,0021801229,2019-04-10T00:00:00,UTA @ LAC,L,53.0,47,...,83,232,1809,870,20,1133,1822,171,53,1566
2458,2018-19,1610612757,POR,Portland Trail Blazers,0021801230,2019-04-10T00:00:00,POR vs. SAC,W,48.0,53,...,1283,2083,890,1855,2069,42,376,1928,57,896
