Imports

In [6]:
import pandas as pd
import requests
pd.set_option('display.max_columns', None) # displays all columns in a wide DataFrame
import time
import numpy as np

Test Data Extraction

In [7]:
test_url = 'https://stats.nba.com/stats/leagueLeaders?LeagueID=00&PerMode=Totals&Scope=S&Season=2012-13&SeasonType=Regular%20Season&StatCategory=PTS'
r = requests.get(url=test_url).json()
table_headers = r['resultSet']['headers']
pd.DataFrame(r['resultSet']['rowSet'], columns=table_headers).head()

Unnamed: 0,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,EFF,AST_TOV,STL_TOV
0,201142,1,Kevin Durant,1610612760,OKC,81,3119,731,1433,0.51,139,334,0.416,679,750,0.905,46,594,640,374,116,105,280,143,2280,2462,1.34,0.41
1,977,2,Kobe Bryant,1610612747,LAL,78,3013,738,1595,0.463,132,407,0.324,525,626,0.839,66,367,433,469,106,25,287,173,2133,1921,1.63,0.37
2,2544,3,LeBron James,1610612748,MIA,76,2877,765,1354,0.565,103,254,0.406,403,535,0.753,97,513,610,551,129,67,226,110,2036,2446,2.44,0.57
3,201935,4,James Harden,1610612745,HOU,78,2985,585,1337,0.438,179,486,0.368,674,792,0.851,62,317,379,455,142,38,295,178,2023,1872,1.54,0.48
4,2546,5,Carmelo Anthony,1610612752,NYK,67,2482,669,1489,0.449,157,414,0.379,425,512,0.83,134,326,460,171,52,32,175,205,1920,1553,0.98,0.3


In [8]:
temp_df1 = pd.DataFrame(r['resultSet']['rowSet'], columns=table_headers)
temp_df2 = pd.DataFrame({
    'Year': ['2023-24' for i in range(len(temp_df1))],
    'Season_type': ['Regular%20Season' for i in range(len(temp_df1))],
})
temp_df3 = pd.concat([temp_df2, temp_df1], axis=1)

In [9]:
temp_df3.head()

Unnamed: 0,Year,Season_type,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,EFF,AST_TOV,STL_TOV
0,2023-24,Regular%20Season,201142,1,Kevin Durant,1610612760,OKC,81,3119,731,1433,0.51,139,334,0.416,679,750,0.905,46,594,640,374,116,105,280,143,2280,2462,1.34,0.41
1,2023-24,Regular%20Season,977,2,Kobe Bryant,1610612747,LAL,78,3013,738,1595,0.463,132,407,0.324,525,626,0.839,66,367,433,469,106,25,287,173,2133,1921,1.63,0.37
2,2023-24,Regular%20Season,2544,3,LeBron James,1610612748,MIA,76,2877,765,1354,0.565,103,254,0.406,403,535,0.753,97,513,610,551,129,67,226,110,2036,2446,2.44,0.57
3,2023-24,Regular%20Season,201935,4,James Harden,1610612745,HOU,78,2985,585,1337,0.438,179,486,0.368,674,792,0.851,62,317,379,455,142,38,295,178,2023,1872,1.54,0.48
4,2023-24,Regular%20Season,2546,5,Carmelo Anthony,1610612752,NYK,67,2482,669,1489,0.449,157,414,0.379,425,512,0.83,134,326,460,171,52,32,175,205,1920,1553,0.98,0.3


In [46]:
del temp_df1, temp_df2, temp_df3

Table Headers Setup

In [47]:
df_cols = ['Year', 'Season_type'] + table_headers

In [48]:
pd.DataFrame(columns=df_cols)

Unnamed: 0,Year,Season_type,PLAYER_ID,RANK,PLAYER,TEAM_ID,TEAM,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,EFF,AST_TOV,STL_TOV


Extract Data & Export to Excel

In [None]:
df = pd.DataFrame(columns=df_cols)
season_types = ['Regular%20Season', 'Playoffs']
years = ['2012-13', '2013-14', '2014-15', '2015-16', '2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24']

start_data_extraction = time.time()

for y in years:
    for s in season_types:
        api_url = f'https://stats.nba.com/stats/leagueLeaders?LeagueID=00&PerMode=Totals&Scope=S&Season={y}&SeasonType={s}&StatCategory=PTS'
        r = requests.get(url=api_url).json()
        temp_df1 = pd.DataFrame(r['resultSet']['rowSet'], columns=table_headers)
        temp_df2 = pd.DataFrame({
            'Year': [y for i in range(len(temp_df1))],
            'Season_type': [s for i in range(len(temp_df1))],
        })
        temp_df3 = pd.concat([temp_df2, temp_df1], axis=1)
        df = pd.concat([df, temp_df3], axis=0)
        print(f'Finished collecting data for the {y} {s}.')
        lag = np.random.uniform(low=5.0, high=25.0) # this lag is not mandatory
        print(f'... waiting {round(lag, 1)} seconds until next attempt.')
        time.sleep(lag) # this lag is not mandatory

print(f'Process completed! Total run time: {round((time.time() - start_data_extraction)/60,2)} minutes.')

# df.to_excel('nba_player_data.xlsx', index=False) # UNCOMMENT if you want TO CREATE a NEW nba player data SPREADSHEET!!!

---