In [1]:
# https://github.com/swar/nba_api/issues/124

In [2]:
import pandas as pd
from time import time
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.endpoints import playerawards

In [3]:
from nba_api.stats.library import http

print(http.STATS_HEADERS)

{'Host': 'stats.nba.com', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0', 'Accept': 'application/json, text/plain, */*', 'Accept-Language': 'en-US,en;q=0.5', 'Accept-Encoding': 'gzip, deflate, br', 'x-nba-stats-origin': 'stats', 'x-nba-stats-token': 'true', 'Connection': 'keep-alive', 'Referer': 'https://stats.nba.com/', 'Pragma': 'no-cache', 'Cache-Control': 'no-cache'}


In [4]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [5]:
# function which collects player seasons and drops before modern 3 point era and insufficient data

def collect_seasons(id):
    season_filter = 1998 # the first year of the modern 3 point era

    career = playercareerstats.PlayerCareerStats(player_id=id)
    df = career.get_data_frames()[0]

    df['SEASON_ID'] = df['SEASON_ID'].apply(lambda x: int(x[:4]) + 1) #make seasons numeric type
    df = df[df['SEASON_ID'] >= season_filter] # drop any seasons before modern era

    return df

In [6]:
# function which gets player awards

# fun fact and test case: dominique wilkins in 94 made an All-NBA team
# but played for two teams that year

def collect_awards(id):
    awards = playerawards.PlayerAwards(player_id=id)

    df = awards.get_data_frames()[0]

    df = df[df['DESCRIPTION'] == 'All-NBA']
    df = df[['SEASON', 'ALL_NBA_TEAM_NUMBER']]

    df['SEASON'] = df['SEASON'].apply(lambda x: int(x[:4]) + 1) #make seasons numeric type

    return df

In [7]:
# function which joins the awards and seasons

def stat_join(seasons, awards):
    df = seasons.join(awards.set_index('SEASON'), on='SEASON_ID')
    df['ALL_NBA_TEAM_NUMBER'] = df['ALL_NBA_TEAM_NUMBER'].fillna(0)
    return df

In [10]:
# function which loops through each player

nba_players = players.get_players()

In [13]:
len(nba_players)

4900

In [12]:
nba_players[0]

{'id': 76001,
 'full_name': 'Alaa Abdelnaby',
 'first_name': 'Alaa',
 'last_name': 'Abdelnaby',
 'is_active': False}

In [17]:
len(nba_players[4892: 4900])

8

In [24]:
def collect_chunk(nba_players, start_point, chunk_size):
    final_df = pd.DataFrame()
    for count, player in enumerate(nba_players[start_point:start_point + chunk_size]):
        id = player['id']
        name = player['full_name']

        start = time()
        print(f"Processing {name}...")

        seasons = collect_seasons(id)
        awards = collect_awards(id)
        df = stat_join(seasons, awards)
        df['NAME'] = player['full_name']

        if count == 0:
            final_df = df
        else:
            final_df = pd.concat([final_df, df])
        
        end = time()
        print(f"Took {end - start} seconds to finish")

    final_df.to_csv(f'../data/chunks/players_{start_point}_to_{start_point + chunk_size}.csv')
    return

In [25]:
chunk_size = 100
start_point = 1000

while start_point < len(nba_players):
    if start_point + chunk_size > len(nba_players):
        chunk_size = len(nba_players) - start_point
    print(f'collecting chunk {start_point} through {start_point + chunk_size}')
    collect_chunk(nba_players, start_point, chunk_size)
    start_point = start_point + chunk_size

collecting chunk 1000 through 1100
Processing Mark Davis...
Took 2.8120172023773193 seconds to finish
Processing Mark Davis...
Took 1.2777395248413086 seconds to finish
Processing Mel Davis...


  final_df = pd.concat([final_df, df])


Took 1.4683737754821777 seconds to finish
Processing Michael Davis...
Took 2.2348341941833496 seconds to finish
Processing Mike Davis...


  final_df = pd.concat([final_df, df])


Took 1.3064830303192139 seconds to finish
Processing Monti Davis...
Took 5.316437482833862 seconds to finish
Processing Paul Davis...
Took 2.503627061843872 seconds to finish
Processing Ralph Davis...


  final_df = pd.concat([final_df, df])


Took 4.100947856903076 seconds to finish
Processing Ricky Davis...
Took 1.3409223556518555 seconds to finish
Processing Ron Davis...
Took 1.303741455078125 seconds to finish
Processing Terence Davis...
Took 2.199023723602295 seconds to finish
Processing Terry Davis...
Took 1.616363286972046 seconds to finish
Processing Tyler Davis...
Took 2.1244418621063232 seconds to finish
Processing Walt Davis...


  final_df = pd.concat([final_df, df])


Took 3.1799376010894775 seconds to finish
Processing Walter Davis...
Took 1.4265966415405273 seconds to finish
Processing JD Davison...
Took 0.31849241256713867 seconds to finish
Processing Andre Dawkins...
Took 4.46406626701355 seconds to finish
Processing Darryl Dawkins...
Took 2.549069881439209 seconds to finish
Processing Johnny Dawkins...
Took 1.5441641807556152 seconds to finish
Processing Paul Dawkins...
Took 1.5651681423187256 seconds to finish
Processing Branden Dawson...
Took 1.9734971523284912 seconds to finish
Processing Eric Dawson...
Took 1.9202392101287842 seconds to finish
Processing Tony Dawson...
Took 1.16448974609375 seconds to finish
Processing Todd Day...
Took 1.501183271408081 seconds to finish
Processing Austin Daye...
Took 1.5956008434295654 seconds to finish
Processing Darren Daye...
Took 3.246143341064453 seconds to finish
Processing Darius Days...
Took 1.1921768188476562 seconds to finish
Processing Nando De Colo...
Took 1.3870196342468262 seconds to finish
P

  final_df = pd.concat([final_df, df])


Took 2.8237128257751465 seconds to finish
Processing Andrew DeClercq...
Took 5.267230033874512 seconds to finish
Processing Javin DeLaurier...
Took 3.181478500366211 seconds to finish
Processing Nate DeLong...


  final_df = pd.concat([final_df, df])


Took 3.356187343597412 seconds to finish
Processing DeMar DeRozan...
Took 0.35973262786865234 seconds to finish
Processing Hank DeZonie...


  final_df = pd.concat([final_df, df])


Took 1.3869554996490479 seconds to finish
Processing Greg Deane...
Took 2.3216326236724854 seconds to finish
Processing Gabriel Deck...
Took 6.478639602661133 seconds to finish
Processing Dewayne Dedmon...
Took 1.6699435710906982 seconds to finish
Processing Archie Dees...


  final_df = pd.concat([final_df, df])


Took 1.6422653198242188 seconds to finish
Processing Terry Dehere...
Took 5.790404796600342 seconds to finish
Processing Red Dehnert...


  final_df = pd.concat([final_df, df])


Took 1.4902167320251465 seconds to finish
Processing Bryce Dejean-Jones...
Took 1.6965975761413574 seconds to finish
Processing Sam Dekker...
Took 1.481658697128296 seconds to finish
Processing Vinny Del Negro...
Took 1.7748260498046875 seconds to finish
Processing Malcolm Delaney...
Took 4.03285813331604 seconds to finish
Processing Bison Dele...
Took 2.346320390701294 seconds to finish
Processing Carlos Delfino...
Took 6.232673406600952 seconds to finish
Processing Angel Delgado...
Took 2.227292060852051 seconds to finish
Processing Tony Delk...
Took 1.5251336097717285 seconds to finish
Processing Matthew Dellavedova...
Took 1.6702444553375244 seconds to finish
Processing Fennis Dembo...
Took 3.17958664894104 seconds to finish
Processing Larry Demic...
Took 1.4243032932281494 seconds to finish
Processing Dell Demps...
Took 6.020212411880493 seconds to finish
Processing George Dempsey...


  final_df = pd.concat([final_df, df])


Took 2.4224836826324463 seconds to finish
Processing Luol Deng...
Took 0.5902283191680908 seconds to finish
Processing Kenny Dennard...
Took 1.4017341136932373 seconds to finish
Processing Blaine Denning...


  final_df = pd.concat([final_df, df])


Took 2.950137138366699 seconds to finish
Processing Dexter Dennis...
Took 0.4472622871398926 seconds to finish
Processing Justin Dentmon...
Took 5.6613171100616455 seconds to finish
Processing Randy Denton...


  final_df = pd.concat([final_df, df])


Took 4.063443183898926 seconds to finish
Processing Rod Derline...


  final_df = pd.concat([final_df, df])


Took 2.1672728061676025 seconds to finish
Processing Marcus Derrickson...
Took 1.5866575241088867 seconds to finish
Processing Dave Deutsch...


  final_df = pd.concat([final_df, df])


Took 3.901251792907715 seconds to finish
Processing Walter Devlin...


  final_df = pd.concat([final_df, df])


Took 2.3523170948028564 seconds to finish
Processing Ernie DiGregorio...


  final_df = pd.concat([final_df, df])


Took 4.3095996379852295 seconds to finish
Processing Donte DiVincenzo...
Took 0.3904685974121094 seconds to finish
Processing Moussa Diabate...
Took 0.38698244094848633 seconds to finish
Processing Mamadi Diakite...
Took 0.6968793869018555 seconds to finish
Processing Derrick Dial...
Took 1.4650564193725586 seconds to finish
Processing Cheick Diallo...
Took 1.5762324333190918 seconds to finish
Processing Hamidou Diallo...
Took 3.2138381004333496 seconds to finish
Processing Boris Diaw...
Took 2.4305825233459473 seconds to finish
Processing Yakhouba Diawara...
Took 2.177701234817505 seconds to finish
Processing Guillermo Diaz...
Took 4.072861433029175 seconds to finish
Processing Gradey Dick...
Took 0.34145164489746094 seconds to finish
Processing Dan Dickau...
Took 2.366132974624634 seconds to finish
Processing Kaniel Dickens...
Took 3.476590633392334 seconds to finish
Processing Henry Dickerson...


  final_df = pd.concat([final_df, df])


Took 4.416412830352783 seconds to finish
Processing Michael Dickerson...
Took 2.199319839477539 seconds to finish
Processing Derrek Dickey...


  final_df = pd.concat([final_df, df])


Took 4.673586130142212 seconds to finish
Processing Dick Dickey...


  final_df = pd.concat([final_df, df])


Took 1.3997077941894531 seconds to finish
Processing Travis Diener...
Took 6.004556894302368 seconds to finish
Processing Gorgui Dieng...
Took 2.523674488067627 seconds to finish
Processing Ousmane Dieng...
Took 0.5882406234741211 seconds to finish
Processing Connie Dierking...


  final_df = pd.concat([final_df, df])


Took 2.470858097076416 seconds to finish
Processing Coby Dietrick...
Took 4.240614414215088 seconds to finish
Processing Mickey Dillard...
Took 1.693434238433838 seconds to finish
Processing Bob Dille...


  final_df = pd.concat([final_df, df])


Took 2.9964210987091064 seconds to finish
Processing John Dillon...


  final_df = pd.concat([final_df, df])


Took 1.5653133392333984 seconds to finish
Processing Byron Dinkins...
Took 4.250410556793213 seconds to finish
Processing Jackie Dinkins...


  final_df = pd.concat([final_df, df])


Took 1.5375375747680664 seconds to finish
Processing Bill Dinwiddie...


  final_df = pd.concat([final_df, df])


Took 2.8826639652252197 seconds to finish
Processing Spencer Dinwiddie...
Took 1.3049547672271729 seconds to finish
Processing Ike Diogu...
Took 2.2297961711883545 seconds to finish
Processing DeSagana Diop...
Took 1.95070219039917 seconds to finish
Processing Terry Dischinger...


  final_df = pd.concat([final_df, df])


Took 1.755530595779419 seconds to finish
Processing Fred Diute...


  final_df = pd.concat([final_df, df])


Took 3.647797107696533 seconds to finish
Processing Vlade Divac...
Took 1.5909430980682373 seconds to finish
Processing Juan Dixon...
Took 19.809478282928467 seconds to finish
Processing Aleksandar Djordjevic...
Took 7.420011758804321 seconds to finish
Processing Earl Dodd...


  final_df = pd.concat([final_df, df])


Took 6.145531415939331 seconds to finish
collecting chunk 1100 through 1200
Processing Michael Doleac...
Took 1.6515591144561768 seconds to finish
Processing Joe Dolhon...


  final_df = pd.concat([final_df, df])


Took 6.854215860366821 seconds to finish
Processing Bob Doll...


  final_df = pd.concat([final_df, df])


Took 6.010287761688232 seconds to finish
Processing James Donaldson...
Took 1.62898588180542 seconds to finish
Processing Luka Doncic...


KeyboardInterrupt: 