In [152]:
import pandas as pd
from time import time
import nba_api
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.endpoints import playerawards

In [153]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [154]:
# function which collects player seasons and drops before modern 3 point era and insufficient data

def collect_seasons(id):
    season_filter = 1998 # the first year of the modern 3 point era

    career = playercareerstats.PlayerCareerStats(player_id=id)
    df = career.get_data_frames()[0]

    df['SEASON_ID'] = df['SEASON_ID'].apply(lambda x: int(x[:4]) + 1) #make seasons numeric type
    df = df[df['SEASON_ID'] >= season_filter] # drop any seasons before modern era

    return df

In [155]:
# function which gets player awards

# fun fact and test case: dominique wilkins in 94 made an All-NBA team
# but played for two teams that year

def collect_awards(id):
    awards = playerawards.PlayerAwards(player_id=id)

    df = awards.get_data_frames()[0]

    df = df[df['DESCRIPTION'] == 'All-NBA']
    df = df[['SEASON', 'ALL_NBA_TEAM_NUMBER']]

    df['SEASON'] = df['SEASON'].apply(lambda x: int(x[:4]) + 1) #make seasons numeric type

    return df

In [156]:
# function which joins the awards and seasons

def stat_join(seasons, awards):
    df = seasons.join(awards.set_index('SEASON'), on='SEASON_ID')
    df['ALL_NBA_TEAM_NUMBER'] = df['ALL_NBA_TEAM_NUMBER'].fillna(0)
    return df

In [159]:
# function which loops through each player

nba_players = players.get_players()

final_df = pd.DataFrame()

for count, player in enumerate(players.get_players()[:100]):
    id = player['id']
    name = player['full_name']

    start = time()
    print(f"Processing {name}...")

    seasons = collect_seasons(id)
    awards = collect_awards(id)
    df = stat_join(seasons, awards)
    df['NAME'] = player['full_name']

    if count == 0:
        final_df = df
    else:
        final_df = pd.concat([final_df, df])
    
    end = time()
    print(f"Took {end - start} seconds to finish")

Processing Alaa Abdelnaby...
Took 0.43891334533691406 seconds to finish
Processing Zaid Abdul-Aziz...
Took 0.42555761337280273 seconds to finish
Processing Kareem Abdul-Jabbar...
Took 0.31864452362060547 seconds to finish
Processing Mahmoud Abdul-Rauf...


  final_df = pd.concat([final_df, df])


Took 0.4627072811126709 seconds to finish
Processing Tariq Abdul-Wahad...
Took 0.40835070610046387 seconds to finish
Processing Shareef Abdur-Rahim...
Took 1.373192310333252 seconds to finish
Processing Tom Abernethy...
Took 1.6619038581848145 seconds to finish
Processing Forest Able...


  final_df = pd.concat([final_df, df])


Took 1.3359005451202393 seconds to finish
Processing John Abramovic...


  final_df = pd.concat([final_df, df])


Took 2.3885092735290527 seconds to finish
Processing Alex Abrines...
Took 1.523010015487671 seconds to finish
Processing Precious Achiuwa...
Took 0.6042819023132324 seconds to finish
Processing Alex Acker...
Took 1.7169873714447021 seconds to finish
Processing Donald Ackerman...


  final_df = pd.concat([final_df, df])


Took 1.5302512645721436 seconds to finish
Processing Mark Acres...
Took 1.3856542110443115 seconds to finish
Processing Charles Acton...


  final_df = pd.concat([final_df, df])


Took 1.9057269096374512 seconds to finish
Processing Quincy Acy...
Took 1.448082685470581 seconds to finish
Processing Alvan Adams...
Took 2.5062849521636963 seconds to finish
Processing Don Adams...


  final_df = pd.concat([final_df, df])


Took 2.5459237098693848 seconds to finish
Processing Hassan Adams...
Took 1.3590073585510254 seconds to finish
Processing Jaylen Adams...
Took 2.3735432624816895 seconds to finish
Processing Jordan Adams...
Took 2.2745540142059326 seconds to finish
Processing Michael Adams...
Took 2.7181508541107178 seconds to finish
Processing Steven Adams...
Took 0.5773468017578125 seconds to finish
Processing Rafael Addison...
Took 3.0105090141296387 seconds to finish
Processing Bam Adebayo...
Took 0.6497209072113037 seconds to finish
Processing Deng Adel...
Took 2.943626880645752 seconds to finish
Processing Rick Adelman...


  final_df = pd.concat([final_df, df])


Took 2.4832403659820557 seconds to finish
Processing Jeff Adrien...
Took 1.9590795040130615 seconds to finish
Processing Arron Afflalo...
Took 2.9204649925231934 seconds to finish
Processing Ochai Agbaji...
Took 0.6059246063232422 seconds to finish
Processing Maurice Ager...
Took 3.4128177165985107 seconds to finish
Processing Mark Aguirre...
Took 2.309556245803833 seconds to finish
Processing Blake Ahearn...
Took 2.8293280601501465 seconds to finish
Processing Danny Ainge...
Took 1.6201999187469482 seconds to finish
Processing Alexis Ajinca...
Took 1.2597761154174805 seconds to finish
Processing Henry Akin...


  final_df = pd.concat([final_df, df])


Took 2.3137221336364746 seconds to finish
Processing Josh Akognon...
Took 1.5431678295135498 seconds to finish
Processing DeVaughn Akoon-Purcell...
Took 1.5861704349517822 seconds to finish
Processing Solomon Alabi...
Took 1.535146951675415 seconds to finish
Processing Mark Alarie...
Took 1.3754212856292725 seconds to finish
Processing Gary Alcorn...


  final_df = pd.concat([final_df, df])


Took 1.8549108505249023 seconds to finish
Processing Santi Aldama...
Took 0.3940744400024414 seconds to finish
Processing Furkan Aldemir...
Took 1.2473211288452148 seconds to finish
Processing Cole Aldrich...
Took 2.6601510047912598 seconds to finish
Processing LaMarcus Aldridge...
Took 1.5543296337127686 seconds to finish
Processing Chuck Aleksinas...
Took 1.533600091934204 seconds to finish
Processing Cliff Alexander...
Took 1.5557823181152344 seconds to finish
Processing Cory Alexander...
Took 2.455441474914551 seconds to finish
Processing Courtney Alexander...
Took 2.3204617500305176 seconds to finish
Processing Gary Alexander...
Took 1.4399185180664062 seconds to finish
Processing Joe Alexander...
Took 2.4613609313964844 seconds to finish
Processing Kyle Alexander...
Took 2.3001549243927 seconds to finish
Processing Ty-Shon Alexander...
Took 1.5475478172302246 seconds to finish
Processing Victor Alexander...
Took 2.7891385555267334 seconds to finish
Processing Nickeil Alexander-Wa

  final_df = pd.concat([final_df, df])


Took 1.6250312328338623 seconds to finish
Processing Grayson Allen...
Took 0.6326422691345215 seconds to finish
Processing Jarrett Allen...
Took 0.7741265296936035 seconds to finish
Processing Jerome Allen...
Took 1.2999086380004883 seconds to finish
Processing Kadeem Allen...
Took 2.430180549621582 seconds to finish
Processing Lavoy Allen...
Took 1.6774845123291016 seconds to finish
Processing Lucius Allen...


  final_df = pd.concat([final_df, df])


Took 1.6080379486083984 seconds to finish
Processing Malik Allen...
Took 1.2514493465423584 seconds to finish
Processing Randy Allen...
Took 2.0499327182769775 seconds to finish
Processing Ray Allen...
Took 0.9459161758422852 seconds to finish
Processing Tony Allen...
Took 2.116469144821167 seconds to finish
Processing Odis Allison...


  final_df = pd.concat([final_df, df])


Took 2.659696578979492 seconds to finish
Processing Lance Allred...
Took 2.4326939582824707 seconds to finish
Processing Darrell Allums...
Took 3.805911064147949 seconds to finish
Processing Morris Almond...
Took 2.2423245906829834 seconds to finish
Processing Derrick Alston...
Took 0.4900815486907959 seconds to finish
Processing Rafer Alston...
Took 2.5947067737579346 seconds to finish
Processing Peter Aluma...
Took 1.3319697380065918 seconds to finish
Processing Jose Alvarado...
Took 0.6056029796600342 seconds to finish
Processing John Amaechi...
Took 2.3053500652313232 seconds to finish
Processing Ashraf Amaya...
Took 1.685767412185669 seconds to finish
Processing Al-Farouq Aminu...
Took 4.112792491912842 seconds to finish
Processing Lou Amundson...
Took 1.7776603698730469 seconds to finish
Processing Bob Anderegg...


  final_df = pd.concat([final_df, df])


Took 2.3196074962615967 seconds to finish
Processing Chris Andersen...
Took 1.808495044708252 seconds to finish
Processing David Andersen...
Took 1.779123306274414 seconds to finish
Processing Alan Anderson...
Took 2.522517204284668 seconds to finish
Processing Antonio Anderson...
Took 3.158414363861084 seconds to finish
Processing Cliff Anderson...


  final_df = pd.concat([final_df, df])


Took 8.548181295394897 seconds to finish
Processing Daniel Anderson...


  final_df = pd.concat([final_df, df])


Took 2.419771909713745 seconds to finish
Processing Derek Anderson...
Took 2.4767277240753174 seconds to finish
Processing Dwight Anderson...
Took 2.506911039352417 seconds to finish
Processing Eric Anderson...
Took 1.5328259468078613 seconds to finish
Processing Greg Anderson...
Took 0.4900016784667969 seconds to finish
Processing James Anderson...
Took 1.814366102218628 seconds to finish
Processing Jerome Anderson...


  final_df = pd.concat([final_df, df])


Took 2.625643730163574 seconds to finish
Processing Justin Anderson...
Took 2.548380136489868 seconds to finish
Processing Kenny Anderson...
Took 1.7660069465637207 seconds to finish
Processing Kim Anderson...


  final_df = pd.concat([final_df, df])


Took 3.376744031906128 seconds to finish
Processing Kyle Anderson...
Took 0.6476292610168457 seconds to finish
Processing Michael Anderson...
Took 2.305745840072632 seconds to finish
Processing Mitchell Anderson...
Took 1.6038563251495361 seconds to finish
Processing Nick Anderson...
Took 2.5454187393188477 seconds to finish


In [160]:
final_df

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,ALL_NBA_TEAM_NUMBER,NAME
7,51,1998,0,1610612758,SAC,29.0,31,0,530.0,103,273,0.377,5,31,0.161,16,16,1.0,6.0,31.0,37.0,58,16.0,1.0,19.0,31,227,0,Mahmoud Abdul-Rauf
8,51,2001,0,1610612763,VAN,32.0,41,0,485.0,120,246,0.488,4,14,0.286,22,29,0.759,5.0,20.0,25.0,76,9.0,1.0,26.0,50,266,0,Mahmoud Abdul-Rauf
0,1505,1998,0,1610612758,SAC,23.0,59,16,959.0,144,357,0.403,4,19,0.211,84,125,0.672,44.0,72.0,116.0,51,35.0,13.0,65.0,81,376,0,Tariq Abdul-Wahad
1,1505,1999,0,1610612758,SAC,24.0,49,49,1205.0,177,407,0.435,6,21,0.286,94,136,0.691,72.0,114.0,186.0,50,50.0,16.0,70.0,121,454,0,Tariq Abdul-Wahad
2,1505,2000,0,1610612753,ORL,25.0,46,46,1205.0,223,515,0.433,2,21,0.095,115,151,0.762,77.0,162.0,239.0,72,53.0,16.0,87.0,116,563,0,Tariq Abdul-Wahad
3,1505,2000,0,1610612743,DEN,25.0,15,10,373.0,51,131,0.389,1,2,0.5,31,42,0.738,24.0,28.0,52.0,26,6.0,12.0,19.0,31,134,0,Tariq Abdul-Wahad
4,1505,2000,0,0,TOT,25.0,61,56,1578.0,274,646,0.424,3,23,0.13,146,193,0.756,101.0,190.0,291.0,98,59.0,28.0,106.0,147,697,0,Tariq Abdul-Wahad
5,1505,2001,0,1610612743,DEN,26.0,29,12,421.0,43,111,0.387,4,10,0.4,21,36,0.583,14.0,45.0,59.0,22,14.0,13.0,34.0,54,111,0,Tariq Abdul-Wahad
6,1505,2002,0,1610612743,DEN,27.0,20,12,419.0,55,145,0.379,1,2,0.5,24,32,0.75,39.0,39.0,78.0,22,18.0,9.0,24.0,51,135,0,Tariq Abdul-Wahad
7,1505,2002,0,1610612742,DAL,27.0,4,0,24.0,0,2,0.0,0,0,0.0,0,1,0.0,2.0,4.0,6.0,2,2.0,1.0,3.0,5,0,0,Tariq Abdul-Wahad


In [161]:
final_df.to_csv('../data/data.csv')