In [1]:
from nba_api.stats.endpoints import playercareerstats, playerindex, commonplayerinfo, commonteamroster
from nba_api.stats.static import players, teams
import pandas as pd
import numpy as np
import time
pd.set_option('display.max_columns', None)

In [2]:
working_dir = '~/Desktop/MIDS_datasci_209/'

In [3]:
# Checking the various dataframes available 
career = playercareerstats.PlayerCareerStats(player_id="1630173", league_id_nullable='00')
[x['name'] for x in career.get_dict()['resultSets']]

['SeasonTotalsRegularSeason',
 'CareerTotalsRegularSeason',
 'SeasonTotalsPostSeason',
 'CareerTotalsPostSeason',
 'SeasonTotalsAllStarSeason',
 'CareerTotalsAllStarSeason',
 'SeasonTotalsCollegeSeason',
 'CareerTotalsCollegeSeason',
 'SeasonTotalsShowcaseSeason',
 'CareerTotalsShowcaseSeason',
 'SeasonRankingsRegularSeason',
 'SeasonRankingsPostSeason']

In [4]:
# Viewing the 'CareerTotalsRegularSeason' dataframe for one player
career.get_data_frames()[1]
career.get_data_frames()[0]


Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1630173,2020-21,0,1610612748,MIA,21.0,61,4,737.0,124,228,0.544,0,1,0.0,56,110,0.509,73,135,208,29,20,28,43,91,304
1,1630173,2021-22,0,1610612761,TOR,22.0,73,28,1725.0,265,603,0.439,56,156,0.359,78,131,0.595,146,327,473,82,37,41,84,151,664
2,1630173,2022-23,0,1610612761,TOR,23.0,55,12,1141.0,196,404,0.485,29,108,0.269,87,124,0.702,100,228,328,50,31,30,59,102,508
3,1630173,2023-24,0,1610612761,TOR,24.0,25,0,437.0,78,170,0.459,13,47,0.277,24,42,0.571,50,86,136,44,16,12,29,40,193
4,1630173,2023-24,0,1610612752,NYK,24.0,41,18,1064.0,148,274,0.54,12,45,0.267,43,66,0.652,130,186,316,50,28,52,49,88,351
5,1630173,2023-24,0,0,TOT,24.0,66,18,1500.0,226,444,0.509,25,92,0.272,67,108,0.62,180,272,452,94,44,64,78,128,544


In [5]:
# Getting a list of players, and a list of active player IDs
players_list = players.get_players() # I realized I actually could have just used get_active_players() instead
players_ids = [players_list[i]['id'] for i in range(len(players_list)) if players_list[i]['is_active']]
all_players_ids = [players_list[i]['id'] for i in range(len(players_list))]

In [6]:
# Checking length of player_ids list 
len(players_ids)
len(all_players_ids)

4900

In [None]:
# Getting career stats for each player
players_df = pd.DataFrame()
for player in all_players_ids: 
    career_df = playercareerstats.PlayerCareerStats(player_id=player).get_data_frames()[1] # Get career stats df
    players_df = pd.concat([players_df, career_df]) # Concat with players_df
    time.sleep(3) # Wait 3 seconds so the api doesn't block us 

In [None]:
# Drop duplicates
players_df.drop_duplicates(inplace=True)

In [49]:
# Make a dict of player ID : player name
players_dict = {x['id']:x for x in players_list}


In [51]:
# Add the player name to the dataframe
def get_name(row):
    id = row['PLAYER_ID']
    name = players_dict[id]['full_name']
    return name
    
players_df['player_name'] = players_df.apply(get_name, axis=1)

In [None]:
# Add the team name to the dataframe (will be unknown for this because players play in different teams throughout their career)
teams_list = teams.get_teams()
teams_dict = {x['id']:x for x in teams_list}
def get_team(row):
    id = row['TEAM_ID']
    try:
        name = teams_dict[id]['full_name']
    except: 
        name = "Unknown"
    return name
    
players_df['team_name'] = players_df.apply(get_team, axis=1)

In [None]:
# Save to csv
players_df.to_csv(working_dir + 'nba_players_career_all.csv', index=False)

In [None]:
# Getting season stats for each player
all_players_df = pd.DataFrame()
for player in all_players_ids: 
    season_totals_df = playercareerstats.PlayerCareerStats(player_id=player).get_data_frames()[0] # Get season stats df
    all_players_df = pd.concat([all_players_df, season_totals_df]) # Concat with players_df
    time.sleep(3) # Wait 3 seconds so the api doesn't block us 

In [165]:
all_players_df

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,76001,1990-91,00,1610612757,POR,23.0,43,0,290.0,55,116,0.474,0,0,0.0,25,44,0.568,27.0,62.0,89.0,12,4.0,12.0,22.0,39,135
1,76001,1991-92,00,1610612757,POR,24.0,71,1,934.0,178,361,0.493,0,0,0.0,76,101,0.752,81.0,179.0,260.0,30,25.0,16.0,66.0,132,432
2,76001,1992-93,00,1610612749,MIL,25.0,12,0,159.0,26,56,0.464,0,1,0.0,12,16,0.75,12.0,25.0,37.0,10,6.0,4.0,13.0,24,64
3,76001,1992-93,00,1610612738,BOS,25.0,63,52,1152.0,219,417,0.525,0,0,0.0,76,100,0.76,114.0,186.0,300.0,17,19.0,22.0,84.0,165,514
4,76001,1992-93,00,0,TOT,25.0,75,52,1311.0,245,473,0.518,0,1,0.0,88,116,0.759,126.0,211.0,337.0,27,25.0,26.0,97.0,189,578
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,1627826,2020-21,00,1610612746,LAC,24.0,72,33,1609.0,257,394,0.652,1,4,0.25,135,171,0.789,189,330,519,90,24,62,81,187,650
7,1627826,2021-22,00,1610612746,LAC,25.0,76,76,1852.0,310,495,0.626,0,0,0.0,165,227,0.727,217,427,644,120,36,77,114,203,785
8,1627826,2022-23,00,1610612746,LAC,26.0,76,76,2169.0,326,514,0.634,0,2,0.0,166,238,0.697,236,520,756,77,29,98,117,219,818
9,1627826,2023-24,00,1610612746,LAC,26.0,49,49,1286.0,239,369,0.648,0,0,0.0,83,124,0.669,146,307,453,66,11,60,58,134,561


In [169]:
# Add the player name to the dataframe
all_players_df['player_name'] = all_players_df.apply(get_name, axis=1)

In [175]:
# Add the team name to the dataframe
all_players_df['team_name'] = all_players_df.apply(get_team, axis=1)

In [177]:
# Drop duplicates
all_players_df.drop_duplicates(inplace=True)

In [179]:
# Save to csv
all_players_df.to_csv(working_dir + 'nba_players_all.csv', index=False)
