# Pulling player counting stats data from `nba_api`

In this notebook we use the `nba_api` library to pull counting stats data for all players listed on the official NBA website.

In [1]:
import numpy as np
import pandas as pd
from nba_api.stats.static import players
from nba_api.stats.endpoints import playercareerstats
from time import sleep

In [2]:
#get dictionary with all players
all_players = players._get_players()

#get a list of just the player ids
all_player_ids = [player["id"] for player in all_players]

#sort the list
all_player_ids.sort()

In [3]:
#make master dataframe all_data
all_data = pd.DataFrame()

In [4]:
#the code below pulls a player's season stats based on a sorted list of their player IDs.
#the dataframe of their season-by-season data is appended to the all_data dataframe, and
#then we sleep for a random amount of time between 1s and 2s before making next API call,
#following advice from here: https://github.com/swar/nba_api/issues/239

i = 0

while i < len(all_player_ids[0:11]):   #for illustrative purposes, just get first 10 players
#while i < len(all_player_ids):        #UNCOMMENT THIS LINE TO GET ALL STATS DATA
    try:
        df = playercareerstats.PlayerCareerStats(player_id=all_player_ids[i]).get_data_frames()[0]  #grab data
        all_data = pd.concat([all_data, df])                                               #append to all_data
        sleep(np.random.uniform(low=2.0, high=3.0))                                        #sleep before next call

        i += 1
        if(i % 5 == 0):
            print(f"Got player {i} out of {len(all_player_ids)}")
    except:
        #if the call to PlayerCareerStats times out, wait and then try again
        print("Timeout. Sleeping for 100 seconds.")
        sleep(100)

Got player 5 out of 5019
Got player 10 out of 5019


In [5]:
#look at the data
all_data.head(3)

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,2,1983-84,0,1610612747,LAL,23.0,74,49,1637.0,334,...,0.806,50,114,164,177,81,19,116,174,788
1,2,1984-85,0,1610612747,LAL,24.0,81,65,2305.0,541,...,0.82,57,153,210,244,100,17,138,197,1295
2,2,1985-86,0,1610612747,LAL,25.0,76,62,2190.0,507,...,0.784,55,134,189,164,85,15,110,167,1174


# Add column with player names

In [6]:
def get_player_name(playerID):
    #use the _find_player_by_id function to get player info dictionary, and
    #return a string for "full_name"
    return players._find_player_by_id(playerID)["full_name"]

In [7]:
all_data["NAME"] = all_data["PLAYER_ID"].apply(get_player_name)

In [8]:
#check that it worked
all_data.head(3)

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,NAME
0,2,1983-84,0,1610612747,LAL,23.0,74,49,1637.0,334,...,50,114,164,177,81,19,116,174,788,Byron Scott
1,2,1984-85,0,1610612747,LAL,24.0,81,65,2305.0,541,...,57,153,210,244,100,17,138,197,1295,Byron Scott
2,2,1985-86,0,1610612747,LAL,25.0,76,62,2190.0,507,...,55,134,189,164,85,15,110,167,1174,Byron Scott


# Adding `SEASON_START` column to dataframe

In [9]:
#define a function that we can .apply() to the dataframe to get the starting year
def getStartYear(yearStr):                                                      
    return int(yearStr[0:4])

#test that it works
assert(getStartYear("1995-96") == 1995)

In [10]:
#apply the function to the dataset
all_data["SEASON_START"] = all_data["SEASON_ID"].apply(getStartYear)

In [11]:
#check that it worked
all_data[["NAME", "SEASON_ID", "SEASON_START"]].sample(5)

Unnamed: 0,NAME,SEASON_ID,SEASON_START
10,Dan Schayes,1989-90,1989
13,Sedale Threatt,1992-93,1992
14,Clyde Drexler,1995-96,1995
3,Clyde Drexler,1986-87,1986
3,Greg Anthony,1994-95,1994


## Save dataset to file

In [15]:
#save dataset (without index column)
all_data.to_csv("season_counting_stats.csv", index=False)