In [None]:
# https://github.com/swar/nba_api/issues/124

In [165]:
import pandas as pd
from time import time
from nba_api.stats.static import players
from nba_api.stats.endpoints import commonplayerinfo
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.endpoints import playerawards

In [171]:
from nba_api.stats.library import http

print(http.STATS_HEADERS)

{'Host': 'stats.nba.com', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0', 'Accept': 'application/json, text/plain, */*', 'Accept-Language': 'en-US,en;q=0.5', 'Accept-Encoding': 'gzip, deflate, br', 'x-nba-stats-origin': 'stats', 'x-nba-stats-token': 'true', 'Connection': 'keep-alive', 'Referer': 'https://stats.nba.com/', 'Pragma': 'no-cache', 'Cache-Control': 'no-cache'}


In [166]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [167]:
# function which collects player seasons and drops before modern 3 point era and insufficient data

def collect_seasons(id):
    season_filter = 1998 # the first year of the modern 3 point era

    career = playercareerstats.PlayerCareerStats(player_id=id)
    df = career.get_data_frames()[0]

    df['SEASON_ID'] = df['SEASON_ID'].apply(lambda x: int(x[:4]) + 1) #make seasons numeric type
    df = df[df['SEASON_ID'] >= season_filter] # drop any seasons before modern era

    return df

In [168]:
# function which gets player awards

# fun fact and test case: dominique wilkins in 94 made an All-NBA team
# but played for two teams that year

def collect_awards(id):
    awards = playerawards.PlayerAwards(player_id=id)

    df = awards.get_data_frames()[0]

    df = df[df['DESCRIPTION'] == 'All-NBA']
    df = df[['SEASON', 'ALL_NBA_TEAM_NUMBER']]

    df['SEASON'] = df['SEASON'].apply(lambda x: int(x[:4]) + 1) #make seasons numeric type

    return df

In [169]:
# function which joins the awards and seasons

def stat_join(seasons, awards):
    df = seasons.join(awards.set_index('SEASON'), on='SEASON_ID')
    df['ALL_NBA_TEAM_NUMBER'] = df['ALL_NBA_TEAM_NUMBER'].fillna(0)
    return df

In [170]:
# function which loops through each player

nba_players = players.get_players()

final_df = pd.DataFrame()

for count, player in enumerate(players.get_players()[:1000]):
    id = player['id']
    name = player['full_name']

    start = time()
    print(f"Processing {name}...")

    seasons = collect_seasons(id)
    awards = collect_awards(id)
    df = stat_join(seasons, awards)
    df['NAME'] = player['full_name']

    if count == 0:
        final_df = df
    else:
        final_df = pd.concat([final_df, df])
    
    end = time()
    print(f"Took {end - start} seconds to finish")

Processing Alaa Abdelnaby...


ReadTimeout: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)

In [None]:
final_df

In [161]:
final_df.to_csv('../data/data.csv')