In [1]:
import pandas as pd
import numpy as np
import time
import random
from datetime import datetime


from nba_api.stats.static import players, teams
from nba_api.stats.library.parameters import SeasonAll, SeasonType, MeasureTypeBase


from nba_api.stats.endpoints import playercareerstats, commonallplayers, playerdashboardbyyearoveryear



## Notes

```
# NBA League
NBA → LeagueId = 00

# Current season
SeasonAll.current_season
```

In [2]:

# ~~~ DEMO ~~~
# Lebron
career = playercareerstats.PlayerCareerStats(player_id='2544')
career.get_data_frames()[0]

# print(career.get_data_frames())
# print(career.get_response())

nba_teams = teams.get_teams()

# Select the dictionary for the Celtics, which contains their team ID
celtics = [team for team in nba_teams if team['abbreviation'] == 'BOS'][0]
celtics_id = celtics['id']

In [3]:
# All 2020 players
# print(SeasonAll.current_season)  # current season

all_players = commonallplayers.CommonAllPlayers(is_only_current_season=1, league_id='00', season=SeasonAll.current_season)
players = all_players.get_data_frames()[0]

# ~~~ DEMO ~~~ 
# Atlanta Hawks Player List
hawks = players[players.TEAM_CITY == 'Atlanta']
# print(hawks.count())
# print(hawks)

# All players Data Frame
all_players = players[players.GAMES_PLAYED_FLAG == 'Y']
# print(all_players.head())

The cell below takes a while to run. It runs for every player found (~500) times.

**YOU MAY NEED TO SPLIT UP THE seasons LIST to avoid time out errors.
We split it up 5 seasons at a time.

In [6]:
player_stats_list = []

seasons = ['2010-11', '2011-12', '2012-13', '2013-14', '2014-15', '2015-16', '2016-17', '2017-18', '2018-19', '2019-20']

# , '2015-16', '2016-17', '2017-18', '2018-19', '2019-20'
# '2000-01', '2001-02', '2002-03', '2003-04', '2004-05', '2005-06', '2006-07','2007-08','2008-09','2009-10', 
headers = [
    'PERSON_ID', 'DISPLAY_FIRST_LAST', 'TEAM_ID', 'TEAM_NAME', 'GROUP_SET', 'GROUP_VALUE', 'TEAM_ID', 'TEAM_ABBREVIATION', 'MAX_GAME_DATE', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2', 'TD3', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK', 'NBA_FANTASY_PTS_RANK', 'DD2_RANK', 'TD3_RANK', 'CFID', 'CFPARAMS']

# print(player_stats_df.columns, "\n")
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("Execution started at", current_time)

# Loop through seasons
for season in seasons:

    # Loop though all the players (for development: all_players[:5])
    for index, row in all_players.iterrows():
        # print(row['PERSON_ID'], row['DISPLAY_FIRST_LAST'])

        #Add some sleep time between pulls
        if index % 100 == 0:
            time.sleep(random.uniform(0, 10))
        elif index % 50 == 0:
            time.sleep(random.uniform(0, 3))
        
#         time.sleep(5)
        # Get the player stats by making an API call
        # https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/playerdashboardbyyearoveryear.md
        player_det = playerdashboardbyyearoveryear \
            .PlayerDashboardByYearOverYear(player_id=row['PERSON_ID'], \
                season=season, \
                measure_type_detailed=MeasureTypeBase.base, \
                season_type_playoffs=SeasonType.regular)

        # Get only the needed data; (OverallPlayerDashboard)
        player_stats = player_det.get_data_frames()[1]
        one_player_stats_list = player_stats[player_stats.GROUP_VALUE == season].values.tolist()

        # Some data comes in as blank; need to filter it out
        if len(one_player_stats_list) > 0:
            # Select only the "2019-20" season
            merged_one_player_stats_list = player_stats[player_stats.GROUP_VALUE == season].values.tolist()[0]

            # Append the Person ID, Name, Team to the returned stats
            merged_one_player_stats_list[:0] = [row['PERSON_ID'], row['DISPLAY_FIRST_LAST'], row['TEAM_ID'], row['TEAM_NAME']]
            # Append to list
            player_stats_list.append(merged_one_player_stats_list)


# PLAYERS DATAFRAME
# Basically the "Traditional Splits" from the page below but for all the players.
# https://www.nba.com/stats/player/203500/?sort=DREB&dir=1&Season=2019-20&SeasonType=Regular%20Season&PerMode=Totals
players_stats_df = pd.DataFrame(player_stats_list, columns = headers)

print("\n>>> Player Stats DataFrame\n")
print(players_stats_df)

# Eport data to CSV
players_stats_df.to_csv('../data/input/players_stats_multi_season.csv')

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("Execution completed at", current_time)



Execution started at 23:11:48


ReadTimeout: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)