In [11]:
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
from IPython.display import clear_output
import pandas as pd 

In [12]:
# pass in year as a string to get the player game logs for that season
def retrieve_game_logs(season, players=None):
    """Download the game logs of every player listed as active in ``season``.

    The roster is read from ``test_data/players.csv``; a player counts as
    active when ``year_start <= int(season) <= year_end``.  One
    ``playergamelog.PlayerGameLog`` request is issued per player.  A failed
    request is retried on later passes over the roster, up to 50 attempts per
    player id, after which that player is dropped from the result.

    Parameters
    ----------
    season : str
        The season year as a string of digits (e.g. ``"2018"``).  It is passed
        unchanged to ``PlayerGameLog`` and converted with ``int`` for the
        roster filter.
    players : list-like of player ids, optional
        When given and non-empty, only roster rows whose ``player_id`` is in
        this collection are downloaded.  ``None`` or an empty collection means
        "no filter".  NOTE(review): this parameter shadows the module-level
        ``players`` import inside the function; it is kept for compatibility.

    Returns
    -------
    pandas.DataFrame
        All downloaded game logs concatenated, with lower-cased column names,
        an added ``player_name`` column, and the columns restricted to a fixed
        order.  If no game log could be retrieved, an empty frame with those
        same columns is returned.

    Raises
    ------
    AssertionError
        If ``season`` is not a ``str`` or converts to ``0``.
    ValueError
        If ``season`` cannot be converted with ``int``.
    """
    assert isinstance(season, str)
    season_year = int(season)
    assert season_year

    max_attempts = 50
    output_columns = ['season_id', 'player_id', 'player_name', 'game_id', 'game_date', 'matchup', 'wl',
                      'min', 'fgm', 'fga', 'fg_pct', 'fg3m', 'fg3a', 'fg3_pct', 'ftm', 'fta',
                      'ft_pct', 'oreb', 'dreb', 'reb', 'ast', 'stl', 'blk', 'tov', 'pf',
                      'pts', 'plus_minus', 'video_available']

    player_details = pd.read_csv('test_data/players.csv')
    active = (player_details['year_start'] <= season_year) & (player_details['year_end'] >= season_year)
    player_details = player_details.loc[active]

    # `len(...)` instead of truthiness so that array-likes (Series, ndarray)
    # do not raise "truth value is ambiguous"; empty still means "no filter".
    if players is not None and len(players) > 0:
        player_details = player_details.loc[player_details['player_id'].isin(players)]

    # Positional access: assumes the first two CSV columns are the player id
    # and the full name -- TODO confirm against test_data/players.csv.
    season_players = [{'id': p[0], 'full_name': p[1]} for p in player_details.values]
    attempts = {p['id']: 1 for p in season_players}
    total = len(season_players)

    game_logs = []
    pending = list(season_players)
    while pending:
        # Players whose request failed on this pass and still have attempts left.
        retry = []
        for position, player in enumerate(pending):
            clear_output(wait=True)
            # Outstanding = already re-queued + not yet visited on this pass.
            completed = total - (len(retry) + len(pending) - position)
            print(f"{round((completed / total) * 100, 2)}% completion")
            print(f"Retrieving {season} game logs for {player['full_name']}")
            try:
                game_log = playergamelog.PlayerGameLog(player_id=player['id'], season=season).get_data_frames()[0]
            except Exception:
                # `Exception`, not a bare except: KeyboardInterrupt/SystemExit
                # must propagate so the download can actually be stopped.
                i = attempts[player['id']]
                if i < max_attempts:
                    print(f"Attempt {i} FAILED to retrieve game logs for {player['full_name']}. Queued for retry.")
                    attempts[player['id']] = i + 1
                    retry.append(player)
                else:
                    print(f"Attempt {i} FAILED to retrieve game logs for {player['full_name']}. Max attempts for player has been met.")
            else:
                game_logs.append(game_log)
        pending = retry

    print('100%')
    if not game_logs:
        # pd.concat raises ValueError on an empty list; return an empty frame
        # with the expected schema instead.
        return pd.DataFrame(columns=output_columns)

    df = pd.concat(game_logs)
    df.columns = [col.lower() for col in df.columns]

    player_names = {p['id']: p['full_name'] for p in season_players}
    df['player_name'] = df['player_id'].map(player_names)

    return df[output_columns]

In [14]:
# Fetch the game logs for each season from 2015 through 2021 and save one CSV
# per season under test_data/.
first_season, last_season = 2015, 2021
for season in range(first_season, last_season + 1):
    # retrieve_game_logs expects the season as a string.
    df = retrieve_game_logs(str(season))
    df.to_csv(f'test_data/{season}_game_logs.csv', index=False)

100%
