In [1]:
from sportsreference.nba.roster import Player
from sportsreference.nba.roster import Roster
from sportsreference.nba.teams import Teams
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import pandas as pd

### Create Functions for Extracting Player Info

In [2]:
# Fetch one sample player by player id; used below to try out get_player_df
sc = Player('curryst01')

In [9]:
# Function to get player info from Player object

def get_player_df(player):
    """Return a player's stats table with identifying columns, keyed by player id and year.

    Parameters
    ----------
    player : object exposing ``dataframe``, ``birth_date``, ``player_id`` and ``name``
        Each label of ``player.dataframe.index`` is expected to be a tuple whose
        first element is either a season string such as "2019-20" or the
        literal "Career".

    Returns
    -------
    pandas.DataFrame
        A copy of ``player.dataframe`` with the extra columns ``birth_date``,
        ``player_id``, ``name``, ``year`` and ``age``, indexed by
        ``"<player_id> <year>"``. ``year`` is the season's ending year as a
        string ("Career" for the career-totals row). ``age`` is the age on
        January 1st of that ending year, in years with whole-month resolution;
        it is None for the career row and when the birth date is missing.
    """

    # helper function to get player age during each season
    def get_age(year, bd):
        # Career rows span many seasons, so they have no single age.
        # A missing birth date must be handled explicitly: relativedelta(dt, None)
        # does not raise, it returns an empty delta, which would show up as age 0.0.
        if year[0] == "Career" or pd.isna(bd):
            return None
        # reference date: January 1st of the season's ending year
        year_dt = datetime(int(year[0][0:4]) + 1, 1, 1)
        delta = relativedelta(year_dt, bd)
        return delta.years + delta.months / 12

    # helper function to get year for each row and denote rows that contain career totals
    def get_year(ix):
        if ix[0] == "Career":
            return "Career"
        # ending year = starting year + 1 ("1999-00" -> "2000", "2019-20" -> "2020")
        return str(int(ix[0][0:4]) + 1)

    # work on a copy so the frame held by `player` is never modified
    player_df = player.dataframe.copy()
    row_labels = list(player_df.index)
    birth_date = player.birth_date
    player_id = player.player_id

    # add identifying info and per-row derived values
    player_df['birth_date'] = birth_date
    player_df['player_id'] = player_id
    player_df['name'] = player.name
    player_df['year'] = [get_year(ix) for ix in row_labels]
    player_df['id'] = [player_id + ' ' + year for year in player_df['year']]
    player_df['age'] = [get_age(ix, birth_date) for ix in row_labels]

    return player_df.set_index('id', drop = True)

In [10]:
# Build the enriched stats table for the sample player
scdf = get_player_df(sc)

In [11]:
# Inspect which columns the enriched sample table contains
scdf.columns

Index(['and_ones', 'assist_percentage', 'assists', 'block_percentage',
       'blocking_fouls', 'blocks', 'box_plus_minus', 'center_percentage',
       'defensive_box_plus_minus', 'defensive_rebound_percentage',
       'defensive_rebounds', 'defensive_win_shares', 'dunks',
       'effective_field_goal_percentage', 'field_goal_attempts',
       'field_goal_perc_sixteen_foot_plus_two_pointers',
       'field_goal_perc_ten_to_sixteen_feet',
       'field_goal_perc_three_to_ten_feet',
       'field_goal_perc_zero_to_three_feet', 'field_goal_percentage',
       'field_goals', 'free_throw_attempt_rate', 'free_throw_attempts',
       'free_throw_percentage', 'free_throws', 'games_played', 'games_started',
       'half_court_heaves', 'half_court_heaves_made', 'height',
       'lost_ball_turnovers', 'minutes_played', 'nationality',
       'net_plus_minus', 'offensive_box_plus_minus', 'offensive_fouls',
       'offensive_rebound_percentage', 'offensive_rebounds',
       'offensive_win_shares', '

### Get Data for All Players
Include everyone who appeared on an NBA roster in any season from 2000 through 2020.

In [12]:
# Pull stats for every player found on any team roster in the seasons below.
# Each player is fetched once (the first time their id is seen). Their per-season
# rows and their career-totals row are kept in separate lists, and each list is
# concatenated a single time after the loops. Concatenating inside the loop would
# recopy every previously collected row on each iteration.

# player ids that we have already pulled data for, in pull order
players_collected = []
season_frames = []
career_frames = []

# iterate through years, most recent first
for year in range(2020, 1999, -1):
    print('\n' + str(year))

    # iterate through all teams in that year
    for team in Teams(year = str(year)).dataframes.index:
        print('\n' + team + '\n')

        # iterate through every player on a team roster
        for player_id in Roster(team, year = year, slim = True).players.keys():

            # only pull player info if that player hasn't been pulled already
            if player_id in players_collected:
                continue

            player = Player(player_id)
            player_info = get_player_df(player)

            # split the career-totals row from the per-season rows
            is_career_row = player_info['year'] == "Career"
            season_frames.append(player_info[~is_career_row])
            career_frames.append(player_info[is_career_row])

            # add player to players_collected
            players_collected.append(player_id)
            print(player.name)

# build the final tables once; pd.concat raises on an empty list, so fall back
# to an empty frame when nothing was collected
season_df = pd.concat(season_frames, axis = 0) if season_frames else pd.DataFrame()
career_df = pd.concat(career_frames, axis = 0) if career_frames else pd.DataFrame()
                


2020

DAL

Maxi Kleber
Delon Wright
Dorian Finney-Smith
Tim Hardaway Jr.
Seth Curry
Justin Jackson
Jalen Brunson
Luka Dončić
Kristaps Porziņģis
Dwight Powell
Boban Marjanović
J.J. Barea
Courtney Lee
Willie Cauley-Stein
Michael Kidd-Gilchrist
Antonius Cleveland
Josh Reaves

MIL

Wesley Matthews
Brook Lopez
Pat Connaughton
Robin Lopez
Donte DiVincenzo
Giannis Antetokounmpo
Eric Bledsoe
Ersan İlyasova
Khris Middleton
George Hill
Kyle Korver
Sterling Brown
D.J. Wilson
Thanasis Antetokounmpo
Marvin Williams
Frank Mason III
Cameron Reynolds

HOU

P.J. Tucker
Ben McLemore
James Harden
Austin Rivers
Danuel House
Russell Westbrook
Thabo Sefolosha
Eric Gordon
Chris Clemons
Tyson Chandler
Isaiah Hartenstein
Robert Covington
Jeff Green
Michael Frazier
DeMarre Carroll
Bruno Caboclo
William Howard

POR

Anfernee Simons
CJ McCollum
Hassan Whiteside
Damian Lillard
Gary Trent Jr.
Carmelo Anthony
Nassir Little
Mario Hezonja
Trevor Ariza
Rodney Hood
Caleb Swanigan
Wenyen Gabriel
Jaylen Hoard
Moses Brown

In [15]:
# Write both result tables to CSV; the paths are relative to the working
# directory. NOTE(review): presumably the Data/ folder must already exist, confirm
season_df.to_csv('Data/nba_player_stats_by_season.csv')
career_df.to_csv('Data/nba_player_stats_by_career.csv')

In [14]:
# Display the combined per-season table (one row per player and season)
season_df

Unnamed: 0_level_0,and_ones,assist_percentage,assists,block_percentage,blocking_fouls,blocks,box_plus_minus,center_percentage,defensive_box_plus_minus,defensive_rebound_percentage,...,two_pointers_assisted_percentage,usage_percentage,value_over_replacement_player,weight,win_shares,win_shares_per_48_minutes,birth_date,name,year,age
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
klebima01 2018,,6.4,51.0,3.5,,47.0,-0.5,0.0,0.5,15.6,...,0.793,13.7,0.5,240.0,2.8,0.113,1992-01-29,Maxi Kleber,2018,25.916667
klebima01 2019,,6.8,70.0,4.4,,78.0,0.4,0.0,1.2,16.9,...,0.724,13.5,0.9,240.0,3.5,0.111,1992-01-29,Maxi Kleber,2019,26.916667
klebima01 2020,,6.2,77.0,3.9,,78.0,0.6,0.0,0.2,15.5,...,0.822,13.9,1.1,240.0,4.6,0.131,1992-01-29,Maxi Kleber,2020,27.916667
wrighde01 2016,,22.5,31.0,1.1,,3.0,0.9,0.0,-0.3,14.3,...,0.161,22.1,0.2,183.0,0.6,0.124,1992-04-26,Delon Wright,2016,23.666667
wrighde01 2017,,18.2,57.0,2.1,,11.0,2.0,0.0,2.1,8.1,...,0.128,16.6,0.5,183.0,1.1,0.117,1992-04-26,Delon Wright,2017,24.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
reevekh01 1997,,25.6,226.0,0.5,,9.0,-2.4,0.0,-1.4,6.9,...,0.277,18.7,-0.1,199.0,1.2,0.041,1972-07-15,Khalid Reeves,1997,24.416667
reevekh01 1998,,19.8,230.0,0.4,,10.0,-1.9,0.0,-0.4,7.6,...,0.292,19.2,0.0,199.0,2.5,0.062,1972-07-15,Khalid Reeves,1998,25.416667
reevekh01 1999,,15.9,11.0,0,,0.0,-5.8,0.0,-1.0,4.4,...,0.571,14.5,-0.1,199.0,0.1,0.025,1972-07-15,Khalid Reeves,1999,26.416667
reevekh01 2000,,46.3,13.0,0,,0.0,-5.2,0.0,-0.5,5,...,0.333,19.3,0.0,199.0,0.0,-0.046,1972-07-15,Khalid Reeves,2000,27.416667


In [162]:
#### Mini Test ####

# Small-scale run of the collection logic on a single roster.
# WARNING: this cell assigns players_collected, season_df and career_df, the same
# names the full-league pull uses, so running it replaces those results.

# player ids that we have already pulled data for, in pull order
players_collected = []
season_frames = []
career_frames = []

# iterate through every player on a team roster
for player in Roster('GSW', year = 2020).players:

    # only pull player info if that player hasn't been pulled already
    if player.player_id in players_collected:
        continue

    player_info = get_player_df(player)

    # split the career-totals row from the per-season rows
    is_career_row = player_info['year'] == "Career"
    season_frames.append(player_info[~is_career_row])
    career_frames.append(player_info[is_career_row])

    # add player to players_collected
    players_collected.append(player.player_id)

    print(player.name)

# build the final tables once; pd.concat raises on an empty list, so fall back
# to an empty frame when nothing was collected
season_df = pd.concat(season_frames, axis = 0) if season_frames else pd.DataFrame()
career_df = pd.concat(career_frames, axis = 0) if career_frames else pd.DataFrame()

Eric Paschall
Marquese Chriss
Jordan Poole
Damion Lee
Ky Bowman
Draymond Green
Kevon Looney
Alen Smailagić
Juan Toscano-Anderson
Andrew Wiggins
Dragan Bender
Mychal Mulder
Stephen Curry
Chasson Randle
Klay Thompson


In [136]:
# Show the index labels of the 2020 teams table; the full pull iterates over these
Teams(year = '2020').dataframes.index

Index(['DAL', 'MIL', 'HOU', 'POR', 'ATL', 'NOP', 'LAC', 'WAS', 'MEM', 'PHO',
       'MIA', 'MIN', 'BOS', 'TOR', 'LAL', 'DEN', 'SAS', 'PHI', 'IND', 'UTA',
       'OKC', 'BRK', 'DET', 'NYK', 'SAC', 'CLE', 'CHI', 'ORL', 'GSW', 'CHO'],
      dtype='object')