In [1]:
# Partially taken from: 
# https://stats.nba.com/players/traditional/?PerMode=Totals&dir=-1&sort=PTS&Season=2019-20&SeasonType=Regular%20Season
# http://www.espn.com/nba/salaries


# Imports
import pandas as pd
import numpy as np
import nba_api
from nba_api import stats
from nba_api.stats import static
from nba_api.stats.static import players
from nba_api.stats import endpoints
from nba_api.stats.endpoints import shotchartdetail
from nba_api.stats.endpoints import commonplayerinfo


In [2]:
# Get all the active players
all_active_players = players.get_active_players()
# See one specific player - the first active
print(all_active_players[0])

{'id': 203500, 'full_name': 'Steven Adams', 'first_name': 'Steven', 'last_name': 'Adams', 'is_active': True}


In [3]:
# See all players
print(all_active_players)

[{'id': 203500, 'full_name': 'Steven Adams', 'first_name': 'Steven', 'last_name': 'Adams', 'is_active': True}, {'id': 1628389, 'full_name': 'Bam Adebayo', 'first_name': 'Bam', 'last_name': 'Adebayo', 'is_active': True}, {'id': 200746, 'full_name': 'LaMarcus Aldridge', 'first_name': 'LaMarcus', 'last_name': 'Aldridge', 'is_active': True}, {'id': 1629734, 'full_name': 'Kyle Alexander', 'first_name': 'Kyle', 'last_name': 'Alexander', 'is_active': True}, {'id': 1629638, 'full_name': 'Nickeil Alexander-Walker', 'first_name': 'Nickeil', 'last_name': 'Alexander-Walker', 'is_active': True}, {'id': 1628960, 'full_name': 'Grayson Allen', 'first_name': 'Grayson', 'last_name': 'Allen', 'is_active': True}, {'id': 1628386, 'full_name': 'Jarrett Allen', 'first_name': 'Jarrett', 'last_name': 'Allen', 'is_active': True}, {'id': 1628443, 'full_name': 'Kadeem Allen', 'first_name': 'Kadeem', 'last_name': 'Allen', 'is_active': True}, {'id': 202329, 'full_name': 'Al-Farouq Aminu', 'first_name': 'Al-Farouq',

In [4]:
# Make all players a DF
df = pd.DataFrame.from_dict(all_active_players)
df.shape

(519, 5)

In [5]:
# Bring in the salary DF for merging
url = 'https://raw.githubusercontent.com/jacobpad/jacobpad.github.io/master/data/2019_2020_nba_records.csv'
col_headers_for_salary_rank = ['salary_rank', 'full_name', 'position', 'team',
                               'salary']
df_salary_rank = pd.read_csv(url, # Calls CSV at above URL
                            header=None, # Eliminates col headers
                            names=col_headers_for_salary_rank) # Set col headers
df_salary_rank = df_salary_rank.drop(index=0, # Drops original col headers
                                     axis=0)
df_salary_rank = df_salary_rank.reset_index()
df_salary_rank.shape

(517, 6)

In [6]:
# Merge the DF's together
df_merged = pd.merge(df_salary_rank, df, on='full_name', how='inner')
df = df_merged
df.shape

(443, 10)

In [7]:
df.head(2)

Unnamed: 0,index,salary_rank,full_name,position,team,salary,id,first_name,last_name,is_active
0,1,1,Stephen Curry,PG,Golden State Warriors,"$40,231,758",201939,Stephen,Curry,True
1,2,2,Chris Paul,PG,Oklahoma City Thunder,"$38,506,482",101108,Chris,Paul,True


In [8]:
# Check if I have any null values
df.isnull().sum()

index          0
salary_rank    0
full_name      0
position       0
team           0
salary         0
id             0
first_name     0
last_name      0
is_active      0
dtype: int64

In [9]:
# Drop the is_active column
df = df.drop('is_active', axis=1)
df.shape

(443, 9)

In [10]:
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################

###### Not quite sure why the api isn't letting me work with it the way I'd expect to.

In [11]:
# stats_of_players = shotchartdetail.ShotChartDetail.player_stats()
# stats_of_players

In [12]:
# custom_headers = {
#     'Host': 'stats.nba.com',
#     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
# }

# player_stats = shotchartdetail.ShotChartDetail.(player_id=201939, 
#                                                headers=custom_headers, 
#                                                team_id=)
# player_stats

In [13]:
# player_stats = shotchartdetail.ShotChartDetail.
# player_stats

In [14]:
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
##############################################################################################
col_headers_for_general_stats = ['full_name', 'TEAM', 'AGE', 'GP', 'W', 'L', 'MIN_PLAYED', 'PTS', 'FGM', 'FGA',
       'FG%', 'x_3PM', 'x_3PA', 'x_3P%', 'FTM', 'FTA', 'FT%', 'OREB', 'DREB', 'REB',
       'AST', 'TOV', 'STL', 'BLK', 'PF', 'FP', 'DD2', 'TD3', 'plus_minus']
general_stats = pd.read_csv('https://raw.githubusercontent.com/jacobpad/jacobpad.github.io/master/data/nba_player_general_stats_2019-2020.csv',
                           header = None,
                           names = col_headers_for_general_stats)
general_stats = general_stats.drop(index=0, axis=0)
general_stats = general_stats.reset_index()

general_stats

Unnamed: 0,index,full_name,TEAM,AGE,GP,W,L,MIN_PLAYED,PTS,FGM,...,REB,AST,TOV,STL,BLK,PF,FP,DD2,TD3,plus_minus
0,1,James Harden,HOU,30,53,35,18,1958,1867,536,...,341,391,233,88,52,180,3049.7,18,3,273
1,2,Damian Lillard,POR,29,54,25,29,1996,1594,507,...,235,429,158,52,18,91,2571.5,16,1,47
2,3,Trae Young,ATL,21,52,15,37,1840,1558,481,...,228,477,255,60,6,89,2490.1,22,2,-205
3,4,Giannis Antetokounmpo,MIL,25,50,43,7,1543,1502,554,...,679,292,182,55,53,155,2896.8,45,4,556
4,5,Zach LaVine,CHI,24,58,20,38,2009,1463,510,...,281,243,200,86,28,122,2306.7,2,0,-183
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453,454,Quinndary Weatherspoon,SAS,23,1,0,1,4,0,0,...,0,0,0,0,0,0,0,0,0,-3
454,455,Stanton Kidd,UTA,27,4,3,1,15,0,0,...,3,1,2,0,0,3,3.1,0,0,-1
455,456,Talen Horton-Tucker,LAL,19,2,1,1,5,0,0,...,0,2,0,1,0,0,6,0,0,-7
456,457,Vic Law,ORL,24,3,0,3,4,0,0,...,0,0,0,0,0,0,0,0,0,3


In [15]:
# Merge the DF's together
df_merged = pd.merge(df, general_stats, on='full_name', how='inner')
df_merged.shape

(390, 38)

In [16]:
df = df_merged
df.shape

(390, 38)

In [17]:
# I want: full team name, 
#         drop index_x col,
#         drop index_y col,
#         change id to player_id,
#         names, TEAM

In [18]:
df.drop(['index_x', 'index_y', 'salary_rank', 'full_name','id','first_name','last_name','plus_minus','TEAM'], 
        axis=1, inplace=True)
df.head(1)

Unnamed: 0,position,team,salary,AGE,GP,W,L,MIN_PLAYED,PTS,FGM,...,DREB,REB,AST,TOV,STL,BLK,PF,FP,DD2,TD3
0,PG,Golden State Warriors,"$40,231,758",31,4,1,3,112,81,27,...,16,20,26,15,5,2,10,150,1,0


In [19]:
# Check if I have any null values
# df.isnull().sum()

In [20]:
df['salary'].max()

'$9,881,598 '

In [21]:
df['salary'] = df['salary'].str.replace('$','')
df['salary'] = df['salary'].str.replace(',','')
df['salary'] = df['salary'].str.replace(' ','')
df.head(1)

Unnamed: 0,position,team,salary,AGE,GP,W,L,MIN_PLAYED,PTS,FGM,...,DREB,REB,AST,TOV,STL,BLK,PF,FP,DD2,TD3
0,PG,Golden State Warriors,40231758,31,4,1,3,112,81,27,...,16,20,26,15,5,2,10,150,1,0


In [22]:
df.columns

Index(['position', 'team', 'salary', 'AGE', 'GP', 'W', 'L', 'MIN_PLAYED',
       'PTS', 'FGM', 'FGA', 'FG%', 'x_3PM', 'x_3PA', 'x_3P%', 'FTM', 'FTA',
       'FT%', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'PF', 'FP',
       'DD2', 'TD3'],
      dtype='object')

In [24]:
df['salary'] = df.salary.astype(int)
df['AGE'] = df.AGE.astype(int)
df['W'] = df.W.astype(int)
df['L'] = df.L.astype(int)
df['BLK'] = df.BLK.astype(int)
df['STL'] = df.STL.astype(int)
df['TOV'] = df.TOV.astype(int)
df['AST'] = df.AST.astype(int)
df['REB'] = df.REB.astype(int)
df['DREB'] = df.DREB.astype(int)
df['OREB'] = df.OREB.astype(int)
df['GP'] = df.GP.astype(int)
df['MIN_PLAYED'] = df.MIN_PLAYED.astype(int)
df['PTS'] = df.PTS.astype(int)
df['FGM'] = df.FGM.astype(int)
df['FGA'] = df.FGA.astype(int)
df['x_3PM'] = df.x_3PM.astype(int)
df['x_3PA'] = df.x_3PA.astype(int)
# df['FG%'] = df.FG%.astype(float)
# df['x_3P%'] = df.x_3P%.astype(float)
df['x_3PA'] = df.x_3PA.astype(int)

In [None]:
df.dtypes

In [None]:
df.head(1)