## NBA Player Value in relation to Win-Shares and Salary

In [41]:
#import required packages
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


### Create df from csv

In [42]:
#creating player stats df from csv
df = pd.read_csv('nba_player_stats.csv')

In [43]:
print(df)

     Unnamed: 0   Rk                  Player  Pos  Age   Tm   G    MP   PER  \
0             0    1            Alex Abrines   SG   25  OKC  31   588   6.3   
1             1    2              Quincy Acy   PF   28  PHO  10   123   2.9   
2             2    3            Jaylen Adams   PG   22  ATL  19   181   9.6   
3             3    4            Steven Adams    C   25  OKC  64  2167  19.0   
4             4    5             Bam Adebayo    C   21  MIA  65  1448  17.1   
5             5    6               Deng Adel   SF   21  CLE  12   158   2.2   
6             6    7  DeVaughn Akoon-Purcell   SG   25  DEN   7    22   8.3   
7             7    8       LaMarcus Aldridge    C   33  SAS  65  2132  22.5   
8             8    9            Rawle Alkins   SG   21  CHI   3     6  37.9   
9             9   10           Grayson Allen   SG   23  UTA  30   290   4.1   
10           10   11           Jarrett Allen    C   20  BRK  66  1760  19.0   
11           11   12            Kadeem Allen   SG   

In [44]:
df.head()

Unnamed: 0.1,Unnamed: 0,Rk,Player,Pos,Age,Tm,G,MP,PER,TS%,...,Unnamed: 19,OWS,DWS,WS,WS/48,Unnamed: 24,OBPM,DBPM,BPM,VORP
0,0,1,Alex Abrines,SG,25,OKC,31,588,6.3,0.507,...,,0.1,0.6,0.7,0.053,,-2.4,-1.0,-3.4,-0.2
1,1,2,Quincy Acy,PF,28,PHO,10,123,2.9,0.379,...,,-0.1,0.1,-0.1,-0.02,,-5.7,-0.2,-5.9,-0.1
2,2,3,Jaylen Adams,PG,22,ATL,19,181,9.6,0.542,...,,0.1,0.1,0.2,0.046,,-1.7,-2.1,-3.7,-0.1
3,3,4,Steven Adams,C,25,OKC,64,2167,19.0,0.604,...,,4.5,3.3,7.8,0.173,,0.9,2.0,2.9,2.7
4,4,5,Bam Adebayo,C,21,MIA,65,1448,17.1,0.621,...,,2.6,2.3,4.9,0.164,,-0.7,3.3,2.6,1.7


In [45]:
#remove unnamed columns from basketball-reference.com table used for
# formatting purposes
df = df.drop(columns=['Unnamed: 0', 'Unnamed: 19', 'Unnamed: 24'])


In [46]:
df.head()

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,MP,PER,TS%,3PAr,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,1,Alex Abrines,SG,25,OKC,31,588,6.3,0.507,0.809,...,7.9,12.2,0.1,0.6,0.7,0.053,-2.4,-1.0,-3.4,-0.2
1,2,Quincy Acy,PF,28,PHO,10,123,2.9,0.379,0.833,...,15.2,9.2,-0.1,0.1,-0.1,-0.02,-5.7,-0.2,-5.9,-0.1
2,3,Jaylen Adams,PG,22,ATL,19,181,9.6,0.542,0.75,...,19.7,13.7,0.1,0.1,0.2,0.046,-1.7,-2.1,-3.7,-0.1
3,4,Steven Adams,C,25,OKC,64,2167,19.0,0.604,0.002,...,12.6,16.4,4.5,3.3,7.8,0.173,0.9,2.0,2.9,2.7
4,5,Bam Adebayo,C,21,MIA,65,1448,17.1,0.621,0.029,...,17.8,15.3,2.6,2.3,4.9,0.164,-0.7,3.3,2.6,1.7


In [47]:
#full definition of all columns can be found at https://www.basketball-reference.com/leagues/NBA_2019_advanced.html
list(df)

['Rk',
 'Player',
 'Pos',
 'Age',
 'Tm',
 'G',
 'MP',
 'PER',
 'TS%',
 '3PAr',
 'FTr',
 'ORB%',
 'DRB%',
 'TRB%',
 'AST%',
 'STL%',
 'BLK%',
 'TOV%',
 'USG%',
 'OWS',
 'DWS',
 'WS',
 'WS/48',
 'OBPM',
 'DBPM',
 'BPM',
 'VORP']

In [48]:
# print total missing values for each feature
print(df.isnull().sum())

Rk        0
Player    0
Pos       0
Age       0
Tm        0
G         0
MP        0
PER       0
TS%       6
3PAr      8
FTr       8
ORB%      0
DRB%      0
TRB%      0
AST%      0
STL%      0
BLK%      0
TOV%      6
USG%      0
OWS       0
DWS       0
WS        0
WS/48     0
OBPM      0
DBPM      0
BPM       0
VORP      0
dtype: int64


## Create df_contracts for NBA Player Contracts for 2018-2019 and extended

In [52]:
df_contracts = pd.read_csv(r'C:\Users\dudad\Documents\Data Science Career Track\Data Wrangling\player_contracts.csv', delimiter = ',')
print(df_contracts)

      Rk                           Player   Tm    2018-19    2019-20  \
0      1          Stephen Curry\curryst01  GSW  $37457154  $40231758   
1      2              Chris Paul\paulch01  HOU  $35654150  $38506482   
2      3      Russell Westbrook\westbru01  OKC  $35654150  $38178000   
3      4           LeBron James\jamesle01  LAL  $35654150  $37436858   
4      5          Blake Griffin\griffbl01  DET  $32088932  $34234964   
5      6         Gordon Hayward\haywago01  BOS  $31214295  $32700690   
6      7             Kyle Lowry\lowryky01  TOR  $31200000  $33296296   
7      8            Paul George\georgpa01  OKC  $30560700  $33005556   
8      9            Mike Conley\conlemi01  MEM  $30521116  $32511623   
9     10           James Harden\hardeja01  HOU  $30431854  $37800000   
10    11           Kevin Durant\duranke01  GSW  $30000000  $31500000   
11    12           Paul Millsap\millspa01  DEN  $29730769  $30500000   
12    13             Al Horford\horfoal01  BOS  $28928710  $3012

In [53]:
df_contracts.head()

Unnamed: 0,Rk,Player,Tm,2018-19,2019-20,2020-21,2021-22,2022-23,2023-24,Signed Using,Guaranteed
0,1,Stephen Curry\curryst01,GSW,$37457154,$40231758,$43006362,$45780966,,,Bird Rights,$166476240
1,2,Chris Paul\paulch01,HOU,$35654150,$38506482,$41358814,$44211146,,,,$159730592
2,3,Russell Westbrook\westbru01,OKC,$35654150,$38178000,$41006000,$43848000,$46662000,,Bird Rights,$158686150
3,4,LeBron James\jamesle01,LAL,$35654150,$37436858,$39219565,$41002273,,,,$113310573
4,5,Blake Griffin\griffbl01,DET,$32088932,$34234964,$36595996,$38957028,,,Bird Rights,$102919892


In [54]:
print(df_contracts.isnull().sum())

Rk                0
Player            0
Tm                0
2018-19           0
2019-20         247
2020-21         389
2021-22         473
2022-23         531
2023-24         548
Signed Using    187
Guaranteed        3
dtype: int64


## Change data type for columns in df_contracts

In [66]:
#view current data types of df_contracts
print(df_contracts.dtypes)

Rk                int64
Player           object
Tm               object
2018-19          object
2019-20          object
2020-21          object
2021-22          object
2022-23          object
2023-24         float64
Signed Using     object
Guaranteed       object
dtype: object


In [67]:
#remove "$" from salary data for analysis purposes
df_contracts['2018-19'] = [x.strip('$') for x in df_contracts['2018-19']]

In [None]:
df_contracts['2018-19'] = [x.strip('$') for x in df_contracts['2018-19']]

In [None]:
df_contracts['2018-19'] = [x.strip('$') for x in df_contracts['2018-19']]

In [69]:
df_contracts['2019-20'] = [x.strip('$') for x in df_contracts['2019-20']],
df_contracts['2020-21'] = [x.strip('$') for x in df_contracts['2020-21']],
df_contracts['2021-22'] = [x.strip('$') for x in df_contracts['2021-22']],
df_contracts['2022-23'] = [x.strip('$') for x in df_contracts['2022-23']],
df_contracts['Guaranteed'] = [x.strip('$') for x in df_contracts['Guaranteed']]

AttributeError: 'float' object has no attribute 'strip'

In [70]:
print(df_contracts)

      Rk                           Player   Tm   2018-19    2019-20  \
0      1          Stephen Curry\curryst01  GSW  37457154  $40231758   
1      2              Chris Paul\paulch01  HOU  35654150  $38506482   
2      3      Russell Westbrook\westbru01  OKC  35654150  $38178000   
3      4           LeBron James\jamesle01  LAL  35654150  $37436858   
4      5          Blake Griffin\griffbl01  DET  32088932  $34234964   
5      6         Gordon Hayward\haywago01  BOS  31214295  $32700690   
6      7             Kyle Lowry\lowryky01  TOR  31200000  $33296296   
7      8            Paul George\georgpa01  OKC  30560700  $33005556   
8      9            Mike Conley\conlemi01  MEM  30521116  $32511623   
9     10           James Harden\hardeja01  HOU  30431854  $37800000   
10    11           Kevin Durant\duranke01  GSW  30000000  $31500000   
11    12           Paul Millsap\millspa01  DEN  29730769  $30500000   
12    13             Al Horford\horfoal01  BOS  28928710  $30123015   
13    

In [56]:
#convert column 2018-19 to numeric
df_contracts["2018-19"] = df_contracts["2018-19"].apply(pd.to_numeric)

ValueError: Unable to parse string "$37457154" at position 0

In [58]:
#remove header rows from df_contracts
