# NBA Player Value based on Win-Shares and Salary

In [6]:
#import required packages
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


### Create DataFrame from CSV
#### Data last updated 4/7/2019; includes games through 4/6/2019


In [21]:
# Load the player stats table from the CSV file in the working directory
stats_csv = 'nba_player_stats.csv'
df_stats = pd.read_csv(stats_csv)

In [22]:
# Plain-text dump of the freshly loaded stats frame for a first look at the raw import
print(df_stats)

      Rk                  Player  Pos  Age   Tm   G    MP   PER    TS%   3PAr  \
0      1            Alex Abrines   SG   25  OKC  31   588   6.3  0.507  0.809   
1      2              Quincy Acy   PF   28  PHO  10   123   2.9  0.379  0.833   
2      3            Jaylen Adams   PG   22  ATL  32   393   7.9  0.479  0.689   
3      4            Steven Adams    C   25  OKC  77  2593  18.9  0.596  0.003   
4      5             Bam Adebayo    C   21  MIA  79  1841  17.8  0.623  0.028   
5      6               Deng Adel   SF   21  CLE  17   187   1.7  0.376  0.629   
6      7  DeVaughn Akoon-Purcell   SG   25  DEN   7    22   8.2  0.322    0.4   
7      8       LaMarcus Aldridge    C   33  SAS  79  2621  22.7  0.573  0.031   
8      9            Rawle Alkins   SG   21  CHI   8    88   9.4  0.417  0.344   
9     10           Grayson Allen   SG   23  UTA  35   347   6.1  0.514  0.589   
10    11           Jarrett Allen    C   20  BRK  78  2046  18.8  0.637  0.079   
11    12            Kadeem A

## The CSV repeats its header row throughout the table for readability; these repeated header rows are removed for analysis purposes

In [23]:
# Preview the first five rows of the stats table
df_stats.head(n=5)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,MP,PER,TS%,3PAr,...,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP
0,1,Alex Abrines,SG,25,OKC,31,588,6.3,0.507,0.809,...,7.9,12.2,0.1,0.6,0.7,0.054,-2.5,-1.0,-3.4,-0.2
1,2,Quincy Acy,PF,28,PHO,10,123,2.9,0.379,0.833,...,15.2,9.2,-0.1,0.1,-0.1,-0.02,-5.6,-0.3,-5.9,-0.1
2,3,Jaylen Adams,PG,22,ATL,32,393,7.9,0.479,0.689,...,19.6,13.7,-0.1,0.2,0.1,0.014,-2.8,-1.5,-4.3,-0.2
3,4,Steven Adams,C,25,OKC,77,2593,18.9,0.596,0.003,...,12.6,16.4,5.1,4.0,9.1,0.168,0.8,2.2,3.0,3.3
4,5,Bam Adebayo,C,21,MIA,79,1841,17.8,0.623,0.028,...,17.3,15.6,3.4,3.2,6.6,0.171,-0.5,3.6,3.0,2.4


In [24]:
# Column names of the stats table; full definitions of every column can be found at
# https://www.basketball-reference.com/leagues/NBA_2019_advanced.html
df_stats.columns.tolist()

['Rk',
 'Player',
 'Pos',
 'Age',
 'Tm',
 'G',
 'MP',
 'PER',
 'TS%',
 '3PAr',
 'FTr',
 'ORB%',
 'DRB%',
 'TRB%',
 'AST%',
 'STL%',
 'BLK%',
 'TOV%',
 'USG%',
 'OWS',
 'DWS',
 'WS',
 'WS/48',
 'OBPM',
 'DBPM',
 'BPM',
 'VORP']

In [25]:
# Count the missing values in each column of the stats table and print the totals
missing_per_column = df_stats.isna().sum()
print(missing_per_column)

Rk        0
Player    0
Pos       0
Age       0
Tm        0
G         0
MP        0
PER       0
TS%       6
3PAr      7
FTr       7
ORB%      0
DRB%      0
TRB%      0
AST%      0
STL%      0
BLK%      0
TOV%      6
USG%      0
OWS       0
DWS       0
WS        0
WS/48     0
OBPM      0
DBPM      0
BPM       0
VORP      0
dtype: int64


## Create DataFrame of NBA player contracts for 2018-19 and later seasons

In [57]:
# Load the contracts table from a path relative to the notebook's working directory,
# the same way nba_player_stats.csv is loaded, instead of from one user's absolute
# Windows path, so the notebook can be re-run on another machine.
# NOTE(review): assumes player_contracts.csv sits next to nba_player_stats.csv — confirm.
CONTRACTS_CSV = 'player_contracts.csv'
df_contracts = pd.read_csv(CONTRACTS_CSV, delimiter=',')
print(df_contracts)

      Rk              Player   Tm         2018-19          2019-20   \
0      1       Stephen Curry  GSW   37,457,154.00    40,231,758.00    
1      2          Chris Paul  HOU   35,654,150.00    38,506,482.00    
2      3   Russell Westbrook  OKC   35,654,150.00    38,178,000.00    
3      4        LeBron James  LAL   35,654,150.00    37,436,858.00    
4      5       Blake Griffin  DET   32,088,932.00    34,234,964.00    
5      6      Gordon Hayward  BOS   31,214,295.00    32,700,690.00    
6      7          Kyle Lowry  TOR   31,200,000.00    33,296,296.00    
7      8         Paul George  OKC   30,560,700.00    33,005,556.00    
8      9         Mike Conley  MEM   30,521,116.00    32,511,623.00    
9     10        James Harden  HOU   30,431,854.00    37,800,000.00    
10    11        Kevin Durant  GSW   30,000,000.00    31,500,000.00    
11    12        Paul Millsap  DEN   29,730,769.00    30,500,000.00    
12    13          Al Horford  BOS   28,928,710.00    30,123,015.00    
13    

In [58]:
# Preview the first five rows of the contracts table
df_contracts.head(n=5)

Unnamed: 0,Rk,Player,Tm,2018-19,2019-20,2020-21,2021-22,2022-23,2023-24,Signed Using,Guaranteed
0,1,Stephen Curry,GSW,37457154.0,40231758.0,43006362.0,45780966.0,,,Bird Rights,166476240.0
1,2,Chris Paul,HOU,35654150.0,38506482.0,41358814.0,44211146.0,,,,159730592.0
2,3,Russell Westbrook,OKC,35654150.0,38178000.0,41006000.0,43848000.0,46662000.0,,Bird Rights,158686150.0
3,4,LeBron James,LAL,35654150.0,37436858.0,39219565.0,41002273.0,,,,113310573.0
4,5,Blake Griffin,DET,32088932.0,34234964.0,36595996.0,38957028.0,,,Bird Rights,102919892.0


In [59]:
# Count the missing values in each column of the contracts table and print the totals
missing_per_column = df_contracts.isna().sum()
print(missing_per_column)

Rk               29
Player           29
Tm               29
 2018-19          1
 2019-20        293
 2020-21        437
 2021-22        521
 2022-23        579
 2023-24        596
Signed Using    233
 Guaranteed      32
dtype: int64


### View how columns were imported

In [60]:
# Show the dtype pandas assigned to each column of df_contracts on import
contract_dtypes = df_contracts.dtypes
print(contract_dtypes)

Rk              object
Player          object
Tm              object
 2018-19        object
 2019-20        object
 2020-21        object
 2021-22        object
 2022-23        object
 2023-24        object
Signed Using    object
 Guaranteed     object
dtype: object


### Some imported rows are just repeated headers (added for readability while scrolling), which cause the null values in some cases
#### Remove the header rows that repeat throughout the table after the first header

In [61]:
# Find the rows whose 'Rk' cell holds the literal header text 'Rk' and drop them by index label
repeated_header_mask = df_contracts['Rk'] == 'Rk'
repeated_header_labels = df_contracts.index[repeated_header_mask]
df_contracts = df_contracts.drop(index=repeated_header_labels)

In [62]:
# Plain-text dump of the contracts frame to inspect it after dropping the rows where Rk == 'Rk'
print(df_contracts)

      Rk                 Player   Tm         2018-19          2019-20   \
0      1          Stephen Curry  GSW   37,457,154.00    40,231,758.00    
1      2             Chris Paul  HOU   35,654,150.00    38,506,482.00    
2      3      Russell Westbrook  OKC   35,654,150.00    38,178,000.00    
3      4           LeBron James  LAL   35,654,150.00    37,436,858.00    
4      5          Blake Griffin  DET   32,088,932.00    34,234,964.00    
5      6         Gordon Hayward  BOS   31,214,295.00    32,700,690.00    
6      7             Kyle Lowry  TOR   31,200,000.00    33,296,296.00    
7      8            Paul George  OKC   30,560,700.00    33,005,556.00    
8      9            Mike Conley  MEM   30,521,116.00    32,511,623.00    
9     10           James Harden  HOU   30,431,854.00    37,800,000.00    
10    11           Kevin Durant  GSW   30,000,000.00    31,500,000.00    
11    12           Paul Millsap  DEN   29,730,769.00    30,500,000.00    
12    13             Al Horford  BOS  

### Removed the repeated column-name header rows; next, remove the "Salary" header rows that appear throughout the rest of the DataFrame

In [63]:
# Keep only the rows that have a value in the 'Rk' column
has_rank = df_contracts['Rk'].notna()
df_contracts = df_contracts.loc[has_rank]

In [64]:
# Plain-text dump of the contracts frame to inspect it after dropping the rows with a missing 'Rk'
print(df_contracts)

      Rk                 Player   Tm         2018-19          2019-20   \
0      1          Stephen Curry  GSW   37,457,154.00    40,231,758.00    
1      2             Chris Paul  HOU   35,654,150.00    38,506,482.00    
2      3      Russell Westbrook  OKC   35,654,150.00    38,178,000.00    
3      4           LeBron James  LAL   35,654,150.00    37,436,858.00    
4      5          Blake Griffin  DET   32,088,932.00    34,234,964.00    
5      6         Gordon Hayward  BOS   31,214,295.00    32,700,690.00    
6      7             Kyle Lowry  TOR   31,200,000.00    33,296,296.00    
7      8            Paul George  OKC   30,560,700.00    33,005,556.00    
8      9            Mike Conley  MEM   30,521,116.00    32,511,623.00    
9     10           James Harden  HOU   30,431,854.00    37,800,000.00    
10    11           Kevin Durant  GSW   30,000,000.00    31,500,000.00    
11    12           Paul Millsap  DEN   29,730,769.00    30,500,000.00    
12    13             Al Horford  BOS  

### Successfully removed the repeated header rows from the contracts DataFrame