# How is NBA Salary linked to Player Performance?

### The goals of this project:

#### 1. Explore and analyze how NBA players' salaries are linked to performance. 
#### 2. Players who are overpaid/underpaid by position
#### 3. Driver better decisions to identify quality or above-average players at a low-cost or below-average pay (in per minute terms)

In [1]:
# Import Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup

### Scraping Player Season Totals from 2000 to 2022 from Basketball Reference

#### https://www.basketball-reference.com/leagues/NBA_2020_totals.html

In [3]:
#Scraping from Basketball Reference to obtain player season totals

years = list(range(2000, 2022))

url_totals = "https://www.basketball-reference.com/leagues/NBA_{}_totals.html"


#For loop to request all years from 2000 to 2022 and store as html 
for year in years:
    url = url_totals.format(year)
    data = requests.get(url)
    
    with open("totals/{}.html".format(year), "w+", encoding = "utf-8") as f:
        f.write(data.text)

In [4]:
# Read in HTML

with open("totals/2000.html", encoding ="utf-8") as f:
    page = f.read()

In [13]:
# Parse pages with BeautifulSoup

soup = BeautifulSoup(page, "html.parser")


In [14]:
# Find the id for the stats table
totals_2000_season = soup.find(id= "all_totals_stats")

In [15]:
# Read in the html to verify
totals_2000_szn = pd.read_html(str(totals_2000_season))

totals_2000_szn[0]

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1,Precious Achiuwa,PF,21,MIA,61,4,737,124,228,...,.509,73,135,208,29,20,28,43,91,304
1,2,Jaylen Adams,PG,24,MIL,7,0,18,1,8,...,,0,3,3,2,0,0,0,1,2
2,3,Steven Adams,C,27,NOP,58,58,1605,189,308,...,.444,213,301,514,111,54,38,78,113,438
3,4,Bam Adebayo,C,23,MIA,64,64,2143,456,800,...,.799,142,431,573,346,75,66,169,145,1197
4,5,LaMarcus Aldridge,C,35,TOT,26,23,674,140,296,...,.872,19,99,118,49,11,29,27,47,352
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
726,536,Delon Wright,PG,28,SAC,27,8,696,104,225,...,.833,28,77,105,97,43,11,35,30,271
727,537,Thaddeus Young,PF,32,CHI,68,23,1652,370,662,...,.628,168,255,423,291,74,40,137,152,823
728,538,Trae Young,PG,22,ATL,63,63,2125,487,1112,...,.886,38,207,245,594,53,12,261,111,1594
729,539,Cody Zeller,C,28,CHO,48,21,1005,181,324,...,.714,119,209,328,86,27,17,51,121,451


In [8]:
# For loop to combine all total htmls into a list of dataframes

szn_ttl_00_22 = []

for year in years:
    with open("totals/{}.html".format(year), encoding = "utf-8") as f:
        page = f.read()
        soup = BeautifulSoup(page, "html.parser")
        total_table = soup.find(id = "all_totals_stats")
        total = pd.read_html(str(total_table))[0]
        total["Year"] = year
        
        szn_ttl_00_22.append(total)

In [10]:
szn_ttl_df = pd.concat(szn_ttl_00_22)
szn_ttl_df.shape

(13439, 31)

In [11]:
szn_ttl_df.to_csv("./Data Files/szn_ttl_df.csv")

In [16]:
#Excluding rows where Rk is Rk

szn_ttl_df_cleaned = szn_ttl_df[szn_ttl_df["Rk"] != "Rk"]

In [17]:
szn_ttl_df_cleaned.to_csv("./Data Files/szn_ttl_df_cleaned.csv")