# NBA Salary Data Cleaning

In [1]:
# Dependencies and Setup
import pandas as pd 
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Pull CSV
salary_df = pd.read_csv("Resources/salaries_1985to2018.csv")
salary_df.head()

Unnamed: 0,index,league,player_id,salary,season,season_end,season_start,team
0,0,NBA,abdelal01,395000,1990-91,1991,1990,Portland Trail Blazers
1,1,NBA,abdelal01,494000,1991-92,1992,1991,Portland Trail Blazers
2,2,NBA,abdelal01,500000,1992-93,1993,1992,Boston Celtics
3,3,NBA,abdelal01,805000,1993-94,1994,1993,Boston Celtics
4,4,NBA,abdelal01,650000,1994-95,1995,1994,Sacramento Kings


In [3]:
# Create DATE column for merging later
salary_df["DATE"] = pd.to_datetime(salary_df["season_start"].astype(str)+ "-01-01")
salary_df.head()

Unnamed: 0,index,league,player_id,salary,season,season_end,season_start,team,DATE
0,0,NBA,abdelal01,395000,1990-91,1991,1990,Portland Trail Blazers,1990-01-01
1,1,NBA,abdelal01,494000,1991-92,1992,1991,Portland Trail Blazers,1991-01-01
2,2,NBA,abdelal01,500000,1992-93,1993,1992,Boston Celtics,1992-01-01
3,3,NBA,abdelal01,805000,1993-94,1994,1993,Boston Celtics,1993-01-01
4,4,NBA,abdelal01,650000,1994-95,1995,1994,Sacramento Kings,1994-01-01


In [4]:
# Print unique players
print(len(salary_df["player_id"].unique()))

2408


# Adjusting for Inflation
All inflation rate is as of 09/01/2023

In [5]:
# Pull CPI CSV to adjust for inflation
cpi_data = pd.read_csv('Resources/CPIAUCNS.csv')
cpi_data.head()

Unnamed: 0,DATE,CPIAUCNS
0,1913-01-01,9.8
1,1913-02-01,9.8
2,1913-03-01,9.8
3,1913-04-01,9.8
4,1913-05-01,9.7


In [6]:
# Create new column to adjust each year to Sep. 01 2023
cpi_data['CPI_rate'] = cpi_data['CPIAUCNS'].iloc[-1]/cpi_data['CPIAUCNS']
cpi_data['DATE'] = pd.to_datetime(cpi_data['DATE'])
cpi_file.head()

NameError: name 'cpi_file' is not defined

In [None]:
# Create Inflation Adjusted Salary column
total_salary2 = pd.merge(salary_df, cpi_data, how="left", on="DATE")
total_salary2['inflation_adjusted_salary'] = total_salary2["salary"] * total_salary2["CPI_rate"]
total_salary2.head()

In [None]:
# Drop unnecessary columns
total_salary2 = total_salary2[['player_id', 'salary', 'inflation_adjusted_salary']]
total_salary2.head()

In [None]:
# Group by player id and the sum of salary and inflation-adjusted salary
player_salary = total_salary2.groupby(['player_id']).sum()
player_salary.head()

In [None]:
# Print total rows
player_salary.count()

In [None]:
# Rename player id and salary for merging
player_salary.index.rename("_id", inplace=True)
player_salary.rename(columns={"salary": "Total Earnings", "inflation_adjusted_salary": "Inflation-Adjusted Earnings"}, inplace=True)
player_salary

In [None]:
# Create Salary DataFrame with readable salaries
player_salary2 = player_salary.copy()
player_salary2["Total Earnings"] = player_salary2["Total Earnings"].map("${:,.2f}".format)
player_salary2["Inflation-Adjusted Earnings"] = player_salary2["Inflation-Adjusted Earnings"].map("${:,.2f}".format)
player_salary2.head()

In [None]:
# Push clean csv files
filepath = Path('Resources/cleaned_salary_data.csv')  
filepath.parent.mkdir(parents=True, exist_ok=True)  
player_salary.to_csv(filepath)