# NBA Salary Data Cleaning

In [1]:
# Dependencies and Setup
import pandas as pd 
from pathlib import Path
import warnings
# NOTE(review): this silences every warning category for the rest of the session,
# including pandas deprecation notices — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [2]:
# Load the raw season-by-season salary records and preview the first rows
salary_source = "Resources/salaries_1985to2018.csv"
salary_df = pd.read_csv(salary_source)
salary_df.head()

Unnamed: 0,index,league,player_id,salary,season,season_end,season_start,team
0,0,NBA,abdelal01,395000,1990-91,1991,1990,Portland Trail Blazers
1,1,NBA,abdelal01,494000,1991-92,1992,1991,Portland Trail Blazers
2,2,NBA,abdelal01,500000,1992-93,1993,1992,Boston Celtics
3,3,NBA,abdelal01,805000,1993-94,1994,1993,Boston Celtics
4,4,NBA,abdelal01,650000,1994-95,1995,1994,Sacramento Kings


In [3]:
# Keep only the player identifier and the salary amount; the other columns
# are not used in the per-player totals
kept_columns = ['player_id', 'salary']
total_salary = salary_df.loc[:, kept_columns]
total_salary.head()

Unnamed: 0,player_id,salary
0,abdelal01,395000
1,abdelal01,494000
2,abdelal01,500000
3,abdelal01,805000
4,abdelal01,650000


In [4]:
# Print how many distinct player ids appear in the salary records
# (dropna=False keeps the count identical to len(unique()))
unique_player_count = total_salary["player_id"].nunique(dropna=False)
print(unique_player_count)

2408


In [5]:
# Collapse the per-season rows into one row per player by summing salaries
salary_by_player = total_salary.groupby(by=['player_id'])
player_salary = salary_by_player.sum()
player_salary.head()

Unnamed: 0_level_0,salary
player_id,Unnamed: 1_level_1
abdelal01,2844000
abdulka01,8560000
abdulma02,19849500
abdulta01,37982800
abdursh01,95866000


In [6]:
# Non-null count per column — there is one row per player after the groupby,
# so this should equal the unique-player count printed above
player_salary.count()

salary    2408
dtype: int64

In [7]:
# Rename the index to "_id" and the salary column to "Total Earnings" so the
# table lines up with the other datasets at merge time.
# Built as a chained expression instead of inplace=True: each step returns a new
# frame, and re-running the cell yields the same result.
player_salary = (
    player_salary
    .rename_axis("_id")
    .rename(columns={"salary": "Total Earnings"})
)
player_salary

Unnamed: 0_level_0,Total Earnings
_id,Unnamed: 1_level_1
abdelal01,2844000
abdulka01,8560000
abdulma02,19849500
abdulta01,37982800
abdursh01,95866000
...,...
zidekge01,2081200
zimmest01,950000
zipsepa01,2062611
zizican01,1645200


In [8]:
# Write the cleaned per-player totals to disk, creating the output folder
# first if it does not exist yet
filepath = Path('Resources/cleaned_salary_data.csv')
output_dir = filepath.parent
output_dir.mkdir(exist_ok=True, parents=True)
player_salary.to_csv(filepath)