## Visualization 2: Relationship between NBA Player of the Week and Player Salary

In [1]:
%load_ext rpy2.ipython
%R library(ggplot2)

import pandas as pd
import numpy as np
from utils import pd2r

### Load Datasets: Player of the Week & Salary

In [2]:
## Player of the Week awards
# Read the raw Player-of-the-Week CSV and pass it through `pd2r`, a helper imported from the
# project-local `utils` module (its implementation is not visible in this notebook).
# NOTE(review): later cells call pandas methods such as `.groupby` on POTW, so `pd2r` presumably
# returns a pandas DataFrame -- confirm against utils.
POTW = pd2r(pd.read_csv('../data/raw/NBA_Player_of_the_Week.csv'))

In [3]:
## Salary
# Raise the pandas display limit so wide frames are shown with all of their columns.
pd.set_option('display.max_columns', 999)
# Read the cleaned salary CSV and pass it through `pd2r` (helper from the project-local `utils`
# module; its implementation is not visible in this notebook).
SLR = pd2r(pd.read_csv('../data/cleaned/NBA_Salary.csv'))

### Datasets Manipulation

Merge the two datasets

In [4]:
# Count Player-of-the-Week awards per player and season.
# Team is deliberately not a grouping key: the merge below joins on (player, season) only, so one
# row per (season, player, team) would duplicate a player's salary rows whenever awards were won
# with more than one team in the same season, and split the award count across the duplicates.
POTW_count = (
    POTW
    .groupby(['Season short', 'Player'])['Date']
    .count()
    .reset_index()
    .rename(columns={'Date': 'POTW_Count'})
)

# Salary rows without the columns that are not needed for this visualization.
slr_new = SLR.drop(columns=['Season Start', 'Team'])

# Left join keeps every salary row; `validate` makes pandas raise if the award table ever has
# more than one row per (player, season), i.e. if the join could multiply salary rows.
slr_potw = pd.merge(
    slr_new,
    POTW_count,
    left_on=['Player Name', 'Season End'],
    right_on=['Player', 'Season short'],
    how='left',
    validate='many_to_one',
)

# Salary rows with no matching award row get a count of 0.
slr_potw['POTW_Count'] = slr_potw['POTW_Count'].fillna(0)

# Drop the join keys that came from the award table and shorten the remaining column names.
slr_potw_new = (
    slr_potw
    .drop(columns=['Season short', 'Player'])
    .rename(columns={'Player Name': 'Player', 'Season End': 'Year', 'Salary in $': 'Salary'})
)

# Express salary in millions, rounded to two decimals.
slr_potw_new['Salary'] = (slr_potw_new['Salary'] / 1000000).round(2)
slr_potw_new.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12415 entries, 0 to 12414
Data columns (total 5 columns):
Player        12415 non-null object
Salary        12415 non-null float64
Year          12415 non-null int64
Franchise     12415 non-null object
POTW_Count    12415 non-null float64
dtypes: float64(2), int64(1), object(2)
memory usage: 582.0+ KB


Median NBA salary per year

In [5]:
# League-wide median salary for each year, with Year kept as a regular column.
slr_potw_median = (
    slr_potw_new
    .groupby('Year', as_index=False)[['Salary']]
    .median()
)

Median salaries for POTW and non-POTW players

In [6]:
# Flag each row: 'No' when the award count is 0, 'Yes' for anything else.
slr_potw_new['Is_POTW'] = (
    slr_potw_new['POTW_Count']
    .eq(0)
    .map({True: 'No', False: 'Yes'})
)

# Median salary per year, computed separately for each flag value.
slr_vs_count = (
    slr_potw_new
    .groupby(['Year', 'Is_POTW'], as_index=False)[['Salary']]
    .median()
)
slr_vs_count.head()

Unnamed: 0,Year,Is_POTW,Salary
0,1991,No,0.62
1,1991,Yes,1.37
2,1992,No,0.7
3,1992,Yes,2.24
4,1993,No,0.8


Split into two datasets: salaries of POTW winners and salaries of non-winners

In [7]:
# Rows flagged as award winners, and rows flagged as non-winners.
potw_salary = slr_potw_new.query("Is_POTW == 'Yes'")
Not_potw_salary = slr_potw_new.query("Is_POTW == 'No'")

Create an annotation dataset

In [8]:
# Median rows for the years 1991 and 2019; the plot labels and marks these values.
annote_slr_potw1 = slr_vs_count[slr_vs_count['Year'] == 1991]
annote_slr_potw2 = slr_vs_count[slr_vs_count['Year'] == 2019]

### Visualization in R

In [9]:
%R -i slr_potw_median
%R -i slr_vs_count
%R -i annote_slr_potw1
%R -i annote_slr_potw2
%R -i potw_salary
%R -i Not_potw_salary

In [10]:
%%R -w 14 -h 8 --units in -r 300
# Format an x-axis break (the year a season ends) as "start - end", e.g. 2019 -> "2018 - 2019".
season <- function(x) {
    paste(x - 1, '-', x)
}

# Build the figure as a named object so it can be saved and displayed explicitly, rather than
# relying on ggsave() picking up the implicit last plot.
vis2 <- ggplot() +
    # Individual salaries: grey for non-POTW rows, orange (sized by award count) for POTW rows.
    # A fixed jitter seed makes the saved file and the inline figure identical and repeatable.
    geom_jitter(aes(x = Year, y = Salary), data = Not_potw_salary, color = 'grey', alpha = 0.1,
                position = position_jitter(seed = 42)) +
    geom_jitter(aes(x = Year, y = Salary, size = POTW_Count), data = potw_salary,
                color = '#FA8320', alpha = 0.3, position = position_jitter(seed = 42)) +
    theme_minimal(base_size = 17) +
    # Per-year salary lines for the two Is_POTW groups.
    geom_line(aes(x = Year, y = Salary, color = Is_POTW), data = slr_vs_count, alpha = 0.7, size = 2) +
    # Value labels at the 1991 and 2019 rows of the lines.
    geom_text(aes(x = Year, y = Salary, label = Salary, color = Is_POTW), size = 4, fontface = 'bold',
              data = annote_slr_potw1, vjust = 0.5, hjust = 1.5, family = 'Avenir Next') +
    geom_text(aes(x = Year, y = Salary, label = Salary, color = Is_POTW), size = 5.5, fontface = 'bold',
              data = annote_slr_potw2, vjust = 1.25, hjust = -0.2, family = 'Avenir Next') +
    scale_color_manual(values = c("grey2", "#FA8320")) +
    # Markers on the labelled rows. No dodging: the markers should sit exactly on the plotted
    # values, and position = "dodge" has no width to work with for points.
    geom_point(aes(x = Year, y = Salary, color = Is_POTW), data = annote_slr_potw1, size = 5) +
    geom_point(aes(x = Year, y = Salary, color = Is_POTW), data = annote_slr_potw2, size = 5) +
    # Line captions.
    # NOTE(review): slr_vs_count holds per-year medians, while these captions say "Avg" --
    # confirm the intended wording.
    geom_text(aes(x = 2018.5, y = 20.75, label = 'Avg Salary\nof POTW'), size = 5.5, color = '#FA8320',
              hjust = 0, fontface = 'bold', family = 'Avenir Next') +
    geom_text(aes(x = 2018.5, y = 4.5, label = 'Avg Salary of\nNon-POTW'), size = 5.5, color = 'grey1',
              alpha = 0.8, hjust = 'right', fontface = 'bold', family = 'Avenir Next') +
    ylab("Salary ($ in Millions) \n") +
    # The x-axis title is set through the scale's `name`; a separate xlab() would be overridden by it.
    scale_x_continuous(limits = c(1990, 2020), breaks = seq(1990, 2020, by = 5),
                       name = "\nSeason", labels = season) +
    scale_y_continuous(limits = c(0, 24), breaks = seq(0, 24, by = 2)) +
    theme(text = element_text(family = 'Avenir Next'),
          axis.title = element_text(face = 'bold'), axis.ticks = element_blank(),
          axis.line = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()) +
    # Hide both legends; "none" is used instead of the reassignable shorthand F.
    guides(colour = "none", size = "none")

# Save exactly the object built above, then render it in the notebook.
ggsave('../vis/vis2.png', plot = vis2, units = 'in', width = 14, height = 8)
print(vis2)