In [24]:
import os

import pandas as pd
import numpy as np

from bokeh.transform import jitter, factor_cmap
from bokeh.plotting import figure, show, output_notebook
from bokeh.palettes import Category20
from bokeh.models import ColumnDataSource
from bokeh.models import HoverTool

import chartify


## Data Preparation & Exploration

In [25]:
# Load the raw Player of the Week records from the CSV export and
# preview the first five rows to sanity-check the columns.
nba_df = pd.read_csv(filepath_or_buffer='raw_data/NBA_POW_DATA.csv')
nba_df.head(5)

Unnamed: 0,Active season,Player,Team,Conference,Date,Position,Height,Weight,Age,Draft Year,Seasons in league,Season,Season short,Real_value
0,0,Micheal Ray Richardson,New Jersey Nets,,"Apr 14, 1985",PG,6-5,189,29,1978,6,1984-1985,1985,1.0
1,0,Derek Smith,Los Angeles Clippers,,"Apr 7, 1985",SG,6-6,205,23,1982,2,1984-1985,1985,1.0
2,0,Calvin Natt,Denver Nuggets,,"Apr 1, 1985",F,6-6,220,28,1979,5,1984-1985,1985,1.0
3,0,Kareem Abdul-Jabbar,Los Angeles Lakers,,"Mar 24, 1985",C,7-2,225,37,1969,15,1984-1985,1985,1.0
4,0,Larry Bird,Boston Celtics,,"Mar 17, 1985",SF,6-9,220,28,1978,5,1984-1985,1985,1.0


In [26]:
# Summarise each column's dtype and non-null count to spot missing data
# before aggregating.
nba_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1155 entries, 0 to 1154
Data columns (total 14 columns):
Active season        1155 non-null int64
Player               1155 non-null object
Team                 1155 non-null object
Conference           771 non-null object
Date                 1155 non-null object
Position             1155 non-null object
Height               1155 non-null object
Weight               1155 non-null int64
Age                  1155 non-null int64
Draft Year           1155 non-null int64
Seasons in league    1155 non-null int64
Season               1155 non-null object
Season short         1155 non-null int64
Real_value           1155 non-null float64
dtypes: float64(1), int64(6), object(7)
memory usage: 126.4+ KB


## NBA Player of the Week (Average Age Per Season)

In [27]:
# Mean age of the award winners for every season, returned as a two-column
# frame ('Season short', 'Age') so it can be passed straight to a plot call.
nba_avg_age = (
    nba_df
    .groupby('Season short')['Age']
    .mean()
    .reset_index()
)
nba_avg_age.head()

Unnamed: 0,Season short,Age
0,1985,27.0
1,1986,26.695652
2,1987,27.130435
3,1988,26.173913
4,1989,26.478261


In [28]:
# Line chart of the mean Player of the Week age per season.
# blank_labels=True starts the chart without placeholder text, so every
# label that should appear has to be set explicitly below.
ch = chartify.Chart(blank_labels = True, x_axis_type = 'linear', 
                   y_axis_type = 'linear', layout = 'slide_100%')

ch.plot.line(nba_avg_age, x_column = 'Season short', y_column = 'Age')

ch.figure.plot_width = 750

ch.set_subtitle("NBA Player of the Week: Avg. Age Per Season")
# '0' formats the season ticks as plain integers.
ch.axes.set_xaxis_tick_format('0')
# The label setters are methods and must be called. Assigning a string to
# them (as the previous revision did) silently replaces the method on the
# axes object and leaves the axis unlabelled.
ch.axes.set_xaxis_label('Season Year')
ch.axes.set_yaxis_range(start = 15, end = 40)
ch.axes.set_yaxis_label('Avg. Age')
ch.figure.ygrid.grid_line_color = 'lightgrey'

ch.show()

## NBA Player of the Week (Top Ten)

In [29]:
# Ten players with the largest summed 'Real_value', highest first.
# 'Rounded' is the integer label drawn on the bars; astype(int) drops the
# fractional part (e.g. 30.5 becomes 30) rather than rounding to nearest.
nba_top_ten = (
    nba_df
    .groupby('Player')
    .agg({'Real_value': 'sum'})
    .sort_values('Real_value', ascending=False)
    .head(10)
    .reset_index()
    .assign(Rounded=lambda top: top['Real_value'].astype(int))
)

nba_top_ten

Unnamed: 0,Player,Real_value,Rounded
0,LeBron James,30.5,30
1,Michael Jordan,24.0,24
2,Karl Malone,22.5,22
3,Kobe Bryant,17.5,17
4,Shaquille O'Neal,16.0,16
5,David Robinson,15.0,15
6,Tim Duncan,14.0,14
7,Allen Iverson,13.5,13
8,Kevin Durant,13.0,13
9,Kevin Garnett,12.5,12


In [30]:
# Bar chart of the ten highest award totals, one bar per player on a
# categorical y-axis.
ch = chartify.Chart(
    blank_labels=True,
    y_axis_type='categorical',
    layout='slide_100%',
)

# Keyword arguments shared by the bar layer and the text layer so both use
# the same category column, numeric column, and category ordering.
top_ten_layer_args = dict(
    categorical_columns='Player',
    numeric_column='Real_value',
    categorical_order_ascending=True,
)

ch.plot.bar(nba_top_ten, color_column='Player', **top_ten_layer_args)

# Draw the integer total on each bar; the offsets nudge the label relative
# to the bar's end point.
ch.plot.text(
    nba_top_ten,
    text_column='Rounded',
    font_size='13px',
    x_offset=-30,
    y_offset=1,
    **top_ten_layer_args
)

# renderers[-1] is expected to be the text layer added just above, so this
# must stay directly after the plot.text call.
ch.figure.renderers[-1].glyph.text_color = 'white'
ch.figure.plot_width = 750
ch.figure.plot_height = 750
ch.set_subtitle("NBA Player of the Week: Top 10")
# The numeric axis is pinned and then hidden; the on-bar labels carry the
# values instead.
ch.figure.xaxis.fixed_location = 25
ch.figure.xaxis.visible = False

# Dash marks on 10, 20, 30 milestones
for milestone in (10, 20, 30):
    ch.callout.line(
        location=milestone,
        orientation='height',
        line_color='white',
        line_dash='dashed',
        line_width=1,
    )
ch.show()

## NBA Player of the Week (Player & Position)

In [23]:
# Jittered strip plot: one marker per (player, position) pair, placed on the
# x-axis by the summed 'Real_value' and grouped on a categorical y-axis by
# position. Hovering a marker shows the player's name.
overall_pow = nba_df.groupby(['Player', 'Position'])['Real_value'].sum().reset_index()
source = ColumnDataSource(data = overall_pow)

# Distinct positions as a plain list (order of first appearance); reused for
# both the colour mapping and the y-axis so the two always agree.
positions = overall_pow['Position'].unique().tolist()

# Size the palette from the data instead of hard-coding 11 colours, so the
# mapping keeps working if the set of positions changes. Category20 tops out
# at 20 colours; any positions beyond that fall back to the mapper's
# default "missing" colour.
cmapper = factor_cmap('Position', palette = Category20[20][:len(positions)],
                     factors = positions)
hover = HoverTool(tooltips = [('Player', '@Player'),])

p = figure(y_range = positions, width = 750,
          height = 750, tools = [hover])

# jitter() spreads the markers randomly within each position's band so that
# players with the same total do not sit on top of each other.
# `color` sets both the fill and the outline colour of the markers.
p.scatter(x = 'Real_value',
          y = jitter('Position', width = 0.5, range = p.y_range),
          source = source, size = 10, alpha = 0.7, color = cmapper)

p.title.text = 'Player of the Week (By Player & Position)'
p.title.text_font_size = '16px'

p.xaxis.axis_label = 'Player of the Week (# of Times)'
p.yaxis.axis_label = 'Position'
p.ygrid.grid_line_color = None

show(p)