In [1]:
import numpy as np
import pandas as pd
from bokeh.core.properties import value
from bokeh.io import output_file, show, curdoc
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import CustomJS, HoverTool, Panel, Tabs, Slider, Select, CategoricalColorMapper
from bokeh.transform import dodge, factor_cmap
from bokeh.palettes import RdYlGn, Category10, Spectral6, Viridis256
from bokeh.layouts import widgetbox, row, column, gridplot

In [2]:
# Load the career table and discard its first CSV column
# (presumably a saved row index -- confirm against the file).
df_nba = (
    pd.read_csv('nba_career_PG.csv')
    .pipe(lambda raw: raw.drop(columns=raw.columns[0]))
)
df_nba.head()

Unnamed: 0,Player,enterYear,retireYear,Pos,G,GS,MP,FG,FGA,3P,...,FTA_PG,ORB_PG,DRB_PG,TRB_PG,AST_PG,STL_PG,BLK_PG,TOV_PG,PF_PG,PTS_PG
0,A.C. Green,1986,2001,PF,1361,905,39044,4778,9686,125,...,3.3,2.6,4.8,7.4,1.1,0.8,0.4,1.1,1.9,9.5
1,A.J. Bramlett,2000,2000,C,8,0,61,4,21,0,...,0.0,1.5,1.2,2.8,0.0,0.1,0.0,0.4,1.6,1.0
2,A.J. English,1991,1992,SG,151,18,3108,617,1418,9,...,2.2,0.9,1.2,2.1,2.1,0.4,0.2,1.3,1.9,9.9
3,A.J. Guyton,2001,2003,PG,80,14,1246,166,440,73,...,0.6,0.3,0.7,1.0,1.8,0.2,0.1,0.8,0.7,5.5
4,A.J. Hammons,2017,2017,C,22,0,163,17,42,5,...,0.9,0.4,1.3,1.6,0.2,0.0,0.6,0.5,1.0,2.2


In [3]:
# Collapse each position code to its last letter ('PF' -> 'F', 'SG' -> 'G', 'C' -> 'C').
# The vectorised .str accessor gives the same letter as indexing each string by hand,
# but yields NaN for a missing or empty code instead of raising mid-column.
df_nba['Pos'] = df_nba['Pos'].str[-1]
df_nba = df_nba.sort_values(by=['Player'])
# Points contributed by each shot type: the '2P' count times 2 and the '3P' count times 3
# (presumably these columns hold made shots -- confirm against the data dictionary).
df_nba['2P_P'] = df_nba['2P'] * 2
df_nba['3P_P'] = df_nba['3P'] * 3

In [16]:
# np.where(df_nba['Pos'].isna())
# df_nba['Pos'].dtype.kind
# df_nba.isna().sum()
#abc=abc.set_index('Player')

(array([], dtype=int64),)

In [4]:
## top players in career: PTS>7000 or AST>1500 or TRB>3000 or STL>500
# A player is kept when any one of the four career totals clears its threshold.
# Note that blocks (BLK) are not part of this filter.
is_top = (
    (df_nba['PTS'] > 7000)
    | (df_nba['AST'] > 1500)
    | (df_nba['TRB'] > 3000)
    | (df_nba['STL'] > 500)
)
# Copy the selection so the relabelling below never writes back into df_nba.
df_top = df_nba[is_top].copy()
# Spell out the one-letter position codes; the per-position charts filter on these names.
df_top['Pos'] = df_top['Pos'].replace({'C': 'Center', 'F': 'Forward', 'G': 'Guard'})
df_top.head()

Unnamed: 0,Player,enterYear,retireYear,Pos,G,GS,MP,FG,FGA,3P,...,DRB_PG,TRB_PG,AST_PG,STL_PG,BLK_PG,TOV_PG,PF_PG,PTS_PG,2P_P,3P_P
0,A.C. Green,1986,2001,Forward,1361,905,39044,4778,9686,125,...,4.8,7.4,1.1,0.8,0.4,1.1,1.9,9.5,9306,375
8,Aaron Brooks,2008,2017,Guard,797,226,17088,2829,6893,1025,...,1.3,1.7,3.1,0.6,0.1,1.6,1.9,9.8,3608,3075
13,Aaron McKie,1995,2007,Forward,957,330,22594,2554,5923,441,...,2.6,3.2,2.6,1.1,0.2,1.3,2.0,6.9,4226,1323
16,Aaron Williams,1994,2008,Center,838,88,13669,1777,3577,1,...,2.4,3.8,0.6,0.4,0.7,0.9,2.4,5.5,3552,3
23,Adonal Foyle,1998,2009,Center,743,269,13122,1325,2777,0,...,2.9,4.7,0.5,0.4,1.6,0.8,2.1,4.0,2650,0


In [95]:
#df_top.info()
#df_nba[['PTS','AST','TRB','STL']].describe()

In [5]:
def p_all_layout(p):
    """Apply the shared styling of the career-leader bar charts to ``p``.

    ``p`` is modified in place and nothing is returned. The legend settings
    assume the figure already has legend entries; every caller in this
    notebook adds its bars (with legends) before calling this function.
    """
    # Ranges: pin the y-axis start at zero and set the x-range padding.
    p.x_range.range_padding = 0.02
    p.y_range.start = 0

    # Text: same tick-label size on both axes, larger title.
    for axis in (p.xaxis, p.yaxis):
        axis.major_label_text_font_size = '10pt'
    p.title.text_font_size = '16pt'
    # Tilt the x tick labels (the number is an angle in radians, about 69 degrees).
    p.xaxis.major_label_orientation = 1.2

    # Declutter: no vertical grid lines, no minor ticks, no plot outline.
    p.xgrid.grid_line_color = None
    p.axis.minor_tick_line_color = None
    p.outline_line_color = None

    # Legend laid out horizontally in the top-right corner.
    p.legend.orientation = "horizontal"
    p.legend.location = "top_right"

In [12]:
## p1 Top 30 career points
# Rank df_top by career points, keep the first 30 rows, and give the three
# stacked columns readable names (they appear in the legend and the tooltip).
df_top_PTS = (
    df_top.sort_values(by=['PTS'], ascending=False)
    .reset_index()
    .head(30)
    .rename(columns={'FT': 'Free Throw', '2P_P': '2-Point', '3P_P': '3-Point'})
)

# Stack order (bottom to top) and one colour per stacked column.
Point_type = ['Free Throw', '2-Point', '3-Point']
colors = ["#c9d9d3", "#718dbf", "#e84d60"]
# Pos_list is not read again in this cell; it is kept in case another cell uses it.
Pos_list = df_top_PTS['Pos'].unique().tolist()
Player_list = df_top_PTS['Player'].unique().tolist()
s1 = ColumnDataSource(data=df_top_PTS)

# In the tooltip, $name is the hovered stack segment's name and @$name is the
# value of the column with that name.
p1 = figure(
    title="NBA Career Scoring Leaders",
    x_range=Player_list,
    plot_height=500,
    plot_width=700,
    tooltips="@Player - $name: @$name",
)

p1.vbar_stack(
    Point_type,
    x='Player',
    width=0.8,
    alpha=0.9,
    color=colors,
    source=s1,
    legend=list(map(value, Point_type)),
)

p_all_layout(p1)
# show(p1)  # uncomment to preview this chart on its own

In [13]:
## p2 Top 30 career rebounds with assists and blocks
# Rank df_top by career rebounds and keep the first 30 rows.
df_top_TRB = df_top.sort_values(by=['TRB'], ascending=False).reset_index().head(30)

Player_list = df_top_TRB['Player'].unique().tolist()
s2 = ColumnDataSource(data=df_top_TRB)

p2 = figure(
    title="NBA Career Rebound Leaders",
    x_range=Player_list,
    plot_height=500,
    plot_width=700,
    tooltips=[("Player", "@Player"), ("Rebound", "@TRB")],
)

# One wide rebound bar per player, plus narrow assist and block bars shifted
# to its right. Each tuple: (column, x offset, bar width, colour, legend text).
for stat_col, x_shift, bar_width, bar_color, legend_text in [
    ('TRB', 0, 0.5, "#fc8d59", "Rebound"),
    ('AST', 0.35, 0.15, "#a1d76a", "Assist"),
    ('BLK', 0.5, 0.15, "#3182bd", "Block"),
]:
    p2.vbar(
        x=dodge('Player', x_shift, range=p2.x_range),
        top=stat_col,
        width=bar_width,
        alpha=0.9,
        color=bar_color,
        source=s2,
        legend=value(legend_text),
    )

p_all_layout(p2)
# show(p2)  # uncomment to preview this chart on its own

In [14]:
## p3 Top 30 career assists with steals
# Rank df_top by career assists and keep the first 30 rows.
df_top_AST = df_top.sort_values(by=['AST'], ascending=False).reset_index().head(30)

Player_list = df_top_AST['Player'].unique().tolist()
s3 = ColumnDataSource(data=df_top_AST)

p3 = figure(
    title="NBA Career Assist Leaders",
    x_range=Player_list,
    plot_height=500,
    plot_width=700,
    tooltips=[("Player", "@Player"), ("Assist", "@AST")],
)

# Wide assist bar per player with a narrow steal bar shifted to its right.
# Each tuple: (column, x offset, bar width, colour, legend text).
for stat_col, x_shift, bar_width, bar_color, legend_text in [
    ('AST', 0, 0.6, "#a1d76a", "Assist"),
    ('STL', 0.35, 0.2, "#fa9fb5", "Steal"),
]:
    p3.vbar(
        x=dodge('Player', x_shift, range=p3.x_range),
        top=stat_col,
        width=bar_width,
        alpha=0.9,
        color=bar_color,
        source=s3,
        legend=value(legend_text),
    )

p_all_layout(p3)
# show(p3)  # uncomment to preview this chart on its own

In [15]:
## p4 Top 30 career steals
# Rank df_top by career steals and keep the first 30 rows.
df_top_STL = df_top.sort_values(by=['STL'], ascending=False).reset_index().head(30)

Player_list = df_top_STL['Player'].unique().tolist()
s4 = ColumnDataSource(data=df_top_STL)

p4 = figure(
    title="NBA Career Steal Leaders",
    x_range=Player_list,
    plot_height=500,
    plot_width=700,
    tooltips=[("Player", "@Player"), ("Steal", "@STL")],
)

# Single bar per player; the dodge offset is zero, so bars sit on their category.
p4.vbar(
    x=dodge('Player', 0.0, range=p4.x_range),
    top='STL',
    width=0.8,
    alpha=0.9,
    color="#fa9fb5",
    source=s4,
    legend=value("Steal"),
)

p_all_layout(p4)
# show(p4)  # uncomment to preview this chart on its own

In [16]:
## p5 Top 30 career blocks
# Rank the full table (df_nba), not df_top: df_top was filtered on points,
# assists, rebounds and steals only, so a shot-blocking specialist who is below
# all four of those thresholds would be missing from a ranking built on it.
# This chart does not use the position column, so df_nba's one-letter codes
# are not a concern here.
df_top_BLK = df_nba.sort_values(by=['BLK'], ascending=False).reset_index().head(30)

Player_list = df_top_BLK['Player'].unique().tolist()
s5 = ColumnDataSource(data=df_top_BLK)

p5 = figure(
    title="NBA Career Block Leaders",
    x_range=Player_list,
    plot_height=500,
    plot_width=700,
    tooltips=[("Player", "@Player"), ("Block", "@BLK")],
)

# Single bar per player; the dodge offset is zero, so bars sit on their category.
p5.vbar(
    x=dodge('Player', 0.0, range=p5.x_range),
    top='BLK',
    width=0.8,
    alpha=0.9,
    color="#3182bd",
    source=s5,
    legend=value("Block"),
)

p_all_layout(p5)
# show(p5)  # uncomment to preview this chart on its own

In [19]:
# Send the five career-leader charts to one HTML page: scoring across the top,
# then rebounds | blocks, then assists | steals.
output_file('NBA_top_player.html')
top_player_page = column(p1, row(p2, p5), row(p3, p4))
show(top_player_page)

In [20]:
def p_pos_layout(p):
    """Apply the shared styling of the per-position top-10 charts to ``p``.

    ``p`` is modified in place and nothing is returned. No legend settings
    are touched.
    """
    # Ranges: pin the y-axis start at zero and set the x-range padding.
    p.x_range.range_padding = 0.04
    p.y_range.start = 0

    # X axis: caption, tilted tick labels (angle in radians, about 80 degrees),
    # and a larger font for the group labels than for the individual ticks.
    p.xaxis.axis_label = "Players Grouped by Positions"
    p.xaxis.axis_label_text_font_size = '12pt'
    p.xaxis.group_text_font_size = '12pt'
    p.xaxis.major_label_orientation = 1.4

    # Same tick-label size on both axes, slightly larger title.
    for axis in (p.xaxis, p.yaxis):
        axis.major_label_text_font_size = '10pt'
    p.title.text_font_size = '15pt'

    # Declutter: no vertical grid lines, no minor ticks, no plot outline.
    p.xgrid.grid_line_color = None
    p.axis.minor_tick_line_color = None
    p.outline_line_color = None

In [27]:
## p6 Top 10 career points by Positions
# Order players from most to fewest career points within each position.
df_top_PTS_Pos0 = df_top.sort_values(by=['Pos', 'PTS'], ascending=False)
# reset_index() renumbers each position's rows from 0, so head(10) is its top ten.
df_top_PTS_Pos_C, df_top_PTS_Pos_F, df_top_PTS_Pos_G = (
    df_top_PTS_Pos0[df_top_PTS_Pos0['Pos'] == pos].reset_index().head(10)
    for pos in ('Center', 'Forward', 'Guard')
)
# The index is deliberately not reset after concatenation: each position keeps
# its own 0-9 numbering, which is used as the rank prefix below.
df_top_PTS_Pos = pd.concat([df_top_PTS_Pos_C, df_top_PTS_Pos_F, df_top_PTS_Pos_G])

# Prefix each name with its within-position rank ("0: ...", "1: ..."). groupby
# sorts its keys, so the prefix presumably exists to keep the bars in rank order.
df_top_PTS_Pos['index1'] = df_top_PTS_Pos.index
df_top_PTS_Pos['Player1'] = df_top_PTS_Pos['index1'].astype(str) + ': ' + df_top_PTS_Pos['Player']

# Each (Pos, Player1) group holds a single row, so the "PTS_mean" column that
# bokeh derives from the groupby is just that player's career total.
group = df_top_PTS_Pos.groupby(by=['Pos', 'Player1'])
colors = ['#f03b20', '#fe9929', '#78c679']
# end=1 makes the colour mapper look only at the first level (the position) of each factor.
index_cmap = factor_cmap(
    'Pos_Player1',
    palette=colors,
    factors=sorted(df_top_PTS_Pos['Pos'].unique()),
    end=1,
)

p6 = figure(
    title="Top 10 Career Scoring Leaders by Positions",
    x_range=group,
    plot_width=900,
    plot_height=500,
    tooltips=[("Points", "@PTS_mean"), ("Position, Player", "@Pos_Player1")],
)

p6.vbar(
    x='Pos_Player1',
    top='PTS_mean',
    width=0.8,
    alpha=0.9,
    source=group,
    line_color="white",
    fill_color=index_cmap,
)

p_pos_layout(p6)
tab1 = Panel(child=p6, title="Score")
# show(p6)  # uncomment to preview this chart on its own

In [28]:
## p7 Top 10 career rebounds by Positions
# Order players from most to fewest career rebounds within each position.
df_top_TRB_Pos0 = df_top.sort_values(by=['Pos', 'TRB'], ascending=False)
# reset_index() renumbers each position's rows from 0, so head(10) is its top ten.
df_top_TRB_Pos_C, df_top_TRB_Pos_F, df_top_TRB_Pos_G = (
    df_top_TRB_Pos0[df_top_TRB_Pos0['Pos'] == pos].reset_index().head(10)
    for pos in ('Center', 'Forward', 'Guard')
)
# The index is deliberately not reset after concatenation: each position keeps
# its own 0-9 numbering, which is used as the rank prefix below.
df_top_TRB_Pos = pd.concat([df_top_TRB_Pos_C, df_top_TRB_Pos_F, df_top_TRB_Pos_G])

# Prefix each name with its within-position rank ("0: ...", "1: ..."). groupby
# sorts its keys, so the prefix presumably exists to keep the bars in rank order.
df_top_TRB_Pos['index1'] = df_top_TRB_Pos.index
df_top_TRB_Pos['Player1'] = df_top_TRB_Pos['index1'].astype(str) + ': ' + df_top_TRB_Pos['Player']

# Each (Pos, Player1) group holds a single row, so the "TRB_mean" column that
# bokeh derives from the groupby is just that player's career total.
group = df_top_TRB_Pos.groupby(by=['Pos', 'Player1'])
colors = ['#f03b20', '#fe9929', '#78c679']
# end=1 makes the colour mapper look only at the first level (the position) of each factor.
index_cmap = factor_cmap(
    'Pos_Player1',
    palette=colors,
    factors=sorted(df_top_TRB_Pos['Pos'].unique()),
    end=1,
)

# The hover label names the statistic actually plotted (rebounds, not points).
p7 = figure(
    title="Top 10 Career Rebound Leaders by Positions",
    x_range=group,
    plot_width=900,
    plot_height=500,
    tooltips=[("Rebounds", "@TRB_mean"), ("Position, Player", "@Pos_Player1")],
)

p7.vbar(
    x='Pos_Player1',
    top='TRB_mean',
    width=0.8,
    alpha=0.9,
    source=group,
    line_color="white",
    fill_color=index_cmap,
)

p_pos_layout(p7)
tab2 = Panel(child=p7, title="Rebound")
# show(p7)  # uncomment to preview this chart on its own

In [29]:
## p8 Top 10 career blocks by Positions
# Order players from most to fewest career blocks within each position.
# NOTE(review): df_top was not filtered on BLK, so a high-block player who is
# below all of its thresholds cannot appear here -- confirm this is acceptable.
df_top_BLK_Pos0 = df_top.sort_values(by=['Pos', 'BLK'], ascending=False)
# reset_index() renumbers each position's rows from 0, so head(10) is its top ten.
df_top_BLK_Pos_C, df_top_BLK_Pos_F, df_top_BLK_Pos_G = (
    df_top_BLK_Pos0[df_top_BLK_Pos0['Pos'] == pos].reset_index().head(10)
    for pos in ('Center', 'Forward', 'Guard')
)
# The index is deliberately not reset after concatenation: each position keeps
# its own 0-9 numbering, which is used as the rank prefix below.
df_top_BLK_Pos = pd.concat([df_top_BLK_Pos_C, df_top_BLK_Pos_F, df_top_BLK_Pos_G])

# Prefix each name with its within-position rank ("0: ...", "1: ..."). groupby
# sorts its keys, so the prefix presumably exists to keep the bars in rank order.
df_top_BLK_Pos['index1'] = df_top_BLK_Pos.index
df_top_BLK_Pos['Player1'] = df_top_BLK_Pos['index1'].astype(str) + ': ' + df_top_BLK_Pos['Player']

# Each (Pos, Player1) group holds a single row, so the "BLK_mean" column that
# bokeh derives from the groupby is just that player's career total.
group = df_top_BLK_Pos.groupby(by=['Pos', 'Player1'])
colors = ['#f03b20', '#fe9929', '#78c679']
# end=1 makes the colour mapper look only at the first level (the position) of each factor.
index_cmap = factor_cmap(
    'Pos_Player1',
    palette=colors,
    factors=sorted(df_top_BLK_Pos['Pos'].unique()),
    end=1,
)

# The hover label names the statistic actually plotted (blocks, not points).
p8 = figure(
    title="Top 10 Career Block Leaders by Positions",
    x_range=group,
    plot_width=900,
    plot_height=500,
    tooltips=[("Blocks", "@BLK_mean"), ("Position, Player", "@Pos_Player1")],
)

p8.vbar(
    x='Pos_Player1',
    top='BLK_mean',
    width=0.8,
    alpha=0.9,
    source=group,
    line_color="white",
    fill_color=index_cmap,
)

p_pos_layout(p8)
tab3 = Panel(child=p8, title="Block")
# show(p8)  # uncomment to preview this chart on its own

In [30]:
## p9 Top 10 career assists by Positions
# Order players from most to fewest career assists within each position.
df_top_AST_Pos0 = df_top.sort_values(by=['Pos', 'AST'], ascending=False)
# reset_index() renumbers each position's rows from 0, so head(10) is its top ten.
df_top_AST_Pos_C, df_top_AST_Pos_F, df_top_AST_Pos_G = (
    df_top_AST_Pos0[df_top_AST_Pos0['Pos'] == pos].reset_index().head(10)
    for pos in ('Center', 'Forward', 'Guard')
)
# The index is deliberately not reset after concatenation: each position keeps
# its own 0-9 numbering, which is used as the rank prefix below.
df_top_AST_Pos = pd.concat([df_top_AST_Pos_C, df_top_AST_Pos_F, df_top_AST_Pos_G])

# Prefix each name with its within-position rank ("0: ...", "1: ..."). groupby
# sorts its keys, so the prefix presumably exists to keep the bars in rank order.
df_top_AST_Pos['index1'] = df_top_AST_Pos.index
df_top_AST_Pos['Player1'] = df_top_AST_Pos['index1'].astype(str) + ': ' + df_top_AST_Pos['Player']

# Each (Pos, Player1) group holds a single row, so the "AST_mean" column that
# bokeh derives from the groupby is just that player's career total.
group = df_top_AST_Pos.groupby(by=['Pos', 'Player1'])
colors = ['#f03b20', '#fe9929', '#78c679']
# end=1 makes the colour mapper look only at the first level (the position) of each factor.
index_cmap = factor_cmap(
    'Pos_Player1',
    palette=colors,
    factors=sorted(df_top_AST_Pos['Pos'].unique()),
    end=1,
)

# The hover label names the statistic actually plotted (assists, not points).
p9 = figure(
    title="Top 10 Career Assist Leaders by Positions",
    x_range=group,
    plot_width=900,
    plot_height=500,
    tooltips=[("Assists", "@AST_mean"), ("Position, Player", "@Pos_Player1")],
)

p9.vbar(
    x='Pos_Player1',
    top='AST_mean',
    width=0.8,
    alpha=0.9,
    source=group,
    line_color="white",
    fill_color=index_cmap,
)

p_pos_layout(p9)
tab4 = Panel(child=p9, title="Assist")
# show(p9)  # uncomment to preview this chart on its own

In [31]:
## p10 Top 10 career steals by Positions
# Order players from most to fewest career steals within each position.
df_top_STL_Pos0 = df_top.sort_values(by=['Pos', 'STL'], ascending=False)
# reset_index() renumbers each position's rows from 0, so head(10) is its top ten.
df_top_STL_Pos_C, df_top_STL_Pos_F, df_top_STL_Pos_G = (
    df_top_STL_Pos0[df_top_STL_Pos0['Pos'] == pos].reset_index().head(10)
    for pos in ('Center', 'Forward', 'Guard')
)
# The index is deliberately not reset after concatenation: each position keeps
# its own 0-9 numbering, which is used as the rank prefix below.
df_top_STL_Pos = pd.concat([df_top_STL_Pos_C, df_top_STL_Pos_F, df_top_STL_Pos_G])

# Prefix each name with its within-position rank ("0: ...", "1: ..."). groupby
# sorts its keys, so the prefix presumably exists to keep the bars in rank order.
df_top_STL_Pos['index1'] = df_top_STL_Pos.index
df_top_STL_Pos['Player1'] = df_top_STL_Pos['index1'].astype(str) + ': ' + df_top_STL_Pos['Player']

# Each (Pos, Player1) group holds a single row, so the "STL_mean" column that
# bokeh derives from the groupby is just that player's career total.
group = df_top_STL_Pos.groupby(by=['Pos', 'Player1'])
colors = ['#f03b20', '#fe9929', '#78c679']
# end=1 makes the colour mapper look only at the first level (the position) of each factor.
index_cmap = factor_cmap(
    'Pos_Player1',
    palette=colors,
    factors=sorted(df_top_STL_Pos['Pos'].unique()),
    end=1,
)

# The hover label names the statistic actually plotted (steals, not points).
p10 = figure(
    title="Top 10 Career Steal Leaders by Positions",
    x_range=group,
    plot_width=900,
    plot_height=500,
    tooltips=[("Steals", "@STL_mean"), ("Position, Player", "@Pos_Player1")],
)

p10.vbar(
    x='Pos_Player1',
    top='STL_mean',
    width=0.8,
    alpha=0.9,
    source=group,
    line_color="white",
    fill_color=index_cmap,
)

p_pos_layout(p10)
tab5 = Panel(child=p10, title="Steal")
# show(p10)  # uncomment to preview this chart on its own

In [32]:
# Send the five per-position charts, one per tab, to their own HTML page.
output_file('NBA_top_player_pos.html')
tabs = Tabs(tabs=[tab1, tab2, tab3, tab4, tab5])
show(tabs)