# Advanced Pandas - Plotting and presenting your data
**Abid Ali**

Skype: Abd.Soft

Email: [abdsoftfsd@gmail.com](mailto:abdsoftfsd@gmail.com)

In [120]:
import pandas as pd
import os


In [121]:
# Load the two pickled frames used throughout the notebook.
# NOTE: only unpickle files from a trusted source; unpickling can run arbitrary code.
data_dir = os.path.join('data', 'modified')
team_splits = pd.read_pickle(os.path.join(data_dir, 'team_splits_periods.pickle'))
scoring = pd.read_pickle(os.path.join(data_dir, 'scoring.pickle'))


In [122]:
# Keep only the Anaheim Ducks rows, ordered by the (monthly) index.
is_ducks = team_splits['name'] == 'Anaheim Ducks'
ducks = team_splits.loc[is_ducks].sort_index()
ducks.head(10)


Unnamed: 0_level_0,name,L,OL,T,W
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006-01,Anaheim Ducks,5.0,2.0,,4.0
2006-02,Anaheim Ducks,5.0,2.0,,5.0
2006-03,Anaheim Ducks,3.0,2.0,,10.0
2006-04,Anaheim Ducks,0.0,2.0,,1.0
2006-10,Anaheim Ducks,0.0,3.0,,9.0
2006-11,Anaheim Ducks,2.0,3.0,,10.0
2006-12,Anaheim Ducks,5.0,0.0,,9.0
2007-01,Anaheim Ducks,4.0,1.0,,8.0
2007-02,Anaheim Ducks,2.0,1.0,,10.0
2007-03,Anaheim Ducks,4.0,1.0,,8.0


In [123]:
# Column totals per calendar year ('A' is the annual resampling rule).
ducks.resample('A').sum()


Unnamed: 0_level_0,L,OL,T,W
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2006,20.0,14.0,0.0,48.0
2007,27.0,8.0,0.0,47.0
2008,33.0,7.0,0.0,42.0
2009,32.0,11.0,0.0,39.0
2010,30.0,5.0,0.0,47.0
2011,36.0,12.0,0.0,34.0


In [124]:
def _shift_early_months(ts):
    """Move January-April one year forward; leave every other month unchanged."""
    return ts + pd.DateOffset(years=1) if ts.month < 5 else ts


# Build the shifted index from the timestamp version of the current index.
# `ducks` itself is not modified here.
ducks_index = ducks.to_timestamp().index.map(_shift_early_months)

In [125]:
# Inspect the shifted index; it keeps the original row order, so it is not sorted yet.
ducks_index

DatetimeIndex(['2007-01-01', '2007-02-01', '2007-03-01', '2007-04-01',
               '2006-10-01', '2006-11-01', '2006-12-01', '2008-01-01',
               '2008-02-01', '2008-03-01', '2008-04-01', '2007-09-01',
               '2007-10-01', '2007-11-01', '2007-12-01', '2009-01-01',
               '2009-02-01', '2009-03-01', '2009-04-01', '2008-10-01',
               '2008-11-01', '2008-12-01', '2010-01-01', '2010-02-01',
               '2010-03-01', '2010-04-01', '2009-10-01', '2009-11-01',
               '2009-12-01', '2011-01-01', '2011-02-01', '2011-03-01',
               '2011-04-01', '2010-10-01', '2010-11-01', '2010-12-01',
               '2012-01-01', '2012-02-01', '2012-03-01', '2012-04-01',
               '2011-10-01', '2011-11-01', '2011-12-01'],
              dtype='datetime64[ns]', name='month', freq=None)

In [126]:
# Replace the index with the shifted timestamps (rows are matched by position).
ducks.index = ducks_index


In [127]:
# Restore chronological order after the index shift.
ducks = ducks.sort_index()
ducks.head()


Unnamed: 0_level_0,name,L,OL,T,W
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006-10-01,Anaheim Ducks,0.0,3.0,,9.0
2006-11-01,Anaheim Ducks,2.0,3.0,,10.0
2006-12-01,Anaheim Ducks,5.0,0.0,,9.0
2007-01-01,Anaheim Ducks,5.0,2.0,,4.0
2007-02-01,Anaheim Ducks,5.0,2.0,,5.0


In [128]:
# Group the rows into 12-month bins that end in June ('A-JUN').
resampler = ducks.resample('A-JUN')
resampler


<pandas.core.resample.DatetimeIndexResampler object at 0x0000026BC69B64F0>

In [129]:
# Totals per June-ending bin.
resampler.sum()

Unnamed: 0_level_0,L,OL,T,W
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2007-06-30,20.0,14.0,0.0,48.0
2008-06-30,27.0,8.0,0.0,47.0
2009-06-30,33.0,7.0,0.0,42.0
2010-06-30,32.0,11.0,0.0,39.0
2011-06-30,30.0,5.0,0.0,47.0
2012-06-30,36.0,12.0,0.0,34.0


In [130]:
def _running_totals(group):
    """Cumulative W and L totals within one resampled group."""
    return group.reset_index()[["W", "L"]].expanding().sum()


# resampler.expanding().sum() is not available, so the expanding sum is
# applied to each resampled group separately instead.
res = resampler.apply(_running_totals)
res.head(8)


Unnamed: 0_level_0,Unnamed: 1_level_0,W,L
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2007-06-30,0,9.0,0.0
2007-06-30,1,19.0,2.0
2007-06-30,2,28.0,7.0
2007-06-30,3,32.0,12.0
2007-06-30,4,37.0,17.0
2007-06-30,5,47.0,20.0
2007-06-30,6,48.0,20.0
2008-06-30,0,1.0,1.0


In [131]:
# Swap the (bin, position) index for the monthly timestamps of `ducks`.
# This is a positional match: it relies on `res` having the same row order
# and length as the sorted `ducks` frame.
res.index = ducks.index
res.head()


Unnamed: 0_level_0,W,L
month,Unnamed: 1_level_1,Unnamed: 2_level_1
2006-10-01,9.0,0.0
2006-11-01,19.0,2.0
2006-12-01,28.0,7.0
2007-01-01,32.0,12.0
2007-02-01,37.0,17.0


In [132]:
# Re-index to a regular month-start frequency ('MS'); months with no data
# appear as NaN rows.
final = res.asfreq('MS')
final.head(10)


Unnamed: 0_level_0,W,L
month,Unnamed: 1_level_1,Unnamed: 2_level_1
2006-10-01,9.0,0.0
2006-11-01,19.0,2.0
2006-12-01,28.0,7.0
2007-01-01,32.0,12.0
2007-02-01,37.0,17.0
2007-03-01,47.0,20.0
2007-04-01,48.0,20.0
2007-05-01,,
2007-06-01,,
2007-07-01,,


In [133]:
# Turn the 'month' index into an ordinary column; the result gets its own
# name so `final` keeps its datetime index.
fdf = final.reset_index(drop=False)

In [134]:
# Preview the flattened frame that feeds the plot.
fdf.head()

Unnamed: 0,month,W,L
0,2006-10-01,9.0,0.0
1,2006-11-01,19.0,2.0
2,2006-12-01,28.0,7.0
3,2007-01-01,32.0,12.0
4,2007-02-01,37.0,17.0


In [135]:
!pip install bokeh



In [136]:
from bokeh.plotting import figure, output_notebook, show
# Direct Bokeh output to the notebook so show() renders figures inline.
output_notebook()


In [137]:
from bokeh.models.sources import ColumnDataSource as cds


In [138]:
# Running wins and losses over time, one line each.
source = cds(fdf)
p = figure(x_axis_type="datetime", height=500, width=500)
# `legend=` was deprecated in Bokeh 1.4 and removed in 3.0; `legend_label=`
# is the replacement for a fixed legend entry.
p.line(source=source, x="month", y="W", color="green", legend_label="Wins")
p.line(source=source, x="month", y="L", color="red", legend_label="Losses")
# Clicking a legend entry toggles the visibility of its line.
p.legend.click_policy = "hide"
p.title.text = "Anaheim Ducks Performance by Season"
p.title.text_font_size = "25px"
p.title.align = 'center'
show(p)

In [139]:
# Work on a copy so the loaded `scoring` frame is left untouched.
scoring_copy = scoring.copy()
scoring_copy.head()

Unnamed: 0,playerID,year,tmID,GP,G,A,Pts,SOG
0,aaltoan01,1997,ANA,3.0,0.0,0.0,0.0,1.0
1,aaltoan01,1998,ANA,73.0,3.0,5.0,8.0,61.0
2,aaltoan01,1999,ANA,63.0,7.0,11.0,18.0,102.0
3,aaltoan01,2000,ANA,12.0,1.0,1.0,2.0,18.0
4,abdelju01,2007,DET,2.0,0.0,0.0,0.0,6.0


In [140]:
# Index the rows by player so a single player can be selected with .loc.
scoring_copy = scoring_copy.set_index('playerID')
scoring_copy.head()

Unnamed: 0_level_0,year,tmID,GP,G,A,Pts,SOG
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
aaltoan01,1997,ANA,3.0,0.0,0.0,0.0,1.0
aaltoan01,1998,ANA,73.0,3.0,5.0,8.0,61.0
aaltoan01,1999,ANA,63.0,7.0,11.0,18.0,102.0
aaltoan01,2000,ANA,12.0,1.0,1.0,2.0,18.0
abdelju01,2007,DET,2.0,0.0,0.0,0.0,6.0


In [141]:
# Rows for player 'gretzwa01', reduced to the columns used below, plus a
# derived points-per-game column.
wayne = (
    scoring_copy
    .loc['gretzwa01', ['year', 'tmID', 'GP', 'Pts']]
    .assign(pts_per_game=lambda season: season['Pts'] / season['GP'])
)
wayne.head()


Unnamed: 0_level_0,year,tmID,GP,Pts,pts_per_game
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
gretzwa01,1980,EDM,80.0,164.0,2.05
gretzwa01,1981,EDM,80.0,212.0,2.65
gretzwa01,1982,EDM,80.0,196.0,2.45
gretzwa01,1983,EDM,74.0,205.0,2.77027
gretzwa01,1984,EDM,80.0,208.0,2.6


In [142]:
# The team column is categorical and still carries every team of the full data set.
wayne.tmID.head()


playerID
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
Name: tmID, dtype: category
Categories (37, object): ['ANA', 'AND', 'ATL', 'BOS', ..., 'VAN', 'WAS', 'WIN', 'WPG']

In [143]:
# Drop the team categories that do not occur in this player's rows.
# Plain column assignment replaces the column (including its categorical
# dtype); `.loc[:, 'tmID'] = ...` tries to write into the existing column on
# pandas >= 2.0, which is not what a dtype change needs.
wayne['tmID'] = wayne.tmID.cat.remove_unused_categories()
wayne.tmID.head()

playerID
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
Name: tmID, dtype: category
Categories (4, object): ['EDM', 'LAK', 'NYR', 'STL']

In [144]:
# Integer code of each team category, used later as the bar's vertical slot.
team_codes = wayne['tmID'].cat.codes
wayne.loc[:, 'tmCode'] = team_codes
wayne.sample(5)


Unnamed: 0_level_0,year,tmID,GP,Pts,pts_per_game,tmCode
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
gretzwa01,1990,LAK,78.0,163.0,2.089744,1
gretzwa01,1982,EDM,80.0,196.0,2.45,0
gretzwa01,1988,LAK,78.0,168.0,2.153846,1
gretzwa01,1996,NYR,82.0,97.0,1.182927,2
gretzwa01,1995,LAK,62.0,81.0,1.306452,1


In [145]:
# Another random peek; no seed is set, so the rows differ on every run.
wayne.sample(5)


Unnamed: 0_level_0,year,tmID,GP,Pts,pts_per_game,tmCode
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
gretzwa01,1981,EDM,80.0,212.0,2.65,0
gretzwa01,1993,LAK,81.0,130.0,1.604938,1
gretzwa01,1980,EDM,80.0,164.0,2.05,0
gretzwa01,1991,LAK,74.0,121.0,1.635135,1
gretzwa01,1985,EDM,80.0,215.0,2.6875,0


In [146]:
# Convert the numeric year to a timestamp (1 January of that year) for the
# datetime axis. Assign the column directly: this changes its dtype, and
# `.loc[:, 'year'] = ...` writes into the existing column on pandas >= 2.0.
wayne['year'] = pd.to_datetime(wayne.year, format="%Y")


In [147]:
# Value ranges used for scaling bar heights (GP) and the colour scale (pts_per_game).
games_played = wayne['GP']
points_rate = wayne['pts_per_game']
gp_min, gp_max = games_played.min(), games_played.max()
pts_per_game_min, pts_per_game_max = points_rate.min(), points_rate.max()


In [148]:
# Bar geometry: the height is the season's share of the maximum games played,
# and each bar starts at its team's integer code.
games_share = wayne['GP'] / gp_max
wayne.loc[:, 'height'] = games_share
wayne.loc[:, 'bottom'] = wayne['tmCode']
wayne.loc[:, 'top'] = wayne['bottom'] + wayne['height']
wayne.head()


Unnamed: 0_level_0,year,tmID,GP,Pts,pts_per_game,tmCode,height,bottom,top
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
gretzwa01,1980-01-01,EDM,80.0,164.0,2.05,0,0.97561,0,0.97561
gretzwa01,1981-01-01,EDM,80.0,212.0,2.65,0,0.97561,0,0.97561
gretzwa01,1982-01-01,EDM,80.0,196.0,2.45,0,0.97561,0,0.97561
gretzwa01,1983-01-01,EDM,74.0,205.0,2.77027,0,0.902439,0,0.902439
gretzwa01,1984-01-01,EDM,80.0,208.0,2.6,0,0.97561,0,0.97561


In [149]:
# First attempt: one bar per season on a categorical (team) y-axis.
src = cds(wayne)
team_labels = list(wayne['tmID'].cat.categories)
p = figure(x_axis_type='datetime', height=500, width=500, y_range=team_labels)
# NOTE(review): width=1 is a single x-axis unit; on a datetime axis that is
# presumably 1 ms, so the bars come out hairline-thin. A later cell defines a
# much larger `width`.
p.vbar(x="year", bottom='bottom', top='top', width=1, source=src)
for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "12pt"

show(p)


In [190]:
# Teams in order of first appearance when the rows are sorted by year.
by_year = wayne.sort_values('year')
reordered = by_year['tmID'].unique()
reordered


['EDM', 'LAK', 'STL', 'NYR']
Categories (4, object): ['EDM', 'LAK', 'NYR', 'STL']

In [204]:
# Preview only: the reordered categorical is displayed, not stored.
wayne.tmID.cat.reorder_categories(list(reordered), ordered=False)

playerID
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    EDM
gretzwa01    LAK
gretzwa01    LAK
gretzwa01    LAK
gretzwa01    LAK
gretzwa01    LAK
gretzwa01    LAK
gretzwa01    LAK
gretzwa01    LAK
gretzwa01    STL
gretzwa01    NYR
gretzwa01    NYR
gretzwa01    NYR
Name: tmID, dtype: category
Categories (4, object): ['EDM', 'LAK', 'STL', 'NYR']

In [205]:
# Store the chronologically ordered categories and rebuild everything derived
# from the category codes.
# Plain column assignment is required here: on pandas >= 2.0,
# `.loc[:, 'tmID'] = ...` writes into the existing categorical column, which
# recodes the values into the OLD category order, so the reorder would be
# silently lost.
wayne['tmID'] = wayne.tmID.cat.reorder_categories(list(reordered), ordered=False)
wayne['tmCode'] = wayne.tmID.cat.codes
wayne['bottom'] = wayne.tmCode
wayne['top'] = wayne.bottom + wayne.height


In [209]:
# Check the codes after reordering: they should now follow the new category order.
wayne.tmID.cat.codes

playerID
gretzwa01    0
gretzwa01    0
gretzwa01    0
gretzwa01    0
gretzwa01    0
gretzwa01    0
gretzwa01    0
gretzwa01    0
gretzwa01    1
gretzwa01    1
gretzwa01    1
gretzwa01    1
gretzwa01    1
gretzwa01    1
gretzwa01    1
gretzwa01    1
gretzwa01    2
gretzwa01    3
gretzwa01    3
gretzwa01    3
dtype: int8

In [206]:
# Bar width expressed in milliseconds: 320 days' worth.
MS_PER_DAY = 24 * 60 * 60 * 1000
width = 320 * MS_PER_DAY


In [207]:
# Category labels in their current (reordered) order; these become the y-axis range.
team_categories = wayne['tmID'].cat.categories
list(team_categories)


['EDM', 'LAK', 'STL', 'NYR']

In [208]:
# Same bar chart, now with the wide `width` and the reordered team axis.
src = cds(wayne)
team_labels = list(wayne['tmID'].cat.categories)
p = figure(x_axis_type='datetime', height=500, width=500, y_range=team_labels)
p.vbar(x='year', bottom='bottom', top='top', width=width, source=src)
for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "12pt"

show(p)


In [214]:
# Shrink the bars to 80% of a slot and centre them vertically: slot i spans
# i..i+1, so its middle is i + 0.5.
# Assign the columns directly: 'bottom' previously held integer codes and
# becomes float here, and `.loc[:, col] = ...` writes into the existing
# column on pandas >= 2.0 (setting an incompatible dtype that way is
# deprecated).
wayne['height'] = wayne.GP / gp_max * 0.8
wayne['bottom'] = wayne.tmCode - (wayne.height / 2.0) + 0.5
wayne['top'] = wayne.bottom + wayne.height


In [215]:
# Redraw with the recomputed bottom/top columns.
src = cds(wayne)
team_labels = list(wayne['tmID'].cat.categories)
p = figure(x_axis_type='datetime', height=500, width=500, y_range=team_labels)
p.vbar(x='year', bottom='bottom', top='top', width=width, source=src)
for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "12pt"

show(p)


In [216]:
from bokeh.models import LinearColorMapper

# Map points-per-game linearly onto the Plasma256 palette, spanning the
# observed minimum to maximum.
color_mapper = LinearColorMapper(palette="Plasma256",
                                 low=pts_per_game_min,
                                 high=pts_per_game_max)


In [219]:
from bokeh.models import ColorBar

# Bars coloured by points per game, with a colour bar as the key.
src = cds(wayne)
team_labels = list(wayne['tmID'].cat.categories)
p = figure(x_axis_type='datetime', height=500, width=500, y_range=team_labels)
p.vbar(
    x='year', bottom='bottom', top='top', width=width,
    color=dict(field='pts_per_game', transform=color_mapper),
    source=src,
)

color_bar = ColorBar(
    color_mapper=color_mapper,
    label_standoff=8,
    border_line_color=None,
    location=(0, 0),
)
p.add_layout(color_bar, 'right')

for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "12pt"

show(p)


In [220]:
from bokeh.models import HoverTool

# Tooltip rows; "@name" pulls the value from the matching data-source column.
hover = HoverTool(tooltips=[
    ('Points per game', "@pts_per_game"),
    ("Games Played", '@GP'),
])

# Same coloured bar chart as before, with the hover tool added.
src = cds(wayne)
team_labels = list(wayne['tmID'].cat.categories)
p = figure(x_axis_type='datetime', height=500, width=500, y_range=team_labels)
p.vbar(
    x='year', bottom='bottom', top='top', width=width,
    color=dict(field='pts_per_game', transform=color_mapper),
    source=src,
)

color_bar = ColorBar(
    color_mapper=color_mapper,
    label_standoff=8,
    border_line_color=None,
    location=(0, 0),
)
p.add_layout(color_bar, 'right')
p.add_tools(hover)

for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "12pt"

show(p)
