# Package import...

In [1]:
from bokeh.io import output_file, output_notebook                                          #talk output
from bokeh.plotting import figure, show                                                    #talk plotting
from bokeh.models import ColumnDataSource,CDSView, GroupFilter,NumeralTickFormatter        #talk how to read data
from bokeh.layouts import row, column, gridplot                                            #talk layout
from bokeh.models.widgets import Tabs, Panel                                               #talk widgets

import pandas as pd
import numpy as np

In [2]:
%pwd

'/Users/ram/Desktop/INFO 6101 Spring 2021'

In [3]:
!ls Data

[31m2012-18_playerBoxScore.csv[m[m Advertising.csv
[31m2012-18_standings.csv[m[m      Titanic_full.csv
[31m2016-17_teamBoxScore.csv[m[m


In [13]:
player_stats = pd.read_csv('/Users/ram/Desktop/INFO 6101 Spring 2021/Data/2012-18_playerBoxScore.csv')
team_stats =  pd.read_csv('/Users/ram/Desktop/INFO 6101 Spring 2021/Data/2016-17_teamBoxScore.csv')
standings = pd.read_csv('/Users/ram/Desktop/INFO 6101 Spring 2021/Data/2012-18_standings.csv')

# Starting with emptiness

In [4]:
'''Look at the html page first'''

output_file('output_file_test.html', 
            title='Nothing but emptiness here...')

'''Inside the html file is going to be a figure, although empty'''
fig = figure()

'''Proof'''
show(fig)



In [5]:
'''Reset'''
from bokeh.plotting import reset_output

'''Use reset_output() between subsequent show() calls, as needed'''
reset_output()

In [6]:
'''The figure will be rendered inline in my Jupyter Notebook'''

output_notebook()

'''Example figure'''
fig = figure(background_fill_color='gray',
             background_fill_alpha=0.5,
             border_fill_color='blue',
             border_fill_alpha=0.25,
             plot_height=300,
             plot_width=500,
             h_symmetry=True,
             x_axis_label='X Label',
             x_axis_type='datetime',
             x_axis_location='above',
             x_range=('2018-01-01', '2018-06-30'),
             y_axis_label='Y Label',
             y_axis_type='linear',
             y_axis_location='left',
             y_range=(0, 100),
             title='Example Figure',
             title_location='right',
             toolbar_location='below',
             tools='save')

'''Show and tell'''
show(fig)



In [7]:
'''Remove the gridlines from the figure() object'''
fig.grid.grid_line_color = None
show(fig)



In [8]:
'''A simple visualization'''


'''My x-y coordinate data'''

x = [1, 2, 1]
y = [1, 1, 2]

'''Output the visualization directly in the notebook'''

output_file('first_glyphs.html', title='First Glyphs')

'''Create a figure with no toolbar and axis ranges of [0,3]'''
fig = figure(title='My Coordinates',
             plot_height=300, plot_width=300,
             x_range=(0, 3), y_range=(0, 3),
             toolbar_location=None)

'''Draw the coordinates as circles'''

fig.circle(x=x, y=y,
           color='green', size=10, alpha=0.5)
show(fig)

# Throw the data in

In [14]:
print(player_stats.shape,team_stats.shape,standings.shape)

(155713, 51) (2460, 123) (29520, 39)


# Visualize a race

In [15]:
'''The race for first place in NBA’s Western Conference in 2017-18.
Defending champion is Golden State Warriors and the challenger is Houston Rockets. 
The daily win-loss records of these two teams can be stored in a DataFrame named west_top_2'''

west_top_2 = (standings[(standings['teamAbbr'] == 'HOU') | (standings['teamAbbr'] == 'GS')].loc[:, ['stDate', 'teamAbbr', 'gameWon']]
 .sort_values(['teamAbbr','stDate']))

print(west_top_2.shape)
west_top_2.head(5)

(1968, 3)


Unnamed: 0,stDate,teamAbbr,gameWon
9,2012-10-30,GS,0
39,2012-10-31,GS,1
69,2012-11-01,GS,1
99,2012-11-02,GS,1
129,2012-11-03,GS,2


In [16]:
'''Select the data for Rockets and Wariors'''

rockets_data = west_top_2[west_top_2['teamAbbr'] == 'HOU']
warriors_data = west_top_2[west_top_2['teamAbbr'] == 'GS']
print(rockets_data.shape,warriors_data.shape)


(984, 3) (984, 3)


In [17]:
'''Create the mappings with columndata source'''

rockets_cds = ColumnDataSource(rockets_data)
warriors_cds = ColumnDataSource(warriors_data)


In [19]:
'''Set the output file before specifying the visualization'''

output_file('west-top-2-standings-race.html', 
            title='Western Conference Top 2 Teams Win Race')

In [20]:
'''Time to design the actual visual for past wins Vs. time'''

fig = figure(x_axis_type='datetime',
             plot_height=300, plot_width=600,
             title='Western Conference Top 2 Teams Wins Race, 2017-18',
             x_axis_label='Date', y_axis_label='Wins',
             toolbar_location=None)

In [21]:
'''A step-line plot'''

fig.step('stDate', 'gameWon', 
         color='red', legend='Rockets', 
         source=rockets_cds)
fig.step('stDate', 'gameWon', 
         color='blue', legend='Warriors', 
         source=warriors_cds)

'''Legend on the upper left corner'''

fig.legend.location = 'top_left'
show(fig)



# Creating behaviours: select data points

In [22]:
'''Data selection'''

'''Find players who took at least 1 three-point shot during the entire season'''

three_takers = player_stats[player_stats['play3PA'] > 0]

'''Clean up the player names, placing them in a single column'''

three_takers['name'] = [f'{p["playFNm"]} {p["playLNm"]}' 
                        for _, p in three_takers.iterrows()]

'''Aggregate the total three-point attempts and makes for each player'''

three_takers = (three_takers.groupby('name')
                            .sum()
                            .loc[:,['play3PA', 'play3PM']]
                            .sort_values('play3PA', ascending=False))

'''Filter out anyone who didn't take at least 100 three-point shots'''

three_takers = three_takers[three_takers['play3PA'] >= 100].reset_index()

'''Add a column with a calculated three-point percentage (made/attempted)'''

three_takers['pct3PM'] = three_takers['play3PM'] / three_takers['play3PA']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [23]:
'''Specify output file'''

output_file('three-point-att-vs-pct.html',
            title='Three-Point Attempts vs. Percentage')

'''Store the data in a ColumnDataSource'''

three_takers_cds = ColumnDataSource(three_takers)

'''Specify the selection tools to be made available'''

select_tools = ['box_select', 'lasso_select', 'poly_select', 'tap', 'reset']

'''Create the figure'''

fig = figure(plot_height=400,
             plot_width=600,
             x_axis_label='Three-Point Shots Attempted',
             y_axis_label='Percentage Made',
             title='3PT Shots Attempted vs. Percentage Made (min. 100 3PA), 2017-18',
             toolbar_location='below',
             tools=select_tools)

'''Format the y-axis tick labels as percentages'''

fig.yaxis[0].formatter = NumeralTickFormatter(format='00.0%')

'''Add square representing each player'''

fig.square(x='play3PA',
           y='pct3PM',
           source=three_takers_cds,
           color='royalblue',
           selection_color='deepskyblue',
           nonselection_color='lightgray',
           nonselection_alpha=0.3)

show(fig)