In [None]:
"""Bokeh Visualization Template

This template is a general outline for turning your data into a 
visualization using Bokeh.
"""
# Data handling
import pandas as pd
import numpy as np

In [None]:
# Bokeh libraries
from bokeh.io import output_file, output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.layouts import row, column, gridplot
from bokeh.models.widgets import Tabs, Panel

In [None]:
# Prepare the data

# Determine where the visualization will be rendered
output_file('filename.html')  # Render to static HTML, or 
output_notebook()  # Render inline in a Jupyter Notebook

In [None]:
# Set up the figure(s)
fig = figure()  # Instantiate a figure() object

# Connect to and draw the data

# Organize the layout

# Preview and save 
show(fig)  # See what I made, and save if I like it

In [1]:
# Bokeh Libraries
from bokeh.io import output_file
from bokeh.plotting import figure, show

In [2]:
# The figure will be rendered in a static HTML file called output_file_test.html
output_file('output_file_test.html', 
            title='Empty Bokeh Figure')

In [3]:
# Set up a generic figure() object
fig = figure()

# See what it looks like
show(fig)



If you were to run the same code snippet with output_notebook() in place of output_file()

In [4]:
# Bokeh Libraries
from bokeh.io import output_notebook
from bokeh.plotting import figure, show

# The figure will be right in my Jupyter Notebook
output_notebook()

# Set up a generic figure() object
fig = figure()

# See what it looks like
show(fig)





The figure() object is not only the foundation of your data visualization but also the object that unlocks all of Bokeh’s available tools for visualizing data. The Bokeh figure is a subclass of the Bokeh Plot object, which provides many of the parameters that make it possible to configure the aesthetic elements of your figure.

In [6]:
#To show you just a glimpse into the customization options available

In [7]:
# Bokeh Libraries
from bokeh.io import output_notebook
from bokeh.plotting import figure, show

In [8]:
# The figure will be rendered inline in my Jupyter Notebook
output_notebook()

In [10]:
# Example figure
fig = figure(background_fill_color='gray',
             background_fill_alpha=0.5,
             border_fill_color='blue',
             border_fill_alpha=0.25,
             plot_height=300,
             plot_width=500,
             h_symmetry=True,
             x_axis_label='X Label',
             x_axis_type='datetime',
             x_axis_location='above',
             x_range=('2018-01-01', '2018-06-30'),
             y_axis_label='Y Label',
             y_axis_type='linear',
             y_axis_location='left',
             y_range=(0, 100),
             title='Example Figure',
             title_location='right',
             toolbar_location='below',
             tools='save')

# See what it looks like
show(fig)

AttributeError: unexpected attribute 'h_symmetry' to Figure, possible attributes are above, align, aspect_ratio, aspect_scale, background, background_fill_alpha, background_fill_color, below, border_fill_alpha, border_fill_color, center, css_classes, disabled, extra_x_ranges, extra_x_scales, extra_y_ranges, extra_y_scales, frame_height, frame_width, height, height_policy, hidpi, inner_height, inner_width, js_event_callbacks, js_property_callbacks, left, lod_factor, lod_interval, lod_threshold, lod_timeout, margin, match_aspect, max_height, max_width, min_border, min_border_bottom, min_border_left, min_border_right, min_border_top, min_height, min_width, name, outer_height, outer_width, outline_line_alpha, outline_line_cap, outline_line_color, outline_line_dash, outline_line_dash_offset, outline_line_join, outline_line_width, output_backend, plot_height, plot_width, renderers, reset_policy, right, sizing_mode, subscribed_events, syncable, tags, title, title_location, toolbar, toolbar_location, toolbar_sticky, visible, width, width_policy, x_range, x_scale, y_range or y_scale

In [11]:
# Bokeh Libraries
from bokeh.io import output_file
from bokeh.plotting import figure, show

# My x-y coordinate data
x = [1, 2, 1]
y = [1, 1, 2]

# Output the visualization directly in the notebook
output_file('first_glyphs.html', title='First Glyphs')

# Create a figure with no toolbar and axis ranges of [0,3]
fig = figure(title='My Coordinates',
             plot_height=300, plot_width=300,
             x_range=(0, 3), y_range=(0, 3),
             toolbar_location=None)

# Draw the coordinates as circles
fig.circle(x=x, y=y,
           color='green', size=10, alpha=0.5)

# Show plot
show(fig)

Here are a few categories of glyphs:

Marker includes shapes like circles, diamonds, squares, and triangles and is effective for creating visualizations like scatter and bubble charts.

Line covers things like single, step, and multi-line shapes that can be used to build line charts.

Bar/Rectangle shapes can be used to create traditional or stacked bar (hbar) and column (vbar) charts as well as waterfall or gantt charts.

In [12]:
import numpy as np

# Bokeh libraries
from bokeh.io import output_notebook
from bokeh.plotting import figure, show

In [13]:
# My word count data
day_num = np.linspace(1, 10, 10)
daily_words = [450, 628, 488, 210, 287, 791, 508, 639, 397, 943]
cumulative_words = np.cumsum(daily_words)

# Output the visualization directly in the notebook
output_notebook()

In [14]:
# Create a figure with a datetime type x-axis
fig = figure(title='My Tutorial Progress',
             plot_height=400, plot_width=700,
             x_axis_label='Day Number', y_axis_label='Words Written',
             x_minor_ticks=2, y_range=(0, 6000),
             toolbar_location=None)

In [15]:
# The daily words will be represented as vertical bars (columns)
fig.vbar(x=day_num, bottom=0, top=daily_words, 
         color='blue', width=0.75, 
         legend='Daily')



In [16]:
# The cumulative sum will be a trend line
fig.line(x=day_num, y=cumulative_words, 
         color='gray', line_width=1,
         legend='Cumulative')



In [17]:
# Put the legend in the upper left corner
fig.legend.location = 'top_left'

# Let's check it out
show(fig)

In [19]:
import pandas as pd

# Read the csv files
player_stats = pd.read_csv('2017-18_playerBoxScore.csv', parse_dates=['gmDate'])
team_stats = pd.read_csv('2017-18_teamBoxScore.csv', parse_dates=['gmDate'])
standings = pd.read_csv('2017-18_standings.csv', parse_dates=['stDate'])

In [None]:
#Let’s start by visualizing the race for first place in 
#the NBA’s Western Conference in 2017-18 between 
#the defending champion Golden State Warriors and 
#the challenger Houston Rockets. 
#The daily win-loss records of these two teams is stored in a DataFrame 
named west_top_2

In [20]:
west_top_2 = (standings[(standings['teamAbbr'] == 'HOU') | (standings['teamAbbr'] == 'GS')]
...               .loc[:, ['stDate', 'teamAbbr', 'gameWon']]
...               .sort_values(['teamAbbr','stDate']))
west_top_2.head()

Unnamed: 0,stDate,teamAbbr,gameWon
9,2017-10-17,GS,0
39,2017-10-18,GS,0
69,2017-10-19,GS,0
99,2017-10-20,GS,1
129,2017-10-21,GS,1


From here, you can load this DataFrame into two ColumnDataSource objects and visualize the race:

In [21]:
# Bokeh libraries
from bokeh.plotting import figure, show
from bokeh.io import output_file
from bokeh.models import ColumnDataSource

In [22]:
# Output to file
output_file('west-top-2-standings-race.html', 
            title='Western Conference Top 2 Teams Wins Race')

In [23]:
# Isolate the data for the Rockets and Warriors
rockets_data = west_top_2[west_top_2['teamAbbr'] == 'HOU']
warriors_data = west_top_2[west_top_2['teamAbbr'] == 'GS']

In [24]:
# Create a ColumnDataSource object for each team
rockets_cds = ColumnDataSource(rockets_data)
warriors_cds = ColumnDataSource(warriors_data)

In [25]:
# Create and configure the figure
fig = figure(x_axis_type='datetime',
             plot_height=300, plot_width=600,
             title='Western Conference Top 2 Teams Wins Race, 2017-18',
             x_axis_label='Date', y_axis_label='Wins',
             toolbar_location=None)

In [26]:
# Render the race as step lines
fig.step('stDate', 'gameWon', 
         color='#CE1141', legend='Rockets', 
         source=rockets_cds)
fig.step('stDate', 'gameWon', 
         color='#006BB6', legend='Warriors', 
         source=warriors_cds)



In [27]:
# Move the legend to the upper left corner
fig.legend.location = 'top_left'

# Show the plot
show(fig)

# Adding Interaction
"The feature that sets Bokeh apart is its ability to easily implement interactivity in your visualization. Bokeh even goes as far as describing itself as an interactive visualization library:"

Configuring the Toolbar
As you saw all the way back in Generating Your First Figure, the default Bokeh figure() comes with a toolbar right out of the box. The default toolbar comes with the following tools (from left to right):

Pan
Box Zoom
Wheel Zoom
Save
Reset
A link to Bokeh’s user guide for Configuring Plot Tools
A link to the Bokeh homepage

# Selecting Data Points
Implementing selection behavior is as easy as adding a few specific keywords when declaring your glyphs.

The next example will create a scatter plot that relates a player’s total number of three-point shot attempts to the percentage made (for players with at least 100 three-point shot attempts).

The data can be aggregated from the player_stats DataFrame:

In [28]:
# Find players who took at least 1 three-point shot during the season
three_takers = player_stats[player_stats['play3PA'] > 0]

# Clean up the player names, placing them in a single column
three_takers['name'] = [f'{p["playFNm"]} {p["playLNm"]}' 
                        for _, p in three_takers.iterrows()]

# Aggregate the total three-point attempts and makes for each player
three_takers = (three_takers.groupby('name')
                            .sum()
                            .loc[:,['play3PA', 'play3PM']]
                            .sort_values('play3PA', ascending=False))

# Filter out anyone who didn't take at least 100 three-point shots
three_takers = three_takers[three_takers['play3PA'] >= 100].reset_index()

# Add a column with a calculated three-point percentage (made/attempted)
three_takers['pct3PM'] = three_takers['play3PM'] / three_takers['play3PA']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  three_takers['name'] = [f'{p["playFNm"]} {p["playLNm"]}'


In [29]:
three_takers.sample(5)


Unnamed: 0,name,play3PA,play3PM,pct3PM
231,Brandon Ingram,105,41,0.390476
180,Timothé Luwawu-Cabarrot,158,53,0.335443
25,Kyle Kuzma,435,159,0.365517
197,Dewayne Dedmon,141,50,0.35461
207,Lance Thomas,124,50,0.403226


Let’s say you want to select a groups of players in the distribution, and in doing so mute the color of the glyphs representing the non-selected players:



In [30]:
from bokeh.plotting import figure, show
from bokeh.io import output_file
from bokeh.models import ColumnDataSource, NumeralTickFormatter

In [31]:
# Output to file
output_file('three-point-att-vs-pct.html',
            title='Three-Point Attempts vs. Percentage')

In [32]:
# Store the data in a ColumnDataSource
three_takers_cds = ColumnDataSource(three_takers)

# Specify the selection tools to be made available
select_tools = ['box_select', 'lasso_select', 'poly_select', 'tap', 'reset']

In [33]:
# Create the figure
fig = figure(plot_height=400,
             plot_width=600,
             x_axis_label='Three-Point Shots Attempted',
             y_axis_label='Percentage Made',
             title='3PT Shots Attempted vs. Percentage Made (min. 100 3PA), 2017-18',
             toolbar_location='below',
             tools=select_tools)

In [34]:
# Format the y-axis tick labels as percentages
fig.yaxis[0].formatter = NumeralTickFormatter(format='00.0%')

# Add square representing each player
fig.square(x='play3PA',
           y='pct3PM',
           source=three_takers_cds,
           color='royalblue',
           selection_color='deepskyblue',
           nonselection_color='lightgray',
           nonselection_alpha=0.3)

In [35]:
# Visualize
show(fig)

In [39]:
# Bokeh Library
from bokeh.models import HoverTool

# Format the tooltip
tooltips = [
            ('Player','@name'),
            ('Three-Pointers Made', '@play3PM'),
            ('Three-Pointers Attempted', '@play3PA'),
            ('Three-Point Percentage','@pct3PM{00.0%}'),
           ]

# Add the HoverTool to the figure
fig.add_tools(HoverTool(tooltips=tooltips))

# Visualize
show(fig)

In [45]:
!pip install altair
!pip install vega_datasets

Collecting altair
  Downloading altair-5.0.1-py3-none-any.whl (471 kB)
Installing collected packages: altair
Successfully installed altair-5.0.1
Collecting vega_datasets
  Downloading vega_datasets-0.9.0-py3-none-any.whl (210 kB)
Installing collected packages: vega-datasets
Successfully installed vega-datasets-0.9.0


In [46]:
# Importing altair and pandas library
import altair as alt
import pandas as pd
  

In [47]:
# Making a Pandas DataFrame
score_data = pd.DataFrame({
    'Website': ['StackOverflow', 'FreeCodeCamp',
                'GeeksForGeeks', 'MDN', 'CodeAcademy'],
    'Score': [65, 50, 99, 75, 33]
})

In [48]:
#All altair charts need three essential elements: Data, Mark and Encoding. 
# Making the Simple Bar Chart
alt.Chart(score_data).mark_bar().encode(
    # Mapping the Website column to x-axis
    x='Website',
    # Mapping the Score column to y-axis
    y='Score'
)

In this example, we will visualize the iris dataset from the vega_datasets library in the form of a scatter plot. The mark method used for scatter plot in this example is mark_point(). For this bi-variate analysis, we map the sepalLength and petalLength columns to the x and y axes encoding. Further, to differentiate the points from each other, we map the shape encoding to the species column. 

In [49]:
# Importing altair
import altair as alt
# Import data object from vega_datasets
from vega_datasets import data
  
# Selecting the data
iris = data.iris()
  
# Making the Scatter Plot
alt.Chart(iris).mark_point().encode(
    # Map the sepalLength to x-axis
    x='sepalLength',
    # Map the petalLength to y-axis
    y='petalLength',
    # Map the species to shape
    shape='species'
)