In [1]:
# Enables scrolling in presentation mode by switching on the 'scroll' option
# of the 'livereveal' configuration section.
# (Original author's note: hidden content in slides view.)
from notebook.services.config import ConfigManager

cm = ConfigManager()
livereveal_options = {'scroll': True}
cm.update('livereveal', livereveal_options)

{'scroll': True}

<h1>Bokeh</h1><h2>StyleSage - May 17th 2018</h2>

<a href="http://bokeh.pydata.org/">
    <img src="images/bokeh-transparent.png" style="width:200px" align="center">
</a>    
<p>



## What is Bokeh

Bokeh is an interactive visualization library that targets modern web browsers for presentation. It is good for:

* Interactive visualization in modern browsers
* Standalone HTML documents, or server-backed apps
* Expressive and versatile graphics
* Large, dynamic or streaming data
* Easy usage from Python (or Scala, or R, or...)

And most importantly:

## <center><i>NO JAVASCRIPT REQUIRED</i></center>

# Imports and Setup

When using the [`bokeh.plotting`](http://bokeh.pydata.org/en/latest/docs/user_guide/plotting.html) interface, there are a few common imports:
* Use the [`figure`](http://bokeh.pydata.org/en/latest/docs/reference/plotting.html#bokeh.plotting.figure) function to  create new plot objects to work with. 
* Call the functions [`output_file`](http://bokeh.pydata.org/en/latest/docs/reference/resources_embedding.html#bokeh.io.output_file), [`output_notebook`](http://bokeh.pydata.org/en/latest/docs/reference/resources_embedding.html#bokeh.io.output_notebook), and [`output_server`](http://bokeh.pydata.org/en/latest/docs/reference/resources_embedding.html#bokeh.io.output_server) (possibly in combination) to tell Bokeh how to display or save output. 
* Execute [`show`](http://bokeh.pydata.org/en/latest/docs/reference/resources_embedding.html#bokeh.io.show) and  [`save`](http://bokeh.pydata.org/en/latest/docs/reference/resources_embedding.html#bokeh.io.save) to display or save plots and layouts.

In [2]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure

In this case, we are in the Jupyter notebook, so call `output_notebook()`. We only need to call this once, and all subsequent calls to `show()` will display inline in the notebook.

In [3]:
# Route Bokeh output to the notebook so that later show() calls display inline.
# Only needs to be called once per notebook session.
output_notebook()

# Basic plotting
<img src="images/chart_suggestions.png" align="center"/>

## Scatter Plots

Basic plotting and available tools

In [4]:
x = [1, 2, 3, 4, 5]
y = [6, 7, 2, 4, 5]

# build a titled figure whose tool list contains only 'pan'
p = figure(
    title='Hola caracola',
    tools=['pan'],
    plot_width=900,
    plot_height=500,
)

# draw one orange-filled diamond marker per (x, y) pair
p.diamond(x, y, fill_color='orange', size=20)

# enlarge the tick labels of both axes for the slides view
for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "20pt"

show(p)  # display the figure

Plotting from data source

In [5]:
from bokeh.models import ColumnDataSource

# column-oriented data: every key is a column that glyphs can refer to by name
source = ColumnDataSource(data={
    'x': [1, 2, 3, 4, 5],
    'y': [6, 7, 2, 4, 5],
})

p = figure(plot_width=900, plot_height=400)

# coordinates are passed as column names, so the `source` argument is required
p.circle('x', 'y', source=source, size=20)

# enlarge the tick labels of both axes for the slides view
for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "20pt"

show(p)

Labelling the data

In [6]:
from bokeh.models import LabelSet

source = ColumnDataSource(data={
    'x': [1, 2, 3, 4, 5],
    'y': [6, 7, 2, 4, 5],
    'names': ['a', 'b', 'c', 'd', 'e'],  # text used for the labels
})

p = figure(plot_width=900, plot_height=400)
p.circle(x='x', y='y', source=source, size=20)

# label set that reads its position and text from the same data source
labels = LabelSet(
    x='x', y='y', text='names',
    x_offset=5, y_offset=2,
    text_font_size="20pt",
    source=source,
)
p.add_layout(labels)

show(p)

Data ranges

In [7]:
from bokeh.models import Range1d

p = figure(plot_width=900, plot_height=400)
p.circle(x='x', y='y', source=source, size=20)

# same label set as before, attached to the new figure
labels = LabelSet(
    x='x', y='y', text='names',
    x_offset=5, y_offset=2,
    text_font_size="20pt",
    source=source,
)
p.add_layout(labels)

# fix the y range to the interval from 0 to 8
p.y_range = Range1d(start=0, end=8)

show(p)

# Lines

In [8]:
x = [1, 2, 3, 4, 5]
y = [6, 7, 2, 4, 5]

p = figure(plot_width=900, plot_height=400)

# draw the data as a stepped line (step mode 'center')
p.step(x, y, mode='center', line_width=2)

# enlarge the tick labels of both axes for the slides view
for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "20pt"

show(p)

In [9]:
# NOTE(review): adds a second step glyph with the same x, y and options to the
# existing figure `p` and never calls show(); presumably a leftover cell --
# confirm whether it is still needed.
p.step(x, y, line_width=2, mode="center")

## Bars

Basic plotting

In [10]:
from bokeh.palettes import Spectral3

x = [1, 2, 3]
top = [1.2, 2.5, 3.7]

p = figure(plot_width=900, plot_height=400)

# vertical bars centred on `x`, reaching up to `top`; Spectral3 supplies the colours
p.vbar(x=x, top=top, color=Spectral3, width=0.9)

show(p)

## Bars - categorical data

In [11]:
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
data_fruits = [5, 3, 4, 2, 4, 6]

# for a categorical x axis the list of categories is passed as x_range
p = figure(plot_width=900, plot_height=400, x_range=fruits)

# one vertical bar per fruit
p.vbar(x=fruits, top=data_fruits, width=0.9)

# enlarge the tick labels of both axes for the slides view
for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "20pt"

show(p)

Horizontal bars

In [12]:
# for a categorical y axis the list of categories is passed as y_range
p = figure(plot_width=900, plot_height=400, y_range=fruits)

# one horizontal bar per fruit, extending to the right up to its value
p.hbar(y=fruits, right=data_fruits, height=0.9)

# enlarge the tick labels of both axes for the slides view
for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "20pt"

show(p)

Multiple categorical data

In [13]:
from bokeh.models import ColumnDataSource, FactorRange

from itertools import chain

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ['2015', '2016', '2017']

# per-year values, aligned position by position with `fruits`
data = {'fruits' : fruits,
        '2015'   : [2, 1, 4, 3, 2, 4],
        '2016'   : [5, 3, 3, 2, 4, 5],
        '2017'   : [3, 2, 4, 4, 5, 4]}

# one (fruit, year) pair per bar, grouped by fruit
x = [(fruit, year) for fruit in fruits for year in years]
print('Values in x: {}'.format(x[:5]))

# Flatten the per-year columns fruit by fruit (like an hstack) so that counts[i]
# belongs to x[i]. Driven by `years` instead of repeating the keys, and chained
# in a single pass instead of repeatedly concatenating tuples with sum().
counts = tuple(chain.from_iterable(zip(*(data[year] for year in years))))
print('Values in counts: {}'.format(counts[:5]))

Values in x: [('Apples', '2015'), ('Apples', '2016'), ('Apples', '2017'), ('Pears', '2015'), ('Pears', '2016')]
Values in counts: (2, 5, 3, 1, 3)


Multiple categorical data

In [14]:
from bokeh.models import FactorRange

source = ColumnDataSource(data={'x': x, 'counts': counts})

# the (fruit, year) tuples are unpacked so each one is passed to FactorRange
# as a separate factor
p = figure(plot_width=900, plot_height=500, x_range=FactorRange(*x))

p.vbar(x='x', top='counts', source=source, width=0.9)

show(p)

Multiple categorical data - formatted

In [15]:
from bokeh.models import FactorRange

source = ColumnDataSource(data={'x': x, 'counts': counts})

p = figure(plot_width=900, plot_height=500, x_range=FactorRange(*x))
p.vbar(x='x', top='counts', source=source, width=0.9)

# range adjustments: bars start at 0, padding on the categorical axis
p.y_range.start = 0
p.x_range.range_padding = 0.1

# x axis cosmetics: rotated tick labels and no vertical grid lines
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

# larger fonts for the slides view (tick labels on both axes plus x group labels)
p.yaxis.major_label_text_font_size = "20pt"
p.xaxis.major_label_text_font_size = "20pt"
p.xaxis.group_text_font_size = "20pt"

show(p)

Multiple data with legend

In [16]:
from bokeh.core.properties import value
from bokeh.transform import dodge

source = ColumnDataSource(data=data)

# ranges can be defined on both axes
p = figure(plot_width=900, plot_height=500, x_range=fruits, y_range=(0, 6))

# (column name, horizontal offset, bar colour) for each year's series
bar_specs = [
    ('2015', -0.25, "#c9d9d3"),
    ('2016',  0.0,  "#718dbf"),
    ('2017',  0.25, "#e84d60"),
]
for year, offset, bar_color in bar_specs:
    # each series is shifted by `offset` so the bars of one fruit sit side by side
    p.vbar(x=dodge('fruits', offset, range=p.x_range),
           top=year, width=0.2, source=source, color=bar_color, legend=value(year))

p.legend.orientation = "horizontal"
p.legend.location = "top_left"

# enlarge the tick labels of both axes for the slides view
for axis in (p.xaxis, p.yaxis):
    axis.major_label_text_font_size = "20pt"

show(p)

## Geometries

In [17]:
# the four edge coordinates of each rectangle, one list entry per rectangle
left = [1, 2, 3]
right = [1.2, 2.5, 3.7]
bottom = [1, 2, 3]
top = [2, 3, 4]

p = figure(plot_width=900, plot_height=400)

p.quad(left=left, right=right, bottom=bottom, top=top)

show(p)

## Geometries

In [18]:
# nested lists: each inner list holds the coordinates of one patch
x = [
    [1, 3, 2],
    [3, 4, 6, 6],
]
y = [
    [2, 1, 4],
    [4, 7, 8, 5],
]

p = figure(plot_width=900, plot_height=400)

p.patches(x, y)

show(p)

# Multiple Glyphs 

In [19]:
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 7, 3]

p = figure(plot_width=900, plot_height=400)

# first display: the figure with only the line
p.line(x, y, line_width=4)
show(p)

# second display: circle markers added to the same figure
p.circle(x, y, size=15, fill_color="white")
show(p)


In [20]:
import numpy as np

# 100 samples of sin(x) between 0 and 4*pi, plus two scaled copies
x = np.linspace(0, 4 * np.pi, num=100)
y = np.sin(x)
double_y = 2 * y
triple_y = 3 * y

p = figure(plot_width=900, plot_height=400)

# sin(x): circles and a line carrying the same legend text
p.circle(x, y, legend="sin(x)")
p.line(x, y, legend="sin(x)")

# 2*sin(x): dashed orange line
p.line(x, double_y, legend="2*sin(x)",
       line_color="orange", line_dash=[4, 4], line_width=2)

# 3*sin(x): unfilled green squares and a green line
p.square(x, triple_y, legend="3*sin(x)", line_color="green", fill_color=None)
p.line(x, triple_y, legend="3*sin(x)", line_color="green")

show(p)

# Some StyleSage charts
## Real examples

<h1>Price Architecture</h1>
<img src="images/price_architecture.png" align="center">

1st step: get the data from a json file

In [21]:
import json
import pandas as pd

# Read the JSON file. The context manager closes the file handle as soon as
# parsing finishes (or fails) instead of leaving it open.
file_name = './data/price_architecture.json'
with open(file_name) as json_file:
    data = json.load(json_file)

# load the entries under data['data']['marks'] into a pandas dataframe
df = pd.DataFrame(data['data']['marks'])
df.head()

Unnamed: 0,brand_count,country_name,level1_id,level2_id,original_price_bin,retailer,unique_product_count,unique_style_count
0,4,USA,Clothing,"Outerwear, Coats, Jackets",101-150,Webshop1,94,53
1,3,USA,Clothing,"Outerwear, Coats, Jackets",151-200,Webshop1,35,28
2,4,USA,Clothing,"Outerwear, Coats, Jackets",201-250,Webshop1,21,16
3,2,USA,Clothing,"Outerwear, Coats, Jackets",251-300,Webshop1,8,8
4,1,USA,Clothing,"Outerwear, Coats, Jackets",301-350,Webshop1,5,4


2nd step: build the circle chart

In [22]:
from bokeh.models import ColumnDataSource

source = ColumnDataSource(df)

# x factors: the first 15 price bins listed under data['data']['columns']
price_bins = [mark['original_price_bin'] for mark in data['data']['columns']]
x_range = price_bins[:15]

# y factors: every distinct level2_id found in the dataframe
y_range = list(df.level2_id.unique())

p = figure(plot_width=900, plot_height=500, x_range=x_range, y_range=y_range)

# circles with a fixed radius, a navy outline and a half-transparent orange fill
p.circle('original_price_bin', 'level2_id', source=source,
         radius=0.2, line_color="navy", fill_color="orange", fill_alpha=0.5)

show(p)

3rd step: convert to bubble chart - size is a dimension

In [23]:
p = figure(plot_width=900, plot_height=500, x_range=x_range, y_range=y_range)

# `size` (instead of a fixed radius) is read from the unique_product_count column
p.circle('original_price_bin', 'level2_id', source=source,
         size='unique_product_count',
         line_color="navy", fill_color="orange", fill_alpha=0.5)

show(p)

4th step: normalize the data by row

In [24]:
# Value given to the largest unique_product_count of each category after scaling.
MAX_BUBBLE_SIZE = 35

# Maximum unique_product_count within each level2_id (a row of the chart),
# broadcast back onto every dataframe row of that category. groupby().transform
# replaces the manual groupby -> records -> dict -> apply lookup.
df['max_value'] = df.groupby('level2_id')['unique_product_count'].transform('max')

# Normalized value by row: a category's largest count maps to MAX_BUBBLE_SIZE.
df['norm'] = df['unique_product_count'] / df['max_value'] * MAX_BUBBLE_SIZE
df.head(5)

Unnamed: 0,brand_count,country_name,level1_id,level2_id,original_price_bin,retailer,unique_product_count,unique_style_count,max_value,norm
0,4,USA,Clothing,"Outerwear, Coats, Jackets",101-150,Webshop1,94,53,168,19.583333
1,3,USA,Clothing,"Outerwear, Coats, Jackets",151-200,Webshop1,35,28,168,7.291667
2,4,USA,Clothing,"Outerwear, Coats, Jackets",201-250,Webshop1,21,16,168,4.375
3,2,USA,Clothing,"Outerwear, Coats, Jackets",251-300,Webshop1,8,8,168,1.666667
4,1,USA,Clothing,"Outerwear, Coats, Jackets",301-350,Webshop1,5,4,168,1.041667


5th step: drawing normalized data

In [25]:
# rebuild the data source so it includes the new 'max_value' and 'norm' columns
source = ColumnDataSource(df)

# new figure with the default tools and the same categorical ranges as before
p = figure(x_range=x_range, y_range=y_range, plot_width=900, plot_height=400)

# circle size is read from the normalized column
p.circle('original_price_bin', 'level2_id', source=source,
         size='norm',
         line_color="navy", fill_color="orange", fill_alpha=0.5)

show(p)

6th step: add hover tooltips and a lasso selection tool, and rotate the x-axis labels

In [26]:
from bokeh.models import HoverTool
from math import pi

# tooltip rows as (label, value) pairs; the values reference data source columns
hover = HoverTool(tooltips=[
    ("# products", "@unique_product_count"),
    ("# styles", "@unique_style_count"),
])

# only the hover tool and the lasso selection are enabled on this figure
p = figure(x_range=x_range, y_range=y_range,
           plot_width=900, plot_height=400,
           tools=[hover, 'lasso_select'])

# circle size is read from the normalized column
p.circle('original_price_bin', 'level2_id', source=source,
         size='norm',
         line_color="navy", fill_color="orange", fill_alpha=0.5)

# rotate the x tick labels by pi/4
p.xaxis.major_label_orientation = pi/4

show(p)

<h1>Historic data</h1>
<img src="images/historical_chart.png" align="center">


Step 1: Get the data

In [27]:
import json
import pandas as pd
from datetime import datetime

from pprint import pprint

# Read the JSON file. The context manager closes the file handle as soon as
# parsing finishes (or fails) instead of leaving it open.
with open('./data/historic_webshop.json') as json_file:
    data = json.load(json_file)
hist_data = data['historic_data']

# replace each 'price_date' string (YYYY-MM-DD) with a datetime object, in place
for data_point in hist_data:
    data_point['price_date'] = datetime.strptime(data_point['price_date'], '%Y-%m-%d')

hist_data[:3]

[{'product_count': 4366.0,
  'price_date': datetime.datetime(2014, 9, 9, 0, 0),
  'is_historic_gap': False},
 {'product_count': 4299.0,
  'price_date': datetime.datetime(2014, 9, 10, 0, 0),
  'is_historic_gap': False},
 {'product_count': 4299.0,
  'price_date': datetime.datetime(2014, 9, 11, 0, 0),
  'is_historic_gap': False}]

Step 2: first approach

In [28]:
# two parallel columns extracted from the list of data points
source = ColumnDataSource(data={
    'price_date': [data_point['price_date'] for data_point in hist_data],
    'product_count': [data_point['product_count'] for data_point in hist_data],
})

# new figure with a title
p = figure(title="Webshop X - Historic chart", plot_width=800, plot_height=400)

# product count over time as a line
p.line('price_date', 'product_count', source=source, line_width=2)

# smaller, rotated x tick labels
p.xaxis.major_label_orientation = 1
p.xaxis.major_label_text_font_size = "10pt"

show(p)

Step 3: configure date format

In [29]:
# Same chart as the first approach, but the x axis is declared as a datetime axis.
# The title now matches the "Webshop X" naming used by every other historic
# chart in this notebook.
p = figure(plot_width=800, plot_height=400, x_axis_type='datetime',
           title="Webshop X - Historic chart")

# product count over time, read from the existing data source
p.line('price_date', 'product_count', line_width=2, source=source)

# smaller, rotated x tick labels
p.xaxis.major_label_text_font_size = "10pt"
p.xaxis.major_label_orientation = 1

show(p) # show the results

Step 4: Remove the holes

In [30]:
# keep only the data points whose 'is_historic_gap' flag is exactly False
valid_points = [data_point for data_point in hist_data
                if data_point['is_historic_gap'] is False]

source = ColumnDataSource(data={
    'price_date': [point['price_date'] for point in valid_points],
    'product_count': [point['product_count'] for point in valid_points],
})

p = figure(title="Webshop X - Historic chart", x_axis_type='datetime',
           plot_width=800, plot_height=400)
p.line('price_date', 'product_count', source=source, line_width=2)

show(p)

Step 5: drawing lines and areas

In [31]:
from bokeh.models.annotations import BoxAnnotation, Span

p = figure(title="Webshop X - Historic chart", x_axis_type='datetime',
           plot_width=800, plot_height=400)
p.line('price_date', 'product_count', source=source, line_width=2)

# thin purple horizontal line at y = 7000
line = Span(location=7000, dimension='width', line_width=1, line_color='purple')
p.add_layout(line)

show(p)

In [32]:
from bokeh.models.annotations import BoxAnnotation, Span

p = figure(title="Webshop X - Historic chart", x_axis_type='datetime',
           plot_width=800, plot_height=400)
p.line('price_date', 'product_count', source=source, line_width=2)

# band with only a bottom edge (11000): it always fills the top of the plot
upper = BoxAnnotation(bottom=11000, fill_color='olive', fill_alpha=0.1)
p.add_layout(upper)

# band with only a top edge (3000): it always fills the bottom of the plot
lower = BoxAnnotation(top=3000, fill_color='firebrick', fill_alpha=0.1)
p.add_layout(lower)

show(p)

In [33]:
from bokeh.models.annotations import BoxAnnotation
from datetime import datetime

p = figure(title="Webshop X - Historic chart", x_axis_type='datetime',
           plot_width=800, plot_height=400)
p.line('price_date', 'product_count', source=source, line_width=2)

# a finite region: yellow band between 2016-01-01 and 2017-01-01
center = BoxAnnotation(
    left=datetime(2016, 1, 1),
    right=datetime(2017, 1, 1),
    fill_color='yellow',
    fill_alpha=0.1,
)
p.add_layout(center)

show(p)

Hovers

In [34]:
from bokeh.models import HoverTool

# tooltip rows as (label, value) pairs; the @-prefixed values name data source columns
tooltip_rows = [
    ("index", "$index"),
    ("price_date", "@price_date"),
    ("product_count", "@product_count"),
]
hover = HoverTool(tooltips=tooltip_rows)

# the hover tool is the only tool enabled on this figure
p = figure(title="Webshop X - Historic chart",
           x_axis_type='datetime',
           tools=[hover],
           plot_width=800, plot_height=400)

p.line('price_date', 'product_count', source=source, line_width=2)

show(p)

## Awesome charts

In [35]:
from bokeh.models.tools import HoverTool
from bokeh.sampledata.glucose import data

# rows of the sample dataset selected by the label '2010-10-06'
subset = data.loc['2010-10-06']

x = subset.index.to_series()
y = subset['glucose']

# basic plot setup
p = figure(title='Hover over points', x_axis_type="datetime", width=900, height=400)

# thin dashed gray line through the readings
p.line(x, y, color='gray', line_dash="4 4", line_width=1)

# faint circles on every reading, with separate hover_* styling
cr = p.circle(x, y, size=20,
              fill_color="grey", fill_alpha=0.05, line_color=None,
              hover_fill_color="firebrick", hover_alpha=0.3, hover_line_color="white")

# hover tool without tooltips, limited to the circle renderer, in 'hline' mode
p.add_tools(HoverTool(tooltips=None, renderers=[cr], mode='hline'))

show(p)

Interactive data applications

In [36]:
# Embed the hosted "sliders" demo application in the notebook through an iframe.
from IPython.display import IFrame

IFrame(src='https://demo.bokehplots.com/apps/sliders', width=900, height=500)

Maps :)

In [37]:
from bokeh.plotting import figure
from bokeh.models import WMTSTileSource
import pandas as pd

# plot extent in web mercator coordinates
x_range = (-13884029, -7453304)
y_range = (2698291, 6455972)

p = figure(x_range=x_range, y_range=y_range, tools='pan, wheel_zoom')
p.axis.visible = False  # hide the axes

# tile layer: URL template and attribution text
url = 'http://a.basemaps.cartocdn.com/dark_all/{Z}/{X}/{Y}.png'
attribution = "Tiles by Carto, under CC BY 3.0. Data by OSM, under ODbL"
tile_source = WMTSTileSource(url=url, attribution=attribution)
p.add_tile(tile_source)

# two named points with their x / y positions
df = pd.DataFrame({
    'name': {0: 'Austin', 1: 'NYC'},
    'x': {0: -10880712.12055602, 1: -8238299.103697925},
    'y': {0: 3537942.3583266055, 1: 4970071.579142428}})

# mark each point with an orange circle
p.circle(x=df['x'], y=df['y'], size=10, fill_color='orange')
show(p)

Linked panning

In [38]:
from bokeh.layouts import gridplot

x = list(range(11))

# three series over the same x values
y0 = x
y1 = [10-i for i in x]
y2 = [abs(i-5) for i in x]

# keyword arguments shared by the three figures
plot_options = dict(width=250, plot_height=250, tools='pan,wheel_zoom')

# first plot: owns the ranges the other plots share
s1 = figure(**plot_options)
s1.circle(x, y0, color="navy", size=10)

# second plot: shares both ranges with the first
s2 = figure(x_range=s1.x_range, y_range=s1.y_range, **plot_options)
s2.triangle(x, y1, color="firebrick", size=10)

# third plot: shares only the x range
s3 = figure(x_range=s1.x_range, **plot_options)
s3.square(x, y2, color="olive", size=10)

# lay the three plots out in a single row
p = gridplot([[s1, s2, s3]])

show(p)

# THANK YOU!