# L03.2 Interactive plots with Bokeh

In order for this lecture to be standalone, we will use the dataset by the [Gapminder foundation](https://www.gapminder.org/). This dataset is very rich and lets us demonstrate a wide range of Bokeh's capabilities.


## Basic plot

In [1]:
from bokeh.plotting import figure
from bokeh.io import output_file, show
import numpy as np

# Sample the cosine on 256 evenly spaced points between -pi and pi.
x = np.linspace(start=-np.pi, stop=np.pi, num=256)
y = np.cos(x)

# Route the rendered document to a standalone HTML file.
output_file(filename="basic_bokeh.html")

# A default figure holding a single line glyph.
p = figure()
p.line(x=x, y=y)

show(p)

Running this code creates an HTML file and opens it in your browser.

In [None]:
# Send this figure to its own HTML file.
output_file("basic_bokeh_pretty.html")

# Figure size in pixels: 2.5 units tall at 75 pixels per unit, with the width
# derived from the height through an (approximate) golden ratio.
golden = 1.618
dpi = 75
h = int(2.5 * dpi)
w = int(h * golden)

p = figure(plot_width=w, plot_height=h)
p.line(x, y, line_width=2)

p.xaxis.axis_label = "x"
p.yaxis.axis_label = "cos(x)"

# Strip the grid lines and the minor ticks from both axes.
for grid in (p.xgrid, p.ygrid):
    grid.grid_line_color = None
for axis in (p.xaxis, p.yaxis):
    axis.minor_tick_line_color = None

show(p)

## Interactive dataset plots

In [4]:
import pandas as pd
# Load the Gapminder table from a CSV file in the current working directory.
df = pd.read_csv("gapminder.csv")

In [5]:
# Summarise the frame: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 6 columns):
country      1704 non-null object
continent    1704 non-null object
year         1704 non-null int64
lifeExp      1704 non-null float64
pop          1704 non-null int64
gdpPercap    1704 non-null float64
dtypes: float64(2), int64(2), object(2)
memory usage: 80.0+ KB


In [6]:
df['country'].unique() # Display the unique entries in column 'country'

array(['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
       'Australia', 'Austria', 'Bahrain', 'Bangladesh', 'Belgium',
       'Benin', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon',
       'Canada', 'Central African Republic', 'Chad', 'Chile', 'China',
       'Colombia', 'Comoros', 'Congo, Dem. Rep.', 'Congo, Rep.',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Czech Republic',
       'Denmark', 'Djibouti', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Ethiopia',
       'Finland', 'France', 'Gabon', 'Gambia', 'Germany', 'Ghana',
       'Greece', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Haiti',
       'Honduras', 'Hong Kong, China', 'Hungary', 'Iceland', 'India',
       'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy',
       'Jamaica', 'Japan', 'Jordan', 'Kenya', 'Korea, Dem. Rep.',
       'Korea, Rep.', 'Kuwait', 'Leba

In [7]:
df.head(10) # Display the first 10 rows

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303
2,Afghanistan,Asia,1962,31.997,10267083,853.10071
3,Afghanistan,Asia,1967,34.02,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106
5,Afghanistan,Asia,1977,38.438,14880372,786.11336
6,Afghanistan,Asia,1982,39.854,12881816,978.011439
7,Afghanistan,Asia,1987,40.822,13867957,852.395945
8,Afghanistan,Asia,1992,41.674,16317921,649.341395
9,Afghanistan,Asia,1997,41.763,22227415,635.341351


Looking at the head of the dataframe tells us that we have information on life expectancies, population, and GDP for each country every 5 years. This should allow us to produce some interesting plots!

## Bokeh

### Line charts

We already used line charts in the basic plotting example. Let's check out how life expectancy has been doing over the years in Europe.

In [8]:
# Boolean mask marking the European rows, then the filtered frame itself.
is_europe = df['continent'].eq("Europe")
europe = df.loc[is_europe]
europe.head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
12,Albania,Europe,1952,55.23,1282697,1601.056136
13,Albania,Europe,1957,59.28,1476505,1942.284244
14,Albania,Europe,1962,64.82,1728137,2312.888958
15,Albania,Europe,1967,66.22,1984060,2760.196931
16,Albania,Europe,1972,67.69,2263554,3313.422188


In [12]:
from bokeh.plotting import figure
from bokeh.io import show, reset_output, output_notebook
from bokeh.models import Legend, LegendItem

# Bokeh does not automatically cycle through colours
from bokeh.palettes import Category20_20 as palette
import itertools

# Discard any previously configured output target and render inline in the
# notebook instead. A single output_notebook() call is sufficient.
reset_output()
output_notebook()

# Endless colour iterator; colours repeat once the palette is exhausted.
colors = itertools.cycle(palette)

countries = europe['country'].unique()

# Create a blank figure with labels
fig = figure(plot_width=600, plot_height=800,
             title='Gapminder data',
             x_axis_label='year', y_axis_label='life_expectancy')

# One line per country; renderers are collected so the legend can live
# outside the plot area.
legend_items = []
for country in countries:
    is_country = europe['country'] == country
    country_rows = europe[is_country]  # filter once, reuse for both axes
    r = fig.line(country_rows['year'],
                 country_rows['lifeExp'],
                 line_width=2,
                 color=next(colors),
                 alpha=1.0,
                 muted_alpha=0.0)  # a muted line becomes fully transparent
    legend_items.append((country, [r]))

# Clicking a legend entry toggles the "muted" state of the matching line.
legend = Legend(items=legend_items, location=(20, 20))
legend.click_policy = "mute"
fig.add_layout(legend, "right")

show(fig)

In [13]:
# Restart the colour cycle so this figure begins at the first palette entry.
colors = itertools.cycle(palette)

# Restrict the European data to the three Scandinavian countries.
scandinavians = ['Norway', 'Sweden', 'Denmark']
is_scandinavia = europe['country'].isin(scandinavians)
scandinavia = europe[is_scandinavia]

fig = figure(
    plot_width=600,
    plot_height=600,
    title='Gapminder data',
    x_axis_label='year',
    y_axis_label='life_expectancy',
)

# Draw one line per country. A muted line keeps its colour but fades to 20 %
# opacity instead of vanishing.
legend_items = []
for country, color in zip(scandinavians, colors):
    rows = scandinavia[scandinavia['country'] == country]
    renderer = fig.line(
        rows['year'],
        rows['lifeExp'],
        line_width=2,
        color=color,
        alpha=1.0,
        muted_color=color,
        muted_alpha=0.2,
    )
    legend_items.append((country, [renderer]))

# Legend placed to the right of the plot; clicking an entry mutes its line.
legend = Legend(items=legend_items, location=(20, 440))
legend.click_policy = "mute"
fig.add_layout(legend, "right")

show(fig)

### Bar charts

To illustrate bar charts we'll look at the GDP per capita in Scandinavia over the years.

In [17]:
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.palettes import Category20_20 as palette
from bokeh.transform import factor_cmap

years = [str(year) for year in list(europe['year'].unique())]

# Keep only the columns the chart needs.
data = scandinavia[['country', 'year', 'gdpPercap']].to_dict(orient='list')

# Label every bar from its own row. Building the labels independently of the
# frame's row order would attach GDP values to the wrong country.
data['x'] = [(str(year), country)
             for year, country in zip(data['year'], data['country'])]

# Axis layout: one group per year, countries in `scandinavians` order inside
# each group.
x = [(year, country) for year in years for country in scandinavians]

fig = figure(x_range=FactorRange(*x), plot_height=250, title="Gapminder data",
             toolbar_location=None, tools="")
# start=1, end=2 makes the colour mapping use the country part of each
# (year, country) factor.
fig.vbar(x='x', top='gdpPercap', width=0.8, source=ColumnDataSource(data),
         fill_color=factor_cmap('x', palette=palette, factors=scandinavians, start=1, end=2))

fig.yaxis.axis_label = "GDP per capita"
fig.xaxis.major_label_orientation = 1

show(fig)

### Bubble plots

To illustrate scatter plots we'll look at life expectancy vs. income per country, where we'll scale the bubble by population size. This is the best-known graphic by Gapminder.

In [47]:
from ipywidgets import interact, widgets
from bokeh.io import curdoc, output_notebook, push_notebook
from bokeh.layouts import layout
from bokeh.models import (Button, CategoricalColorMapper, ColumnDataSource,
                          HoverTool, Label, SingleIntervalTicker, Slider,)
from bokeh.palettes import Spectral6
from bokeh.plotting import figure

output_notebook()

years = df['year'].unique()


def _columns_for_year(year):
    """Return Bokeh column data for the rows of ``df`` recorded in ``year``.

    The result holds every column of ``df`` plus a ``size`` column with the
    bubble size used by the scatter glyph. Size grows with the square root of
    the population so that bubble *area* follows population, and is floored
    so that small countries stay visible.
    """
    rows = df[df['year'] == year]
    size = (rows['pop'] ** 0.5 / 500).clip(lower=3)
    # from_df converts the frame into the plain column dict that
    # ColumnDataSource.data expects.
    return ColumnDataSource.from_df(rows.assign(size=size))


plot = figure(x_range=(0, 40000), y_range=(25, 90), title='Gapminder Data', plot_height=300)
plot.xaxis.ticker = SingleIntervalTicker(interval=5000)
plot.xaxis.axis_label = "GDP per capita"
plot.yaxis.ticker = SingleIntervalTicker(interval=20)
plot.yaxis.axis_label = "Life expectancy"

# Large, pale year label drawn behind the bubbles.
label = Label(x=1.1, y=23, text=str(years[0]), text_font_size='93px', text_color='#eeeeee')
plot.add_layout(label)

source = ColumnDataSource(data=_columns_for_year(years[0]))

# One colour per continent; a plain list is passed for the factors.
color_mapper = CategoricalColorMapper(palette=Spectral6, factors=list(df['continent'].unique()))
plot.circle(
    x='gdpPercap',
    y='lifeExp',
    size='size',
    source=source,
    fill_color={'field': 'continent', 'transform': color_mapper},
    fill_alpha=0.8,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend_group='continent',
)
plot.add_tools(HoverTool(tooltips="@country", show_arrow=False, point_policy='follow_mouse'))


def slider_update(year):
    """Show the data for ``year`` in the bubble plot.

    Updates the background year label, replaces the contents of the shared
    data source with the rows for ``year`` and pushes the change to the
    notebook output.
    """
    label.text = str(year)
    source.data = _columns_for_year(year)
    push_notebook()


# notebook_handle=True is required for push_notebook() to update this output.
show(plot, notebook_handle=True)

In [49]:
# Take the slider bounds from the smallest and largest year in the data, as
# plain Python ints, rather than relying on the order of `years`.
# The trailing semicolon hides the function repr that interact() returns.
interact(
    slider_update,
    year=widgets.IntSlider(
        min=int(years.min()),
        max=int(years.max()),
        step=5,
        value=int(years.min()),
    ),
);

interactive(children=(IntSlider(value=1952, description='year', max=2007, min=1952, step=5), Output()), _dom_c…

<function __main__.slider_update(year)>