In [162]:
import numpy as np
import pandas as pd

from bokeh.layouts import layout
from bokeh.layouts import widgetbox

from bokeh.embed import file_html

from bokeh.io import show
from bokeh.io import output_notebook 

from bokeh.models import Text
from bokeh.models import Plot
from bokeh.models import Slider
from bokeh.models import Circle
from bokeh.models import Range1d
from bokeh.models import CustomJS
from bokeh.models import HoverTool
from bokeh.models import LinearAxis
from bokeh.models import ColumnDataSource
from bokeh.models import SingleIntervalTicker

from bokeh.palettes import Spectral7

To display Bokeh plots inline in a Jupyter notebook, use the `output_notebook()` function from bokeh.io. When `show()` is called, the plot will be displayed inline in the next notebook output cell. To save your Bokeh plots, you can use the `output_file()` function instead (or in addition).

In [163]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

### Get the data

Get the data from the pickle file, which contains the Honda listings data.

In [180]:
# Read the listings DataFrame from the local pickle file.
# NOTE(review): unpickling executes arbitrary code -- only load trusted files.
data = pd.read_pickle("listings.pkl")

# Store the region names as a categorical column and keep the list of its
# category labels; positions in this list are later used to pick colours.
data['region_name'] = data['region_name'].astype('category')
regions_list = data['region_name'].cat.categories.tolist()



def get_color(r):
    """Return the Spectral7 colour assigned to a row's region.

    Parameters
    ----------
    r : row-like object exposing a ``region_name`` attribute.

    Returns
    -------
    The entry of ``Spectral7`` at the position that ``r.region_name``
    occupies in ``regions_list``.

    Raises
    ------
    ValueError
        If ``r.region_name`` is not present in ``regions_list``.
    IndexError
        If the region's position is beyond the end of ``Spectral7``.
    """
    position = regions_list.index(r.region_name)
    return Spectral7[position]
    
# Colour every listing by its region (row-wise lookup through get_color).
data['region_color'] = data.apply(get_color, axis=1)

# Container for the per-year ColumnDataSource objects, keyed by year.
sources = {}

region_color = data['region_color']
region_color.name = 'region_color'

# Order the listings by year so that `years` comes out in ascending order.
data = data.sort_values(by='year_is', ascending=True)
years = data['year_is'].unique()

# Rescale: miles are divided by 1000 and prices by 100.
# NOTE(review): presumably so the values fit the plot ranges / bubble sizes -- confirm.
data['miles_fs'] = data['miles_fs'].div(1000)
data['price_fs'] = data['price_fs'].div(100)

# Mean price, mileage and encoded engine size per (year, state, colour).
# The list-valued aggregation spec yields two-level column labels.
aggregated_df = (
    data[['year_is', 'state_ss', 'miles_fs', 'engine_size_ss_encoded', 'price_fs', 'region_color']]
    .groupby(['year_is', 'state_ss', 'region_color'], as_index=False)
    .agg({
        'price_fs': ['mean'],
        'miles_fs': ['mean'],
        'engine_size_ss_encoded': ['mean'],
    })
)


# Build one ColumnDataSource per year, keyed as '_<year>'.
# Flat labels that replace the two-level column labels produced by the
# aggregation, and the subset of columns each source exposes to the plot.
flat_columns = ['year_is', 'state_ss', 'region_color', 'price_fs', 'miles_fs', 'engine_size_ss_encoded']
source_columns = ['region_color', 'price_fs', 'miles_fs', 'engine_size_ss_encoded']

for year in years:
    # Work on an explicit copy so relabelling columns never touches (or warns
    # about) a slice of aggregated_df.
    new_df = aggregated_df[aggregated_df['year_is'] == year].copy()
    new_df.columns = flat_columns
    new_df = new_df.set_index('state_ss')
    # Clear the index name by assignment: `del new_df.index.name` raises
    # AttributeError on pandas >= 1.0. The name has to be empty because the
    # hover tooltip looks the state up as "@index".
    new_df.index.name = None
    sources['_' + str(year)] = ColumnDataSource(new_df[source_columns])
    
    
# Spot-check the aggregation: show only the rows whose state_ss is 'NM'.
test = aggregated_df.loc[aggregated_df['state_ss'] == 'NM']
test

Unnamed: 0_level_0,year_is,state_ss,region_color,price_fs,miles_fs,engine_size_ss_encoded
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,mean,mean
20,2000,NM,#d53e4f,39.99,125.805,15.0
55,2001,NM,#d53e4f,289.0,86.5,19.0
95,2002,NM,#d53e4f,49.95,177.1385,18.5
135,2003,NM,#d53e4f,29.95,12.284,0.0
175,2004,NM,#d53e4f,54.633333,104.219,16.0
216,2005,NM,#d53e4f,63.024286,119.895857,19.142857
257,2006,NM,#d53e4f,70.461111,131.366222,22.111111
298,2007,NM,#d53e4f,79.004286,168.482429,24.285714
339,2008,NM,#d53e4f,97.574545,120.511273,22.272727
379,2009,NM,#d53e4f,84.195,115.615,21.166667


In the cell above, we added each of our sources to the `sources` dictionary, where each key is the year (prefixed with an underscore) and each value is a `ColumnDataSource` built from the dataframe of aggregated values for that year.

_Note that we needed the prefix because JavaScript identifiers cannot begin with a digit._

Now we have an object that's storing all of our `ColumnDataSources`, so that we can look them up.

### Build the plot

First we need to create a `Plot` object. We'll start with a basic frame, only specifying things like plot height, width, and ranges for the axes.

In [165]:
# Fixed data ranges for the horizontal and vertical axes.
xdr = Range1d(start=1, end=300)
ydr = Range1d(start=1, end=50)

# Bare plot frame: 800x400, no outline, no toolbar, 20px minimum border.
plot = Plot(
    plot_width=800,
    plot_height=400,
    x_range=xdr,
    y_range=ydr,
    min_border=20,
    outline_line_color=None,
    toolbar_location=None,
)

What do you expect to see when we call `show()` on the plot that we have created so far? Try it out below:

### Build the axes

Next we can make some stylistic modifications to the plot axes (e.g. by specifying the text font, size, and color, and by adding labels), to make the plot look more like the one in Hans Rosling's video.

In [166]:
# Styling shared by both axes; unpacked into each LinearAxis below.
AXIS_FORMATS = {
    # Tick geometry
    'minor_tick_in': None,
    'minor_tick_out': None,
    'major_tick_in': None,
    # Label fonts
    'major_label_text_font_size': "10pt",
    'major_label_text_font_style': "normal",
    'axis_label_text_font_size': "10pt",
    # Colours
    'axis_line_color': '#AAAAAA',
    'major_tick_line_color': '#AAAAAA',
    'major_label_text_color': '#666666',
    # Line caps and widths
    'major_tick_line_cap': "round",
    'axis_line_cap': "round",
    'axis_line_width': 1,
    'major_tick_line_width': 1,
}

# Horizontal axis: "Miles", one tick every 30 units.
xaxis = LinearAxis(
    axis_label="Miles",
    ticker=SingleIntervalTicker(interval=30),
    **AXIS_FORMATS
)
# Vertical axis: "Engine Size", one tick every 5 units.
yaxis = LinearAxis(
    axis_label="Engine Size",
    ticker=SingleIntervalTicker(interval=5),
    **AXIS_FORMATS
)
plot.add_layout(xaxis, 'below')
plot.add_layout(yaxis, 'left')

What do you expect to see when we call `show()` on the plot that we have created so far? Experiment with running the below cell, modifying some of the parameters above, and then re-running the cell below:

In [167]:
# Render the plot in its current state.
show(plot)

### Add the background year text

One of the features of Rosling's animation is that the year appears as the text background of the plot. We will add this feature to our plot first so it will be layered below all the other glyphs (which will be incrementally added, layer by layer, on top of each other until we are finished).

In [168]:
# One-row source holding the label drawn behind the bubbles; it starts out
# as the first entry of `years`.
text_source = ColumnDataSource(data={'year': [str(years[0])]})

# Large, pale text glyph that reads its content from the 'year' column.
text = Text(
    x=30,
    y=15,
    text='year',
    text_color='#EEEEEE',
    text_font_size='150pt',
)
plot.add_glyph(text_source, text)

Test out different versions of the background text and see how it changes the plot:

In [169]:
# Render the plot in its current state.
show(plot)

### Add the bubbles and hover
Next we will add the bubbles using Bokeh's [`Circle`](http://bokeh.pydata.org/en/latest/docs/reference/plotting.html#bokeh.plotting.figure.Figure.circle) glyph. We start from the first year of data, which is our source that drives the circles (the other sources will be used later).    

In [170]:
# Add the circle
# Give the renderer its own source, seeded with a copy of the first year's
# columns. The slider callback overwrites `renderer_source.data`; if the
# renderer shared the object stored in `sources`, the first year's data would
# be replaced on the first slider move and could not be shown again.
renderer_source = ColumnDataSource(data=dict(sources['_%s' % years[0]].data))

# Bubble position comes from mileage and encoded engine size, bubble size from
# price, and fill colour from the per-row region colour.
circle_glyph = Circle(
    x='miles_fs', y='engine_size_ss_encoded', size='price_fs',
    fill_color='region_color', fill_alpha=0.8,
    line_color='#7c7e71', line_width=0.5, line_alpha=0.5)

# add_glyph returns the renderer, which is kept for later use.
circle_renderer = plot.add_glyph(renderer_source, circle_glyph)

In the above, `plot.add_glyph` returns the renderer, which we can then pass to the `HoverTool` so that hover only happens for the bubbles on the page and not other glyph elements:

In [171]:
# Add the hover (only against the circle and not other plot elements)
# NOTE(review): "@index" presumably resolves to the source's index column
# (the state codes) -- confirm against the Bokeh version in use.
tooltips = "@index"
plot.add_tools(
    HoverTool(renderers=[circle_renderer], tooltips=tooltips)
)

Test out different parameters for the `Circle` glyph and see how it changes the plot:

In [172]:
# Render the plot in its current state.
show(plot)

### Add the legend

Next we will manually build a legend for our plot by adding circles and texts to the upper-righthand portion:

In [173]:
# Anchor (in data coordinates) of the first legend entry; each following
# entry is drawn 5 units lower.
text_x, text_y = 250, 40

for color_idx, region_label in enumerate(regions_list):
    # Region name ...
    legend_label = Text(
        x=text_x, y=text_y, text=[region_label],
        text_font_size='10pt', text_color='#666666')
    plot.add_glyph(legend_label)

    # ... and its colour swatch, offset slightly from the text anchor.
    legend_marker = Circle(
        x=text_x - 0.1, y=text_y + 2,
        fill_color=Spectral7[color_idx], fill_alpha=0.8,
        size=10, line_color=None)
    plot.add_glyph(legend_marker)

    text_y -= 5

regions_list

['Far West Region',
 'Great Lakes Region',
 'Mideast Region',
 'Plains Region',
 'Rocky Mountain Region',
 'Southeast Region',
 'Southwest Region']

Experiment with different parameters, and test it out by running the below cell:

In [174]:
# Render the plot in its current state.
show(plot)

### Add the slider and callback
Next we add the slider widget and the JavaScript callback code, which changes the data of the `renderer_source` (powering the bubbles / circles) and the data of the `text_source` (powering our background text). The callback assigns the new data directly to each source's `data` property. `slider`, `renderer_source`, and `text_source` are all available in the JavaScript because we add them as args to `CustomJS`.

It is the combination of `sources = %s` in the JavaScript (filled in with `js_source_array`) and passing the contents of `sources` as `args` to `CustomJS` that provides the ability to look up, by year, the JavaScript version of our Python-made `ColumnDataSource`.

In [182]:
# Add the slider

# Map every year to the name under which its ColumnDataSource is exposed to
# the JavaScript callback (the year prefixed with an underscore).
dictionary_of_sources = dict(zip(years, ['_%s' % x for x in years]))

# JavaScript object literal of the form {2000: _2000, 2001: _2001, ...}.
# It is formatted explicitly with %s rather than derived from str(dict),
# because the dict repr embeds the repr of each key and NumPy >= 2 renders
# scalars as e.g. "np.int64(2000)", which is not valid JavaScript.
js_source_array = '{%s}' % ', '.join(
    '%s: %s' % (year, name) for year, name in dictionary_of_sources.items()
)

# On every slider change: look up the source for the selected year, copy its
# data onto the renderer's source, and update the background year label.
code = """
    var year = slider.value,
        sources = %s,
        new_source_data = sources[year].data;
    renderer_source.data = new_source_data;
    text_source.data = {'year': [String(year)]};
""" % js_source_array

# Pass a fresh dict as args so that the module-level `sources` mapping is not
# modified when the extra entries are added.
callback = CustomJS(
    args=dict(sources, renderer_source=renderer_source, text_source=text_source),
    code=code,
)

# Start the slider on the first year so that it agrees with the initial
# background text and bubbles (the previous initial value of 1 was outside
# the slider's own start/end range).
slider = Slider(start=years[0], end=years[-1], value=years[0], step=1, title="Year", callback=callback)

# The slider can only be registered after it exists, because it needs the
# callback at construction time.
callback.args["slider"] = slider

Check out what our slider looks like by itself:

In [183]:
# Display the slider on its own, wrapped in a widgetbox.
show(widgetbox(slider))

### Putting all the pieces together

Last but not least, we put the chart and the slider together in a layout and display it inline in the notebook.

In [184]:
# Stack the plot above the slider in a single layout and display it.
show(layout([[plot], [slider]], sizing_mode='scale_width'))