# BOKEH FOR DATA VISUALIZATION
## Claudia Guirao Fernández
## 30.09.2016
## Kernel Meetups


## Libraries

In [1]:
import pandas as pd
import numpy as np

## Bokeh in the notebook

In [2]:
# Route Bokeh output to the notebook so that later show() calls render inline.
from bokeh.io import output_notebook, show, save
output_notebook()

## Get the sample data

In [3]:
# Auto MPG sample table bundled with Bokeh.
from bokeh.sampledata.autompg import autompg

# Preview the first three rows.
autompg.head(3)

Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite


In [4]:
from bokeh.sampledata.iris import flowers

In [5]:
from bokeh.sampledata.gapminder import life_expectancy

In [6]:
from bokeh.sampledata.glucose import data

# Take an independent copy of the first 2000 readings so the imported sample
# frame is never modified and the column assignment below targets a real
# DataFrame rather than a slice.
glucose = data.iloc[:2000].copy()

# `errors` must be 'raise', 'coerce' or 'ignore'; `False` is not one of them.
# 'coerce' turns unparseable readings into NaN instead of aborting the cell.
glucose['isig'] = pd.to_numeric(glucose['isig'], errors='coerce')
glucose.head(3)

Unnamed: 0_level_0,isig,glucose
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-03-24 09:51:00,22.59,258
2010-03-24 09:56:00,22.52,260
2010-03-24 10:01:00,22.23,258


In [7]:
from bokeh.charts.utils import df_from_json
from bokeh.sampledata.olympics2014 import data

# Convert the olympics2014 sample payload into a DataFrame.
# Note: the import above rebinds the name `data`.
olimpics = df_from_json(data)

In [8]:
from bokeh.charts import Area, Bar, BoxPlot, Donut, Dot, HeatMap, Histogram, Horizon, Line, Scatter, Step, TimeSeries

## Better defaults

In [9]:
from bokeh.charts import defaults

# Shared settings picked up by every bokeh.charts chart created afterwards:
# an 800x300 canvas and a three-tool toolbar.
defaults.plot_width = 800
defaults.plot_height = 300
defaults.tools = 'pan, wheel_zoom, reset'

## Simple Charts

## Bar

In [10]:
# One bar per cylinder class; the y-axis is labelled as a vehicle count.
bar_plot = Bar(
    autompg,
    label='cyl',
    bar_width=0.4,
    title="label='cyl' bar_width=0.4",
    xlabel="Cyl",
    ylabel="Number of vehicles",
    legend=None,
)
show(bar_plot)

In [11]:
# Aggregate the 'mpg' column with agg='mean' for each cylinder class.
bar_plot3 = Bar(
    autompg,
    label='cyl',
    values='mpg',
    agg='mean',
    title="label='cyl' values='mpg' agg='mean'",
    xlabel="Cyl",
    ylabel="Mean mpg",
    legend=None,
)
show(bar_plot3)

In [12]:
# Label the bars by two columns at once: cylinder class and origin.
bar_plot5 = Bar(
    autompg,
    label=['cyl', 'origin'],
    values='mpg',
    agg='mean',
    color='blue',
    title="label=['cyl', 'origin'] values='mpg' agg='mean'",
    legend=None,
)
show(bar_plot5)

In [13]:
# Median mpg per model year, with the bars stacked by origin.
p = Bar(
    autompg,
    label='yr',
    values='mpg',
    agg='median',
    stack='origin',
    title="Median MPG by YR, grouped by ORIGIN",
    legend='top_left',
    tools='crosshair',
)
show(p)

## Boxplot

In [14]:
from bokeh.charts import BoxPlot
from bokeh.sampledata.iris import flowers

# Petal-width distribution per species, all boxes in one fixed colour.
# The iris measurements are recorded in centimetres, so the axis is labelled
# "cm" (the previous "mm" label misreported the unit).
p = BoxPlot(
    flowers,
    label='species',
    values='petal_width',
    tools='crosshair',
    color='#aa4444',
    xlabel='',
    ylabel='petal width, cm',
    title='Distributions of petal widths',
)
show(p)

In [15]:
from bokeh.charts import BoxPlot
from bokeh.sampledata.iris import flowers

# Same box plot, but with the box colour driven by the 'species' column.
# The iris measurements are recorded in centimetres, so the axis is labelled
# "cm" (the previous "mm" label misreported the unit).
p = BoxPlot(
    flowers,
    label='species',
    values='petal_width',
    tools='crosshair',
    color='species',
    xlabel='',
    ylabel='petal width, cm',
    title='Distributions of petal widths',
)
show(p)

## Scatter

In [16]:
# Plain scatter of horsepower against fuel economy.
scatter1 = Scatter(
    autompg,
    x='mpg',
    y='hp',
    title="x='mpg', y='hp'",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
    legend='top_right',
)
show(scatter1)

In [17]:
# Same scatter, with marker colour driven by the 'cyl' column.
scatter2 = Scatter(
    autompg,
    x='mpg',
    y='hp',
    color='cyl',
    title="x='mpg', y='hp', color='cyl'",
    xlabel="Miles Per Gallon",
    ylabel="Horsepower",
    legend='top_right',
)
show(scatter2)

In [18]:
from bokeh.charts import Scatter

# Both the colour and the marker shape are keyed on the 'species' column.
p = Scatter(
    flowers,
    x='petal_length',
    y='petal_width',
    color='species',
    marker='species',
    legend='top_left',
)
show(p)

In [19]:
from bokeh.charts.operations import blend

# Blend the petal and sepal columns into one 'length' and one 'width' series
# before handing them to the chart.
length_blend = blend('petal_length', 'sepal_length', name='length')
width_blend = blend('petal_width', 'sepal_width', name='width')

scatter6 = Scatter(
    flowers,
    x=length_blend,
    y=width_blend,
    color='species',
    title='x=petal_length+sepal_length, y=petal_width+sepal_width, color=species',
    legend='top_right',
)
show(scatter6)

## Histogram

In [20]:
import pandas as pd
import numpy as np

# Build two labelled samples to compare in the histograms below.
# A dedicated, seeded generator makes the draws identical on every
# Restart & Run All without touching numpy's global random state.
mu, sigma = 0, 0.5
n_samples = 1000
rng = np.random.RandomState(42)
normal = pd.DataFrame({'value': rng.normal(mu, sigma, n_samples), 'type': 'normal'})
lognormal = pd.DataFrame({'value': rng.lognormal(mu, sigma, n_samples), 'type': 'lognormal'})

# Stack both samples into one long frame; each part keeps its own 0..999 index.
df = pd.concat([normal, lognormal])

# Show the rows around the seam between the two samples.
df[997:1003]

Unnamed: 0,type,value
997,normal,-0.104063
998,normal,-0.040593
999,normal,0.687226
0,lognormal,0.874235
1,lognormal,0.795774
2,lognormal,0.529159


In [21]:
from bokeh.charts import Histogram

# Single histogram of every value in `df`, using 30 bins.
hist = Histogram(
    df,
    values='value',
    bins=30,
)
show(hist)

In [22]:
# Same histogram, coloured by the 'type' column.
hist = Histogram(
    df,
    values='value',
    color='type',
    bins=30,
)
show(hist)

## Dots

In [23]:
# Preview the table: rows are indexed by country, with one column per year.
life_expectancy.head()

Unnamed: 0_level_0,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,...,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,33.639,34.152,34.662,35.17,35.674,36.172,36.663,37.143,37.614,38.075,...,56.583,57.071,57.582,58.102,58.618,59.124,59.612,60.079,60.524,60.947
Albania,65.475,65.863,66.122,66.316,66.5,66.702,66.948,67.251,67.595,67.966,...,75.725,75.949,76.124,76.278,76.433,76.598,76.78,76.979,77.185,77.392
Algeria,47.953,48.389,48.806,49.205,49.592,49.976,50.366,50.767,51.195,51.67,...,69.682,69.854,70.02,70.18,70.332,70.477,70.615,70.747,70.874,71.0
American Samoa,,,,,,,,,,,...,,,,,,,,,,
Andorra,,,,,,,,,,,...,,,,,,,,,,


In [24]:
# Keep only the countries whose name starts with "A", sampled once per decade.
starts_with_a = life_expectancy.index.str.startswith('A')
decade_columns = ['1964', '1974', '1984', '1994', '2004']

# reset_index() turns the country index into a regular 'Country' column.
decades = life_expectancy.loc[starts_with_a, decade_columns].reset_index()
decades

Unnamed: 0,Country,1964,1974,1984,1994,2004
0,Afghanistan,33.639,38.529,43.661,51.738,56.583
1,Albania,65.475,68.356,71.144,71.92,75.725
2,Algeria,47.953,52.213,63.16,67.674,69.682
3,American Samoa,,,,,
4,Andorra,,,,,
5,Angola,34.604,38.635,40.671,41.736,48.036
6,Anguilla,,,,,
7,Antigua and Barbuda,63.775,67.181,69.931,72.219,74.355
8,Argentina,65.388,67.652,70.439,72.489,74.645
9,Armenia,67.714,70.774,70.149,68.513,73.192


In [25]:
# Dot chart of the 1964 values per country; rows with missing data are dropped.
# The axis label spelling is corrected ("Expectency" -> "Expectancy").
show(Dot(
    decades.dropna(),
    values='1964',
    label='Country',
    ylabel='Life Expectancy',
    legend=None,
))

In [26]:
from bokeh.charts.operations import blend
from bokeh.models import HoverTool

# Blend the five decade columns into a single value series; the originating
# column name is exposed under 'year' so it can drive colour and tooltips.
b = blend('1964', '1974', '1984', '1994', '2004', name='life_expectency', labels_name='year')

# The axis label spelling is corrected ("Expectency" -> "Expectancy").
dots_plot = Dot(
    decades.dropna(),
    values=b,
    label='Country',
    color='year',
    line_color='year',
    height=400,
    ylabel='Life Expectancy',
    tooltips=[('year', '@year')],
    legend=None,
)

show(dots_plot)


## Time Series

In [27]:
# Plot the glucose frame as a time series using the chart's default settings.
show(TimeSeries(glucose))

## Step

In [28]:
# Step chart of the same glucose frame, coloured with the Spectral8 palette.
from bokeh.palettes import Spectral8
show(Step(glucose, palette=Spectral8))

## Area

In [10]:
# Stacked area chart of the glucose frame; `area` is reused later for embedding.
area = Area(glucose, legend=True, stack=True)
# Pin the lower bound of the y-axis range at zero.
area.y_range.start = 0
show(area)

## Donut
### _"do not"_

In [32]:
# Donut of the cylinder classes (cast to strings), drawn on a fixed 400x400 canvas.
cyl_labels = autompg.cyl.astype('str')
show(Donut(
    cyl_labels,
    palette=Spectral8,
    plot_width=400,
    plot_height=400,
    responsive=False,
))

In [33]:
from bokeh.charts.utils import df_from_json

# Start from the Olympics frame built earlier, keep the rows whose 'total'
# exceeds 8, and order them by 'total', largest first.
df = olimpics[olimpics['total'] > 8].sort_values("total", ascending=False)

# Reshape to long format: one row per (abbr, medal) pair with its count.
df = pd.melt(
    df,
    id_vars=['abbr'],
    value_vars=['bronze', 'silver', 'gold'],
    value_name='medal_count',
    var_name='medal',
)

# Two-level donut labelled by 'abbr' and 'medal'.
d = Donut(
    df,
    label=['abbr', 'medal'],
    values='medal_count',
    text_font_size='8pt',
    hover_text='medal_count',
)

show(d)

## Plotting

In [34]:
from bokeh.plotting import figure

# Sample points shared by both glyphs.
x = [1, 2, 3, 4, 5]
y = [6, 7, 8, 7, 3]

# Empty 400x400 canvas.
p = figure(plot_width=400, plot_height=400)

# Draw a line, then white-filled circle markers over the same points.
p.line(x, y, line_width=2)
p.circle(x, y, fill_color="white", size=8)

show(p)

In [35]:
# Fresh 400x400 canvas for the square-marker example.
p = figure(plot_width=400, plot_height=400)

# Squares with a per-point size list and a single shared colour and alpha.
square_xs = [1, 2, 3, 4, 5]
square_ys = [6, 7, 2, 4, 5]
square_sizes = [10, 15, 20, 25, 30]
p.square(square_xs, square_ys, size=square_sizes, color="firebrick", alpha=0.6)

show(p)

In [36]:
from bokeh.models import ColumnDataSource

# Keep the data in a ColumnDataSource and let the glyph refer to columns by name.
source = ColumnDataSource(data=dict(
    x=[1, 2, 3, 4, 5],
    y=[3, 7, 8, 5, 1],
))

p = figure()
p.circle('x', 'y', size=20, source=source)
show(p)

## Bokeh & Excel

In [37]:
# Download the workbook from this URL and load its 'Data' sheet.
filepath = "http://databank.worldbank.org/data/download/catalog/climate_change_download_0.xls"
workbook = pd.ExcelFile(filepath)
df = workbook.parse('Data')
df.head(5)

Unnamed: 0,Country code,Country name,Series code,Series name,SCALE,Decimals,1990,1991,1992,1993,...,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011
0,ABW,Aruba,AG.LND.EL5M.ZS,Land area below 5m (% of land area),0,1,29.5748,..,..,..,...,..,..,..,..,..,..,..,..,..,..
1,ADO,Andorra,AG.LND.EL5M.ZS,Land area below 5m (% of land area),0,1,0.0,..,..,..,...,..,..,..,..,..,..,..,..,..,..
2,AFG,Afghanistan,AG.LND.EL5M.ZS,Land area below 5m (% of land area),0,1,0.0,..,..,..,...,..,..,..,..,..,..,..,..,..,..
3,AGO,Angola,AG.LND.EL5M.ZS,Land area below 5m (% of land area),0,1,0.208235,..,..,..,...,..,..,..,..,..,..,..,..,..,..
4,ALB,Albania,AG.LND.EL5M.ZS,Land area below 5m (% of land area),0,1,4.96788,..,..,..,...,..,..,..,..,..,..,..,..,..,..


In [39]:
# Select the total-CO2 series and work on a copy so `df` stays untouched.
is_co2_total = df['Series name'] == "CO2 emissions, total (KtCO2)"
emissions = df[is_co2_total].copy()

# Convert the three year columns to numbers; unparseable cells become NaN.
year_columns = [2007, 2006, 2005]
emissions[year_columns] = emissions[year_columns].apply(pd.to_numeric, errors='coerce')

# Rank by the 2007 column, largest first, and keep the ten leading rows.
emissions = emissions.sort_values(2007, ascending=False)
_remissions = emissions.head(10)

In [40]:
# Keep the identifying columns plus the three years, then make every column
# label a string (the year labels are integers in the source frame).
columns = ['Country code', 'Country name', 2007, 2006, 2005]
remissions = _remissions[columns].rename(columns=str)

In [41]:
# Ten-entry colour list passed to the chart as its palette.
palette = [
    '#f7fcf5', '#e5f5e0', '#c7e9c0', '#a1d99b', '#74c476',
    '#41ab5d', '#238b45', '#005a32', '#5A6351', '#000000',
]

# Blend the three year columns into one series, labelled by 'years'.
yearly_values = blend('2007', '2006', '2005', name='values', labels_name='years')

# Bars labelled by year, grouped and coloured by country.
p = Bar(
    remissions,
    label='years',
    group='Country name',
    palette=palette,
    values=yearly_values,
    title='Emissions',
    color='Country name',
    legend="top_right",
    responsive=True,
)
show(p)

In [42]:
# Two colour ramps; only `blues` is used by the chart below.
greys = ['#ffffff', '#f0f0f0', '#d9d9d9', '#bdbdbd', '#969696', '#737373', '#525252', '#252525', '#000000']
blues = ["#f7fbff", "#deebf7", "#c6dbef", "#9ecae1", "#6baed6", "#4292c6", "#2171b5", "#084594"]

# Blend the three year columns into one series, labelled by 'year'.
yearly_values = blend('2007', '2006', '2005', name='values', labels_name='year')

# Bars labelled by country, grouped and coloured by year.
# blues[1::3] takes every third shade starting from the second entry.
p = Bar(
    remissions,
    label='Country name',
    group='year',
    palette=blues[1::3],
    values=yearly_values,
    title='Emissions',
    color='year',
    legend=True,
    responsive=True,
)
show(p)

## Interactions

In [43]:
from bokeh.layouts import gridplot
from bokeh.plotting import figure

# Three small series sharing the same x values.
x = list(range(11))
y0, y1, y2 = x, [10-i for i in x], [abs(i-5) for i in x]

# All three panels use the same width/height keywords; the first one
# previously mixed `width` with `plot_height`.
s1 = figure(width=250, height=250)
s1.circle(x, y0, size=10, color="navy", alpha=0.5)

s2 = figure(width=250, height=250)
s2.triangle(x, y1, size=10, color="firebrick", alpha=0.5)

s3 = figure(width=250, height=250)
s3.square(x, y2, size=10, color="olive", alpha=0.5)

# Lay the panels out in a single row; toolbar_location=None is passed to gridplot.
p = gridplot([[s1, s2, s3]], toolbar_location=None)

show(p)

In [44]:
# Options shared by the three panels. `width`/`height` are used as a matching
# pair, consistent with the other figures in this notebook (this dict
# previously mixed `width` with `plot_height`).
plot_options = dict(width=250, height=250, tools='pan,wheel_zoom')

# Reference panel that owns the ranges.
s1 = figure(**plot_options)
s1.circle(x, y0, size=10, color="navy")

# Shares both ranges with s1.
s2 = figure(x_range=s1.x_range, y_range=s1.y_range, **plot_options)
s2.triangle(x, y1, size=10, color="firebrick")

# Shares only the x range with s1.
s3 = figure(x_range=s1.x_range, **plot_options)
s3.square(x, y2, size=10, color="olive")

p = gridplot([[s1, s2, s3]])

show(p)

In [47]:
from bokeh.models import ColumnDataSource

# Two series over the same x values: |x| and x squared.
x = list(range(-20, 21))
y0 = [abs(xx) for xx in x]
y1 = [xx**2 for xx in x]

# A single data source is passed to both plots.
source = ColumnDataSource(data=dict(x=x, y0=y0, y1=y1))

TOOLS = "box_select,lasso_select,reset,help"
panel_options = dict(tools=TOOLS, width=300, height=300)

# Left panel plots the y0 column.
left = figure(**panel_options)
left.circle('x', 'y0', source=source)

# Right panel plots the y1 column from the same source.
right = figure(**panel_options)
right.circle('x', 'y1', source=source)

p = gridplot([[left, right]])

show(p)

## Maps & Bokeh

In [None]:
import os

from bokeh.io import output_file, show
from bokeh.models import (
    GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool,
    HoverTool, ResetTool, SaveTool
)

# API_KEY was never defined in this notebook, so the cell failed with a
# NameError on a fresh kernel. Use a key defined in an earlier cell if there
# is one, otherwise read it from the environment; never hardcode it here.
API_KEY = globals().get("API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "No Google Maps API key found: define API_KEY in an earlier cell "
        "or set the GOOGLE_API_KEY environment variable."
    )

# Map options: centre coordinates, roadmap style and zoom level 5.
map_options = GMapOptions(lat=40.4617152, lng=-3.6959736, map_type="roadmap", zoom=5)

plot_map = GMapPlot(
    x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options, api_key=API_KEY
)

# One row per office; the column names are referenced by the glyph and tooltips.
source = ColumnDataSource(data={
    'Oficina': ["Kernel Madrid", "Kernel Barcelona"],
    'Latitud': [40.4617152, 41.3912862],
    'Longitud': [-3.6959736, 2.1574188],
    'Direccion': ["Calle Orense, 68, 6ª planta, Madrid", "Carrer de Balmes, 89, 08008 Barcelona"],
    'Telefono': [915022390, 932506437]
})

# Circle marker per office: longitude on x, latitude on y.
circle = Circle(x="Longitud", y="Latitud", size=15, fill_color="blue", fill_alpha=0.5, line_color=None)
plot_map.add_glyph(source, circle)

plot_map.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool(), HoverTool(), ResetTool(), SaveTool())

# Configure the hover tool that was just added.
hover = plot_map.select_one(HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = [
    ("Oficina", "@Oficina"),
    ("Dirección", "@Direccion"),
    ("Telefono", "@Telefono")
]

# Write the map to a standalone HTML file.
output_file("office.html")
save(plot_map)  # save(plot_map) writes the file without opening a new browser tab
# show(plot_map)


## Some cool viz with Bokeh

### The Gapminder visualization

https://anaconda.org/bokeh/gapminder/notebook 

### Bokeh Demos

https://demo.bokehplots.com/

## Sharing like a pro

In [11]:
import jinja2
from bokeh.embed import components

In [12]:

# Minimal HTML page that embeds a Bokeh plot through the {{ script }} and
# {{ div }} placeholders.
# - BokehJS is loaded over https so the page is not blocked as mixed content
#   when it is served from an HTTPS site.
# - The charset is declared because the page text contains accented characters.
# - NOTE(review): the BokehJS version in the URLs (0.12.0) presumably has to
#   match the installed Bokeh version - confirm before sharing.
template = jinja2.Template("""
<!DOCTYPE html>
<html lang="en-US">

<head>
<meta charset="utf-8">
<link
    href="https://cdn.pydata.org/bokeh/release/bokeh-0.12.0.min.css"
    rel="stylesheet" type="text/css"
>
<script 
    src="https://cdn.pydata.org/bokeh/release/bokeh-0.12.0.min.js"
></script>
</head>

<body>

    <h1>Gracias por vuestra atención, Bokeh mola!</h1>
    
    <p> Esta es una plantilla HTML y aquí debajo pondremos nuestra visualización </p>
    
    {{ script }}
    
    {{ div }}

</body>

</html>
""")

In [13]:
# Generate the embeddable pieces for the `area` chart: components() returns
# the pair unpacked here as `script` and `div`.
script, div = components(area)

In [14]:
# Fill the template with the generated script/div and display the resulting
# HTML as this cell's output.
from IPython.display import HTML
HTML(template.render(script=script, div=div))