# Bokeh charts

In [1]:
# Record the IPython version this notebook was executed with.
import IPython
IPython.__version__

'4.1.1'

In [2]:
# Record the Bokeh version this notebook was executed with.
import bokeh
bokeh.__version__

'0.11.1'

Bokeh's high-level interface was rewritten in version [0.10](http://bokeh.pydata.org/en/0.10.0/docs/releases/0.10.0.html).

In [3]:
from bokeh.io import output_notebook, show
# Direct Bokeh output to the notebook so that show() renders plots inline.
output_notebook()

## Chart inputs

Bokeh chart inputs can be of two types:

* Array-like - [list][10], [tuple][20], [numpy.ndarray][30], [pandas.Series][40]
* Table-like - records: list([dict][50]), columns: dict(list), [pandas.DataFrame][60]

[10]: https://docs.python.org/2/tutorial/datastructures.html#more-on-lists
[20]: https://docs.python.org/2/tutorial/datastructures.html#tuples-and-sequences
[30]: http://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.ndarray.html#numpy-ndarray
[40]: http://pandas.pydata.org/pandas-docs/version/0.17.1/dsintro.html#series-is-ndarray-like

[50]: https://docs.python.org/2/tutorial/datastructures.html#dictionaries
[60]: http://pandas.pydata.org/pandas-docs/version/0.17.1/dsintro.html#dataframe

In [5]:
from bokeh import charts

## Scatter plot

In [6]:
# Load the iris sample data shipped with Bokeh and preview its first rows.
from bokeh.sampledata.iris import flowers
flowers.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [7]:
# Petal length against petal width, with one colour per species.
p = charts.Scatter(
    flowers,
    x='petal_length',
    y='petal_width',
    color='species',
    legend='top_left',
)
show(p)

## Box plot

In [19]:
# One box per species showing the spread of petal widths.
# The iris measurements are recorded in centimetres (setosa petal widths are
# around 0.2), so the axis label states cm rather than mm.
p = charts.BoxPlot(
    flowers, label='species', values='petal_width', tools='crosshair',
    ylabel='petal width, cm', title='Distribution of petal widths'
)
show(p)

## Bar plots

In [10]:
# Load the auto-mpg sample data shipped with Bokeh and preview its first rows.
from bokeh.sampledata.autompg import autompg
autompg.head()

Unnamed: 0,mpg,cyl,displ,hp,weight,accel,yr,origin,name
0,18,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15,8,350,165,3693,11.5,70,1,buick skylark 320
2,18,8,318,150,3436,11.0,70,1,plymouth satellite
3,16,8,304,150,3433,12.0,70,1,amc rebel sst
4,17,8,302,140,3449,10.5,70,1,ford torino


### Grouped bar plots

In [25]:
# Median mpg per model year, with one bar per origin placed side by side.
p = charts.Bar(
    autompg,
    label='yr',
    values='mpg',
    agg='median',
    group='origin',  # Use the group feature
    title="Median MPG by YR, grouped by ORIGIN",
    legend='top_left',
    tools='crosshair',
)
show(p)

### Stacked bar plots

In [24]:
# Median mpg per model year, with the per-origin bars stacked on top of each other.
p = charts.Bar(
    autompg,
    label='yr',
    values='mpg',
    agg='median',
    stack='origin',  # Use the stack feature
    title="Median MPG by YR, stacked by ORIGIN",
    legend='top_left',
    tools='crosshair',
)
show(p)

## Histogram

In [15]:
import pandas as pd
import numpy as np

# Draw from a dedicated, seeded generator so the histogram is identical on
# every Restart & Run All instead of changing with each execution.
SEED = 42
N_SAMPLES = 1000
rng = np.random.RandomState(SEED)

# build some distributions
mu, sigma = 0, 0.5
normal = pd.DataFrame({'value': rng.normal(mu, sigma, N_SAMPLES), 'type': 'normal'})
lognormal = pd.DataFrame({'value': rng.lognormal(mu, sigma, N_SAMPLES), 'type': 'lognormal'})

# Stack both samples into one long frame; the 'type' column tells them apart.
df = pd.concat([normal, lognormal])

In [23]:
# NOTE(review): this import is not used by the call below, which goes through
# `charts.Histogram`; it is kept so the notebook namespace stays unchanged.
from bokeh.charts import Histogram

# Overlay the two distributions, coloured by the 'type' column, using 50 bins.
hist = charts.Histogram(
    df,
    values='value',
    color='type',
    bins=50,
    legend=True,
)
show(hist)

## Line plot

In [18]:
# Load the US marriages/divorces sample data shipped with Bokeh and preview its first rows.
from bokeh.sampledata.us_marriages_divorces import data as marriages_divorces
marriages_divorces.head()

Unnamed: 0,Year,Marriages,Divorces,Population,Marriages_per_1000,Divorces_per_1000
0,1867,357000,10000,36970000,9.7,0.3
1,1868,345000,10000,37885000,9.1,0.3
2,1869,348000,11000,38870000,9.0,0.3
3,1870,352000,11000,39905000,8.8,0.3
4,1871,359000,12000,41010000,8.8,0.3


In [26]:
# IPython help query: displays the documentation for charts.Line.
# NOTE(review): interactive leftover; consider removing it from the final notebook.
charts.Line?

In [29]:
# Marriage and divorce rates over time, drawn as two lines sharing one y axis.
p = charts.Line(
    marriages_divorces,
    x='Year',
    y=['Marriages_per_1000', 'Divorces_per_1000'],
    ylabel='per 1000 people',
    legend='top_left',
)
show(p)

## Area plot

Count the number of models per year and origin.

In [93]:
# Count the rows (car models) for every (model year, origin) combination.
# `size()` returns the per-group row count directly, so the result does not
# depend on how `aggregate(len)` treats a frame whose only columns are the
# grouping keys, and the count column is named explicitly instead of by
# positional column reassignment.
df2 = (
    autompg
    .groupby(['yr', 'origin'])
    .size()
    .reset_index(name='model_count')
)
# Replace the numeric origin codes with region names; df2 itself is left untouched.
df3 = df2.replace({'origin': {1: 'US', 2: 'Europe', 3: 'Japan'}})

In [94]:
# Reshape to wide form: one row per model year, one count column per origin.
model_origin = (
    df3
    .pivot(index='yr', columns='origin', values='model_count')
    .reset_index()
)

In [95]:
# Number of models per year, drawn as one area per origin.
p = charts.Area(
    model_origin,
    x='yr',
    y=['US', 'Europe', 'Japan'],
    title="Auto model origin by country",
    xlabel='year',
    ylabel='number of models',
    legend='top_right',
)
show(p)