In [7]:
import pandas as pd
import random

from bokeh.io import output_notebook
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource

In [11]:
# Generate some synthetic data: 4 treatments ('0'..'3') with 100 samples each,
# where treatment i is drawn from a normal distribution with mean i and
# standard deviation 0.5.
# Seed the generator first so that Restart & Run All reproduces the same
# data (and therefore the same plot) every time.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

df = pd.DataFrame({
    'Treatment': [str(i) for i in range(4) for _ in range(100)],
    'y': [random.gauss(i, 0.5) for i in range(4) for _ in range(100)]
})
df.head()

Unnamed: 0,Treatment,y
0,0,0.082093
1,0,0.174365
2,0,0.231916
3,0,0.725655
4,0,-0.009648


In [12]:
from bokeh.palettes import brewer
def color_list_generator(df, treatment_col):
    """ Create a list of colors per treatment given a dataframe and 
        column representing the treatments.

        Each unique treatment (in order of first appearance) is assigned one
        color from the "Spectral" Brewer palette, and the color for every
        row's treatment is returned.

        Args:
            df - dataframe to get data from
            treatment_col - column to use to get unique treatments.

        Returns:
            A list with one color per row of df, aligned with
            df[treatment_col]. Empty if df has no rows.

        Raises:
            KeyError - if there are more unique treatments than the largest
                available "Spectral" palette.

        Inspired by creating colors for each treatment 
        Rough Source: http://bokeh.pydata.org/en/latest/docs/gallery/brewer.html#gallery-brewer
        Fine Tune Source: http://bokeh.pydata.org/en/latest/docs/gallery/iris.html
    """
    # Compute the unique treatments once; they define both the palette size
    # and the treatment -> color mapping.
    treatments = df[treatment_col].unique()

    # Brewer palettes start at 3 colors, so for fewer treatments take the
    # 3-color palette and only use as many colors as are needed.
    palette = brewer["Spectral"][max(len(treatments), 3)]

    # Create a map between treatment and color (zip stops at the shorter
    # sequence, so surplus palette colors are ignored).
    colormap = dict(zip(treatments, palette))

    # Return a list of colors for each value that we will be looking at.
    return [colormap[x] for x in df[treatment_col]]


In [18]:
# Generate a boxplot of y per treatment.
import numpy as np

from bokeh.models import Legend, LegendItem
from bokeh.plotting import figure, show, output_file

output_notebook()

# Find the quartiles and IQR for each category.
groups = df.groupby('Treatment')
q1 = groups.quantile(q=0.25)
q2 = groups.quantile(q=0.5)
q3 = groups.quantile(q=0.75)
iqr = q3 - q1
upper = q3 + 1.5*iqr
lower = q1 - 1.5*iqr

# Get the categories that we will be plotting by. Take them from the
# quantile index rather than from df.Treatment.unique(): the statistics are
# ordered by group key, and the categories must be in that same order so each
# box is paired with its own treatment.
cats = q1.index.tolist()

# Get the colors for the boxes: one color per category, in the same order as
# cats. color_list_generator returns one color per row, so build a
# treatment -> color lookup from it instead of de-duplicating through a set
# (a set does not preserve order, which made the assignment arbitrary).
row_colors = color_list_generator(df, 'Treatment')
color_by_treatment = dict(zip(df['Treatment'], row_colors))
colors = [color_by_treatment[cat] for cat in cats]

# Form the source data to call vbar for upper and lower
# boxes to be formed later.
# The upper box spans median -> third quartile.
upper_source = ColumnDataSource(data=dict(
    x=cats, 
    bottom=q2.y,
    top=q3.y,
    fill_color=colors,
    legend=cats
))

# The lower box spans first quartile -> median.
lower_source = ColumnDataSource(data=dict(
    x=cats, 
    bottom=q1.y,
    top=q2.y,
    fill_color=colors
))

# Find the outliers for each category.
def outliers(group):
    """Return the y values of one treatment group that lie outside its bounds.

    Args:
        group - the rows of df for a single treatment; group.name is the
            treatment label used to look up that treatment's bounds in the
            module-level `upper` and `lower` frames.

    Returns:
        The `y` values of the group that are above the upper bound or below
        the lower bound (empty when there are none).
    """
    cat = group.name
    return group[(group.y > upper.loc[cat]['y']) | (group.y < lower.loc[cat]['y'])]['y']
out = groups.apply(outliers).dropna()

# Prepare outlier data for plotting, we need coordinates for every outlier.
# Always define both lists so later code can rely on them existing.
outx = []
outy = []
if not out.empty:
    # `out` is indexed by (treatment, original row label). Read the treatment
    # of every outlier straight from the first index level instead of doing
    # out.loc[cat] per category, which raises KeyError for any category that
    # has no outliers at all.
    outx = out.index.get_level_values(0).tolist()
    outy = out.tolist()

p = figure(tools="save", title="", x_range=df['Treatment'].unique())

# Stems from each box edge out to its bound (drawn in plain black, they do
# not take the treatment color). Upper stems first, then lower.
for bound, quartile in ((upper, q3), (lower, q1)):
    p.segment(cats, bound.y, cats, quartile.y, line_color="black")

# Upper and lower halves of each box share everything except their source.
box_style = dict(
    x='x',
    width=0.7,
    bottom='bottom',
    top='top',
    fill_color='fill_color',
    line_color="black",
)
l = p.vbar(source=upper_source, **box_style)
p.vbar(source=lower_source, **box_style)

# Whisker caps (almost-0 height rects are simpler than segments).
for bound in (lower, upper):
    p.rect(cats, bound.y, 0.2, 0.01, line_color="black")

# Outliers, only when there are any to draw.
if not out.empty:
    p.circle(outx, outy, size=6, color="#F38630", fill_alpha=0.6)

# Using the newer autogrouped syntax: take the upper-quartile renderer and
# build the legend explicitly from its 'x' field, then place it below the plot.
# Guidance from: https://groups.google.com/a/continuum.io/forum/#!msg/bokeh/uEliQlgj390/Jyhsc5HqAAAJ
legend_item = LegendItem(label=dict(field="x"), renderers=[l])
legend = Legend(items=[legend_item])
p.add_layout(legend, 'below')
p.legend.location = (100,10)

# Title and axis labels.
p.title.text = "Boxplot with Colored Treatments and Legend Outside Plot"
p.xaxis.axis_label = "Treatment"
p.yaxis.axis_label = "y"

# Grid styling.
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = "white"
p.grid.grid_line_width = 2

# Tick labels on the x axis are given a 0pt font; the legend carries the
# treatment names instead.
p.xaxis.major_label_text_font_size = "0pt"
p.xaxis.major_label_orientation = np.pi/4

show(p)