In [1]:
from bokeh.io import output_notebook
import pandas as pd

In [2]:
# Configure Bokeh to render plots inline in the notebook output cells.
output_notebook()

In [3]:
# Path to the daily weather observations, relative to this notebook.
file = "../data/csv_files/lincoln.csv"

# Read the CSV and derive the plotting columns in a single chain:
#   DATE  -> parsed to datetime so the .dt accessor is available
#   TAVG  -> midpoint of the daily maximum and minimum temperature
#   MONTH -> abbreviated month name taken from DATE
# Only the columns needed downstream are kept.
df = (
    pd.read_csv(file)
    .assign(
        DATE=lambda frame: pd.to_datetime(frame["DATE"]),
        TAVG=lambda frame: (frame["TMAX"] + frame["TMIN"]) / 2,
        MONTH=lambda frame: frame["DATE"].dt.strftime("%b"),
    )
    .loc[:, ["MONTH", "TMIN", "TMAX", "TAVG"]]
)

In [4]:
# Per-month quartiles of the daily average temperature: one row per month.
qs = df.groupby("MONTH").TAVG.quantile([0.25, 0.5, 0.75]).unstack().reset_index()
qs.columns = ["MONTH", "Q1", "Q2", "Q3"]

# Outlier fences placed 1.5 * IQR beyond the first and third quartiles.
iqr = qs.Q3 - qs.Q1
qs["upper"] = qs.Q3 + 1.5 * iqr
qs["lower"] = qs.Q1 - 1.5 * iqr

# Attach the monthly statistics to every daily row.
# Statistic columns left over from a previous run of this cell are dropped
# first; otherwise pandas would suffix the clashing names (Q1_x, Q1_y, ...)
# and the plain "Q1"/"upper"/"lower" columns would no longer exist, which
# makes the cell unsafe to re-run.
stat_cols = [col for col in qs.columns if col != "MONTH"]
df = pd.merge(
    df.drop(columns=stat_cols, errors="ignore"),
    qs,
    on="MONTH",
    how="left",
)

df

Unnamed: 0,MONTH,TMIN,TMAX,TAVG,Q1,Q2,Q3,upper,lower
0,Jan,15.0,36.0,25.5,23.00,27.5,31.5,44.250,10.250
1,Jan,18.0,39.0,28.5,23.00,27.5,31.5,44.250,10.250
2,Jan,15.0,32.0,23.5,23.00,27.5,31.5,44.250,10.250
3,Jan,15.0,27.0,21.0,23.00,27.5,31.5,44.250,10.250
4,Jan,21.0,40.0,30.5,23.00,27.5,31.5,44.250,10.250
...,...,...,...,...,...,...,...,...,...
361,Dec,23.0,48.0,35.5,18.75,27.5,33.5,55.625,-3.375
362,Dec,29.0,47.0,38.0,18.75,27.5,33.5,55.625,-3.375
363,Dec,25.0,45.0,35.0,18.75,27.5,33.5,55.625,-3.375
364,Dec,21.0,49.0,35.0,18.75,27.5,33.5,55.625,-3.375


In [5]:
from bokeh.models import ColumnDataSource, Whisker
from bokeh.plotting import figure, show

In [6]:
# Box plot of the daily average temperature for each month.
p = figure(
    title="Figure 9.3",
    # unique() keeps the months in order of first appearance in df, so the
    # categorical axis follows the data order rather than alphabetical order.
    x_range=df.MONTH.unique(),
    height=400,
    width=500,
    x_axis_label="month",
    y_axis_label="mean temperature (F)",
)

# The boxes and whiskers only need the per-month summary statistics, so they
# are sourced from `qs` (one row per month). Sourcing them from the day-level
# `df` would draw an identical box and whisker once per day, stacked on top
# of each other.
source = ColumnDataSource(qs)

# Whiskers span the 1.5 * IQR outlier fences.
whisker = Whisker(base="MONTH", upper="upper", lower="lower", source=source)
whisker.upper_head.size = whisker.lower_head.size = 20
p.add_layout(whisker)

# Lower half of the box: first quartile up to the median.
p.vbar(
    x="MONTH",
    top="Q2",
    bottom="Q1",
    source=source,
    color="#E0E0E0",
    line_color="black",
)

# Upper half of the box: median up to the third quartile.
p.vbar(
    x="MONTH",
    top="Q3",
    bottom="Q2",
    source=source,
    color="#E0E0E0",
    line_color="black",
)

# Days whose average temperature falls outside the fences are drawn as points.
outliers = df[~df.TAVG.between(df.lower, df.upper)]
p.scatter("MONTH", "TAVG", source=outliers, size=5, color="black")

p.y_range.start = -10
p.yaxis.ticker = [0, 25, 50, 75]

show(p)