In [None]:
from bokeh.io import output_notebook
import pandas as pd

In [None]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

### Figure 9.3

In [None]:
# Read the Lincoln weather CSV and derive the per-row columns used by the
# figures below: TAVG is the midpoint of TMAX and TMIN, and MONTH is the
# abbreviated month name ("%b") taken from the parsed DATE column.
file = "../data/csv_files/lincoln.csv"

df = pd.read_csv(file).assign(
    DATE=lambda frame: pd.to_datetime(frame["DATE"]),
    TAVG=lambda frame: (frame["TMAX"] + frame["TMIN"]) / 2,
    # Evaluated after DATE above, so this sees the converted datetime column.
    MONTH=lambda frame: frame["DATE"].dt.strftime("%b"),
)

# Keep only the columns the plots need, in this order.
df = df.loc[:, ["MONTH", "TMIN", "TMAX", "TAVG"]]

In [None]:
# Per-month quartiles of TAVG: one row per MONTH with columns Q1, Q2, Q3.
qs = df.groupby("MONTH").TAVG.quantile([0.25, 0.5, 0.75]).unstack().reset_index()
qs.columns = ["MONTH", "Q1", "Q2", "Q3"]

# Outlier fences: 1.5 * IQR below the first and above the third quartile.
iqr = qs.Q3 - qs.Q1
qs["upper"] = qs.Q3 + 1.5 * iqr
qs["lower"] = qs.Q1 - 1.5 * iqr

# Attach the monthly statistics to every row of df. Any statistics columns
# left over from a previous execution of this cell are dropped first;
# otherwise a re-run would produce suffixed duplicates (Q1_x, Q1_y, ...) and
# the later df.lower / df.upper lookups would fail.
df = df.drop(columns=qs.columns.drop("MONTH"), errors="ignore")
df = pd.merge(df, qs, on="MONTH", how="left")

# Preview only the first rows instead of dumping the whole frame.
df.head()

In [None]:
from bokeh.models import ColumnDataSource, Whisker
from bokeh.plotting import figure, show

In [None]:
# Figure 9.3: box plot of TAVG by month, built from the quartile and fence
# columns (Q1, Q2, Q3, upper, lower) that were merged into df.
p = figure(
    title="Figure 9.3",
    x_range=df.MONTH.unique(),
    toolbar_location=None,
    height=400,
    width=600,
    x_axis_label="month",
    y_axis_label="mean temperature (F)",
)

# The statistics are constant within a month, so one row per month is enough
# for the boxes and whiskers. Feeding the full per-row frame would stack many
# identical glyphs on top of each other for every month.
box_stats = df[["MONTH", "Q1", "Q2", "Q3", "upper", "lower"]].drop_duplicates()
source = ColumnDataSource(box_stats)

# Whiskers span the lower/upper outlier fences.
whisker = Whisker(base="MONTH", upper="upper", lower="lower", source=source)
whisker.upper_head.size = whisker.lower_head.size = 20
p.add_layout(whisker)

# Each box is drawn as two bars (Q1-Q2 and Q2-Q3); their shared edge marks
# the median.
for bottom, top in (("Q1", "Q2"), ("Q2", "Q3")):
    p.vbar(
        x="MONTH",
        top=top,
        bottom=bottom,
        width=0.8,
        source=source,
        color="#E0E0E0",
        line_color="black",
    )

# Rows whose TAVG falls outside [lower, upper] are plotted as individual points.
outliers = df[~df.TAVG.between(df.lower, df.upper)]
p.scatter("MONTH", "TAVG", source=outliers, size=5, color="black")

p.y_range.start = -10
p.yaxis.ticker = [0, 25, 50, 75]
p.grid.grid_line_color = None

show(p)

### Figure 9.8

In [None]:
from bokeh.transform import jitter

In [None]:
# Figure 9.8: strip chart of TAVG by month, one point per row of df.
source = ColumnDataSource(df)

p = figure(
    title="Figure 9.8",
    x_range=df.MONTH.unique(),
    toolbar_location=None,
    height=400,
    width=500,
    x_axis_label="month",
    y_axis_label="mean temperature (F)",
)

p.scatter(
    # Jitter along the categorical month axis to spread overlapping points
    # sideways. The temperature values themselves are plotted unmodified;
    # jittering TAVG instead would perturb the data and leave the points
    # stacked in a single column per month.
    x=jitter("MONTH", width=0.6, range=p.x_range),
    y="TAVG",
    source=source,
    alpha=0.5,
    color="black",
)

p.y_range.start = -10
p.yaxis.ticker = [0, 25, 50, 75]
p.grid.grid_line_color = None

show(p)

### Figure 9.12

In [None]:
# Load the DW-NOMINATE house table and add a calendar year derived from the
# congress number (congress * 2 + 1787, e.g. congress 1 -> 1789).
file = "../data/csv_files/dw_nominate_house.csv"

df = pd.read_csv(file)
df = df.assign(year=df["congress"] * 2 + 1787)

# Row filters:
#   year    - 1963 onwards
#   parties - party_code 100 or 200 only
#   dn      - rows with cd == 0 excluded
#             (presumably cd is the congressional district - TODO confirm)
year = df["year"].ge(1963)
parties = df["party_code"].isin([100, 200])
dn = df["cd"].ne(0)

keep_rows = year & parties & dn
df = df.loc[keep_rows].reset_index(drop=True)

df

In [None]:
import numpy as np
from sklearn.neighbors import KernelDensity

positions = np.linspace(-1.5, 1.5, 1000)
dem = df[df["party_code"] == 100]
rep = df[df["party_code"] == 200]