In [1]:
from collections import OrderedDict
from bokeh.models import ColumnDataSource, HoverTool
import bokeh.plotting as bk
import numpy as np
from math import pi
import pandas as pd
import datetime as dt

# Route Bokeh output to the notebook, so the later bk.show() calls render inline.
bk.output_notebook()

First, process the Seattle incident data to prepare the content for the two charts.

In [2]:
data = pd.read_csv("data/seattle_incidents_summer_2014.csv")

# One boolean row mask per summer month (June = 6, July = 7, August = 8).
jun_filter, jul_filter, aug_filter = (data["Month"] == month_nr
                                      for month_nr in (6, 7, 8))

# Inputs for the 1st plot:
#   * the monthly totals (one bar per month)
#   * the daily totals (one bar per day)

total_jun, total_jul, total_aug = (len(data[month_mask])
                                   for month_mask in (jun_filter,
                                                      jul_filter,
                                                      aug_filter))

# Day numbers are counted from this reference date, so 1 June 2014 is day 1.
min_date = dt.datetime(2014, 5, 31)

start_tstamp_strings = data["Occurred Date or Date Range Start"]
start_tstamps = [
    dt.datetime.strptime(raw_stamp, '%m/%d/%Y %I:%M:%S %p')
    for raw_stamp in start_tstamp_strings
]
start_days = [
    elapsed.days
    for elapsed in (stamp - min_date for stamp in start_tstamps)
]

# Number of incidents per day number (the index holds the day numbers).
daily_counts = pd.Series(start_days).value_counts()

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# prepare for the 2nd plot

# NOTE(review): `grouped` is not used by any cell visible here; kept in case
# another cell relies on it.
grouped = data.groupby(data["Offense Type"])

# Incidents per offense type divided by 3 to get a monthly average
# (assumes the file covers exactly the three months June-August -- TODO confirm).
mean_counts = data["Offense Type"].value_counts() / 3
# value_counts() sorts in descending order, so at this point the offense types
# (the index) are ordered by average monthly incidents.

# Per-month counts for the same offense types.
counts_jun = data["Offense Type"][jun_filter].value_counts()
counts_jul = data["Offense Type"][jul_filter].value_counts()
counts_aug = data["Offense Type"][aug_filter].value_counts()

# Join the monthly details to the averages without changing the order.
# `join_axes` was removed from pd.concat in pandas 1.0; reindexing the joined
# frame on the index of mean_counts gives the same row set and row order.
joint = pd.concat([mean_counts, counts_jun, counts_jul, counts_aug],
                  axis=1).reindex(mean_counts.index)

joint.columns = ["c_mean", "c_jun", "c_jul", "c_aug"]

# n is one past the last rank, i.e. len(joint) + 1 (not the number of rows).
n = len(joint) + 1
# Ranks 1..len(joint); materialised as a list so that it is a real sequence
# under Python 3 as well (range() is lazy there).
ranks = list(range(1, n))

types = joint.index

def date_from_daynr(daynrs, origin=None):
    """Convert day numbers into datetimes counted from a reference date.

    Parameters
    ----------
    daynrs : iterable of int
        Day offsets relative to the reference date (0 is the reference date
        itself, 1 the following day, and so on).
    origin : datetime.datetime or datetime.date, optional
        Reference date. Defaults to the notebook-level ``min_date``.

    Returns
    -------
    list of datetime.datetime
        One datetime per day number. The values are built from ordinals, so
        any time-of-day carried by the reference date is dropped (midnight).
    """
    if origin is None:
        # Resolved at call time so that the notebook-level value is used.
        origin = min_date
    origin_ordinal = origin.toordinal()
    return [dt.datetime.fromordinal(origin_ordinal + x) for x in daynrs]

In [5]:
# construct the 1st plot

TOOLS = "save"

# Monthly bars. Day numbers are counted from min_date (31 May 2014), so
# 1, 31 and 62 are the first days of June, July and August, and 93 is
# 1 September. Everything the glyph draws is stored in the data source and
# referenced by column name: Bokeh does not accept literal sequences
# together with `source=`.
source = ColumnDataSource(data=dict(
    month=["June", "July", "August"],
    crimes_in_month=[total_jun, total_jul, total_aug],
    left=date_from_daynr([1, 31, 62]),
    right=date_from_daynr([31, 62, 93]),
    fill_color=["#995511", "#EEEE22", "#BB9922"],
))

p1 = bk.figure(title="Summer 2014 crime breakdown in Seattle over time",
               width=900, height=600, tools=TOOLS, x_axis_type="datetime")

p1.xaxis.axis_label = "Date"
p1.yaxis.axis_label = "Incidents"

p1.quad(top="crimes_in_month", bottom=0,
        left="left", right="right",
        fill_color="fill_color",
        line_color="#000000",
        source=source)

# Daily bars, drawn over the monthly ones. Sundays (weekday() == 6) get a
# lighter shade of blue than the other days.
daily_dates = date_from_daynr(daily_counts.index)
daily_weekdays = [day.weekday() for day in daily_dates]
daily_fill_colors = ["#5599FF" if weekday == 6 else "#3377CC"
                     for weekday in daily_weekdays]

p1.quad(top=daily_counts.values, bottom=0,
        left=daily_dates,
        right=date_from_daynr(daily_counts.index + 1),
        line_color="#111111",
        fill_color=daily_fill_colors)

# No hover tool on this figure (TOOLS is just "save"): the monthly and the
# daily layer carry structurally different fields, and specifying different
# tooltips per layer in a simple way turned out to be counter-intuitive.
# The "month" and "crimes_in_month" columns of `source` are what a monthly
# tooltip would display.


<bokeh.models.renderers.GlyphRenderer at 0x7fb52beeb050>

In [6]:
# construct the 2nd plot

# NOTE(review): the "resize" tool and the `legend=` keyword used below are not
# available in recent Bokeh releases (`legend_label=` replaces the latter).
# Confirm the target Bokeh version before upgrading.
TOOLS = "pan,box_zoom,resize,wheel_zoom,reset,hover,save"
p2 = bk.figure(title="Summer 2014 crime breakdown in Seattle by offense type",
               tools=TOOLS, width=900, height=600)
p2.xaxis.axis_label = "Rank"
p2.yaxis.axis_label = "Incidents"

# A concrete list, so that every column of a source has the same length.
rank_values = list(ranks)


def _rank_source(period, counts):
    """Build the data source of one line: rank, period label, count, type."""
    return ColumnDataSource(data=dict(rank=rank_values,
                                      # one label per rank (not n = len + 1)
                                      month=len(rank_values) * [period],
                                      count=counts,
                                      type_=types))


source_mean = _rank_source("Summer (avg.)", joint.c_mean)
source_jun = _rank_source("June", joint.c_jun)
source_jul = _rank_source("July", joint.c_jul)
source_aug = _rank_source("August", joint.c_aug)

# Each line reads its coordinates from its own source by column name, so the
# hover tool can resolve @rank, @month, @count and @type_ for that line.
# The legend text matches the month stored in the corresponding source.
for line_source, line_color, legend_text in [
        (source_mean, "#222222", "Monthly mean"),
        (source_jun, "#AA2222", "June incidents"),
        (source_jul, "#22AA22", "July incidents"),
        (source_aug, "#2222AA", "August incidents")]:
    p2.line("rank", "count", color=line_color, line_width=2,
            source=line_source, legend=legend_text)

hover = p2.select(dict(type=HoverTool))

hover.tooltips = OrderedDict([
        ("Mean rank", "@rank"),
        ("Period", "@month"),
        ("Incidents", "@count"),
        ("Offense Type", "@type_")
    ])



In [7]:
# Display the time-based chart: monthly totals with the daily counts drawn over them.
bk.show(p1)

Apparently, August was the most peaceful month. Sundays are highlighted in light blue; the incident counts on these days also seem to be below average.

In [8]:
# Display the chart of incident counts by offense-type rank (hover for details).
bk.show(p2)

A small number of crime types account for most of the overall incidents. Vehicle, car and other thefts are the most common types of crime (use the tooltips to check). The three months do not seem to differ significantly in their breakdown by offense type.