In [1]:
import os
import sys

# Put the parent of the current working directory at the front of the module
# search path. The project-local modules imported further down (`load_data`,
# `config`) presumably live there -- confirm against the repository layout.
parent_dir = os.path.dirname(os.getcwd())
sys.path.insert(0, parent_dir)

# Bokeh Demo!

We're getting pretty familiar with our dataset now, so let's dive right in! We could make this much fancier with a Bokeh server, but inside the notebook we are a bit more limited in terms of interactivity.

- [Number of Offenses per Month](#Number-of-Offenses-per-Month)
- [Number of Shootings per Month](#Number-of-Shootings-per-Month)
- [Top 10 Offense Code Groups](#Top-10-Offense-Code-Groups)
- [Number of Offenses per Day of Week and Hour of Day](#Number-of-Offenses-per-Day-of-Week-and-Hour-of-Day)

In [17]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Span, BoxSelectTool, LabelSet, LinearColorMapper, ColorBar
from bokeh.transform import transform
from bokeh.palettes import Viridis11
from load_data import load_data
from config import RED

# Configure Bokeh to render into the notebook, so the show() calls below
# display their figures inline in the cell output.
output_notebook()

In [3]:
# Load the dataset through the project-local `load_data` helper (its source is
# not part of this notebook). The result is used below as a pandas DataFrame.
df = load_data()

In [4]:
# Preview the first rows to see the columns the charts below rely on
# (OCCURRED_ON_DATE, SHOOTING, OFFENSE_CODE_GROUP, DAY_OF_WEEK, HOUR).
df.head()

Unnamed: 0,INCIDENT_NUMBER,OFFENSE_CODE,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,DISTRICT,REPORTING_AREA,SHOOTING,OCCURRED_ON_DATE,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,Location
0,I182070945,619,Larceny,LARCENY ALL OTHERS,D14,808,0,2018-09-02 13:00:00,2018,9,Sunday,13,Part One,LINCOLN ST,42.357791,-71.139371,"(42.35779134, -71.13937053)"
1,I182070943,1402,Vandalism,VANDALISM,C11,347,0,2018-08-21 00:00:00,2018,8,Tuesday,0,Part Two,HECLA ST,42.306821,-71.0603,"(42.30682138, -71.06030035)"
2,I182070941,3410,Towed,TOWED MOTOR VEHICLE,D4,151,0,2018-09-03 19:27:00,2018,9,Monday,19,Part Three,CAZENOVE ST,42.346589,-71.072429,"(42.34658879, -71.07242943)"
3,I182070940,3114,Investigate Property,INVESTIGATE PROPERTY,D4,272,0,2018-09-03 21:16:00,2018,9,Monday,21,Part Three,NEWCOMB ST,42.334182,-71.078664,"(42.33418175, -71.07866441)"
4,I182070938,3114,Investigate Property,INVESTIGATE PROPERTY,B3,421,0,2018-09-03 21:05:00,2018,9,Monday,21,Part Three,DELHI ST,42.275365,-71.090361,"(42.27536542, -71.09036101)"


# Number of Offenses per Month

We can use a ColumnDataSource here to share data between our num_offenses and num_shootings graphs - it won't do much since we're currently not rendering them together, but it saves us some code!

In [5]:
# Monthly aggregates of the SHOOTING column, bucketed on OCCURRED_ON_DATE:
#   num_offenses - count of non-null SHOOTING entries in the month
#   shootings    - sum of the SHOOTING values in the month
# The frame is named `monthly_counts` rather than `num_offenses` because the
# plotting cell that follows binds `num_offenses` to a Bokeh figure; one name
# for both a DataFrame and a figure makes out-of-order re-runs confusing.
monthly_counts = (
    df.resample('M', on='OCCURRED_ON_DATE')
    .SHOOTING
    .agg(num_offenses="count", shootings="sum")
)

# Single data source shared by both monthly charts. The resample index is
# referenced by the glyphs as the 'OCCURRED_ON_DATE' column.
source = ColumnDataSource(monthly_counts)

A neat feature of data sources is that we can refer to their columns in tooltips using the `@` notation - letting us build really nice tooltips very cheaply.

In [6]:
# Monthly offense counts as a red line with a black marker on every month.
num_offenses = figure(x_axis_type="datetime", title="Number of Offenses per Month")
offenses_line = num_offenses.line(x='OCCURRED_ON_DATE', y='num_offenses', source=source, color=RED, line_width=1.5)
num_offenses.circle(x='OCCURRED_ON_DATE', y='num_offenses', source=source, color="black", size=4)
num_offenses.yaxis.axis_label = 'Number of Offenses'
num_offenses.xaxis.axis_label = 'Date'

# Hover tooltip with the month and its offense count. It is bound to the line
# renderer only: left on the default, the tool inspects every renderer on the
# figure, so the line and the markers would each pop up a tooltip for the
# same month.
# NOTE(review): Bokeh >= 2.0 expects formatter keys to include the '@' prefix
# ('@OCCURRED_ON_DATE'); the bare column name matches the older API. Confirm
# against the installed Bokeh version.
offenses_tooltip = HoverTool(
    tooltips=[('Date', '@OCCURRED_ON_DATE{%b %Y}'),
              ('Number of Offenses', '@num_offenses')],
    formatters={'OCCURRED_ON_DATE': 'datetime'},
    mode="vline",
    renderers=[offenses_line],
)
num_offenses.add_tools(offenses_tooltip)

# Dashed horizontal reference line at the mean monthly offense count.
mean_line = Span(location=source.data['num_offenses'].mean(), dimension='width', line_color=RED, line_dash=[8, 3])
num_offenses.add_layout(mean_line)

show(num_offenses)

# Number of Shootings per Month

Pretty much the same here - if we were to render the two together, we'd get some nice interaction for free!

In [7]:
# Monthly shooting counts, drawn the same way as the offenses chart: a red
# line with a black marker on every month.
num_shootings = figure(x_axis_type="datetime", title="Number of Shootings per Month")
shootings_line = num_shootings.line(x='OCCURRED_ON_DATE', y='shootings', source=source, color=RED, line_width=1.5)
num_shootings.circle(x='OCCURRED_ON_DATE', y='shootings', source=source, color="black", size=4)
# Axis labels, matching the offenses chart so the figure stands on its own.
num_shootings.yaxis.axis_label = 'Number of Shootings'
num_shootings.xaxis.axis_label = 'Date'

# Hover tooltip with the month and its shooting count. It is bound to the line
# renderer only: left on the default, the tool inspects every renderer on the
# figure, so the line and the markers would each pop up a tooltip for the
# same month.
# NOTE(review): Bokeh >= 2.0 expects formatter keys to include the '@' prefix
# ('@OCCURRED_ON_DATE'); the bare column name matches the older API. Confirm
# against the installed Bokeh version.
shootings_tooltip = HoverTool(
    tooltips=[('Date', '@OCCURRED_ON_DATE{%b %Y}'),
              ('Number of Shootings', '@shootings')],
    formatters={'OCCURRED_ON_DATE': 'datetime'},
    mode="vline",
    renderers=[shootings_line],
)
num_shootings.add_tools(shootings_tooltip)

# Dashed horizontal reference line at the mean monthly shooting count.
mean_shootings = Span(location=source.data["shootings"].mean(), dimension="width", line_color=RED, line_dash=[8, 3])
num_shootings.add_layout(mean_shootings)

show(num_shootings)

# Top 10 Offense Code Groups

For our Top 10, we need a new data source. I don't actually need to create one manually: if I'm not going to reuse it, Bokeh will automatically create one
when you pass a DataFrame as `source`.

Note again how nice and easy it is to add annotations!

In [8]:
# The ten most frequent offense code groups, sorted by ascending count, as a
# two-column frame: `code_group` (the group name) and `counts`.
#
# The columns are named via rename_axis()/reset_index(name=...) instead of
# renaming whatever reset_index() happens to produce. pandas 2.0 changed the
# labels that value_counts() puts on its result, and a
# rename(columns={"index": ..., "OFFENSE_CODE_GROUP": ...}) would then label
# the group-name column "counts" and leave no `code_group` column at all.
top10_groups = (
    df.OFFENSE_CODE_GROUP
    .value_counts()
    .iloc[:10]
    .sort_values(ascending=True)
    .rename_axis("code_group")
    .reset_index(name="counts")
)
top10_source = ColumnDataSource(top10_groups)

In [9]:
# Horizontal bar chart of the ten most frequent offense code groups. The
# categorical y-range lists the group names in frame order (sorted by
# ascending count in the cell above). The width is set at construction time.
top10 = figure(
    y_range=list(top10_groups.code_group),
    title="Top 10 Offense Code Groups",
    plot_width=800,
)
top10.hbar(right="counts", y="code_group", height=0.8, source=top10_source)

# Print each count next to the end of its bar. The label set has its own name
# so it is not confused with the `labels` object the heatmap cell creates.
top10_labels = LabelSet(x="counts", y="code_group", text="counts", source=top10_source, x_offset=5, y_offset=-10, text_font_size='1em')
top10.add_layout(top10_labels)

show(top10)

# Number of Offenses per Day of Week and Hour of Day

The heatmap is similarly easy to plot, but it requires a bit more setup, since I manually define a color mapper for my fill_color. Bokeh has a lot of palettes to choose from though!

In [10]:
# Offense count for every (day of week, hour) pair. HOUR is cast to str first
# so that its values line up with the string factors in `hours` below.
per_day_hour = (
    df.assign(HOUR=lambda frame: frame.HOUR.astype(str))
    .groupby(['DAY_OF_WEEK', 'HOUR'])
    .size()
    .rename('counts')
    .reset_index()
)
per_day_hour_source = ColumnDataSource(per_day_hour)

# Explicit factor orderings for the two categorical axes of the heatmap.
day_of_week = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]
hours = list(map(str, range(24)))

In [11]:
# Linear color scale over Viridis11, spanning the smallest to the largest
# per-cell offense count.
mapper = LinearColorMapper(
    palette=Viridis11,
    low=per_day_hour["counts"].min(),
    high=per_day_hour["counts"].max(),
)

In [25]:
# Categorical heatmap: days of the week on x, hours of the day on y, with the
# hover tool as the only tool on the figure.
heatmap = figure(
    title="Number of Offenses per Hour and Day Of Week",
    x_range=day_of_week,
    y_range=hours,
    tools="hover",
)
heatmap.hover.tooltips = [
    ("Day of Week", "@DAY_OF_WEEK"),
    ("Hour of Day", "@HOUR"),
    ("Number of Offenses", "@counts"),
]

# One unit-sized, borderless cell per (day, hour) pair, filled according to
# its count through the color mapper.
heatmap.rect(
    x='DAY_OF_WEEK',
    y='HOUR',
    width=1,
    height=1,
    source=per_day_hour_source,
    fill_color=transform('counts', mapper),
    line_color=None,
)

# The count itself, printed on every cell.
labels = LabelSet(
    x="DAY_OF_WEEK",
    y="HOUR",
    text="counts",
    source=per_day_hour_source,
    text_font_size='1em',
    x_offset=-10,
    y_offset=-10,
)

# Color bar for the count scale, placed to the right of the plot area.
colorbar = ColorBar(color_mapper=mapper, location=(0, 0), major_label_text_align='left')

heatmap.add_layout(labels)
heatmap.add_layout(colorbar, "right")

# Strip axis lines and tick marks so only the cell grid and labels remain.
heatmap.axis.axis_line_color = None
heatmap.axis.major_tick_line_color = None
heatmap.axis.major_label_standoff = 0
heatmap.axis.major_label_text_font_size = "8pt"

show(heatmap)