In [1]:
import pandas as pd
import numpy as np

from bokeh.plotting import figure, output_file, show, output_notebook
from bokeh.palettes import Spectral11, colorblind, Inferno, BuGn, brewer

import datetime

from bokeh.models import HoverTool, value, LabelSet, Legend, ColumnDataSource,LinearColorMapper,BasicTicker, PrintfTickFormatter, ColorBar

In [2]:
# Read the monthly property-crime counts; the 'Date' column is parsed into
# datetimes at load time so later cells can use date arithmetic on it.
crime_csv = "datasets/Monthly_Property_Crime_2005_to_2015.csv"
data = pd.read_csv(crime_csv, parse_dates=['Date'])
data.head()

Unnamed: 0,IncidntNum,Category,Date
0,21,ARSON,2005-01-01
1,696,BURGLARY,2005-01-01
2,2027,LARCENY/THEFT,2005-01-01
3,55,STOLEN PROPERTY,2005-01-01
4,489,VANDALISM,2005-01-01


In [3]:
# Earliest and latest dates present in the dataset, shown as a (min, max) tuple.
dates = data['Date']
(dates.min(), dates.max())

(Timestamp('2005-01-01 00:00:00'), Timestamp('2015-12-01 00:00:00'))

In [4]:
# Number of rows recorded for each crime category.
data['Category'].value_counts()

BURGLARY           132
LARCENY/THEFT      132
STOLEN PROPERTY    132
ARSON              132
VANDALISM          132
VEHICLE THEFT      132
Name: Category, dtype: int64

In [5]:
# Split the parsed date into calendar parts used for grouping in later cells.
# The vectorised .dt accessor replaces a per-row Python lambda: same values,
# no Python-level loop over the rows.
data['Year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month
data.head()

Unnamed: 0,IncidntNum,Category,Date,Year,Month
0,21,ARSON,2005-01-01,2005,1
1,696,BURGLARY,2005-01-01,2005,1
2,2027,LARCENY/THEFT,2005-01-01,2005,1
3,55,STOLEN PROPERTY,2005-01-01,2005,1
4,489,VANDALISM,2005-01-01,2005,1


In [6]:
def _rows_for(category):
    """Return the rows of `data` whose Category equals `category`, ordered by Date."""
    matches = data['Category'] == category
    return data[matches].sort_values(['Date'])


# One date-ordered frame per crime category; these feed the multi-line chart.
burglary = _rows_for('BURGLARY')
stolen_property = _rows_for('STOLEN PROPERTY')
vehicle_theft = _rows_for('VEHICLE THEFT')
vandalism = _rows_for('VANDALISM')
larceny = _rows_for('LARCENY/THEFT')
arson = _rows_for('ARSON')
arson.head()

Unnamed: 0,IncidntNum,Category,Date,Year,Month
0,21,ARSON,2005-01-01,2005,1
6,17,ARSON,2005-02-01,2005,2
12,18,ARSON,2005-03-01,2005,3
18,24,ARSON,2005-04-01,2005,4
24,13,ARSON,2005-05-01,2005,5


In [7]:
# Configure Bokeh so that subsequent show() calls render inside the notebook.
output_notebook()

In [8]:
# Average of the numeric columns per calendar month, across all years and
# categories. numeric_only=True restricts the aggregation to numeric columns;
# without it, recent pandas raises on the string 'Category' column instead of
# silently dropping it.
temp_df = data.groupby(['Month']).mean(numeric_only=True).reset_index()
temp_df.head()

Unnamed: 0,Month,IncidntNum,Year
0,1,697.015152,2010.0
1,2,615.0,2010.0
2,3,701.757576,2010.0
3,4,694.69697,2010.0
4,5,702.393939,2010.0


In [12]:
# Bar chart of the per-month averages currently held in temp_df.
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom,tap"
p = figure(
    plot_height=350,
    title="Average Number of Crimes by Month",
    tools=TOOLS,
    toolbar_location='above',
)

months = temp_df.Month
monthly_average = temp_df.IncidntNum
p.vbar(x=months, top=monthly_average, width=0.9)

# Axis labels.
p.xaxis.axis_label = 'Month'
p.yaxis.axis_label = 'Average Crimes'

# Visual tidy-up: bars start at zero, no vertical grid, no minor ticks, no frame.
p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None

# '@x' and '@top' refer to the values passed as x= and top= to vbar above.
hover = p.select_one(HoverTool)
hover.tooltips = [
    ('month', '@x'),
    ('Number of crimes', '@top'),
]

output_file("barchart.html", title="barchart")
show(p)

In [13]:
# Totals of the numeric columns per year. numeric_only=True keeps the sum away
# from the datetime 'Date' and string 'Category' columns, which recent pandas
# no longer drops automatically (summing datetimes raises).
temp_df = data.groupby(['Year']).sum(numeric_only=True).reset_index()
temp_df.head()

Unnamed: 0,Year,IncidntNum,Month
0,2005,58100,468
1,2006,49848,468
2,2007,45663,468
3,2008,45297,468
4,2009,44326,468


In [20]:
# Line chart of the yearly totals in temp_df, with the lowest year marked.
TOOLS = 'save,pan,box_zoom,reset,wheel_zoom,hover'
p = figure(title="Year-wise total number of crimes", y_axis_type="linear", plot_height = 400,
           tools = TOOLS, plot_width = 800)
p.xaxis.axis_label = 'Year'
p.yaxis.axis_label = 'Total Crimes'

# Mark the year with the fewest crimes. Both coordinates come from the same
# row, so the marker stays on the true minimum if the data changes (the x
# position was previously a hard-coded year).
lowest = temp_df.loc[temp_df.IncidntNum.idxmin()]
p.circle(int(lowest.Year), lowest.IncidntNum, size = 10, color = 'red')

p.line(temp_df.Year, temp_df.IncidntNum,line_color="purple", line_width = 3)

# '@x' and '@y' refer to the coordinates passed to the glyphs above.
p.select_one(HoverTool).tooltips = [
    ('year', '@x'),
    ('Number of crimes', '@y'),
]

output_file("line_chart.html", title="Line Chart")
show(p)

In [21]:
# Reshape to one row per Date with one column per crime category.
wide = data.pivot(index='Date', columns='Category', values='IncidntNum').reset_index()

# Calendar parts via the vectorised .dt accessor (no per-row lambda).
wide['Year'] = wide['Date'].dt.year
wide['Month'] = wide['Date'].dt.month

# Category columns, in the order they are stacked in the chart below.
cats = ['ARSON','BURGLARY','LARCENY/THEFT','STOLEN PROPERTY','VANDALISM','VEHICLE THEFT']

# Yearly totals per category. numeric_only=True keeps the datetime 'Date'
# column out of the sum (recent pandas raises on it); the summed 'Month'
# column is meaningless, so it is dropped.
temp_df = (
    wide.groupby(['Year'])
    .sum(numeric_only=True)
    .reset_index()
    .drop(['Month'], axis=1)
)
temp_df.head()

Category,Year,ARSON,BURGLARY,LARCENY/THEFT,STOLEN PROPERTY,VANDALISM,VEHICLE THEFT
0,2005,229,7051,25226,528,6963,18103
1,2006,240,6967,27227,552,7599,7263
2,2007,241,5422,25599,505,7452,6444
3,2008,245,5650,25636,490,7234,6042
4,2009,221,5366,25419,640,7510,5170


In [25]:
# Stacked bar chart: one bar per year, one stacked segment per category in `cats`.
TOOLS = "save,pan,box_zoom,reset,wheel_zoom,tap"

source = ColumnDataSource(data=temp_df)
p = figure( plot_width=800, title="Category wise count of crimes by year",toolbar_location='above', tools=TOOLS)

# One colour per stacked category; the palette size follows `cats` rather than
# a hard-coded count, so the two cannot drift apart.
colors = brewer['Dark2'][len(cats)]

p.vbar_stack(cats, x='Year', width=0.9, color=colors, source=source,
             legend=[value(x) for x in cats])

# Bars start at zero; remove vertical grid, minor ticks and the plot frame.
p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None
p.xaxis.axis_label = 'Year'
p.yaxis.axis_label = 'Total Crimes'
p.legend.location = "top_left"
p.legend.orientation = "horizontal"

output_file("stacked_bar.html", title="Stacked Bar Chart")

show(p)  # open a browser



In [26]:
# Total crimes for every (Year, Month) pair, summed over all categories.
# numeric_only=True keeps the sum away from the datetime 'Date' and string
# 'Category' columns, which recent pandas no longer drops automatically.
temp_df = data.groupby(['Year', 'Month']).sum(numeric_only=True).reset_index()
temp_df.head()

Unnamed: 0,Year,Month,IncidntNum
0,2005,1,4969
1,2005,2,4152
2,2005,3,4791
3,2005,4,4691
4,2005,5,5097


In [34]:
# Heat map of total crimes: one cell per (Year, Month) pair in temp_df.
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom,tap"
hm = figure(title="Month-Year wise crimes", tools=TOOLS, toolbar_location='above')

source = ColumnDataSource(temp_df)

# Nine-step YlOrBr palette, reversed relative to the order brewer provides.
colors = brewer['YlOrBr'][9][::-1]
mapper = LinearColorMapper(
    palette=colors, low=temp_df.IncidntNum.min(), high=temp_df.IncidntNum.max())

# Years and months are both one unit apart, so each cell is 1 x 1. A wider
# rectangle would overlap the neighbouring years and partly hide them.
hm.rect(x="Year", y="Month", width=1, height=1, source=source,
    fill_color={
        'field': 'IncidntNum',
        'transform': mapper
    },
    line_color=None)

# Colour scale shown to the right of the plot.
color_bar = ColorBar(
    color_mapper=mapper,
    major_label_text_font_size="10pt",
    ticker=BasicTicker(desired_num_ticks=len(colors)),
    formatter=PrintfTickFormatter(),
    label_standoff=6,
    border_line_color=None,
    location=(0, 0))

hm.add_layout(color_bar, 'right')
hm.xaxis.axis_label = 'Year'
hm.yaxis.axis_label = 'Month'
hm.select_one(HoverTool).tooltips = [
    ('Year', '@Year'),('Month', '@Month'), ('Number of Crimes', '@IncidntNum')
]

output_file("heatmap.html", title="Heat Map")

show(hm)  # open a browser

In [35]:
# One line per crime category over time, on a datetime x axis.
TOOLS = 'crosshair,save,pan,box_zoom,reset,wheel_zoom'
p = figure(title="Category-wise crimes through Time", y_axis_type="linear",x_axis_type='datetime', tools = TOOLS)

# (frame, legend text, line colour) for each series, in drawing order.
series_specs = [
    (burglary, "burglary", "purple"),
    (stolen_property, "stolen_property", "blue"),
    (vehicle_theft, "vehicle_theft", 'coral'),
    (larceny, "larceny", 'green'),
    (vandalism, "vandalism", "gold"),
    (arson, "arson", "magenta"),
]
for frame, label, colour in series_specs:
    p.line(frame['Date'], frame.IncidntNum, legend=label, line_color=colour, line_width=3)

p.legend.location = "top_left"

p.xaxis.axis_label = 'Year'
p.yaxis.axis_label = 'Count'

output_file("multiline_plot.html", title="Multi Line Plot")

show(p)  # open a browser

