In [10]:
import pandas as pd

# Load the raw incident reports from the CSV file.
data = pd.read_csv("SFdata.csv")

# Add a "Year" column holding the last four characters of each "Date" string.
# NOTE(review): this assumes every "Date" value ends in a four-digit year
# (e.g. MM/DD/YYYY) -- confirm against the CSV.
# The vectorised .str slice replaces the manual index loop; a missing date
# now yields NaN instead of raising a TypeError.
years = data["Date"].str[-4:].tolist()
data['Year'] = years

# Keep only the rows whose Category is "VEHICLE THEFT".
df = data[data.Category == 'VEHICLE THEFT']

In [11]:
def count_vehicle_thefts(district, year):
    """Count the vehicle-theft rows of ``df`` for one district and one year.

    Reads the module-level DataFrame ``df`` and uses its ``Category``,
    ``PdDistrict`` and ``Year`` columns.

    Args:
        district: Value compared for equality against ``df.PdDistrict``.
        year: Value compared for equality against ``df.Year``.

    Returns:
        The number of rows for which all three conditions hold.
    """
    is_vehicle_theft = df.Category == 'VEHICLE THEFT'
    in_district = df.PdDistrict == district
    in_year = df.Year == year
    matching_rows = is_vehicle_theft & in_district & in_year
    return matching_rows.sum()

# Years that will be covered in the plot (kept as strings, like the
# values passed to count_vehicle_thefts)
list_of_years = ['2008', '2009', '2010', '2011', '2012',
                 '2013', '2014', '2015', '2016', '2017']

# Divisor used to normalise each district's counts to a per-100,000 rate.
# NOTE(review): these look like district population figures; their source
# and reference year are not recorded in this notebook -- confirm.
DISTRICT_POPULATION = {
    'SOUTHERN': 65115,
    'CENTRAL': 69889,
    'RICHMOND': 87817,
    'NORTHERN': 103941,
    'INGLESIDE': 137830,
    'BAYVIEW': 74038,
    'TARAVAL': 154868,
    'PARK': 63292,
    'MISSION': 81818,
    'TENDERLOIN': 35841,
}


def _thefts_per_100k(district):
    """Return one rate per entry of ``list_of_years`` for ``district``.

    Each rate is the district's vehicle-theft count for that year multiplied
    by 100000 and divided by its DISTRICT_POPULATION entry.
    """
    population = DISTRICT_POPULATION[district]
    return [count_vehicle_thefts(district, year) * 100000 / population
            for year in list_of_years]


# Prepare data for line graph: one list per district, aligned with
# list_of_years. The individual names are kept because the plotting cell
# refers to them.
SOUT_data = _thefts_per_100k('SOUTHERN')
CENT_data = _thefts_per_100k('CENTRAL')
RICH_data = _thefts_per_100k('RICHMOND')
NORT_data = _thefts_per_100k('NORTHERN')
INGL_data = _thefts_per_100k('INGLESIDE')
BAYV_data = _thefts_per_100k('BAYVIEW')
TARA_data = _thefts_per_100k('TARAVAL')
PARK_data = _thefts_per_100k('PARK')
MISS_data = _thefts_per_100k('MISSION')
TEND_data = _thefts_per_100k('TENDERLOIN')

# x-axis values shared by every line
x = list_of_years


In [12]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Legend
from bokeh.io import output_notebook,output_file
# Configure Bokeh output: inline notebook display plus the HTML file below.
output_notebook()
output_file("assignment2_bokeh_output.html")

# Create bokeh plot with a title and axis labels
p = figure(title="Yearly reports adjusted for population size",
           x_axis_label="Year",
           y_axis_label="Reports per 100.000 capita",
           width=500,
           height=350)

# The figure is created without an explicit x_range, so its x-axis is
# numeric. Plot the years as integers instead of relying on how string
# labels are handled on a numeric axis.
years_numeric = [int(year_label) for year_label in x]

# Configure plot data (draw one line per district)
line0 = p.line(years_numeric, SOUT_data, line_width=2, color="blue")
line1 = p.line(years_numeric, CENT_data, line_width=2, color="red")
line2 = p.line(years_numeric, RICH_data, line_width=2, color="green")
line3 = p.line(years_numeric, NORT_data, line_width=2, color="yellow")
line4 = p.line(years_numeric, INGL_data, line_width=2, color="purple")
line5 = p.line(years_numeric, BAYV_data, line_width=2, color="orange")
line6 = p.line(years_numeric, TARA_data, line_width=2, color="magenta")
line7 = p.line(years_numeric, PARK_data, line_width=2, color="grey")
line8 = p.line(years_numeric, MISS_data, line_width=2, color="brown")
line9 = p.line(years_numeric, TEND_data, line_width=2, color="navy")

# Legend entries: (label, [renderers]) pairs, one per district line
lines = [("SOUTHERN", [line0]),
         ("CENTRAL", [line1]),
         ("RICHMOND", [line2]),
         ("NORTHERN", [line3]),
         ("INGLESIDE", [line4]),
         ("BAYVIEW", [line5]),
         ("TARAVAL", [line6]),
         ("PARK", [line7]),
         ("MISSION", [line8]),
         ("TENDERLOIN", [line9])]

# Legend config: built explicitly and added as a layout on the right side of
# the plot; the "hide" click policy toggles a line when its entry is clicked
legend = Legend(items=lines, location="top")
p.add_layout(legend, 'right')
p.legend.click_policy = "hide"
p.legend.title = "Districts"

# Title config
p.title.text_font_size = "15px"
p.title.text_color = "black"

# show the results
show(p)