# Assignment 2 - Plots

In this notebook you can find the code generating the plots on the website. 

### Content

1. [Preparing data](#part1)
2. [Time-series plot](#part2)  
3. [Map plot](#part3)  
4. [Bokeh plot](#part4)  

## Let's load all relevant packages

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from bokeh.models import ColumnDataSource, FactorRange, Legend
from bokeh.palettes import Category20
from bokeh.plotting import figure, show, save, output_file
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import folium
from folium import plugins
from folium.plugins import HeatMap
import plotly.io as pio
from bokeh.io import output_notebook, show
output_notebook()
import bokeh
from bokeh.models.widgets import Panel, Tabs
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.models import SingleIntervalTicker, LinearAxis
from bokeh.models import ColumnDataSource, FactorRange, Legend, HoverTool, GeoJSONDataSource, \
                        LinearColorMapper, ColorBar, NumeralTickFormatter, Div, Select, TableColumn, \
                        DataTable, CheckboxGroup, Tabs, Panel, CheckboxButtonGroup, RadioButtonGroup, \
                        Label, LabelSet, Range1d
from bokeh.palettes import Viridis

pio.renderers.default='notebook'

#set notebook options
pd.options.mode.chained_assignment = None 
pd.set_option('display.max_columns', None)
plt.rcParams.update({'font.size': 10})
sns.set_style("white")
plt_color = 'cadetblue'


<a id='part1'></a>
## Preparing data 

In [2]:
df = pd.read_csv('Data/Police_Department_Incident_Reports__Historical_2003_to_May_2018.csv') 
df = df[(pd.to_datetime(df.Date).dt.year < 2018)]

# Feature settings - Setting the 'DayOfWeek' coloumn to categorical.
df['DayOfWeek'] = pd.Categorical(df.DayOfWeek,
                                      categories=['Monday', 'Tuesday', 'Wednesday', 'Thursday','Friday','Saturday','Sunday'],
                                      ordered=True)

# Creating new coloumns, with timestamp, year, month, hour and HoW (Hour of Week)
df['Timestamp'] = pd.to_datetime(df["Date"] + df["Time"], format="%m/%d/%Y%H:%M")
df['year'] = df['Timestamp'].dt.year.astype(int)
df['Hour'] = df['Timestamp'].dt.hour.astype(int)
df['HoW'] = df['Timestamp'].dt.dayofweek * 24 + (df['Timestamp'].dt.hour + 1).astype(int)


In [38]:
property_crimes = set(['BURGLARY', 'VEHICLE THEFT', 'LARCENY/THEFT', 'VANDALISM'])
violent_crimes = set(['SEX OFFENSES, FORCIBLE', 'ASSAULT', 'ROBBERY'])

df_property_ori = df[df['Category'].isin(property_crimes)]
df_violent_ori = df[df['Category'].isin(violent_crimes)]

<a id='part2'></a>
## Time-series plot

In [40]:
df_property = df_property_ori.pivot_table(index='year', columns='Category', values='PdId', aggfunc='count')
df_property = df_property.fillna(0)
df_property.reset_index(inplace=True)

In [41]:
df_violent = df_violent_ori.pivot_table(index='year', columns='Category', values='PdId', aggfunc='count')
df_violent = df_violent.fillna(0)
df_violent.reset_index(inplace=True)

In [68]:
output_file("time_series_plot.html")

colors_list = ["#008080", "#DA70D6", "#4B0082", "blue", "red"]

# Plot 1 - Property crimes
p1 = figure(plot_width=800, plot_height=500, title = "Property crimes", x_axis_label = 'Year', y_axis_label = 'Count')
for i in range(len(df_property.columns)-1):
    p1.line(df_property.year, df_property[df_property.columns[i+1]], line_width=3, color=colors_list[i])
    
# add legend 
legend = Legend(items=[(col, [p1.line(df_property.year,df_property[col], line_width=3, color=colors_list[i])]) 
                       for i, col in enumerate(property_crimes)],
                location=(0, 0))
p1.add_layout(legend, 'right')
p1.legend.label_text_font_size = "8pt"
tab1 = Panel(child=p1, title="Property Crimes")

# Plot 2 - Violent crimes
p2 = figure(plot_width=800, plot_height=500, title = "Violent crimes", x_axis_label = 'Year', y_axis_label = 'Count')
for i in range(len(df_violent.columns)-1):
    p2.line(df_violent.year, df_violent[df_violent.columns[i+1]], line_width=3, color=colors_list[i])

# add legend to plot
legend = Legend(items=[(col, [p2.line(df_violent.year,df_violent[col], line_width=3, color=colors_list[i])]) 
                       for i, col in enumerate(violent_crimes)],
                location=(0, 0))
p2.add_layout(legend, 'right')
p2.legend.label_text_font_size = "8pt"
tab2 = Panel(child=p2, title="Violent Crimes")


# add a tab with the aggregated plot
p3 = figure(plot_width=800, plot_height=500, title = "Aggregated", x_axis_label = 'Year', y_axis_label = 'Count')
p3.line(df_property.year, df_property.sum(axis=1), line_width=3, color=colors_list[0])
p3.line(df_violent.year, df_violent.sum(axis=1), line_width=3, color=colors_list[1])
tab3 = Panel(child=p3, title="Aggregated")
# add legend
legend = Legend(items=[("Property Crimes", [p3.line(df_property.year,df_property.sum(axis=1), line_width=3, color=colors_list[0])]),
                          ("Violent Crimes", [p3.line(df_violent.year,df_violent.sum(axis=1), line_width=3, color=colors_list[1])])],
                location=(0, 0))
p3.legend.label_text_font_size = "8pt"
p3.add_layout(legend, 'right')

# add hover tool to all plots
hover = HoverTool(tooltips=[('Year', '@x'), ('Count', '@y')])
p1.add_tools(hover)
p2.add_tools(hover)
p3.add_tools(hover)

tabs = Tabs(tabs=[ tab1, tab2, tab3 ])


show(tabs)



You are attempting to set `plot.legend.label_text_font_size` on a plot that has zero legends added, this will have no effect.

Before legend properties can be set, you must add a Legend explicitly, or call a glyph method with a legend parameter set.




<a id='part3'></a>
## Map plot

In [61]:
district = ['Central', 'Southern', 'Bayview', 'Mission', 'Northern', 'Park', 'Richmond', 'Ingleside', 'Taraval', 'Tenderloin']
population = [69961, 65166, 74191, 81913, 104067, 63359, 87890, 138002, 155029, 35902]
data_district = pd.DataFrame({'district': district, 'population': population})

# capitalize district names
data_district['district'] = data_district['district'].str.upper()

In [62]:
#Aggregate data by police district
df_map_violent = df_violent_ori.pivot_table(index='PdDistrict', values='PdId', aggfunc='count')
df_map_violent = df_map_violent.reset_index()

df_map_property = df_property_ori.pivot_table(index='PdDistrict', values='PdId', aggfunc='count')
df_map_property = df_map_property.reset_index()

df_map_property_theft = df_property_ori[df_property_ori['Category']=='LARCENY/THEFT'].pivot_table(index='PdDistrict', values='PdId', aggfunc='count')
df_map_property_theft = df_map_property_theft.reset_index()

In [63]:
# insert population in df_map_new
df_map_violent = df_map_violent.merge(data_district, left_on='PdDistrict', right_on='district')
df_map_violent['crime_rate'] = df_map_violent['PdId']/df_map_violent['population']

# insert population in df_map_new
df_map_property = df_map_property.merge(data_district, left_on='PdDistrict', right_on='district')
df_map_property['crime_rate'] = df_map_property['PdId']/df_map_property['population']

# insert population in df_map_new
df_map_property_theft = df_map_property_theft.merge(data_district, left_on='PdDistrict', right_on='district')
df_map_property_theft['crime_rate'] = df_map_property_theft['PdId']/df_map_property_theft['population']

In [64]:
from urllib.request import urlopen
import json
with urlopen('https://raw.githubusercontent.com/suneman/socialdata2022/main/files/sfpd.geojson') as response:
    districts = json.load(response)

In [65]:
fig = make_subplots(rows=1, cols=1, subplot_titles=['Crimes per population in each district'],
    specs=[[{"type": "mapbox"}]])

# Making a choropleth map where we change the data range from [0, 3000] to [0, 10000]
fig.add_trace(go.Choroplethmapbox(geojson=districts, locations=df_map_violent['PdDistrict'], z=df_map_violent['crime_rate'],
                                  colorscale='Viridis', marker=dict(opacity=0.8), 
                                  zmin=0, zmax=max(df_map_violent.crime_rate)*1.1,
                                  colorbar=dict(thickness=20, x=1.02)),
                                  row=1, col=1)

# Making a choropleth map where we change the colorscale and decrease the opacity from 0.7 to 0.4
fig.add_trace(go.Choroplethmapbox(geojson=districts, locations=df_map_property['PdDistrict'], z=df_map_property['crime_rate'],
                                  colorscale='Viridis', marker=dict(opacity=0.8), 
                                  zmin=0, zmax=max(df_map_property.crime_rate)*1.1,
                                  colorbar=dict(thickness=20, x=1.02)),
                                  row=1, col=1)

# Making a choropleth map where we change the colorscale and decrease the opacity from 0.7 to 0.4
fig.add_trace(go.Choroplethmapbox(geojson=districts, locations=df_map_property_theft['PdDistrict'], z=df_map_property_theft['crime_rate'],
                                  colorscale='Viridis', marker=dict(opacity=0.8), 
                                  zmin=0, zmax=max(df_map_property_theft.crime_rate)*1.1,
                                  colorbar=dict(thickness=20, x=1.02)),
                                  row=1, col=1)
        
fig.update_mapboxes(bearing=0, center = {"lat": 37.773972, "lon": -122.431297 }) # Centering the maps around SF
fig.update_layout(margin=dict(l=0, r=0, t=40, b=10), height=400, width=800) # Adjusting the margins and dimensions
fig.update_layout(mapbox1=dict(zoom=10.5, style='open-street-map')) # Choosing the zoom and style

# Add dropdown
fig.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="down",
            buttons=list([
                dict(label="Violent crimes",
                     method="update",
                     args=[{"visible": [True, False, False]},
                           {"title": "Violent crimes"}]),
                dict(label="Property crimes",
                     method="update",
                     args=[{"visible": [False, True, False]},
                           {"title": "Property crimes",
                            }]),
                dict(label="Larceny/Theft",
                     method="update",
                     args=[{"visible": [False, False, True]},
                           {"title": "Larceny/Theft",
                            }]),
            ]),
        )
    ])

# save as html
pio.write_html(fig, file='map.html', auto_open=True)


<a id='part4'></a>
## Bokeh plot

In [66]:
df_1 = df_violent_ori[df_violent_ori['PdDistrict'].isin(['TENDERLOIN', 'SOUTHERN', 'CENTRAL', 'NORTHERN'])]
df_1 = df_violent_ori.pivot_table(index='HoW', columns='Category', values='PdId', aggfunc='count')
df_1 = df_1.fillna(0)
#df_1.reset_index(inplace=True)

df_2 = df_property_ori[df_property_ori['PdDistrict'].isin(['TENDERLOIN', 'SOUTHERN', 'CENTRAL', 'NORTHERN'])]
df_2 = df_property_ori.pivot_table(index='HoW', columns='Category', values='PdId', aggfunc='count')
df_2 = df_2.fillna(0)
#df_2.reset_index(inplace=True)

In [67]:
source1 = ColumnDataSource(df_1)
source2 = ColumnDataSource(df_2)

hours = list(df_1.index)
hours = list(map(str, hours))


#### Plot 1 - violent crimes ####
p1 = figure(title = "Violent crimes throughout the week (Hour of Week (HoW))", width = 800, height = 400, x_range = FactorRange(factors=hours), x_axis_label="HoW",
    y_axis_label="Count")

p1.xaxis.visible = False

bar ={} # to store vbars
items = [] ### for the custom legend // you need to figure out where to add it
### here we will do a for loop:
for indx,i in enumerate(df_1):
    bar[i] = p1.vbar(x='HoW',  top=i, source= source1, 
                    ### we will create a vbar for each focuscrime
                    muted_alpha=0.05, muted = (i!='ASSAULT'),
                    fill_color = colors_list[indx], line_color = colors_list[indx]) 
    items.append((i, [bar[i]])) ### figure where to add it
    
legend = Legend(items=items, location=(10, 50)) ## figure where to add it
p1.add_layout(legend, 'right') ## figure where to add it
p1.legend.click_policy="mute" ### assigns the click policy (you can try to use ''hide'
ticker = SingleIntervalTicker(interval=24)
xaxis = LinearAxis(ticker=ticker)
p1.add_layout(xaxis, 'below')
p1.xaxis.axis_label = "Hour of Week (HoW)"
p1.legend.label_text_font_size = "8pt"
tab1 = Panel(child=p1, title="Violent Crimes")

#### Plot 2 - Property crimes ####
p2 = figure(title = "Property crimes throughout the week (Hour of Week (HoW))", width = 800, height = 400, x_range = FactorRange(factors=hours), x_axis_label="HoW",
    y_axis_label="Count")

p2.xaxis.visible = False

bar ={} # to store vbars
items = [] ### for the custom legend // you need to figure out where to add it
### here we will do a for loop:
for indx,i in enumerate(df_2):
    bar[i] = p2.vbar(x='HoW',  top=i, source= source2, 
                    ### we will create a vbar for each focuscrime
                    muted_alpha=0.05, muted = (i!='LARCENY/THEFT'),
                    fill_color = colors_list[indx], line_color = colors_list[indx]) 
    items.append((i, [bar[i]])) ### figure where to add it
    
legend = Legend(items=items, location=(10, 50)) ## figure where to add it
p2.add_layout(legend, 'right') ## figure where to add it
p2.legend.click_policy="mute" ### assigns the click policy (you can try to use ''hide'
ticker = SingleIntervalTicker(interval=24)
xaxis = LinearAxis(ticker=ticker)
p2.add_layout(xaxis, 'below')
p2.xaxis.axis_label = "Hour of Week (HoW)"
p2.legend.label_text_font_size = "8pt"
tab2 = Panel(child=p2, title="Property Crimes")

tabs = Tabs(tabs=[ tab1, tab2 ])

show(tabs)

output_file("plot3.html")
#save(p)
#show(p) #displays your plot