# Interactive Visualizations with Bokeh!
#### This is a brief exploration of Bokeh's interactive visualization tools as a component of our social data analysis course. We will delve into simple vbar plots to improve our understanding of the distribution of various focus crimes throughout each hour of the day. The data is normalized to enable comparisons of the crimes' differences rather than using absolute numbers.




In [2]:
import pandas as pd 
import numpy as np 

# Load the police incident reports CSV (historical, 2003 to May 2018 per the
# file name) from the current working directory into a DataFrame.
df = pd.read_csv(
    "Police_Department_Incident_Reports__Historical_2003_to_May_2018.csv"
)

Here we normalize the data: first we count the occurrences of each crime category per hour, then we divide each hourly count by the total number of incidents of that crime type, so that every category's hourly shares sum to 1.

In [3]:
# Build an hour-by-category table of crime shares from the module-level `df`.
#
# After this cell:
#   * `df`       holds only incidents dated 2010-01-01 through 2017-01-01
#                (both bounds inclusive) and has an extra integer "Hour" column.
#   * `df_pivot` has one row per hour and one column per crime category; each
#                column is divided by its own total, so it sums to 1.

# Convert the "Date" column to datetime type with format '%m/%d/%Y'
df["Date"] = pd.to_datetime(df["Date"], format='%m/%d/%Y')

# Keep rows dated between 2010-01-01 and 2017-01-01 (inclusive on both ends).
# The explicit .copy() makes the filtered frame independent of the original,
# so adding the "Hour" column below does not write into a slice
# (avoids pandas' SettingWithCopyWarning).
df = df.loc[df["Date"].between("2010-01-01", "2017-01-01")].copy()

# The hour is the text before the first ":" in "Time", converted to integer.
# Vectorized string operations replace the per-row Python lambda.
df["Hour"] = df["Time"].str.split(":", n=1).str[0].astype(int)

# Count incidents per (hour, category) and spread the categories into columns.
# fill_value=0 records "no incidents in this hour" as 0 instead of NaN.
df_pivot = df.groupby(["Hour", "Category"]).size().unstack("Category", fill_value=0)

# Normalize per crime category: divide every column by that category's total
# count, so each value is the share of the category's incidents in that hour.
df_pivot = df_pivot / df_pivot.sum()

In [10]:
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, FactorRange, Grid, LinearAxis, Plot, VBar, HoverTool, Legend,LinearColorMapper
from bokeh.palettes import Category20
from bokeh.io import output_file, show


# Interactive bar chart: for every focus crime, the share of that crime's
# incidents falling in each hour of the day (columns of `df_pivot`).
# All bars start muted; clicking a legend entry toggles the matching crime.

# Create a list of strings representing hours in a day from 0 to 23
# and use it to create a FactorRange object that will be used for the x-axis range of the plot.
hours = [str(i) for i in range(0, 24)]
factors = FactorRange(factors=hours)

# Create a ColumnDataSource from df_pivot. The x-axis is categorical (string
# factors), and Bokeh treats plain numbers on a categorical axis as raw
# "synthetic" coordinates, which would draw every bar half a category to the
# left of its tick. Converting the hour labels to strings makes each "Hour"
# value match its factor exactly. The index name ("Hour") is preserved and
# becomes the column name in the source.
source = ColumnDataSource(df_pivot.rename(index=str))

# Create a set of strings representing the crime types that will be plotted.
focuscrimes = {'WEAPON LAWS', 'PROSTITUTION', 'DRIVING UNDER THE INFLUENCE', 'ROBBERY', 'BURGLARY', 'ASSAULT', 'DRUNKENNESS',
               'DRUG/NARCOTIC', 'TRESPASS', 'LARCENY/THEFT', 'VANDALISM', 'VEHICLE THEFT', 'STOLEN PROPERTY', 'DISORDERLY CONDUCT'}

# Create an empty dictionary to store the VBars (crime name -> renderer)
bar = {}

# Create a figure object with various attributes and tools.
p = figure(title="Hourly Crimes", x_axis_label="hour", y_axis_label="Distribution of crime ",
           x_range=factors, tools="pan,wheel_zoom,box_zoom,reset", toolbar_location="above")

# Create a color palette based on the number of crime types being plotted
colors = Category20[len(focuscrimes)]

# Hover tooltips. "$name" is the name of the hovered renderer (set to the crime
# type below) and "@$name" looks up the data column with that same name, so a
# single HoverTool serves every crime. The values are fractions, hence the
# percent format.
TOOLTIPS = [("Crime", "$name"), ("Hour", "@Hour"), ("Percentage", "@$name{0.00%}")]
p.add_tools(HoverTool(tooltips=TOOLTIPS))

# Create an empty list to store the legend items
items = []

# Iterate through each crime type and create a corresponding VBar.
# Iterating in sorted order (rather than raw set order, which varies between
# interpreter runs for strings) keeps colors and legend order reproducible.
for crime, color in zip(sorted(focuscrimes), colors):
    # Create a VBar and add it to the figure; `name` lets the hover tool
    # resolve "$name" / "@$name" for this renderer.
    bar[crime] = p.vbar(x="Hour", top=crime, source=source, bottom=0, muted_alpha=0.01, width=0.6,
                        muted=True, line_color="black", fill_color=color, alpha=0.75, name=crime)

    # Add the VBar to the legend items list
    items.append((crime, [bar[crime]]))

# Create a legend using the items list and place it to the right of the plot
legend = Legend(items=items, location=(0, -15))
p.add_layout(legend, 'right')

# Set the size of the plot and the click policy for the legend.
# NOTE(review): newer Bokeh releases use `width` / `height` in place of
# `plot_width` / `plot_height` - confirm against the installed version.
p.plot_width = 600
p.plot_height = 500
p.legend.click_policy = "mute"

# Specify the output file BEFORE showing, so that show() writes the plot to
# "Focuscrime.html" rather than to whatever output was configured earlier.
output_file("Focuscrime.html")

# Display the plot
show(p)


#### Now we want to look at which crimes dominate the various districts.
##### So we use stacked bar charts of normalized crime counts to see how crime types compare in fractional occurrences across SF's precincts
