# Bokeh plotting template

Do all cleaning beforehand!

**Load in clean df**

In [1]:
# imports
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
from math import pi
import matplotlib.pyplot as plt

from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.models import ColumnDataSource, Range1d, HoverTool, Legend
from bokeh.models.widgets import Tabs, Panel
from bokeh.io import show
from bokeh.palettes import Category20b, Spectral, Plasma, Viridis, YlOrRd, PuOr, BuPu, BuGn, brewer, GnBu, PRGn, Inferno256, PuRd, RdPu, PiYG, RdYlGn, YlGnBu
from bokeh.io import reset_output, output_notebook
# Clear any Bokeh output mode configured earlier in this kernel session,
# then route subsequent show() calls into the notebook's output cells.
reset_output()
output_notebook()

from bokeh.plotting import figure, output_file, save
from IPython.display import IFrame
from IPython.core.display import display, HTML
import tempfile


In [2]:
# Read the pre-cleaned hate crime records; every later cell works from `df`.
# Note: the CSV carries its saved row index as an 'Unnamed: 0' column.
df = pd.read_csv(filepath_or_buffer='data/hatecrime_df_clean.csv')
df.head(n=5)

Unnamed: 0,Date of Offense,Time of Offense,Date Offense Reported,Report Year,Month,CCN,District,Block Location,Type of Hate Bias,Targeted Group,Top Offense Type,Address,gcode,lat,long,zipcodes
0,2012-01-08,1500-1505,2012-01-08,2012,1,12003845,3,1600 B/O 17th St NW,Sexual Orientation,Not Reported,Threats,1600 17th St NW Washington DC,"1600, 17th Street Northwest, Dupont Circle, Wa...",38.91125,-77.038588,20009
1,2012-01-14,0240-0250,2012-01-14,2012,1,12006716,3,1800 B/O 14th St NW,Sexual Orientation,Not Reported,Simple Assault,1800 14th St NW Washington DC,"1800, 14th Street Northwest, Cardozo/Shaw, Was...",38.914217,-77.032054,20009
2,2012-01-22,145,2012-01-22,2012,1,12010626,7,1300 B/O Alabama Ave SE,Sexual Orientation,Not Reported,ADW,1300 Alabama Ave SE Washington DC,"1300, Alabama Avenue Southeast, Washington, Di...",38.845056,-76.987412,20020
3,2012-01-22,1930,2012-01-22,2012,1,12010912,1,600 B/O H St NW,Race,Asian,Threats,600 H St NW Washington DC,"600, H Street Northwest, Chinatown, Washington...",38.899729,-77.020065,20001
4,2012-02-06,1900,2012-02-06,2012,2,12018396,3,1300 B/O Park Rd NW,Sexual Orientation,Not Reported,Simple Assault,1300 Park Rd NW Washington DC,"1300, Park Road Northwest, Columbia Heights, W...",38.931198,-77.029808,20010


# Horizontal bar examples

### Step 1: Create a function that builds a separate dataframe for every categorical column that we want to analyze

In [9]:
def prep_bokeh_df(df, column):
    """Tally how often each distinct value occurs in ``df[column]``.

    Returns a two-column dataframe: ``column`` holds the distinct values and
    ``'Hate crime count'`` holds the number of rows carrying each value, in
    the order produced by ``value_counts``.
    """
    tallies = df[column].value_counts()
    # Label the index with the column name so reset_index() emits the
    # category values under that header.
    tallies = tallies.rename_axis(column)
    return tallies.reset_index(name='Hate crime count')


Use the function to create multiple dfs for the categories we want to explore.

In [12]:
# Build one count dataframe per categorical column we want to chart.
type_bias_bar_df, targeted_group_bar_df, offense_type_bar_df = (
    prep_bokeh_df(df, category_column)
    for category_column in ('Type of Hate Bias', 'Targeted Group', 'Top Offense Type')
)

### Step 2: Create a function to plot Bokeh categorical data into horizontal bars

In [13]:
def bokeh_hbar(df, column, title, colorpalette, height, width):
    """Plot categorical counts as horizontal bars, one bar per row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``column`` (the category labels) and
        ``'Hate crime count'`` (the bar lengths).
    column : str
        Name of the categorical column; also used as the y-axis label and
        as the first hover field.
    title : str
        Plot title.
    colorpalette : dict, sequence or str
        Usually a mapping from palette size to a sequence of colors (the
        shape of ``PuRd``, ``PuRd_20`` ...). ``colorpalette[n]`` is used when
        it exists, where ``n`` is the number of distinct categories. When
        that lookup fails, a ramp is fitted instead: the smallest entry with
        enough colors is truncated, or the largest entry is cycled. A plain
        sequence of colors is fitted the same way; a single color string is
        applied to every bar.
    height, width : int
        Plot size forwarded to ``figure``.

    Returns
    -------
    The configured Bokeh figure (not shown yet).

    Raises
    ------
    ValueError
        If the palette contains no colors to fall back on.
    """
    TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom,tap"

    n_categories = df[column].nunique()
    n_bars = len(df[column])

    if isinstance(colorpalette, str):
        # A single color name/hex applies to all bars.
        bar_colors = colorpalette
    else:
        try:
            # Exact-size lookup first, so every call that worked before
            # still gets precisely the same colors.
            bar_colors = colorpalette[n_categories]
        except (KeyError, IndexError):
            # No entry for this category count: fit one instead of failing.
            if isinstance(colorpalette, dict):
                ramps = sorted((list(ramp) for ramp in colorpalette.values()), key=len)
                base_colors = next(
                    (ramp for ramp in ramps if len(ramp) >= n_bars),
                    ramps[-1] if ramps else [],
                )
            else:
                base_colors = list(colorpalette)
            if not base_colors:
                raise ValueError("colorpalette must contain at least one color")
            bar_colors = [base_colors[i % len(base_colors)] for i in range(n_bars)]

    p = figure(plot_height=height,
               plot_width=width,
               title=title,
               tools=TOOLS,
               toolbar_location='right',
               # Reversed so the first row of df ends up at the top of the axis.
               y_range=df[column].unique()[::-1])
    p.hbar(y=df[column], right=df['Hate crime count'],
           height=0.75, color=bar_colors)
    p.yaxis.axis_label = column
    p.xaxis.axis_label = 'Number of hate crimes'
    # '@y' / '@right' refer to the data passed to hbar() under those names.
    p.select_one(HoverTool).tooltips = [
        (column, '@y'),
        ('Number of hate crimes', '@right')]
    return p

We display the number of hate crimes per type of hate bias, targeted group, and top offense type.

In [14]:
# Extended color schemes: bokeh_hbar looks its palette up by category count,
# so build 20- and 28-color entries by repeating PuRd[9] (minus its last
# shade) and topping up with the first four PuRd[9] shades.
PuRd_20 = {20: PuRd[9][:-1] * 2 + PuRd[9][:-5]}
PuRd_28 = {28: PuRd[9][:-1] * 3 + PuRd[9][:-5]}


# initialize plots to be put in tabs
p_type = bokeh_hbar(
    df=type_bias_bar_df,
    column='Type of Hate Bias',
    title="Number of hate crimes by type of hate bias",
    colorpalette=PuRd,
    height=450,
    width=650,
)
p_target = bokeh_hbar(
    df=targeted_group_bar_df,
    column='Targeted Group',
    title="Number of hate crimes by targeted group",
    colorpalette=PuRd_28,
    height=450,
    width=650,
)
p_offense = bokeh_hbar(
    df=offense_type_bar_df,
    column='Top Offense Type',
    title="Number of hate crimes by offense type",
    colorpalette=PuRd_20,
    height=450,
    width=650,
)

# format in tabs: one panel per plot, in display order
tabs = Tabs(tabs=[
    Panel(child=plot, title=label)
    for plot, label in (
        (p_type, 'Type'),
        (p_target, 'Targeted group'),
        (p_offense, 'Offense type'),
    )
])

show(tabs)
# bokeh_deepnote_show(tabs) # To display Bokeh plot on Deepnote
# save(tabs,filename='./html/hc_intro_tab.html')

# Trending examples

### Step 1: Prepare dataframes for the time intervals we want to trend by.

In [16]:
# One row per offense date with the number of hate crimes recorded on it,
# plus calendar fields derived from that date for coarser trending.
df_daily_counts = (
    df['Date of Offense']
    .value_counts()
    .rename_axis('Date of Offense')
    .reset_index(name='Hate crime count')
)

# Parse the date strings once and reuse the result for every derived field.
offense_dates = pd.to_datetime(df_daily_counts['Date of Offense'])
df_daily_counts['Year'] = offense_dates.dt.year
df_daily_counts['Month'] = offense_dates.dt.month
df_daily_counts['MonthYear'] = offense_dates.dt.to_period('M')

# Totals per year and per calendar month.
# numeric_only=True keeps the string date column and the Period column out
# of the sum. Older pandas dropped them silently; pandas 2.x includes them
# by default, which can raise.
# NOTE: the remaining numeric helper columns ('Month', and 'Year' in the
# monthly frame) are summed as well, so only 'Hate crime count' is a
# meaningful total in these two frames.
df_yearly_counts = df_daily_counts.groupby('Year').sum(numeric_only=True).reset_index()
df_monthly_counts = df_daily_counts.groupby('MonthYear').sum(numeric_only=True).reset_index()

### Step 2: Create function to plot trending data in vbars.

In [23]:
def bokeh_vbar(df, column, title, colormap, height=300, width=600):
    """Plot ``'Hate crime count'`` against ``df[column]`` as vertical bars.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``column`` (the x positions) and ``'Hate crime count'``
        (the bar heights).
    column : str
        Name of the x column; also used as the x-axis label and as the
        first hover field.
    title : str
        Plot title.
    colormap : str or sequence of str
        A single color for all bars, or a list/tuple of color strings. A
        list whose length differs from the number of bars is cycled or
        truncated to fit, so each bar gets exactly one color. Any other
        value is handed to Bokeh unchanged.
    height, width : int, optional
        Plot size forwarded to ``figure``; the defaults keep the previous
        fixed 300 x 600 layout.

    Returns
    -------
    The configured Bokeh figure (not shown yet).
    """
    TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom,tap"

    n_bars = len(df[column])
    bar_colors = colormap
    is_color_list = (
        isinstance(colormap, (list, tuple))
        and len(colormap) > 0
        and all(isinstance(shade, str) for shade in colormap)
    )
    if is_color_list and len(colormap) != n_bars:
        # Stretch or trim the palette so it lines up with the bars.
        bar_colors = [colormap[i % len(colormap)] for i in range(n_bars)]

    p = figure(plot_height=height, plot_width=width,
               title=title,
               tools=TOOLS,
               toolbar_location='right')

    p.vbar(x=df[column], top=df['Hate crime count'], width=0.75,
           color=bar_colors)

    p.xaxis.axis_label = column
    p.yaxis.axis_label = 'Hate crime count'
    # '@x' / '@top' refer to the data passed to vbar() under those names.
    p.select_one(HoverTool).tooltips = [
        (column, '@x'),
        ('Number of hate crimes', '@top'),
    ]

    return p


Show basic trending vbar by year.

In [26]:
# Yearly hate crime counts, colored with ten PuRd shades
# (seven from the 8-color ramp followed by three from the 9-color ramp).
p = bokeh_vbar(
    df=df_yearly_counts,
    column='Year',
    title='Number of hate crimes per year',
    colormap=PuRd[8][:-1] + PuRd[9][:-6],
)
show(p)

### Step 3: Create a function to trend data in a line.