In [1]:
# # # # # # # # # # # # # # # # # # # # # # # 
# # Importing Libraries 
# # # # # # # # # # # # # # # # # # # # # # #
import requests
import pandas as pd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.express as px
import os
from urllib.request import urlopen
import json
import dash
from dash import Dash, html, dcc, Input, Output, State
import dash_bootstrap_components as dbc
import dash as dash_table
import pyarrow.feather as feather
import plotly.io as pio
import plotly.graph_objects as go
# from jupyter_dash import JupyterDash
api_key = 'eef7nyyh9bl862kn45goy7kk9' # API-key for Chicago Data Portal
headers = {'API-Key': '{key}'.format(key=api_key)}

In [2]:
# # # # # # # # # # # # # # # # # # # # # # # 
# # # Storing Categories Locally 
# # # # # # # # # # # # # # # # # # # # # # #

# # Request + Data Wrangling for Tree Map

# # Initial run to retrive the mapping between the primary type and the secondary type. This should only been used once since a CSV file is saved
# # And this file should be loaded rather than make a API call.

# url_type_categories = 'https://data.cityofchicago.org/resource/crimes?$$app_token=KOnBLwyDeOqegTClGWyHUe6Kf&$limit=10000000&$select=description,primary_type,iucr'
# jsonData_type_categories = requests.get(url_type_categories, headers=headers).json()
# df_type_categories= pd.DataFrame.from_dict(jsonData_type_categories)

# # Drops dublicates to get unique combinations of primary and secondary types
# df_type_categories_clean = df_type_categories.drop_duplicates().reset_index()

# # Saves the categories as a CSV
# df_type_categories_clean.to_feather(os.getcwd()+'\\Categories' )

# # Deletes large varibles that are not needed outside of the initial query 
# del df_type_categories,jsonData_type_categories,df_type_categories_clean

In [3]:
# Loading the geo map from cityofchicago.org
with urlopen(r'https://data.cityofchicago.org/api/geospatial/fthy-xz3r?method=export&format=GeoJSON') as response:
    geo_ward = json.load(response)

In [4]:
categories = feather.read_feather(os.getcwd()+'/Categories' )
categories = categories.drop_duplicates(subset='iucr')
# Creates a map between from iucr to primary type and from iucr to description
map_primary_type = dict(zip(categories.iucr,categories.primary_type))
map_description = dict(zip(categories.iucr,categories.description))

In [5]:
def Crime_selector(input):
    if len(input['points'][0].get('id').split("/")) == 2:
        crime_type = '&primary_type='+input['points'][0].get('id').split("/")[0]+'&description='+input['points'][0].get('id').split("/")[1]
    else:
        crime_type = '&primary_type='+input['points'][0].get('id')
    return crime_type

In [6]:
# # # # # # # # # # # # # # # # # # # # # # # 
# # Tree Map 
# # # # # # # # # # # # # # # # # # # # # # #

def update_treemap(years):
    #Retrive count of the different sub_types
    start_year = str(years[0])
    end_year = str(years[1])
    url_description_count = 'https://data.cityofchicago.org/resource/crimes?$$app_token=KOnBLwyDeOqegTClGWyHUe6Kf&$limit=10000000&$where= year <= '+end_year+' AND year >='+start_year+'&$select=iucr,count(iucr)&$group=iucr'
    jsonData_description_count = requests.get(url_description_count, headers=headers).json()
    df_description_count= pd.DataFrame.from_dict(jsonData_description_count)
    # Convert the count to int
    df_description_count['count_iucr'] = df_description_count['count_iucr'].astype(int)
    #Mapping iucr from the crime statistic to the primary type and description using the map defined earilier
    df_description_count['primary_type'] = df_description_count['iucr'].map(map_primary_type)
    df_description_count['description'] = df_description_count['iucr'].map(map_description)
    #Plot Treemap diagram 
    annotation_template = go.layout.Template()
    annotation_template.layout = dict(paper_bgcolor='rgb(31, 43, 64)')
    tree = px.treemap(df_description_count, path=['primary_type','description'], names='description', values='count_iucr',maxdepth=2, template=annotation_template,labels={'count_iucr':'Number of reports', 'labels':'Crime type'})
    tree.update_layout(margin = dict(t=25, l=25, r=25, b=25))
    return tree

In [7]:
# # # # # # # # # # # # # # # # # # # # # # # 
# # Choropleth Map 
# # # # # # # # # # # # # # # # # # # # # # #

def update_map(years,input):
    # Request + Data Wrangling for Choropleth Map
    start_year = str(years[0])
    end_year = str(years[1])
    crime_type = Crime_selector(input)
    url_geo = 'https://data.cityofchicago.org/resource/crimes?$$app_token=KOnBLwyDeOqegTClGWyHUe6Kf&$limit=10000000&$where= year <= '+end_year+' AND year >='+start_year+'&$select=district,count(primary_type)&$group=district'+crime_type
    jsonData_geo = requests.get(url_geo, headers=headers).json()
    df_geo = pd.DataFrame.from_dict(jsonData_geo)
    df_geo.dropna(inplace=True)  # There is one NaN in the district column
    # Removing the 0 in front of the district number, to match the districts in the geojson file
    df_geo = df_geo.astype({'district': 'int'})
    df_geo = df_geo.astype({'count_primary_type': 'int'})
    # We remove 21st district because it isn't used on the geo map. It is only a single building
    df_geo = df_geo[df_geo.district != 21]
    # There are two 16th districts for some reason, so we group them. Alternatively we could remove the smallest of them
    df_geo = df_geo.groupby(['district'], as_index=False)['count_primary_type'].sum()

    # try:
    #     geo_ward
    #     print('Geo data exists')
    # except NameError:
    #     print('Geo data does not exist')
    #     with urlopen(r'https://data.cityofchicago.org/api/geospatial/fthy-xz3r?method=export&format=GeoJSON') as response:
    #         geo_ward = json.load(response)
    annotation_template = go.layout.Template()
    annotation_template.layout = dict(paper_bgcolor='rgb(31, 43, 64)')
    choro = px.choropleth(df_geo, geojson=geo_ward, color="count_primary_type",
                    locations="district", featureidkey="properties.dist_num",
                    projection="mercator",
                    color_continuous_scale=px.colors.sequential.OrRd, template=annotation_template, labels={"count_primary_type":"Number of reports",'districts':'Districts'})
    choro.update_geos(fitbounds="locations", visible=False)
    choro.update_layout({'margin':{"r":0,"t":0,"l":0,"b":0}, 'dragmode':False, 'font_color':'rgb(127, 175, 223)'})
    choro.update_layout(geo=dict(bgcolor= 'rgba(0,0,0,0)'))
    return choro


In [8]:
# # # # # # # # # # # # # # # # # # # # # # # 
# # Stacked Area Chart 
# # # # # # # # # # # # # # # # # # # # # # #

# Request + Data Wrangling for the Chart
def update_stacked_area(years,input):
    start_year = str(years[0])
    end_year = str(years[1])
    crime_type = '&primary_type='+input['points'][0].get('id').split("/")[0]
    area_chart_url = 'https://data.cityofchicago.org/resource/crimes?$$app_token=KOnBLwyDeOqegTClGWyHUe6Kf&$limit=1000&$where= year <= '+end_year+' AND year >='+start_year+'&$select=count(year),year,iucr&$group=year,iucr'+crime_type
    jsonData_area_chart = requests.get(area_chart_url, headers=headers).json()
    df_area_chart= pd.DataFrame.from_dict(jsonData_area_chart)
    df_area_chart['primary_type'] = df_area_chart['iucr'].map(map_primary_type)
    df_area_chart['description'] = df_area_chart['iucr'].map(map_description)
    df_area_chart = df_area_chart.astype({'year':'int', 'count_year':'int'}) # Converting strings to integers
    temp_description_other = df_area_chart[~df_area_chart['description'].isin(df_area_chart.groupby(by='description').sum().nlargest(n=11,columns='count_year').reset_index()['description'])].groupby(by='year').sum().reset_index()
    temp_description_other['description'] = 'Other'
    largest_12_descriptions = df_area_chart[df_area_chart['description'].isin(df_area_chart.groupby(by='description').sum().nlargest(n=11,columns='count_year').reset_index()['description'])]
    df_area_chart = pd.concat([largest_12_descriptions,temp_description_other])
    temp = df_area_chart[['description','count_year']].groupby(by='description').sum().reset_index()
    sorter = list(temp.sort_values(by='count_year',ascending=False)['description'])
    df_area_chart['order'] = pd.Categorical(df_area_chart['description'], ordered=True, categories=sorter)
    df_area_chart = df_area_chart.sort_values('order')
    annotation_template = go.layout.Template()
    annotation_template.layout = dict(paper_bgcolor='rgb(31, 43, 64)')
    chart = px.area(df_area_chart, x="year", y="count_year", color="description", template=annotation_template)
    chart.update_layout({
        'plot_bgcolor': 'rgba(0, 0, 0, 0)',
        'font_color':'rgb(127, 175, 223)'})
    chart.update_layout(
        xaxis_title="Year",
        yaxis_title="Number of reports",
        legend_title = "Crime",
        legend_title_font_color = 'rgb(70, 236, 150)',
        legend_title_font_size = 20,
        legend_font_size = 15,
    )
    chart.update_xaxes(dtick=1)
    return chart

In [9]:
# # # # # # # # # # # # # # # # # # # # # # # 
# # Sunburst Diagram
# # # # # # # # # # # # # # # # # # # # # # #

# Request + Data Wrangling for the Sunburst
def update_sunburst(years,input):
    start_year = str(years[0])
    end_year = str(years[1])
    crime_type = Crime_selector(input)
    url_sunburst = 'https://data.cityofchicago.org/resource/crimes?$$app_token=KOnBLwyDeOqegTClGWyHUe6Kf&$limit=10000000&$where= year <= '+end_year+' AND year >='+start_year+'&$select=date,primary_type,domestic,arrest'+crime_type
    jsonData_sunburst = requests.get(url_sunburst, headers=headers).json()
    df_sunburst = pd.DataFrame.from_dict(jsonData_sunburst)
    df_sunburst.head(2)
    df_sunburst['Count'] = 1
    df_sunburst['Date']= pd.to_datetime(df_sunburst['date'])
    df_sunburst['WeekDayName'] = df_sunburst['Date'].dt.day_name()
    df_sunburst['month'] = df_sunburst['Date'].dt.month
    df_sunburst['hour'] = (df_sunburst['Date'].dt.hour % 24+4) // 4
    df_sunburst['hour'].replace({1: 'Late Night', 2: 'Early Morning',3: 'Morning',4: 'Noon',5: 'Evening', 6: 'Night'}, inplace=True)
    seasonDictionary = {1: 'Winter',2:'Winter',12:'Winter',3:'Spring',4:'Spring',5:'Spring',6:'Summer',7:'Summer',8:'Summer',9:'Autumn',10:'Autumn',11:'Autumn'}
    df_sunburst['Season'] = df_sunburst['month'].map(seasonDictionary) 
    booleanDictionaryArrest = {True: 'Arrest made', False: 'Arrest not made'} # Changing arrest column from T/F --> Arrest made / Arrest not made
    booleanDictionaryDomestic = {True: 'Domestic', False: 'Not domestic'} # Changing domestic column from T/F --> Domestic / Not domestic
    df_sunburst['arrest'] = df_sunburst['arrest'].map(booleanDictionaryArrest) # The actual change for arrest
    df_sunburst['domestic'] = df_sunburst['domestic'].map(booleanDictionaryDomestic) # The actual change for domestic
    annotation_template = go.layout.Template()
    annotation_template.layout = dict(paper_bgcolor='rgb(31, 43, 64)')
    sunburst = px.sunburst(df_sunburst, path=['Season','arrest','hour'], template=annotation_template)
    sunburst.update_layout({'margin':{"r":0,"t":20,"l":0,"b":20}})
    return sunburst

In [10]:
def update_titles(years,input):
    start_year = str(years[0])
    end_year = str(years[1])
    crime_type = input['points'][0].get('id')
    title_string_choro = str("Heatmap of "+crime_type.lower()+" from "+start_year+" to "+end_year+" by districts")
    title_string_sunburst = str("Sunburst of "+crime_type.lower()+" from "+start_year+" to "+end_year)
    title_string_area = str("Stacked area chart of "+crime_type.lower()+" from "+start_year+" to "+end_year)

    return title_string_choro,title_string_sunburst,title_string_area

In [None]:
# # # # # # # # # # # # # # # # # # # # # # #
# # DASH LAYOUT
# # # # # # # # # # # # # # # # # # # # # # #

years = [2016,2021]
y = {'points': [{'curveNumber': 0, 'pointNumber': 332, 'currentPath': '/', 'root': '', 'entry': '', 'percentRoot': 0.1923782549881637, 'percentEntry': 0.1923782549881637, 'percentParent': 0.1923782549881637, 'parent': '', 'id': 'BATTERY', 'label': 'BATTERY', 'value': 91018}]}
x = {'points': [{'curveNumber': 0, 'pointNumber': 124, 'currentPath': '/BATTERY/', 'root': '', 'entry': 'BATTERY', 'percentRoot': 0.09360415962123773, 'percentEntry': 0.48656309740930365, 'percentParent': 0.48656309740930365, 'parent': 'BATTERY', 'id': 'BATTERY/DOMESTIC BATTERY SIMPLE', 'label': 'DOMESTIC BATTERY SIMPLE', 'value': 44286}]}
tree = update_treemap(years)
choro = update_map(years,x)
sunburst = update_sunburst(years,x)
chart = update_stacked_area(years,y)

app = Dash(__name__)

app.layout = html.Div(id='root', children =[
    html.Div(html.H1('The Crimes of Chicago'), id="headline"),
    html.Div(children=[
        html.P('Reported crime in the city of Chicago from 2001 to present day.'),
        html.P('Gain an overview of the distribution of crime types in Chicago City, and explore developments and trends in reported crimes.')
    ], id= 'subheader'),
    html.Div(id='top', children=[
        html.Div(children=[
            html.P("Number of reports by primary and secondary type.", id='tree_txt1'),
            html.P("Click a crime to display it in the other visualizations:", id='tree_txt2'),
            dcc.Graph(id='Treemap', figure=tree),
        ]),
    ]),
    html.Div(id='slider', children=[
        html.P("Drag the slider to change years:", id='slider_txt1'),
        dcc.RangeSlider(2001,2021, 1, value=years, id='my-range-slider', marks={
            2001: '2001',
            2002: '2002',
            2003: '2003',
            2004: '2004',
            2005: '2005',
            2006: '2006',
            2007: '2007',
            2008: '2008',
            2009: '2009',
            2010: '2010',
            2011: '2011',
            2012: '2012',
            2013: '2013',
            2014: '2014', 
            2015: '2015',
            2016: '2016',
            2017: '2017',
            2018: '2018',
            2019: '2019',
            2020: '2020',
            2021: '2021',
            2022: '2022',      
        }), # Value is the initial years when side is loaded

    ]),
    html.Div(className='flex-container', id='mid', children=[
        html.Div(className='flex-child', id='Map', children=[
            html.P(children=["Heatmap"], id= 'choro_txt'),
            dcc.Graph(id='Map', figure=choro)
        ]),
        html.Div(className='flex-child', id='Sunburst', children=[
            html.P(children=[f"Sunburst ({years[0]}-{years[1]})"], id= 'sunburst_txt'),
            dcc.Graph(id='Sunburst', figure=sunburst)
        ])
    ]),
    html.Div(id='bottom', children=[
        html.P(children=["Stacked Area Chart"],id= 'area_txt'),
        dcc.Graph(id='Chart', figure=chart)
    ]),
])

# Creates output from the slider
@app.callback(
    Output('output-container-range-slider', 'children'),
    Input('my-range-slider', 'value'))
def update_output(value):
    return value

# Updates the Sunburst when the slider and treemap is changed
@app.callback(
    Output('Sunburst', 'figure'),
    Input('my-range-slider', 'value'),
    Input('Treemap', 'clickData'))
def sunburst(value,clickData):
    years = value
    return update_sunburst(years,clickData)

# Updates the Choropleth when the slider and treemap is changed
@app.callback(
    Output('Map', 'figure'),
    Input('my-range-slider', 'value'),
    Input('Treemap', 'clickData'))
def choro_map(value,clickData):
    years = value
    return update_map(years,clickData)

# Updates the stacked area chart when the slider and treemap is changed
@app.callback(
    Output('Chart', 'figure'),
    Input('my-range-slider', 'value'),
    Input('Treemap', 'clickData'))
def chart(value,clickData):
    years = value
    return update_stacked_area(years, clickData)

# Updates the year and crime in graph titles, but only when the treemap is clicked (since the plots are only updated when the treemap is clicked)
@app.callback(
    Output('choro_txt', 'children'),
    Output('sunburst_txt', 'children'),
    Output('area_txt', 'children'),
    Input('my-range-slider', 'value'),
    Input('Treemap', 'clickData'))
def titles(years,input):
    return update_titles(years,input)

# Updates the tree map with input from the time slider
@app.callback(
    Output('Treemap', 'figure'),
    Input('my-range-slider', 'value'))
def tree_map(value):
    years = value
    return update_treemap(years)

if __name__ == '__main__':
    app.run_server(debug=False)


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.

[2023-10-24 14:39:51,676] ERROR in app: Exception on /_alive_3c65b378-2916-403b-9b94-708e990bcb55 [GET]
Traceback (most recent call last):
  File "/opt/anaconda3/envs/withpip/lib/python3.9/site-packages/flask/app.py", line 2525, in wsgi_app
    response = self.full_dispatch_request()
  File "/opt/anaconda3/envs/withpip/lib/python

[2023-10-24 14:39:52,519] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "/opt/anaconda3/envs/withpip/lib/python3.9/site-packages/flask/app.py", line 2525, in wsgi_app
    response = self.full_dispatch_request()
  File "/opt/anaconda3/envs/withpip/lib/python3.9/site-packages/flask/app.py", line 1822, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/opt/anaconda3/envs/withpip/lib/python3.9/site-packages/flask/app.py", line 1820, in full_dispatch_request
    rv = self.dispatch_request()
  File "/opt/anaconda3/envs/withpip/lib/python3.9/site-packages/flask/app.py", line 1796, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
  File "/opt/anaconda3/envs/withpip/lib/python3.9/site-packages/dash/dash.py", line 1310, in dispatch
    ctx.run(
  File "/opt/anaconda3/envs/withpip/lib/python3.9/site-packages/dash/_callback.py", line 442, in add_context
    output_value = fun

In [17]:
uf = [2016,2021]

print('('+str(uf[0])+ '-' + str(uf[1])+')')

(2016-2021)
