In [1]:
import json
import os

import requests
import pandas as pd
from pandas.io.json import json_normalize
import matplotlib.pyplot as plt
from sodapy import Socrata
from geopandas import gpd
import pysal as ps
import numpy as np
import folium

In [4]:
# --- Load complaints and beat polygons ---------------------------------------
# The Socrata app token is read from the SOCRATA_APP_TOKEN environment variable
# so the credential is not stored in the notebook. When the variable is unset,
# None is passed and the request is made without a token.
client = Socrata("data.cityofchicago.org", os.environ.get("SOCRATA_APP_TOKEN"))
results = client.get("w3hi-cfa4", limit=500000)

complaints = pd.DataFrame.from_records(results)

# A complaint's 'beat' field can hold several beats separated by '|'.
# Expand it to one entry per (complaint, beat): split, spread the pieces into
# columns, then stack them back into a single Series. The explicit dropna()
# removes the empty slots regardless of how stack() treats missing values.
split = (
    complaints['beat']
    .str.split('|')
    .apply(pd.Series)
    .stack()
    .dropna()
    .str.strip()
)
split.index = split.index.droplevel(-1)  # keep only the complaint's own index level
split.name = 'beat'

# Swap the multi-valued column for the expanded one. The right join repeats a
# complaint once per beat and keeps only complaints that have at least one beat.
complaints = complaints.drop(columns='beat').join(split, how='right')

# NOTE(review): Socrata floating timestamps are documented as ISO-8601 with
# dashes (e.g. 2014-10-13T00:00:00.000); the format below spells that out so
# strict parsers accept it. Confirm against the actual 'complaint_date' values.
complaints['complaint_date'] = pd.to_datetime(
    complaints['complaint_date'], format='%Y-%m-%dT%H:%M:%S.%f'
)
complaints['complaint_year'] = complaints['complaint_date'].dt.year

# Beat polygons; 'beat_num' is made numeric so it can be matched against the
# numeric beat ids derived from the complaints.
beats_gpd = gpd.read_file('policebeats.shp')
beats_gpd['beat_num'] = pd.to_numeric(beats_gpd['beat_num'])

In [5]:
# --- Complaints per beat, broken down by complaint category ------------------
# Maps the dummy-column names produced by get_dummies to short column names.
# The same dict drives both the rename and the column selection below, so the
# two cannot drift apart.
CATEGORY_COLUMNS = {
    'current_category_Bias': 'bias',
    'current_category_Civil Suits': 'civilsuits',
    'current_category_Coercion': 'coercion',
    'current_category_Death or Injury In Custody': 'deathinjuryincustody',
    'current_category_Domestic Violence': 'domesticviolence',
    'current_category_Excessive Force': 'excessforce',
    'current_category_Firearm Discharge - Hits': 'firearmhit',
    'current_category_Firearm Discharge - No Hits': 'firearmnohit',
    'current_category_Firearm Discharge at Animal': 'firearmanimal',
    'current_category_Legal Violation': 'legalviol',
    'current_category_Miscellaneous': 'misc',
    'current_category_Motor Vehicle Related Death': 'motordeath',
    'current_category_OC Discharge': 'ocdischarge',
    'current_category_Operational Violation': 'operationviolation',
    'current_category_Search or Seizure': 'searchseizure',
    'current_category_Taser Discharge': 'taserdischarge',
    'current_category_Taser Notification': 'tasernotif',
    'current_category_Unlawful Denial of Counsel': 'denialcounsel',
    'current_category_Unnecessary Display of Weapon': 'unnecessarydispweapon',
    'current_category_Verbal Abuse': 'verbalabuse',
}

# Named `category` rather than `type` so the built-in type() stays usable in
# every later cell of the notebook.
category = complaints[['beat', 'current_category']].dropna()

# One indicator column per category, then summed per beat. Selecting the
# columns by name keeps only the categories listed above (a missing one raises
# KeyError) and yields flat column names without a MultiIndex.
type_long = (
    pd.get_dummies(category, columns=['current_category'])
    .rename(columns=CATEGORY_COLUMNS)
    .groupby('beat')[list(CATEGORY_COLUMNS.values())]
    .sum()
    .reset_index()
)
type_long['beat'] = pd.to_numeric(type_long['beat'])

In [8]:
# --- Complaints per beat, broken down by race of complainant -----------------
# Maps the dummy-column names produced by get_dummies to short column names;
# also used to select the columns that are summed.
RACE_COLUMNS = {
    'race_of_complainant_African American / Black': 'Black',
    'race_of_complainant_American Indian or Alaskan Native': 'AmerIndianAlaskan',
    'race_of_complainant_Asian or Pacific Islander': 'Apia',
    'race_of_complainant_Hispanic': 'Hispanic',
    'race_of_complainant_Unknown': 'Unknown',
    'race_of_complainant_White': 'White',
}

race = complaints[['beat', 'race_of_complainant']].dropna()

# Built as one chain without inplace=True, so re-running the cell always
# starts from `race` and gives the same result. Summing the selected columns
# directly avoids the two-level column index that agg({...: [sum]}) creates.
race_long = (
    pd.get_dummies(race, columns=['race_of_complainant'])
    .rename(columns=RACE_COLUMNS)
    .groupby('beat')[list(RACE_COLUMNS.values())]
    .sum()
    .reset_index()
)
race_long['beat'] = pd.to_numeric(race_long['beat'])

In [9]:
# --- Join counts to beat polygons and run local Moran's I --------------------
# Left joins keep every beat polygon, in the order of beats_gpd, even when no
# complaint was recorded for it.
data = beats_gpd.merge(type_long, left_on='beat_num', right_on='beat', how='left')
data = data.merge(race_long, left_on='beat_num', right_on='beat', how='left')

# Beats without complaints come out of the joins as NaN; treat them as zero.
# Only the count columns are filled so geometry and other attributes of the
# beat layer are left untouched.
count_cols = [
    col
    for col in list(type_long.columns) + list(race_long.columns)
    if col != 'beat'
]
data[count_cols] = data[count_cols].fillna(0)

# Queen contiguity weights built from the polygons, row-standardised ('r').
W = ps.weights.Queen.from_dataframe(data)
W.transform = 'r'

# The pseudo p-values come from random permutations; seeding NumPy's global
# generator makes them repeatable between runs.
# NOTE(review): assumes Moran_Local draws from np.random's global state - confirm
# for the installed pysal version.
MORAN_SEED = 12345
np.random.seed(MORAN_SEED)
moran = ps.Moran_Local(data['Black'].values, W, permutations=9999)


In [12]:
# --- Classify beats as hot/cold spots and draw the map -----------------------
# A beat counts only if its pseudo p-value is below the threshold.
SIGNIFICANCE = 0.05
sig = moran.p_sim < SIGNIFICANCE

# Quadrant 1 is used for hot spots and quadrant 3 for cold spots. The
# comparisons are parenthesised explicitly: `moran.q==1 * sig` parses as
# `moran.q == (1 * sig)`, which only gives the intended result by coincidence.
hotspots = (moran.q == 1) & sig
coldspots = (moran.q == 3) & sig

# Encode the class as one integer per beat: 0 = neither, 1 = hot, 2 = cold.
hotcold = hotspots * 1 + coldspots * 2

hc_df = pd.DataFrame(hotcold)

# hotcold has one entry per beat polygon; it is attached by position.
# NOTE(review): assumes moran was computed on rows in the same order as
# beats_gpd - confirm against the cell that builds `data`.
mapdata = beats_gpd.assign(type=hotcold)

# One Leaflet path style per class, joined to the beats on the class code.
style = pd.DataFrame({'type': [0, 1, 2], 'style': [
    {'fillColor': '#e3dfd6', 'weight': .25, 'color': 'black'},
    {'fillColor': '#dd3232', 'fillOpacity': .85, 'weight': .25, 'color': 'black'},
    {'fillColor': '#a2d0cf', 'fillOpacity': .85, 'weight': .25, 'color': 'black'},
]})
mapdata = mapdata.merge(style, on='type')

cook_coords = [41.857602, -87.731696]
width, height = 400, 500

my_map = folium.Map(location=cook_coords, zoom_start=10,
                    tiles='cartodbpositron', width=width, height=height)

# Reproject to EPSG:4326 for the web map and hand each feature's own 'style'
# property to folium explicitly instead of relying on it being picked up
# implicitly.
folium.GeoJson(
    mapdata.to_crs(epsg=4326).to_json(),
    style_function=lambda feature: feature['properties']['style'],
).add_to(my_map)

my_map.save('map.html')