In [1]:
# import necessary libraries
import pandas as pd  # data wranglin'
import sys  # append paths
import os  # mess with the operating system
from sodapy import Socrata  #pull data with Tyler api
import plotly.express as px
import json
import numpy as np
import statsmodels.api as sm
import requests

In [2]:
# Configuration: data location, beat geometry, and the Socrata client.
DATA_DIR = '/workspaces/acs-study/data/'  # the one place the local data folder is spelled out
SOCRATA_TIMEOUT = 50  # value assigned to client.timeout below (presumably seconds -- confirm)

sys.path.append(DATA_DIR)

# Geospatial boundaries of Chicago police beats.
# beats.geojson is provided by the city of Chicago, denoting police beats.
# The policy of interest was implemented at the police-beat level.
# Beats are smaller than neighborhoods, and in some cases cross multiple neighborhoods.
# The encoding is pinned to UTF-8 (the GeoJSON standard encoding) instead of relying on
# the platform default, which differs between operating systems.
with open(os.path.join(DATA_DIR, 'beats.geojson'), 'r', encoding='utf-8') as beats_json:
    beats_data = json.load(beats_json)

client = Socrata(
    'data.cityofchicago.org',
    os.environ['CHICAGO_DATA_APP_TOKEN'],  # secrets stay secret, sorry not sorry
    # The account name can be overridden via CHICAGO_DATA_USER so other people can run
    # the notebook without editing it; the original account remains the default.
    username=os.environ.get('CHICAGO_DATA_USER', "bk2988a@american.edu"),
    password=os.environ['CHICAGO_DATA_PWD'],  # pwds are not for sharing
)
client.timeout = SOCRATA_TIMEOUT

print('Configured.')

Configured.


In [3]:
# Pull records from Chicago's crime dataset ("ijzp-q8t2") through the Socrata client.
# NOTE(review): no `limit` (or paging) is passed here. Socrata endpoints are documented
# to return only a default-sized first page when no limit is given, and the small
# per-beat counts recorded further down are consistent with a truncated pull --
# TODO confirm, and pass `limit=` / `where=` or page through results if the full
# 2001-to-today history is intended.
results = client.get("ijzp-q8t2")  # use Socrata client to pull Chicago's crime data from 2001 to today

In [13]:
# Query the Chicago Health Atlas data API with geography='southwest' and layer='zip'.
health_portal_url = 'https://chicagohealthatlas.org/api/v1/data/'
health_data = requests.get(
    health_portal_url,
    params={
        # Topics of interest: homicides, firearm, alcohol, drugs, overdoses, suicides, behav hosp,
        # depress hosp, hischool suicide attmpt, psych distress, HCSAPDP
        # Topic codes: VRHOR,VRFIR,VRALR,VRDIDR,VRDOR,VRSUR,HDBHR,YRSUP,HDMDR
        # Currently disabled filters:
        # 'topic':'VRHOR',
        # 'population':'',
        'geography': 'southwest',
        'layer': 'zip',
    },
    # Without an explicit timeout requests waits indefinitely on an unresponsive server,
    # which would hang a Run All; 50 matches the value used for the Socrata client.
    timeout=50,
)

In [14]:
# Show the raw response body (bytes) to inspect what the API sent back.
# NOTE(review): the recorded output echoes "topic":"VRHOR" in its params, while the
# request cell has 'topic' commented out -- the output looks stale; re-run to confirm.
health_data.content

b'{"time":"29.01 ms","params":{"topic":"VRHOR","geography":"southwest","layer":"zip"},"count":0,"results":[]}'

In [4]:
# Build the crime frame from the fetched records and cast columns to proper dtypes:
# coordinates and community area become numeric, the date column becomes datetime.
crime_df = pd.DataFrame.from_records(results).assign(
    latitude=lambda frame: pd.to_numeric(frame['latitude']),
    longitude=lambda frame: pd.to_numeric(frame['longitude']),
    community_area=lambda frame: pd.to_numeric(frame['community_area']),
    date=lambda frame: pd.to_datetime(frame['date']),
)

In [5]:
# Properties read from each beat feature in the geojson.
properties = [
    'beat',
    'beat_num',
    'district',
    'sector'
]

# One row per geojson feature: the property values (in `properties` order) followed by
# the feature's geometry coordinates. Each 'feature' is
# beat : {properties, geometry : {coordinates}}.
# A comprehension is used instead of a module-level loop so that no loop variables leak
# into the notebook namespace -- in particular, nothing named `property` is left behind
# to shadow the builtin in later cells.
beats = [
    [feature['properties'][name] for name in properties]
    + [feature['geometry']['coordinates']]  # capture actual geometry
    for feature in beats_data['features']
]

# This dataframe maps the geojson beat identifiers to their geometry so it can be joined
# to the crime dataframe. Passing `columns=` up front also yields a correctly labelled
# (empty) frame when the geojson has no features.
beats_df = pd.DataFrame(beats, columns=properties + ['coordinates'])

In [6]:

# West treatment area: beats where no cop was involved with the 911 calls.
# wta_full holds ALL treated beats.
wta_full = [
    '0811', '0812', '0813', '0814', '0815',
    '0821', '0822', '0823', '0824', '0825',
    '0832', '0833', '0834', '0835', '0831',
    '0711', '0722', '0732', '0731', '0734',
    '0733', '0735', '0723', '0724', '0725',
    '0726', '0712', '0713', '0714', '0715',
]
# wta_border is a proper subset of wta_full: only the treated beats that border
# untreated (control) beats.
wta_border = [
    '0814', '0815', '0821', '0822', '0824',
    '0715', '0713', '0712', '0711', '0722', '0731',
]
# Control beats that border the treated area.
wta_control = [
    '1031', '1032', '0911', '0921', '0922',
    '0923', '0932', '0934', '0935', '0225',
    '0232', '0311', '0312', '0322', '0323',
]


def treatment(area):
    """Return the study-group code for a beat number.

    Codes:
        1 -- treated beat on the border (listed in wta_border)
        2 -- control beat on the border (listed in wta_control)
        3 -- treated beat away from the border (in wta_full but not wta_border)
        0 -- any other beat
    """
    # Checked in priority order, so a border beat is reported as 1 before it can be
    # reported as 3 by its membership in wta_full.
    for code, members in ((1, wta_border), (2, wta_control), (3, wta_full)):
        if area in members:
            return code
    return 0
    
# Tag every beat with its study-group code as returned by `treatment`.
beats_df['treatment'] = beats_df['beat_num'].map(treatment)

# Attach the group code to each crime record by matching the record's `beat` to
# `beat_num`. Left join: crime rows whose beat is absent from beats_df are kept.
# NOTE(review): if beats_df ever holds more than one row per beat_num, this join will
# duplicate crime rows -- confirm beat_num is unique.
treated_crime_df = pd.merge(
    crime_df,
    beats_df[['beat_num', 'treatment']],
    how='left',
    left_on='beat',
    right_on='beat_num',
)

In [7]:
# Number of crime records per beat, joined with the beat's metadata and geometry.
# The index and the counts are named explicitly ('beat', 'count') so the column names --
# and therefore the merge key -- no longer depend on which names the installed pandas
# version gives to value_counts()/reset_index() output.
# beats_df also has a 'beat' column, so after the merge the two appear as
# 'beat_x' (beat id from the crime data) and 'beat_y' (property from the geojson).
crime_counts = (
    treated_crime_df['beat']
    .value_counts()
    .rename_axis('beat')
    .reset_index(name='count')
    .merge(
        beats_df,
        how='left',
        left_on='beat',
        right_on='beat_num',
    )
)

In [8]:
# Display the per-beat crime counts together with the merged beat properties,
# geometry, and treatment code.
crime_counts

Unnamed: 0,beat_x,count,beat_y,beat_num,district,sector,coordinates,treatment
0,0321,11,2,0321,03,2,"[[[[-87.58800670593915, 41.77695824816409], [-...",0
1,0222,11,2,0222,02,2,"[[[[-87.59527268740189, 41.81695734551978], [-...",0
2,1831,11,3,1831,18,3,"[[[[-87.63420286636071, 41.89659580852442], [-...",0
3,0513,10,1,0513,05,1,"[[[[-87.59424230420211, 41.70046564505776], [-...",0
4,1433,10,3,1433,14,3,"[[[[-87.6664056895925, 41.91814624647259], [-8...",0
...,...,...,...,...,...,...,...,...
249,1923,1,2,1923,19,2,"[[[[-87.6496998317798, 41.954561232610956], [-...",0
250,2033,1,3,2033,20,3,"[[[[-87.6578164711081, 41.9763528546908], [-87...",0
251,1824,1,2,1824,18,2,"[[[[-87.62673645632522, 41.91146877523999], [-...",0
252,2525,1,2,2525,25,2,"[[[[-87.71699394874092, 41.9246033899828], [-8...",0


In [9]:
# Disabled: choropleth of the treatment code for each police beat.
# Remove one leading "# " from every line below to re-enable it.
# fig = px.choropleth(crime_counts,
#                     geojson=beats_data,
#                     locations='beat_num',
#                     color='treatment',
#                     color_continuous_scale="Viridis",
#                     featureidkey='properties.beat_num'
# )

# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
# fig.update_geos(fitbounds="locations", visible=False)
# fig.show()