In [65]:
import pandas as pd
import json
import requests
from urllib.parse import quote
%matplotlib inline 

# Get crime data: per community area and primary crime type, the number of
# records with 2007 < year < 2013 divided by 5 (i.e. a yearly average over
# the five years 2008-2012), queried from data.cityofchicago.org.
soql = "https://data.cityofchicago.org/resource/6zsd-86xi.json?$query="
soql += quote(
    "SELECT community_area AS Community, count(*)/5 AS Crime_Rate, primary_type AS Primary_Type "
    "WHERE 2007 < year AND year < 2013 "
    "GROUP BY Community, Primary_Type LIMIT 10000"
)

# Use a timeout so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of building a DataFrame from an error payload.
http_resp = requests.get(soql, timeout=60)
http_resp.raise_for_status()
resp = http_resp.json()

crime_type = pd.DataFrame(resp)

In [66]:
# Get rid of the redundant decimal places.
# The API values are converted to float first so they can be rounded.
crime_type["Crime_Rate"] = crime_type["Crime_Rate"].astype(float)
# DataFrame.round returns a new frame rather than modifying in place, so the
# result has to be assigned back; a bare call leaves crime_type unchanged.
crime_type = crime_type.round(1)

# Drop every row that has a missing value in any column.
crime_type = crime_type.dropna(axis=0, how="any")
crime_type.tail()

Unnamed: 0,Community,Crime_Rate,Primary_Type
2022,9,2.6,ROBBERY
2023,9,1.2,SEX OFFENSE
2024,9,0.4,STALKING
2025,9,84.2,THEFT
2026,9,0.2,WEAPONS VIOLATION


In [67]:
# Load the socioeconomic indicators table from the local CSV file.
socioeconomics = pd.read_csv(
    "Census_Data_-_Selected_socioeconomic_indicators_in_Chicago__2008___2012.csv"
)

# Rename the crime table's "Community" column to "Community Area Number" and
# cast it to float, so it can serve as the join key against the
# socioeconomic table (which displays that key as 1.0, 2.0, ...).
crime_type = crime_type.rename(columns={"Community": "Community Area Number"})
crime_type = crime_type.astype({"Community Area Number": float})
crime_type.head(15)

Unnamed: 0,Community Area Number,Crime_Rate,Primary_Type
0,0.0,0.2,ASSAULT
1,0.0,1.2,BATTERY
2,0.0,0.2,CRIMINAL DAMAGE
3,0.0,0.4,CRIMINAL TRESPASS
4,0.0,1.0,DECEPTIVE PRACTICE
5,0.0,0.2,MOTOR VEHICLE THEFT
6,0.0,0.4,NARCOTICS
7,0.0,0.8,OTHER OFFENSE
8,0.0,1.6,THEFT
9,1.0,4.6,ARSON


In [57]:
# Preview the first five rows of the socioeconomic indicators table.
socioeconomics.head(n=5)

Unnamed: 0,Community Area Number,COMMUNITY AREA NAME,PERCENT OF HOUSING CROWDED,PERCENT HOUSEHOLDS BELOW POVERTY,PERCENT AGED 16+ UNEMPLOYED,PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA,PERCENT AGED UNDER 18 OR OVER 64,PER CAPITA INCOME,HARDSHIP INDEX
0,1.0,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0
1,2.0,West Ridge,7.8,17.2,8.8,20.8,38.5,23040,46.0
2,3.0,Uptown,3.8,24.0,8.9,11.8,22.2,35787,20.0
3,4.0,Lincoln Square,3.4,10.9,8.2,13.4,25.5,37524,17.0
4,5.0,North Center,0.3,7.5,5.2,4.5,26.2,57123,6.0


In [68]:
# Inner-join the crime rates with the socioeconomic indicators on the shared
# key column; rows whose key appears in only one of the tables are dropped.
mix_type = crime_type.merge(
    socioeconomics,
    how="inner",
    on="Community Area Number",
)

In [72]:
# Preview the first five rows of the merged crime / socioeconomic table.
mix_type.head(n=5)

Unnamed: 0,Community Area Number,Crime_Rate,Primary_Type,COMMUNITY AREA NAME,PERCENT OF HOUSING CROWDED,PERCENT HOUSEHOLDS BELOW POVERTY,PERCENT AGED 16+ UNEMPLOYED,PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA,PERCENT AGED UNDER 18 OR OVER 64,PER CAPITA INCOME,HARDSHIP INDEX
0,1.0,4.6,ARSON,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0
1,1.0,344.8,ASSAULT,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0
2,1.0,1038.6,BATTERY,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0
3,1.0,279.6,BURGLARY,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0
4,1.0,630.6,CRIMINAL DAMAGE,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0
