In [256]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import gmaps
import gmaps.datasets
import os
import apikey as api_key
import json
import ipywidgets
import geopy

In [257]:
# Import CSVs for 2015-2018 
crime_15 = "Crime_Database/Data/crimedata2015.csv"
crime_16 = "Crime_Database/Data/crimedata2016.csv"
crime_17 = "Crime_Database/Data/crimedata2017.csv"
crime_18 = "Crime_Database/Data/crimedata2018.csv"
crime_data_15 = pd.read_csv(crime_15)
crime_data_16 = pd.read_csv(crime_16)
crime_data_17 = pd.read_csv(crime_17)
crime_data_18 = pd.read_csv(crime_18)

In [258]:
## Comments to group:
# Merge cannot be performed until data is cleaned, too much Memory is used 
# I can merge CSVs by file and chunk size but some data will be lost, figured cleaning each was better for graphs
# I am unable to filter by the crimes in highest_offense_desc row, tried setting as index and renaming column
# highest_offense_desc row is only row that will not filter
# I will be using the highest_offense row for simplification, think it will read better for graphs anyway

In [259]:
# Cleaning 2015 DF
# Drop columns
crime_15_df = crime_data_15.drop(columns= {"highest_offense_desc", "council_district", "location", "district", "zip"})
crime_15_df

offenses_15 = (crime_15_df.loc[crime_15_df["highest_offense"].isin(categories)])
offenses_15["x_coord"] = offenses_15["x_coord"].div(100000,  fill_value = 0)
offenses_15["y_coord"] = offenses_15["y_coord"].div(-100000,  fill_value = 0)
offenses_15

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Robbery,1-Jan-15,31.304830,-101.023660
1,Robbery,1-Jan-15,31.247300,-100.902960
2,Burglary,1-Jan-15,31.359850,-101.172200
3,Burglary,1-Jan-15,31.298960,-100.960320
4,Burglary,1-Jan-15,31.104550,-100.393400
...,...,...,...,...
38568,Theft,31-Dec-15,31.246750,-100.724780
38569,Theft,31-Dec-15,30.962850,-101.407380
38570,Theft,31-Dec-15,31.095040,-101.013100
38571,Theft,31-Dec-15,31.502900,-100.796080


In [260]:
# 2016 DF
crime_16_df = crime_data_16.drop(columns= {"highest_offense_desc", "council_district", "location_rec", "district", "zip"})
crime_16_df


offenses_16 = (crime_16_df.loc[crime_16_df["highest_offense"].isin(categories)])
offenses_16["x_coord"] = offenses_16["x_coord"].div(100000,  fill_value = 0)
offenses_16["y_coord"] = offenses_16["y_coord"].div(-100000,  fill_value = 0)
offenses_16

Unnamed: 0,highest_offense,date_rec,x_coord,y_coord
0,Agg Assault,1-Jan-16,30.673220,-100.627960
1,Theft,1-Jan-16,31.149570,-100.704620
2,Robbery,1-Jan-16,31.291810,-101.069230
3,Theft,1-Jan-16,31.136430,-100.703570
4,Rape,1-Jan-16,0.000000,-0.000000
...,...,...,...,...
37456,Theft,31-Dec-16,31.174540,-101.090650
37457,Theft,31-Dec-16,31.139240,-100.707940
37458,Theft,31-Dec-16,31.328120,-100.843560
37459,Theft,31-Dec-16,31.317780,-100.624810


In [261]:
# 2017 DF
crime_17_df = crime_data_17.drop(columns= {"highest_offense_desc","council_district", "location", "district", "zip"})
crime_17_df

offenses_17 = (crime_17_df.loc[crime_17_df["highest_offense"].isin(categories)])
offenses_17["x_coord"] = offenses_17["x_coord"].div(100000,  fill_value = 0)
offenses_17["y_coord"] = offenses_17["y_coord"].div(-100000,  fill_value = 0)
offenses_17

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Auto Theft,3-Feb-17,31.430280,-100.839170
1,Agg Assault,25-Feb-17,31.151540,-100.706040
2,Theft,12-Feb-17,31.354520,-101.277570
3,Theft,22-Feb-17,31.299960,-100.969830
4,Theft,8-Mar-17,31.216920,-100.465640
...,...,...,...,...
35092,Theft,24-Feb-17,31.397760,-101.250390
35093,Auto Theft,26-Feb-17,30.997170,-100.595800
35094,Burglary,24-Mar-17,31.017040,-101.352370
35095,Agg Assault,11-Mar-17,31.158430,-100.706970


In [262]:
# 2018 DF
crime_18_df = crime_data_18.drop(columns= {"highest_offense_desc","council_district", "location", "district", "zip"})
crime_18_df

offenses_18 = (crime_18_df.loc[crime_18_df["highest_offense"].isin(categories)])
offenses_18["x_coord"] = offenses_18["x_coord"].div(100000,  fill_value = 0)
offenses_18["y_coord"] = offenses_18["y_coord"].div(-100000,  fill_value = 0)
offenses_18

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Auto Theft,10-Dec-18,31.183040,-100.786200
1,Theft,16-Oct-18,31.246010,-100.849230
2,Theft,22-Jun-18,31.044710,-100.748940
3,Burglary,5-Jul-18,31.245380,-100.871770
4,Theft,24-Sep-18,31.291600,-101.265750
...,...,...,...,...
37151,Auto Theft,1-Jul-18,30.848250,-100.284430
37152,Theft,3-Dec-18,31.110610,-100.531930
37153,Theft,31-Dec-18,31.158430,-100.669470
37154,Rape,4-Aug-18,0.000000,-0.000000


In [263]:
## Non-violent Crimes list pulled from SQL file in Database folder
crimes = ['ROBBERY BY ASSAULT','PUBLIC INTOX-SOBERING CENTER','PUBLIC INTOXICATION', 'GAMBLING PROMOTION',
          'URINATING IN PUBLIC PLACE','LIQUOR LAW VIOLATION/OTHER','COMMUNICATING GAMBLING INFO',
          'FORGERY OF IDENTIFICATION', 'SIT AND LIE ORDINANCE VIOL', 'DOC CREATING NOXIOUS ODOR', 'MAIL THEFT', 
          'DRIVING WHILE INTOX / FELONY','DEBIT CARD ABUSE','DOC WINDOW PEEPING - HOTEL','AMPLIFIED MUSIC / VEHICLE', 
          'ILLEGAL LABELLING OF RECORDING','POSSESSION OF MARIJUANA','ISSUANCE OF BAD CHECK','PROBATION VIOL', 'HAZING', 
          'ABANDONED REFRIGERATOR','POCKET PICKING','TAMPERING WITH ID NUMBER','GAMBLING','THEFT BY SHOPLIFTING', 
          'IDENTITY THEFT','SLEEPING IN PUBLIC PLACE','GRAFFITI', 'FORGERY - OTHER','AUTO THEFT']

## All crimes categorized within these categories, will be used for simplification on vizualizations
categories = ['Theft', 'Robbery', 'Auto Theft', 'Burglary', 'Agg Assault', 'Rape', 'Murder']

In [264]:
# 2015
offenses_15 = (offenses_15.loc[offenses_15["highest_offense"].isin(categories)])
new_15= offenses_15.dropna()
new_15

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Robbery,1-Jan-15,31.304830,-101.023660
1,Robbery,1-Jan-15,31.247300,-100.902960
2,Burglary,1-Jan-15,31.359850,-101.172200
3,Burglary,1-Jan-15,31.298960,-100.960320
4,Burglary,1-Jan-15,31.104550,-100.393400
...,...,...,...,...
38568,Theft,31-Dec-15,31.246750,-100.724780
38569,Theft,31-Dec-15,30.962850,-101.407380
38570,Theft,31-Dec-15,31.095040,-101.013100
38571,Theft,31-Dec-15,31.502900,-100.796080


In [265]:
offenses_15_1 = (crime_15_df[crime_15_df["highest_offense"] == 'Theft'])
print(len(offenses_15_1))
offenses_15_2 = (crime_15_df[crime_15_df["highest_offense"] == 'Robbery'])
print(len(offenses_15_2))
offenses_15_3 = (crime_15_df[crime_15_df["highest_offense"] == 'Auto Theft'])
print(len(offenses_15_3))
offenses_15_4 = (crime_15_df[crime_15_df["highest_offense"] == 'Burglary'])
print(len(offenses_15_4))
offenses_15_5 = (crime_15_df[crime_15_df["highest_offense"] == 'Agg Assault'])
print(len(offenses_15_5))
offenses_15_6 = (crime_15_df[crime_15_df["highest_offense"] == 'Rape'])
print(len(offenses_15_6))
offenses_15_7 = (crime_15_df[crime_15_df["highest_offense"] == 'Murder'])
print(len(offenses_15_7))

28274
935
2018
4927
1915
481
23


In [266]:
# 2016
offenses_16 = (offenses_16.loc[offenses_16["highest_offense"].isin(categories)])
new_16= offenses_16.dropna()
new_16

Unnamed: 0,highest_offense,date_rec,x_coord,y_coord
0,Agg Assault,1-Jan-16,30.673220,-100.627960
1,Theft,1-Jan-16,31.149570,-100.704620
2,Robbery,1-Jan-16,31.291810,-101.069230
3,Theft,1-Jan-16,31.136430,-100.703570
4,Rape,1-Jan-16,0.000000,-0.000000
...,...,...,...,...
37456,Theft,31-Dec-16,31.174540,-101.090650
37457,Theft,31-Dec-16,31.139240,-100.707940
37458,Theft,31-Dec-16,31.328120,-100.843560
37459,Theft,31-Dec-16,31.317780,-100.624810


In [267]:
offenses_16_1 = (crime_16_df[crime_16_df["highest_offense"] == 'Theft'])
print(len(offenses_16_1))
offenses_16_2 = (crime_16_df[crime_16_df["highest_offense"] == 'Robbery'])
print(len(offenses_16_2))
# No Auto Theft
offenses_16_3 = (crime_16_df[crime_16_df["highest_offense"] == 'Burglary'])
print(len(offenses_16_3))
offenses_16_4 = (crime_16_df[crime_16_df["highest_offense"] == 'Agg Assault'])
print(len(offenses_16_4))
offenses_16_5 = (crime_16_df[crime_16_df["highest_offense"] == 'Rape'])
print(len(offenses_16_5))
offenses_16_6 = (crime_16_df[crime_16_df["highest_offense"] == 'Murder'])
print(len(offenses_16_6))

26241
1050
5171
2177
759
38


In [268]:
# 2017
offenses_17 = (offenses_17.loc[offenses_17["highest_offense"].isin(categories)])
offenses_17
new_17= offenses_17.dropna()
new_17

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Auto Theft,3-Feb-17,31.430280,-100.839170
1,Agg Assault,25-Feb-17,31.151540,-100.706040
2,Theft,12-Feb-17,31.354520,-101.277570
3,Theft,22-Feb-17,31.299960,-100.969830
4,Theft,8-Mar-17,31.216920,-100.465640
...,...,...,...,...
35092,Theft,24-Feb-17,31.397760,-101.250390
35093,Auto Theft,26-Feb-17,30.997170,-100.595800
35094,Burglary,24-Mar-17,31.017040,-101.352370
35095,Agg Assault,11-Mar-17,31.158430,-100.706970


In [269]:
offenses_17_1 = (crime_17_df[crime_17_df["highest_offense"] == 'Theft'])
print(len(offenses_17_1))
offenses_17_2 = (crime_17_df[crime_17_df["highest_offense"] == 'Robbery'])
print(len(offenses_17_2))
offenses_17_3 = (crime_17_df[crime_17_df["highest_offense"] == 'Auto Theft'])
print(len(offenses_17_3))
offenses_17_4 = (crime_17_df[crime_17_df["highest_offense"] == 'Burglary'])
print(len(offenses_17_4))
offenses_17_5 = (crime_17_df[crime_17_df["highest_offense"] == 'Agg Assault'])
print(len(offenses_17_5))
offenses_17_6 = (crime_17_df[crime_17_df["highest_offense"] == 'Rape'])
print(len(offenses_17_6))
offenses_17_7 = (crime_17_df[crime_17_df["highest_offense"] == 'Murder'])
print(len(offenses_17_7))

24609
979
2016
4338
2319
811
25


In [270]:
# 2018
offenses_18 = (offenses_18.loc[offenses_18["highest_offense"].isin(categories)])
offenses_18
new_18= offenses_18.dropna()
new_18

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Auto Theft,10-Dec-18,31.183040,-100.786200
1,Theft,16-Oct-18,31.246010,-100.849230
2,Theft,22-Jun-18,31.044710,-100.748940
3,Burglary,5-Jul-18,31.245380,-100.871770
4,Theft,24-Sep-18,31.291600,-101.265750
...,...,...,...,...
37151,Auto Theft,1-Jul-18,30.848250,-100.284430
37152,Theft,3-Dec-18,31.110610,-100.531930
37153,Theft,31-Dec-18,31.158430,-100.669470
37154,Rape,4-Aug-18,0.000000,-0.000000


In [271]:
offenses_18_1 = (crime_18_df[crime_18_df["highest_offense"] == 'Theft'])
print(len(offenses_18_1))
offenses_18_2 = (crime_18_df[crime_18_df["highest_offense"] == 'Robbery'])
print(len(offenses_18_2))
offenses_18_3 = (crime_18_df[crime_18_df["highest_offense"] == 'Auto Theft'])
print(len(offenses_18_3))
offenses_18_4 = (crime_18_df[crime_18_df["highest_offense"] == 'Burglary'])
print(len(offenses_18_4))
offenses_18_5 = (crime_18_df[crime_18_df["highest_offense"] == 'Agg Assault'])
print(len(offenses_18_5))
offenses_18_6 = (crime_18_df[crime_18_df["highest_offense"] == 'Rape'])
print(len(offenses_18_6))
offenses_18_7 = (crime_18_df[crime_18_df["highest_offense"] == 'Murder'])
print(len(offenses_18_7))

26572
1022
2427
4171
2128
804
32


In [272]:
# Set date index for plotting
date_15 = offenses_15.set_index("date")
date_15

Unnamed: 0_level_0,highest_offense,x_coord,y_coord
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1-Jan-15,Robbery,31.304830,-101.023660
1-Jan-15,Robbery,31.247300,-100.902960
1-Jan-15,Burglary,31.359850,-101.172200
1-Jan-15,Burglary,31.298960,-100.960320
1-Jan-15,Burglary,31.104550,-100.393400
...,...,...,...
31-Dec-15,Theft,31.246750,-100.724780
31-Dec-15,Theft,30.962850,-101.407380
31-Dec-15,Theft,31.095040,-101.013100
31-Dec-15,Theft,31.502900,-100.796080


In [273]:
locations = offenses_15[["x_coord", "y_coord"]]
print(locations)

                             x_coord                        y_coord
0                          31.304830                    -101.023660
1                          31.247300                    -100.902960
2                          31.359850                    -101.172200
3                          31.298960                    -100.960320
4                          31.104550                    -100.393400
...                              ...                            ...
38568                      31.246750                    -100.724780
38569                      30.962850                    -101.407380
38570                      31.095040                    -101.013100
38571                      31.502900                    -100.796080
38572                      31.272860                    -101.204690

[38573 rows x 2 columns]


In [274]:
# Heatmap
g_key = "AIzaSyCQRY-TmfYIztg9E8oNyUjEknsy9e7KHNs"
gmaps.configure(api_key= g_key)

In [279]:
locations = new_15.drop(columns= {"highest_offense","date"})
fig = gmaps.figure(map_type='HYBRID')
heatmap_layer = gmaps.heatmap_layer(locations)
fig.add_layer(heatmap_layer)
heatmap_layer.max_intensity = 100
heatmap_layer.point_radius = 5

fig

Figure(layout=FigureLayout(height='420px'))

In [178]:
# mapping by zip or coordinates
# running API for gun, liquor stores
# mapping heat map, export to png