In [442]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import gmaps
import gmaps.datasets
import os
import apikey as api_key
import json
import ipywidgets
import geopy

In [444]:
# Import CSVs for 2015-2018 
crime_15 = "Crime_Database/Data/crimedata2015.csv"
crime_16 = "Crime_Database/Data/crimedata2016.csv"
crime_17 = "Crime_Database/Data/crimedata2017.csv"
crime_18 = "Crime_Database/Data/crimedata2018.csv"
crime_data_15 = pd.read_csv(crime_15)
crime_data_16 = pd.read_csv(crime_16)
crime_data_17 = pd.read_csv(crime_17)
crime_data_18 = pd.read_csv(crime_18)

In [445]:
## Comments to group: (To be Deleted)
# Merge cannot be performed, there is too much Data on each CSV - I get a memory error 
# I can merge CSVs by file and chunk size but some data will be lost, figured cleaning each was better for graphs
# I am unable to filter by the crimes in highest_offense_desc row, tried setting as index and renaming column
# highest_offense_desc row is only row that will not filter

In [446]:
# Cleaning 2015 DF

# Drop columns
crime_15_df = crime_data_15.drop(columns= {"highest_offense_desc", "council_district", "location", "district", "zip"})
crime_15_df

offenses_15 = (crime_15_df.loc[crime_15_df["highest_offense"].isin(categories)])
offenses_15["x_coord"] = offenses_15["x_coord"].div(100000,  fill_value = 0)
offenses_15["y_coord"] = offenses_15["y_coord"].div(-100000,  fill_value = 0)

# Standard error
offenses_15["x_coord"] -= 0.9
offenses_15["y_coord"] += 3
offenses_15

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Robbery,1-Jan-15,30.404830,-98.023660
1,Robbery,1-Jan-15,30.347300,-97.902960
2,Burglary,1-Jan-15,30.459850,-98.172200
3,Burglary,1-Jan-15,30.398960,-97.960320
4,Burglary,1-Jan-15,30.204550,-97.393400
...,...,...,...,...
38568,Theft,31-Dec-15,30.346750,-97.724780
38569,Theft,31-Dec-15,30.062850,-98.407380
38570,Theft,31-Dec-15,30.195040,-98.013100
38571,Theft,31-Dec-15,30.602900,-97.796080


In [447]:
# 2016 DF
crime_16_df = crime_data_16.drop(columns= {"highest_offense_desc", "council_district", "location_rec", "district", "zip"})
crime_16_df

# Standard error
offenses_16 = (crime_16_df.loc[crime_16_df["highest_offense"].isin(categories)])
offenses_16["x_coord"] = offenses_16["x_coord"].div(100000,  fill_value = 0)
offenses_16["y_coord"] = offenses_16["y_coord"].div(-100000,  fill_value = 0)
offenses_16["x_coord"] -= 0.9
offenses_16["y_coord"] += 3
offenses_16

Unnamed: 0,highest_offense,date_rec,x_coord,y_coord
0,Agg Assault,1-Jan-16,29.773220,-97.627960
1,Theft,1-Jan-16,30.249570,-97.704620
2,Robbery,1-Jan-16,30.391810,-98.069230
3,Theft,1-Jan-16,30.236430,-97.703570
4,Rape,1-Jan-16,-0.900000,3.000000
...,...,...,...,...
37456,Theft,31-Dec-16,30.274540,-98.090650
37457,Theft,31-Dec-16,30.239240,-97.707940
37458,Theft,31-Dec-16,30.428120,-97.843560
37459,Theft,31-Dec-16,30.417780,-97.624810


In [448]:
# 2017 DF
crime_17_df = crime_data_17.drop(columns= {"highest_offense_desc","council_district", "location", "district", "zip"})
crime_17_df

offenses_17 = (crime_17_df.loc[crime_17_df["highest_offense"].isin(categories)])
offenses_17["x_coord"] = offenses_17["x_coord"].div(100000,  fill_value = 0)
offenses_17["y_coord"] = offenses_17["y_coord"].div(-100000,  fill_value = 0)

# Standard error
offenses_17["x_coord"] -= 0.9
offenses_17["y_coord"] += 3
offenses_17

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Auto Theft,3-Feb-17,30.530280,-97.839170
1,Agg Assault,25-Feb-17,30.251540,-97.706040
2,Theft,12-Feb-17,30.454520,-98.277570
3,Theft,22-Feb-17,30.399960,-97.969830
4,Theft,8-Mar-17,30.316920,-97.465640
...,...,...,...,...
35092,Theft,24-Feb-17,30.497760,-98.250390
35093,Auto Theft,26-Feb-17,30.097170,-97.595800
35094,Burglary,24-Mar-17,30.117040,-98.352370
35095,Agg Assault,11-Mar-17,30.258430,-97.706970


In [449]:
# 2018 DF
crime_18_df = crime_data_18.drop(columns= {"highest_offense_desc","council_district", "location", "district", "zip"})
crime_18_df

offenses_18 = (crime_18_df.loc[crime_18_df["highest_offense"].isin(categories)])
offenses_18["x_coord"] = offenses_18["x_coord"].div(100000,  fill_value = 0)
offenses_18["y_coord"] = offenses_18["y_coord"].div(-100000,  fill_value = 0)

# Standard error
offenses_18["x_coord"] -= 0.9
offenses_18["y_coord"] += 3
offenses_18

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Auto Theft,10-Dec-18,30.283040,-97.786200
1,Theft,16-Oct-18,30.346010,-97.849230
2,Theft,22-Jun-18,30.144710,-97.748940
3,Burglary,5-Jul-18,30.345380,-97.871770
4,Theft,24-Sep-18,30.391600,-98.265750
...,...,...,...,...
37151,Auto Theft,1-Jul-18,29.948250,-97.284430
37152,Theft,3-Dec-18,30.210610,-97.531930
37153,Theft,31-Dec-18,30.258430,-97.669470
37154,Rape,4-Aug-18,-0.900000,3.000000


In [450]:
## Non-violent Crimes list pulled from SQL file in Database folder
crimes = ['ROBBERY BY ASSAULT','PUBLIC INTOX-SOBERING CENTER','PUBLIC INTOXICATION', 'GAMBLING PROMOTION',
          'URINATING IN PUBLIC PLACE','LIQUOR LAW VIOLATION/OTHER','COMMUNICATING GAMBLING INFO',
          'FORGERY OF IDENTIFICATION', 'SIT AND LIE ORDINANCE VIOL', 'DOC CREATING NOXIOUS ODOR', 'MAIL THEFT', 
          'DRIVING WHILE INTOX / FELONY','DEBIT CARD ABUSE','DOC WINDOW PEEPING - HOTEL','AMPLIFIED MUSIC / VEHICLE', 
          'ILLEGAL LABELLING OF RECORDING','POSSESSION OF MARIJUANA','ISSUANCE OF BAD CHECK','PROBATION VIOL', 'HAZING', 
          'ABANDONED REFRIGERATOR','POCKET PICKING','TAMPERING WITH ID NUMBER','GAMBLING','THEFT BY SHOPLIFTING', 
          'IDENTITY THEFT','SLEEPING IN PUBLIC PLACE','GRAFFITI', 'FORGERY - OTHER','AUTO THEFT']

## All crimes categorized within these categories, will be used for simplification on vizualizations
categories = ['Theft', 'Robbery', 'Auto Theft', 'Burglary', 'Agg Assault', 'Rape', 'Murder']

In [451]:
# 2015
offenses_15 = (offenses_15.loc[offenses_15["highest_offense"].isin(categories)])
new_15= offenses_15.dropna()
new_15

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Robbery,1-Jan-15,30.404830,-98.023660
1,Robbery,1-Jan-15,30.347300,-97.902960
2,Burglary,1-Jan-15,30.459850,-98.172200
3,Burglary,1-Jan-15,30.398960,-97.960320
4,Burglary,1-Jan-15,30.204550,-97.393400
...,...,...,...,...
38568,Theft,31-Dec-15,30.346750,-97.724780
38569,Theft,31-Dec-15,30.062850,-98.407380
38570,Theft,31-Dec-15,30.195040,-98.013100
38571,Theft,31-Dec-15,30.602900,-97.796080


In [481]:
# 2016
offenses_16 = (offenses_16.loc[offenses_16["highest_offense"].isin(categories)])
new_16= offenses_16.dropna()

# Outlier in this DF
new_16.loc[new_16['x_coord']==388.05239]
new_16 = new_16[new_16.x_coord != 388.05239]
new_16

Unnamed: 0,highest_offense,date_rec,x_coord,y_coord
0,Agg Assault,1-Jan-16,29.773220,-97.627960
1,Theft,1-Jan-16,30.249570,-97.704620
2,Robbery,1-Jan-16,30.391810,-98.069230
3,Theft,1-Jan-16,30.236430,-97.703570
4,Rape,1-Jan-16,-0.900000,3.000000
...,...,...,...,...
37456,Theft,31-Dec-16,30.274540,-98.090650
37457,Theft,31-Dec-16,30.239240,-97.707940
37458,Theft,31-Dec-16,30.428120,-97.843560
37459,Theft,31-Dec-16,30.417780,-97.624810


In [484]:
# 2017
offenses_17 = (offenses_17.loc[offenses_17["highest_offense"].isin(categories)])
offenses_17
new_17= offenses_17.dropna()

# Outlier in this DF
new_17.loc[new_17['x_coord'] ==388.05239]
new_17 = new_17[new_17.x_coord != 388.05239]
new_17

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Auto Theft,3-Feb-17,30.530280,-97.839170
1,Agg Assault,25-Feb-17,30.251540,-97.706040
2,Theft,12-Feb-17,30.454520,-98.277570
3,Theft,22-Feb-17,30.399960,-97.969830
4,Theft,8-Mar-17,30.316920,-97.465640
...,...,...,...,...
35092,Theft,24-Feb-17,30.497760,-98.250390
35093,Auto Theft,26-Feb-17,30.097170,-97.595800
35094,Burglary,24-Mar-17,30.117040,-98.352370
35095,Agg Assault,11-Mar-17,30.258430,-97.706970


In [485]:
# 2018
offenses_18 = (offenses_18.loc[offenses_18["highest_offense"].isin(categories)])
offenses_18
new_18= offenses_18.dropna()
new_18

Unnamed: 0,highest_offense,date,x_coord,y_coord
0,Auto Theft,10-Dec-18,30.283040,-97.786200
1,Theft,16-Oct-18,30.346010,-97.849230
2,Theft,22-Jun-18,30.144710,-97.748940
3,Burglary,5-Jul-18,30.345380,-97.871770
4,Theft,24-Sep-18,30.391600,-98.265750
...,...,...,...,...
37151,Auto Theft,1-Jul-18,29.948250,-97.284430
37152,Theft,3-Dec-18,30.210610,-97.531930
37153,Theft,31-Dec-18,30.258430,-97.669470
37154,Rape,4-Aug-18,-0.900000,3.000000


In [486]:
# Set date index for plotting
date_15 = offenses_15.set_index("date")
date_15

Unnamed: 0_level_0,highest_offense,x_coord,y_coord
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1-Jan-15,Robbery,30.404830,-98.023660
1-Jan-15,Robbery,30.347300,-97.902960
1-Jan-15,Burglary,30.459850,-98.172200
1-Jan-15,Burglary,30.398960,-97.960320
1-Jan-15,Burglary,30.204550,-97.393400
...,...,...,...
31-Dec-15,Theft,30.346750,-97.724780
31-Dec-15,Theft,30.062850,-98.407380
31-Dec-15,Theft,30.195040,-98.013100
31-Dec-15,Theft,30.602900,-97.796080


In [487]:
# Heatmap
g_key = "AIzaSyCQRY-TmfYIztg9E8oNyUjEknsy9e7KHNs"
gmaps.configure(api_key= g_key)

In [513]:
locations = new_15.drop(columns= {"highest_offense","date"})
fig = gmaps.figure(map_type='ROADMAP')
heatmap_layer = gmaps.heatmap_layer(locations)
fig.add_layer(heatmap_layer)
heatmap_layer.max_intensity = 100
heatmap_layer.point_radius = 6
fig

Figure(layout=FigureLayout(height='420px'))

In [514]:
locations = new_16.drop(columns= {"highest_offense","date_rec"})
fig = gmaps.figure(map_type='ROADMAP')
heatmap_layer = gmaps.heatmap_layer(locations)
fig.add_layer(heatmap_layer)
heatmap_layer.max_intensity = 100
heatmap_layer.point_radius = 6

fig

Figure(layout=FigureLayout(height='420px'))

In [515]:
locations = new_17.drop(columns= {"highest_offense","date"})
fig = gmaps.figure(map_type='ROADMAP')
heatmap_layer = gmaps.heatmap_layer(locations)
fig.add_layer(heatmap_layer)
heatmap_layer.max_intensity = 100
heatmap_layer.point_radius = 6

fig

Figure(layout=FigureLayout(height='420px'))

In [516]:
locations = new_18.drop(columns= {"highest_offense","date"})
fig = gmaps.figure(map_type='ROADMAP')
heatmap_layer = gmaps.heatmap_layer(locations)
fig.add_layer(heatmap_layer)
heatmap_layer.max_intensity = 100
heatmap_layer.point_radius = 6

fig

Figure(layout=FigureLayout(height='420px'))