In [1]:
%%writefile 4_crime.py

import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import scipy.stats as sts
import numpy as np
import descartes
import geopandas as gpd
from shapely.geometry import Point, Polygon

# `%matplotlib inline` is an IPython magic, not Python syntax. This cell is saved
# verbatim to 4_crime.py by `%%writefile`, where an active magic line would stop
# the script with a SyntaxError, so it is kept here only as a comment.
# %matplotlib inline

from math import isnan
from matplotlib.animation import FuncAnimation
import scipy.ndimage as ndimage

from matplotlib import animation, rc

import geopandas
from geopandas import GeoDataFrame


# --- Community area boundaries ---------------------------------------------
# Source:
# https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas-current-/cauq-8yn6
comm = pd.read_csv("CommAreas_20200108.csv")
comm.head()

# --- Population by community area ------------------------------------------
# Source recorded for this file:
# https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas-current-/cauq-8yn6
# NOTE(review): this is the same URL as the boundaries dataset; confirm the
# actual source of the population CSV.
pop = pd.read_csv("Pop-Census-Data-by-Chicago-Community-Area-2017.csv")
pop.head()
length = pop.shape[0]
length

# Upper-case the community names in place (presumably so they line up with an
# upper-case name column in another table -- TODO confirm).
pop["Community"] = pop["Community"].str.upper()

# Two-column view of the population table, inspected from largest to smallest.
pop2 = pop.loc[:, ["Total Population", "Community"]]
pop2.sort_values("Total Population", ascending=False)

# Import the crime data.
# Source: https://data.cityofchicago.org/Public-Safety/Crime_2014_2018/tunj-hqmk
# Author's note: the raw file should contain 1345713 records.
crime = pd.read_csv('Crime_2014_2018.csv')

# Remove the records coded with community area zero. `.copy()` makes the result
# an independent frame, so the column assignments below do not write into a
# slice of the raw table (which pandas reports as SettingWithCopyWarning).
# Rows whose community area is missing are NOT removed here: NaN != 0 is True.
crime = crime.loc[crime['Community Area'] != 0].copy()

# Parse the Date column a single time and derive every calendar part from that
# one index, rather than re-parsing the whole column for each new column.
crime_dates = pd.DatetimeIndex(crime['Date'])
crime['Year'] = crime_dates.year
crime['Month'] = crime_dates.month
crime['Day'] = crime_dates.day
del crime_dates  # only needed to build the three columns above

# Keep everything later than 2013 and earlier than 2019.
# Author's note: 1345162 records remained after this filter.
crime = crime.loc[(crime['Year'] > 2013) & (crime['Year'] < 2019)]

# Attach the community-area attributes to each crime record. A left join keeps
# every crime row, matched or not (author's note: length is 1345162).
crime_comm = crime.merge(
    comm,
    how="left",
    left_on="Community Area",
    right_on="AREA_NUMBE",
)
crime_comm.head()


def _crime_counts(keys):
    """Count non-null crime IDs per combination of *keys*, as a flat DataFrame."""
    return crime_comm.groupby(keys)["ID"].count().reset_index()


# Number of crime records ...
c_bycomm = _crime_counts(["COMMUNITY"])                                   # per community area
c_byyear = _crime_counts(["Year"])                                        # per year
c_byyear_bymonth = _crime_counts(["Year", "Month"])                       # per year and month
c_bytype = _crime_counts(["Primary Type"])                                # per crime type
c_bycomm_byyear = _crime_counts(["COMMUNITY", "Year"])                    # per community area and year
c_bycomm_byyear_bymonth = _crime_counts(["COMMUNITY", "Year", "Month"])   # per community area, year and month

# Per community area and crime type, then narrowed to EDISON PARK only.
c_bycomm_byType = _crime_counts(["COMMUNITY", "Primary Type"])
c_bycomm_byType = c_bycomm_byType[c_bycomm_byType["COMMUNITY"] == "EDISON PARK"]
c_bycomm_byType.sort_values(by="ID", ascending=False).head(100)
c_bycomm_byType.count()
crime_comm["Primary Type"].unique()

# Import the street light service request data.
# Source: https://data.cityofchicago.org/Service-Requests/ServiceRequests_2014_2018/wx4a-kywa
street = pd.read_csv('ServiceRequests_2014_2018.csv')

# Parse 'Creation Date' a single time and derive Year/Month/Day from that one
# index, rather than re-parsing the whole column for each new column.
street_dates = pd.DatetimeIndex(street['Creation Date'])
street['Year'] = street_dates.year
street['Month'] = street_dates.month
street['Day'] = street_dates.day
del street_dates  # only needed to build the three columns above

# Keep requests created later than 2013 and earlier than 2019.
street = street.loc[(street['Year'] > 2013) & (street['Year'] < 2019)]

# Number of requests left after the year filter.
# Author's note from an earlier load of this data: the length should be 73542.
length = len(street.index)

# Discard requests without a usable community area: first the missing values,
# then the rows where the area is zero.
street_nonull = street.dropna(subset=["Community Area"])
has_real_area = street_nonull["Community Area"] != 0
street_nozero = street_nonull[has_real_area]
length = street_nozero.shape[0]
length

# Smallest and largest community area left in the cleaned data.
street_nozero[["Community Area"]].min()
street_nozero[["Community Area"]].max()

# Author's note: the status options are 'Completed' and 'Open'.
street_nozero["Status"].unique()

# Author's notes: 73237 on a second run; an earlier run had 73314 records, with
# every request number unique.
street_nozero["Service Request Number"].nunique()

# Rows per request number, largest first, to spot repeated request numbers.
groups = street_nozero.groupby(["Service Request Number"])["Creation Date"].count()
groups.sort_values(ascending=False)

# Fully duplicated rows (identical in every column), not counting the first
# occurrence of each.
is_repeat = street_nozero.duplicated()
duplicateRowsDF = street_nozero[is_repeat]
print("Duplicate Rows except first occurrence based on all columns are :")
print(duplicateRowsDF)

# Quick look at the cleaned data and the range of years it covers.
street_nozero.head(3)
street_nozero["Year"].min()
street_nozero["Year"].max()

# Attach the community-area attributes to each cleaned service request. A left
# join keeps every request row, matched or not.
street_comm = street_nozero.merge(
    comm,
    how="left",
    left_on="Community Area",
    right_on="AREA_NUMBE",
)
street_comm.head()


def _request_counts(keys):
    """Count non-null service request numbers per combination of *keys*."""
    return street_comm.groupby(keys)["Service Request Number"].count().reset_index()


# Number of requests per community area.
s_bycomm = _request_counts(["COMMUNITY"])
s_bycomm.head()

# Number of requests per year.
s_byyear = _request_counts(["Year"])

# Number of requests per year and month.
s_byyear_bymonth = _request_counts(["Year", "Month"])

# Number of requests per community area and year.
s_bycomm_byyear = _request_counts(["COMMUNITY", "Year"])

# Still to do (author's notes):
#   - average time it takes to get a street light fixed over the 5 year period
#   - average time it takes to get a street light fixed, by year

# Crime counts per community, with the request counts joined on (left join:
# every community from the crime table is kept).
bycomm = c_bycomm.merge(s_bycomm, on="COMMUNITY", how="left")
bycomm.head(10)

# Add the population table, matching COMMUNITY against its Community column.
bycomm_pop = bycomm.merge(pop, how="left", left_on="COMMUNITY", right_on="Community")
bycomm_pop.head()

# Strip the commas out of the population figures and convert them to numbers;
# anything that still cannot be parsed becomes NaN.
population_text = bycomm_pop["Total Population"].astype(str).str.replace(",", "")
bycomm_pop["Total Population"] = pd.to_numeric(population_text, errors="coerce")
bycomm_pop.head()

# Service requests per crime report for each community.
ratio_column = "No. of Service Requests to Crime Reports"
bycomm_pop[ratio_column] = bycomm_pop["Service Request Number"].div(bycomm_pop["ID"])

# Inspect the table ordered, ascending then descending, by the ratio, by the
# number of service requests, and by the number of crime reports (all 5 years).
for sort_key in (ratio_column, "Service Request Number", "ID"):
    bycomm_pop.sort_values([sort_key], ascending=True)
    bycomm_pop.sort_values([sort_key], ascending=False)

# Crime and request counts side by side, per year.
byyear = c_byyear.merge(s_byyear, on="Year", how="left")
byyear.head()

# ... per year and month.
byyearbymonth = c_byyear_bymonth.merge(s_byyear_bymonth, on=["Year", "Month"], how="left")
byyearbymonth.head()

# ... per community and year, with the community-area attributes attached.
bycommbyyear = c_bycomm_byyear.merge(s_bycomm_byyear, on=["COMMUNITY", "Year"], how="left")
bycommbyyear = bycommbyyear.merge(comm, on=["COMMUNITY"], how="left")
bycommbyyear.head()

# Visual: community areas by crime reports (x) and street light requests (y).
# One bubble per community area, sized by its total population.

x = bycomm_pop['ID']
y = bycomm_pop['Service Request Number']
z = bycomm_pop['Community']
w = bycomm_pop['No. of Service Requests to Crime Reports']

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(x, y, s=bycomm_pop['Total Population'] * .003, c='purple', alpha=0.6)


def _label_extreme(values, target, suffix, dx=0, dy=0):
    """Annotate the first community whose entry in *values* equals *target*.

    The label is the community name followed by *suffix*, placed at the
    community's (crime reports, service requests) position shifted by
    (*dx*, *dy*). Returns that unshifted position as a tuple.
    """
    hit = values == target
    crimes = bycomm_pop.loc[hit, 'ID'].iloc[0]
    requests = bycomm_pop.loc[hit, 'Service Request Number'].iloc[0]
    # The name is read from COMMUNITY, the key bycomm_pop was left-joined on.
    # 'Community' is only filled where the population table matched, and a
    # missing (NaN) name cannot be concatenated with the suffix string.
    name = bycomm_pop.loc[hit, 'COMMUNITY'].iloc[0]
    ax.annotate(name + suffix, xy=(crimes + dx, requests + dy))
    return crimes, requests


# Street light request data
y_max = y.max()
y_min = y.min()
_label_extreme(y, y_max, " (Largest Service Request)", dx=1000)
_label_extreme(y, y_min, " (Smallest Service Request)")

# Crime numbers
x_max = x.max()
x_min = x.min()
_label_extreme(x, x_max, " (Largest Crime)", dx=-1000, dy=-75)
_label_extreme(x, x_min, " (Smallest Crime)")

# Service Request to Crime ratio
w_max = w.max()
w_min = w.min()
_label_extreme(w, w_max, " (Largest Service Request to Crime Report Ratio)")
a, b = _label_extreme(w, w_min, " (Smallest  Service Request to Crime Report Ratio)")
print(a)
print(b)



# Mapping approach adapted from
# https://www.kaggle.com/threadid/geopandas-mapping-chicago-crimes

# Shapefile used as the base layer of the maps.
# NOTE(review): absolute path on one user's machine; the script only runs where
# the shapefile exists at exactly this location.
street_map = gpd.read_file(r'C:\Users\dharti.patel\Desktop\NU_DS_Project1\Street-Lights\Street-Lights\geo_export_f11eb402-3887-43c2-a681-95fde0bf6fbf.shp')

# Street Lights

# The points are built from the Longitude/Latitude columns, so they are declared
# as EPSG:4326 using the authority-string form. (Assumes those columns hold
# WGS84 degrees; the 41.0 latitude cut-off below only makes sense in degrees.)
crs = 'EPSG:4326'

# One point per service request, in (longitude, latitude) order.
geometry = [Point(xy) for xy in zip(street_comm["Longitude"], street_comm["Latitude"])]
geo_df = gpd.GeoDataFrame(street_comm,
                          crs=crs,
                          geometry=geometry)

# dropna() with its defaults removes a row when ANY column is missing, not only
# the coordinates.
# NOTE(review): confirm that is intended rather than a coordinate-only subset.
street_loc_df = geo_df.dropna(inplace=False)
# Remove bad values in Long/Lat (latitude below 41.0).
street_loc_df = street_loc_df.drop(street_loc_df[(street_loc_df.Latitude < 41.0)].index)
street_loc_df['AREA_NUMBE'] = street_loc_df['AREA_NUMBE'].astype(int)

# Rebuild the points from the cleaned rows so geometry and rows line up.
street_geometry = [Point(xy) for xy in zip(street_loc_df.Longitude, street_loc_df.Latitude)]

# Same coordinates as above, hence the same CRS.
street_crs = crs
street_points = GeoDataFrame(street_loc_df, crs=street_crs, geometry=street_geometry)

# Street lights map: the shapefile as a translucent base layer, with every
# cleaned street light request drawn on top as a small yellow dot.
chistreet_map = street_map.plot(figsize=(15, 15), edgecolor='#7f7f7f', facecolor='#1f77b4', alpha=0.25, linewidth=2)

# Label each shape with its `community` value at the shape's centroid. The label
# text is passed positionally: Matplotlib 3.3 renamed that parameter from `s` to
# `text`, so spelling it `s=` fails on current releases.
for _, area in street_map.iterrows():
    chistreet_map.annotate(area['community'], xy=area.geometry.centroid.coords[0], ha='center', size=6)

# No figsize here: the points are drawn onto the existing axes, whose figure
# size was fixed when the base layer was plotted.
street_points.plot(ax=chistreet_map, markersize=1, color='y', alpha=0.25)
chistreet_map.set_axis_off()

# Yearly street light visuals: one map per year, each drawn in its own colour.
#
# `Year` is taken from DatetimeIndex(...).year and therefore holds integers, so
# the years are compared as integers. String labels such as '2014' never equal
# an integer year and would select no rows at all.
street_year_colors = (
    (2014, 'red'),
    (2015, 'blue'),
    (2016, 'purple'),
    (2017, 'green'),
    (2018, 'orange'),
)

for year, year_color in street_year_colors:
    street_vc_points = street_points.loc[street_points['Year'] == year]

    street_vc_points_map = street_vc_points.plot(figsize=(10, 10), markersize=1, c=year_color, alpha=0.2)
    street_vc_points_map.set_axis_off()
    street_vc_points_map.set(title='Street Light Outages {}'.format(year))

# Restrict the crime maps to a single crime type.
crime_comm_type = crime_comm.loc[crime_comm['Primary Type'] == 'BATTERY']

# Crime maps

# The points are built from the Longitude/Latitude columns, so they are declared
# as EPSG:4326 using the authority-string form. (Assumes those columns hold
# WGS84 degrees; the 41.0 latitude cut-off below only makes sense in degrees.)
crs = 'EPSG:4326'

# One point per BATTERY record, in (longitude, latitude) order.
geometry = [Point(xy) for xy in zip(crime_comm_type["Longitude"], crime_comm_type["Latitude"])]
geo_df = gpd.GeoDataFrame(crime_comm_type,
                          crs=crs,
                          geometry=geometry)

# dropna() with its defaults removes a row when ANY column is missing, not only
# the coordinates.
# NOTE(review): confirm that is intended rather than a coordinate-only subset.
crime_loc_df = geo_df.dropna(inplace=False)
# Remove bad values in Long/Lat (latitude below 41.0).
crime_loc_df = crime_loc_df.drop(crime_loc_df[(crime_loc_df.Latitude < 41.0)].index)
crime_loc_df['AREA_NUMBE'] = crime_loc_df['AREA_NUMBE'].astype(int)

# Rebuild the points from the cleaned rows so geometry and rows line up.
crime_geometry = [Point(xy) for xy in zip(crime_loc_df.Longitude, crime_loc_df.Latitude)]

# Same coordinates as above, hence the same CRS.
crime_crs = crs
crime_points = GeoDataFrame(crime_loc_df, crs=crime_crs, geometry=crime_geometry)

# Crime map: the shapefile as a translucent base layer, with every cleaned
# crime point drawn on top as a small red dot.
chicrime_map = street_map.plot(figsize=(15, 15), edgecolor='#7f7f7f', facecolor='#1f77b4', alpha=0.25, linewidth=2)

# Label each shape with its `community` value at the shape's centroid. The label
# text is passed positionally: Matplotlib 3.3 renamed that parameter from `s` to
# `text`, so spelling it `s=` fails on current releases.
for _, area in street_map.iterrows():
    chicrime_map.annotate(area['community'], xy=area.geometry.centroid.coords[0], ha='center', size=6)

# No figsize here: the points are drawn onto the existing axes, whose figure
# size was fixed when the base layer was plotted.
crime_points.plot(ax=chicrime_map, markersize=1, color='r', alpha=0.25)
chicrime_map.set_axis_off()

# Yearly crime visuals: one map of the crime points per year, each drawn in its
# own colour.
#
# The years are compared as integers. NOTE(review): this assumes `Year` holds
# integer years (e.g. derived via DatetimeIndex(...).year); string labels such
# as '2014' never equal an integer year and would select no rows at all.
crime_year_colors = (
    (2014, 'red'),
    (2015, 'blue'),
    (2016, 'purple'),
    (2017, 'green'),
    (2018, 'orange'),
)

for year, year_color in crime_year_colors:
    crime_vc_points = crime_points.loc[crime_points['Year'] == year]

    crime_vc_points_map = crime_vc_points.plot(figsize=(10, 10), markersize=1, c=year_color, alpha=0.2)
    crime_vc_points_map.set_axis_off()
    crime_vc_points_map.set(title='Crime: Battery ({})'.format(year))

Writing 4_crime.py
