In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json
from datetime import datetime
import requests
import gmaps
import os

# Import API key
from api_keys import g_key

# Load one CSV export of crime records per year.
crimes_19 = pd.read_csv('input_data/Chicago_Crimes_2019.csv')
crimes_20 = pd.read_csv('input_data/Chicago_Crimes_2020.csv')
crimes_21 = pd.read_csv('input_data/Chicago_Crimes_2021.csv')

# Stack the three yearly frames into a single dataframe.
# pd.concat appends rows directly; the previous nested outer merge joined on
# every shared column, which is much slower on large frames, reorders rows and
# raises if a column's dtype differs between files.
# NOTE(review): assumes the yearly files do not contain the same record twice
# across files — confirm, or add drop_duplicates() if they can overlap.
crimes_data = pd.concat([crimes_19, crimes_20, crimes_21], ignore_index=True)

# Total number of records across the three years.
print(len(crimes_data))


In [None]:
# Parse the "Date" column into datetimes, then derive the calendar and
# time-of-day helper columns from it.
crimes_data["Date"] = pd.to_datetime(crimes_data["Date"])
when = crimes_data["Date"].dt  # datetime accessor shared by all derived columns

crimes_data["Day"] = when.day_name()        # weekday name
crimes_data["Month Day"] = when.day         # day of the month
crimes_data["Month"] = when.month_name()
crimes_data["Year"] = when.year
crimes_data["Time"] = when.time
crimes_data["Hour"] = when.strftime("%H:00")  # hour bucket, e.g. "13:00"
crimes_data["AM_PM"] = when.strftime("%p")

# Shape (rows, columns) before cleansing.
print(crimes_data.shape)

# Cleanse the dataframe: drop every row that has a NaN in any column.
crimes_data = crimes_data.dropna(how="any")

# Shape (rows, columns) after cleansing.
print(crimes_data.shape)

# # temporarily reducing the size of the dataframe to run it quicker
# crimes_data = crimes_data.head(10000)
crimes_data

In [None]:
# List all the column titles of crimes_data (bare expression, so the notebook
# displays the resulting Index as the cell output).
crimes_data.columns

In [None]:
# Split the records that resulted in an arrest into one dataframe per year.
arrest_made = crimes_data["Arrest"].eq(True)
crime_year = crimes_data["Year"]

arrest19_df = pd.DataFrame(crimes_data.loc[crime_year.eq(2019) & arrest_made])
arrest20_df = pd.DataFrame(crimes_data.loc[crime_year.eq(2020) & arrest_made])
arrest21_df = pd.DataFrame(crimes_data.loc[crime_year.eq(2021) & arrest_made])

# Preview the 2019 arrests.
arrest19_df.head()

In [None]:
# Total arrests per year: row count of each per-year arrest dataframe.
total_arrest_19 = len(arrest19_df["ID"])
total_arrest_20 = len(arrest20_df["ID"])
total_arrest_21 = len(arrest21_df["ID"])

# Total recorded crimes per year: row count of crimes_data filtered by "Year".
total_crimes_19 = len(crimes_data[crimes_data["Year"] == 2019]["ID"])
total_crimes_20 = len(crimes_data[crimes_data["Year"] == 2020]["ID"])
total_crimes_21 = len(crimes_data[crimes_data["Year"] == 2021]["ID"])

# The year labels previously printed as "019"/"020"/"021" (leading "2" missing).
print(f"2019 Arrest = {total_arrest_19}, 2020 Arrest = {total_arrest_20}, 2021 Arrest = {total_arrest_21}")
print(f"2019 crime = {total_crimes_19}, 2020 crime = {total_crimes_20}, 2021 crime = {total_crimes_21}")



In [None]:
# Grouped bar plot comparing total arrests with total crimes for each year.

Arrests = [total_arrest_19, total_arrest_20, total_arrest_21]
Total_crimes = [total_crimes_19, total_crimes_20, total_crimes_21]

labels = ["2019", "2020", "2021"]
x = np.arange(len(labels))  # one group position per year

width = 0.3  # width of each bar
fig, ax = plt.subplots()
# Shift the two series half a bar-width left/right so they sit side by side.
rects1 = ax.bar(x - width/2, Arrests, width, label='Total Arrests')
rects2 = ax.bar(x + width/2, Total_crimes, width, label='Total Crimes')

# Axis label, title, per-year tick labels and legend.
ax.set_ylabel('Crimes')
ax.set_title('Crimes in Chicago')
ax.set_xticks(x, labels)
ax.legend()

# Print the count above each bar.
ax.bar_label(rects1, padding=3)
ax.bar_label(rects2, padding=3)

fig.tight_layout()
# savefig does not create missing directories; make sure the output folder
# exists so a fresh checkout does not fail with FileNotFoundError.
os.makedirs("Images", exist_ok=True)
fig.savefig("Images/chicago_crime_arrest.png")
plt.show()


In [None]:
# Count the crimes that fall into each hour-of-day bucket ("HH:00").
time_analysis = crimes_data[["ID", 'Hour']].groupby('Hour')['ID'].count()

# Convert the Series to a dataframe, relabel midnight as "24:00" so it sorts
# last, then order the rows by hour.
time_analysis = time_analysis.to_frame().reset_index()
time_analysis = time_analysis.replace(to_replace =["00:00"],
                            value ="24:00").sort_values(by=['Hour']).reset_index(drop=True)

# Line graph of crimes per hour.
hour_ax = time_analysis.plot(kind="line", x="Hour", y="ID", xlabel="Time of the day in military format",
                  ylabel ="Total Number of Crimes",label='Crimes Committed')
hour_ax.set_title('Crimes per Time of the Day (3 years)')

# pandas draws a string-valued x column at integer positions 0..n-1, so place
# one tick on every position and label it with the matching "Hour" value.
# (Ticks at 1..24 put each label one hour away from its data point and added
# an empty tick past the last point.)
hour_ax.set_xticks(np.arange(len(time_analysis)))
hour_ax.set_xticklabels(time_analysis["Hour"], rotation=45, ha="right")
plt.show()

In [None]:
# Plot crime locations on a Google map.
# Work in progress: mapping every crime covers too large an area to be useful,
# so only a subset is plotted. Idea: compare the Loop area against another area.

# Select the 2021 records whose primary type is "HOMICIDE".
# NOTE(review): the frame is named `vehicle_theft` but the filter selects
# "HOMICIDE" — confirm which crime type is intended.
is_homicide = crimes_data["Primary Type"] == "HOMICIDE"
in_2021 = crimes_data["Year"] == 2021
vehicle_theft = crimes_data[is_homicide & in_2021]

# Crime descriptions as a plain list, one entry per selected record.
crime_desc = vehicle_theft["Description"].tolist()

# Authenticate gmaps with the key imported from api_keys.
gmaps.configure(api_key=g_key)

# Latitude/longitude pair for each selected record.
marker_locations = vehicle_theft[["Latitude", "Longitude"]]

# Info-box text shown for each marker, built from the crime descriptions.
info_boxes = [f"Crime Description: {desc}" for desc in crime_desc]

# Build the figure, add one marker per location, and display it.
fig = gmaps.figure()
markers = gmaps.marker_layer(marker_locations, info_box_content=info_boxes)
fig.add_layer(markers)
fig