In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import datetime as dt
import numpy as np
from scipy.stats import linregress

# Locations of the raw input files, relative to the notebook's working directory
cobra_09_path, cobra_20_path, cobra_20_Old_path, cobra_21_path = (
    "data/COBRA-2009-2019.csv",
    "data/COBRA-2020.csv",
    "data/COBRA-2020-Old.csv",
    "data/COBRA-2021.csv",
)
weather_data_path, county_cases_path = (
    "data/weather_data.csv",
    "data/county_cases.csv",
)

# Load each file into its own dataframe.
# Only the 2009-2019 extract is read with low_memory=False (whole-file dtype inference).
cobra_09 = pd.read_csv(cobra_09_path, low_memory=False)
cobra_20 = pd.read_csv(cobra_20_path)
cobra_20_Old = pd.read_csv(cobra_20_Old_path)
cobra_21 = pd.read_csv(cobra_21_path)
weather_data = pd.read_csv(weather_data_path)
county_cases = pd.read_csv(county_cases_path)

In [None]:
# Replace the 2009-2019 headers (assigned purely by position) and then discard
# the fields that are not carried forward into the combined crime table.
cobra_09_headers = [
    'offense_id', 'rpt_date', 'occur_date', 'occur_time', 'poss_date', 'poss_time',
    'beat', 'apt_office_prefix', 'apt_office_num', 'location', 'shift_occurence',
    'location_type', 'UC2_Literal', 'UC2#', 'ibr_code', 'neighborhood', 'npu',
    'lat', 'long',
]
cobra_09_unused = [
    'apt_office_prefix', 'apt_office_num', 'shift_occurence',
    'location_type', 'UC2#', 'poss_date',
]
cobra_09.columns = cobra_09_headers
cobra_09 = cobra_09.drop(columns=cobra_09_unused)
cobra_09

In [None]:
# Parse both date fields and store them back as zero-padded MM/DD/YYYY text,
# which the later string slicing (year / month / day) relies on.
for date_col in ('rpt_date', 'occur_date'):
    parsed_dates = pd.to_datetime(cobra_09[date_col])
    cobra_09[date_col] = parsed_dates.dt.strftime("%m/%d/%Y")
cobra_09.head()

In [None]:
# The year is the last four characters of the MM/DD/YYYY text; keep it as a column
cobra_09.insert(3, 'occur_year', cobra_09['occur_date'].str[-4:])

# Collect every offense whose year does not start with "20" ...
year_not_20xx = cobra_09['occur_year'].str[:2] != '20'
cobra_09_delete = cobra_09.loc[year_not_20xx]
cobra_09_delete_list = cobra_09_delete['offense_id'].to_list()

# ... and remove all rows carrying one of those offense ids
flagged_for_removal = cobra_09['offense_id'].isin(cobra_09_delete_list)
cobra_09 = cobra_09[~flagged_for_removal]
cobra_09

In [None]:
# Replace the headers of the older 2020 extract (assigned by position; this file
# has no ibr_code field) and discard the fields that are not carried forward.
cobra_20_Old_headers = [
    'offense_id', 'rpt_date', 'occur_date', 'occur_time', 'poss_date', 'poss_time',
    'beat', 'apt_office_prefix', 'apt_office_num', 'location', 'shift_occurence',
    'location_type', 'UC2_Literal', 'UC2#', 'neighborhood', 'npu', 'lat', 'long',
]
cobra_20_Old_unused = [
    'apt_office_prefix', 'apt_office_num', 'shift_occurence',
    'location_type', 'UC2#', 'poss_date',
]
cobra_20_Old.columns = cobra_20_Old_headers
cobra_20_Old = cobra_20_Old.drop(columns=cobra_20_Old_unused)
cobra_20_Old

In [None]:
# Parse both date fields and store them back as zero-padded MM/DD/YYYY text
for date_col in ('rpt_date', 'occur_date'):
    parsed_dates = pd.to_datetime(cobra_20_Old[date_col])
    cobra_20_Old[date_col] = parsed_dates.dt.strftime("%m/%d/%Y")
cobra_20_Old.head()

In [None]:
# The year is the last four characters of the MM/DD/YYYY text; keep it as a column
cobra_20_Old.insert(3, 'occur_year', cobra_20_Old['occur_date'].str[-4:])

# Collect every offense whose year does not start with "20" ...
year_not_20xx = cobra_20_Old['occur_year'].str[:2] != '20'
cobra_20_Old_delete = cobra_20_Old.loc[year_not_20xx]
cobra_20_Old_delete_list = cobra_20_Old_delete['offense_id'].to_list()

# ... and remove all rows carrying one of those offense ids
flagged_for_removal = cobra_20_Old['offense_id'].isin(cobra_20_Old_delete_list)
cobra_20_Old = cobra_20_Old[~flagged_for_removal]
cobra_20_Old

In [None]:
# Fields of the 2020 extract that are not carried forward
cobra_20_unused = [
    'apt_office_prefix', 'apt_office_num', 'MinOfucr', 'dispo_code',
    'Shift', 'loc_type', 'poss_date',
]
# Target column order for the 2020 extract
cobra_20_reorder = [
    'offense_id', 'rpt_date', 'occur_date', 'occur_time', 'poss_time', 'beat',
    'location', 'ibr_code', 'UC2_Literal', 'neighborhood', 'npu', 'lat', 'long',
]

cobra_20 = cobra_20.drop(columns=cobra_20_unused)
# reindex() also creates an all-NaN column for any label missing from the frame
cobra_20_reordered = cobra_20.reindex(columns=cobra_20_reorder)
cobra_20 = cobra_20_reordered.copy()

In [None]:
# occur_date is still the text read from the CSV at this point; taking the last
# four characters assumes that text ends in a four-digit year -- TODO confirm.
cobra_20.insert(3, 'occur_year', cobra_20['occur_date'].str[-4:])

# Collect every offense whose year does not start with "20" ...
year_not_20xx = cobra_20['occur_year'].str[:2] != '20'
cobra_20_delete = cobra_20.loc[year_not_20xx]
cobra_20_delete_list = cobra_20_delete['offense_id'].to_list()

# ... and remove all rows carrying one of those offense ids
flagged_for_removal = cobra_20['offense_id'].isin(cobra_20_delete_list)
cobra_20 = cobra_20[~flagged_for_removal]
cobra_20.head()

In [None]:
# Parse both date fields and store them back as zero-padded MM/DD/YYYY text
for date_col in ('rpt_date', 'occur_date'):
    parsed_dates = pd.to_datetime(cobra_20[date_col])
    cobra_20[date_col] = parsed_dates.dt.strftime("%m/%d/%Y")
cobra_20.head()

In [None]:
# Remove the 2021 fields that are not carried forward into the combined table.
# Each label is listed exactly once (the earlier list repeated 'occur_day_num').
cobra_21_unused = ['occur_day', 'occur_day_num', 'zone', 'poss_date']
cobra_21 = cobra_21.drop(columns=cobra_21_unused)
cobra_21.head()

In [None]:
# occur_date is still the text read from the CSV at this point; taking the last
# four characters assumes that text ends in a four-digit year -- TODO confirm.
cobra_21.insert(3, 'occur_year', cobra_21['occur_date'].str[-4:])

# Collect every offense whose year does not start with "20" ...
year_not_20xx = cobra_21['occur_year'].str[:2] != '20'
cobra_21_delete = cobra_21.loc[year_not_20xx]
cobra_21_delete_list = cobra_21_delete['offense_id'].to_list()

# ... and remove all rows carrying one of those offense ids
flagged_for_removal = cobra_21['offense_id'].isin(cobra_21_delete_list)
cobra_21 = cobra_21[~flagged_for_removal]
cobra_21

In [None]:
# Parse both date fields and store them back as zero-padded MM/DD/YYYY text
for date_col in ('rpt_date', 'occur_date'):
    parsed_dates = pd.to_datetime(cobra_21[date_col])
    cobra_21[date_col] = parsed_dates.dt.strftime("%m/%d/%Y")
cobra_21.head()

In [None]:
# Stack the four cleaned COBRA extracts into a single crime table.
#
# ignore_index=True gives the result a fresh 0..n-1 index. Without it every
# source keeps its own row labels, so the same label appears up to four times;
# that makes label-based lookups/alignment ambiguous and writes a repeated
# index column into the exported CSV.
#
# cobra_20_Old is given no ibr_code column, so pandas fills ibr_code with NaN
# for the rows that come from it.
# NOTE(review): COBRA-2020 and COBRA-2020-Old may describe overlapping offenses;
# confirm that 2020 rows are not double counted after stacking.
concat_list = [cobra_09, cobra_20, cobra_20_Old, cobra_21]
crime_df_complete = pd.concat(concat_list, ignore_index=True)
crime_df_complete.head()

In [None]:
# occur_date is MM/DD/YYYY text: characters 0-1 hold the month, 3-4 the day
occur_date_text = crime_df_complete['occur_date']
month_text = occur_date_text.str[:2]
day_text = occur_date_text.str[3:5]
crime_df_complete.insert(4, 'occur_month', month_text)
crime_df_complete.insert(5, 'occur_day', day_text)
crime_df_complete

In [None]:
# The date parts were sliced out of text; convert them to numbers so they can be compared and grouped
crime_date_part_cols = ['occur_year', 'occur_month', 'occur_day']
crime_date_parts_numeric = crime_df_complete[crime_date_part_cols].apply(pd.to_numeric)
crime_df_complete[crime_date_part_cols] = crime_date_parts_numeric

In [None]:
# Offenses outside the study window: anything after September 2021 or before 2009
occurred_after_sep_2021 = (crime_df_complete['occur_year'] == 2021) & (crime_df_complete['occur_month'] > 9)
occurred_before_2009 = crime_df_complete['occur_year'] < 2009
delete_dates = crime_df_complete.loc[occurred_after_sep_2021 | occurred_before_2009]
delete_dates_list = delete_dates['offense_id'].tolist()

In [None]:
# Keep only the rows whose offense id was not collected in delete_dates_list
outside_window = crime_df_complete['offense_id'].isin(delete_dates_list)
crime_df_complete = crime_df_complete[~outside_window]
crime_df_complete

In [None]:
# Write the cleaned crime table to a CSV file in the working directory.
# The dataframe index is written as well (to_csv default), as the first, unnamed column.
crime_df_complete.to_csv(path_or_buf="Crime Data.csv")

In [None]:
# Count offenses per (year, month); count() only counts rows with a non-null offense_id
crime_by_month = crime_df_complete.groupby(['occur_year', 'occur_month'], as_index = False)
crime_year_month = crime_by_month.agg({'offense_id': ['count']})

# The aggregation yields two-level column labels; replace them with flat names
crime_year_month.columns = ['occur_year', 'occur_month', 'no._offenses']
crime_year_month

In [None]:
# Display the weather dataframe as loaded from the CSV, before any of the cleaning below
weather_data

In [None]:
# Keep only the first 10 characters of each timestamp string
# (presumably the YYYY-MM-DD date part -- confirm against the raw file)
dt_iso_text = weather_data['dt_iso']
weather_data['dt_iso'] = dt_iso_text.str[:10]
weather_data.head()

In [None]:
# Truncate to the first 10 characters (a no-op if already truncated), parse the
# result as a date, and store it back as zero-padded MM/DD/YYYY text so it
# matches the date format used in the crime table.
dt_iso_first10 = weather_data['dt_iso'].str[:10]
weather_data['dt_iso'] = pd.to_datetime(dt_iso_first10).dt.strftime("%m/%d/%Y")
weather_data.head()

In [None]:
# dt_iso is MM/DD/YYYY text: slice out year, month and day and place them in
# column positions 2, 3 and 4, using the same column names as the crime table
weather_date_text = weather_data['dt_iso']
weather_year_text = weather_date_text.str[-4:]
weather_month_text = weather_date_text.str[:2]
weather_day_text = weather_date_text.str[3:5]
weather_data.insert(2, 'occur_year', weather_year_text)
weather_data.insert(3, 'occur_month', weather_month_text)
weather_data.insert(4, 'occur_day', weather_day_text)

In [None]:
# Replace missing rain / snow readings with 0 in each precipitation column
for precip_col in ('rain_1h', 'rain_3h', 'snow_1h', 'snow_3h'):
    weather_data[precip_col] = weather_data[precip_col].fillna(0)
weather_data.head()

In [None]:
# Independent copy holding only the grouping keys and the weather measurements
weather_keep_cols = [
    'occur_year', 'occur_month',
    'temp', 'temp_min', 'temp_max', 'humidity',
    'rain_1h', 'rain_3h', 'snow_1h', 'snow_3h',
]
atlanta_weather_data = weather_data.loc[:, weather_keep_cols].copy()
atlanta_weather_data

In [None]:
# Average every weather measurement per (year, month)
weather_measures = ['temp', 'temp_min', 'temp_max', 'humidity', 'rain_1h', 'rain_3h', 'snow_1h', 'snow_3h']
monthly_mean_spec = {measure: ['mean'] for measure in weather_measures}
weather_by_month = atlanta_weather_data.groupby(['occur_year', 'occur_month'], as_index = False)
agg_weather_data = weather_by_month.agg(monthly_mean_spec)

# The aggregation yields two-level column labels; replace them with flat names
agg_weather_data.columns = [
    'occur_year', 'occur_month',
    'temp_mean', 'min_temp_mean', 'max_temp_mean', 'humidity_mean',
    'rain_1h_mean', 'rain_3h_mean', 'snow_1h_mean', 'snow_3h_mean',
]
agg_weather_data

In [None]:
# Year and month were sliced out of text; make them numeric so they can match the crime table's keys
weather_key_cols = ['occur_year', 'occur_month']
weather_keys_numeric = agg_weather_data[weather_key_cols].apply(pd.to_numeric)
agg_weather_data[weather_key_cols] = weather_keys_numeric

In [None]:
# Attach the monthly weather averages to the monthly crime counts.
# Left join: every crime month is kept; months with no weather rows get NaN.
crime_with_weather = pd.merge(
    left=crime_year_month,
    right=agg_weather_data,
    on=['occur_year', 'occur_month'],
    how="left",
)
crime_with_weather

In [None]:
# Pull out the series used by the regression plots, selected by column label.
# These labels are the columns sitting at positions 2, 3, 5, 4, 7, 9 and 6 of
# crime_with_weather (rain and snow use the 1-hour means).
crime_scatter = crime_with_weather['no._offenses']
temp_scatter = crime_with_weather['temp_mean']
max_temp_scatter = crime_with_weather['max_temp_mean']
min_temp_scatter = crime_with_weather['min_temp_mean']
rain_scatter = crime_with_weather['rain_1h_mean']
snow_scatter = crime_with_weather['snow_1h_mean']
humidity_scatter = crime_with_weather['humidity_mean']

In [None]:
(slope, intercept, rvalue, pvalue, stderr) = linregress(temp_scatter,crime_scatter)
regress_values = temp_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(temp_scatter,crime_scatter)
plt.plot(temp_scatter,regress_values,"r-")
plt.xlabel('Temperature')
plt.ylabel('Reported Crime')
plt.title('Crime vs Temperature')
#plt.savefig('output_data/City Latitude vs Max Temperature.png')
plt.show()
print(f'The r-value is {rvalue}')

In [None]:
(slope, intercept, rvalue, pvalue, stderr) = linregress(max_temp_scatter,crime_scatter)
regress_values = max_temp_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(max_temp_scatter,crime_scatter)
plt.plot(max_temp_scatter,regress_values,"r-")
plt.xlabel('Max Temperature')
plt.ylabel('Reported Crime')
plt.title('Crime vs Max Temperature')
#plt.savefig('output_data/City Latitude vs Max Temperature.png')
plt.show()
print(f'The r-value is {rvalue}')

In [None]:
(slope, intercept, rvalue, pvalue, stderr) = linregress(min_temp_scatter,crime_scatter)
regress_values = min_temp_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(min_temp_scatter,crime_scatter)
plt.plot(min_temp_scatter,regress_values,"r-")
plt.xlabel('Min Temperature')
plt.ylabel('Reported Crime')
plt.title('Crime vs Min Temperature')
#plt.savefig('output_data/City Latitude vs Max Temperature.png')
plt.show()
print(f'The r-value is {rvalue}')

In [None]:
(slope, intercept, rvalue, pvalue, stderr) = linregress(rain_scatter,crime_scatter)
regress_values = rain_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(rain_scatter,crime_scatter)
plt.plot(rain_scatter,regress_values,"r-")
plt.xlabel('Rainfall')
plt.ylabel('Reported Crime')
plt.title('Crime vs Rain')
#plt.savefig('output_data/City Latitude vs Max Temperature.png')
plt.show()
print(f'The r-value is {rvalue}')

In [None]:
(slope, intercept, rvalue, pvalue, stderr) = linregress(snow_scatter,crime_scatter)
regress_values = snow_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(snow_scatter,crime_scatter)
plt.plot(snow_scatter,regress_values,"r-")
plt.xlabel('Snow')
plt.ylabel('Reported Crime')
plt.title('Crime vs Snow')
#plt.savefig('output_data/City Latitude vs Max Temperature.png')
plt.show()
print(f'The r-value is {rvalue}')

In [None]:
(slope, intercept, rvalue, pvalue, stderr) = linregress(humidity_scatter,crime_scatter)
regress_values = humidity_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(humidity_scatter,crime_scatter)
plt.plot(humidity_scatter,regress_values,"r-")
plt.xlabel('Humidity')
plt.ylabel('Reported Crime')
plt.title('Crime vs Humidity')
#plt.savefig('output_data/City Latitude vs Max Temperature.png')
plt.show()
print(f'The r-value is {rvalue}')

In [None]:
# Replace the county case table's headers (assigned purely by position)
county_case_headers = [
    'County Name', 'Cases', 'County ID', 'State FIPS Code', 'County FIPS Code',
    'Population', 'Hospitalization', 'Deaths', 'Case Rate', 'Death Rate',
    '14 Day Case Rate', '14 Day Cases', 'Antigen Cases', 'Probable Deaths',
]
county_cases.columns = county_case_headers
county_cases.head()