In [1]:
# Import Needed Modules
import pandas as pd

# Import the files
weather_df = pd.read_csv("Resources/Canton_Ohio_Weather.csv")
cfs_df = pd.read_excel("Resources/cfs_data_Canton.xlsx")
reports_df = pd.read_excel("Resources/reported_crime.xlsx")

In [2]:
# Clean and keep: Weather Data
# Columns to keep: dt_iso, temp, feels_like, temp_min, temp_max, humidity, weather_description, rain_1h, snow_1h
# Columns to make: temp_avg (To replace temp_min/temp_max for that hour?)
weather_df = weather_df[['dt_iso','temp','feels_like','temp_min','temp_max',
                         'humidity','weather_description','rain_1h','snow_1h']]

# Create the temp average for the hour
weather_df['temp_avg'] = (weather_df['temp_min'] + weather_df['temp_max'])/2

# Convert dt_iso to datetime and localize
weather_df['dt_iso'] = pd.to_datetime(weather_df['dt_iso'], format='%Y-%m-%d %H:%M:%S %z UTC')
weather_df['dt_iso'] = weather_df['dt_iso'].dt.tz_convert("America/New_York")

# Create column to link on merge
weather_df['relation'] = weather_df['dt_iso'].dt.strftime("%Y-%m-%d %H:%M:%S")

# Bin the temperature to the nearest multiple of 5
# Example: Rounding 41 to 40 and 44 to 45
weather_df['Temperature Bin'] = weather_df['temp'].apply(lambda x: round(x / 5) * 5)

weather_df.head()

Unnamed: 0,dt_iso,temp,feels_like,temp_min,temp_max,humidity,weather_description,rain_1h,snow_1h,temp_avg,relation,Temperature Bin
0,2014-12-31 19:00:00-05:00,21.63,9.03,21.6,22.44,42,sky is clear,,,22.02,2014-12-31 19:00:00,20
1,2014-12-31 20:00:00-05:00,21.51,8.91,20.98,22.44,46,sky is clear,,,21.71,2014-12-31 20:00:00,20
2,2014-12-31 21:00:00-05:00,21.51,9.19,20.98,22.44,46,sky is clear,,,21.71,2014-12-31 21:00:00,20
3,2014-12-31 22:00:00-05:00,20.57,7.97,20.52,22.44,48,sky is clear,,,21.48,2014-12-31 22:00:00,20
4,2014-12-31 23:00:00-05:00,20.57,8.02,20.52,21.36,48,sky is clear,,,20.94,2014-12-31 23:00:00,20


In [3]:
# Clean and keep: Calls for Service Data
# Columns to keep: CallType, CreateDatetime
# Create relation column for weather if needed
cfs_df = cfs_df[['CallType','CreateDatetime']]
cfs_df['relation'] = cfs_df['CreateDatetime'].dt.strftime("%Y-%m-%d %H:00:00")

cfs_df.head()

Unnamed: 0,CallType,CreateDatetime,relation
0,Disturbance,2023-12-22 21:09:34.750,2023-12-22 21:00:00
1,911 Hangup,2023-12-22 21:04:17.120,2023-12-22 21:00:00
2,911 Hangup,2023-12-22 20:44:22.477,2023-12-22 20:00:00
3,911 Hangup,2023-12-22 20:41:46.953,2023-12-22 20:00:00
4,Theft,2023-12-22 20:27:56.443,2023-12-22 20:00:00


In [4]:
# Clean and keep: Reported Crime
# Columns to keep: dordate, tor, offense, QOL_category, crimetype, crimeagainst, day_of_week
reports_df = reports_df[['dordate','tor', 'ti1', 'offense', 'QOL_category', 'crimetype', 'crimeagainst', 'day_of_week']]

# Rename columns
reports_df = reports_df.rename(columns={'dordate':'Report Date','tor':'Report Time','ti1':'Incident Time'})

# Keep only rows that have a full time of report time
reports_df = reports_df.loc[reports_df['Report Time'].str.len() >=8]

# Combine Date and time for relation column for weather if needed
reports_df['relation'] = reports_df['Report Date'] + pd.to_timedelta(reports_df['Report Time'])
reports_df['relation'] = reports_df['relation'].dt.strftime("%Y-%m-%d %H:00:00")

In [14]:
# Merge Weather with CFS
accidents_df = pd.merge(cfs_df, weather_df, on="relation", how="outer")
accidents_df.head(3)

Unnamed: 0,CallType,CreateDatetime,relation,dt_iso,temp,feels_like,temp_min,temp_max,humidity,weather_description,rain_1h,snow_1h,temp_avg,Temperature Bin
0,Disturbance,2023-12-22 21:09:34.750,2023-12-22 21:00:00,NaT,,,,,,,,,,
1,911 Hangup,2023-12-22 21:04:17.120,2023-12-22 21:00:00,NaT,,,,,,,,,,
2,911 Hangup,2023-12-22 20:44:22.477,2023-12-22 20:00:00,NaT,,,,,,,,,,


In [15]:
accidents_df = accidents_df.dropna(subset=["temp","CallType"])
accidents_df = accidents_df.loc[accidents_df['CallType'].str.contains("Accident")]
accidents_df.head(10)

Unnamed: 0,CallType,CreateDatetime,relation,dt_iso,temp,feels_like,temp_min,temp_max,humidity,weather_description,rain_1h,snow_1h,temp_avg,Temperature Bin
481,Accident - W/Injuries,2023-12-19 16:39:18.760,2023-12-19 16:00:00,2023-12-19 16:00:00-05:00,28.81,20.93,27.16,31.32,68.0,broken clouds,,,29.24,30.0
487,Accident,2023-12-19 15:44:33.460,2023-12-19 15:00:00,2023-12-19 15:00:00-05:00,29.25,19.56,28.09,30.94,68.0,overcast clouds,,,29.515,30.0
492,Accident,2023-12-19 15:14:15.607,2023-12-19 15:00:00,2023-12-19 15:00:00-05:00,29.25,19.56,28.09,30.94,68.0,overcast clouds,,,29.515,30.0
573,Accident,2023-12-19 03:16:18.307,2023-12-19 03:00:00,2023-12-19 03:00:00-05:00,28.56,17.67,27.09,30.31,75.0,heavy snow,,,28.7,30.0
574,Accident,2023-12-19 03:16:18.307,2023-12-19 03:00:00,2023-12-19 03:00:00-05:00,28.56,17.67,27.09,30.31,75.0,fog,,,28.7,30.0
629,Accident,2023-12-18 23:28:41.970,2023-12-18 23:00:00,2023-12-18 23:00:00-05:00,27.27,15.1,26.1,28.98,75.0,mist,,,27.54,25.0
630,Accident,2023-12-18 23:28:41.970,2023-12-18 23:00:00,2023-12-18 23:00:00-05:00,27.27,15.1,26.1,28.98,75.0,light snow,,,27.54,25.0
633,Accident,2023-12-18 23:15:34.093,2023-12-18 23:00:00,2023-12-18 23:00:00-05:00,27.27,15.1,26.1,28.98,75.0,mist,,,27.54,25.0
634,Accident,2023-12-18 23:15:34.093,2023-12-18 23:00:00,2023-12-18 23:00:00-05:00,27.27,15.1,26.1,28.98,75.0,light snow,,,27.54,25.0
635,Accident,2023-12-18 23:02:12.443,2023-12-18 23:00:00,2023-12-18 23:00:00-05:00,27.27,15.1,26.1,28.98,75.0,mist,,,27.54,25.0


In [19]:
accidents_df.groupby("Temperature Bin")['Temperature Bin'].count()

Temperature Bin
-5.0       19
 0.0       23
 5.0       36
 10.0      94
 15.0     209
 20.0     450
 25.0     783
 30.0    1299
 35.0    1507
 40.0    1368
 45.0    1263
 50.0    1246
 55.0    1357
 60.0    1319
 65.0    1498
 70.0    1689
 75.0    1487
 80.0    1150
 85.0     576
 90.0     170
 95.0       5
Name: Temperature Bin, dtype: int64