In [44]:
# Import Needed Modules
import pandas as pd

# Load the raw inputs: the weather CSV first, then the calls-for-service and
# reported-crime Excel workbooks (read in that order).
weather_df = pd.read_csv("Resources/Canton_Ohio_Weather.csv")
cfs_df, reports_df = (
    pd.read_excel(workbook_path)
    for workbook_path in (
        "Resources/cfs_data_Canton.xlsx",
        "Resources/reported_crime.xlsx",
    )
)

In [45]:
# Clean and keep: Weather Data
# Columns to keep: dt_iso, temp, feels_like, temp_min, temp_max, humidity, weather_description, rain_1h, snow_1h
# Columns to make: temp_avg (midpoint of temp_min/temp_max for that row), relation (merge key)
# .copy() makes the subset an independent frame, so the column assignments
# below do not raise pandas' SettingWithCopyWarning.
weather_df = weather_df[['dt_iso','temp','feels_like','temp_min','temp_max',
                         'humidity','weather_description','rain_1h','snow_1h']].copy()

# Create the temp average for the hour
weather_df['temp_avg'] = (weather_df['temp_min'] + weather_df['temp_max'])/2

# Parse dt_iso into timezone-aware datetimes (the format expects a numeric
# offset followed by the literal text "UTC"), then convert to Eastern time.
weather_df['dt_iso'] = pd.to_datetime(weather_df['dt_iso'], format='%Y-%m-%d %H:%M:%S %z UTC')
weather_df['dt_iso'] = weather_df['dt_iso'].dt.tz_convert("America/New_York")

# Create column to link on merge: the local wall-clock timestamp as a string.
# NOTE(review): the UTC offset is dropped here, so the repeated 01:00 hour at
# the autumn DST change would yield two rows with the same key - confirm
# whether the data spans such dates and de-duplicate before merging if so.
weather_df['relation'] = weather_df['dt_iso'].dt.strftime("%Y-%m-%d %H:%M:%S")

weather_df.head()

Unnamed: 0,dt_iso,temp,feels_like,temp_min,temp_max,humidity,weather_description,rain_1h,snow_1h,temp_avg,relation
0,2014-12-31 19:00:00-05:00,21.63,9.03,21.6,22.44,42,sky is clear,,,22.02,2014-12-31 19:00:00
1,2014-12-31 20:00:00-05:00,21.51,8.91,20.98,22.44,46,sky is clear,,,21.71,2014-12-31 20:00:00
2,2014-12-31 21:00:00-05:00,21.51,9.19,20.98,22.44,46,sky is clear,,,21.71,2014-12-31 21:00:00
3,2014-12-31 22:00:00-05:00,20.57,7.97,20.52,22.44,48,sky is clear,,,21.48,2014-12-31 22:00:00
4,2014-12-31 23:00:00-05:00,20.57,8.02,20.52,21.36,48,sky is clear,,,20.94,2014-12-31 23:00:00


In [46]:
# Clean and keep: Calls for Service Data
# Columns to keep: CallType, CreateDatetime
# Column to make: relation (merge key for the weather data)
# .copy() makes the subset an independent frame, so adding 'relation' below
# does not raise pandas' SettingWithCopyWarning.
cfs_df = cfs_df[['CallType','CreateDatetime']].copy()

# Truncate each call's timestamp to the top of its hour and render it as a
# string in the same "%Y-%m-%d %H:%M:%S" layout used for the weather key.
cfs_df['relation'] = cfs_df['CreateDatetime'].dt.strftime("%Y-%m-%d %H:00:00")

cfs_df.head()

Unnamed: 0,CallType,CreateDatetime,relation
0,Disturbance,2023-12-22 21:09:34.750,2023-12-22 21:00:00
1,911 Hangup,2023-12-22 21:04:17.120,2023-12-22 21:00:00
2,911 Hangup,2023-12-22 20:44:22.477,2023-12-22 20:00:00
3,911 Hangup,2023-12-22 20:41:46.953,2023-12-22 20:00:00
4,Theft,2023-12-22 20:27:56.443,2023-12-22 20:00:00


In [49]:
# Clean and keep: Reported Crime
# Columns to keep: dordate, tor, ti1, offense, QOL_category, crimetype, crimeagainst, day_of_week
# Column to make: relation (merge key for the weather data)

# Rename columns first. rename() ignores labels that are absent, so this cell
# can be re-run after the columns have already been renamed.
reports_df = reports_df.rename(columns={'dordate':'Report Date','tor':'Report Time','ti1':'Incident Time'})

# Select the kept columns by their new names (same order as before)
reports_df = reports_df[['Report Date','Report Time','Incident Time', 'offense', 'QOL_category',
                         'crimetype', 'crimeagainst', 'day_of_week']]

# Keep only rows that have a full report time (at least 8 characters, i.e. HH:MM:SS).
# .copy() makes the filtered rows an independent frame, so adding 'relation'
# below does not raise pandas' SettingWithCopyWarning.
reports_df = reports_df.loc[reports_df['Report Time'].str.len() >=8].copy()

# Combine date and time, then truncate to the top of the hour as a string key
# for the weather relation column.
reports_df['relation'] = reports_df['Report Date'] + pd.to_timedelta(reports_df['Report Time'])
reports_df['relation'] = reports_df['relation'].dt.strftime("%Y-%m-%d %H:00:00")

In [50]:
# Preview the first five rows of the cleaned reported-crime frame
reports_df.head(n=5)

Unnamed: 0,Report Date,Report Time,Incident Time,offense,QOL_category,crimetype,crimeagainst,day_of_week,relation
0,2024-01-02,17:04:10,16:55:00,MENACING,Menacing,Violent,Person,Tuesday,2024-01-02 17:00:00
1,2024-01-02,09:40:00,17:00:00,BURGLARY,Burglary,Property,Property,Tuesday,2024-01-02 09:00:00
2,2024-01-02,14:28:13,14:15:00,REPORT REQUIRED ON LOST/STOLEN FIREARM,Weapons Violations,Society,Society,Tuesday,2024-01-02 14:00:00
3,2024-01-02,02:55:24,02:30:00,CRIMINAL DAMAGING / ENDANGERING,Damaging,Property,Property,Tuesday,2024-01-02 02:00:00
4,2024-01-02,07:36:33,07:26:00,VANDALISM,Damaging,Property,Property,Tuesday,2024-01-02 07:00:00
