In [1]:
# Load Packages
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import plotly.express as px
import numpy as np

In [3]:
# Load the data
# The path is relative, so the CSV is resolved against the notebook's working directory.
dataI = pd.read_csv("final_table.csv")

In [4]:
# Clean data
# Discard the columns that are not used for modelling.
dataI = dataI.drop(columns=['week', 'Arrest', 'Domestic', 'DayofMonth'])

# Hour of day is a label, not a quantity, so store it as a categorical.
dataI['hour'] = dataI['hour'].astype('category')

# Collapse each indicator to a strict 0/1 flag (anything > 0 becomes 1) and
# store it as a categorical as well.
for flag in ('is_holiday', 'is_full_moon', 'is_day'):
    dataI[flag] = (dataI[flag] > 0).astype(int).astype("category")

In [5]:
# Identify features and response variables
# Positional split: the first 23 columns are the features, every remaining
# column is a response variable.
X = dataI.iloc[:, 0:23]
Y = dataI.iloc[:, 23:dataI.shape[1]]

In [6]:
# Make categorical features into dummy variables
# Sub-frame of X holding only its object- and category-typed columns
# (the candidates for one-hot encoding).
categorical_features = X.select_dtypes(include=['object', 'category'])
def preprocess_and_create_dummies(data):
    """One-hot encode every object/category column of ``data``.

    The columns to encode are derived from ``data`` itself rather than from a
    notebook-level variable, so the result does not depend on which cell last
    assigned ``categorical_features``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to encode. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame in which each object/category column is replaced by its
        ``<column>_<value>`` dummy columns; other columns are kept as-is.
    """
    columns_to_encode = data.select_dtypes(include=['object', 'category']).columns
    # get_dummies returns a new frame, so the caller's data is left untouched.
    return pd.get_dummies(data, columns=columns_to_encode)
# Replace X with its one-hot encoded version.
X = preprocess_and_create_dummies(X)

In [None]:
# Fit models
# One random forest is trained per response column and pickled to its own file.

for num, col in enumerate(Y.columns, start=1):
    # Extract the response variable
    y = Y[col]

    # Train a random forest model. The seed is fixed so that re-running the
    # notebook reproduces the same forests.
    model = RandomForestRegressor(random_state=42)
    model.fit(X, y)

    # Name the file after the response variable. The ".pkl" suffix is required:
    # the loading cell opens "<column>_model.pkl".
    model_name = col + "_model.pkl"

    # Persist the trained model to disk
    with open(model_name, 'wb') as file:
        pickle.dump(model, file)

    print(f"Done training model {model_name} - {num}/{len(Y.columns)}")

In [37]:
# Pickled model files to load, one per response variable.
# NOTE(review): 'violent_crimes_mod.pkl' does not end in '_model.pkl', so the
# suffix stripping below leaves its label as the full file name - confirm this
# is the intended file and label.
models_path = ['Primary Type_BURGLARY_model.pkl',
       'Primary Type_CRIM SEXUAL ASSAULT_model.pkl',
       'Primary Type_ASSAULT_model.pkl', 'Primary Type_BATTERY_model.pkl',
       'Primary Type_ROBBERY_model.pkl', 'Primary Type_INTIMIDATION_model.pkl',
       'Primary Type_HOMICIDE_model.pkl', 'Primary Type_KIDNAPPING_model.pkl',
       'Primary Type_HUMAN TRAFFICKING_model.pkl', 'Beat_1111_model.pkl',
       'Beat_1112_model.pkl', 'Beat_1113_model.pkl', 'Beat_1114_model.pkl',
       'Beat_1115_model.pkl', 'Beat_1121_model.pkl', 'Beat_1122_model.pkl',
       'Beat_1123_model.pkl', 'Beat_1124_model.pkl', 'Beat_1125_model.pkl',
       'Beat_1131_model.pkl', 'Beat_1132_model.pkl', 'Beat_1133_model.pkl',
       'Beat_1134_model.pkl', 'Beat_1135_model.pkl', 'violent_crimes_mod.pkl']

# Build a list of (label, model) pairs; the label is the file name with the
# '_model.pkl' suffix removed.
# Unpickling can execute arbitrary code, so only load files you produced yourself.
trained_models = []
for model_file in models_path:
    with open(model_file, 'rb') as handle:
        loaded_model = pickle.load(handle)
    label = model_file.replace('_model.pkl', '')
    trained_models.append((label, loaded_model))

In [23]:
# Hourly calendar for the forecast window (2024-01-01 through 2024-03-31).
# The end bound carries an explicit 23:00: with a bare date the range would stop
# at midnight and the last day would contribute a single hour instead of 24.
# Lower-case 'h' is the hourly alias; upper-case 'H' is deprecated in recent pandas.
calendar = pd.date_range(start='2024-01-01', end='2024-03-31 23:00', freq='h')

# Full-moon dates, as YYYY-MM-DD strings
full_moon_dates = [
    "2024-01-25",
    "2024-02-24",
    "2024-03-25",
    "2024-04-23",
    "2024-05-23",
    "2024-06-21",
    "2024-07-21",
    "2024-08-19",
    "2024-09-17",
    "2024-10-17",
    "2024-11-15",
    "2024-12-15"
]

# Holiday dates, as YYYY-MM-DD strings
holiday_dates = [
    "2024-01-01",  # New Year's Day
    "2024-01-15",  # Martin Luther King Jr. Day
    "2024-02-19",  # Washington's Birthday (Presidents Day)
    "2024-05-27",  # Memorial Day
    "2024-07-04",  # Independence Day
    "2024-09-02",  # Labor Day
    "2024-10-14",  # Columbus Day
    "2024-11-11",  # Veterans Day
    "2024-11-28",  # Thanksgiving Day
    "2024-12-25"   # Christmas Day
]

# Date part of every hourly timestamp, formatted like the two lists above so the
# membership tests can be done in one vectorised call each.
calendar_days = calendar.strftime('%Y-%m-%d')

full_calendar = pd.DataFrame({
    'Date': calendar,
    'Year': calendar.year,
    'month': calendar.strftime('%b'),      # abbreviated month name, e.g. 'Jan'
    'DayOfWeek': calendar.strftime('%a'),  # abbreviated weekday name, e.g. 'Mon'
    'hour': calendar.hour.astype('category'),
    'Week': [date.isocalendar()[1] for date in calendar],  # ISO week number
    'is_holiday': calendar_days.isin(holiday_dates).astype(int),      # 1 on a holiday, else 0
    'is_full_moon': calendar_days.isin(full_moon_dates).astype(int)   # 1 on a full-moon date, else 0
})

In [24]:
# Sunrise / sunset hour tables with 91 entries each.
# NOTE(review): presumably one entry per calendar day starting at the first
# calendar date (2024-01-01 .. 2024-03-31 is 91 days) - confirm.
sunrise = [7]*31 + [7]*4 + [6]*25 + [6]*9 + [7]*6 + [6]*16
sunset = [16]*27 + [17]*4 + [17]*29 + [17]*9 + [18]*7 + [19]*15

# The tables are per day, so they must be indexed by each row's day offset from
# the first calendar date - not by the hour value, which would compare every row
# against the first few (January) entries only.
first_day = full_calendar['Date'].min().normalize()
day_offset = (full_calendar['Date'].dt.normalize() - first_day).dt.days.to_numpy()
if len(day_offset) and day_offset.max() >= min(len(sunrise), len(sunset)):
    raise ValueError("sunrise/sunset tables do not cover every day in full_calendar")

hour_of_day = full_calendar['Date'].dt.hour.to_numpy()
sunrise_hour = np.asarray(sunrise)[day_offset]
sunset_hour = np.asarray(sunset)[day_offset]

# Daylight means sunrise <= hour < sunset on that row's own day.
daylight = (sunrise_hour <= hour_of_day) & (hour_of_day < sunset_hour)
full_calendar['is_day'] = pd.Series(daylight.astype(int), index=full_calendar.index).astype('category')
full_calendar['is_full_moon'] = full_calendar['is_full_moon'].astype('category')
full_calendar['is_holiday'] = full_calendar['is_holiday'].astype('category')

In [25]:
# First and last timestamp of every week number present in the calendar.
week_bounds = full_calendar.groupby('Week')['Date'].agg(['first', 'last'])
week_concatenated = week_bounds.reset_index()

# Keep only the date part of the last timestamp.
week_concatenated['last'] = week_concatenated['last'].dt.date

# Human-readable "<first> - <last>" label for each week.
first_label = week_concatenated['first'].astype(str)
last_label = week_concatenated['last'].astype(str)
week_concatenated['weeks'] = first_label + ' - ' + last_label

# Attach the label to every hourly row through its week number.
week_lookup = week_concatenated[['Week', 'weeks']]
full_calendar = full_calendar.merge(week_lookup, on='Week', how='left')

In [26]:
# Hourly rows of the week to predict, restricted to the calendar-derived features.
new_obs_t = full_calendar.loc[
    full_calendar['weeks'] == "2024-01-01 - 2024-01-07",
    ['Year','month', 'DayOfWeek', 'hour', 'is_holiday', 'is_full_moon', 'is_day']
]

# Work on an explicit copy: assigning columns to a bare slice of full_calendar
# triggers SettingWithCopyWarning and would also alter new_obs_t.
new_obs = new_obs_t.copy()

# Constant weather / economic inputs applied to every hour of the week.
# NOTE(review): the magnitudes (e.g. temperature -122, humidity 710) suggest the
# training table uses scaled units - confirm against final_table.csv.
new_obs['moonphase'] = 6
new_obs['temperature'] = -122
new_obs['humidity'] = 710
new_obs['feels_like'] = -188
new_obs['rain'] = 0
new_obs['snowfall'] = 0
new_obs['snow_depth'] = 0.9
new_obs['cloud_cover'] = 850
new_obs['wind_speed'] = 205
new_obs['wind_gusts'] = 335
new_obs['shortwave_radiation'] = 0
new_obs['direct_radiation'] = 0
new_obs['dew'] = -176
new_obs['sealevelpressure'] = 10316
new_obs['visibility'] = 156
new_obs['Unemployment'] = 112

In [12]:
# Sub-frame of new_obs holding only its object- and category-typed columns.
# This rebinds the notebook-level name categorical_features.
categorical_features = new_obs.select_dtypes(include=['object', 'category'])
def preprocess_and_create_dummies(data):
    """One-hot encode every object/category column of ``data``.

    The columns to encode are derived from ``data`` itself rather than from a
    notebook-level variable, so the result does not depend on which cell last
    assigned ``categorical_features``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to encode. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame in which each object/category column is replaced by its
        ``<column>_<value>`` dummy columns; other columns are kept as-is.
    """
    columns_to_encode = data.select_dtypes(include=['object', 'category']).columns
    # get_dummies returns a new frame, so the caller's data is left untouched.
    return pd.get_dummies(data, columns=columns_to_encode)
# One-hot encode the categorical columns of the new observations.
new_obs = preprocess_and_create_dummies(new_obs)

# Month dummy columns that must all be present in the final frame
new_columns = ['month_Apr', 'month_Aug', 'month_Dec', 'month_Feb', 'month_Jan', 'month_Jul',
               'month_Jun', 'month_Mar', 'month_May', 'month_Nov', 'month_Oct', 'month_Sep']

# Any month dummy the encoding did not produce is added as an all-zero column.
absent_months = [name for name in new_columns if name not in new_obs.columns]
for name in absent_months:
    new_obs[name] = 0

# Fixed column order for the frame handed to the models
ordered_columns = ['Year', 'moonphase', 'temperature', 'humidity', 'feels_like', 'rain',
       'snowfall', 'snow_depth', 'cloud_cover', 'wind_speed', 'wind_gusts',
       'shortwave_radiation', 'direct_radiation', 'dew', 'sealevelpressure',
       'visibility', 'Unemployment', 'month_Apr',
       'month_Aug', 'month_Dec', 'month_Feb', 'month_Jan', 'month_Jul',
       'month_Jun', 'month_Mar', 'month_May', 'month_Nov', 'month_Oct',
       'month_Sep', 'DayOfWeek_Fri', 'DayOfWeek_Mon', 'DayOfWeek_Sat',
       'DayOfWeek_Sun', 'DayOfWeek_Thu', 'DayOfWeek_Tue', 'DayOfWeek_Wed',
       'hour_0', 'hour_1', 'hour_2', 'hour_3', 'hour_4', 'hour_5', 'hour_6',
       'hour_7', 'hour_8', 'hour_9', 'hour_10', 'hour_11', 'hour_12',
       'hour_13', 'hour_14', 'hour_15', 'hour_16', 'hour_17', 'hour_18',
       'hour_19', 'hour_20', 'hour_21', 'hour_22', 'hour_23', 'is_holiday_0',
       'is_holiday_1', 'is_full_moon_0', 'is_full_moon_1', 'is_day_0',
       'is_day_1']
new_obs = new_obs[ordered_columns]

In [13]:
# Select the model from the list based on the crime type
crime_type = 'ROBBERY'
target_name = 'Primary Type_' + crime_type

# Search trained_models, the (label, model) list built by the loading cell.
# Labels there have the '_model.pkl' suffix stripped, so an exact match is
# accepted as well as a label that still carries a '_model...' suffix.
selected_model = None
for model_name, model in trained_models:
    if model_name == target_name or model_name.startswith(target_name + '_model'):
        selected_model = model
        break

# Fail with a clear message instead of an AttributeError on None below.
if selected_model is None:
    raise ValueError(f"No trained model found for {target_name!r}")

predictions = selected_model.predict(new_obs)

In [14]:
# Arrange the flat prediction vector as a 7 x 24 grid (labelled day x hour by
# the plotting cell).
matrix = np.reshape(np.array(predictions), (7, 24))

array([[1.57, 1.57, 1.63, 1.79, 1.54, 2.66, 2.74, 1.88, 1.72, 1.51, 1.67,
        1.56, 1.53, 2.14, 1.87, 1.59, 2.04, 1.49, 2.28, 1.64, 2.15, 2.12,
        1.63, 1.59],
       [1.28, 1.32, 1.24, 1.68, 1.24, 2.46, 2.54, 1.78, 1.51, 1.34, 1.44,
        1.47, 1.36, 1.77, 1.66, 1.42, 1.72, 1.19, 2.13, 1.35, 1.75, 1.72,
        1.33, 1.59],
       [1.19, 1.18, 1.1 , 1.54, 1.1 , 2.28, 2.37, 1.59, 1.32, 1.15, 1.25,
        1.28, 1.17, 1.57, 1.51, 1.23, 1.62, 1.12, 2.28, 1.21, 1.59, 1.58,
        1.19, 1.41],
       [1.4 , 1.5 , 1.39, 1.83, 1.39, 2.44, 2.38, 1.66, 1.52, 1.35, 1.45,
        1.48, 1.37, 1.78, 1.62, 1.43, 1.9 , 1.39, 2.09, 1.5 , 1.85, 1.9 ,
        1.44, 1.66],
       [1.32, 1.3 , 1.18, 1.62, 1.18, 2.4 , 2.46, 1.72, 1.45, 1.28, 1.38,
        1.41, 1.3 , 1.75, 1.64, 1.36, 1.7 , 1.14, 2.  , 1.29, 1.67, 1.73,
        1.27, 1.61],
       [1.36, 1.32, 1.24, 1.64, 1.24, 2.41, 2.51, 1.79, 1.52, 1.41, 1.45,
        1.44, 1.37, 1.87, 1.71, 1.43, 1.82, 1.2 , 2.26, 1.31, 1.74, 1.73,
       

In [15]:
# Axis tick labels: one row per weekday, one column per hour of the day
day_labels = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
hour_labels = [str(hour) for hour in range(24)]

# Render the prediction grid as a heatmap
fig = px.imshow(
    matrix,
    labels={'x': "Hour", 'y': "Day", 'color': "Violent Crimes"},
    x=hour_labels,
    y=day_labels,
)

# Update layout
fig.update_layout(title='Violent Crimes Heatmap')
fig.show()


ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed