In [7]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, f1_score
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv("crimelstm.csv")

# Select the relevant columns
cols = ['Neighbourhood', 'offence_type', 'reportedmonth', 'reporteddayofweek', 'occurrencehour']
data = data[cols]

# Encode categorical variables
cat_cols = ['Neighbourhood', 'offence_type', 'reportedmonth', 'reporteddayofweek']
for col in cat_cols:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])

# Normalize numerical variables
num_cols = ['occurrencehour']
scaler = MinMaxScaler()
data[num_cols] = scaler.fit_transform(data[num_cols])

# Encode the target variable
encoder = LabelEncoder()
data['offence_type'] = encoder.fit_transform(data['offence_type'])

# Split the data into input (X) and output (y) variables
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape the input data for the LSTM
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Define the LSTM model
model = Sequential()
model.add(LSTM(64, input_shape=(1, X_train.shape[2])))
model.add(Dense(1))

# Compile the model
model.compile(loss='mse', optimizer='adam')

# Fit the model to the training data
model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=2)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Transform the y_test data using the same encoder used for training
y_test = encoder.transform(y_test)

# Evaluate the model using mean absolute error, mean squared error, and F1-score
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
f1 = f1_score(np.round(y_test), np.round(y_pred), average='weighted')

# Print the evaluation metrics
print("MAE:", mae)
print("MSE:", mse)
print("RMSE:", rmse)
print("F1-score:", f1)

# Save the model
model.save("crime_lstm_model.h5")

Epoch 1/5
5161/5161 - 9s - loss: 0.0992 - 9s/epoch - 2ms/step
Epoch 2/5
5161/5161 - 9s - loss: 0.0981 - 9s/epoch - 2ms/step
Epoch 3/5
5161/5161 - 8s - loss: 0.0979 - 8s/epoch - 2ms/step
Epoch 4/5
5161/5161 - 9s - loss: 0.0978 - 9s/epoch - 2ms/step
Epoch 5/5
5161/5161 - 9s - loss: 0.0976 - 9s/epoch - 2ms/step
MAE: 0.545297017212851
MSE: 0.29938760410589244
RMSE: 0.547163233510707
F1-score: 0.14105916357307757


In [9]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Load the saved model
model = load_model("crime_lstm_model.h5")

# Load the input data
neighbourhood = "Neighbourhood1"
offence_type = "OffenceType1"
reportedmonth = 5 # May
reporteddayofweek = 2 # Wednesday
occurrencehour = 15 # 3 PM

# Create a dataframe with the input data
input_data = pd.DataFrame({
    "Neighbourhood": "University (79)",
    "offence_type": "Assault",
    "reportedmonth": "December",
    "reporteddayofweek": "Friday",
    "occurrencehour": 3
})

# Encode categorical variables
cat_cols = ['Neighbourhood', 'offence_type', 'reportedmonth', 'reporteddayofweek']
for col in cat_cols:
    encoder = LabelEncoder()
    input_data[col] = encoder.fit_transform(input_data[col])

# Normalize numerical variables
num_cols = ['occurrencehour']
scaler = MinMaxScaler()
input_data[num_cols] = scaler.fit_transform(input_data[num_cols])

# Reshape the input data for the LSTM
input_data = np.reshape(input_data.values, (input_data.shape[0], 1, input_data.shape[1]))

# Make a prediction on the input data
predicted_prob = model.predict(input_data)

# Print the predicted probability
print("The probability of a crime occurring in {} at {} on a {} in {} with offence type {} is {:.2f}%".format(
    neighbourhood, occurrencehour, reporteddayofweek, reportedmonth, offence_type, predicted_prob[0][0]*100))


ValueError: If using all scalar values, you must pass an index

In [23]:
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Load the dataset
data = pd.read_csv("crimelstm.csv")

# Select the relevant columns
cols = ['Neighbourhood', 'offence_type', 'reportedmonth', 'reporteddayofweek', 'occurrencehour']
data = data[cols]

# Encode categorical variables
cat_cols = ['Neighbourhood', 'offence_type', 'reportedmonth', 'reporteddayofweek']
for col in cat_cols:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])

# Save the encoded classes
np.save('Neighbourhood_classes.npy', encoder.classes_)


In [24]:
np.save('Neighbourhood_classes.npy', encoder.classes_)
np.save('offence_type_classes.npy', encoder.classes_)
np.save('month_classes.npy', encoder.classes_)
np.save('day_classes.npy', encoder.classes_)




In [25]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Load the dataset
data = pd.read_csv("crimelstm.csv")

# Select the relevant columns
cols = ['Neighbourhood', 'offence_type', 'reportedmonth', 'reporteddayofweek', 'occurrencehour']
data = data[cols]

# Encode categorical variables
cat_cols = ['Neighbourhood', 'offence_type', 'reportedmonth', 'reporteddayofweek']
for col in cat_cols:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])

# Normalize numerical variables
num_cols = ['occurrencehour']
scaler = MinMaxScaler()
data[num_cols] = scaler.fit_transform(data[num_cols])

# Save the maximum values of the scaler object
np.save('scaler_max.npy', scaler.data_max_)



In [26]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Load the dataset
data = pd.read_csv("crimelstm.csv")

# Select the relevant columns
cols = ['Neighbourhood', 'offence_type', 'reportedmonth', 'reporteddayofweek', 'occurrencehour']
data = data[cols]

# Encode categorical variables
cat_cols = ['Neighbourhood', 'offence_type', 'reportedmonth', 'reporteddayofweek']
for col in cat_cols:
    encoder = LabelEncoder()
    data[col] = encoder.fit_transform(data[col])

# Normalize numerical variables
num_cols = ['occurrencehour']
scaler = MinMaxScaler()
data[num_cols] = scaler.fit_transform(data[num_cols])

# Save the minimum values of the scaler object
np.save('scaler_min.npy', scaler.data_min_)


In [28]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Load the model
model = load_model("crime_lstm_model.h5")

# Load the label encoders and scaler used for training
Neighbourhood_encoder = LabelEncoder()
offence_type_encoder = LabelEncoder()
month_encoder = LabelEncoder()
day_encoder = LabelEncoder()
scaler = MinMaxScaler()
Neighbourhood_encoder.classes_ = np.load('Neighbourhood_classes.npy', allow_pickle=True)
offence_type_encoder.classes_ = np.load('offence_type_classes.npy', allow_pickle=True)
month_encoder.classes_ = np.load('month_classes.npy', allow_pickle=True)
day_encoder.classes_ = np.load('day_classes.npy', allow_pickle=True)
scaler.data_max_ = np.load('scaler_max.npy', allow_pickle=True)
scaler.data_min_ = np.load('scaler_min.npy', allow_pickle=True)

# Define the input variables
Neighbourhood = "Broadview North (57)"
offence_type = "Assault"
reportedmonth = "January"  # January
reporteddayofweek = "Wednesday"  # Wednesday
occurrencehour = 12

# Encode the categorical variables
Neighbourhood_encoded = Neighbourhood_encoder.transform([Neighbourhood])
offence_type_encoded = offence_type_encoder.transform([offence_type])
month_encoded = month_encoder.transform([reportedmonth])
day_encoded = day_encoder.transform([reporteddayofweek])

# Normalize the numerical variable
hour_normalized = scaler.transform([[occurrencehour]])[0][0]

# Create a DataFrame with the input variables
data = pd.DataFrame({
    'Neighbourhood': Neighbourhood_encoded,
    'offence_type': offence_type_encoded,
    'reportedmonth': month_encoded,
    'reporteddayofweek': day_encoded,
    'occurrencehour': hour_normalized
})

# Reshape the input data for the LSTM
X = np.reshape(data.values, (1, 1, data.shape[1]))

# Make a prediction using the loaded model
y_pred = model.predict(X)[0][0]

# Print the predicted probability of the crime occurring
print("The predicted probability of a crime occurring in", Neighbourhood, "for an offence type of", offence_type, "on a", day_encoder.classes_[reporteddayofweek], "in", month_encoder.classes_[reportedmonth], "at", occurrencehour, "hours is:", y_pred)


ValueError: y contains previously unseen labels: 'Broadview North (57)'