In [30]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
import folium
from folium.plugins import HeatMap
from sklearn.metrics import mean_squared_error
from math import sqrt

# Load the dataset
file_path = '/Users/arkamandol/DataspellProjects/Desertation_arka_23023023/data_files/uk_crime_lat_long.csv'
data = pd.read_csv(file_path)

# Convert 'Month' to datetime format for better date handling
data['Month'] = pd.to_datetime(data['Month'])

# Sort the DataFrame by the 'Month' column
data.sort_values('Month', inplace=True)

# Group data by Month, Latitude, and Longitude and count occurrences
monthly_data = data.groupby(['Month', 'Latitude', 'Longitude']).size().reset_index(name='Count')

# Initialize the MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))

# Fit and transform the 'Count' data to scale it
monthly_data['Normalized_Count'] = scaler.fit_transform(monthly_data[['Count']])

# Function to create input sequences for LSTM
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back)]
        dataX.append(a)
        dataY.append(dataset[i + look_back])
    return np.array(dataX), np.array(dataY)

# Define the number of past months data to consider for predicting the next month
look_back = 3
X, y = create_dataset(monthly_data['Normalized_Count'].values, look_back)

# Reshape input to be [samples, time steps, features] for LSTM
X = np.reshape(X, (X.shape[0], look_back, 1))

# Define the LSTM model
model = Sequential()
model.add(LSTM(50, input_shape=(look_back, 1)))  # 50 LSTM units
model.add(Dense(1))  # Output layer that predicts the future value
model.compile(loss='mean_squared_error', optimizer='adam')




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


ValueError: Found input variables with inconsistent numbers of samples: [3469922, 6]

In [34]:
# Fit the model on the dataset
model.fit(X, y, epochs=40, batch_size=128, verbose=1)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.src.callbacks.History at 0x288d21cc0>

In [35]:

# Predict future values for the next 6 months
future_predictions = []
current_input = X[-1]  # Start with the last available input

for _ in range(6):
    # Predict the next value
    next_prediction = model.predict(current_input.reshape(1, look_back, 1))[0, 0]

    # Store the prediction
    future_predictions.append(next_prediction)

    # Update the input for the next prediction (slide the window)
    current_input = np.append(current_input[1:], next_prediction).reshape(look_back, 1)

# Inverse transform to get predictions in the original count scale
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1)).flatten()

# Loop through each month and create a heatmap
for i in range(6):
    # Add the predictions to the DataFrame
    monthly_data[f'Predicted_Count_Month_{i+1}'] = np.concatenate([np.zeros(len(monthly_data) - 1), [future_predictions[i]]])

    # Create a map centered around an average location
    center_lat, center_lon = monthly_data['Latitude'].mean(), monthly_data['Longitude'].mean()
    map = folium.Map(location=[center_lat, center_lon], zoom_start=6)

    # Add a heatmap to the map using predicted crime counts
    heat_data = [[row['Latitude'], row['Longitude'], row[f'Predicted_Count_Month_{i+1}']] for index, row in monthly_data.iterrows()]
    HeatMap(heat_data).add_to(map)

    # Save the map
    map.save(f'/Users/arkamandol/DataspellProjects/Desertation_arka_23023023/data_files/crime_hotspots_month_{i+1}.html')

# Sum predictions across all six months
monthly_data['Predicted_Count_Accumulated'] = sum([monthly_data[f'Predicted_Count_Month_{i+1}'] for i in range(6)])

# Create a map centered around an average location
map_accumulated = folium.Map(location=[center_lat, center_lon], zoom_start=6)

# Add a heatmap to the map using accumulated predicted crime counts
heat_data_accumulated = [[row['Latitude'], row['Longitude'], row['Predicted_Count_Accumulated']] for index, row in monthly_data.iterrows()]
HeatMap(heat_data_accumulated).add_to(map_accumulated)

# Save the accumulated map
map_accumulated.save('/Users/arkamandol/DataspellProjects/Desertation_arka_23023023/data_files/crime_hotspots_accumulated.html')

# Calculate RMSE for evaluation
rmse = sqrt(mean_squared_error(y, future_predictions[:len(y)]))
print('Root Mean Squared Error:', rmse)




ValueError: Found input variables with inconsistent numbers of samples: [3469922, 6]