In [1]:
import pandas as pd
import numpy as np

# Path to your dataset
file_path = '/Users/arkamandol/DataspellProjects/Desertation_arka_23023023/data_files/uk_crime_lat_long.csv'
data = pd.read_csv(file_path)


In [2]:
data.head()


Unnamed: 0,Month,Longitude,Latitude
0,2020-01,-1.882746,53.933807
1,2020-01,-1.819597,53.923411
2,2020-01,-1.819121,53.920696
3,2020-01,-1.822354,53.926776
4,2020-01,-1.820732,53.925147


In [3]:
# Convert 'Month' to datetime format for better date handling
data['Month'] = pd.to_datetime(data['Month'])

# Sort the DataFrame by the 'Month' column
data.sort_values('Month', inplace=True)
data.head()

Unnamed: 0,Month,Longitude,Latitude
2532779,2018-10-01,-1.799748,52.480917
2636199,2018-10-01,-0.883768,52.241438
2636200,2018-10-01,-0.886208,52.239304
2636201,2018-10-01,-0.912985,52.239679
2636202,2018-10-01,-0.910119,52.242476


In [4]:
# Group data by Month, Latitude, and Longitude and count occurrences
monthly_data = data.groupby(['Month', 'Latitude', 'Longitude']).size().reset_index(name='Count')
monthly_data.head()

Unnamed: 0,Month,Latitude,Longitude,Count
0,2018-10-01,49.958602,-6.282843,1
1,2018-10-01,49.965936,-5.206265,1
2,2018-10-01,49.968855,-5.203851,1
3,2018-10-01,50.01747,-5.213606,5
4,2018-10-01,50.023755,-5.247637,1


In [5]:
from sklearn.preprocessing import MinMaxScaler

# Initialize the MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))

# Fit and transform the 'Count' data to scale it
monthly_data['Normalized_Count'] = scaler.fit_transform(monthly_data[['Count']])


In [6]:
# Function to create input sequences for LSTM
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back)]
        dataX.append(a)
        dataY.append(dataset[i + look_back])
    return np.array(dataX), np.array(dataY)

# Define the number of past months data to consider for predicting the next month
look_back = 3
X, y = create_dataset(monthly_data['Normalized_Count'].values, look_back)

# Reshape input to be [samples, time steps, features] for LSTM
X = np.reshape(X, (X.shape[0], look_back, 1))


In [7]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Define the LSTM model
model = Sequential()
model.add(LSTM(50, input_shape=(look_back, 1)))  # 50 LSTM units
model.add(Dense(1))  # Output layer that predicts the future value
model.compile(loss='mean_squared_error', optimizer='adam')


In [8]:
# Fit the model on the dataset
model.fit(X, y, epochs=10, batch_size=512, verbose=1)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2ba8ba230>

In [9]:
# Predict future values
predictions = model.predict(X)




In [10]:
# Inverse transform to get predictions in the original count scale
predictions = scaler.inverse_transform(predictions)


In [11]:
# Add predictions to the monthly_data DataFrame
monthly_data['Predicted_Count'] = np.concatenate([np.zeros(look_back), predictions.flatten()])


In [12]:
import folium
from folium.plugins import HeatMap

# Create a map centered around an average location
center_lat, center_lon = monthly_data['Latitude'].mean(), monthly_data['Longitude'].mean()
map = folium.Map(location=[center_lat, center_lon], zoom_start=6)

# Add a heatmap to the map using predicted crime counts
heat_data = [[row['Latitude'], row['Longitude'], row['Predicted_Count']] for index, row in monthly_data.iterrows()]
HeatMap(heat_data).add_to(map)

# Save or display the map
map.save('/Users/arkamandol/DataspellProjects/Desertation_arka_23023023/data_files/crime_hotspots.html')


In [13]:
from sklearn.metrics import mean_squared_error
from math import sqrt

# Calculate RMSE
rmse = sqrt(mean_squared_error(y, predictions))
print('Root Mean Squared Error:', rmse)


Root Mean Squared Error: 1.5721617129086545
