<a href="https://colab.research.google.com/github/harshakoneru98/city_watch/blob/main/modelling/Crime_Rate_Forecasting_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [52]:
import pandas as pd
import json
import tqdm
import seaborn as sns
import numpy as np
import datetime
import tqdm
from sklearn.preprocessing import MinMaxScaler
from keras.layers import LSTM, Dense
from keras.models import Sequential
from keras.callbacks import EarlyStopping
ROOT_PATH = "/content/drive/MyDrive/DSCI 560/Datasets/Cleaned_data/"

In [17]:
crimes = pd.read_csv(f"{ROOT_PATH}/crime_date_aligned.csv")
crime_by_date_zipcode = crimes.groupby(["date_occ", "zip_code"]).agg({ "dr_no": "count"}).reset_index()

In [18]:
# Convert the 'date' column to datetime format
crime_by_date_zipcode['date'] = pd.to_datetime(crime_by_date_zipcode['date_occ'])

# Create a new column for the day of the week
crime_by_date_zipcode['day_of_week'] = crime_by_date_zipcode['date'].dt.dayofweek

# Create a new column for the week of the year
crime_by_date_zipcode['week_of_year'] = crime_by_date_zipcode['date'].dt.weekofyear

# Create a new column for the day of the month
crime_by_date_zipcode['day_of_month'] = crime_by_date_zipcode['date'].dt.day

# Create a new column for the month of the year
crime_by_date_zipcode['month_of_year'] = crime_by_date_zipcode['date'].dt.month

# Create a new column for the year
crime_by_date_zipcode['year'] = crime_by_date_zipcode['date'].dt.year

crime_by_date_zipcode['previous_7_days'] = crime_by_date_zipcode.set_index('date').groupby('zip_code')['dr_no'].rolling(window="7D").count().values

  crime_by_date_zipcode['week_of_year'] = crime_by_date_zipcode['date'].dt.weekofyear


In [21]:
zip_crime_df = crime_by_date_zipcode.groupby('zip_code')['previous_7_days'].mean().reset_index()
zip_crime_map = {k:v for k,v in zip(zip_crime_df['zip_code'], zip_crime_df['previous_7_days'])}
crime_by_date_zipcode['zipcode_avg'] = crime_by_date_zipcode.zip_code.map(lambda x: zip_crime_map[x])

In [67]:
OUTPUT_COLUMN = 'previous_7_days'
INPUT_COLUMNS = ['day_of_week', 'week_of_year', 'day_of_month', 'month_of_year', 'year', 'zipcode_avg']
WINDOW_LENGTH = 30
train_data = crime_by_date_zipcode[crime_by_date_zipcode['date'] <= datetime.datetime(2022, 9, 30)][INPUT_COLUMNS + [OUTPUT_COLUMN]]
test_data = crime_by_date_zipcode[crime_by_date_zipcode['date'] > datetime.datetime(2022, 9, 30)][INPUT_COLUMNS + [OUTPUT_COLUMN]]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train_data)

In [69]:
def create_dataset(dataset, time_steps=1):
    X, Y = [], []
    dataset = dataset.to_numpy()
    for i in tqdm.tqdm(range(len(dataset) - time_steps - 1)):
        a = dataset[i:(i + time_steps), :]
        X.append(a[:, :-1])
        Y.append(a[-1, -1])
    return np.array(X), np.array(Y)

In [70]:
train_X, train_Y = create_dataset(train_data, WINDOW_LENGTH)

100%|██████████| 378290/378290 [00:00<00:00, 855067.50it/s]


In [71]:
test_X, test_Y = create_dataset(test_data, WINDOW_LENGTH)

100%|██████████| 9669/9669 [00:00<00:00, 394547.28it/s]


In [72]:
train_X.shape, train_Y.shape, test_X.shape, test_Y.shape

((378290, 30, 6), (378290,), (9669, 30, 6), (9669,))

In [74]:
# defining model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dense(units=1))

In [None]:
early_stopping_monitor = EarlyStopping(patience = 2)
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(train_X, train_Y, validation_split = 0.1, epochs=100, batch_size=64, callbacks=[early_stopping_monitor])

Epoch 1/100
Epoch 2/100

In [55]:
predictions = model.predict(test_X)



In [59]:
predictions.squeeze()

array([[0.8791423 , 0.87962705, 0.8795396 , ..., 0.8795929 , 0.87959296,
        0.87959296],
       [0.87914294, 0.87962705, 0.87953943, ..., 0.87959296, 0.87959296,
        0.87959296],
       [0.87914234, 0.87962675, 0.87953967, ..., 0.87959296, 0.87959296,
        0.87959313],
       ...,
       [0.87902087, 0.87958443, 0.8795184 , ..., 0.87958086, 0.87958086,
        0.879581  ],
       [0.8790256 , 0.87958574, 0.87951815, ..., 0.87958086, 0.879581  ,
        0.8795809 ],
       [0.87902683, 0.87958527, 0.8795183 , ..., 0.879581  , 0.87958086,
        0.87958086]], dtype=float32)