In [None]:
import pandas as pd
import numpy as np

# Crime records CSV; the path is relative to the current working directory.
file_path = 'final_crime.csv'

df = pd.read_csv(file_path)

# Drop the records that have no neighbourhood recorded.
# NOTE(review): the null check reads 'NEIGHBOURHOOD' while the column kept below
# is 'Neighbourhood' -- confirm that the CSV really contains both columns.
has_neighbourhood = df['NEIGHBOURHOOD'].notna()

# Keep only the columns used downstream. The explicit copy makes the result an
# independent frame, so the column assignments made in later cells do not
# trigger pandas' SettingWithCopyWarning.
df = df.loc[
    has_neighbourhood,
    ['YEAR', 'MONTH', 'DAY', 'HOUR', 'Neighbourhood', 'Latitude', 'Longitude'],
].copy()
print("Shape of Dataset imported for use: " + str(df.shape) + "\n")

print("Sample row")
df.head(1)

In [None]:
print("------ Adding date-time object as key ------\n")

# Assemble one timestamp per row from the YEAR / MONTH / DAY / HOUR columns.
date_time_col = pd.to_datetime(df[['YEAR', 'MONTH', 'DAY', 'HOUR']])

# Attach the timestamps as a 'DateTimeMix' column and promote that column to
# the index of the dataset.
df = df.assign(DateTimeMix=date_time_col).set_index('DateTimeMix')

print("Sample row")
df.head(1)

In [None]:
# Give every row currently in the frame the label Crime = 1; rows labelled
# Crime = 0 are added by a later cell.
df = df.assign(Crime=1)

print("Sample row")
df.head(1)

In [None]:
# Table of local areas (neighbourhoods), read straight from a raw GitHub URL.
nb_file_path = 'https://raw.githubusercontent.com/NasirKhalid24/ELE494-Project/master/Datasets/cov_localareas.csv'

neighbourhoods = pd.read_csv(nb_file_path)
# The 'NAME' column of this table is what the following cell iterates over.
print("List of Neighbourhoods : ")
neighbourhoods

In [None]:
# Add an explicit "no crime" row (Crime = 0) for every hour in which a
# neighbourhood has no record.
#
# The gap rows are collected per neighbourhood and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and a single pd.concat avoids
# re-copying the growing frame on every iteration.
zero_crime_frames = []

# unique() guards against a neighbourhood name that is listed twice, which would
# otherwise contribute its gap rows twice.
for i in neighbourhoods['NAME'].unique():
    subset = df[df['Neighbourhood'] == i]

    # Count the records per timestamp, then spread the counts over a regular
    # hourly grid; hours without any record come back as all-NaN rows.
    # NOTE(review): recent pandas releases deprecate the upper-case 'H' alias
    # in favour of 'h' -- revisit when the minimum pandas version is raised.
    hourly_counts = subset.groupby(level=0).count().resample('1H').asfreq()

    # Keep only the empty hours; copy so that the assignments below write to an
    # independent frame rather than to a slice.
    crime_extra = hourly_counts[hourly_counts['Crime'].isna()].copy()

    if crime_extra.shape[0] != 0:
        crime_extra['Crime'] = 0

        # Rebuild the calendar columns from the hourly timestamps in the index.
        crime_extra['YEAR'] = crime_extra.index.year
        crime_extra['MONTH'] = crime_extra.index.month
        crime_extra['DAY'] = crime_extra.index.day
        crime_extra['HOUR'] = crime_extra.index.hour

        crime_extra['Neighbourhood'] = i

        # Any remaining columns keep the NaN produced by asfreq().
        zero_crime_frames.append(crime_extra)

if zero_crime_frames:
    df = pd.concat([df] + zero_crime_frames)

In [None]:
# Target vector: the Crime column of the frame.
Y = df['Crime'].values

# Feature matrix: the four calendar columns followed by a one-hot encoding of
# the neighbourhood. Latitude / Longitude are not part of the features.
neigh = df['Neighbourhood']
onehot_neigh = pd.get_dummies(neigh)

calendar_part = df[['YEAR','MONTH','DAY', 'HOUR']].values
X = np.hstack((calendar_part, onehot_neigh.values))

In [None]:
# Display the first feature row (scaling is applied in a later cell).
X[0]

In [None]:
# Display the label that belongs to the first feature row.
Y[0]

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every feature column and keep the fitted scaler around: the
# prediction cells further down reuse it on hand-built rows.
# NOTE: the scaler is fitted on the full feature matrix, i.e. before the
# train/test split performed in the next cell.
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

print(X[0])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the split
# repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=101
)

# Report the shape of each of the four pieces.
for _caption, _part in (
    ("Training Data Size: ", X_train),
    ("Testing Data Size:", X_test),
    ("Training Label Size: ", Y_train),
    ("Testing Label Size: ", Y_test),
):
    print(_caption + str(_part.shape))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def loss_curve(history):
  """Plot the training and validation loss of a fit run against the epoch number.

  Args:
    history: object returned by ``model.fit``; its ``history`` dict must hold
      the ``'loss'`` and ``'val_loss'`` series.
  """
  log = history.history
  # Both series are looked up before any figure is created, so a missing key
  # fails without leaving an empty figure behind.
  curves = [
      (log['loss'], "Training Loss"),
      (log['val_loss'], "Validation Loss"),
  ]
  epochs = range(1, len(log['loss']) + 1)

  plt.figure()
  for values, caption in curves:
    plt.plot(epochs, values, label=caption)
  plt.ylabel('Loss Value')
  plt.xlabel('Epochs')
  plt.title('Epochs vs Loss')
  plt.legend()
  
def accuracy(history):
  """Plot training and validation accuracy, in percent, against the epoch number.

  Args:
    history: object returned by ``model.fit``; its ``history`` dict must hold
      the per-epoch accuracy under either ``'acc'`` or ``'accuracy'``. The
      matching validation series is plotted when it is present.

  Raises:
    KeyError: if neither accuracy key is present.
  """
  log = history.history

  # The key depends on the Keras version when the model is compiled with
  # metrics=['accuracy']: older releases log 'acc' / 'val_acc', newer ones log
  # 'accuracy' / 'val_accuracy'.
  acc_key = 'acc' if 'acc' in log else 'accuracy'
  val_key = 'val_' + acc_key

  # Convert the fractions to percentages.
  acc = np.asarray(log[acc_key]) * 100
  x_axis = range(1, len(acc) + 1)

  plt.figure()
  plt.plot(x_axis, acc, label="Accuracy")
  # The validation series only exists when fit() was given validation data.
  if val_key in log:
    plt.plot(x_axis, np.asarray(log[val_key]) * 100, label="Validation Accuracy")
  plt.ylabel('Accuracy')
  plt.xlabel('Epochs')
  plt.title('Epochs vs Accuracy')
  plt.legend()

In [None]:
from keras.layers import Dense, Dropout
from keras.models import Sequential

# Fully connected binary classifier: ReLU hidden layers of width
# 64 -> 128 -> 512 -> 128 -> 64, followed by a single sigmoid output unit.
# The input width is taken from the training matrix.
hidden_widths = (128, 512, 128, 64)

model = Sequential(
    [Dense(64, activation='relu', input_shape=(X_train.shape[1], ))]
    + [Dense(width, activation='relu') for width in hidden_widths]
    + [Dense(1, activation='sigmoid')]
)

model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Train for 15 epochs in batches of 512 rows; 20% of the training rows are set
# aside for validation. The returned history feeds the plotting helpers.
history = model.fit(
    X_train,
    Y_train,
    batch_size=512,
    epochs=15,
    validation_split=0.2,
    verbose=1,
)

In [None]:
# Plot the per-epoch loss and accuracy recorded during training.
loss_curve(history)
accuracy(history)

In [None]:
# Names of the one-hot neighbourhood columns, in the order they appear in X.
labels = onehot_neigh.columns.tolist()

# Loss and accuracy on the held-out test rows, followed by the raw model
# outputs for those rows.
evaluation = model.evaluate(X_test, Y_test)
test_loss, test_acc = evaluation
y_pred = model.predict(X_test)

print("Test Loss: " + str(test_loss))
print("Test Accuracy: " + str(test_acc) + "\n")

In [None]:
# Hand-built query row. The layout has to match the columns of X: YEAR, MONTH,
# DAY, HOUR, then one 0/1 flag per one-hot neighbourhood column (same order as
# `labels`).
# NOTE(review): the row length and the position of the 1 are hard-coded; they
# must agree with the feature matrix the scaler and model were fitted on.
test = np.array([[2019, 2, 11, 3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])

# Apply the scaler fitted on X before asking the model for a prediction.
test = scaler.transform(test)
ans = model.predict(test)
ans

In [None]:
# Second hand-built query row, same layout as X: YEAR, MONTH, DAY, HOUR, then
# one 0/1 flag per one-hot neighbourhood column (same order as `labels`).
# NOTE(review): the row length and the position of the 1 are hard-coded; they
# must agree with the feature matrix the scaler and model were fitted on.
test = np.array([[2019, 2, 11, 12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0]])
# Apply the scaler fitted on X before asking the model for a prediction.
test = scaler.transform(test)
ans = model.predict(test)
ans

In [None]:
# Print the model output for every hour (0-23) of 2005-12-31 for one fixed
# neighbourhood column.
# NOTE(review): the one-hot part of the row is hard-coded; its length and the
# position of the 1 must agree with the columns of X (see `labels`).
for i in range(0, 24):
#   Earlier variant that used coordinates and distance features (kept for reference):
#   test = np.array([[2012, 1, 1, i, 49.284645, -123.136306, DistanceToGraffiti(49.284645, -123.136306), DistanceToFountain(49.284645, -123.136306)]])
  test = np.array([[2005, 12,31, i,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])

  # Scale with the scaler fitted on X, then read the single sigmoid output.
  test = scaler.transform(test)
  ans = model.predict(test)
  print("Likelihood of crime at " + str(i) + " hour: " + str(ans[0][0]*100) + " %") 

In [None]:
# Show the one-hot neighbourhood column names; their order gives the position
# of each neighbourhood flag in the hand-built query rows.
labels