# Imports

In [1]:
import numpy as np
import pandas as pd
import math

import os

import matplotlib.pyplot as plt

import sklearn

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout, BatchNormalization, Conv1D, GlobalMaxPooling1D, Flatten

# Getting File Path

In [2]:
path = os.getcwd()
files = os.listdir(path)

newPath = path+'\\mergedCleanedCRDataset.csv'
print(newPath)

C:\Users\nbadr\OneDrive - City St George's, University of London\Intro to AI\Project_IntroToAI\mergedCleanedCRDataset.csv


In [3]:
crDf = pd.read_csv(newPath,sep='\t')
crDf.head()

Unnamed: 0,Year,Month Num,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type
0,2022,10,Avon and Somerset Constabulary,Avon and Somerset Constabulary,-2.492876,51.422716,On or near Trajectus Way,E01014399,Bath and North East Somerset 001A,Anti-social behaviour
1,2022,10,Avon and Somerset Constabulary,Avon and Somerset Constabulary,-2.491146,51.425008,On or near Maximus Gardens,E01014399,Bath and North East Somerset 001A,Other theft
2,2022,10,Avon and Somerset Constabulary,Avon and Somerset Constabulary,-2.515659,51.408897,On or near Caroline Close,E01014399,Bath and North East Somerset 001A,Violence and sexual offences
3,2022,10,Avon and Somerset Constabulary,Avon and Somerset Constabulary,-2.491616,51.424619,On or near Julius Place,E01014399,Bath and North East Somerset 001A,Violence and sexual offences
4,2022,10,Avon and Somerset Constabulary,Avon and Somerset Constabulary,-2.492876,51.422716,On or near Trajectus Way,E01014399,Bath and North East Somerset 001A,Violence and sexual offences


# Label Encoding

Converting categorical columns into numerical ones

In [4]:
crEncDf = crDf.copy()

for i in crEncDf.columns:
    if not (i == 'Longitude' or i=='Latitude'):
        labEnc = LabelEncoder()
        crEncDf[i] = labEnc.fit_transform(crEncDf[i])

crEncDf.head()

Unnamed: 0,Year,Month Num,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type
0,0,9,0,0,-2.492876,51.422716,246246,13087,1481,0
1,0,9,0,0,-2.491146,51.425008,160787,13087,1481,6
2,0,9,0,0,-2.515659,51.408897,45587,13087,1481,13
3,0,9,0,0,-2.491616,51.424619,133151,13087,1481,13
4,0,9,0,0,-2.492876,51.422716,246246,13087,1481,13


# Location Data Normalisation

Here we are normalising the Longitude and Latitude between a 0 to 1 scale, we may lose some data, but it ensures that it is easier for the models to process those columns

In [5]:
crScalDf = crEncDf.copy()

for i in crScalDf.columns:
    if (i == 'Longitude' or i=='Latitude'):
        minMaxScal = MinMaxScaler()
        crScalDf[i] = minMaxScal.fit_transform(crScalDf[i].to_numpy().reshape(-1,1)).flatten()

crScalDf.head()
        

Unnamed: 0,Year,Month Num,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type
0,0,9,0,0,0.475959,0.259476,246246,13087,1481,0
1,0,9,0,0,0.476172,0.259865,160787,13087,1481,6
2,0,9,0,0,0.473152,0.257133,45587,13087,1481,13
3,0,9,0,0,0.476114,0.259799,133151,13087,1481,13
4,0,9,0,0,0.475959,0.259476,246246,13087,1481,13


# Train Test Split

Train Size is 70%
Test Size is 30% 
Random State is 16

In [38]:
x = crScalDf.drop(columns='Crime type')
y = crScalDf['Crime type']

xTrain, xTest, yTrain, yTest = train_test_split(x,y,test_size=0.3,shuffle=True,random_state=16)

# Applying Convolutional Neural Network

Simple 1D CNN network with single convolution layer with single hidden layer, with batch normalisation for them and global max pooling between the two.

In [44]:
cnnBase1 = Sequential([
    Input(shape=(xTrain.shape[1],1,)),

    Conv1D(filters=32, kernel_size=3, activation='relu'),
    BatchNormalization(),

    GlobalMaxPooling1D(),

    Dense(64, activation='relu'),
    BatchNormalization(),

    Dense(1,activation='softmax')
])

cnnBase1.compile(loss='binary_crossentropy',metrics=['accuracy'])

cnnBase1.summary()



Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_16 (Conv1D)          (None, 7, 32)             128       
                                                                 
 batch_normalization_32 (Ba  (None, 7, 32)             128       
 tchNormalization)                                               
                                                                 
 global_max_pooling1d_16 (G  (None, 32)                0         
 lobalMaxPooling1D)                                              
                                                                 
 dense_29 (Dense)            (None, 64)                2112      
                                                                 
 batch_normalization_33 (Ba  (None, 64)                256       
 tchNormalization)                                               
                                                     

In [45]:
hist1 = cnnBase1.fit(xTrain,yTrain, epochs=25, batch_size=10000)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


# Attempt to get accuracy score by a different method

But of course, failing miserably and too lazy to remove it

In [46]:
accListX = []
accListY = []

yPred1 = cnnBase1.predict(xTest)
acc1 = accuracy_score(np.argmax(yTest,axis=1),np.argmax(yPred2,axis=1))
#accListX.append(i)
accListY.append(acc2)


#plt.plot(accListY)
print(np.max(accListY))



ValueError: `axis` must be fewer than the number of dimensions (1)