In [1]:
# Importing dependencies
import pandas as pd
import numpy as np

# Reading the CSV

In [2]:
# Reading CSV file
df = pd.read_csv('../Data/sac_crime.csv')

df.head()

Unnamed: 0,Beat,Description,Grid,Occurence_Date,Offense_Category,Offense_Code,Offense_Ext,Police_District,Record_ID,Day_of_the_Week,Month,Year,Hour,Encoded_Beat
0,3M,602(L)(1)TRESPASS REFUSE TO LV,734,"Mon 01-01-2018, 02:15:00 AM",TRESPASS,5707,6,3,1287465,1,1,2018,2,8
1,5B,459 PC BURG RESIDENCE-FORCE,1604,"Mon 01-01-2018, 04:45:00 PM",BURGLARY,2202,0,5,1287470,1,1,2018,16,13
2,1C,459 PC BURG BUSINESS-FORCE,309,"Mon 01-01-2018, 05:54:00 AM",BURGLARY,2203,0,1,1287505,1,1,2018,5,2
3,2A,459 PC BURG RESIDENCE-FORCE,222,"Mon 01-01-2018, 03:17:00 AM",BURGLARY,2202,0,2,1287511,1,1,2018,3,3
4,6C,459 PC BURG RESIDENCE-FORCE,1445,"Mon 01-01-2018, 08:00:00 AM",BURGLARY,2202,0,6,1287564,1,1,2018,8,17


In [3]:
# Setting features
selected_features = df[['Offense_Category','Encoded_Beat','Police_District',
                        'Day_of_the_Week','Month','Hour','Year']]
selected_features.head()

Unnamed: 0,Offense_Category,Encoded_Beat,Police_District,Day_of_the_Week,Month,Hour,Year
0,TRESPASS,8,3,1,1,2,2018
1,BURGLARY,13,5,1,1,16,2018
2,BURGLARY,2,1,1,1,5,2018
3,BURGLARY,3,2,1,1,3,2018
4,BURGLARY,17,6,1,1,8,2018


# Data Preprocessing

In [4]:
# Assigning X and y values
X = selected_features.drop('Offense_Category', axis=1)
y = selected_features['Offense_Category']

print(X.shape, y.shape)

(7027, 6) (7027,)


In [5]:
# Importing dependencies
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler

# Splitting data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [6]:
# Creating a MinMaxScaler model and fitting it to the training data
X_scaler = MinMaxScaler().fit(X_train)

# Transform the training and testing data using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Creating a LabelEncoder model and fitting it to y training data
label_encoder = LabelEncoder()
label_encoder.fit(y)

# Encoding y data
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# One-hot encoding y
one_hot_y_train = to_categorical(encoded_y_train)
one_hot_y_test = to_categorical(encoded_y_test)
one_hot_y_train.shape

(5270, 10)

# Building a Deep Neural Network Model

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Creating an empty sequential model
deep_model = Sequential()

In [8]:
# Adding layers
deep_model.add(Dense(units=100, activation="relu", input_dim=6))
deep_model.add(Dense(units=100, activation='relu'))
deep_model.add(Dense(units=10, activation='softmax'))

In [9]:
deep_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               700       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1010      
Total params: 11,810
Trainable params: 11,810
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Compiling the model
deep_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'], run_eagerly=True)

In [11]:
# Using the training data to fit and train the deep model
deep_model.fit(X_train_scaled, one_hot_y_train, epochs=100, shuffle=True, verbose=2)

Train on 5270 samples
Epoch 1/100
5270/5270 - 13s - loss: 1.4022 - accuracy: 0.4393
Epoch 2/100
5270/5270 - 12s - loss: 1.2429 - accuracy: 0.4603
Epoch 3/100
5270/5270 - 11s - loss: 1.2319 - accuracy: 0.4717
Epoch 4/100
5270/5270 - 11s - loss: 1.2288 - accuracy: 0.4793
Epoch 5/100
5270/5270 - 11s - loss: 1.2255 - accuracy: 0.4808
Epoch 6/100
5270/5270 - 11s - loss: 1.2214 - accuracy: 0.4805
Epoch 7/100
5270/5270 - 11s - loss: 1.2178 - accuracy: 0.4801
Epoch 8/100
5270/5270 - 11s - loss: 1.2171 - accuracy: 0.4899
Epoch 9/100
5270/5270 - 12s - loss: 1.2158 - accuracy: 0.4945
Epoch 10/100
5270/5270 - 11s - loss: 1.2116 - accuracy: 0.4954
Epoch 11/100
5270/5270 - 11s - loss: 1.2092 - accuracy: 0.4937
Epoch 12/100
5270/5270 - 11s - loss: 1.2080 - accuracy: 0.4928
Epoch 13/100
5270/5270 - 11s - loss: 1.2047 - accuracy: 0.4985
Epoch 14/100
5270/5270 - 11s - loss: 1.2047 - accuracy: 0.5004
Epoch 15/100
5270/5270 - 11s - loss: 1.2016 - accuracy: 0.5004
Epoch 16/100
5270/5270 - 11s - loss: 1.201

<tensorflow.python.keras.callbacks.History at 0x7f8ff8f41490>

# Saving Trained Model
Will be saving the trained model using the HDF5 binary format with the extension .h5

In [12]:
# Save model
deep_model.save("crime_prediction_model.h5")

# Loading the Model

In [13]:
# Load the model
from tensorflow.keras.models import load_model
crime_prediction_model = load_model("crime_prediction_model.h5")

# Evaluating the Deep Model

In [14]:
deep_model_loss, deep_model_accuracy = crime_prediction_model.evaluate(X_test_scaled, one_hot_y_test, verbose=5)
print(f'Loss: {deep_model_loss}, Accuracy: {deep_model_accuracy}')

Loss: 1.3098622470941472, Accuracy: 0.4735344350337982


In [15]:
# Testing with additional data point
new_crime_data = [[13,5,1,1,16,2018]]
predicted_crime = crime_prediction_model.predict(new_crime_data)
print(predicted_crime)

[[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]


In [16]:
predicted_crime_label = label_encoder.inverse_transform([np.argmax(predicted_crime, axis=None, out=None)])[0]
predicted_crime_label

'LARCENY'