In [1]:
# Importing dependencies
import pandas as pd
import numpy as np

# Reading the CSV

In [2]:
# Reading CSV file
df = pd.read_csv('../Data/sac_crime.csv')

df.head()

Unnamed: 0,Beat,Description,Grid,Occurence_Date,Offense_Category,Offense_Code,Offense_Ext,Police_District,Record_ID,Day_of_the_Week,Month,Day_of_the_Month,Year,Hour,Minute,Encoded_Beat
0,3M,602(L)(1)TRESPASS REFUSE TO LV,734,"Mon 01-01-2018, 02:30:00 AM",TRESPASS,5707,6,3,1287465,1,1,1,2018,2,15,8
1,5B,459 PC BURG RESIDENCE-FORCE,1604,"Mon 01-01-2018, 05:00:00 PM",BURGLARY,2202,0,5,1287470,1,1,1,2018,16,45,13
2,1C,459 PC BURG BUSINESS-FORCE,309,"Mon 01-01-2018, 06:00:00 AM",BURGLARY,2203,0,1,1287505,1,1,1,2018,5,54,2
3,2A,459 PC BURG RESIDENCE-FORCE,222,"Mon 01-01-2018, 03:30:00 AM",BURGLARY,2202,0,2,1287511,1,1,1,2018,3,17,3
4,6C,459 PC BURG RESIDENCE-FORCE,1445,"Mon 01-01-2018, 08:00:00 AM",BURGLARY,2202,0,6,1287564,1,1,1,2018,8,0,17


In [3]:
# Setting features
selected_features = df[['Offense_Category','Encoded_Beat','Police_District',
                        'Day_of_the_Week','Month','Day_of_the_Month','Hour','Minute','Year']]
selected_features.head()

Unnamed: 0,Offense_Category,Encoded_Beat,Police_District,Day_of_the_Week,Month,Day_of_the_Month,Hour,Minute,Year
0,TRESPASS,8,3,1,1,1,2,15,2018
1,BURGLARY,13,5,1,1,1,16,45,2018
2,BURGLARY,2,1,1,1,1,5,54,2018
3,BURGLARY,3,2,1,1,1,3,17,2018
4,BURGLARY,17,6,1,1,1,8,0,2018


# Data Preprocessing

In [4]:
# Assigning X and y values
X = selected_features.drop('Offense_Category', axis=1)
y = selected_features['Offense_Category']

print(X.shape, y.shape)

(7061, 8) (7061,)


In [5]:
# Importing dependencies
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler

# Splitting data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [6]:
# Creating a MinMaxScaler model and fitting it to the training data
X_scaler = MinMaxScaler().fit(X_train)

# Transform the training and testing data using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Creating a LabelEncoder model and fitting it to y training data
label_encoder = LabelEncoder()
label_encoder.fit(y)

# Encoding y data
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# One-hot encoding y
one_hot_y_train = to_categorical(encoded_y_train)
one_hot_y_test = to_categorical(encoded_y_test)
one_hot_y_train.shape

(5295, 10)

# Building a Deep Neural Network Model

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Creating an empty sequential model
deep_model = Sequential()

In [8]:
# Adding layers
deep_model.add(Dense(units=100, activation="relu", input_dim=8))
deep_model.add(Dense(units=100, activation='relu'))
deep_model.add(Dense(units=10, activation='softmax'))

In [9]:
deep_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               900       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1010      
Total params: 12,010
Trainable params: 12,010
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Compiling the model
deep_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'], run_eagerly=True)

In [11]:
# Using the training data to fit and train the deep model
deep_model.fit(X_train_scaled, one_hot_y_train, epochs=100, shuffle=True, verbose=2)

Train on 5295 samples
Epoch 1/100
5295/5295 - 11s - loss: 1.3863 - accuracy: 0.4312
Epoch 2/100
5295/5295 - 13s - loss: 1.2421 - accuracy: 0.4640
Epoch 3/100
5295/5295 - 14s - loss: 1.2279 - accuracy: 0.4693
Epoch 4/100
5295/5295 - 13s - loss: 1.2222 - accuracy: 0.4720
Epoch 5/100
5295/5295 - 12s - loss: 1.2166 - accuracy: 0.4839
Epoch 6/100
5295/5295 - 13s - loss: 1.2113 - accuracy: 0.4897
Epoch 7/100
5295/5295 - 12s - loss: 1.2096 - accuracy: 0.4854
Epoch 8/100
5295/5295 - 14s - loss: 1.2068 - accuracy: 0.4878
Epoch 9/100
5295/5295 - 13s - loss: 1.2035 - accuracy: 0.4982
Epoch 10/100
5295/5295 - 15s - loss: 1.2015 - accuracy: 0.4876
Epoch 11/100
5295/5295 - 13s - loss: 1.2018 - accuracy: 0.4937
Epoch 12/100
5295/5295 - 13s - loss: 1.1959 - accuracy: 0.4933
Epoch 13/100
5295/5295 - 12s - loss: 1.1942 - accuracy: 0.4988
Epoch 14/100
5295/5295 - 12s - loss: 1.1912 - accuracy: 0.5039
Epoch 15/100
5295/5295 - 13s - loss: 1.1875 - accuracy: 0.5020
Epoch 16/100
5295/5295 - 13s - loss: 1.186

<tensorflow.python.keras.callbacks.History at 0x7fc03cdacfd0>

# Saving Trained Model
Will be saving the trained model using the HDF5 binary format with the extension .h5

In [12]:
# Save model
deep_model.save("crime_prediction_model.h5")

# Loading the Model

In [13]:
# Load the model
from tensorflow.keras.models import load_model
crime_prediction_model = load_model("crime_prediction_model.h5")

# Evaluating the Deep Model

In [14]:
deep_model_loss, deep_model_accuracy = crime_prediction_model.evaluate(X_test_scaled, one_hot_y_test, verbose=5)
print(f'Loss: {deep_model_loss}, Accuracy: {deep_model_accuracy}')

Loss: 1.33238209994976, Accuracy: 0.4654586613178253


In [18]:
# Testing with additional data point
new_crime_data = [[8,3,1,1,1,2,15,2018]]
predicted_crime = crime_prediction_model.predict(new_crime_data)
print(predicted_crime)

[[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]


In [19]:
predicted_crime_label = label_encoder.inverse_transform([np.argmax(predicted_crime, axis=None, out=None)])[0]
predicted_crime_label

'LARCENY'