In [2]:
# Import the modules
import numpy as np
import pandas as pd
from pathlib import Path
import csv
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report

In [3]:
# Read the CSV file from the Resources folder into a Pandas DataFrame.
# The path is built relative to the working directory instead of a user-specific
# absolute Windows path, so the notebook can run on another machine.
# NOTE(review): assumes the notebook is started from the folder that contains
# `Resources/` (previously ...\Data Analysis\Resources) — confirm.
data_path = Path("Resources") / "cleaned_data.csv"
df = pd.read_csv(data_path)
df.head()

Unnamed: 0.1,Unnamed: 0,ID,AreaName,VicAge,VicSex,VicRace,StatusCode,Lat,Lon,CrimeType,AgeGroup,AddressType,Arrest
0,0,190326475,Wilshire,999,M,Multiracial,AA,34.0375,-118.3506,Theft,Unknown,Public Space,Yes
1,1,200106753,Central,47,M,Multiracial,IC,34.0444,-118.2628,Theft,40-54,Public Space,No
2,2,200320258,Southwest,19,,,IC,34.021,-118.3002,Theft,Young Adult,Residential,No
3,3,200907217,Van Nuys,19,M,Multiracial,IC,34.1576,-118.4387,Theft,Young Adult,Buisness,No
4,4,220614831,Hollywood,28,M,Hispanic,IC,34.0944,-118.3277,Theft,25-39,Public Space,No


In [4]:
# Remove the columns that are not used by the rest of the notebook
unneeded_columns = ["Unnamed: 0", "ID", "StatusCode", "Lat", "Lon", "AgeGroup", "Arrest"]
df = df.drop(labels=unneeded_columns, axis="columns")
df.head()

Unnamed: 0,AreaName,VicAge,VicSex,VicRace,CrimeType,AddressType
0,Wilshire,999,M,Multiracial,Theft,Public Space
1,Central,47,M,Multiracial,Theft,Public Space
2,Southwest,19,,,Theft,Residential
3,Van Nuys,19,M,Multiracial,Theft,Buisness
4,Hollywood,28,M,Hispanic,Theft,Public Space


In [5]:
# Recode crime types into a binary "Violent" label ("Yes"/"No").
# An explicit mapping keeps every crime type next to its label instead of
# relying on two parallel lists staying in the same order.
violent_by_crime = {
    'Theft': "No",
    'Assault': "Yes",
    'Sex Crime': "Yes",
    'Other': "No",
    'Weapons': "Yes",
    'Robbery': "Yes",
    'Fraud': "No",
    'Property Destruct': "No",
    'Kidnapping': "Yes",
    'Driving Offense': "No",
    'Homicide': "Yes",
}
# Keep the original list names defined in case another cell refers to them.
crime = list(violent_by_crime)
violent = list(violent_by_crime.values())

df["Violent"] = df["CrimeType"].replace(violent_by_crime)

# replace() leaves values it has no mapping for unchanged. Such a value would
# later be one-hot encoded into its own "Violent_<type>" column and stay in the
# feature matrix, so fail fast instead of letting it through silently.
unmapped = set(df["Violent"].dropna().unique()) - {"Yes", "No"}
if unmapped:
    raise ValueError(
        f"CrimeType values without a Violent label: {sorted(map(str, unmapped))}"
    )
df.head()

Unnamed: 0,AreaName,VicAge,VicSex,VicRace,CrimeType,AddressType,Violent
0,Wilshire,999,M,Multiracial,Theft,Public Space,No
1,Central,47,M,Multiracial,Theft,Public Space,No
2,Southwest,19,,,Theft,Residential,No
3,Van Nuys,19,M,Multiracial,Theft,Buisness,No
4,Hollywood,28,M,Hispanic,Theft,Public Space,No


In [6]:
# Keep only rows whose VicAge is below 999 (the missing-age code), then
# discard the CrimeType column.
has_known_age = df["VicAge"] < 999
df = df[has_known_age].drop(columns=["CrimeType"])
df.head()

Unnamed: 0,AreaName,VicAge,VicSex,VicRace,AddressType,Violent
1,Central,47,M,Multiracial,Public Space,No
2,Southwest,19,,,Residential,No
3,Van Nuys,19,M,Multiracial,Buisness,No
4,Hollywood,28,M,Hispanic,Public Space,No
5,Southeast,41,M,Hispanic,Residential,No


In [7]:
# One-hot encode the categorical columns with `pd.get_dummies`,
# emitting the indicator columns as integers (0/1) rather than booleans
X_encoded = pd.get_dummies(data=df, dtype=int)
X_encoded


Unnamed: 0,VicAge,AreaName_77th Street,AreaName_Central,AreaName_Devonshire,AreaName_Foothill,AreaName_Harbor,AreaName_Hollenbeck,AreaName_Hollywood,AreaName_Mission,AreaName_N Hollywood,...,VicRace_Hispanic,VicRace_Multiracial,VicRace_White,AddressType_Buisness,AddressType_Other,AddressType_Public Space,AddressType_Public Transit,AddressType_Residential,Violent_No,Violent_Yes
1,47,0,1,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,1,0
2,19,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
3,19,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,0,0,1,0
4,28,0,0,0,0,0,0,1,0,0,...,1,0,0,0,0,1,0,0,1,0
5,41,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
883982,29,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
883983,30,0,0,0,0,0,0,1,0,0,...,1,0,0,0,0,0,0,1,1,0
883984,24,0,1,0,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,1,0
883985,53,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,1,0


In [8]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf


# Target array: the "Violent_Yes" indicator column
y = X_encoded["Violent_Yes"]

# Feature array: everything except the two indicator columns derived from the target
X = X_encoded.drop(columns=["Violent_Yes", "Violent_No"])

# Split into training and testing sets (default split size, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [9]:
# Create a StandardScaler instance and fit it on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the scaling learned from the training data to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [12]:
# Define the model - a deep neural net with two small hidden layers.
# The number of input features is read from the scaled training data instead of
# being hard-coded, so the model still builds if the one-hot encoding produces
# a different number of columns.
number_input_features = X_train_scaled.shape[1]

nn_model1 = tf.keras.models.Sequential()

# First hidden layer
nn_model1.add(
    tf.keras.layers.Dense(units=6, activation="relu", input_dim=number_input_features)
)

# Second hidden layer
nn_model1.add(tf.keras.layers.Dense(units=6, activation="relu"))

# Output layer - single sigmoid unit for the binary target
nn_model1.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_model1.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 6)                 222       
                                                                 
 dense_4 (Dense)             (None, 6)                 42        
                                                                 
 dense_5 (Dense)             (None, 1)                 7         
                                                                 
Total params: 271 (1.06 KB)
Trainable params: 271 (1.06 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
nn_model1.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [14]:
# Train the model for 50 epochs on the scaled training data
fit_model = nn_model1.fit(x=X_train_scaled, y=y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [15]:
# Evaluate the model using the test data.
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_scores = nn_model1.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss, model_accuracy = test_scores
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5177/5177 - 5s - loss: 0.6090 - accuracy: 0.6601 - 5s/epoch - 946us/step
Loss: 0.6090089678764343, Accuracy: 0.6601342558860779


In [None]:
# Second Attempt - added a sigmoid hidden layer; added neurons to the layers
nn_model2 = tf.keras.models.Sequential([
    # First hidden layer (also declares the 36 input features)
    tf.keras.layers.Dense(units=50, activation="relu", input_dim=36),
    # Second hidden layer
    tf.keras.layers.Dense(units=50, activation="relu"),
    # Third hidden layer
    tf.keras.layers.Dense(units=50, activation="sigmoid"),
    # Output layer
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn_model2.summary()

# Compile the model
nn_model2.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Train the model
fit_model = nn_model2.fit(x=X_train_scaled, y=y_train, epochs=50)

Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_57 (Dense)            (None, 50)                1850      
                                                                 
 dense_58 (Dense)            (None, 50)                2550      
                                                                 
 dense_59 (Dense)            (None, 50)                2550      
                                                                 
 dense_60 (Dense)            (None, 1)                 51        
                                                                 
Total params: 7001 (27.35 KB)
Trainable params: 7001 (27.35 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoc

In [None]:
# Second Evaluation - score nn_model2 on the scaled test data
test_scores = nn_model2.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss, model_accuracy = test_scores
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# USE RESAMPLED DATA
# Import the RandomOverSampler module from imbalanced-learn
from imblearn.over_sampling import RandomOverSampler

# Assign a random_state parameter of 1 to the model
ros_model = RandomOverSampler(random_state=1)

# Oversample the scaled training data only.
# Fix: the result used to be bound to `x_resampled` but read back as
# `X_resampled`, which raised a NameError.
X_resampled, y_resampled = ros_model.fit_resample(X_train_scaled, y_train)

# Train on the whole oversampled training set and evaluate on the original,
# un-resampled test split. Splitting the oversampled data a second time would
# place copies of the same duplicated rows in both the train and test parts,
# which inflates the measured score.
X_rs_train, y_rs_train = X_resampled, y_resampled
X_rs_test, y_rs_test = X_test_scaled, y_test


In [None]:
# Third Attempt - same architecture as the second attempt, trained on the resampled data
nn_model3 = tf.keras.models.Sequential()

# First hidden layer
nn_model3.add(tf.keras.layers.Dense(units=50, activation="relu", input_dim=36))

# Second hidden layer
nn_model3.add(tf.keras.layers.Dense(units=50, activation="relu"))

# Third hidden layer
nn_model3.add(tf.keras.layers.Dense(units=50, activation="sigmoid"))


# Output layer
nn_model3.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_model3.summary()

# Compile the model
nn_model3.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model.
# Fix: the features were referenced as `x_rs_train`, a name that is never
# defined; the resampling cell creates `X_rs_train` (capital X).
fit_model = nn_model3.fit(X_rs_train, y_rs_train, epochs=50)

In [None]:
# Third Evaluation - score nn_model3 on the X_rs_test / y_rs_test data
test_scores = nn_model3.evaluate(x=X_rs_test, y=y_rs_test, verbose=2)
model_loss, model_accuracy = test_scores
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [30]:
# Fourth Attempt - back to the regular (not resampled) data; added a relu layer and neurons
nn_model4 = tf.keras.models.Sequential([
    # First hidden layer (also declares the 36 input features)
    tf.keras.layers.Dense(units=100, activation="relu", input_dim=36),
    # Second hidden layer
    tf.keras.layers.Dense(units=100, activation="relu"),
    # Third hidden layer
    tf.keras.layers.Dense(units=100, activation="relu"),
    # Fourth hidden layer
    tf.keras.layers.Dense(units=100, activation="sigmoid"),
    # Output layer
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn_model4.summary()

# Compile the model
nn_model4.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Train the model
fit_model = nn_model4.fit(x=X_train_scaled, y=y_train, epochs=50)

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_38 (Dense)            (None, 100)               3700      
                                                                 
 dense_39 (Dense)            (None, 100)               10100     
                                                                 
 dense_40 (Dense)            (None, 100)               10100     
                                                                 
 dense_41 (Dense)            (None, 100)               10100     
                                                                 
 dense_42 (Dense)            (None, 1)                 101       
                                                                 
Total params: 34101 (133.21 KB)
Trainable params: 34101 (133.21 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/50
Epoc

In [31]:
# Fourth Evaluation - score nn_model4 on the scaled test data
test_scores = nn_model4.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss, model_accuracy = test_scores
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5177/5177 - 5s - loss: 0.6155 - accuracy: 0.6541 - 5s/epoch - 993us/step
Loss: 0.6154928207397461, Accuracy: 0.6541211009025574


In [32]:
# Fifth Attempt - added layers; varied the number of neurons per layer
nn_model5 = tf.keras.models.Sequential([
    # First hidden layer (also declares the 36 input features)
    tf.keras.layers.Dense(units=100, activation="relu", input_dim=36),
    # Second hidden layer
    tf.keras.layers.Dense(units=90, activation="relu"),
    # Third hidden layer
    tf.keras.layers.Dense(units=80, activation="relu"),
    # Fourth hidden layer
    tf.keras.layers.Dense(units=70, activation="sigmoid"),
    # Fifth hidden layer
    tf.keras.layers.Dense(units=60, activation="sigmoid"),
    # Sixth hidden layer
    tf.keras.layers.Dense(units=3, activation="sigmoid"),
    # Output layer
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn_model5.summary()

# Compile the model
nn_model5.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Train the model
fit_model = nn_model5.fit(x=X_train_scaled, y=y_train, epochs=50)

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_43 (Dense)            (None, 100)               3700      
                                                                 
 dense_44 (Dense)            (None, 90)                9090      
                                                                 
 dense_45 (Dense)            (None, 80)                7280      
                                                                 
 dense_46 (Dense)            (None, 70)                5670      
                                                                 
 dense_47 (Dense)            (None, 60)                4260      
                                                                 
 dense_48 (Dense)            (None, 3)                 183       
                                                                 
 dense_49 (Dense)            (None, 1)               

In [34]:
# Fifth Evaluation - score nn_model5 on the scaled test data
test_scores = nn_model5.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss, model_accuracy = test_scores
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5177/5177 - 5s - loss: 0.6144 - accuracy: 0.6589 - 5s/epoch - 1ms/step
Loss: 0.614448070526123, Accuracy: 0.65887850522995


In [35]:
# Sixth Attempt - added and varied neurons
nn_model6 = tf.keras.models.Sequential([
    # First hidden layer (also declares the 36 input features)
    tf.keras.layers.Dense(units=500, activation="relu", input_dim=36),
    # Second hidden layer
    tf.keras.layers.Dense(units=300, activation="relu"),
    # Third hidden layer
    tf.keras.layers.Dense(units=100, activation="relu"),
    # Fourth hidden layer
    tf.keras.layers.Dense(units=250, activation="sigmoid"),
    # Fifth hidden layer
    tf.keras.layers.Dense(units=350, activation="sigmoid"),
    # Sixth hidden layer
    tf.keras.layers.Dense(units=20, activation="sigmoid"),
    # Output layer
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn_model6.summary()

# Compile the model
nn_model6.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Train the model
fit_model = nn_model6.fit(x=X_train_scaled, y=y_train, epochs=50)

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_50 (Dense)            (None, 500)               18500     
                                                                 
 dense_51 (Dense)            (None, 300)               150300    
                                                                 
 dense_52 (Dense)            (None, 100)               30100     
                                                                 
 dense_53 (Dense)            (None, 250)               25250     
                                                                 
 dense_54 (Dense)            (None, 350)               87850     
                                                                 
 dense_55 (Dense)            (None, 20)                7020      
                                                                 
 dense_56 (Dense)            (None, 1)               

In [36]:
# Sixth Evaluation - score nn_model6 on the scaled test data
test_scores = nn_model6.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss, model_accuracy = test_scores
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5177/5177 - 13s - loss: 0.6285 - accuracy: 0.6515 - 13s/epoch - 2ms/step
Loss: 0.6284589767456055, Accuracy: 0.6515250205993652


In [20]:
# Sixth Attempt (smaller variant) - three relu hidden layers with 15, 30 and 40 neurons.
# Note: this rebinds the name nn_model6, which the larger sixth-attempt cell also uses.
nn_model6 = tf.keras.models.Sequential()

# First hidden layer - the only layer that needs the input size
nn_model6.add(tf.keras.layers.Dense(units=15, activation="relu", input_dim=36))

# Second hidden layer (its input size is inferred from the previous layer;
# the `input_dim=36` it used to carry was redundant)
nn_model6.add(tf.keras.layers.Dense(units=30, activation="relu"))

# Third hidden layer
nn_model6.add(tf.keras.layers.Dense(units=40, activation="relu"))

# Output layer
nn_model6.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_model6.summary()

# Compile the model
nn_model6.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model6.fit(X_train_scaled, y_train, epochs=50)

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_36 (Dense)            (None, 15)                555       
                                                                 
 dense_37 (Dense)            (None, 30)                480       
                                                                 
 dense_38 (Dense)            (None, 40)                1240      
                                                                 
 dense_39 (Dense)            (None, 1)                 41        
                                                                 
Total params: 2316 (9.05 KB)
Trainable params: 2316 (9.05 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/50
Epoch 2/50

KeyboardInterrupt: 

In [None]:
# Sixth Evaluation - score whichever model is currently bound to nn_model6 on the scaled test data
test_scores = nn_model6.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss, model_accuracy = test_scores
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Export the second model (nn_model2) to an HDF5 file
# NOTE(review): the file name says "ArrestRate" although the target trained on
# in this notebook is "Violent_Yes" — confirm the intended name.
model_path = "Saved NN Models/ArrestRateModel.h5"
nn_model2.save(model_path)