In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf


In [2]:
# Import our input dataset
df = pd.read_csv('../Resources/tableau_data.csv')
df.head()

Unnamed: 0,Registration State,Plate Type,Violation Code,Vehicle Body Type,Vehicle Make,Violation Time,Vehicle Color,Vehicle Year
0,NY,PAS,67,SDN,TOYOT,10:37 AM,BLK,2004
1,NY,PAS,51,SUBN,JEEP,10:45 AM,GRY,2017
2,NY,PAS,67,SUBN,TOYOT,12:05 PM,OTH,2021
3,NY,PAS,98,SUBN,NISSA,05:35 AM,BLK,2002
4,FL,PAS,51,SUBN,SUBAR,03:20 AM,GRY,2005


In [3]:
# Generate our categorical variable list
df_cat = df.dtypes[df.dtypes == "object"].index.tolist()

In [4]:
# Check the number of unique values in each column
df[df_cat].nunique()

Registration State      8
Plate Type              9
Vehicle Body Type       9
Vehicle Make           13
Violation Time        784
Vehicle Color           8
dtype: int64

In [5]:
#convert to violation time to dataetime format
import datetime
df["Violation Time"] = pd.to_datetime(df["Violation Time"], format='%H:%M %p' )

In [6]:
#converting datetime into minutes passed 12:00 AM
df['Violation Time'] = df['Violation Time'].apply(lambda x: (x.to_pydatetime()-datetime.datetime(1900,1,1)).total_seconds()/60)
df.head(10)

Unnamed: 0,Registration State,Plate Type,Violation Code,Vehicle Body Type,Vehicle Make,Violation Time,Vehicle Color,Vehicle Year
0,NY,PAS,67,SDN,TOYOT,637.0,BLK,2004
1,NY,PAS,51,SUBN,JEEP,645.0,GRY,2017
2,NY,PAS,67,SUBN,TOYOT,725.0,OTH,2021
3,NY,PAS,98,SUBN,NISSA,335.0,BLK,2002
4,FL,PAS,51,SUBN,SUBAR,200.0,GRY,2005
5,NY,PAS,63,SUBN,CHEVR,662.0,WHT,2010
6,NY,PAS,45,SDN,NISSA,366.0,BLK,2002
7,NY,OMS,14,SDN,TOYOT,770.0,BLK,2022
8,NY,PAS,20,SUBN,HONDA,631.0,RED,2005
9,NY,PAS,27,SDN,NISSA,367.0,BLK,2022


In [7]:
#Optional Code if ML doesn't like the smaller counts of violation codes
filtered_df = df.copy()
bool_df = (filtered_df.groupby(['Violation Code']).count() > 10000)
list_of_codes = bool_df.index[bool_df['Registration State'] == True].tolist()

filtered_df['Violation Code'] = filtered_df['Violation Code'].loc[filtered_df['Violation Code'].isin(list_of_codes)]
filtered_df = filtered_df.dropna()
filtered_df.reset_index(drop = True)
filtered_df.head()

Unnamed: 0,Registration State,Plate Type,Violation Code,Vehicle Body Type,Vehicle Make,Violation Time,Vehicle Color,Vehicle Year
1,NY,PAS,51.0,SUBN,JEEP,645.0,GRY,2017
3,NY,PAS,98.0,SUBN,NISSA,335.0,BLK,2002
4,FL,PAS,51.0,SUBN,SUBAR,200.0,GRY,2005
7,NY,OMS,14.0,SDN,TOYOT,770.0,BLK,2022
8,NY,PAS,20.0,SUBN,HONDA,631.0,RED,2005


In [8]:
filtered_df_cat = filtered_df.dtypes[filtered_df.dtypes == "object"].index.tolist()
filtered_df[filtered_df_cat].nunique()

Registration State     8
Plate Type             9
Vehicle Body Type      9
Vehicle Make          13
Vehicle Color          8
dtype: int64

In [9]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(filtered_df[filtered_df_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(filtered_df_cat)
encode_df.head()



Unnamed: 0,Registration State_CT,Registration State_FL,Registration State_GA,Registration State_NJ,Registration State_NY,Registration State_OTHER,Registration State_PA,Registration State_TX,Plate Type_APP,Plate Type_COM,...,Vehicle Make_SUBAR,Vehicle Make_TOYOT,Vehicle Color_BLK,Vehicle Color_BLU,Vehicle Color_GRN,Vehicle Color_GRY,Vehicle Color_ORG,Vehicle Color_OTH,Vehicle Color_RED,Vehicle Color_WHT
0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [10]:
# Merge one-hot encoded features and drop the originals
nn_df = df.merge(encode_df,left_index=True, right_index=True)
nn_df = nn_df.drop(df_cat,1)
nn_df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Violation Code,Vehicle Year,Registration State_CT,Registration State_FL,Registration State_GA,Registration State_NJ,Registration State_NY,Registration State_OTHER,Registration State_PA,Registration State_TX,...,Vehicle Make_SUBAR,Vehicle Make_TOYOT,Vehicle Color_BLK,Vehicle Color_BLU,Vehicle Color_GRN,Vehicle Color_GRY,Vehicle Color_ORG,Vehicle Color_OTH,Vehicle Color_RED,Vehicle Color_WHT
0,67,2004,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,51,2017,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,67,2021,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,98,2002,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,51,2005,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [11]:
# Split our preprocessed data into our features and target arrays
x_cols = [i for i in nn_df.columns if i not in ('Violation Code')]
X, y = nn_df[x_cols], nn_df['Violation Code']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [12]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [13]:
len(X_train.iloc[0])

48

In [14]:
# Define the model - deep neural net
number_input_features = len(X_train.iloc[0])
hidden_nodes_layer1 = 8
hidden_nodes_layer2 = 5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 8)                 392       
                                                                 
 dense_1 (Dense)             (None, 5)                 45        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 443
Trainable params: 443
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [16]:
# Train the model
fit_model = nn.fit(X_train,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [17]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

59295/59295 - 57s - loss: -7.1722e+18 - accuracy: 0.0000e+00 - 57s/epoch - 966us/step
Loss: -7.172166574785167e+18, Accuracy: 0.0


In [21]:
y_pred = nn.predict(X_test)



In [14]:
from sklearn.metrics import confusion_matrix, balanced_accuracy_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.metrics import classification_report_imbalanced

In [29]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
cm = confusion_matrix(y_test, y_pred)
bac = balanced_accuracy_score(y_test, y_pred)

print("Evaluation metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Confusion Matrix: {cm}")
print(f"Balanced Accuracy Score: {bac}")
print()

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Evaluation metrics:
Accuracy: 0.0000
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Confusion Matrix: [[   0    0    0 ...    0    0    0]
 [   2    0    0 ...    0    0    0]
 [  88    0    0 ...    0    0    0]
 ...
 [   8    0    0 ...    0    0    0]
 [5772    0    0 ...    0    0    0]
 [ 265    0    0 ...    0    0    0]]
Balanced Accuracy Score: 0.0





In [15]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 100
hidden_nodes_layer2 = 40

nn2 = tf.keras.models.Sequential()

# First hidden layer
nn2.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="tanh")
)

# Second hidden layer
nn2.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="leaky_relu"))

# Output layer
nn2.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn2.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               4900      
                                                                 
 dense_1 (Dense)             (None, 40)                4040      
                                                                 
 dense_2 (Dense)             (None, 1)                 41        
                                                                 
Total params: 8,981
Trainable params: 8,981
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Compile the model
nn2.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [17]:
# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the checkpoint path and filenames
os.makedirs("../checkpoints/",exist_ok=True)
checkpoint_path = "../checkpoints/parking_violations__nn.h5"

cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq='epoch')

In [18]:
# Train the model
fit_model = nn2.fit(X_train_scaled,y_train,epochs=100,callbacks=[cp_callback])

Epoch 1/100
Epoch 1: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 2/100
Epoch 2: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 3/100
Epoch 3: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 4/100
Epoch 4: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 5/100
Epoch 5: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 6/100
Epoch 6: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 7/100
Epoch 7: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 8/100
Epoch 8: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 9/100
Epoch 9: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 10/100
Epoch 10: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 11/100
Epoch 11: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 12/100
Epoch 12: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 13/100
Epoch 13: saving model to ../checkpoints\park

Epoch 28: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 29/100
Epoch 29: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 30/100
Epoch 30: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 31/100
Epoch 31: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 32/100
Epoch 32: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 33/100
Epoch 33: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 34/100
Epoch 34: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 35/100
Epoch 35: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 36/100
Epoch 36: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 37/100
Epoch 37: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 38/100
Epoch 38: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 39/100
Epoch 39: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 40/100
Epoch 40: saving model to ../checkpoints

Epoch 55: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 56/100
Epoch 56: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 57/100
Epoch 57: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 58/100
Epoch 58: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 59/100
Epoch 59: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 60/100
Epoch 60: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 61/100
Epoch 61: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 62/100
Epoch 62: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 63/100
Epoch 63: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 64/100
Epoch 64: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 65/100
Epoch 65: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 66/100
Epoch 66: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 67/100
Epoch 67: saving model to ../checkpoints

Epoch 82: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 83/100
Epoch 83: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 84/100
Epoch 84: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 85/100
Epoch 85: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 86/100
Epoch 86: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 87/100
Epoch 87: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 88/100
Epoch 88: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 89/100
Epoch 89: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 90/100
Epoch 90: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 91/100
Epoch 91: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 92/100
Epoch 92: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 93/100
Epoch 93: saving model to ../checkpoints\parking_violations__nn.h5
Epoch 94/100
Epoch 94: saving model to ../checkpoints

In [20]:
model_loss, model_accuracy = nn2.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

59295/59295 - 63s - loss: -2.0906e+13 - accuracy: 0.0000e+00 - 63s/epoch - 1ms/step
Loss: -20906388750336.0, Accuracy: 0.0


In [21]:
y_pred = nn2.predict(X_test)



In [22]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
cm = confusion_matrix(y_test, y_pred)
bac = balanced_accuracy_score(y_test, y_pred)

print("Evaluation metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Confusion Matrix: {cm}")
print(f"Balanced Accuracy Score: {bac}")
print()

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Evaluation metrics:
Accuracy: 0.0000
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
Confusion Matrix: [[   0    0    0 ...    0    0    0]
 [   0    0    0 ...    0    0    0]
 [   0    2    0 ...    0    0    0]
 ...
 [   0    8    0 ...    0    0    0]
 [  48 5724    0 ...    0    0    0]
 [   5  260    0 ...    0    0    0]]
Balanced Accuracy Score: 0.0



