In [1]:
# load libraries
import numpy as np
import pickle

In [15]:
# load saved training/validation/test data
with open('train_val_test_data.pkl', 'rb') as file:
    X_train, X_val, X_test, y_train, y_val, y_test = pickle.load(file)

In [16]:
# select specific features of the input passwords
mlp_features = ['length', 'uppercase_count', 'lowercase_count', 'numbers_count', 'special_character_count', 'entropy']
X_train = X_train[mlp_features].to_numpy()
X_val = X_val[mlp_features].to_numpy()
X_test = X_test[mlp_features].to_numpy()

In [4]:
# flatten the label data
y_train = y_train.flatten()
y_val = y_val.flatten()
y_test = y_test.flatten()

In [5]:
# calculate accuracy given the true labels and the predictions
def accuracy(y_truth, y_pred):
    correct_pred = 0
    # iterate through the values and check if the labels are the same, update as required
    for y_t, y_p in zip(y_truth, y_pred):
        if y_t == y_p :
            correct_pred += 1
    # find the proportion by dividing the correct predictions by all the predictions
    return correct_pred / len(y_truth)

In [6]:
# softmax function
def softmax(z):
   exp_z = np.exp(z - np.max(z, axis = 1, keepdims = True))
   return exp_z / np.sum(exp_z, axis = 1, keepdims = True)

In [7]:
# relu activation function
def relu(x):
    return np.maximum(0, x)

# derivative of relu for backprop
def relu_derivative(x):
    return (x > 0).astype(float)

In [8]:
# MLP forward function to pass through inputs and weights/biases to retrieve the outputs of last hidden layer
def forward(X, weights, biases, verbose=False):
    # initialize list of layer outputs
    all_layer_outputs = []

    # number of hidden layers is 1 less than the number of weights
    num_hidden_layers = len(weights) - 1

    # initial layer's inputs is the original X
    next_layer_inputs = X
    # create num_hidden_layers layers using a for loop
    for layer in range(num_hidden_layers):
        # calculate the inputs to this hidden layer
        layer_logits = next_layer_inputs.dot(weights[layer]) + biases[layer]
        # find the outputs of the this hidden layer using the activation function
        layer_outputs = relu(layer_logits)
        # save this layer's outputs as the next layer's inputs
        next_layer_inputs = layer_outputs.copy()
        # add the outputs to the cummulative list
        all_layer_outputs.append(layer_outputs.copy())

    # calculate the inputs to the ouput layer
    output_layer_logits = next_layer_inputs.dot(weights[-1]) + biases[-1]
    # find the outputs of the output layer using the activation function
    output_layer_outputs = softmax(output_layer_logits)
    # add the output layer outputs to the cummulative list
    all_layer_outputs.append(output_layer_outputs.copy())
    
    if verbose:
        print("\nforward:\n", all_layer_outputs[-1])

    # return list of outputs of all layers to calculate the error using backprop
    return all_layer_outputs

In [9]:
# MLP backward function to calculate the error using backpropagation
def backward(X, y, all_layer_outputs, weights, biases, learning_rate, verbose=False):
    # find the number of samples
    samples = y.shape[0]

    # the number of hidden layers is one less than the number of weights
    num_hidden_layers = len(weights) - 1

    # total layers in MLP
    total_layers = num_hidden_layers + 1

    # initialize gradients of each layer's weights and biases as they will be calculated in reverse
    weights_gradient = [None] * total_layers
    biases_gradient = [None] * total_layers

    # calculate the initial error as the difference between predictions and true labels (one-hot encoded y data)
    error = all_layer_outputs[-1] - y
    
    # calculate the gradient of each layer's weights and bias term by using the derivative of loss function 
    for layer in range(num_hidden_layers, 0, -1):
        previous_layer_outputs = all_layer_outputs[layer - 1]
        weights_gradient[layer] = previous_layer_outputs.T.dot(error) / samples
        biases_gradient[layer] = np.sum(error, axis=0, keepdims=True) / samples
        
        # inputs to previous layer are outputs from 2 layers before this current layer, otherwise initial inputs X
        if layer > 1:
            previous_layer_inputs = all_layer_outputs[layer - 2]
        else:
            previous_layer_inputs = X
            
        # backpropogate error from this layer to previous layer
        current_layer_logits = previous_layer_inputs.dot(weights[layer - 1]) + biases[layer - 1]
        error = error.dot(weights[layer].T) * relu_derivative(current_layer_logits)
        if verbose:
            print("error", layer, ":", error)

    # calculate gradient of the initial hidden layer's weights and bias term
    weights_gradient[0] = X.T.dot(error) / samples
    biases_gradient[0] = np.sum(error, axis=0, keepdims=True) / samples

    # adjust the weights and bias terms of each layer
    for layer in range(total_layers):
        weights[layer] -= learning_rate * weights_gradient[layer]
        biases[layer] -= learning_rate * biases_gradient[layer]
        
    return weights, biases

In [10]:
# training function for MLP
def MLP_train(X, y, num_classes, num_hidden_nodes_per_layer, learning_rate, epochs):
    # do one-hot encoding of the y labels
    y_one_hot = np.eye(num_classes)[y].reshape(len(y), num_classes)
    # input size is the number of features for each password
    num_features = X.shape[1]
    # number of hidden layers is the length of number of hidden nodes per layer
    num_hidden_layers = len(num_hidden_nodes_per_layer)

    # initialize weight vectors and bias terms lists (weights and biases for each layer)
    weights = []
    biases = []
    # initial rows in weights is columns in X
    previous_layer_dimension = num_features
    for layer in range(num_hidden_layers):
        weights.append(np.ones((previous_layer_dimension, num_hidden_nodes_per_layer[layer])))
        biases.append(np.ones((1, num_hidden_nodes_per_layer[layer])))
        # update the previous layer dimension
        previous_layer_dimension = num_hidden_nodes_per_layer[layer]

    # add the output layer's weights and bias term
    weights.append(np.ones((previous_layer_dimension, num_classes)))
    biases.append(np.ones((1, num_classes)))

    # iterate through the epochs and adjust the weights/biases to learn
    for epoch in range(epochs):
        # do the forward pass to find the predictions
        all_layer_outputs = forward(X, weights, biases)
        # do the backward pass to find the error and adjust the weight vectors/bias terms
        weights, biases = backward(X, y_one_hot, all_layer_outputs, weights, biases, learning_rate)
    
    # return the final weights and biases after training
    return weights, biases

In [11]:
# prediction function that takes the argmax of the outputs of the last layer in the MLP
def MLP_predict(X, weights, biases):
    # do the foward pass to find the prediction probability distribution
    all_layer_outputs = forward(X, weights, biases)
    # returns the argmax of the outputs which is the class with the highest probability
    return np.argmax(all_layer_outputs[-1], axis=1)

In [13]:
weights, biases = MLP_train(X_train, y_train, 5, [3], 0.02, 1000)

In [None]:
# find the accuracy metrics for each set of data using MLP
train_predictions = MLP_predict(X_train, weights, biases)
train_accuracy = accuracy(y_train, train_predictions)
print("MLP Train Accuracy:", train_accuracy)

val_predictions = MLP_predict(X_val, weights, biases)
val_accuracy = accuracy(y_val, val_predictions)
print("MLP Validation Accuracy:", val_accuracy)

test_predictions = MLP_predict(X_test, weights, biases)
test_accuracy = accuracy(y_test, test_predictions)
print("MLP Test Accuracy:", test_accuracy)

MLP Train Accuracy: 0.8203125
MLP Validation Accuracy: 0.8164
MLP Test Accuracy: 0.8203
