In [2]:
# Disable warnings from printing
from warnings import filterwarnings
filterwarnings('ignore')

import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor

from sklearn.metrics import roc_curve
from scipy.spatial.distance import cityblock, mahalanobis, euclidean


In [3]:
data = pd.read_csv("DSL-StrongPasswordData.csv")

In [4]:
# 51 total
subjects = data["subject"].unique()

In [5]:
def evaluateEER(user_scores, imposter_scores):
    labels = [0]*len(user_scores) + [1]*len(imposter_scores)
    fpr, tpr, thresholds = roc_curve(labels, user_scores + imposter_scores)
    missrates = 1 - tpr
    farates = fpr
    dists = missrates - farates
    idx1 = np.argmin(dists[dists >= 0])
    idx2 = np.argmax(dists[dists < 0])
    x = [missrates[idx1], farates[idx1]]
    y = [missrates[idx2], farates[idx2]]
    a = ( x[0] - x[1] ) / ( y[1] - x[1] - y[0] + x[0] )
    eer = x[0] + a * ( y[0] - x[0] )
    return eer

In [6]:


class NeuralNetAutoAssocDetector:
    def __init__(self, subjects):
        self.user_scores = []
        self.imposter_scores = []
        self.subjects = subjects
        self.learning_rate = 0.0001
        self.training_epochs = 500
        self.n_hidden = 31
        self.learning_momentum = 0.0003  # Momentum is not directly configurable in MLPRegressor

    def training(self):
        # Define the MLPRegressor model
        self.nn = MLPRegressor(
            hidden_layer_sizes=(self.n_hidden,),  # Single hidden layer with n_hidden units
            learning_rate_init=self.learning_rate,
            max_iter=self.training_epochs,
            activation="relu",  # Rectifier activation
            solver="adam",  # Adam solver (momentum is implicitly handled)
            random_state=42
        )
        
        # Train the model on the same data for input and output (autoencoder)
        self.nn.fit(np.array(self.train), np.array(self.train))
        
    def testing(self):
        # Predict for genuine data and calculate reconstruction error
        preds = self.nn.predict(np.array(self.test_genuine))
        for i in range(self.test_genuine.shape[0]):
            reconstruction_error = np.linalg.norm(self.test_genuine.iloc[i].values - preds[i])
            self.user_scores.append(reconstruction_error)
        
        # Predict for imposter data and calculate reconstruction error
        preds = self.nn.predict(np.array(self.test_imposter))
        for i in range(self.test_imposter.shape[0]):
            reconstruction_error = np.linalg.norm(self.test_imposter.iloc[i].values - preds[i])
            self.imposter_scores.append(reconstruction_error)
    
    def evaluate(self, data):
        eers = []
        
        for subject in self.subjects:
            self.user_scores = []
            self.imposter_scores = []
    
            # Consider current subject as genuine and rest as imposters
            genuine_user_data = data.loc[data.subject == subject, "H.period":"H.Return"]
            imposter_data = data.loc[data.subject != subject, :]
    
            # Genuine user's first 200 time vectors for training
            self.train = genuine_user_data[:200]
    
            # True set (200 records)
            self.test_genuine = genuine_user_data[200:]
    
            # False set (250 records, 5 per imposter, 50 imposters in all)
            self.test_imposter = imposter_data.groupby("subject").head(5).loc[:, "H.period":"H.Return"]
            
            self.training()
            self.testing()
    
            # Calculate Equal Error Rate (EER)
            eers.append(evaluateEER(self.user_scores, self.imposter_scores))
        
        # Return mean and standard deviation of EERs
        return np.mean(eers), np.std(eers)


In [8]:
NeuralNetAutoAssocDetector(subjects).evaluate(data)

(0.18676036717925942, 0.0987501674968358)