# Setup

In [None]:
import os
import re
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from tqdm import tqdm
from tensorflow.keras.callbacks import ModelCheckpoint
# Pass the --xla_gpu_cuda_data_dir flag to XLA through the XLA_FLAGS
# environment variable, pointing it at /opt/conda.
# NOTE(review): presumably this is where the CUDA toolkit lives on this
# machine -- adjust the path for other environments.
os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/opt/conda'

# Testing

#### Defining Functions

In [None]:
# Function to read in the keystroke text files (user ID label + inter-event sequence per file)
def read_text_files(directory):
    """Load integer sequences and their user-ID labels from ``.txt`` files.

    Every file directly inside *directory* whose name ends in ``.txt`` is
    read in sorted path order. The label is the integer that precedes the
    first underscore in the filename; the features are the integers found
    one per line after the first (header) line.

    Blank lines, such as a trailing newline at the end of a file, are skipped
    instead of raising ``ValueError`` from ``int('')``.

    Args:
        directory: Path of the directory holding the text files.

    Returns:
        A ``(labels, features)`` tuple of two parallel lists: ``labels[i]``
        is the user ID parsed from the i-th filename and ``features[i]`` is
        the list of integers read from that file.

    Raises:
        ValueError: If a filename prefix or a non-blank data line cannot be
            parsed as an integer.
        OSError: If the directory or one of its files cannot be read.
    """
    labels = []
    features = []
    # Sort so that the output order is deterministic across runs
    file_paths = sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.endswith('.txt')
    )
    for file_path in tqdm(file_paths, total=len(file_paths), desc="Processing text files"):
        # The user ID (label) is the filename prefix before the first "_"
        user_id = int(os.path.basename(file_path).split('_')[0])
        with open(file_path, 'r') as file:
            # Discard the header line (the "TIME_DELTA" line); the default
            # keeps an empty file from raising StopIteration
            next(file, None)
            # int() tolerates surrounding whitespace, so only fully blank
            # lines need to be filtered out
            keystrokes = [int(line) for line in file if line.strip()]
        labels.append(user_id)
        features.append(keystrokes)
    return labels, features

# Function to create instance of Taunet
def create_model():
    """Build an uncompiled Taunet network.

    The stack is a 256-unit LSTM over variable-length sequences with one
    feature per step, a linear 128-unit dense projection, and a final
    L2 normalisation along axis 1.

    Returns:
        A ``tf.keras.Sequential`` model made of those three layers.
    """
    # Recurrent encoder: input_shape=(None, 1) leaves the step count open
    sequence_encoder = tf.keras.layers.LSTM(256, input_shape=(None, 1))
    # Linear projection (no activation) down to 128 dimensions
    projection = tf.keras.layers.Dense(128, activation=None)
    # Normalise each output row to unit L2 norm
    unit_norm = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1))
    return tf.keras.Sequential([sequence_encoder, projection, unit_norm])

# Function to calculate rank n accuracies
def calculate_rank_n_accuracy(embeddings1, embeddings2, labels1, labels2, n):
    """Compute rank-n identification accuracy between two embedding sets.

    For every row of *embeddings1* the Euclidean distance to every row of
    *embeddings2* is computed. The row counts as a hit when its label appears
    among the labels of the *n* nearest rows of *embeddings2*.

    Args:
        embeddings1: 2-D array-like of query embeddings, one per row.
        embeddings2: 2-D array-like of gallery embeddings, one per row, with
            the same number of columns as *embeddings1*.
        labels1: Sequence with one label per row of *embeddings1*.
        labels2: Sequence with one label per row of *embeddings2*.
        n: Number of nearest gallery embeddings to inspect per query.

    Returns:
        The fraction of queries that were hits, as a float in ``[0, 1]``.
        Returns ``0.0`` when *embeddings1* is empty.
    """
    # Coerce to arrays so plain Python lists work for both the arithmetic
    # and the fancy indexing of labels2 below
    embeddings1 = np.asarray(embeddings1)
    embeddings2 = np.asarray(embeddings2)
    labels1 = np.asarray(labels1)
    labels2 = np.asarray(labels2)

    total_queries = len(embeddings1)
    if total_queries == 0:
        # Nothing to score; avoid dividing by zero
        return 0.0

    correct_matches = 0
    for i, query in enumerate(embeddings1):
        # Euclidean distance from this query to every gallery embedding
        distances = np.linalg.norm(embeddings2 - query, axis=1)
        # Indices of the n closest gallery embeddings
        closest_indices = np.argsort(distances)[:n]
        # Hit if the query's label is among the labels of those n neighbours
        if np.any(labels2[closest_indices] == labels1[i]):
            correct_matches += 1
    return correct_matches / total_queries

#### Loading Taunet model

In [None]:
# Recreate the model architecture through the shared create_model() helper
# rather than repeating the layer stack, so the two definitions cannot drift
recreated_model = create_model()
# Compile the recreated model with Adam (learning rate 0.001) and the
# semi-hard triplet loss from tensorflow_addons
recreated_model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=tfa.losses.TripletSemiHardLoss()
)
# Load the weights from the HDF5 file in the working directory
recreated_model.load_weights('taunet_weights.h5')

#### Building Test Dataset

In [None]:
# Folder that holds the test keystroke files
directory = 'keystrokes-testing'
# Parse the user IDs and inter-event sequences from disk
labels, features = read_text_files(directory)
# Round-trip the parsed lists through TensorFlow constants to get NumPy arrays
intervals_tensor_eval = tf.constant(features).numpy()
labels_tensor_eval = tf.constant(labels).numpy()
# Split by position: even indices form set 1, odd indices form set 2.
# The intent is two sets with the same users but different inter-event
# sequences (this relies on how the sorted files alternate -- verify).
labels1, labels2 = labels_tensor_eval[0::2], labels_tensor_eval[1::2]
input_sequences1, input_sequences2 = (
    intervals_tensor_eval[0::2],
    intervals_tensor_eval[1::2],
)

#### Embedding Test Datasets

In [None]:
# Run both sequence sets through the loaded model, set 1 first, then set 2
embeddings1, embeddings2 = (
    recreated_model.predict(sequences)
    for sequences in (input_sequences1, input_sequences2)
)

#### Calculate Rank N Accuracy

In [None]:
# Rank-1 accuracy: fraction of set-1 embeddings whose single nearest set-2
# embedding carries the same label
calculate_rank_n_accuracy(
    embeddings1,
    embeddings2,
    labels1,
    labels2,
    n=1,
)

#### Save Embeddings 

In [None]:
# Write each embedding matrix to its own CSV with a "column1,column2,..."
# header row; comments='' stops NumPy from prefixing the header with "# "
for csv_name, matrix in (
    ('embeddings1_taunet.csv', embeddings1),
    ('embeddings2_taunet.csv', embeddings2),
):
    column_names = ['column{}'.format(idx) for idx in range(1, matrix.shape[1] + 1)]
    np.savetxt(csv_name, matrix, delimiter=',', header=','.join(column_names), comments='')