# Import packages and modules

In [None]:
from numpy import random as rd
import random
from sklearn.metrics.pairwise import euclidean_distances
from scipy.optimize import brentq
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model

'''
Author: Ahmed Anu Wahab
Date: February 2022
'''

# Error Rate

In [None]:
def eer_compute(scores_g, scores_i):
    '''
    Sweep a decision threshold over the scores and report the operating
    point where the two rates computed below are closest.

    For every threshold the function computes
      * far = fraction of ``scores_i`` that are >= threshold
      * frr = fraction of ``scores_g`` that are <  threshold
    and picks the first threshold minimising ``|far - frr|``.

    NOTE: these definitions treat a HIGH score as "genuine". When the scores
    are distances (low = genuine), ``far`` and ``frr`` are the complements of
    the conventional rates, so the returned ``100 - mean(far, frr) * 100`` is
    the conventional equal error rate in percent. For similarity scores the
    same expression is 100 minus the equal error rate.

    Parameters
    ----------
    scores_g : 1-D array-like of genuine scores.
    scores_i : 1-D array-like of impostor scores.

    Returns
    -------
    float
        ``100 - 100 * (far + frr) / 2`` at the selected threshold.
    '''
    scores_g = np.asarray(scores_g)
    scores_i = np.asarray(scores_i)
    all_scores = np.concatenate((scores_g, scores_i))
    ini = all_scores.min()
    fin = all_scores.max()
    # The score range is split into 10000 steps; the sweep starts one step
    # below the minimum and ends one step above the maximum.
    paso = (fin - ini) / 10000

    far = []
    frr = []

    def record_rates(threshold):
        far.append(np.count_nonzero(scores_i >= threshold) / len(scores_i))
        frr.append(np.count_nonzero(scores_g < threshold) / len(scores_g))

    if paso == 0:
        # All scores are identical: a zero step would leave the sweep empty,
        # so evaluate the single meaningful threshold instead.
        record_rates(ini)
    else:
        # Accumulate the threshold step by step (rather than precomputing a
        # grid) so the visited thresholds stay the same as before.
        threshold = ini - paso
        while threshold < fin + paso:
            record_rates(threshold)
            threshold = threshold + paso

    gap = np.abs(np.asarray(far) - np.asarray(frr))
    # argmin returns the first index of the minimum gap.
    index = int(np.argmin(gap))
    return 100.0 - (((far[index] + frr[index]) / 2) * 100)

# Test Data Generator

In [None]:
def get_embeddings(data):
    '''Run the module-level ``loaded_model`` on ``data`` and return the
    first 128 columns of its prediction.

    Presumably those columns are the anchor branch of the triplet output;
    confirm against the model definition.
    '''
    embedding_width = 128
    predictions = loaded_model.predict(data, verbose=0)
    return predictions[:, :embedding_width]

def _fit_keystroke_rows(rows, seq_length, features):
    '''Remove outlier keystrokes from ``rows`` and return exactly
    ``seq_length`` rows as a NumPy array.

    A row is dropped when the absolute value of any of its ``m``, ``ud``,
    ``dd`` or ``uu`` columns exceeds 5 (durations above 5 seconds are
    considered outliers). Longer samples are truncated; shorter ones are
    padded at the end with zero rows of width ``features``.
    '''
    is_outlier = (
        (abs(rows.m) > 5) | (abs(rows.ud) > 5)
        | (abs(rows.dd) > 5) | (abs(rows.uu) > 5)
    )
    # Filter with a mask instead of an in-place drop on a derived frame,
    # which avoids pandas' chained-assignment warning.
    values = rows[~is_outlier].values

    if len(values) >= seq_length:
        return values[:seq_length, :]
    padding = np.zeros((seq_length - len(values), features))
    return np.concatenate([values, padding])


def get_sample(user, test_users, seq_length, features, num_imp):
    '''Score one genuine user against impostors and return the error rate.

    Reads ``ml_features/<user>.csv``, embeds its sentences 0..14, uses the
    first embedding as the gallery and the last five (sentences 10..14) as
    genuine probes. For each of the first ``num_imp`` other users in
    ``test_users`` it embeds sentence 11 as an impostor probe. Scores are
    Euclidean distances to the gallery, averaged over gallery samples.

    Parameters
    ----------
    user : name of the genuine user (also the CSV file stem).
    test_users : NumPy array of user names; must contain ``user``.
    seq_length : number of keystroke rows per sample after truncate/pad.
    features : number of feature columns (CSV columns from index 3 onward).
    num_imp : number of impostor users to score against.

    Returns
    -------
    The value of ``eer_compute(gen_scores, imp_scores)``.
    '''
    G = 1  # number of leading genuine embeddings used as the gallery

    # ---- Genuine data: one sample per sentence, 15 sentences ----
    gen_df = pd.read_csv('ml_features/' + user + '.csv')
    data = np.zeros(shape=(15, 3, seq_length, features))
    for i in range(15):
        rows = gen_df[gen_df.sentence_number == i].iloc[:, 3:]
        # The same sequence is copied into all three model inputs.
        data[i] = _fit_keystroke_rows(rows, seq_length, features)

    genuine_embeddings = get_embeddings(
        [data[:, 0, :, :], data[:, 1, :, :], data[:, 2, :, :]])
    gallery_embeddings = genuine_embeddings[:G, :]
    # Last 5 sentences are held out as genuine test samples.
    genuine_test_embeddings = genuine_embeddings[10:, :]

    # Genuine scores between G gallery samples and 5 genuine test samples
    gen_scores = np.mean(
        euclidean_distances(gallery_embeddings, genuine_test_embeddings),
        axis=0)

    # ---- Impostor data: sentence 11 from each of the other users ----
    data = np.zeros(shape=(num_imp, 3, seq_length, features))
    imp_users = test_users.tolist()
    imp_users.remove(user)
    for i in range(num_imp):
        imp_df = pd.read_csv('ml_features/' + imp_users[i] + '.csv')
        rows = imp_df[imp_df.sentence_number == 11].iloc[:, 3:]
        data[i] = _fit_keystroke_rows(rows, seq_length, features)

    imp_test_embeddings = get_embeddings(
        [data[:, 0, :, :], data[:, 1, :, :], data[:, 2, :, :]])
    # Impostor scores between G gallery samples and num_imp test samples
    imp_scores = np.mean(
        euclidean_distances(gallery_embeddings, imp_test_embeddings),
        axis=0)

    return eer_compute(gen_scores, imp_scores)

# Predict

In [None]:
def gen_data(num_test_users, seq_length, features=5):
    '''Evaluate a random subset of test users and return the mean error rate.

    Loads the user names from ``triplets/test_users.npz`` (array ``arr_0``),
    shuffles them, keeps at most ``num_test_users`` of them and calls
    ``get_sample`` once per kept user, with all the other kept users acting
    as impostors.

    Parameters
    ----------
    num_test_users : maximum number of users to evaluate.
    seq_length : keystroke sequence length passed to ``get_sample``.
    features : number of feature columns passed to ``get_sample``.

    Returns
    -------
    Mean of the per-user values returned by ``get_sample``.
    '''
    # Close the archive once the array has been read.
    with np.load('triplets/test_users.npz') as archive:
        test_users = archive['arr_0']
    np.random.shuffle(test_users)  # Shuffle the test users
    test_users = test_users[:num_test_users]
    # Derive the impostor count from the users actually available, so a
    # request larger than the file does not index past the impostor list.
    num_imp = len(test_users) - 1
    EER = [
        get_sample(user, test_users, seq_length, features, num_imp)
        for user in test_users
    ]
    return np.mean(EER)

def predict_pairs(model):
    '''Run the evaluation for 1000 test users with sequence length 70,
    print the averaged result and return it.

    Parameters
    ----------
    model : currently unused; embeddings are produced by the module-level
        ``loaded_model`` (see ``get_embeddings``).

    Returns
    -------
    The average value computed by ``gen_data``.
    '''
    num_test_users = 1000  # Number of test users
    eer = gen_data(num_test_users, 70)
    print('AVERAGE EER & ACC for %s Users:' % num_test_users, eer)
    # Return the result so callers that assign it do not receive None.
    return eer

# Load Trained Model

In [None]:
# Load the trained model from disk. compile=False loads it without
# compiling, which is enough for prediction.
model_path = 'triplets/new_model/triplet_model.h5'
loaded_model = load_model(model_path, compile=False)


# Call the predict function on the loaded model.
eer = predict_pairs(loaded_model)