## Second experiment - third configuration - window size 232.

In [1]:
import csv
import os
import glob
import random
import pandas as pd
import numpy as np
from collections import OrderedDict

from analysis_tools import load_raw

import mne
from mne import Epochs, find_events
from mne.decoding import Vectorizer

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report

### The generated statistics and authentication datasets will be saved in the "statistics" directory. If the directory does not exist, it is created.

In [None]:
# Directory that receives the per-subject epoch dumps, the sliding-window
# statistics and the authentication train/test splits written by this notebook.
path = 'second_experiment/third_configuration/window_232/statistics'

# exist_ok=True creates the missing parts of the tree and is a no-op when the
# directory is already there, avoiding the race of a separate existence check.
os.makedirs(path, exist_ok=True)

### Application of a notch filter to attenuate the 50 Hz frequency, a sixth-order Butterworth band-pass filter with cut-off frequencies of 1-17 Hz, and ICA. After these are applied, the framework generates the epochs in DataFrame format.

In [None]:
def process_by_subject(subject_name):
    """Filter, clean and epoch every recording of one subject, then save the epochs.

    Each ``data/<subject_name>_*.csv`` recording (in sorted file-name order) is
    loaded with ``load_raw``, notch-filtered at 50 Hz, band-pass filtered with
    a sixth-order Butterworth IIR filter (1-17 Hz), passed through ICA with the
    components returned by ``find_bads_eog`` for Fp1/Fp2 excluded, and cut
    into epochs from -0.1 s to 0.8 s around each event.

    Once the 20th recording has been processed, all epochs collected so far
    are concatenated and written to two CSV files in the statistics directory:
    ``df_<subject_name>.csv`` with every epoch and
    ``df_<subject_name>_targets.csv`` with the 'NoTarget' epochs removed.
    Nothing is written for a subject with fewer than 20 recordings, and
    recordings after the 20th are processed but not saved.

    Args:
        subject_name: Prefix of the subject's recording files, e.g. "user_01".
    """
    sampling_rate = 256

    datasets = sorted(glob.glob('data/'+ subject_name + '_*.csv'))
    array_epochs = []

    for count, dataset in enumerate(datasets, start=1):
        # Presumably columns 0-7 hold the EEG channels and column 8 the
        # stimulus markers -- confirm against analysis_tools.load_raw.
        raw = load_raw(dataset, sfreq=sampling_rate, stim_ind=8, replace_ch_names=None, ch_ind=[0, 1, 2, 3, 4, 5, 6, 7])

        raw_notch = raw.copy().notch_filter([50.0])

        iir_params = dict(order=6, ftype='butter')
        raw_notch_and_filter = raw_notch.copy().filter(1, 17, method='iir', iir_params=iir_params)

        ica = mne.preprocessing.ICA(n_components=8, random_state=97)
        ica.fit(raw_notch_and_filter)

        # Work on a copy so the filtered signal used to fit the ICA stays intact.
        raw_notch_and_filter_ica = raw_notch_and_filter.copy()

        ica.exclude = []
        eog_inds, eog_scores = ica.find_bads_eog(raw_notch_and_filter_ica, ['Fp1','Fp2'], threshold=1.5)
        ica.exclude = eog_inds

        ica.apply(raw_notch_and_filter_ica)

        events = find_events(raw_notch_and_filter_ica, shortest_event=1)

        event_id = {'Target': 1, 'NoTarget': 2}
        reject = {'eeg': 100e-6}

        epochs = Epochs(raw_notch_and_filter_ica, events=events, event_id=event_id, tmin=-0.1, tmax=0.8, reject=reject, preload=True)
        epochs.pick_types(eeg=True)

        array_epochs.append(epochs)

        if count == 20:
            all_epochs = mne.concatenate_epochs(array_epochs, add_offset=True)

            # Export the full set before dropping anything: drop() below
            # modifies all_epochs in place.
            df_final = all_epochs.to_data_frame()

            # Every epoch labelled 2 ('NoTarget') is removed, so the indices
            # can be read straight from the event codes.
            y = all_epochs.events[:, -1]
            index_no_targets = np.flatnonzero(y == 2).tolist()

            all_epochs.drop(index_no_targets)

            df_final_only_targets = all_epochs.to_data_frame()

            df_final.to_csv('second_experiment/third_configuration/window_232/statistics/df_{}.csv'.format(subject_name), index=False)
            df_final_only_targets.to_csv('second_experiment/third_configuration/window_232/statistics/df_{}_targets.csv'.format(subject_name), index=False)

In [None]:
# Pre-process the recordings of the ten subjects, user_01 through user_10,
# in ascending order.
for subject_number in range(1, 11):
    process_by_subject("user_{:02d}".format(subject_number))

### Getting the statistics using a sliding window size equal to 232.

In [None]:
import numpy as np

def get_stadistical_values(channel, data):
    """Summarise one channel of *data* as a single-row DataFrame.

    Args:
        channel: Name of the column of ``data`` to summarise.
        data: DataFrame (or other object indexable by ``channel``) holding
            the samples of the window.

    Returns:
        A one-row DataFrame whose columns are, in order,
        ``<channel>_Mean``, ``<channel>_variance``, ``<channel>_deviation``,
        ``<channel>_max``, ``<channel>_summatory`` and ``<channel>_median``,
        computed with the corresponding NumPy functions.
    """
    values = data[channel]

    dicc = {
        channel+"_Mean": np.mean(values),
        channel+"_variance": np.var(values),
        channel+"_deviation": np.std(values),
        channel+"_max": np.max(values),
        channel+"_summatory": np.sum(values),
        channel+"_median": np.median(values),
    }

    # DataFrame.append no longer exists in pandas 2.0+, so the frame is built
    # directly: one row per statistic, then transposed into a single row.
    return pd.DataFrame.from_dict(dicc, orient='index').transpose()

In [None]:
def aply_all_channels(workDF, window_size=232):
    """Compute per-channel statistics over every sliding window of *workDF*.

    A window of ``window_size`` consecutive rows is moved one row at a time
    over ``workDF``. For each position, ``get_stadistical_values`` is applied
    to the eight channels and the results are joined side by side into one
    row, to which a ``Condition`` column set to 1 is added.

    Args:
        workDF: DataFrame containing the columns Fp1, Fp2, C3, C4, P7, P8,
            O1 and O2.
        window_size: Number of rows per window (232 by default).

    Returns:
        A DataFrame with one row per window, ordered from the last window to
        the first, each row carrying index label 0. An empty DataFrame is
        returned when ``workDF`` has fewer than ``window_size`` rows.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    channels = ["Fp1","Fp2","C3","C4","P7","P8","O1","O2"]

    number_of_windows = max(workDF.shape[0] - window_size + 1, 0)

    window_rows = []
    for start in range(number_of_windows):
        # The statistics only read the slice, so no copy of workDF is needed.
        vectors = workDF.iloc[start:start+window_size]

        allChannels = pd.concat(
            [get_stadistical_values(channel, vectors) for channel in channels],
            axis=1,
        )
        allChannels['Condition'] = 1

        window_rows.append(allChannels)

    if not window_rows:
        return pd.DataFrame()

    # Rows are emitted newest window first; concatenating once at the end
    # avoids re-copying the accumulated result for every window.
    return pd.concat(window_rows[::-1], axis=0)

In [None]:
# For each of the ten subjects, read the target-only epochs, compute the
# sliding-window statistics and store them next to the input file.
statistics_dir = 'second_experiment/third_configuration/window_232/statistics'

for subject_number in range(1, 11):
    user = 'user_{:02d}'.format(subject_number)
    targets = pd.read_csv('{}/df_{}_targets.csv'.format(statistics_dir, user))
    window_statistics = aply_all_channels(targets)
    window_statistics.to_csv('{}/df_statistics_{}_window_232.csv'.format(statistics_dir, user), index=False)

### Generation of five authentication datasets for each subject.

In [None]:
def get_authentication_statistics(subject_name):
    """Build one random train/test authentication split for *subject_name*.

    The window statistics of all ten subjects are read from the statistics
    directory. The rows of ``subject_name`` (the genuine user) are shuffled
    and split 70 % / 30 % into train and test. Every other subject
    contributes a random sample of rows relabelled ``Condition = 0``: each
    gives ``genuine_rows // 9`` rows, and the last one also gives the
    remainder, so the sampled impostor rows total the genuine row count.
    Three of the nine other subjects, chosen at random, are treated as
    attackers and their samples go to the test set; the samples of the
    remaining six go to the train set.

    Args:
        subject_name: One of "user_01" ... "user_10".

    Returns:
        Tuple ``(statistics_train, statistics_test)`` of DataFrames, each with
        a fresh 0..n-1 index.

    Raises:
        ValueError: If ``subject_name`` is not one of the ten known subjects.
    """
    subjects = ["user_01", "user_02", "user_03", "user_04", "user_05", "user_06", "user_07", "user_08", "user_09", "user_10"]
    index_subject = subjects.index(subject_name)

    statistics_subjects = [
        pd.read_csv('second_experiment/third_configuration/window_232/statistics/df_statistics_{}_window_232.csv'.format(name))
        for name in subjects
    ]

    statistics_subject = statistics_subjects[index_subject]
    new_statistics_targets_only = [
        statistic
        for position, statistic in enumerate(statistics_subjects)
        if position != index_subject
    ]

    targets_subject = statistics_subject.shape[0]
    number_rest_subjects = len(new_statistics_targets_only)
    targets_rest_subjects = targets_subject // number_rest_subjects
    # The last impostor absorbs the remainder of the integer division.
    targets_last_subject = (targets_subject - (targets_rest_subjects * number_rest_subjects)) + targets_rest_subjects

    num_statistics_train = int(targets_subject * 0.7)
    index_statistics_subject = np.random.permutation(targets_subject)
    index_train_subject = index_statistics_subject[:num_statistics_train]
    index_test_subject = index_statistics_subject[num_statistics_train:]

    # Frames are collected and concatenated once at the end; DataFrame.append
    # is not available in pandas 2.0+.
    train_frames = [statistics_subject.iloc[index_train_subject]]
    test_frames = [statistics_subject.iloc[index_test_subject]]

    number_attackers = 3
    attacker_index = set(random.sample(range(number_rest_subjects), number_attackers))

    for i, statistic in enumerate(new_statistics_targets_only):
        if i == number_rest_subjects - 1:
            targets_selected = targets_last_subject
        else:
            targets_selected = targets_rest_subjects

        # assign() returns a new frame, so the label change never writes into
        # a slice of the sampled subject's data.
        statistics_selected = statistic.sample(n=targets_selected, replace=False).assign(Condition=0)

        if i in attacker_index:
            test_frames.append(statistics_selected)
        else:
            train_frames.append(statistics_selected)

    statistics_train = pd.concat(train_frames, ignore_index=True)
    statistics_test = pd.concat(test_frames, ignore_index=True)

    return statistics_train, statistics_test
    
# Draw five independent train/test splits for every subject and store each
# pair in the statistics directory (train file first, then test file).
subjects = ["user_{:02d}".format(number) for number in range(1, 11)]

split_template = 'second_experiment/third_configuration/window_232/statistics/authentication_statistics_{}_{}_{}_second_experiment_third_configuration_window_232.csv'

for subject in subjects:
    for i in range(5):
        statistics_train, statistics_test = get_authentication_statistics(subject)
        for split_name, split in (('train', statistics_train), ('test', statistics_test)):
            split.to_csv(split_template.format(split_name, subject, i))

### The results obtained will be saved in the "results" directory. If the directory does not exist, it is created.

In [None]:
# Directory that receives the CSV file with the authentication results.
path = 'second_experiment/third_configuration/window_232/results'

# exist_ok=True creates the missing parts of the tree and is a no-op when the
# directory is already there, avoiding the race of a separate existence check.
os.makedirs(path, exist_ok=True)

### Generation of a CSV file that will contain the results obtained in the authentication process.

In [3]:
# Column names of the results file: one label column followed by the F1 score,
# EER, FAR and FRR columns of each of the five classifiers.
header = [
    'Option/Classifier',
    'Classifier1-F1Score', 'Classifier1-EER', 'Classifier1-FAR', 'Classifier1-FRR',
    'Classifier2-F1Score', 'Classifier2-EER', 'Classifier2-FAR', 'Classifier2-FRR',
    'Classifier6-F1Score', 'Classifier6-EER', 'Classifier6-FAR', 'Classifier6-FRR',
    'Classifier7-F1Score', 'Classifier7-EER', 'Classifier7-FAR', 'Classifier7-FRR',
    'Classifier8-F1Score', 'Classifier8-EER', 'Classifier8-FAR', 'Classifier8-FRR',
]

# Mode 'w' starts a fresh results file. newline='' is what the csv module asks
# for on files given to csv.writer; without it, rows are followed by blank
# lines on platforms that translate line endings.
with open('second_experiment/third_configuration/window_232/results/results_second_experiment_third_configuration_window_232.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)

    writer.writerow(header)

### Authentication process using binary classification (genuine subject, Condition 1, versus impostors, Condition 0).

In [None]:
# Classifier pipelines evaluated on every split, keyed by display label.
# They are iterated in this insertion order when results are collected.
clfs = OrderedDict([
    ('Clasificador I', make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())),
    ('Clasificador II', make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))),
    ('Clasificador VI', make_pipeline(Vectorizer(), RandomForestClassifier(random_state=42))),
    ('Clasificador VII', make_pipeline(Vectorizer(), QDA())),
    ('Clasificador VIII', make_pipeline(Vectorizer(), KNeighborsClassifier(n_neighbors=50))),
])

def calculate_eer_far_frr(tp, fp, tn, fn):
    """Derive error rates from the four cells of a binary confusion matrix.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        tn: Number of true negatives.
        fn: Number of false negatives.

    Returns:
        Tuple ``(eer, far, frr)`` where

        * ``eer`` is ``(fp + fn) / (tp + tn + fp + fn)``, i.e. the fraction of
          misclassified samples. NOTE(review): this is what the notebook
          reports under the "EER" name; it is not obtained from a threshold
          sweep where FAR equals FRR.
        * ``far`` is ``fp / (tn + fp)``.
        * ``frr`` is ``fn / (tp + fn)``.

        A rate whose denominator is zero is reported as 0.0.
    """
    negatives = tn + fp
    positives = tp + fn
    total = positives + negatives

    # Each guard tests the denominator actually used in the division.
    far = fp / negatives if negatives > 0 else 0.0
    frr = fn / positives if positives > 0 else 0.0
    eer = (fp + fn) / total if total > 0 else 0.0

    return eer, far, frr

def authentication_by_subject(subject_name, statistics_train, statistics_test, experiment):
    """Evaluate every classifier in ``clfs`` on one split and log the results.

    Each pipeline in ``clfs`` is fitted on the train split and used to predict
    the test split. For each one, the weighted-average F1 score and the values
    returned by ``calculate_eer_far_frr`` are rounded to two decimals. A
    single row (a label followed by the four values of every classifier, in
    ``clfs`` order) is appended to the results CSV file.

    Args:
        subject_name: Subject the split was generated for; used in the row label.
        statistics_train: Train DataFrame with the feature columns
            ``Fp1_Mean`` through ``O2_median`` and a ``Condition`` column.
        statistics_test: Test DataFrame with the same columns.
        experiment: Identifier of the split; used in the row label.
    """
    option = 'Second_experiment_third_configuration_window_232_' + subject_name + '_' + str(experiment)

    data = [option]

    # Features are the label-based column slice from Fp1_Mean to O2_median
    # (both inclusive); the target is the Condition column.
    X_train = statistics_train.loc[:, "Fp1_Mean":"O2_median"].to_numpy()
    y_train = statistics_train.loc[:, "Condition"].to_numpy()

    X_test = statistics_test.loc[:, "Fp1_Mean":"O2_median"].to_numpy()
    y_test = statistics_test.loc[:, "Condition"].to_numpy()

    for classifier in clfs.values():
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)

        report = classification_report(y_test, y_pred, output_dict=True)
        f1_score = report['weighted avg']['f1-score']

        # Fixing the label order keeps the matrix 2x2 even when only one class
        # occurs in y_test and y_pred, so the four-way unpacking cannot fail.
        tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

        eer, far, frr = calculate_eer_far_frr(tp, fp, tn, fn)

        data.extend(round(value, 2) for value in (f1_score, eer, far, frr))

    # Same encoding as the header row; newline='' is what the csv module asks
    # for on files given to csv.writer. The with-block closes the file.
    with open('second_experiment/third_configuration/window_232/results/results_second_experiment_third_configuration_window_232.csv', 'a', encoding='UTF8', newline='') as f:
        csv.writer(f).writerow(data)

# Evaluate the five stored train/test splits of every subject; each call
# appends one row to the results file.
subjects = ["user_{:02d}".format(number) for number in range(1, 11)]

stored_split_template = 'second_experiment/third_configuration/window_232/statistics/authentication_statistics_{}_{}_{}_second_experiment_third_configuration_window_232.csv'

for subject in subjects:
    for i in range(5):
        train_split = pd.read_csv(stored_split_template.format('train', subject, i))
        test_split = pd.read_csv(stored_split_template.format('test', subject, i))
        authentication_by_subject(subject, train_split, test_split, i)