## Third experiment - third configuration - window 232.

In [1]:
import csv
import os
import glob
import random
import numpy as np
import pandas as pd
from collections import OrderedDict

from analysis_tools import load_raw

import mne
from mne import Epochs, find_events
from mne.decoding import Vectorizer

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix, classification_report

from sklearn.multiclass import OneVsRestClassifier

### The generated statistics and identification datasets will be saved in the "statistics" directory. If the directory does not exist, it is created.

In [None]:
# Directory that receives the per-subject epoch, statistics and
# identification CSV files produced by the cells below.
path = 'third_experiment/third_configuration/window_232/statistics'

# exist_ok=True creates the tree only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(path, exist_ok=True)

### Apply a notch filter to attenuate the 50 Hz component, a sixth-order Butterworth band-pass filter with cut-off frequencies of 1-17 Hz, and ICA. After these steps, the framework generates the epochs and exports them in DataFrame format.

In [None]:
def _epochs_from_recording(dataset, sampling_rate=256):
    """Return the filtered, ICA-cleaned and preloaded epochs of one recording.

    Args:
        dataset: Path of the recording CSV handed to ``load_raw``.
        sampling_rate: Sampling frequency passed to ``load_raw`` as ``sfreq``.

    Returns:
        The ``Epochs`` built around the Target (1) / NoTarget (2) events,
        restricted to EEG channels.
    """
    raw = load_raw(dataset, sfreq=sampling_rate, stim_ind=8, replace_ch_names=None, ch_ind=[0, 1, 2, 3, 4, 5, 6, 7])

    # Attenuate the 50 Hz component before the band-pass stage.
    raw_notch = raw.copy().notch_filter([50.0])

    # Sixth-order Butterworth band-pass between 1 and 17 Hz.
    iir_params = dict(order=6, ftype='butter')
    raw_filtered = raw_notch.copy().filter(1, 17, method='iir', iir_params=iir_params)

    ica = mne.preprocessing.ICA(n_components=8, random_state=97)
    ica.fit(raw_filtered)

    # The ICA result is read back from the object passed to ica.apply(), so
    # work on a copy and keep the filtered signal untouched.
    raw_clean = raw_filtered.copy()

    # Exclude the components flagged as ocular using Fp1/Fp2 as references.
    ica.exclude = []
    eog_inds, _eog_scores = ica.find_bads_eog(raw_clean, ['Fp1', 'Fp2'], threshold=1.5)
    ica.exclude = eog_inds
    ica.apply(raw_clean)

    events = find_events(raw_clean, shortest_event=1)
    event_id = {'Target': 1, 'NoTarget': 2}
    reject = {'eeg': 100e-6}

    epochs = Epochs(raw_clean, events=events, event_id=event_id, tmin=-0.1, tmax=0.8, reject=reject, preload=True)
    epochs.pick_types(eeg=True)
    return epochs


def process_by_subject(subject_name):
    """Preprocess one subject's recordings and export the epochs as CSV.

    Every file matching ``data/<subject_name>_*.csv`` is processed in sorted
    order.  Once the 20th recording has been epoched, the epochs gathered so
    far are concatenated and written to the statistics directory twice:

    * ``df_<subject_name>.csv`` holds every epoch (Target and NoTarget);
    * ``df_<subject_name>_targets.csv`` holds the Target epochs only.

    Nothing is written when fewer than 20 recordings are found, and
    recordings beyond the 20th are not used.

    Args:
        subject_name: Subject identifier used in the input and output file
            names, e.g. ``"user_01"``.
    """
    recordings_needed = 20

    datasets = sorted(glob.glob('data/'+ subject_name + '_*.csv'))
    array_epochs = []
    for count, dataset in enumerate(datasets, start=1):
        array_epochs.append(_epochs_from_recording(dataset))

        if count != recordings_needed:
            continue

        all_epochs = mne.concatenate_epochs(array_epochs, add_offset=True)
        # Export the full set before the NoTarget epochs are removed in place.
        df_final = all_epochs.to_data_frame()

        # Every NoTarget epoch (event code 2) has to go; select them directly
        # instead of drawing random positions until all have been hit.
        index_no_targets = np.flatnonzero(all_epochs.events[:, -1] == 2)
        all_epochs.drop(index_no_targets)

        df_final_only_targets = all_epochs.to_data_frame()

        df_final.to_csv('third_experiment/third_configuration/window_232/statistics/df_{}.csv'.format(subject_name), index=False)
        df_final_only_targets.to_csv('third_experiment/third_configuration/window_232/statistics/df_{}_targets.csv'.format(subject_name), index=False)

        # Later recordings never contributed to the output, so stop here.
        break

In [None]:
# Run the preprocessing pipeline for the ten subjects, user_01 to user_10.
for subject_number in range(1, 11):
    process_by_subject("user_{:02d}".format(subject_number))

### Getting the statistics using a sliding window size equal to 232.

In [None]:
def get_stadistical_values(channel, data):
    """Summarise one channel of ``data`` as a single-row DataFrame.

    Args:
        channel: Name of the column of ``data`` to summarise.
        data: DataFrame (or mapping of column name to samples) holding the
            channel values.

    Returns:
        A one-row DataFrame whose columns are ``<channel>_Mean``,
        ``<channel>_variance``, ``<channel>_deviation``, ``<channel>_max``,
        ``<channel>_summatory`` and ``<channel>_median``, in that order.
    """
    samples = data[channel]

    stats = {
        channel + "_Mean": np.mean(samples),
        channel + "_variance": np.var(samples),
        channel + "_deviation": np.std(samples),
        channel + "_max": np.max(samples),
        channel + "_summatory": np.sum(samples),
        channel + "_median": np.median(samples),
    }

    # Build the row directly: DataFrame.append was removed in pandas 2.0, and
    # appending to an empty frame added nothing to the result anyway.
    return pd.DataFrame.from_dict(stats, orient='index').transpose()

In [None]:
def aply_all_channels(workDF, window_size=232):
    """Compute sliding-window statistics for the eight EEG channels.

    A window of ``window_size`` consecutive rows slides over ``workDF`` one
    row at a time.  For each window and each channel (Fp1, Fp2, C3, C4, P7,
    P8, O1, O2) the mean, variance, standard deviation, maximum, sum and
    median are computed.

    Args:
        workDF: DataFrame with one column per channel; other columns are
            ignored.
        window_size: Number of consecutive rows per window (default 232).

    Returns:
        A DataFrame with one row per window, columns
        ``<channel>_Mean``, ``<channel>_variance``, ``<channel>_deviation``,
        ``<channel>_max``, ``<channel>_summatory``, ``<channel>_median`` for
        each channel, followed by a ``Condition`` column set to 1.  Rows are
        ordered from the last window to the first and carry a default integer
        index.  An empty DataFrame is returned when ``workDF`` has fewer rows
        than ``window_size``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    channels = ["Fp1","Fp2","C3","C4","P7","P8","O1","O2"]
    statistics = (
        ("_Mean", np.mean),
        ("_variance", np.var),
        ("_deviation", np.std),
        ("_max", np.max),
        ("_summatory", np.sum),
        ("_median", np.median),
    )

    rows = []
    # A non-positive count yields an empty range, i.e. no windows at all.
    for start in range(workDF.shape[0] - window_size + 1):
        # Slice the frame directly; copying the whole frame per window made
        # the loop quadratic in the number of rows.
        window = workDF.iloc[start:start + window_size]

        row = {}
        for channel in channels:
            samples = window[channel]
            for suffix, statistic in statistics:
                row[channel + suffix] = statistic(samples)
        row['Condition'] = 1
        rows.append(row)

    # Keep the established output order: the most recent window comes first.
    rows.reverse()

    # Build the frame once instead of concatenating one row at a time.
    return pd.DataFrame(rows)

In [None]:
# Compute the sliding-window statistics of each subject's target epochs
# (user_01 to user_10) and store them in the statistics directory.
for subject_number in range(1, 11):
    subject = 'user_{:02d}'.format(subject_number)
    targets = pd.read_csv('third_experiment/third_configuration/window_232/statistics/df_{}_targets.csv'.format(subject))
    subject_statistics = aply_all_channels(targets)
    subject_statistics.to_csv('third_experiment/third_configuration/window_232/statistics/df_statistics_{}_window_232.csv'.format(subject), index=False)

### Generation of five identification datasets.

In [None]:
def get_identification_statistics():
    """Build one class-balanced identification dataset for the ten subjects.

    The per-subject statistics files
    ``df_statistics_user_XX_window_232.csv`` are read from the statistics
    directory.  The subject with the fewest rows (the first one on ties)
    keeps all of its rows in file order; every other subject contributes a
    random sample, without replacement, of that same number of rows.  The
    ``Condition`` column of each selection is set to the subject name.

    Returns:
        A DataFrame with the selections of user_01 to user_10 stacked in
        subject order and re-indexed from 0.
    """
    subjects = ["user_{:02d}".format(number) for number in range(1, 11)]
    statistics_subjects = [
        pd.read_csv('third_experiment/third_configuration/window_232/statistics/df_statistics_{}_window_232.csv'.format(subject))
        for subject in subjects
    ]

    row_counts = [statistic.shape[0] for statistic in statistics_subjects]
    minimun_targets = min(row_counts)
    # list.index returns the first occurrence, so ties go to the earliest subject.
    subject_minimun_targets = row_counts.index(minimun_targets)

    balanced = []
    for position, (name_subject, statistic) in enumerate(zip(subjects, statistics_subjects)):
        if position == subject_minimun_targets:
            targets_selected = statistic
        else:
            index_selected_targets = random.sample(range(statistic.shape[0]), minimun_targets)
            targets_selected = statistic.iloc[index_selected_targets]

        # assign() returns a new frame, so the label never gets written
        # through an iloc selection of the source data.
        balanced.append(targets_selected.assign(Condition=name_subject))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(balanced, ignore_index=True)

# Write five identification datasets, numbered 0 to 4; each call draws its
# own random sample of rows.
for i in range(5):
    identification_dataset = get_identification_statistics()
    identification_dataset.to_csv(
        'third_experiment/third_configuration/window_232/statistics/identification_statistics_{}_third_experiment_third_configuration_window_232.csv'.format(i),
        index=False,
    )

### The results obtained will be saved in the "results" directory. If the directory does not exist, it is created.

In [None]:
# Directory that receives the CSV file with the identification results.
path = 'third_experiment/third_configuration/window_232/results'

# exist_ok=True creates the tree only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(path, exist_ok=True)

### Generation of a CSV file that will contain the results obtained in the identification process.

In [4]:
# Results layout: the option label followed by F1 score, EER, FAR and FRR for
# each evaluated classifier (numbered 1, 2, 6, 7 and 8).
header = ['Option/Classifier'] + [
    'Classifier{}-{}'.format(classifier_number, metric)
    for classifier_number in (1, 2, 6, 7, 8)
    for metric in ('F1Score', 'EER', 'FAR', 'FRR')
]

# The csv module expects files opened with newline=''; without it, platforms
# that translate line endings write a blank line after every row.
with open('third_experiment/third_configuration/window_232/results/results_third_experiment_third_configuration_window_232.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)

    writer.writerow(header)

### Identification process using multiclass classification.

In [None]:
# Classifier pipelines evaluated during identification, keyed by the label
# used to collect their metrics.  Insertion order fixes the column order of
# the results row.
clfs = OrderedDict([
    ('Clasificador I', make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())),
    ('Clasificador II', make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))),
    ('Clasificador VI', make_pipeline(Vectorizer(), RandomForestClassifier(random_state=42))),
    ('Clasificador VII', make_pipeline(Vectorizer(), QDA())),
    ('Clasificador VIII', make_pipeline(Vectorizer(), KNeighborsClassifier(n_neighbors=50))),
])

def calculate_eer_far_frr(cm):
    """Average one-vs-rest error rates derived from a confusion matrix.

    Each class is treated in turn as the positive class, with rows of ``cm``
    as true labels and columns as predictions.  Three rates are computed per
    class and averaged over the classes:

    * "eer": ``(fp + fn) / total``, the share of samples misclassified in
      the one-vs-rest view.  NOTE: this is not a threshold-based equal error
      rate; the name is kept for the results file.
    * FAR: ``fp / (tn + fp)``.
    * FRR: ``fn / (tp + fn)``.

    A rate whose denominator is zero counts as 0.0.

    Args:
        cm: Square confusion matrix (array-like of counts).

    Returns:
        Tuple ``(avg_eer, avg_far, avg_frr)``.

    Raises:
        ZeroDivisionError: If ``cm`` contains no classes.
    """
    cm = np.asarray(cm)
    num_classes = cm.shape[0]
    total = np.sum(cm)

    eer_sum = 0.0
    far_sum = 0.0
    frr_sum = 0.0

    for i in range(num_classes):
        tp = cm[i, i]
        fp = np.sum(cm[:, i]) - tp
        fn = np.sum(cm[i, :]) - tp
        tn = total - (tp + fp + fn)

        # Guard each ratio on its own denominator.  Checking tp + fp before
        # dividing by tn + fp let a 0/0 through as NaN.
        far_sum += fp / (tn + fp) if (tn + fp) > 0 else 0.0
        frr_sum += fn / (tp + fn) if (tp + fn) > 0 else 0.0
        eer_sum += (fp + fn) / total if total > 0 else 0.0

    avg_eer = eer_sum / num_classes
    avg_far = far_sum / num_classes
    avg_frr = frr_sum / num_classes

    return avg_eer, avg_far, avg_frr

def inicializate_dict():
    """Return a fresh metrics store with one entry per classifier in ``clfs``.

    Each entry maps 'f1_score', 'eer', 'far' and 'frr' to its own empty list.
    """
    metric_names = ('f1_score', 'eer', 'far', 'frr')

    results = {}
    for clf_name in clfs:
        results[clf_name] = {metric: [] for metric in metric_names}

    return results

def identification(statistics, experiment):
    """Cross-validate every classifier on one identification dataset.

    The columns from ``Fp1_Mean`` to ``O2_median`` are used as features and
    ``Condition`` as the label.  Each pipeline in ``clfs`` is wrapped in a
    one-vs-rest classifier and evaluated with a shuffled, stratified 10-fold
    split.  For every classifier the weighted F1 score and the values
    returned by ``calculate_eer_far_frr`` are rounded to two decimals per
    fold, averaged over the folds, and appended as one row to the results
    CSV file.

    Args:
        statistics: DataFrame holding the feature columns and ``Condition``.
        experiment: Identifier appended to the option label of the row.
    """
    option = 'Third_experiment_third_configuration_window_232_' + str(experiment)

    final_results = [option]
    results = inicializate_dict()

    X = statistics.loc[:, "Fp1_Mean":"O2_median"].to_numpy()
    y = statistics.loc[:, "Condition"].to_numpy()

    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        for m, pipeline in clfs.items():
            clf = OneVsRestClassifier(pipeline)
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)

            report = classification_report(y_test, y_pred, output_dict=True)
            f1_score = report['weighted avg']['f1-score']

            cm = confusion_matrix(y_test, y_pred)
            eer, far, frr = calculate_eer_far_frr(cm)

            # Per-fold rounding is kept so reported figures stay comparable
            # with rows already written by earlier runs.
            results[m]['f1_score'].append(round(f1_score, 2))
            results[m]['eer'].append(round(eer, 2))
            results[m]['far'].append(round(far, 2))
            results[m]['frr'].append(round(frr, 2))

    # One group of four averaged metrics per classifier, in clfs order.
    for metrics in results.values():
        final_results.extend(
            f"{sum(metrics[key]) / len(metrics[key]):.2f}"
            for key in ('f1_score', 'eer', 'far', 'frr')
        )

    # newline='' is what the csv module expects, and the encoding matches the
    # one used when the header row was written.  The with-block closes the
    # file, so no explicit close() is needed.
    with open('third_experiment/third_configuration/window_232/results/results_third_experiment_third_configuration_window_232.csv', 'a', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)

        writer.writerow(final_results)
        
# Evaluate the five identification datasets, numbered 0 to 4, appending one
# results row for each.
for i in range(5):
    identification_statistics = pd.read_csv(
        'third_experiment/third_configuration/window_232/statistics/identification_statistics_{}_third_experiment_third_configuration_window_232.csv'.format(i)
    )
    identification(identification_statistics, i)