In [1]:
import pandas as pd
import numpy as np
import os

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import classification_report
import lightgbm
from sklearn.svm import SVC
from sklearn.utils import shuffle
import pickle
import main_utils_1
from sklearn.metrics import accuracy_score, f1_score
from datetime import datetime

In [16]:
from Training_Code.config import SELECTCOLS, ECG_SELECTCOLS, EDA_SELECTCOLS, SELECTFOUR

In [3]:
len(EDA_SELECTCOLS)

50

In [4]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network  import MLPClassifier
from sklearn.preprocessing import StandardScaler
import xgboost as xgb

In [5]:
funcs_for_matbii = [
    (RandomForestClassifier , {'n_estimators': 1000,
                                'min_samples_split': 5,
                                'min_samples_leaf': 1,
                                'max_features': 'auto',
                                'max_depth': 30,
                                'bootstrap': False, 'random_state': 24, 'class_weight': 'balanced'}),
    (LinearDiscriminantAnalysis, {'solver': 'lsqr'}), 
    (GradientBoostingClassifier, {'max_depth': 3, 'n_estimators': 300, 'max_features': 'auto'}), 
    (MLPClassifier, {'hidden_layer_sizes': (100, 10), 'learning_rate': 'adaptive', 'max_iter': 1000}),
    (LogisticRegression, {'C': 1, 'max_iter': 400}),
    (SVC , {'class_weight': 'balanced', 'C': 10}), 
    (xgb.XGBClassifier, {'n_estimators': 300, 'learning_rate': 0.01, 
                         'use_label_encoder': False, 
                         'booster': 'dart', 'n_jobs': 4,
                         'reg_lambda': 0.0001, 'random_state': 24}),
    (lightgbm.LGBMClassifier, {'num_leaves': 100,
                                'n_estimators': 2000,
                                'learning_rate': 0.001,
                                'importance_type': 'gains',
                                'boosting_type': 'gbdt',
                                'class_weight': 'balanced',
                                'random_state': 24})
]

funcs_for_virage = [
    (RandomForestClassifier , {'n_estimators': 2000,
                                'min_samples_split': 2,
                                'min_samples_leaf': 1,
                                'max_features': 'auto',
                                'max_depth': 30,
                                'bootstrap': False, 'random_state': 24, 'class_weight': 'balanced'}),
    (LinearDiscriminantAnalysis, {'solver': 'lsqr'}), 
    (GradientBoostingClassifier, {'max_depth': 3, 'n_estimators': 300, 'max_features': 'auto'}), 
    (MLPClassifier, {'hidden_layer_sizes': (100, 10), 'learning_rate': 'adaptive', 'max_iter': 1000}),
    (LogisticRegression, {'C': 0.5, 'max_iter': 400}),
    (SVC , {'class_weight': 'balanced', 'C': 10}), 
    (xgb.XGBClassifier, {'n_estimators': 300, 'learning_rate': 0.01, 
                         'use_label_encoder': False, 
                         'booster': 'dart', 'n_jobs': 4,
                         'reg_lambda': 0.0001, 'random_state': 24}),
    (lightgbm.LGBMClassifier, {'num_leaves': 100,
                                'n_estimators': 2000,
                                'learning_rate': 0.001,
                                'importance_type': 'gains',
                                'boosting_type': 'gbdt',
                                'class_weight': 'balanced',
                                'random_state': 24})
]

In [6]:
def make_loso(dr_feat_path):
    subjects = os.listdir(dr_feat_path)
    xtrainDriv = pd.DataFrame()
    for subTrain in subjects:
        train = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(subTrain)))
        train[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        if np.isinf(train).values.sum():
            cinf = np.isinf(train).values.sum()
            print("Train Dataframe contains {} values".format(cinf))
        train.replace([np.inf], 9999, inplace=True)        
        train.replace([-np.inf], -9999, inplace=True)        

        train['scrNumPeaks'] = train['scrNumPeaks'].values.astype(int)
        train['scrNumPeaks'] = train['scrNumPeaks'].values.clip(min=0) # converting negatives to zero

        train.dropna(inplace=True)
        xtrainDriv = xtrainDriv.append(train)
        xtrainDriv.reset_index(drop=True, inplace=True)

    return xtrainDriv.copy()

In [7]:
def read_dataset(dataset, folder, basefolder):
    dr_feat_path = r'X:\All Modes\{}\ECG EDA\Combined\{}'.format(dataset, folder) # ECG_EDA_Features_Combined_scld
    bs_feat_path = r'X:\All Modes\{}\ECG EDA\Combined\{}'.format(dataset, basefolder) # ECG_EDA_Base2_Features_Combined

    XtrainDriv = make_loso(dr_feat_path)
    XtrainBase = make_loso(bs_feat_path)

    XtrainDriv = XtrainDriv[SELECTCOLS].copy()
    ytrainDriv = list(XtrainDriv['scaled label'].copy())

    XtrainBase = XtrainBase[SELECTCOLS[:-3]].copy()
    ytrainBase = XtrainBase.shape[0] * [0]

    XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
    XtrainDriv = XtrainDriv.append(XtrainBase)

    ytrain = ytrainDriv + ytrainBase

    X = XtrainDriv.values
    
    X, ytrain = shuffle(X, ytrain, random_state=42)

    for idx, val in enumerate(ytrain):
        if val <= 4:
            ytrain[idx] = 0
        else: ytrain[idx] = 1

    return X, ytrain

In [8]:
def mk_training_data(dr_feat_path, sdriv):
    subjects = os.listdir(dr_feat_path)
    xtrainDriv = pd.DataFrame()

    for subTrain in subjects:
        if subTrain != sdriv:
            train = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(subTrain)))

            train[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
            if np.isinf(train).values.sum():
                cinf = np.isinf(train).values.sum()
                print("Train Dataframe contains {} values".format(cinf))
            train.replace([np.inf], 9999, inplace=True)        
            train.replace([-np.inf], -9999, inplace=True)        

            train['scrNumPeaks'] = train['scrNumPeaks'].values.astype(int)
            train['scrNumPeaks'] = train['scrNumPeaks'].values.clip(min=0) # converting negatives to zero

            train.dropna(inplace=True)
            xtrainDriv = xtrainDriv.append(train)
            xtrainDriv.reset_index(drop=True, inplace=True)

    return xtrainDriv.copy()

# Only SVC, LGBM, XGBoost

In [9]:
def losoValidation(dataset, folder, basefolder, paramsSELECTCOLS):
    dr_feat_path = r'X:\All Modes\{}\ECG EDA\Combined\{}'.format(dataset, folder) # ECG_EDA_Features_Combined_scld
    bs_feat_path = r'X:\All Modes\{}\ECG EDA\Combined\{}'.format(dataset, basefolder) # ECG_EDA_Base2_Features_Combined
    date_time = datetime.now().strftime("%Y_%m_%d_%H_%M")
    print("Saved Directory: {}".format(date_time))
    savePath_0 = f"X:/All Modes/Data Files/{date_time}"
    main_utils_1.mk_dirs(savePath_0)
    savePath1 = os.path.join(savePath_0, f'{dataset}')
    main_utils_1.mk_dirs(savePath1)
    savePath = os.path.join(savePath1, 'ECG EDA')
    main_utils_1.mk_dirs(savePath)

    mycls = {}

    if dataset == 'MatbII':
        parameter_list = funcs_for_matbii
    elif dataset == 'Virage':
        parameter_list = funcs_for_virage

    subjects = os.listdir(dr_feat_path)
    results_df = pd.DataFrame(columns=['dataset', 'method', 'test_subject', 'test_acc', 'test_f1'])
    for sdriv in subjects:
        main_utils_1.mk_dirs(os.path.join(savePath, sdriv))

        xtestDriv = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(sdriv)))
        xtestDriv[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        if np.isinf(xtestDriv).values.sum():
            cinf = np.isinf(xtestDriv).values.sum()
            print("Dataframe contains {} values".format(cinf))

        xtestDriv.replace([np.inf], 9999, inplace=True)
        xtestDriv.replace([-np.inf], -9999, inplace=True)

        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.astype(int)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.clip(min=0) # converting negatives to zero
        xtestDriv.dropna(inplace=True) # .reset_index(drop=True, inplace=True)

        ytestDriv = list(xtestDriv['scaled label'].copy()) 

        XtrainDriv = mk_training_data(dr_feat_path, sdriv)
        # XtrainBase = mk_training_data(bs_feat_path, sdriv)

        XtrainDriv = XtrainDriv[paramsSELECTCOLS].copy()
        XtestDriv = xtestDriv[paramsSELECTCOLS].copy()  ### Look out for small x and X
        # XtrainBase = XtrainBase[SELECTCOLS[:-3]].copy()

        ytrainDriv = list(XtrainDriv['scaled label'].copy())
        # ytrainBase = XtrainBase.shape[0] * [0]

        XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
        XtestDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)

        ytrain = ytrainDriv #+ ytrainBase
        # XtrainDriv = XtrainDriv.append(XtrainBase)

        X = XtrainDriv.values
        X, ytrain = shuffle(X, ytrain, random_state=42)

        for idx, val in enumerate(ytrain):
            if val <= 3:
                ytrain[idx] = 0
            elif 3 < val <= 6:
                ytrain[idx] = 1
            else: ytrain[idx] = 2

        for idx, val in enumerate(ytestDriv):
            if val <= 3:
                ytestDriv[idx] = 0
            elif 3 < val <= 6:
                ytestDriv[idx] = 1
            else: ytestDriv[idx] = 2

        # training different classifier for all subjects and saving them in different dictionnaries
        mycls = {}
        for cls_modl, cls_parameters in parameter_list:
            print("--------------------------------------------")
            print(f"---- Training classifier {cls_modl.__name__} for subject: {sdriv} ---")

            classifier_save_path = os.path.join(savePath, sdriv, cls_modl.__name__)
            main_utils_1.mk_dirs(classifier_save_path)

            classifier_report = os.path.join(classifier_save_path, 'report')
            classifier_sav = os.path.join(classifier_save_path, 'classifier')
            main_utils_1.mk_dirs(classifier_report)
            main_utils_1.mk_dirs(classifier_sav)

            if cls_modl.__name__ in ['LogisticRegression', 'SVC', 'LGBMClassifier', 'XGBClassifier']:
                scaler = StandardScaler()
                X = scaler.fit_transform(X)
                XtestDriv = scaler.transform(XtestDriv)
            clf = cls_modl(**cls_parameters)
            hist = clf.fit(X, ytrain)

            yPred = hist.predict(XtestDriv)

            test_accuray = accuracy_score(ytestDriv, yPred)
            test_f1 = f1_score(ytestDriv, yPred, average='macro')
            
            results_df = results_df.append({'dataset': folder,
                                            'method':cls_modl.__name__,
                                            'test_subject': sdriv,
                                            'test_acc': test_accuray,
                                            'test_f1':test_f1}, ignore_index=True)
            print('Test Subject: {}'.format(sdriv))

            mycls[sdriv] = classification_report(ytestDriv, yPred, zero_division=1, output_dict=True)
            print("----- Classification Report ------")
            print(f"Test accuracy for {sdriv} is: {test_accuray} and f1 score is: {test_f1}\n")

            print(classification_report(ytestDriv, yPred, zero_division=1))
            with open(os.path.join(classifier_report, 'Test_fold_{}_report.pickle'.format(sdriv)), 'wb') as handle:
                pickle.dump(mycls, handle, protocol= pickle.HIGHEST_PROTOCOL)

            with open(os.path.join(classifier_sav, 'Test_fold_{}_report.sav'.format(sdriv)), 'wb') as handle:
                pickle.dump(hist, handle, protocol= pickle.HIGHEST_PROTOCOL)

        # estimatorPath = os.path.join(savePath, '{}.sav'.format(saveName))
        # pickle.dump(hist, open(estimatorPath, 'wb'))

    results_df.to_csv(os.path.join(savePath, 'results.csv'), index=False)
    return
    # return XtrainDriv, y_train

In [10]:
losoValidation('Virage', 'ECG_EDA_Features_Combined_scld', 'ECG_EDA_Base2_Features_Combined', SELECTCOLS)

Saved Directory: 2021_12_15_14_08
--------------------------------------------
---- Training classifier RandomForestClassifier for subject: 1030.csv ---
Test Subject: 1030.csv
----- Classification Report ------
Test accuracy for 1030.csv is: 0.5875 and f1 score is: 0.5931397386753612

              precision    recall  f1-score   support

           0       0.63      0.73      0.68        33
           1       0.51      0.40      0.45        63
           2       0.62      0.70      0.66        64

    accuracy                           0.59       160
   macro avg       0.59      0.61      0.59       160
weighted avg       0.58      0.59      0.58       160

--------------------------------------------
---- Training classifier LinearDiscriminantAnalysis for subject: 1030.csv ---
Test Subject: 1030.csv
----- Classification Report ------
Test accuracy for 1030.csv is: 0.58125 and f1 score is: 0.5523648750717545

              precision    recall  f1-score   support

           0       0.

In [11]:
losoValidation('MatbII', 'ECG_EDA_Features_Combined_scld', 'ECG_EDA_Base2_Features_Combined', SELECTCOLS)

Saved Directory: 2021_12_15_15_03
--------------------------------------------
---- Training classifier RandomForestClassifier for subject: 1105.csv ---
Test Subject: 1105.csv
----- Classification Report ------
Test accuracy for 1105.csv is: 0.5833333333333334 and f1 score is: 0.3709279855713488

              precision    recall  f1-score   support

           0       1.00      0.11      0.20        36
           1       0.64      0.83      0.72       139
           2       0.22      0.17      0.19        41

    accuracy                           0.58       216
   macro avg       0.62      0.37      0.37       216
weighted avg       0.62      0.58      0.53       216

--------------------------------------------
---- Training classifier LinearDiscriminantAnalysis for subject: 1105.csv ---
Test Subject: 1105.csv
----- Classification Report ------
Test accuracy for 1105.csv is: 0.41203703703703703 and f1 score is: 0.37038076784070745

              precision    recall  f1-score   suppo

In [12]:
losoValidation('Virage', 'ECG_EDA_Features_Combined_scld', 'ECG_EDA_Base2_Features_Combined', ECG_SELECTCOLS)

Saved Directory: 2021_12_15_15_49
--------------------------------------------
---- Training classifier RandomForestClassifier for subject: 1030.csv ---
Test Subject: 1030.csv
----- Classification Report ------
Test accuracy for 1030.csv is: 0.45 and f1 score is: 0.4522957662492546

              precision    recall  f1-score   support

           0       0.44      0.61      0.51        33
           1       0.38      0.56      0.45        63
           2       0.77      0.27      0.40        64

    accuracy                           0.45       160
   macro avg       0.53      0.48      0.45       160
weighted avg       0.55      0.45      0.44       160

--------------------------------------------
---- Training classifier LinearDiscriminantAnalysis for subject: 1030.csv ---
Test Subject: 1030.csv
----- Classification Report ------
Test accuracy for 1030.csv is: 0.63125 and f1 score is: 0.6099499058882812

              precision    recall  f1-score   support

           0       0.81

In [13]:
losoValidation('MatbII', 'ECG_EDA_Features_Combined_scld', 'ECG_EDA_Base2_Features_Combined', ECG_SELECTCOLS)

Saved Directory: 2021_12_15_16_20
--------------------------------------------
---- Training classifier RandomForestClassifier for subject: 1105.csv ---
Test Subject: 1105.csv
----- Classification Report ------
Test accuracy for 1105.csv is: 0.6388888888888888 and f1 score is: 0.3413203633502611

              precision    recall  f1-score   support

           0       1.00      0.14      0.24        36
           1       0.66      0.96      0.78       139
           2       0.00      0.00      0.00        41

    accuracy                           0.64       216
   macro avg       0.55      0.37      0.34       216
weighted avg       0.59      0.64      0.54       216

--------------------------------------------
---- Training classifier LinearDiscriminantAnalysis for subject: 1105.csv ---
Test Subject: 1105.csv
----- Classification Report ------
Test accuracy for 1105.csv is: 0.37962962962962965 and f1 score is: 0.3281764217389967

              precision    recall  f1-score   suppor

In [14]:
losoValidation('Virage', 'ECG_EDA_Features_Combined_scld', 'ECG_EDA_Base2_Features_Combined', EDA_SELECTCOLS)

Saved Directory: 2021_12_15_16_45
--------------------------------------------
---- Training classifier RandomForestClassifier for subject: 1030.csv ---
Test Subject: 1030.csv
----- Classification Report ------
Test accuracy for 1030.csv is: 0.51875 and f1 score is: 0.5202815718944751

              precision    recall  f1-score   support

           0       0.71      0.45      0.56        33
           1       0.52      0.40      0.45        63
           2       0.47      0.67      0.55        64

    accuracy                           0.52       160
   macro avg       0.57      0.51      0.52       160
weighted avg       0.54      0.52      0.51       160

--------------------------------------------
---- Training classifier LinearDiscriminantAnalysis for subject: 1030.csv ---
Test Subject: 1030.csv
----- Classification Report ------
Test accuracy for 1030.csv is: 0.48125 and f1 score is: 0.46658060814687313

              precision    recall  f1-score   support

           0       

In [15]:
losoValidation('MatbII', 'ECG_EDA_Features_Combined_scld', 'ECG_EDA_Base2_Features_Combined', EDA_SELECTCOLS)

Saved Directory: 2021_12_15_17_18
--------------------------------------------
---- Training classifier RandomForestClassifier for subject: 1105.csv ---
Test Subject: 1105.csv
----- Classification Report ------
Test accuracy for 1105.csv is: 0.5324074074074074 and f1 score is: 0.3664736935298478

              precision    recall  f1-score   support

           0       1.00      0.14      0.24        36
           1       0.63      0.73      0.68       139
           2       0.16      0.20      0.18        41

    accuracy                           0.53       216
   macro avg       0.60      0.36      0.37       216
weighted avg       0.60      0.53      0.51       216

--------------------------------------------
---- Training classifier LinearDiscriminantAnalysis for subject: 1105.csv ---
Test Subject: 1105.csv
----- Classification Report ------
Test accuracy for 1105.csv is: 0.6574074074074074 and f1 score is: 0.383920105944117

              precision    recall  f1-score   support


In [18]:
def losoValidation(dataset, folder, basefolder, paramsSELECTCOLS):
    dr_feat_path = r'X:\All Modes\All\{}\Combine'.format(dataset) # ECG_EDA_Features_Combined_scld
    bs_feat_path = r'X:\All Modes\All\{}\Combine'.format(dataset) # ECG_EDA_Base2_Features_Combined
    date_time = datetime.now().strftime("%Y_%m_%d_%H_%M")
    print("Saved Directory: {}".format(date_time))
    savePath_0 = f"X:/All Modes/Data Files/{date_time}"
    main_utils_1.mk_dirs(savePath_0)
    savePath1 = os.path.join(savePath_0, f'{dataset}')
    main_utils_1.mk_dirs(savePath1)
    savePath = os.path.join(savePath1, 'ECG EDA')
    main_utils_1.mk_dirs(savePath)

    mycls = {}

    if dataset == 'MatbII':
        parameter_list = funcs_for_matbii
    elif dataset == 'Virage':
        parameter_list = funcs_for_virage

    subjects = os.listdir(dr_feat_path)
    results_df = pd.DataFrame(columns=['dataset', 'method', 'test_subject', 'test_acc', 'test_f1'])
    for sdriv in subjects:
        main_utils_1.mk_dirs(os.path.join(savePath, sdriv))

        xtestDriv = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(sdriv)))
        xtestDriv[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        if np.isinf(xtestDriv).values.sum():
            cinf = np.isinf(xtestDriv).values.sum()
            print("Dataframe contains {} values".format(cinf))

        xtestDriv.replace([np.inf], 9999, inplace=True)
        xtestDriv.replace([-np.inf], -9999, inplace=True)

        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.astype(int)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.clip(min=0) # converting negatives to zero
        xtestDriv.dropna(inplace=True) # .reset_index(drop=True, inplace=True)

        ytestDriv = list(xtestDriv['scaled label'].copy()) 

        XtrainDriv = mk_training_data(dr_feat_path, sdriv)
        # XtrainBase = mk_training_data(bs_feat_path, sdriv)

        XtrainDriv = XtrainDriv[paramsSELECTCOLS].copy()
        XtestDriv = xtestDriv[paramsSELECTCOLS].copy()  ### Look out for small x and X
        # XtrainBase = XtrainBase[SELECTCOLS[:-3]].copy()

        ytrainDriv = list(XtrainDriv['scaled label'].copy())
        # ytrainBase = XtrainBase.shape[0] * [0]

        XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
        XtestDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)

        ytrain = ytrainDriv #+ ytrainBase
        # XtrainDriv = XtrainDriv.append(XtrainBase)

        X = XtrainDriv.values
        X, ytrain = shuffle(X, ytrain, random_state=42)

        for idx, val in enumerate(ytrain):
            if val <= 3:
                ytrain[idx] = 0
            elif 3 < val <= 6:
                ytrain[idx] = 1
            else: ytrain[idx] = 2

        for idx, val in enumerate(ytestDriv):
            if val <= 3:
                ytestDriv[idx] = 0
            elif 3 < val <= 6:
                ytestDriv[idx] = 1
            else: ytestDriv[idx] = 2

        # training different classifier for all subjects and saving them in different dictionnaries
        mycls = {}
        for cls_modl, cls_parameters in parameter_list:
            print("--------------------------------------------")
            print(f"---- Training classifier {cls_modl.__name__} for subject: {sdriv} ---")

            classifier_save_path = os.path.join(savePath, sdriv, cls_modl.__name__)
            main_utils_1.mk_dirs(classifier_save_path)

            classifier_report = os.path.join(classifier_save_path, 'report')
            classifier_sav = os.path.join(classifier_save_path, 'classifier')
            main_utils_1.mk_dirs(classifier_report)
            main_utils_1.mk_dirs(classifier_sav)

            if cls_modl.__name__ in ['LogisticRegression', 'SVC', 'LGBMClassifier', 'XGBClassifier']:
                scaler = StandardScaler()
                X = scaler.fit_transform(X)
                XtestDriv = scaler.transform(XtestDriv)
            clf = cls_modl(**cls_parameters)
            hist = clf.fit(X, ytrain)

            yPred = hist.predict(XtestDriv)

            test_accuray = accuracy_score(ytestDriv, yPred)
            test_f1 = f1_score(ytestDriv, yPred, average='macro')
            
            results_df = results_df.append({'dataset': folder,
                                            'method':cls_modl.__name__,
                                            'test_subject': sdriv,
                                            'test_acc': test_accuray,
                                            'test_f1':test_f1}, ignore_index=True)
            print('Test Subject: {}'.format(sdriv))

            mycls[sdriv] = classification_report(ytestDriv, yPred, zero_division=1, output_dict=True)
            print("----- Classification Report ------")
            print(f"Test accuracy for {sdriv} is: {test_accuray} and f1 score is: {test_f1}\n")

            print(classification_report(ytestDriv, yPred, zero_division=1))
            with open(os.path.join(classifier_report, 'Test_fold_{}_report.pickle'.format(sdriv)), 'wb') as handle:
                pickle.dump(mycls, handle, protocol= pickle.HIGHEST_PROTOCOL)

            with open(os.path.join(classifier_sav, 'Test_fold_{}_report.sav'.format(sdriv)), 'wb') as handle:
                pickle.dump(hist, handle, protocol= pickle.HIGHEST_PROTOCOL)

        # estimatorPath = os.path.join(savePath, '{}.sav'.format(saveName))
        # pickle.dump(hist, open(estimatorPath, 'wb'))

    results_df.to_csv(os.path.join(savePath, 'results.csv'), index=False)
    return
    # return XtrainDriv, y_train

In [19]:
losoValidation('Virage', 'ECG_EDA_Features_Combined_scld', 'ECG_EDA_Base2_Features_Combined', SELECTFOUR)

Saved Directory: 2021_12_15_22_45
--------------------------------------------
---- Training classifier RandomForestClassifier for subject: 1030.csv ---
Test Subject: 1030.csv
----- Classification Report ------
Test accuracy for 1030.csv is: 0.5094339622641509 and f1 score is: 0.5181029706346162

              precision    recall  f1-score   support

           0       0.59      0.70      0.64        33
           1       0.42      0.65      0.51        62
           2       0.75      0.28      0.41        64

    accuracy                           0.51       159
   macro avg       0.59      0.54      0.52       159
weighted avg       0.59      0.51      0.49       159

--------------------------------------------
---- Training classifier LinearDiscriminantAnalysis for subject: 1030.csv ---
Test Subject: 1030.csv
----- Classification Report ------
Test accuracy for 1030.csv is: 0.5660377358490566 and f1 score is: 0.5451775633408633

              precision    recall  f1-score   support

In [20]:
losoValidation('MatbII', 'ECG_EDA_Features_Combined_scld', 'ECG_EDA_Base2_Features_Combined', SELECTFOUR)

Saved Directory: 2021_12_16_00_41
--------------------------------------------
---- Training classifier RandomForestClassifier for subject: 1105.csv ---
Test Subject: 1105.csv
----- Classification Report ------
Test accuracy for 1105.csv is: 0.6201923076923077 and f1 score is: 0.3625240494524647

              precision    recall  f1-score   support

           0       1.00      0.06      0.11        33
           1       0.68      0.88      0.76       137
           2       0.24      0.18      0.21        38

    accuracy                           0.62       208
   macro avg       0.64      0.37      0.36       208
weighted avg       0.65      0.62      0.56       208

--------------------------------------------
---- Training classifier LinearDiscriminantAnalysis for subject: 1105.csv ---
Test Subject: 1105.csv
----- Classification Report ------
Test accuracy for 1105.csv is: 0.3942307692307692 and f1 score is: 0.35754455045686306

              precision    recall  f1-score   suppor

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1105.csv
----- Classification Report ------
Test accuracy for 1105.csv is: 0.4375 and f1 score is: 0.35197566311184025

              precision    recall  f1-score   support

           0       0.25      0.55      0.34        33
           1       0.73      0.50      0.59       137
           2       0.12      0.13      0.12        38

    accuracy                           0.44       208
   macro avg       0.37      0.39      0.35       208
weighted avg       0.54      0.44      0.47       208

--------------------------------------------
---- Training classifier SVC for subject: 1105.csv ---
Test Subject: 1105.csv
----- Classification Report ------
Test accuracy for 1105.csv is: 0.4326923076923077 and f1 score is: 0.4043197936814958

              precision    recall  f1-score   support

           0       0.31      0.73      0.44        33
           1       0.71      0.39      0.50       137
           2       0.23      0.34      0.28        38

    accuracy          

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1106.csv
----- Classification Report ------
Test accuracy for 1106.csv is: 0.4013605442176871 and f1 score is: 0.3825396825396825

              precision    recall  f1-score   support

           0       0.36      0.31      0.33        13
           1       0.46      0.36      0.40        73
           2       0.37      0.48      0.41        61

    accuracy                           0.40       147
   macro avg       0.40      0.38      0.38       147
weighted avg       0.41      0.40      0.40       147

--------------------------------------------
---- Training classifier SVC for subject: 1106.csv ---
Test Subject: 1106.csv
----- Classification Report ------
Test accuracy for 1106.csv is: 0.40816326530612246 and f1 score is: 0.46744858065612777

              precision    recall  f1-score   support

           0       0.73      0.62      0.67        13
           1       0.39      0.21      0.27        73
           2       0.38      0.61      0.47        61

    accur

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1175.csv
----- Classification Report ------
Test accuracy for 1175.csv is: 0.3615819209039548 and f1 score is: 0.33431480868708513

              precision    recall  f1-score   support

           0       0.29      0.21      0.24        43
           1       0.41      0.21      0.28        81
           2       0.36      0.72      0.48        53

    accuracy                           0.36       177
   macro avg       0.36      0.38      0.33       177
weighted avg       0.37      0.36      0.33       177

--------------------------------------------
---- Training classifier SVC for subject: 1175.csv ---
Test Subject: 1175.csv
----- Classification Report ------
Test accuracy for 1175.csv is: 0.4519774011299435 and f1 score is: 0.35139933166248954

              precision    recall  f1-score   support

           0       0.82      0.21      0.33        43
           1       0.46      0.81      0.59        81
           2       0.22      0.09      0.13        53

    accur

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1337.csv
----- Classification Report ------
Test accuracy for 1337.csv is: 0.3194444444444444 and f1 score is: 0.3033228469045735

              precision    recall  f1-score   support

           0       0.57      0.33      0.41        86
           1       0.25      0.46      0.33        71
           2       0.22      0.14      0.17        59

    accuracy                           0.32       216
   macro avg       0.35      0.31      0.30       216
weighted avg       0.37      0.32      0.32       216

--------------------------------------------
---- Training classifier SVC for subject: 1337.csv ---
Test Subject: 1337.csv
----- Classification Report ------
Test accuracy for 1337.csv is: 0.3194444444444444 and f1 score is: 0.28606293896100693

              precision    recall  f1-score   support

           0       0.35      0.29      0.32        86
           1       0.30      0.54      0.38        71
           2       0.35      0.10      0.16        59

    accura

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1390.csv
----- Classification Report ------
Test accuracy for 1390.csv is: 0.3427230046948357 and f1 score is: 0.28391772423220335

              precision    recall  f1-score   support

           0       0.41      0.25      0.31        56
           1       0.32      0.82      0.46        67
           2       0.80      0.04      0.08        90

    accuracy                           0.34       213
   macro avg       0.51      0.37      0.28       213
weighted avg       0.55      0.34      0.26       213

--------------------------------------------
---- Training classifier SVC for subject: 1390.csv ---
Test Subject: 1390.csv
----- Classification Report ------
Test accuracy for 1390.csv is: 0.3192488262910798 and f1 score is: 0.22802637475193652

              precision    recall  f1-score   support

           0       0.35      0.16      0.22        56
           1       0.32      0.88      0.46        67
           2       1.00      0.00      0.00        90

    accur

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1400.csv
----- Classification Report ------
Test accuracy for 1400.csv is: 0.24761904761904763 and f1 score is: 0.24350497794932177

              precision    recall  f1-score   support

           0       0.27      0.37      0.31        51
           1       0.67      0.17      0.27       145
           2       0.09      0.64      0.15        14

    accuracy                           0.25       210
   macro avg       0.34      0.39      0.24       210
weighted avg       0.53      0.25      0.27       210

--------------------------------------------
---- Training classifier SVC for subject: 1400.csv ---
Test Subject: 1400.csv
----- Classification Report ------
Test accuracy for 1400.csv is: 0.26666666666666666 and f1 score is: 0.23796055138867142

              precision    recall  f1-score   support

           0       0.35      0.16      0.22        51
           1       0.61      0.28      0.38       145
           2       0.07      0.57      0.12        14

    acc

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1419.csv
----- Classification Report ------
Test accuracy for 1419.csv is: 0.40654205607476634 and f1 score is: 0.3098952915886554

              precision    recall  f1-score   support

           0       0.12      0.27      0.17        30
           1       0.65      0.49      0.56       144
           2       0.20      0.20      0.20        40

    accuracy                           0.41       214
   macro avg       0.32      0.32      0.31       214
weighted avg       0.49      0.41      0.44       214

--------------------------------------------
---- Training classifier SVC for subject: 1419.csv ---
Test Subject: 1419.csv
----- Classification Report ------
Test accuracy for 1419.csv is: 0.48130841121495327 and f1 score is: 0.32760514752040176

              precision    recall  f1-score   support

           0       0.18      0.30      0.22        30
           1       0.62      0.62      0.62       144
           2       0.21      0.10      0.14        40

    accu

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1517.csv
----- Classification Report ------
Test accuracy for 1517.csv is: 0.37037037037037035 and f1 score is: 0.35352920936862536

              precision    recall  f1-score   support

           0       0.61      0.24      0.34        71
           1       0.43      0.51      0.47        63
           2       0.18      0.39      0.25        28

    accuracy                           0.37       162
   macro avg       0.41      0.38      0.35       162
weighted avg       0.47      0.37      0.38       162

--------------------------------------------
---- Training classifier SVC for subject: 1517.csv ---
Test Subject: 1517.csv
----- Classification Report ------
Test accuracy for 1517.csv is: 0.37037037037037035 and f1 score is: 0.3271812408402541

              precision    recall  f1-score   support

           0       0.45      0.18      0.26        71
           1       0.36      0.65      0.46        63
           2       0.33      0.21      0.26        28

    accu

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1544.csv
----- Classification Report ------
Test accuracy for 1544.csv is: 0.41304347826086957 and f1 score is: 0.19791666666666666

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.42      1.00      0.59        19
           2       1.00      0.00      0.00        19

    accuracy                           0.41        46
   macro avg       0.47      0.33      0.20        46
weighted avg       0.59      0.41      0.25        46

--------------------------------------------
---- Training classifier SVC for subject: 1544.csv ---
Test Subject: 1544.csv
----- Classification Report ------
Test accuracy for 1544.csv is: 0.41304347826086957 and f1 score is: 0.19487179487179487

              precision    recall  f1-score   support

           0       1.00      0.00      0.00         8
           1       0.41      1.00      0.58        19
           2       1.00      0.00      0.00        19

    acc

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1624.csv
----- Classification Report ------
Test accuracy for 1624.csv is: 0.3333333333333333 and f1 score is: 0.29399764318711963

              precision    recall  f1-score   support

           0       0.27      0.28      0.27        43
           1       0.37      0.55      0.44        64
           2       0.32      0.12      0.17        52

    accuracy                           0.33       159
   macro avg       0.32      0.31      0.29       159
weighted avg       0.32      0.33      0.31       159

--------------------------------------------
---- Training classifier SVC for subject: 1624.csv ---
Test Subject: 1624.csv
----- Classification Report ------
Test accuracy for 1624.csv is: 0.39622641509433965 and f1 score is: 0.2021810628353817

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        43
           1       0.40      0.97      0.57        64
           2       1.00      0.02      0.04        52

    accur

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1688.csv
----- Classification Report ------
Test accuracy for 1688.csv is: 0.425414364640884 and f1 score is: 0.3600366149950382

              precision    recall  f1-score   support

           0       0.18      0.22      0.20        23
           1       0.47      0.61      0.53        83
           2       0.47      0.28      0.35        75

    accuracy                           0.43       181
   macro avg       0.37      0.37      0.36       181
weighted avg       0.43      0.43      0.41       181

--------------------------------------------
---- Training classifier SVC for subject: 1688.csv ---
Test Subject: 1688.csv
----- Classification Report ------
Test accuracy for 1688.csv is: 0.3314917127071823 and f1 score is: 0.1994286280000566

              precision    recall  f1-score   support

           0       0.04      0.04      0.04        23
           1       0.41      0.69      0.51        83
           2       0.12      0.03      0.04        75

    accuracy

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1717.csv
----- Classification Report ------
Test accuracy for 1717.csv is: 0.3148148148148148 and f1 score is: 0.31659891598915985

              precision    recall  f1-score   support

           0       0.20      0.42      0.27        26
           1       0.52      0.25      0.34        55
           2       0.33      0.33      0.33        27

    accuracy                           0.31       108
   macro avg       0.35      0.34      0.32       108
weighted avg       0.40      0.31      0.32       108

--------------------------------------------
---- Training classifier SVC for subject: 1717.csv ---
Test Subject: 1717.csv
----- Classification Report ------
Test accuracy for 1717.csv is: 0.3148148148148148 and f1 score is: 0.31741376902667223

              precision    recall  f1-score   support

           0       0.31      0.42      0.35        26
           1       0.45      0.18      0.26        55
           2       0.26      0.48      0.34        27

    accur

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1765.csv
----- Classification Report ------
Test accuracy for 1765.csv is: 0.2654320987654321 and f1 score is: 0.2776248291906403

              precision    recall  f1-score   support

           0       0.38      0.28      0.32        18
           1       0.65      0.11      0.19       117
           2       0.19      0.93      0.32        27

    accuracy                           0.27       162
   macro avg       0.41      0.44      0.28       162
weighted avg       0.54      0.27      0.23       162

--------------------------------------------
---- Training classifier SVC for subject: 1765.csv ---
Test Subject: 1765.csv
----- Classification Report ------
Test accuracy for 1765.csv is: 0.3148148148148148 and f1 score is: 0.2977542591636551

              precision    recall  f1-score   support

           0       0.29      0.22      0.25        18
           1       0.85      0.19      0.31       117
           2       0.20      0.93      0.34        27

    accurac

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1818.csv
----- Classification Report ------
Test accuracy for 1818.csv is: 0.5849056603773585 and f1 score is: 0.4133344126059037

              precision    recall  f1-score   support

           0       0.25      0.12      0.17        16
           1       0.55      0.27      0.36        78
           2       0.61      0.86      0.71       118

    accuracy                           0.58       212
   macro avg       0.47      0.42      0.41       212
weighted avg       0.56      0.58      0.54       212

--------------------------------------------
---- Training classifier SVC for subject: 1818.csv ---
Test Subject: 1818.csv
----- Classification Report ------
Test accuracy for 1818.csv is: 0.5471698113207547 and f1 score is: 0.5072730222566442

              precision    recall  f1-score   support

           0       0.46      0.38      0.41        16
           1       0.44      0.60      0.51        78
           2       0.68      0.53      0.60       118

    accurac

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1892.csv
----- Classification Report ------
Test accuracy for 1892.csv is: 0.3148148148148148 and f1 score is: 0.26563125326587983

              precision    recall  f1-score   support

           0       0.60      0.21      0.31       136
           1       0.27      0.69      0.39        54
           2       0.09      0.12      0.10        26

    accuracy                           0.31       216
   macro avg       0.32      0.34      0.27       216
weighted avg       0.45      0.31      0.30       216

--------------------------------------------
---- Training classifier SVC for subject: 1892.csv ---
Test Subject: 1892.csv
----- Classification Report ------
Test accuracy for 1892.csv is: 0.25925925925925924 and f1 score is: 0.2543435544255565

              precision    recall  f1-score   support

           0       0.59      0.14      0.23       136
           1       0.24      0.48      0.32        54
           2       0.15      0.42      0.22        26

    accur

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1929.csv
----- Classification Report ------
Test accuracy for 1929.csv is: 0.29245283018867924 and f1 score is: 0.22751902587519027

              precision    recall  f1-score   support

           0       0.05      0.06      0.05        33
           1       0.31      0.71      0.43        66
           2       0.76      0.12      0.20       113

    accuracy                           0.29       212
   macro avg       0.37      0.30      0.23       212
weighted avg       0.51      0.29      0.25       212

--------------------------------------------
---- Training classifier SVC for subject: 1929.csv ---
Test Subject: 1929.csv
----- Classification Report ------
Test accuracy for 1929.csv is: 0.3584905660377358 and f1 score is: 0.32412731433326397

              precision    recall  f1-score   support

           0       0.12      0.30      0.17        33
           1       0.47      0.24      0.32        66
           2       0.52      0.44      0.48       113

    accu

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Subject: 1933.csv
----- Classification Report ------
Test accuracy for 1933.csv is: 0.4489795918367347 and f1 score is: 0.3330177214271463

              precision    recall  f1-score   support

           0       0.23      0.29      0.26        21
           1       0.58      0.68      0.63        53
           2       0.20      0.08      0.12        24

    accuracy                           0.45        98
   macro avg       0.34      0.35      0.33        98
weighted avg       0.41      0.45      0.42        98

--------------------------------------------
---- Training classifier SVC for subject: 1933.csv ---
Test Subject: 1933.csv
----- Classification Report ------
Test accuracy for 1933.csv is: 0.4387755102040816 and f1 score is: 0.3343518343518343

              precision    recall  f1-score   support

           0       0.23      0.33      0.27        21
           1       0.59      0.64      0.61        53
           2       0.22      0.08      0.12        24

    accurac