In [1]:
import pandas as pd
import numpy as np
import os

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import classification_report
import lightgbm
from sklearn.svm import SVC
from sklearn.utils import shuffle

In [2]:
from Training_Code.config import SELECTCOLS, ECG_SELECTCOLS, EDA_SELECTCOLS

In [3]:
def losoValidVirage(dataset, folder):
    dr_feat_path = r'X:\Four Modes\{}\Combined\{}'.format(dataset, folder) # ECG_EDA_Features_Combined_scld
    mycls = {}

    subjects = os.listdir(dr_feat_path)
    xtrainDriv = pd.DataFrame()
    for sdriv in subjects:
        xtestDriv = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(sdriv)))
        xtestDriv[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        if np.isinf(xtestDriv).values.sum():
            cinf = np.isinf(xtestDriv).values.sum()
            print("Dataframe contains {} values".format(cinf))

        xtestDriv.replace([np.inf], 9999, inplace=True)
        xtestDriv.replace([-np.inf], -9999, inplace=True)

        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.astype(int)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.clip(min=0) # converting negatives to zero
        xtestDriv.dropna(inplace=True) # .reset_index(drop=True, inplace=True)

        ytestDriv = list(xtestDriv['label'].copy()) 

        xtrainDriv = pd.DataFrame()
        for subTrain in subjects:
            if subTrain != sdriv:
                train = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(subTrain)))

                train[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
                if np.isinf(train).values.sum():
                    cinf = np.isinf(train).values.sum()
                    print("Train Dataframe contains {} values".format(cinf))
                train.replace([np.inf], 9999, inplace=True)        
                train.replace([-np.inf], -9999, inplace=True)        

                train['scrNumPeaks'] = train['scrNumPeaks'].values.astype(int)
                train['scrNumPeaks'] = train['scrNumPeaks'].values.clip(min=0) # converting negatives to zero

                train.dropna(inplace=True)
                xtrainDriv = xtrainDriv.append(train)
                xtrainDriv.reset_index(drop=True, inplace=True)

        XtrainDriv = xtrainDriv[SELECTCOLS].copy()
        xtestDriv = xtestDriv[SELECTCOLS].copy()
        ytrainDriv = list(XtrainDriv['scaled label'].copy())

        XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
        xtestDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)

        ytrain = ytrainDriv #+ yMat + yBas

        X = XtrainDriv.values
        X, ytrain = shuffle(X, ytrain, random_state=42)

        for idx, val in enumerate(ytrain):
            if val <= 4:
                ytrain[idx] = 0
            else: ytrain[idx] = 1

        for idx, val in enumerate(ytestDriv):
            if val <= 4:
                ytestDriv[idx] = 0
            else: ytestDriv[idx] = 1

        paramsrf = {
            'n_estimators': 3000,
            'min_samples_split': 2,
            'min_samples_leaf': 1,
            'max_features': 'sqrt',
            'max_depth': 50,
            'bootstrap': False
            }

        # paramlgbm = {
        #     'n_estimators': 3000,
        #     'num_leaves': 100,
        #     'learning_rate': 0.05,
        #     'class_weight': 'balanced',
        #     'random_state': 24
        #     }

        clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(**paramlgbm)
        # clf4 = SVC(C=700, probability=True, class_weight='balanced')

        # estimatorList = [('rf', clf2), ('svm', clf4), ('gbm', clf3)]
        # eclf = VotingClassifier(estimators=estimatorList, voting='soft')

        # clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(n_estimators = 3000, num_leaves=100, learning_rate=0.005, class_weight='balanced')
        # clf4 = SVC(C=1000, probability=True, class_weight='balanced')
        # eclf = VotingClassifier(estimators=[('rf', clf2), ('gbm', clf3), ('svm', clf4)], voting='soft') # 

        hist = clf2.fit(X, ytrain)
        yPred = hist.predict(xtestDriv)
        print('Test Subject: {}'.format(sdriv))
        mycls[sdriv] = classification_report(ytestDriv, yPred, zero_division=1, output_dict=True)
        print(classification_report(ytestDriv, yPred, zero_division=1))
    
    return mycls

test_report = losoValidVirage('Virage', 'ECG_EDA_Features_Combined_scld')
    # return XtrainDriv, y_train

Test Subject: 1030.csv
              precision    recall  f1-score   support

           0       0.56      0.61      0.58        38
           1       0.87      0.85      0.86       122

    accuracy                           0.79       160
   macro avg       0.72      0.73      0.72       160
weighted avg       0.80      0.79      0.80       160

Test Subject: 1105.csv
              precision    recall  f1-score   support

           0       0.45      0.94      0.61        31
           1       0.98      0.73      0.83       128

    accuracy                           0.77       159
   macro avg       0.72      0.83      0.72       159
weighted avg       0.88      0.77      0.79       159

Test Subject: 1106.csv
              precision    recall  f1-score   support

           0       0.21      0.33      0.25        24
           1       0.87      0.77      0.82       136

    accuracy                           0.71       160
   macro avg       0.54      0.55      0.54       160
weigh

In [4]:
# Get the classification report of the over all subjects

def get_the_report(test_cls):
    acc=[]
    f_1=[]
    for key in test_cls.keys():
        if key in ['1868.csv', '1717.csv', '1544.csv']:
            continue

        accuracy_ = test_cls[key]['accuracy']
        acc.append(accuracy_)
        fscore_ = test_cls[key]['macro avg']['f1-score']
        f_1.append(fscore_)

    # Average acc and f1
    print("Mean accuracy: {}".format(np.mean(acc)))
    print("Mean f1: {}".format(np.mean(f_1)))    

In [5]:
get_the_report(test_report)

Mean accuracy: 0.6851070569379358
Mean f1: 0.6130377761044988


In [6]:
def losoValidVirage(dataset, folder):
    dr_feat_path = r'X:\Four Modes\{}\Combined\{}'.format(dataset, folder) # ECG_EDA_Features_Combined_scld
    mycls = {}

    subjects = os.listdir(dr_feat_path)
    xtrainDriv = pd.DataFrame()
    for sdriv in subjects:
        xtestDriv = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(sdriv)))
        xtestDriv[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        if np.isinf(xtestDriv).values.sum():
            cinf = np.isinf(xtestDriv).values.sum()
            print("Dataframe contains {} values".format(cinf))

        xtestDriv.replace([np.inf], 9999, inplace=True)
        xtestDriv.replace([-np.inf], -9999, inplace=True)

        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.astype(int)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.clip(min=0) # converting negatives to zero
        xtestDriv.dropna(inplace=True) # .reset_index(drop=True, inplace=True)

        ytestDriv = list(xtestDriv['scaled label'].copy()) 

        xtrainDriv = pd.DataFrame()
        for subTrain in subjects:
            if subTrain != sdriv:
                train = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(subTrain)))

                train[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
                if np.isinf(train).values.sum():
                    cinf = np.isinf(train).values.sum()
                    print("Train Dataframe contains {} values".format(cinf))
                train.replace([np.inf], 9999, inplace=True)
                train.replace([-np.inf], -9999, inplace=True)

                train['scrNumPeaks'] = train['scrNumPeaks'].values.astype(int)
                train['scrNumPeaks'] = train['scrNumPeaks'].values.clip(min=0) # converting negatives to zero

                train.dropna(inplace=True)
                xtrainDriv = xtrainDriv.append(train)
                xtrainDriv.reset_index(drop=True, inplace=True)

        XtrainDriv = xtrainDriv[SELECTCOLS].copy()
        xtestDriv = xtestDriv[SELECTCOLS].copy()
        ytrainDriv = list(XtrainDriv['scaled label'].copy())

        XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
        xtestDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)

        ytrain = ytrainDriv #+ yMat + yBas

        X = XtrainDriv.values
        X, ytrain = shuffle(X, ytrain, random_state=42)

        for idx, val in enumerate(ytrain):
            if val <= 4:
                ytrain[idx] = 0
            else: ytrain[idx] = 1

        for idx, val in enumerate(ytestDriv):
            if val <= 4:
                ytestDriv[idx] = 0
            else: ytestDriv[idx] = 1

        paramsrf = {
            'n_estimators': 3000,
            'min_samples_split': 2,
            'min_samples_leaf': 1,
            'max_features': 'sqrt',
            'max_depth': 50,
            'bootstrap': False
            
            }

        # paramlgbm = {
        #     'n_estimators': 3000,
        #     'num_leaves': 100,
        #     'learning_rate': 0.05,
        #     'class_weight': 'balanced',
        #     'random_state': 24
        #     }

        clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(**paramlgbm)
        # clf4 = SVC(C=700, probability=True, class_weight='balanced')

        # estimatorList = [('rf', clf2), ('svm', clf4), ('gbm', clf3)]
        # eclf = VotingClassifier(estimators=estimatorList, voting='soft')

        # clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(n_estimators = 3000, num_leaves=100, learning_rate=0.005, class_weight='balanced')
        # clf4 = SVC(C=1000, probability=True, class_weight='balanced')
        # eclf = VotingClassifier(estimators=[('rf', clf2), ('gbm', clf3), ('svm', clf4)], voting='soft') # 

        hist = clf2.fit(X, ytrain)
        yPred = hist.predict(xtestDriv)
        print('Test Subject: {}'.format(sdriv))
        mycls[sdriv] = classification_report(ytestDriv, yPred, zero_division=1, output_dict=True)
        print(classification_report(ytestDriv, yPred, zero_division=1))
    
    return mycls

test_report = losoValidVirage('Virage', 'ECG_EDA_Features_Combined_scld')
    # return XtrainDriv, y_train

Test Subject: 1030.csv
              precision    recall  f1-score   support

           0       0.56      0.63      0.59        38
           1       0.88      0.84      0.86       122

    accuracy                           0.79       160
   macro avg       0.72      0.74      0.73       160
weighted avg       0.80      0.79      0.80       160

Test Subject: 1105.csv
              precision    recall  f1-score   support

           0       0.54      0.81      0.65        42
           1       0.92      0.75      0.83       117

    accuracy                           0.77       159
   macro avg       0.73      0.78      0.74       159
weighted avg       0.82      0.77      0.78       159

Test Subject: 1106.csv
              precision    recall  f1-score   support

           0       0.20      0.33      0.25        24
           1       0.87      0.76      0.81       136

    accuracy                           0.69       160
   macro avg       0.53      0.55      0.53       160
weigh

In [7]:
# Get the classification report of the over all subjects

def get_the_report(test_cls):
    acc=[]
    f_1=[]
    for key in test_cls.keys():
        if key in ['1868.csv', '1544.csv', '1372.csv', '1337.cav']:
            continue

        accuracy_ = test_cls[key]['accuracy']
        acc.append(accuracy_)
        fscore_ = test_cls[key]['macro avg']['f1-score']
        f_1.append(fscore_)

    # Average acc and f1
    print("Mean accuracy: {}".format(np.mean(acc)))
    print("Mean f1: {}".format(np.mean(f_1)))    

get_the_report(test_report)

Mean accuracy: 0.7241671293011205
Mean f1: 0.6626858156979287


In [8]:
def losoValidVirage(folder):
    dr_feat_path = r'X:\Four modes baseline\Virage_Clipped_Baseline\Extracted\{}'.format(folder) # Norm_ECG_EDA_Features_Combined_scld
    # mat_feat_path = r'X:\RealTimeSegment\MatbII\Extracted\{}'.format(folder)
    # bas_feat_path = r'X:\RealTimeSegment\Driving Simulator\Extracted\Norm_ECG_EDA_Features_Baseline_Combined'

    # xtrainBas, yBas = load_viragebase(bas_feat_path, 'label')
    # xtrainMat, yMat = load_matb(mat_feat_path, 'scaled label')

    subjects = os.listdir(dr_feat_path)
    xtrainDriv = pd.DataFrame()

    mycls = {}
    for sdriv in subjects:
        xtestDriv = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(sdriv)))
        xtestDriv[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        xtestDriv.replace([np.inf, -np.inf], 9999, inplace=True)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.astype(int)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.clip(min=0) # converting negatives to zero
        xtestDriv.dropna(inplace=True) # .reset_index(drop=True, inplace=True)

        ytestDriv = list(xtestDriv['label'].copy())

        xtrainDriv = pd.DataFrame()
        for subTrain in subjects:
            if subTrain != sdriv:
                train = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(subTrain)))

                train[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
                train.replace([np.inf, -np.inf], 9999, inplace=True)        
                train['scrNumPeaks'] = train['scrNumPeaks'].values.astype(int)
                train['scrNumPeaks'] = train['scrNumPeaks'].values.clip(min=0) # converting negatives to zero

                train.dropna(inplace=True)
                xtrainDriv = xtrainDriv.append(train)
                xtrainDriv.reset_index(drop=True, inplace=True)

        XtrainDriv = xtrainDriv[SELECTCOLS].copy()
        xtestDriv = xtestDriv[SELECTCOLS].copy()
        ytrainDriv = list(XtrainDriv['scaled label'].copy())

        XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
        xtestDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)

        # XtrainDriv = XtrainDriv.append(xtrainMat)
        # XtrainDriv = XtrainDriv.append(xtrainBas)

        ytrain = ytrainDriv #+ yMat + yBas

        X = XtrainDriv.values
        X, ytrain = shuffle(X, ytrain, random_state=42)

        for idx, val in enumerate(ytrain):
            if val <= 4:
                ytrain[idx] = 0
            else: ytrain[idx] = 1

        for idx, val in enumerate(ytestDriv):
            if val <= 4:
                ytestDriv[idx] = 0
            else: ytestDriv[idx] = 1

        # paramsrf = {
        #     'n_estimators': 3000,
        #     'min_samples_split': 2,
        #     'min_samples_leaf': 1,
        #     'max_features': 'sqrt',
        #     'max_depth': 50,
        #     'bootstrap': False
        #     }

        paramlgbm = {
            'n_estimators': 1000,
            'num_leaves': 100,
            'learning_rate': 0.05,
            'class_weight': 'balanced',
            'random_state': 24
            }

        # clf2 = RandomForestClassifier(**paramsrf)
        clf3 = lightgbm.LGBMClassifier(**paramlgbm)
        # clf4 = SVC(C=700, probability=True, class_weight='balanced')

        # estimatorList = [('rf', clf2), ('svm', clf4), ('gbm', clf3)]
        # eclf = VotingClassifier(estimators=estimatorList, voting='soft')

        # clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(n_estimators = 3000, num_leaves=100, learning_rate=0.005, class_weight='balanced')
        # clf4 = SVC(C=1000, probability=True, class_weight='balanced')
        # eclf = VotingClassifier(estimators=[('rf', clf2), ('gbm', clf3), ('svm', clf4)], voting='soft') # 

        hist = clf3.fit(X, ytrain)
        yPred = hist.predict(xtestDriv)
        print('Test Subject: {}'.format(sdriv))
        mycls[sdriv] = classification_report(ytestDriv, yPred, zero_division=1, output_dict=True)
        print(classification_report(ytestDriv, yPred, zero_division=1))
    
    return mycls

test_report = losoValidVirage('Norm_ECG_EDA_Features_Combined_scld')
    # return XtrainDriv, y_train

Test Subject: 1030.csv
              precision    recall  f1-score   support

           0       0.56      0.76      0.64        38
           1       0.92      0.81      0.86       122

    accuracy                           0.80       160
   macro avg       0.74      0.79      0.75       160
weighted avg       0.83      0.80      0.81       160

Test Subject: 1105.csv
              precision    recall  f1-score   support

           0       0.52      0.84      0.64        31
           1       0.95      0.81      0.88       128

    accuracy                           0.82       159
   macro avg       0.74      0.83      0.76       159
weighted avg       0.87      0.82      0.83       159

Test Subject: 1106.csv
              precision    recall  f1-score   support

           0       0.13      0.21      0.16        24
           1       0.84      0.75      0.79       136

    accuracy                           0.67       160
   macro avg       0.49      0.48      0.48       160
weigh

In [5]:
# Get the classification report of the over all subjects

def get_the_report_lgbm(test_cls):
    acc=[]
    f_1=[]
    for key in test_cls.keys():
        if key in ['1868.csv', '1717.csv', '1544.csv']:
            continue

        accuracy_ = test_cls[key]['accuracy']
        acc.append(accuracy_)
        fscore_ = test_cls[key]['macro avg']['f1-score']
        f_1.append(fscore_)

    # Average acc and f1
    print("Mean accuracy: {}".format(np.mean(acc)))
    print("Mean f1: {}".format(np.mean(f_1)))

In [11]:
get_the_report_lgbm(test_report)

Mean accuracy: 0.6737311685624012
Mean f1: 0.5858938348606393


### ECG Virage

In [3]:
def losoValidVirage(folder):
    dr_feat_path = r'X:\Four modes baseline\Virage_Clipped_Baseline\Extracted\{}'.format(folder) # Norm_ECG_EDA_Features_Combined_scld
    # mat_feat_path = r'X:\RealTimeSegment\MatbII\Extracted\{}'.format(folder)
    # bas_feat_path = r'X:\RealTimeSegment\Driving Simulator\Extracted\Norm_ECG_EDA_Features_Baseline_Combined'

    # xtrainBas, yBas = load_viragebase(bas_feat_path, 'label')
    # xtrainMat, yMat = load_matb(mat_feat_path, 'scaled label')

    subjects = os.listdir(dr_feat_path)
    xtrainDriv = pd.DataFrame()

    mycls = {}
    for sdriv in subjects:
        xtestDriv = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(sdriv)))
        xtestDriv[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        xtestDriv.replace([np.inf, -np.inf], 9999, inplace=True)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.astype(int)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.clip(min=0) # converting negatives to zero
        xtestDriv.dropna(inplace=True) # .reset_index(drop=True, inplace=True)

        ytestDriv = list(xtestDriv['label'].copy())

        xtrainDriv = pd.DataFrame()
        for subTrain in subjects:
            if subTrain != sdriv:
                train = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(subTrain)))

                train[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
                train.replace([np.inf, -np.inf], 9999, inplace=True)        
                train['scrNumPeaks'] = train['scrNumPeaks'].values.astype(int)
                train['scrNumPeaks'] = train['scrNumPeaks'].values.clip(min=0) # converting negatives to zero

                train.dropna(inplace=True)
                xtrainDriv = xtrainDriv.append(train)
                xtrainDriv.reset_index(drop=True, inplace=True)

        XtrainDriv = xtrainDriv[ECG_SELECTCOLS].copy()
        xtestDriv = xtestDriv[ECG_SELECTCOLS].copy()
        ytrainDriv = list(XtrainDriv['scaled label'].copy())

        XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
        xtestDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)

        # XtrainDriv = XtrainDriv.append(xtrainMat)
        # XtrainDriv = XtrainDriv.append(xtrainBas)

        ytrain = ytrainDriv #+ yMat + yBas

        X = XtrainDriv.values
        X, ytrain = shuffle(X, ytrain, random_state=42)

        for idx, val in enumerate(ytrain):
            if val <= 4:
                ytrain[idx] = 0
            else: ytrain[idx] = 1

        for idx, val in enumerate(ytestDriv):
            if val <= 4:
                ytestDriv[idx] = 0
            else: ytestDriv[idx] = 1

        # paramsrf = {
        #     'n_estimators': 3000,
        #     'min_samples_split': 2,
        #     'min_samples_leaf': 1,
        #     'max_features': 'sqrt',
        #     'max_depth': 50,
        #     'bootstrap': False
        #     }

        paramlgbm = {
            'n_estimators': 1000,
            'num_leaves': 100,
            'learning_rate': 0.05,
            'class_weight': 'balanced',
            'random_state': 24
            }

        # clf2 = RandomForestClassifier(**paramsrf)
        clf3 = lightgbm.LGBMClassifier(**paramlgbm)
        # clf4 = SVC(C=700, probability=True, class_weight='balanced')

        # estimatorList = [('rf', clf2), ('svm', clf4), ('gbm', clf3)]
        # eclf = VotingClassifier(estimators=estimatorList, voting='soft')

        # clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(n_estimators = 3000, num_leaves=100, learning_rate=0.005, class_weight='balanced')
        # clf4 = SVC(C=1000, probability=True, class_weight='balanced')
        # eclf = VotingClassifier(estimators=[('rf', clf2), ('gbm', clf3), ('svm', clf4)], voting='soft') # 

        hist = clf3.fit(X, ytrain)
        yPred = hist.predict(xtestDriv)
        print('Test Subject: {}'.format(sdriv))
        mycls[sdriv] = classification_report(ytestDriv, yPred, zero_division=1, output_dict=True)
        print(classification_report(ytestDriv, yPred, zero_division=1))
    
    return mycls

test_report = losoValidVirage('Norm_ECG_EDA_Features_Combined_scld')
    # return XtrainDriv, y_train

Test Subject: 1030.csv
              precision    recall  f1-score   support

           0       0.46      0.87      0.61        38
           1       0.94      0.69      0.80       122

    accuracy                           0.73       160
   macro avg       0.70      0.78      0.70       160
weighted avg       0.83      0.73      0.75       160

Test Subject: 1105.csv
              precision    recall  f1-score   support

           0       0.55      0.55      0.55        31
           1       0.89      0.89      0.89       128

    accuracy                           0.82       159
   macro avg       0.72      0.72      0.72       159
weighted avg       0.82      0.82      0.82       159

Test Subject: 1106.csv
              precision    recall  f1-score   support

           0       0.07      0.08      0.07        24
           1       0.83      0.79      0.81       136

    accuracy                           0.69       160
   macro avg       0.45      0.44      0.44       160
weigh

In [6]:
get_the_report_lgbm(test_report)

Mean accuracy: 0.6530336965250431
Mean f1: 0.5519561189766473


In [7]:
def losoValidVirage(folder):
    dr_feat_path = r'X:\Four modes baseline\Virage_Clipped_Baseline\Extracted\{}'.format(folder) # Norm_ECG_EDA_Features_Combined_scld
    # mat_feat_path = r'X:\RealTimeSegment\MatbII\Extracted\{}'.format(folder)
    # bas_feat_path = r'X:\RealTimeSegment\Driving Simulator\Extracted\Norm_ECG_EDA_Features_Baseline_Combined'

    # xtrainBas, yBas = load_viragebase(bas_feat_path, 'label')
    # xtrainMat, yMat = load_matb(mat_feat_path, 'scaled label')

    subjects = os.listdir(dr_feat_path)
    xtrainDriv = pd.DataFrame()

    mycls = {}
    for sdriv in subjects:
        xtestDriv = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(sdriv)))
        xtestDriv[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        xtestDriv.replace([np.inf, -np.inf], 9999, inplace=True)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.astype(int)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.clip(min=0) # converting negatives to zero
        xtestDriv.dropna(inplace=True) # .reset_index(drop=True, inplace=True)

        ytestDriv = list(xtestDriv['label'].copy())

        xtrainDriv = pd.DataFrame()
        for subTrain in subjects:
            if subTrain != sdriv:
                train = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(subTrain)))

                train[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
                train.replace([np.inf, -np.inf], 9999, inplace=True)        
                train['scrNumPeaks'] = train['scrNumPeaks'].values.astype(int)
                train['scrNumPeaks'] = train['scrNumPeaks'].values.clip(min=0) # converting negatives to zero

                train.dropna(inplace=True)
                xtrainDriv = xtrainDriv.append(train)
                xtrainDriv.reset_index(drop=True, inplace=True)

        XtrainDriv = xtrainDriv[ECG_SELECTCOLS].copy()
        xtestDriv = xtestDriv[ECG_SELECTCOLS].copy()
        ytrainDriv = list(XtrainDriv['scaled label'].copy())

        XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
        xtestDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)

        # XtrainDriv = XtrainDriv.append(xtrainMat)
        # XtrainDriv = XtrainDriv.append(xtrainBas)

        ytrain = ytrainDriv #+ yMat + yBas

        X = XtrainDriv.values
        X, ytrain = shuffle(X, ytrain, random_state=42)

        for idx, val in enumerate(ytrain):
            if val <= 4:
                ytrain[idx] = 0
            else: ytrain[idx] = 1

        for idx, val in enumerate(ytestDriv):
            if val <= 4:
                ytestDriv[idx] = 0
            else: ytestDriv[idx] = 1

        paramsrf = {
            'n_estimators': 1000,
            'min_samples_split': 2,
            'min_samples_leaf': 1,
            'max_features': 'sqrt',
            'max_depth': 50,
            'bootstrap': False,
            'class_weight': 'balanced'
            }

        # paramlgbm = {
        #     'n_estimators': 1000,
        #     'num_leaves': 100,
        #     'learning_rate': 0.05,
        #     'class_weight': 'balanced',
        #     'random_state': 24
        #     }

        clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(**paramlgbm)
        # clf4 = SVC(C=700, probability=True, class_weight='balanced')

        # estimatorList = [('rf', clf2), ('svm', clf4), ('gbm', clf3)]
        # eclf = VotingClassifier(estimators=estimatorList, voting='soft')

        # clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(n_estimators = 3000, num_leaves=100, learning_rate=0.005, class_weight='balanced')
        # clf4 = SVC(C=1000, probability=True, class_weight='balanced')
        # eclf = VotingClassifier(estimators=[('rf', clf2), ('gbm', clf3), ('svm', clf4)], voting='soft') # 

        hist = clf2.fit(X, ytrain)
        yPred = hist.predict(xtestDriv)
        print('Test Subject: {}'.format(sdriv))
        mycls[sdriv] = classification_report(ytestDriv, yPred, zero_division=1, output_dict=True)
        print(classification_report(ytestDriv, yPred, zero_division=1))
    
    return mycls

test_report = losoValidVirage('Norm_ECG_EDA_Features_Combined_scld')
    # return XtrainDriv, y_train

Test Subject: 1030.csv
              precision    recall  f1-score   support

           0       0.52      0.71      0.60        38
           1       0.90      0.80      0.84       122

    accuracy                           0.78       160
   macro avg       0.71      0.75      0.72       160
weighted avg       0.81      0.78      0.79       160

Test Subject: 1105.csv
              precision    recall  f1-score   support

           0       0.60      0.39      0.47        31
           1       0.86      0.94      0.90       128

    accuracy                           0.83       159
   macro avg       0.73      0.66      0.68       159
weighted avg       0.81      0.83      0.82       159

Test Subject: 1106.csv
              precision    recall  f1-score   support

           0       0.05      0.08      0.06        24
           1       0.82      0.73      0.77       136

    accuracy                           0.63       160
   macro avg       0.43      0.41      0.42       160
weigh

In [9]:
get_the_report_lgbm(test_report)

Mean accuracy: 0.6636411626985785
Mean f1: 0.5443866516473876


### EDA Virage

In [10]:
def losoValidVirage(folder):
    dr_feat_path = r'X:\Four modes baseline\Virage_Clipped_Baseline\Extracted\{}'.format(folder) # Norm_ECG_EDA_Features_Combined_scld
    # mat_feat_path = r'X:\RealTimeSegment\MatbII\Extracted\{}'.format(folder)
    # bas_feat_path = r'X:\RealTimeSegment\Driving Simulator\Extracted\Norm_ECG_EDA_Features_Baseline_Combined'

    # xtrainBas, yBas = load_viragebase(bas_feat_path, 'label')
    # xtrainMat, yMat = load_matb(mat_feat_path, 'scaled label')

    subjects = os.listdir(dr_feat_path)
    xtrainDriv = pd.DataFrame()

    mycls = {}
    for sdriv in subjects:
        xtestDriv = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(sdriv)))
        xtestDriv[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        xtestDriv.replace([np.inf, -np.inf], 9999, inplace=True)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.astype(int)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.clip(min=0) # converting negatives to zero
        xtestDriv.dropna(inplace=True) # .reset_index(drop=True, inplace=True)

        ytestDriv = list(xtestDriv['label'].copy())

        xtrainDriv = pd.DataFrame()
        for subTrain in subjects:
            if subTrain != sdriv:
                train = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(subTrain)))

                train[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
                train.replace([np.inf, -np.inf], 9999, inplace=True)        
                train['scrNumPeaks'] = train['scrNumPeaks'].values.astype(int)
                train['scrNumPeaks'] = train['scrNumPeaks'].values.clip(min=0) # converting negatives to zero

                train.dropna(inplace=True)
                xtrainDriv = xtrainDriv.append(train)
                xtrainDriv.reset_index(drop=True, inplace=True)

        XtrainDriv = xtrainDriv[EDA_SELECTCOLS].copy()
        xtestDriv = xtestDriv[EDA_SELECTCOLS].copy()
        ytrainDriv = list(XtrainDriv['scaled label'].copy())

        XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
        xtestDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)

        # XtrainDriv = XtrainDriv.append(xtrainMat)
        # XtrainDriv = XtrainDriv.append(xtrainBas)

        ytrain = ytrainDriv #+ yMat + yBas

        X = XtrainDriv.values
        X, ytrain = shuffle(X, ytrain, random_state=42)

        for idx, val in enumerate(ytrain):
            if val <= 4:
                ytrain[idx] = 0
            else: ytrain[idx] = 1

        for idx, val in enumerate(ytestDriv):
            if val <= 4:
                ytestDriv[idx] = 0
            else: ytestDriv[idx] = 1

        paramsrf = {
            'n_estimators': 1000,
            'min_samples_split': 2,
            'min_samples_leaf': 1,
            'max_features': 'sqrt',
            'max_depth': 50,
            'bootstrap': False,
            'class_weight': 'balanced'
            }

        # paramlgbm = {
        #     'n_estimators': 1000,
        #     'num_leaves': 100,
        #     'learning_rate': 0.05,
        #     'class_weight': 'balanced',
        #     'random_state': 24
        #     }

        clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(**paramlgbm)
        # clf4 = SVC(C=700, probability=True, class_weight='balanced')

        # estimatorList = [('rf', clf2), ('svm', clf4), ('gbm', clf3)]
        # eclf = VotingClassifier(estimators=estimatorList, voting='soft')

        # clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(n_estimators = 3000, num_leaves=100, learning_rate=0.005, class_weight='balanced')
        # clf4 = SVC(C=1000, probability=True, class_weight='balanced')
        # eclf = VotingClassifier(estimators=[('rf', clf2), ('gbm', clf3), ('svm', clf4)], voting='soft') # 

        hist = clf2.fit(X, ytrain)
        yPred = hist.predict(xtestDriv)
        print('Test Subject: {}'.format(sdriv))
        mycls[sdriv] = classification_report(ytestDriv, yPred, zero_division=1, output_dict=True)
        print(classification_report(ytestDriv, yPred, zero_division=1))
    
    return mycls

test_report = losoValidVirage('Norm_ECG_EDA_Features_Combined_scld')
    # return XtrainDriv, y_train

Test Subject: 1030.csv
              precision    recall  f1-score   support

           0       0.66      0.50      0.57        38
           1       0.85      0.92      0.89       122

    accuracy                           0.82       160
   macro avg       0.76      0.71      0.73       160
weighted avg       0.81      0.82      0.81       160

Test Subject: 1105.csv
              precision    recall  f1-score   support

           0       0.28      0.71      0.40        31
           1       0.89      0.55      0.68       128

    accuracy                           0.58       159
   macro avg       0.58      0.63      0.54       159
weighted avg       0.77      0.58      0.63       159

Test Subject: 1106.csv
              precision    recall  f1-score   support

           0       0.12      0.12      0.12        24
           1       0.85      0.85      0.85       136

    accuracy                           0.74       160
   macro avg       0.49      0.49      0.49       160
weigh

In [11]:
get_the_report_lgbm(test_report)

Mean accuracy: 0.6437444703250033
Mean f1: 0.5341784532823431


In [12]:
def losoValidVirage(folder):
    dr_feat_path = r'X:\Four modes baseline\Virage_Clipped_Baseline\Extracted\{}'.format(folder) # Norm_ECG_EDA_Features_Combined_scld
    # mat_feat_path = r'X:\RealTimeSegment\MatbII\Extracted\{}'.format(folder)
    # bas_feat_path = r'X:\RealTimeSegment\Driving Simulator\Extracted\Norm_ECG_EDA_Features_Baseline_Combined'

    # xtrainBas, yBas = load_viragebase(bas_feat_path, 'label')
    # xtrainMat, yMat = load_matb(mat_feat_path, 'scaled label')

    subjects = os.listdir(dr_feat_path)
    xtrainDriv = pd.DataFrame()

    mycls = {}
    for sdriv in subjects:
        xtestDriv = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(sdriv)))
        xtestDriv[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        xtestDriv.replace([np.inf, -np.inf], 9999, inplace=True)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.astype(int)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.clip(min=0) # converting negatives to zero
        xtestDriv.dropna(inplace=True) # .reset_index(drop=True, inplace=True)

        ytestDriv = list(xtestDriv['label'].copy())

        xtrainDriv = pd.DataFrame()
        for subTrain in subjects:
            if subTrain != sdriv:
                train = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(subTrain)))

                train[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
                train.replace([np.inf, -np.inf], 9999, inplace=True)        
                train['scrNumPeaks'] = train['scrNumPeaks'].values.astype(int)
                train['scrNumPeaks'] = train['scrNumPeaks'].values.clip(min=0) # converting negatives to zero

                train.dropna(inplace=True)
                xtrainDriv = xtrainDriv.append(train)
                xtrainDriv.reset_index(drop=True, inplace=True)

        XtrainDriv = xtrainDriv[EDA_SELECTCOLS].copy()
        xtestDriv = xtestDriv[EDA_SELECTCOLS].copy()
        ytrainDriv = list(XtrainDriv['scaled label'].copy())

        XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
        xtestDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)

        # XtrainDriv = XtrainDriv.append(xtrainMat)
        # XtrainDriv = XtrainDriv.append(xtrainBas)

        ytrain = ytrainDriv #+ yMat + yBas

        X = XtrainDriv.values
        X, ytrain = shuffle(X, ytrain, random_state=42)

        for idx, val in enumerate(ytrain):
            if val <= 4:
                ytrain[idx] = 0
            else: ytrain[idx] = 1

        for idx, val in enumerate(ytestDriv):
            if val <= 4:
                ytestDriv[idx] = 0
            else: ytestDriv[idx] = 1

        # paramsrf = {
        #     'n_estimators': 3000,
        #     'min_samples_split': 2,
        #     'min_samples_leaf': 1,
        #     'max_features': 'sqrt',
        #     'max_depth': 50,
        #     'bootstrap': False
        #     }

        paramlgbm = {
            'n_estimators': 1000,
            'num_leaves': 100,
            'learning_rate': 0.05,
            'class_weight': 'balanced',
            'random_state': 24
            }

        # clf2 = RandomForestClassifier(**paramsrf)
        clf3 = lightgbm.LGBMClassifier(**paramlgbm)
        # clf4 = SVC(C=700, probability=True, class_weight='balanced')

        # estimatorList = [('rf', clf2), ('svm', clf4), ('gbm', clf3)]
        # eclf = VotingClassifier(estimators=estimatorList, voting='soft')

        # clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(n_estimators = 3000, num_leaves=100, learning_rate=0.005, class_weight='balanced')
        # clf4 = SVC(C=1000, probability=True, class_weight='balanced')
        # eclf = VotingClassifier(estimators=[('rf', clf2), ('gbm', clf3), ('svm', clf4)], voting='soft') # 

        hist = clf3.fit(X, ytrain)
        yPred = hist.predict(xtestDriv)
        print('Test Subject: {}'.format(sdriv))
        mycls[sdriv] = classification_report(ytestDriv, yPred, zero_division=1, output_dict=True)
        print(classification_report(ytestDriv, yPred, zero_division=1))
    
    return mycls

test_report = losoValidVirage('Norm_ECG_EDA_Features_Combined_scld')
    # return XtrainDriv, y_train

Test Subject: 1030.csv
              precision    recall  f1-score   support

           0       0.49      0.45      0.47        38
           1       0.83      0.85      0.84       122

    accuracy                           0.76       160
   macro avg       0.66      0.65      0.65       160
weighted avg       0.75      0.76      0.75       160

Test Subject: 1105.csv
              precision    recall  f1-score   support

           0       0.26      0.74      0.38        31
           1       0.89      0.48      0.63       128

    accuracy                           0.53       159
   macro avg       0.57      0.61      0.50       159
weighted avg       0.76      0.53      0.58       159

Test Subject: 1106.csv
              precision    recall  f1-score   support

           0       0.21      0.42      0.28        24
           1       0.88      0.73      0.80       136

    accuracy                           0.68       160
   macro avg       0.54      0.57      0.54       160
weigh

In [13]:
get_the_report_lgbm(test_report)

Mean accuracy: 0.6194832412590621
Mean f1: 0.5240996349113242


# MatbII

In [10]:
def losoValidMatBII(folder):
    dr_feat_path = r'X:\Four modes baseline\MatB-II_Clipped_Baseline\Extracted\{}'.format(folder) # Norm_ECG_EDA_Features_Combined_scld
    # mat_feat_path = r'X:\RealTimeSegment\MatbII\Extracted\{}'.format(folder)
    # bas_feat_path = r'X:\RealTimeSegment\Driving Simulator\Extracted\Norm_ECG_EDA_Features_Baseline_Combined'

    # xtrainBas, yBas = load_viragebase(bas_feat_path, 'label')
    # xtrainMat, yMat = load_matb(mat_feat_path, 'scaled label')

    subjects = os.listdir(dr_feat_path)
    xtrainDriv = pd.DataFrame()

    mycls = {}
    for sdriv in subjects:
        xtestDriv = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(sdriv)))
        xtestDriv[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        xtestDriv.replace([np.inf, -np.inf], 9999, inplace=True)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.astype(int)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.clip(min=0) # converting negatives to zero
        xtestDriv.dropna(inplace=True) # .reset_index(drop=True, inplace=True)

        ytestDriv = list(xtestDriv['label'].copy())

        xtrainDriv = pd.DataFrame()
        for subTrain in subjects:
            if subTrain != sdriv:
                train = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(subTrain)))

                train[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
                train.replace([np.inf, -np.inf], 9999, inplace=True)        
                train['scrNumPeaks'] = train['scrNumPeaks'].values.astype(int)
                train['scrNumPeaks'] = train['scrNumPeaks'].values.clip(min=0) # converting negatives to zero

                train.dropna(inplace=True)
                xtrainDriv = xtrainDriv.append(train)
                xtrainDriv.reset_index(drop=True, inplace=True)

        XtrainDriv = xtrainDriv[SELECTCOLS].copy()
        xtestDriv = xtestDriv[SELECTCOLS].copy()
        ytrainDriv = list(XtrainDriv['scaled label'].copy())

        XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
        xtestDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)

        # XtrainDriv = XtrainDriv.append(xtrainMat)
        # XtrainDriv = XtrainDriv.append(xtrainBas)

        ytrain = ytrainDriv #+ yMat + yBas

        X = XtrainDriv.values
        X, ytrain = shuffle(X, ytrain, random_state=42)

        for idx, val in enumerate(ytrain):
            if val <= 4:
                ytrain[idx] = 0
            else: ytrain[idx] = 1

        for idx, val in enumerate(ytestDriv):
            if val <= 4:
                ytestDriv[idx] = 0
            else: ytestDriv[idx] = 1

        paramsrf = {
            'n_estimators': 3000,
            'min_samples_split': 2,
            'min_samples_leaf': 1,
            'max_features': 'sqrt',
            'max_depth': 50,
            'bootstrap': False
            }

        # paramlgbm = {
        #     'n_estimators': 3000,
        #     'num_leaves': 100,
        #     'learning_rate': 0.05,
        #     'class_weight': 'balanced',
        #     'random_state': 24
        #     }

        clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(**paramlgbm)
        # clf4 = SVC(C=700, probability=True, class_weight='balanced')

        # estimatorList = [('rf', clf2), ('svm', clf4), ('gbm', clf3)]
        # eclf = VotingClassifier(estimators=estimatorList, voting='soft')

        # clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(n_estimators = 3000, num_leaves=100, learning_rate=0.005, class_weight='balanced')
        # clf4 = SVC(C=1000, probability=True, class_weight='balanced')
        # eclf = VotingClassifier(estimators=[('rf', clf2), ('gbm', clf3), ('svm', clf4)], voting='soft') # 

        hist = clf2.fit(X, ytrain)
        yPred = hist.predict(xtestDriv)
        print('Test Subject: {}'.format(sdriv))
        mycls[sdriv] = classification_report(ytestDriv, yPred, zero_division=1, output_dict=True)
        print(classification_report(ytestDriv, yPred, zero_division=1))
    
    return mycls

test_report1 = losoValidMatBII('Norm_ECG_EDA_Features_Combined_scld')
    # return XtrainDriv, y_train

Test Subject: 1026.csv
              precision    recall  f1-score   support

           0       0.00      1.00      0.00         0
           1       1.00      0.79      0.88        96

    accuracy                           0.79        96
   macro avg       0.50      0.90      0.44        96
weighted avg       1.00      0.79      0.88        96

Test Subject: 1105.csv
              precision    recall  f1-score   support

           0       0.82      0.25      0.38        36
           1       0.87      0.99      0.92       180

    accuracy                           0.87       216
   macro avg       0.84      0.62      0.65       216
weighted avg       0.86      0.87      0.83       216

Test Subject: 1106.csv
              precision    recall  f1-score   support

           0       0.12      0.54      0.19        13
           1       0.96      0.74      0.84       203

    accuracy                           0.73       216
   macro avg       0.54      0.64      0.51       216
weigh

In [4]:
def losoValidMatBII(folder):
    dr_feat_path = r'X:\Four modes baseline\MatB-II_Clipped_Baseline\Extracted\{}'.format(folder) # Norm_ECG_EDA_Features_Combined_scld
    # mat_feat_path = r'X:\RealTimeSegment\MatbII\Extracted\{}'.format(folder)
    # bas_feat_path = r'X:\RealTimeSegment\Driving Simulator\Extracted\Norm_ECG_EDA_Features_Baseline_Combined'

    # xtrainBas, yBas = load_viragebase(bas_feat_path, 'label')
    # xtrainMat, yMat = load_matb(mat_feat_path, 'scaled label')

    subjects = os.listdir(dr_feat_path)
    xtrainDriv = pd.DataFrame()

    mycls = {}
    for sdriv in subjects:
        xtestDriv = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(sdriv)))
        xtestDriv[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
        xtestDriv.replace([np.inf, -np.inf], 9999, inplace=True)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.astype(int)
        xtestDriv['scrNumPeaks'] = xtestDriv['scrNumPeaks'].values.clip(min=0) # converting negatives to zero
        xtestDriv.dropna(inplace=True) # .reset_index(drop=True, inplace=True)

        ytestDriv = list(xtestDriv['label'].copy())

        xtrainDriv = pd.DataFrame()
        for subTrain in subjects:
            if subTrain != sdriv:
                train = pd.read_csv(os.path.join(dr_feat_path, '{}'.format(subTrain)))

                train[['scrAmpDF_min','scrRecoveryTime_min', 'scrRiseTime_min']].fillna(0)
                train.replace([np.inf, -np.inf], 9999, inplace=True)        
                train['scrNumPeaks'] = train['scrNumPeaks'].values.astype(int)
                train['scrNumPeaks'] = train['scrNumPeaks'].values.clip(min=0) # converting negatives to zero

                train.dropna(inplace=True)
                xtrainDriv = xtrainDriv.append(train)
                xtrainDriv.reset_index(drop=True, inplace=True)

        XtrainDriv = xtrainDriv[SELECTCOLS].copy()
        xtestDriv = xtestDriv[SELECTCOLS].copy()
        ytrainDriv = list(XtrainDriv['scaled label'].copy())

        XtrainDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)
        xtestDriv.drop(columns=['label', 'complexity', 'scaled label'], inplace=True)

        # XtrainDriv = XtrainDriv.append(xtrainMat)
        # XtrainDriv = XtrainDriv.append(xtrainBas)

        ytrain = ytrainDriv #+ yMat + yBas

        X = XtrainDriv.values
        X, ytrain = shuffle(X, ytrain, random_state=42)

        for idx, val in enumerate(ytrain):
            if val <= 4:
                ytrain[idx] = 0
            else: ytrain[idx] = 1

        for idx, val in enumerate(ytestDriv):
            if val <= 4:
                ytestDriv[idx] = 0
            else: ytestDriv[idx] = 1

        paramsrf = {
            'n_estimators': 800,
            'min_samples_split': 2,
            'min_samples_leaf': 1,
            'max_features': 'sqrt',
            'max_depth': 20,
            'bootstrap': False
            }

        # paramlgbm = {
        #     'n_estimators': 3000,
        #     'num_leaves': 100,
        #     'learning_rate': 0.05,
        #     'class_weight': 'balanced',
        #     'random_state': 24
        #     }

        clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(**paramlgbm)
        # clf4 = SVC(C=700, probability=True, class_weight='balanced')

        # estimatorList = [('rf', clf2), ('svm', clf4), ('gbm', clf3)]
        # eclf = VotingClassifier(estimators=estimatorList, voting='soft')

        # clf2 = RandomForestClassifier(**paramsrf)
        # clf3 = lightgbm.LGBMClassifier(n_estimators = 3000, num_leaves=100, learning_rate=0.005, class_weight='balanced')
        # clf4 = SVC(C=1000, probability=True, class_weight='balanced')
        # eclf = VotingClassifier(estimators=[('rf', clf2), ('gbm', clf3), ('svm', clf4)], voting='soft') # 

        hist = clf2.fit(X, ytrain)
        yPred = hist.predict(xtestDriv)
        print('Test Subject: {}'.format(sdriv))
        mycls[sdriv] = classification_report(ytestDriv, yPred, zero_division=1, output_dict=True)
        print(classification_report(ytestDriv, yPred, zero_division=1))
    
    return mycls

test_report1 = losoValidMatBII('Norm_ECG_EDA_Features_Combined_scld')
    # return XtrainDriv, y_train

Test Subject: 1026.csv
              precision    recall  f1-score   support

           0       0.00      1.00      0.00         0
           1       1.00      0.79      0.88        96

    accuracy                           0.79        96
   macro avg       0.50      0.90      0.44        96
weighted avg       1.00      0.79      0.88        96

Test Subject: 1105.csv
              precision    recall  f1-score   support

           0       0.75      0.25      0.38        36
           1       0.87      0.98      0.92       180

    accuracy                           0.86       216
   macro avg       0.81      0.62      0.65       216
weighted avg       0.85      0.86      0.83       216

Test Subject: 1106.csv
              precision    recall  f1-score   support

           0       0.14      0.62      0.23        13
           1       0.97      0.76      0.85       203

    accuracy                           0.75       216
   macro avg       0.55      0.69      0.54       216
weigh

In [5]:
# Get the classification report of the over all subjects

def get_the_report_matb(test_cls):
    acc=[]
    f_1=[]
    for key in test_cls.keys():
        if key in ['1981.csv', '1953.csv', '1936.csv', '1629.csv', '1026.csv']:
            continue

        accuracy_ = test_cls[key]['accuracy']
        acc.append(accuracy_)
        fscore_ = test_cls[key]['macro avg']['f1-score']
        f_1.append(fscore_)

    # Average acc and f1
    print("Mean accuracy: {}".format(np.mean(acc)))
    print("Mean f1: {}".format(np.mean(f_1)))    

In [6]:
get_the_report_matb(test_report1)

Mean accuracy: 0.6142984290007452
Mean f1: 0.4737896820471688
