In [89]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import StratifiedKFold, LeaveOneGroupOut
import warnings
from tqdm import tqdm

In [90]:
def evaluator(y_pred, y_test, verbose=False, title='Physical fatigue'):
    """Score predictions against the true labels.

    Note the argument order: predictions first, ground truth second.

    Parameters
    ----------
    y_pred : array-like
        Predicted labels.
    y_test : array-like
        True labels.
    verbose : bool, default False
        If True, plot the confusion matrix and print the scalar scores.
    title : str, default 'Physical fatigue'
        Title of the confusion-matrix plot (only used when ``verbose`` is True).
        The default reproduces the title that used to be hard-coded, so
        existing calls behave as before.

    Returns
    -------
    dict
        Keys 'accuracy', 'balanced_accuracy', 'f1', 'recall', 'precision'
        (the last three are support-weighted averages) and 'confusion'
        (the confusion matrix).
    """
    accuracy = accuracy_score(y_pred=y_pred, y_true=y_test)
    balanced_accuracy = balanced_accuracy_score(y_pred=y_pred, y_true=y_test)
    f1 = f1_score(y_pred=y_pred, y_true=y_test, average='weighted')
    # support-weighted recall is mathematically the same quantity as accuracy
    recall = recall_score(y_pred=y_pred, y_true=y_test, average='weighted')
    precision = precision_score(y_pred=y_pred, y_true=y_test, average='weighted')
    confusion = confusion_matrix(y_pred=y_pred, y_true=y_test)

    # display scores
    if verbose:
        # [False, True] only fits a 2x2 matrix; if fewer/more classes occur in
        # y_test/y_pred the matrix has another size, so fall back to the default
        # tick labels instead of passing a mismatching label list.
        # NOTE(review): presumably the labels are binary 0/1 - confirm.
        display_labels = [False, True] if confusion.shape == (2, 2) else None
        ConfusionMatrixDisplay(confusion_matrix=confusion, display_labels=display_labels).plot(cmap=plt.cm.Blues)
        plt.title(title)

        print(f'accuracy: {accuracy}\n'
              f'balanced accuracy: {balanced_accuracy}\n'
              f'f1 (weighted): {f1}\n'
              f'recall (weighted): {recall}\n'
              f'precision (weighted): {precision}')

    return {'accuracy': accuracy,
            'balanced_accuracy': balanced_accuracy,
            'f1': f1,
            'recall': recall,
            'precision': precision,
            'confusion': confusion}

In [91]:
SHUFFLE = True # if True, the stratified K-fold splitter shuffles the samples (seeded with SEED) before splitting

In [92]:
# fixed random seed for reproducibility (used as random_state of the shuffled StratifiedKFold)
SEED = 42

# Import data

In [93]:
# path to the data folder (relative to the working directory); the combined CSV is read from it
path = './Output'

In [94]:
# read the combined dataset; the first CSV column becomes the row index
file = f'{path}/combined_data.csv'
data = pd.read_csv(file, index_col=0)
data = data.fillna(pd.NA)

# sorted array of the distinct subject identifiers
subjects = np.unique(data['subjectID'])

In [95]:
# remember the number of samples, then keep only the subject ID and the label/score columns
N = len(data)
data = data.loc[:, ['subjectID', 'phF', 'MF', 'VAS']]

# Model

In [96]:
class MajorityVoter(BaseEstimator):
    """Feature-agnostic baseline that always predicts one constant label.

    The constant is the rounded mean of the training labels (NaNs ignored).
    NOTE(review): this equals the majority class only for binary 0/1 labels -
    confirm that is what the label columns contain. With an exact 50/50 split
    the mean is 0.5, which ``np.round`` rounds to 0 (round-half-to-even).
    """

    def __init__(self):
        # constant prediction; None until fit() has been called
        self.majority_vote = None

    def fit(self, X, y):
        """Store the rounded mean of ``y`` as the constant prediction.

        ``X`` is ignored. Returns ``self`` (scikit-learn convention) so calls
        can be chained, e.g. ``MajorityVoter().fit(X, y).predict(X)``.
        """
        self.majority_vote = np.round(np.nanmean(y, axis=0))
        return self

    def predict(self, X):
        """Return an array holding the stored constant once per row of ``X``.

        Only ``len(X)`` is used; the values of ``X`` are never read.
        """
        n = len(X)
        y_pred = np.array([self.majority_vote] * n)
        return y_pred

# Model evaluation

### 5-fold stratified CV

In [97]:
# dummy inputs: the baseline model never reads feature values, X only has to
# provide one row per sample. Zeros (instead of np.empty's uninitialised
# memory) keep the array contents deterministic across runs.
X = np.zeros((N, 1))

In [98]:
# one label series per fatigue type; each one is predicted separately
y_phf = data['phF']
y_mf = data['MF']

In [99]:
%%time
# nested CV
folds = 5

with warnings.catch_warnings():
    # ignore sklearn warning
    warnings.filterwarnings('ignore')

    for fatigue in ('Physical fatigue', 'Mental fatigue'):
        # load labels
        print(f'Starting cross-validation for {fatigue}')
        y_ = {'Physical fatigue': y_phf, 'Mental fatigue': y_mf}[fatigue] # pick phF or MF

        # CV: performance evaluation
        cv = StratifiedKFold(n_splits=folds, shuffle=True, random_state=SEED) if SHUFFLE \
            else StratifiedKFold(n_splits=folds)
        scores_cv = []
        with tqdm(total=folds) as pbar:
            for i, (train_outer_index, test_outer_index) in enumerate(cv.split(X, y_)):
                # train/test split
                X_train, X_test = X[train_outer_index], X[test_outer_index]
                y_train, y_test = y_[train_outer_index], y_[test_outer_index]

                # model
                model = MajorityVoter()

                # training
                model.fit(X_train, y_train)

                # evaluate
                y_pred = model.predict(X_test)
                scores = evaluator(y_pred, y_test, verbose=False)
                scores_cv.append(scores)

                # for progress bar
                pbar.update(1)
                pbar.set_description(f' Fold {i+1} F1: {scores["f1"]}')

        # final evaluation
        print('Performance model:')
        metrics = scores_cv[0].keys()
        for metric in metrics:
            # ignore confusion_matrix
            if metric == 'confusion':
                continue
            mean = np.mean([scores_cv_i[metric] for scores_cv_i in scores_cv])
            std = np.std([scores_cv_i[metric] for scores_cv_i in scores_cv])
            print(f' {metric}: {round(mean, 3)} +- {round(std, 3)} \n')

Starting cross-validation for Physical fatigue


 Fold 5 F1: 0.6346580249019274: 100%|██████████| 5/5 [00:00<00:00, 93.46it/s]


Performance model:
 accuracy: 0.737 +- 0.006 

 balanced_accuracy: 0.5 +- 0.0 

 f1: 0.625 +- 0.008 

 recall: 0.737 +- 0.006 

 precision: 0.543 +- 0.009 

Starting cross-validation for Mental fatigue


 Fold 5 F1: 0.5229555236728838: 100%|██████████| 5/5 [00:00<00:00, 100.01it/s]

Performance model:
 accuracy: 0.663 +- 0.006 

 balanced_accuracy: 0.5 +- 0.0 

 f1: 0.529 +- 0.008 

 recall: 0.663 +- 0.006 

 precision: 0.44 +- 0.008 

CPU times: total: 109 ms
Wall time: 111 ms





### Leave-one-subject-out (LOSO)

In [103]:
%%time
# nested CV
groups = data['subjectID']
folds = len(np.unique(subjects))

with warnings.catch_warnings():
    # ignore sklearn warning
    warnings.filterwarnings('ignore')

    for fatigue in ('Physical fatigue', 'Mental fatigue'):
        # load labels
        print(f'Starting cross-validation for {fatigue}')
        y_ = {'Physical fatigue': y_phf, 'Mental fatigue': y_mf}[fatigue] # pick phF or MF

        # CV: performance evaluation
        cv = LeaveOneGroupOut()
        scores_cv = []
        with tqdm(total=folds) as pbar:
            for i, (train_outer_index, test_outer_index) in enumerate(cv.split(X, y_, groups)):
                # train/test split
                X_train, X_test = X[train_outer_index], X[test_outer_index]
                y_train, y_test = y_[train_outer_index], y_[test_outer_index]

                # model
                model = MajorityVoter()

                # training
                model.fit(X_train, y_train)

                # evaluate
                y_pred = model.predict(X_test)
                scores = evaluator(y_pred, y_test, verbose=False)
                scores_cv.append(scores)

                # for progress bar
                pbar.update(1)
                pbar.set_description(f' Fold {i+1} F1: {scores["f1"]}')

        # final evaluation
        print('Performance model:')
        metrics = scores_cv[0].keys()
        for metric in metrics:
            # ignore confusion_matrix
            if metric == 'confusion':
                continue
            mean = np.mean([scores_cv_i[metric] for scores_cv_i in scores_cv])
            std = np.std([scores_cv_i[metric] for scores_cv_i in scores_cv])
            print(f' {metric}: {round(mean, 3)} +- {round(std, 3)} \n')

Starting cross-validation for Physical fatigue


 Fold 27 F1: 0.3333333333333333: 100%|██████████| 27/27 [00:00<00:00, 145.95it/s] 


Performance model:
 accuracy: 0.666 +- 0.286 

 balanced_accuracy: 0.556 +- 0.208 

 f1: 0.573 +- 0.334 

 recall: 0.666 +- 0.286 

 precision: 0.525 +- 0.348 

Starting cross-validation for Mental fatigue


 Fold 27 F1: 0.13846153846153844: 100%|██████████| 27/27 [00:00<00:00, 132.35it/s]

Performance model:
 accuracy: 0.578 +- 0.309 

 balanced_accuracy: 0.481 +- 0.254 

 f1: 0.484 +- 0.318 

 recall: 0.578 +- 0.309 

 precision: 0.43 +- 0.32 

CPU times: total: 406 ms
Wall time: 394 ms



