In [1]:
import numpy as np
import json
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
import warnings
from evaluator import *

  from pandas import MultiIndex, Int64Index


In [2]:
VARIABLES = ['ActivityCounts', 'Barometer', 'BloodPerfusion',
             'BloodPulseWave', 'EnergyExpenditure', 'GalvanicSkinResponse', 'HR',
             'HRV', 'RESP', 'Steps', 'SkinTemperature', 'ActivityClass']

# Import data

In [3]:
# file path to data folder
path = './Output'

Metadata (subjectID etc.)

In [5]:
with open(path + '/metadata_stat.txt') as f:
    metadata = f.read()

metadata = json.loads(metadata.replace('\'', '\"').replace('False', 'false').replace('True', 'true')) # doesn't accept other chars

In [6]:
subjects = [meta['subjectID'] for meta in metadata]

# XGBoost

In [7]:
class XGBoost:

    def __init__(self, path, variable):
        self.SEED = 42
        self.model = xgb.XGBClassifier(random_state=self.SEED, verbosity=0)
        self.path = path
        assert variable in (0, 1)
        self.variable = variable
        self.normalizer = StandardScaler()

    def load_data(self, indices):
        # load shape
        N = len(indices)
        N_FEATURES = np.load(self.path + '/feature_vector_stat0.npy').shape[0]

        # init
        X = np.empty((N, N_FEATURES))
        y = np.empty(N)

        # load individual datapoints
        for i, index in enumerate(indices):
            X[i, ] = np.load(path + f'/feature_vector_stat{index}.npy', allow_pickle=True)
            y[i, ] = np.load(path + f'/labels_stat{index}.npy', allow_pickle=True)[self.variable]

        return X, y

    def fit(self, train_indices):
        # load data
        X_train, y_train = self.load_data(train_indices)

        # normalize training set
        self.normalizer.fit(X_train) # fit accord. to training set
        X_train = self.normalizer.transform(X_train, copy=True)

        self.model.fit(X_train, y_train)

    def predict(self, test_indices):
        # load data
        X_test, _ = self.load_data(test_indices)

        # normalize test set
        X_test = self.normalizer.transform(X_test, copy=True)

        return self.model.predict(X_test)

# CV

In [25]:
scores_strat_group_k_fold = [None]*2
scores_strat_k_fold = [None]*2
scores_loso = [None]*2

with warnings.catch_warnings():
    warnings.filterwarnings('ignore')

    for variable in (0, 1): # phF, MF
        model = XGBoost(path, variable=0)

        scores_strat_group_k_fold[variable] = stratified_group_k_fold(path=path,
                                                            groups=subjects,
                                                            model=model,
                                                            folds=5,
                                                            images=False,
                                                            verbose=True,
                                                            variable=variable)

        scores_strat_k_fold[variable] = stratified_k_fold(path=path,
                                                model=model,
                                                folds=5,
                                                images=False,
                                                verbose=True,
                                                variable=variable)

        scores_loso[variable] = leave_one_subject_out(path=path,
                                                groups=subjects,
                                                model=model,
                                                images=False,
                                                verbose=True,
                                                variable=variable)

Starting stratified group 5-fold for physical fatigue


 Fold 5 F1: 0.8354955889984432: 100%|██████████| 5/5 [00:02<00:00,  1.95it/s] 


Performance model:
 accuracy: 0.663 +- 0.101 

 balanced_accuracy: 0.508 +- 0.075 

 f1: 0.639 +- 0.165 

 recall: 0.663 +- 0.101 

 precision: 0.64 +- 0.22 

Starting stratified 5-fold for physical fatigue


 Fold 5 F1: 0.746031746031746: 100%|██████████| 5/5 [00:01<00:00,  2.81it/s] 


Performance model:
 accuracy: 0.767 +- 0.03 

 balanced_accuracy: 0.621 +- 0.02 

 f1: 0.746 +- 0.02 

 recall: 0.767 +- 0.03 

 precision: 0.754 +- 0.037 

Starting leave-one-subject-out for physical fatigue


 Fold 21 F1: 0.3409090909090909: 100%|██████████| 21/21 [00:08<00:00,  2.49it/s] 


Performance model:
 accuracy: 0.603 +- 0.311 

 balanced_accuracy: 0.539 +- 0.296 

 f1: 0.586 +- 0.324 

 recall: 0.603 +- 0.311 

 precision: 0.605 +- 0.345 

Starting stratified group 5-fold for mental fatigue


 Fold 5 F1: 0.4963527851458886: 100%|██████████| 5/5 [00:01<00:00,  3.09it/s] 


Performance model:
 accuracy: 0.611 +- 0.101 

 balanced_accuracy: 0.534 +- 0.043 

 f1: 0.576 +- 0.153 

 recall: 0.611 +- 0.101 

 precision: 0.611 +- 0.197 

Starting stratified 5-fold for mental fatigue


 Fold 5 F1: 0.6292004390408646: 100%|██████████| 5/5 [00:01<00:00,  2.98it/s]


Performance model:
 accuracy: 0.694 +- 0.046 

 balanced_accuracy: 0.578 +- 0.046 

 f1: 0.664 +- 0.044 

 recall: 0.694 +- 0.046 

 precision: 0.671 +- 0.068 

Starting leave-one-subject-out for mental fatigue


 Fold 21 F1: 0.2: 100%|██████████| 21/21 [00:08<00:00,  2.56it/s]                

Performance model:
 accuracy: 0.488 +- 0.329 

 balanced_accuracy: 0.445 +- 0.301 

 f1: 0.46 +- 0.328 

 recall: 0.488 +- 0.329 

 precision: 0.49 +- 0.367 






# Save scores

In [26]:
path_scores = './Scores'
model_name = 'xgboost'

In [27]:
# stratified 5-fold
with open(f'{path_scores}/strat_5_fold/{model_name}.txt', 'w') as dat:
    dat.write(str(scores_strat_group_k_fold))

In [28]:
# stratified group 5-fold
with open(f'{path_scores}/strat_group_5_fold/{model_name}.txt', 'w') as dat:
    dat.write(str(scores_strat_k_fold))

In [29]:
# LOSO
with open(f'{path_scores}/loso/{model_name}.txt', 'w') as dat:
    dat.write(str(scores_loso))