In [1]:
import ast
import json
import os
import warnings

import numpy as np
import tensorflow as tf
from tensorflow.keras import models, Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPool2D, BatchNormalization, Dropout, Input, Concatenate, GlobalAvgPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import Sequence

from evaluator import *

In [2]:
# Names of the recorded variables. In grayscale mode the channel count is set to
# len(VARIABLES) and the data loader converts one RGB triplet per entry.
VARIABLES = ['ActivityCounts', 'Barometer', 'BloodPerfusion',
             'BloodPulseWave', 'EnergyExpenditure', 'GalvanicSkinResponse', 'HR',
             'HRV', 'RESP', 'Steps', 'SkinTemperature', 'ActivityClass']

In [3]:
GRAYSCALE = False # True: collapse every RGB triplet to one grayscale channel; False: keep the RGB channels

# Import data

In [4]:
# path to the data folder; feature_vector{i}.npy, labels{i}.npy and metadata.txt are read from it
path = './Output'

In [5]:
# number of samples: files named "feature_vector*" except the "feature_vector_stat*" ones
N = sum(p.startswith('feature_vector') and not p.startswith('feature_vector_stat')
        for p in os.listdir(path))

# image dimensions, taken from the first stored sample
HEIGHT, WIDTH, CHANNELS = np.load(path + '/feature_vector0.npy').shape
if GRAYSCALE:
    CHANNELS = len(VARIABLES) # one channel per variable in grayscale mode

print(N, HEIGHT, WIDTH, CHANNELS)

613 370 497 30


Metadata (subjectID etc.)

In [6]:
with open(path + '/metadata.txt') as f:
    metadata_text = f.read()

try:
    # The quote/boolean rewriting below implies the file holds a Python literal
    # (single quotes, True/False). Parsing it as such also survives apostrophes
    # or the words "True"/"False" inside string values.
    metadata = ast.literal_eval(metadata_text)
except (ValueError, SyntaxError):
    # Fallback: the previous behaviour (rewrite quotes/booleans, then parse as JSON).
    metadata = json.loads(metadata_text.replace('\'', '\"').replace('False', 'false').replace('True', 'true')) # doesn't accept other chars

In [7]:
# subject ID of every metadata entry, in metadata order; passed as `groups` to the group-wise CV below
subjects = [meta['subjectID'] for meta in metadata]

# CNN

Additional functions

In [8]:
# image-wise transformer
def rgb2gray(rgb):
    """
    Collapse an RGB image to a single luminance plane.

    greyscale = 0.2989 * red + 0.5870 * green + 0.1140 * blue
    Only the first three channels of the last axis are used.

    :param rgb: array indexed as [row, column, channel] with at least 3 channels
    :return: 2-D array [row, column] with the weighted channel sum
    """
    luma_weights = [0.2989, 0.5870, 0.1140]
    color_planes = rgb[:, :, :3]
    return np.dot(color_planes, luma_weights)

# loss function
def weighted_cross_entropy(weight):
    """
    Build a loss callable around tf.nn.weighted_cross_entropy_with_logits.

    :param weight: value forwarded as the third positional argument of the TF op
    :return: function (labels, logits) -> loss as returned by the TF op
    """
    def weighted_cross_entropy_with_logits(labels, logits):
        # thin closure: only binds `weight`, no further processing of the result
        return tf.nn.weighted_cross_entropy_with_logits(labels, logits, weight)

    return weighted_cross_entropy_with_logits

# weight (imbalanced classes)
def check_imbalance(path_to_labels, indices, label_index=None):
    """
    Split dataset indices into positives and negatives for one label column.

    :param path_to_labels: folder containing the labels{index}.npy files
    :param indices: dataset indices to inspect
    :param label_index: position of the label inside each label file; when None the
        notebook-level `variable` is used (previous behaviour)
    :return: (indices with label 1, indices with label 0) as numpy arrays
    """
    # Existing callers do not pass a column, so fall back to the global they relied on.
    column = variable if label_index is None else label_index

    indices = np.asarray(indices)
    # NOTE: allow_pickle=True can execute arbitrary code when loading untrusted files.
    labels = np.array(
        [np.load(path_to_labels + f'/labels{index}.npy', allow_pickle=True)[column] for index in indices],
        dtype=int,
    )

    # Boolean masks keep the input order of `indices`.
    return indices[labels == 1], indices[labels == 0]

def get_weighting_factor(path, train_set_indices, label_index=None):
    """
    Ratio negatives / positives of the training set (weight for the weighted cross-entropy).

    :param path: folder containing the labels{index}.npy files
    :param train_set_indices: dataset indices of the training set
    :param label_index: label column, forwarded to check_imbalance (None keeps the previous behaviour)
    :return: len(negatives) / len(positives)
    :raises ZeroDivisionError: if the training set contains no positive sample
    """
    positives, negatives = check_imbalance(path, train_set_indices, label_index)
    if len(positives) == 0:
        # same exception type as the bare division, but with an actionable message
        raise ZeroDivisionError('no positive samples in the training set - cannot compute the class weight')
    return len(negatives) / len(positives) # for weighted cross-entropy

Data loader (the image dataset is too large to load at once, so batches are read from disk)

In [9]:
class DataGenerator(Sequence):
    """
    Keras Sequence that reads feature_vector{index}.npy / labels{index}.npy files
    from `data_path` batch by batch instead of holding the whole dataset in memory.
    """

    def __init__(self, data_path: str, indices_dataset: list, batch_size=32, dim=(HEIGHT, WIDTH), n_channels=CHANNELS, shuffle=True, label_index=None):
        """
        :param data_path: path to the full dataset
        :param indices_dataset: indices of the full dataset served by this loader (train/validation/test subset)
        :param batch_size: number of samples per batch
        :param dim: image dimension (height, width)
        :param n_channels: number of channels of the returned images
        :param shuffle: reshuffle the indices at construction and after every epoch
        :param label_index: position of the label inside each label file; when None the
            notebook-level `variable` is used (previous behaviour)
        """
        super().__init__()
        self.data_path = data_path # path to full dataset
        self.dim = dim # image dimension
        self.batch_size = batch_size
        # private copy: np.random.shuffle works in place and must not reorder the caller's indices
        self.indices_dataset = np.array(indices_dataset)
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.label_index = label_index

        self.on_epoch_end() # shuffle data for each epoch

    def on_epoch_end(self):
        """
        Shuffle data for each epoch
        """
        if self.shuffle:
            np.random.shuffle(self.indices_dataset)

    def __data_generation(self, indices):
        """
        Loads and returns datapoints[indices]

        :param indices: dataset indices of one batch
        :return: X of shape (len(indices), *dim, n_channels) and y of shape (len(indices),)
        """
        # label column: explicit argument if given, otherwise the notebook-level `variable`
        column = variable if self.label_index is None else self.label_index

        # init (sized by the indices actually passed, so no uninitialised rows are returned)
        X = np.empty((len(indices), *self.dim, self.n_channels))
        y = np.empty(len(indices), dtype=float) # TODO: int for non-logits

        # load individual datapoints
        for i, index in enumerate(indices):
            images = np.load(self.data_path + f'/feature_vector{index}.npy', allow_pickle=True)
            if GRAYSCALE:
                # one grayscale plane per variable, built from consecutive RGB triplets
                images_gray = np.empty((*self.dim, self.n_channels))
                for j in range(len(VARIABLES)):
                    image_rgb = images[:, :, (3 * j): (3 * (j + 1))]
                    images_gray[:, :, j] = rgb2gray(image_rgb)
                images = images_gray

            X[i, ] = images
            y[i] = np.load(self.data_path + f'/labels{index}.npy', allow_pickle=True)[column] # TODO: for now just one variable

        return X, y

    def __len__(self):
        """
        Number of batches per epoch (a trailing incomplete batch is dropped)
        """
        return len(self.indices_dataset) // self.batch_size

    def __getitem__(self, index):
        """
        Generates batch[index]
        """
        # calculate indices of batch
        start = index * self.batch_size
        indices = self.indices_dataset[start:start + self.batch_size]

        # generate batch
        return self.__data_generation(indices)

Architecture

In [10]:
# TODO: make possible for grayscale
class ConvNet(tf.keras.Model):
    """
    Shared MobileNetV2 embedding applied to every 3-channel slice of the input,
    followed by concatenation, global average pooling and one dense unit (logit).

    NOTE(review): the ImageNet weights are normally paired with
    tf.keras.applications.mobilenet_v2.preprocess_input - confirm the stored
    feature vectors are scaled the way the pretrained network expects.
    """

    def __init__(self, name='cnn', **kwargs):
        # `name` must be passed by keyword: the Model constructor takes inputs/outputs positionally
        super().__init__(name=name, **kwargs)

        self.in_shape = (HEIGHT, WIDTH, CHANNELS)
        self.in_shape_mobilenet = (HEIGHT, WIDTH, 3)

        # MobileNetV2 embedding (frozen until set_finetuning is called)
        self.mobilenet = MobileNetV2(input_shape=self.in_shape_mobilenet, weights='imagenet', include_top=False)
        self.mobilenet._name = 'mobilenet'
        self.mobilenet.trainable = False
        self.finetuning = False
        self.out_shape_mobilenet = self.mobilenet.layers[-1].output_shape # for one spectrogram

        # Concatenation
        self.concat = Concatenate(name='concat')

        # Global pooling
        self.pool = GlobalAvgPool2D(name='global_avg_pool')

        # TODO: more sophisticated dense (dropout, regularizer, init., ...)
        # Fully-connected network
        # (the former `input_shape` argument repeated the output-shape tuple and carried no usable shape)
        self.flatten = Flatten(name='flatten')
        self.dense = Dense(1, name='dense') # keep logits
        self.out_shape = 1

        # build graph
        self.build_graph()

    def build_graph(self):
        """Build the weights and trace `call` once on a symbolic input of shape in_shape."""
        self.build(input_shape=(None, *self.in_shape))
        x = Input(shape=self.in_shape)
        Model(inputs=[x], outputs=self.call(x))

    def set_finetuning(self, mode=True):
        """
        Enable/disable fine-tuning of the last MobileNet layers.

        :param mode: True makes mobilenet.layers[-3] and [-2] trainable, False freezes everything
        """
        self.finetuning = mode
        self.mobilenet.trainable = mode

        # freeze every layer first ...
        for layer in self.mobilenet.layers:
            layer.trainable = False

        # ... then "activate" last conv layer of MobileNet
        self.mobilenet.layers[-3].trainable = mode
        self.mobilenet.layers[-2].trainable = mode

    def call(self, inputs):
        """
        Model predictions (logits)
        :param inputs: all spectrograms of shape (HEIGHT, WIDTH, CHANNELS)
        :return: class prediction (logits)
        """
        n_channels = self.in_shape[-1]

        # MobileNetV2 embeddings, one per consecutive 3-channel slice
        x = [self.mobilenet(inputs[..., i:i+3], training=self.finetuning) for i in range(0, n_channels, 3)]

        # Concatenation
        x = self.concat(x)

        # Global pooling
        x = self.pool(x)

        # Fully-connected network
        x = self.flatten(x)
        x = self.dense(x)

        return x

Model

In [11]:
class CNN:
    """
    Training/prediction wrapper around ConvNet exposing fit / reset / predict / summary,
    the calls made by the cross-validation code in this notebook.
    """

    def __init__(self, path, variable, epochs, learning_rate, batch_size):
        """
        :param path: data folder with feature_vector{i}.npy and labels{i}.npy
        :param variable: label column, 0 or 1
        :param epochs: number of training epochs
        :param learning_rate: Adam learning rate
        :param batch_size: training batch size
        """
        self.model = ConvNet()
        self.path = path
        assert variable in (0, 1)
        # NOTE: stored only; fit() does not forward it, so DataGenerator and
        # get_weighting_factor pick the label column from the notebook-level `variable`.
        self.variable = variable
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.history = None
        self.train_indices = None
        self.test_indices = None

    def fit(self, train_indices):
        """Train the current model on the given dataset indices; the Keras history is kept in self.history."""
        self.train_indices = train_indices

        # training set
        train_dataloader = DataGenerator(self.path, train_indices, batch_size=self.batch_size)

        # weights for loss function
        sample_weights = get_weighting_factor(self.path, train_indices)

        # build model
        self.model.compile(optimizer=Adam(learning_rate=self.learning_rate),
                           loss=weighted_cross_entropy(sample_weights))

        # training (Model.fit accepts Sequence objects; fit_generator is deprecated)
        self.history = self.model.fit(train_dataloader,
                                      epochs=self.epochs)

    def reset(self):
        """Resets model weights"""
        self.model = ConvNet()

    def predict(self, test_indices):
        """Predicts actual class labels (not logits/probability values!)"""
        self.test_indices = test_indices

        # TODO: make more efficient
        # test set + predict
        y_pred = np.empty(len(test_indices), dtype=float)

        for i, index in enumerate(test_indices):
            # read from this model's data folder, not from the notebook-level `path`
            X_i = np.load(self.path + f'/feature_vector{index}.npy', allow_pickle=True)

            X_i = tf.expand_dims(X_i, axis=0) # add "batch dimension"
            logits_pred_i = self.model.predict(X_i)

            # one sample, one dense unit -> (1, 1) array; store the scalar explicitly
            y_pred[i] = logits_pred_i[0, 0]

        y_probs = tf.math.sigmoid(y_pred) # logits to probs
        y_pred = tf.round(y_probs) # probs to labels

        return y_pred

    def summary(self):
        """Return the Keras summary of the wrapped model."""
        return self.model.summary()

# CV

In [None]:
with warnings.catch_warnings():
    warnings.filterwarnings('ignore')

    # NOTE: the score variables are overwritten in every iteration, so only the
    # results of the last variable remain available to the "Save scores" cells.
    for variable in (0, 1): # phF, MF
        # build the model for the variable of this iteration (was hard-coded to 0)
        model = CNN(path, variable=variable, epochs=10, learning_rate=1e-3, batch_size=16)

        scores_strat_group_k_fold = stratified_group_k_fold(path=path,
                                                            groups=subjects,
                                                            model=model,
                                                            folds=5,
                                                            images=True,
                                                            verbose=True,
                                                            variable=variable)

        # not a valid CV method (train/test mixing due to overlapping images)
        '''scores_strat_k_fold = stratified_k_fold(path=path,
                                                model=model,
                                                folds=5,
                                                images=True,
                                                verbose=True,
                                                variable=variable)'''

        scores_loso = leave_one_subject_out(path=path,
                                            groups=subjects,
                                            model=model,
                                            images=True,
                                            verbose=True,
                                            variable=variable)

Starting stratified group 5-fold for physical fatigue


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 1 F1: 0.8956603773584907:  20%|██        | 1/5 [04:31<18:06, 271.56s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 2 F1: 0.6994500161759948:  40%|████      | 2/5 [08:05<11:52, 237.45s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 3 F1: 0.49410232440891955:  60%|██████    | 3/5 [12:22<08:13, 246.75s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 4 F1: 0.9090909090909091:  80%|████████  | 4/5 [17:06<04:21, 261.42s/it] 

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 5 F1: 0.5937996031746031: 100%|██████████| 5/5 [21:31<00:00, 258.26s/it]


Performance model:
 accuracy: 0.732 +- 0.162 

 balanced_accuracy: 0.596 +- 0.166 

 f1: 0.718 +- 0.164 

 recall: 0.732 +- 0.162 

 precision: 0.735 +- 0.158 

Starting leave-one-subject-out for physical fatigue


  0%|          | 0/27 [00:00<?, ?it/s]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 1 F1: 0.898989898989899:   4%|▎         | 1/27 [04:43<2:02:58, 283.79s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 2 F1: 1.0:   7%|▋         | 2/27 [09:26<1:58:04, 283.37s/it]              

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 3 F1: 0.5333333333333333:  11%|█         | 3/27 [14:10<1:53:19, 283.30s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 4 F1: 0.3977272727272728:  15%|█▍        | 4/27 [18:52<1:48:31, 283.11s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 5 F1: 0.3636363636363636:  19%|█▊        | 5/27 [23:34<1:43:36, 282.57s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 6 F1: 0.6:  22%|██▏       | 6/27 [28:20<1:39:19, 283.79s/it]               

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 7 F1: 0.3333333333333333:  26%|██▌       | 7/27 [33:19<1:36:15, 288.78s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 8 F1: 0.22892156862745097:  30%|██▉       | 8/27 [38:09<1:31:34, 289.17s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 9 F1: 0.375:  33%|███▎      | 9/27 [43:00<1:26:53, 289.63s/it]              

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 10 F1: 0.5252525252525253:  37%|███▋      | 10/27 [47:54<1:22:26, 290.94s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 11 F1: 0.41391941391941395:  41%|████      | 11/27 [53:06<1:19:18, 297.39s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 12 F1: 0.047619047619047616:  44%|████▍     | 12/27 [58:11<1:14:58, 299.87s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 13 F1: 0.16666666666666666:  48%|████▊     | 13/27 [1:03:18<1:10:28, 302.06s/it] 

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 14 F1: 1.0:  52%|█████▏    | 14/27 [1:08:11<1:04:49, 299.21s/it]                

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 15 F1: 0.534412955465587:  56%|█████▌    | 15/27 [1:13:01<59:16, 296.39s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 16 F1: 0.7363636363636363:  59%|█████▉    | 16/27 [1:18:00<54:30, 297.31s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 17 F1: 0.43636363636363645:  63%|██████▎   | 17/27 [1:22:56<49:27, 296.74s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 18 F1: 0.8375:  67%|██████▋   | 18/27 [1:27:54<44:34, 297.16s/it]             

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 19 F1: 0.888888888888889:  70%|███████   | 19/27 [1:32:50<39:33, 296.73s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 20 F1: 0.9090909090909091:  74%|███████▍  | 20/27 [1:37:36<34:15, 293.68s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 21 F1: 0.38502673796791437:  78%|███████▊  | 21/27 [1:42:28<29:19, 293.23s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 22 F1: 0.7815126050420167:  81%|████████▏ | 22/27 [1:47:31<24:39, 295.99s/it] 

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 23 F1: 0.3903225806451613:  85%|████████▌ | 23/27 [1:52:07<19:19, 289.99s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 24 F1: 0.7811120032115615:  89%|████████▉ | 24/27 [1:56:31<14:06, 282.33s/it]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
 2/36 [>.............................] - ETA: 24s - loss: 0.2177

# Save scores

In [None]:
# output folder for the score files and the file stem used for this model
path_scores = './Scores'
model_name = 'cnn'

In [None]:
# stratified 5-fold
# `scores_strat_k_fold` only exists if the (currently disabled) stratified_k_fold run is
# re-enabled; the group-wise scores belong in the strat_group_5_fold file, not here.
if 'scores_strat_k_fold' in globals():
    with open(f'{path_scores}/strat_5_fold//{model_name}.txt', 'w') as dat:
        dat.write(str(scores_strat_k_fold))
else:
    print('No stratified 5-fold scores available - nothing written')

In [None]:
# stratified group 5-fold (write the group-wise scores; `scores_strat_k_fold` is never computed)
with open(f'{path_scores}/strat_group_5_fold//{model_name}.txt', 'w') as dat:
    dat.write(str(scores_strat_group_k_fold))

In [None]:
# LOSO: write the leave-one-subject-out scores as plain text
# NOTE(review): the folder name 'strat_5_loso_fold' looks like a copy-paste leftover - confirm it matches the Scores layout
with open(f'{path_scores}/strat_5_loso_fold//{model_name}.txt', 'w') as dat:
    dat.write(str(scores_loso))

# Under development

train/test split

In [14]:
variable = 0 # 0: physical fatigue, 1: mental fatigue

# single train/test split with a fresh model for the selected variable
model = CNN(path, variable=variable, epochs=10, learning_rate=1e-3, batch_size=16)

scores = stratified_train_test(path, model, test_size=0.2, images=True, verbose=True, variable=variable)

Starting stratified train/test for physical fatigue




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Performance model:
 accuracy: 0.808 +- 0.808 

 balanced_accuracy: 0.801 +- 0.801 

 f1: 0.816 +- 0.816 

 recall: 0.808 +- 0.808 

 precision: 0.836 +- 0.836 



Daily majority vote

In [15]:
# test set (indices remembered by the model from its last predict call)
test_indices = model.test_indices

# day information for each segment
'''metadata_test = np.array(metadata)[test_indices]
days_test = [(meta['date'], meta['subjectID']) for meta in metadata_test] # note that same date can be used by different subjects'''

# true labels (first label column of every test segment)
y = np.array(
    [np.load(path + f'/labels{index}.npy', allow_pickle=True)[0] for index in test_indices],
    dtype=int,
)

# model predictions, converted from a tensor to an integer numpy array
y_pred_segments = model.predict(test_indices).numpy().astype(int)

In [16]:
# per-day vote over the segment predictions; the displayed keys are (date, subjectID)
# (helper is not defined in this notebook - presumably provided by `evaluator`)
daily_majority_vote(y_pred_segments, test_indices, metadata)

{('09.02.18', 24): 1,
 ('09.05.19', 9): 0,
 ('27.07.18', 24): 1,
 ('27.01.18', 26): 1,
 ('07.05.19', 9): 1,
 ('12.01.18', 23): 0,
 ('07.06.19', 15): 1,
 ('28.12.17', 23): 1,
 ('01.01.18', 23): 0,
 ('06.04.19', 5): 1,
 ('28.02.18', 26): 1,
 ('11.02.18', 23): 1,
 ('16.02.18', 23): 0,
 ('10.05.18', 24): 1,
 ('23.01.18', 23): 1,
 ('14.02.18', 23): 1,
 ('31.03.18', 24): 0,
 ('26.02.18', 23): 1,
 ('10.05.19', 9): 0,
 ('16.03.19', 1): 0,
 ('20.05.19', 16): 1,
 ('29.01.18', 26): 1,
 ('06.05.19', 7): 1,
 ('21.05.19', 16): 1,
 ('01.09.18', 27): 1,
 ('19.05.18', 24): 1,
 ('20.02.18', 23): 0,
 ('12.06.19', 8): 1,
 ('15.05.19', 11): 0,
 ('13.03.18', 24): 1,
 ('11.05.19', 9): 1,
 ('10.03.18', 24): 0,
 ('01.03.18', 24): 1,
 ('16.07.18', 24): 1,
 ('09.05.19', 11): 0,
 ('13.05.19', 11): 0,
 ('31.12.17', 23): 1,
 ('10.07.19', 21): 1,
 ('04.03.18', 24): 1,
 ('26.03.19', 4): 0,
 ('02.01.18', 24): 1,
 ('07.04.19', 5): 1,
 ('17.06.19', 14): 1,
 ('19.06.19', 13): 0,
 ('11.02.18', 25): 1,
 ('22.01.18', 23): 1

In [17]:
# agreement score for the segment predictions
# (helper is not defined in this notebook - presumably provided by `evaluator`; confirm its exact definition there)
agreements(y_pred_segments, test_indices, metadata)

0.8035714285714286

In [19]:
variable = 0 # 0: physical fatigue, 1: mental fatigue

# fresh model for the manual cross-validation below
model = CNN(path, variable=variable, epochs=10, learning_rate=1e-3, batch_size=16)



In [20]:
N_FOLDS = 5 # single source for the splitter, the progress bar and the log line

N = sum([1 for p in os.listdir(path) if (p[:14] == 'feature_vector' and p[:19] != 'feature_vector_stat')])

# load labels (we need them for stratification)
y = np.empty(N, dtype=int)
for i in range(N):
    y[i] = np.load(path + f'/labels{i}.npy', allow_pickle=True)[variable]  # TODO: multiclass

# CV
cv = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
scores_cv = []
data_indices = np.arange(N)

print(f'Starting stratified group {N_FOLDS}-fold for {["physical fatigue", "mental fatigue"][variable]}')
with tqdm(total=N_FOLDS) as pbar:
    for i, (train_indices, test_indices) in enumerate(cv.split(X=data_indices, y=y, groups=subjects)):
        # test labels
        y_test = y[test_indices]

        # training (fresh weights for every fold)
        model.reset()
        model.fit(train_indices)

        # predict
        y_pred = model.predict(test_indices)

        # evaluate
        scores = evaluator(y_pred, y_test, verbose=False)
        scores_cv.append(scores)

        # agreements
        print(agreements(y_pred, model.test_indices, metadata))

        # for progress bar
        pbar.update(1)
        pbar.set_description(f' Fold {i+1} F1: {scores["f1"]}')

# summary: mean and standard deviation of every metric over the folds
print('Performance model:')
metrics = scores_cv[0].keys()
for metric in metrics:
    fold_values = [scores_cv_i[metric] for scores_cv_i in scores_cv]
    mean = np.mean(fold_values)
    std = np.std(fold_values)
    print(f' {metric}: {round(mean, 3)} +- {round(std, 3)} \n')

Starting stratified group 5-fold for physical fatigue


  0%|          | 0/5 [00:00<?, ?it/s]





Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 1 F1: 0.5681694894300688:  20%|██        | 1/5 [09:09<36:39, 549.81s/it]

0.48863636363636365




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 2 F1: 0.7061826040972111:  40%|████      | 2/5 [20:07<30:39, 613.29s/it]

0.375




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 3 F1: 0.5849757145525873:  60%|██████    | 3/5 [31:15<21:16, 638.28s/it]

0.5




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 4 F1: 0.6328224960191371:  80%|████████  | 4/5 [39:38<09:44, 584.70s/it]

0.3811881188118812




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 5 F1: 0.5310234571376662: 100%|██████████| 5/5 [48:52<00:00, 586.58s/it]

0.3835616438356164
Performance model:
 accuracy: 0.59 +- 0.067 

 balanced_accuracy: 0.575 +- 0.055 

 f1: 0.605 +- 0.06 

 recall: 0.59 +- 0.067 

 precision: 0.651 +- 0.118 






In [14]:
variable = 0 # 0: physical fatigue, 1: mental fatigue
N_FOLDS = 5 # single source for the splitter, the progress bar and the log line
model = CNN(path, variable=variable, epochs=10, learning_rate=1e-3, batch_size=16)

N = sum([1 for p in os.listdir(path) if (p[:14] == 'feature_vector' and p[:19] != 'feature_vector_stat')])

# load labels (we need them for stratification)
y = np.empty(N, dtype=int)
for i in range(N):
    y[i] = np.load(path + f'/labels{i}.npy', allow_pickle=True)[variable]  # TODO: multiclass

# CV
cv = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)
scores_cv = []
data_indices = np.arange(N)

print(f'Starting stratified group {N_FOLDS}-fold for {["physical fatigue", "mental fatigue"][variable]}')
with tqdm(total=N_FOLDS) as pbar:
    for i, (train_indices, test_indices) in enumerate(cv.split(X=data_indices, y=y, groups=subjects)):
        # test labels
        y_test = y[test_indices]

        # training (fresh weights for every fold)
        model.reset()
        model.fit(train_indices)

        # predict
        y_pred = model.predict(test_indices)

        # daily majorities
        # NOTE(review): comparing the two value lists element-wise assumes both calls
        # return the days in the same order - confirm in daily_majority_vote
        y_pred_daily = np.array(list(daily_majority_vote(y_pred, test_indices, metadata).values()))
        y_true_daily = np.array(list(daily_majority_vote(y_test, test_indices, metadata).values()))

        # evaluate (on day level)
        scores = evaluator(y_pred_daily, y_true_daily, verbose=False)
        scores_cv.append(scores)

        # agreements
        print('agreements:', agreements(y_pred, model.test_indices, metadata))

        # for progress bar
        pbar.update(1)
        pbar.set_description(f' Fold {i+1} F1: {scores["f1"]}')

# summary: mean and standard deviation of every metric over the folds
print('Performance model:')
metrics = scores_cv[0].keys()
for metric in metrics:
    fold_values = [scores_cv_i[metric] for scores_cv_i in scores_cv]
    mean = np.mean(fold_values)
    std = np.std(fold_values)
    print(f' {metric}: {round(mean, 3)} +- {round(std, 3)} \n')

Starting stratified group 5-fold for physical fatigue


  0%|          | 0/5 [00:00<?, ?it/s]





Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 1 F1: 0.5801763453937365:  20%|██        | 1/5 [09:04<36:19, 544.78s/it]

agreements: 0.6136363636363636
agreements (aggregate): 0.9090909090909091




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 2 F1: 0.75:  40%|████      | 2/5 [20:56<32:09, 643.22s/it]              

agreements: 0.375
agreements (aggregate): 0.75




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 3 F1: 0.3777777777777778:  60%|██████    | 3/5 [32:29<22:11, 665.62s/it]

agreements: 0.42857142857142855
agreements (aggregate): 0.8571428571428571




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 4 F1: 0.6919870558484421:  80%|████████  | 4/5 [40:38<09:56, 596.05s/it]

agreements: 0.4801980198019802
agreements (aggregate): 0.7970297029702971




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


 Fold 5 F1: 0.3568734427779364: 100%|██████████| 5/5 [50:41<00:00, 608.36s/it]

agreements: 0.9178082191780822
agreements (aggregate): 0.958904109589041
Performance model:
 accuracy: 0.582 +- 0.112 

 balanced_accuracy: 0.553 +- 0.108 

 f1: 0.551 +- 0.16 

 recall: 0.582 +- 0.112 

 precision: 0.608 +- 0.181 






# TODO: check why so few days (even fewer than statistical features)