# Data Augmentation

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf 
import tensorflow_addons as tfa

from pathlib import Path
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, accuracy_score
from tensorflow.keras.layers import Input, LSTM, Dense, Flatten, TimeDistributed, Conv1D, BatchNormalization, MaxPooling1D, ConvLSTM1D, Bidirectional, Concatenate
from tensorflow.keras import Model
from tensorflow.keras.initializers import Constant
from keras.layers.advanced_activations import PReLU

## Methods for loading datasets and creating model architectures

### Data

In [2]:
def load_file(file):
    df = pd.read_csv(file, header=0)
    labels = df['pain']
    df = df.drop(columns=['pain'])
    df.drop(df.columns[-1], axis=1, inplace=True)
    return df, labels

def load_dataset(input_filepath, modality):
    feature_names = []
    classes = []
    root_dir = Path(os.path.abspath('')).parent
    data_dir = root_dir / 'data'
    train_filepath = data_dir / input_filepath / 'train' / modality
    test_filepath = data_dir / input_filepath / 'test' / modality
    # 1. Load train data
    X = list()
    y = list()
    for file in train_filepath.iterdir():
        if file.is_file() and file.name.endswith('.csv'):
            x, labels = load_file(file)
            if feature_names == []:
                feature_names = x.columns.values.tolist()
            if labels[0] not in classes:
                classes.append(labels[0])
            x = x.to_numpy()
            X.append(x)
            y.append(labels[0])
    n_length = X[0].shape[0]
    n_features = X[0].shape[1]
    X = np.array(X)
    X_train = X.reshape((len(X), 1, n_length, n_features))
    lb = LabelBinarizer()
    y_train = lb.fit_transform(y)
    # 2. Load test data
    X = list()
    y = list()
    for file in test_filepath.iterdir():
        if file.is_file() and file.name.endswith('.csv'):
            x, labels = load_file(file)
            x = x.to_numpy()
            X.append(x)
            y.append(labels[0])
    n_length = X[0].shape[0]
    n_features = X[0].shape[1]
    X = np.array(X)
    X_test = X.reshape((len(X), 1, n_length, n_features))
    y_test = lb.transform(y)
    return X_train, X_test, y_train, y_test, feature_names, classes

def load_earlyfusion_dataset(input_filepath):
    root_dir = Path(os.path.abspath('')).parent
    data_dir = root_dir / 'data'
    body_train_filepath = data_dir / input_filepath / 'train' / 'skeleton'
    body_test_filepath = data_dir / input_filepath / 'test' / 'skeleton'
    face_train_filepath = data_dir / input_filepath / 'train' / 'AUs'
    face_test_filepath = data_dir / input_filepath / 'test' / 'AUs'
    # 1. Load train data
    X = list()
    y = list()
    for file in body_train_filepath.iterdir():
        if file.is_file() and file.name.endswith('.csv'):
            body_x, body_labels = load_file(file)
            has_equivalent_face_file = False
            for second_file in face_train_filepath.iterdir():
                if second_file.is_file() and second_file.name.endswith('.csv') and second_file.name == file.name:
                    # Concatenate features
                    face_x, face_labels = load_file(second_file)
                    full_X = np.concatenate((body_x, face_x), axis=1)
                    body_x = body_x.to_numpy()
                    face_x = face_x.to_numpy()
                    X.append(full_X)
                    y.append(body_labels[0])
                    has_equivalent_face_file = True
                    break
            if not has_equivalent_face_file:
                continue
    n_length = X[0].shape[0]
    n_features = X[0].shape[1]
    X = np.array(X)
    X_train = X.reshape((len(X), 1, n_length, n_features))
    lb = LabelBinarizer()
    y_train = lb.fit_transform(y)
    # 2. Load test data
    X = list()
    y = list()
    for file in body_test_filepath.iterdir():
        if file.is_file() and file.name.endswith('.csv'):
            body_x, body_labels = load_file(file)
            body_x = body_x.to_numpy()
            has_equivalent_face_file = False
            for second_file in face_test_filepath.iterdir():
                if second_file.is_file() and second_file.name.endswith('.csv') and second_file.name == file.name:
                    # Concatenate features
                    face_x, face_labels = load_file(second_file)
                    face_x = face_x.to_numpy()
                    full_X = np.concatenate((body_x, face_x), axis=1)
                    X.append(full_X)
                    y.append(body_labels[0])
                    has_equivalent_face_file = True
                    break
            if not has_equivalent_face_file:
                continue
    n_length = X[0].shape[0]
    n_features = X[0].shape[1]
    X = np.array(X)
    X_test = X.reshape((len(X), 1, n_length, n_features))
    y_test = lb.transform(y)
    return X_train, X_test, y_train, y_test

def load_latefusion_dataset(input_filepath, modality):
    root_dir = Path(os.path.abspath('')).parent
    data_dir = root_dir / 'data'
    train_filepath = data_dir / input_filepath / 'train' / modality
    test_filepath = data_dir / input_filepath / 'test' / modality
    # 1. Load train data
    X = list()
    y = list()
    for file in train_filepath.iterdir():
        if file.is_file() and file.name.endswith('.csv'):
            x, labels = load_file(file)
            if modality == 'skeleton':
                has_equivalent_face_file = False 
                for second_file in (data_dir / input_filepath / 'train' / 'AUs').iterdir():
                    if second_file.is_file() and second_file.name.endswith('.csv') and second_file.name == file.name:
                        has_equivalent_face_file = True
                        break
                if not has_equivalent_face_file:
                    continue
                else: 
                    x = x.to_numpy()
                    X.append(x)
                    y.append(labels[0])
            else:
                x = x.to_numpy()
                X.append(x)
                y.append(labels[0])
    n_length = X[0].shape[0]
    n_features = X[0].shape[1]
    X = np.array(X)
    X_train = X.reshape((len(X), 1, n_length, n_features))
    lb = LabelBinarizer()
    y_train = lb.fit_transform(y)
    # 2. Load test data
    X = list()
    y = list()
    for file in test_filepath.iterdir():
        if file.is_file() and file.name.endswith('.csv'):
            x, labels = load_file(file)
            if modality == 'skeleton':
                has_equivalent_face_file = False 
                for second_file in (data_dir / input_filepath / 'test' / 'AUs').iterdir():
                    if second_file.is_file() and second_file.name.endswith('.csv') and second_file.name == file.name:
                        has_equivalent_face_file = True
                        break
                if not has_equivalent_face_file:
                    continue
                else: 
                    x = x.to_numpy()
                    X.append(x)
                    y.append(labels[0])
            else:
                x = x.to_numpy()
                X.append(x)
                y.append(labels[0])
    n_length = X[0].shape[0]
    n_features = X[0].shape[1]
    X = np.array(X)
    X_test = X.reshape((len(X), 1, n_length, n_features))
    y_test = lb.transform(y)
    return X_train, X_test, y_train, y_test

In [3]:
def jitter(x, sigma=0.03):
    return x + np.random.normal(loc=0., scale=sigma, size=x.shape)

In [4]:
def rotation(x):
    flip = np.random.choice([-1, 1], size=(x.shape[0],x.shape[2]))
    rotate_axis = np.arange(x.shape[2])
    np.random.shuffle(rotate_axis)    
    return flip[:,np.newaxis,:] * x[:,:,rotate_axis]

### Models

In [5]:
def train(model, X_train, y_train, X_val, y_val, epochs, batch_size, class_weight):
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=10)
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, callbacks=[early_stopping],
        batch_size=batch_size, class_weight=class_weight, verbose=2)
    return history

def train_fusioned(model, body_X_train, face_X_train, y_train, body_X_val, face_X_val, y_val, epochs, batch_size, class_weight):
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=10)
    history = model.fit([body_X_train, face_X_train], y_train, validation_data=([body_X_val, face_X_val], y_val), 
                        epochs=epochs, batch_size=batch_size, callbacks=[early_stopping], class_weight=class_weight, verbose=2)
    return history

### Dirichlet Markov Ensemble with soft voting

The Dirichlet ensemble weights the ensemble members based on validation set performance. The weight optimization search is performed with randomized search based on the dirichlet distribution. 
    
Soft voting involves summing the predicted probability scores for each class label and predicting the class label with the largest probability.

In [6]:
class KerasMember():
    def __init__(self, name=None, keras_model=None, train_batches=None,
                 val_batches=None, submission_probs=None, keras_modelpath=None,
                 keras_kwargs={}):
        assert(name is not None)
        self.name = name
        self.model = keras_model
        self.submission_probs = submission_probs
        # Initialize Params
        self.val_probs = None
        self.train_probs = None
        self.val_classes = None
        self.train_classes = None
        if (keras_model is None) and (keras_modelpath is not None):
            self.load_kerasmodel(self.keras_modelpath, self.keras_kwargs)
        if train_batches is not None:
            self._calculate_train_predictions(train_batches)
        if val_batches is not None:
            self._calculate_val_predictions(val_batches)

    def _test_datatuple(self, datatuple):
        assert(len(datatuple) == 2)
        assert(datatuple[0].shape[0] == datatuple[1].shape[0])

    def _calculate_predictions(self, batches):
        if hasattr(batches, 'shuffle'):
            batches.reset()
            batches.shuffle = False
        if type(batches) is tuple:
            self._test_datatuple(batches)
            return self.model.predict(batches[0])
        return self.model.predict_generator(
            batches, steps=(batches.n // batches.batch_size) + 1, verbose=1)

    def _calculate_val_predictions(self, val_batches):
        if type(val_batches) is tuple:
            self.val_classes = val_batches[1]
        elif hasattr(val_batches, 'classes'):
            self.val_classes = np.array(val_batches.classes)
        elif hasattr(val_batches, 'y'):
            self.val_classes = np.array(val_batches.y)
        else:
            raise ValueError("No known class in data batch")
        self.val_probs = self._calculate_predictions(val_batches)
        return self.val_probs

    def _calculate_train_predictions(self, train_batches):
        if type(train_batches) is tuple:
            self.train_classes = train_batches[1]
        elif hasattr(train_batches, 'classes'):
            self.train_classes = np.array(train_batches.classes)
        elif hasattr(train_batches, 'y'):
            self.train_classes = np.array(train_batches.y)
        else:
            raise ValueError("No known class in data batch")
        self.train_probs = self._calculate_predictions(train_batches)
        return self.train_probs


In [7]:
class DirichletEnsemble():
    def __init__(self, N=10000, metric=None, maximize=True):
        self.n = N
        self.metric = metric
        if metric is None:
            self.metric = roc_auc_score
        self.maximize = maximize
        # Initialize Parameters:
        self.members = []
        self.bestweights = []
        self.probabilities = None
        self._nmembers = 0
        self.bestscore = float("-inf") if maximize else float("inf")
        self.accuracy = None
        self.precision = None
        self.recall = None
        self.f1 = None
        self.fitted = False

    def add_members(self, members):
        for member in members:
            self.add_member(member)

    def add_member(self, member):
        self.members.append(member)
        self._nmembers += 1

    def fit(self, verbose=False):
        assert(len(self.members) > 1)
        val_classes = self.members[0].val_classes
        best_ensemble_score = float("-inf") if self.maximize else float("inf")
        best_ensemble_accuracy = 0
        best_ensemble_p = 0
        best_ensemble_r = 0
        best_ensemble_f1 = 0
        rsbest = None
        for i in range(self.n):
            rs = np.random.dirichlet(np.ones(self._nmembers), size=1)[0]
            preds = np.sum(np.array([self.members[i].val_probs * rs[i]
                                     for i in range(self._nmembers)]), axis=0)
            ensemble_score = _calculate_metric(val_classes, preds)
            ensemble_accuracy = _calculate_metric(val_classes, preds, accuracy_score)
            ensemble_prf1 = _calculate_metric(val_classes, preds, precision_recall_fscore_support)
            ensemble_p = ensemble_prf1[0]
            ensemble_r = ensemble_prf1[1]
            ensemble_f1 = ensemble_prf1[2]
            max_flag = self.maximize and ensemble_score > best_ensemble_score
            min_flag = not(
                self.maximize) and ensemble_score < best_ensemble_score
            if max_flag or min_flag:
                if verbose:
                    print(ensemble_score, i, rs) 
                best_ensemble_score = ensemble_score
                rsbest = rs
            if ensemble_accuracy > best_ensemble_accuracy:
                if verbose:
                    print(ensemble_accuracy, i, rs) 
                best_ensemble_accuracy = ensemble_accuracy
            if ensemble_p > best_ensemble_p:
                if verbose:
                    print(ensemble_p, i, rs) 
                best_ensemble_p = ensemble_p
            if ensemble_r > best_ensemble_r:
                if verbose:
                    print(ensemble_r, i, rs) 
                best_ensemble_r = ensemble_r
            if ensemble_f1 > best_ensemble_f1:
                if verbose:
                    print(ensemble_f1, i, rs) 
                best_ensemble_f1 = ensemble_f1
        self.bestweights = rsbest
        self.bestscore = best_ensemble_score
        self.accuracy = best_ensemble_accuracy
        self.precision = best_ensemble_p
        self.recall = best_ensemble_r
        self.f1 = best_ensemble_f1

    def predict(self):
        self.probabilities = np.sum(np.array([self.bestweights[i] *
                                              self.members[i].submission_probs
                                              for i in range(self._nmembers)]),
                                    axis=0)
        return self.probabilities
        
    def describe(self):
        for i in range(self._nmembers):
            member = self.members[i]
            text = member.name + " (weight: {:1.4f})".format(self.bestweights[i])
            print(text)
        print("Accuracy: {:1.2f}% - ROC/AUC: {:1.2f} - Precision: {:1.2f} - Recall: {:1.2f} - F1 score: {:1.2f}".format(
            self.accuracy * 100, self.bestscore, self.precision, self.recall, self.f1))
        return

def _calculate_metric(y_true, y_pred, metric=None):
    if metric is None:
        metric = roc_auc_score
    y_t = y_true
    y_p = y_pred
    if metric == roc_auc_score:
        return metric(y_t, y_p, multi_class='ovo')
    if y_true.ndim > 1:
        y_t = np.argmax(y_true, axis=1)
    if y_pred.ndim > 1:
        y_p = np.argmax(y_pred, axis=1)
    if metric is precision_recall_fscore_support:
        return precision_recall_fscore_support(y_t, y_p, average='macro')
    return metric(y_t, y_p)

### KFold CV

In [8]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)
epochs = 50

## 1. Uni-modal approaches

Training the different model architectures on single modalities (skeleton vs action units). 

### 1.1 Body modality (skeleton) models

#### Apply Data Augmentation

In [9]:
full_body_X_train, full_body_X_test, full_body_y_train, full_body_y_test, body_feature_names, classes = load_dataset('processed', 'skeleton')
full_body_X = full_body_X_train.copy()
full_body_y = full_body_y_train.copy()

print(full_body_X_train.shape, full_body_y_train.shape)
print(full_body_X_test.shape, full_body_y_test.shape)

n_length, body_features, body_outputs = full_body_X_train.shape[2], full_body_X_train.shape[-1], full_body_y_test.shape[1]

(510, 1, 350, 38) (510, 4)
(105, 1, 350, 38) (105, 4)


In [10]:
jitter_X = jitter(full_body_X, sigma=0.03)

In [11]:
reshaped_X = full_body_X.reshape((full_body_X.shape[0], full_body_X.shape[2], full_body_X.shape[-1]))
rotated_X = rotation(reshaped_X)
rotated_X = rotated_X.reshape((rotated_X.shape[0], 1, rotated_X.shape[1], rotated_X.shape[-1]))

In [12]:
augmented_X = np.concatenate((rotated_X[0:250], jitter_X[0:250]), axis=0)
add_y = np.concatenate((full_body_y[0:250], full_body_y[0:250]), axis=0)
full_X = np.concatenate((augmented_X, full_body_X), axis=0)
full_y = np.concatenate((add_y, full_body_y), axis=0)
full_X.shape

(510, 1, 350, 38)

The train dataset is imbalanced

In [13]:
umild, lmoderate, lmild, umoderate = np.sum(full_y, axis=0)
total = umild + lmoderate + lmild + umoderate

print(classes[0], ':', umild, '({:.1f}% of total)'.format(umild / total * 100))
print(classes[2], ':', lmild, '({:.1f}% of total)'.format(lmild / total * 100))
print(classes[1], ':', lmoderate, '({:.1f}% of total)'.format(lmoderate / total * 100))
print(classes[3], ':', umoderate, '({:.1f}% of total)'.format(umoderate / total * 100))

UpperBody Mild : 257 (50.4% of total)
UpperBody Moderate : 160 (31.4% of total)
LowerBody Moderate : 60 (11.8% of total)
LowerBody Mild : 33 (6.5% of total)


In [14]:
weight_for_0 = (1 / umild) * (total / 2.0)
weight_for_1 = (1 / lmoderate) * (total / 2.0)
weight_for_2 = (1 / lmild) * (total / 2.0)
weight_for_3 = (1 / umoderate) * (total / 2.0)

class_weight = {0: weight_for_0, 1: weight_for_1, 2: weight_for_2, 3: weight_for_3}

print('Weight for Upper Body Pain Mild Level: {:.2f}'.format(weight_for_0))
print('Weight for Upper Body Pain Moderate Level: {:.2f}'.format(weight_for_3))
print('Weight for Lower Body Pain Mild Level: {:.2f}'.format(weight_for_2))
print('Weight for Lower Body Pain Moderate Level: {:.2f}'.format(weight_for_1))

Weight for Upper Body Pain Mild Level: 0.99
Weight for Upper Body Pain Moderate Level: 7.73
Weight for Lower Body Pain Mild Level: 1.59
Weight for Lower Body Pain Moderate Level: 4.25


In [18]:
for train_index, val_index in kf.split(full_X):
    continue

#### Train BiLSTM

In [16]:
def body_biLSTM(n_features, n_length, n_outputs):
    input = Input(shape=(1, n_length, n_features))
    bilstm_1 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='relu', return_sequences=True, kernel_initializer='glorot_normal'))(input)
    bilstm_2 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='relu', return_sequences=True, kernel_initializer='glorot_normal'))(bilstm_1)
    bilstm_3 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='relu', kernel_initializer='glorot_normal'))(bilstm_2)
    flatten = Flatten()(bilstm_3)
    dense = Dense(100, activation='relu')(flatten)
    dense = Dense(100, activation='relu')(flatten)
    output = Dense(units=n_outputs, activation='softmax')(dense)
    model = Model(inputs=input, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [19]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
body_biLSTM = body_biLSTM(body_features, n_length, body_outputs)
history = train(body_biLSTM, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=32, class_weight=class_weight)

Epoch 1/50
13/13 - 30s - loss: 6.9388 - accuracy: 0.3284 - auc: 0.5988 - precision: 0.3317 - recall: 0.1691 - f1_score: 0.2712 - val_loss: 1.4113 - val_accuracy: 0.2843 - val_auc: 0.6064 - val_precision: 0.2593 - val_recall: 0.0686 - val_f1_score: 0.2364 - 30s/epoch - 2s/step
Epoch 2/50
13/13 - 13s - loss: 2.6856 - accuracy: 0.3799 - auc: 0.6699 - precision: 0.4298 - recall: 0.1201 - f1_score: 0.3353 - val_loss: 1.5062 - val_accuracy: 0.2549 - val_auc: 0.5940 - val_precision: 0.2800 - val_recall: 0.1373 - val_f1_score: 0.2098 - 13s/epoch - 1s/step
Epoch 3/50
13/13 - 14s - loss: 2.5443 - accuracy: 0.3750 - auc: 0.6929 - precision: 0.5141 - recall: 0.1789 - f1_score: 0.3437 - val_loss: 1.5353 - val_accuracy: 0.3529 - val_auc: 0.5928 - val_precision: 0.2927 - val_recall: 0.1176 - val_f1_score: 0.2621 - 14s/epoch - 1s/step
Epoch 4/50
13/13 - 11s - loss: 2.2915 - accuracy: 0.4583 - auc: 0.7437 - precision: 0.5482 - recall: 0.2230 - f1_score: 0.4192 - val_loss: 1.6331 - val_accuracy: 0.2745 

#### Train CNN+LSTM

In [20]:
def body_CNN(n_features, n_length, n_outputs):
    input = Input(shape=(1, n_length, n_features))
    conv1d_1 = TimeDistributed(Conv1D(filters=100, kernel_size=3, activation='tanh', kernel_initializer='he_uniform'))(input)
    conv1d_2 = TimeDistributed(Conv1D(filters=100, kernel_size=3, activation='tanh', kernel_initializer='he_uniform'))(conv1d_1)
    maxpool_1 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(conv1d_2)
    conv1d_3 = TimeDistributed(Conv1D(filters=100, kernel_size=3, activation='tanh', kernel_initializer='he_uniform'))(maxpool_1)
    conv1d_4 = TimeDistributed(Conv1D(filters=100, kernel_size=3, activation='tanh', kernel_initializer='he_uniform'))(conv1d_3)
    maxpool_2 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(conv1d_4)
    flatten = TimeDistributed(Flatten())(maxpool_2)
    lstm_1 = LSTM(100, return_sequences=True)(flatten)
    lstm_2 = LSTM(100)(lstm_1)
    dense_1 = Dense(100, activation='tanh')(lstm_2)
    dense_2 = Dense(100, activation='tanh')(dense_1)
    output = Dense(units=n_outputs, activation='softmax')(dense_2)
    model = Model(inputs=input, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [21]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
body_cnn = body_CNN(body_features, n_length, body_outputs)
history = train(body_cnn, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=128, class_weight=class_weight)

Epoch 1/50
4/4 - 5s - loss: 2.7984 - accuracy: 0.1495 - auc_1: 0.4171 - precision_1: 0.0000e+00 - recall_1: 0.0000e+00 - f1_score: 0.1002 - val_loss: 1.4057 - val_accuracy: 0.0588 - val_auc_1: 0.4466 - val_precision_1: 0.0000e+00 - val_recall_1: 0.0000e+00 - val_f1_score: 0.0278 - 5s/epoch - 1s/step
Epoch 2/50
4/4 - 2s - loss: 2.7191 - accuracy: 0.2696 - auc_1: 0.5268 - precision_1: 0.0000e+00 - recall_1: 0.0000e+00 - f1_score: 0.2122 - val_loss: 1.3583 - val_accuracy: 0.1667 - val_auc_1: 0.5373 - val_precision_1: 0.0000e+00 - val_recall_1: 0.0000e+00 - val_f1_score: 0.0917 - 2s/epoch - 391ms/step
Epoch 3/50
4/4 - 1s - loss: 2.7575 - accuracy: 0.1152 - auc_1: 0.5572 - precision_1: 0.0000e+00 - recall_1: 0.0000e+00 - f1_score: 0.0637 - val_loss: 1.3460 - val_accuracy: 0.1569 - val_auc_1: 0.5588 - val_precision_1: 0.0000e+00 - val_recall_1: 0.0000e+00 - val_f1_score: 0.0678 - 1s/epoch - 287ms/step
Epoch 4/50
4/4 - 1s - loss: 2.7566 - accuracy: 0.1078 - auc_1: 0.5572 - precision_1: 0.0000

#### Train ConvLSTM

In [22]:
def body_convLSTM(n_features, n_length, n_outputs):
    input = Input(shape=(1, n_length, n_features))
    convlstm_1 = ConvLSTM1D(filters=250, kernel_size=(5), activation='relu', return_sequences=True, kernel_initializer='glorot_uniform')(input)
    convlstm_2 = ConvLSTM1D(filters=250, kernel_size=(5), activation='relu', kernel_initializer='glorot_uniform')(convlstm_1)
    flatten = Flatten()(convlstm_2)
    dense_1 = Dense(250, activation='relu')(flatten)
    dense_2 = Dense(250, activation='relu')(dense_1)
    output = Dense(units=n_outputs, activation='softmax')(dense_2)
    model = Model(inputs=input, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [23]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
body_convlstm = body_convLSTM(body_features, n_length, body_outputs)
history = train(body_convlstm, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=64, class_weight=class_weight)

Epoch 1/50
7/7 - 23s - loss: 986.7445 - accuracy: 0.2868 - auc_2: 0.5379 - precision_2: 0.2875 - recall_2: 0.2770 - f1_score: 0.2148 - val_loss: 114.7470 - val_accuracy: 0.0588 - val_auc_2: 0.3725 - val_precision_2: 0.0588 - val_recall_2: 0.0588 - val_f1_score: 0.0278 - 23s/epoch - 3s/step
Epoch 2/50
7/7 - 18s - loss: 93.9923 - accuracy: 0.2279 - auc_2: 0.4733 - precision_2: 0.2279 - recall_2: 0.2279 - f1_score: 0.2151 - val_loss: 20.6950 - val_accuracy: 0.4216 - val_auc_2: 0.6144 - val_precision_2: 0.4216 - val_recall_2: 0.4216 - val_f1_score: 0.1483 - 18s/epoch - 3s/step
Epoch 3/50
7/7 - 14s - loss: 18.1522 - accuracy: 0.2353 - auc_2: 0.5546 - precision_2: 0.3682 - recall_2: 0.1985 - f1_score: 0.1880 - val_loss: 1.3886 - val_accuracy: 0.0686 - val_auc_2: 0.3791 - val_precision_2: 0.0000e+00 - val_recall_2: 0.0000e+00 - val_f1_score: 0.0394 - 14s/epoch - 2s/step
Epoch 4/50
7/7 - 14s - loss: 4.2504 - accuracy: 0.1348 - auc_2: 0.4813 - precision_2: 0.2239 - recall_2: 0.0368 - f1_score: 

#### Train RCNN

In [24]:
def body_RCNN(n_features, n_length, n_outputs):
   input = Input(shape=(1, n_length, n_features))
   conv1d_1 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation=PReLU(alpha_initializer=Constant(value=0.25)), kernel_initializer='glorot_normal'))(input)
   bn_1 = TimeDistributed(BatchNormalization())(conv1d_1)
   maxpool_1 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(bn_1)
   conv1d_2 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation=PReLU(alpha_initializer=Constant(value=0.25)), kernel_initializer='glorot_normal'))(maxpool_1)
   bn_2 = TimeDistributed(BatchNormalization())(conv1d_2)
   maxpool_2 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(bn_2)
   flatten = Flatten()(maxpool_2)
   dense_1 = Dense(256, activation='relu')(flatten)
   dense_2 = Dense(256, activation='relu')(dense_1)
   output = Dense(units=n_outputs, activation='softmax')(dense_2)
   model = Model(inputs=input, outputs=output)
   model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
   return model

In [25]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
body_rcnn = body_RCNN(body_features, n_length, body_outputs)
history = train(body_rcnn, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=64, class_weight=class_weight)

Epoch 1/50
7/7 - 5s - loss: 80.9236 - accuracy: 0.3137 - auc_3: 0.5408 - precision_3: 0.3081 - recall_3: 0.2794 - f1_score: 0.2542 - val_loss: 215.7792 - val_accuracy: 0.4216 - val_auc_3: 0.6144 - val_precision_3: 0.4216 - val_recall_3: 0.4216 - val_f1_score: 0.1483 - 5s/epoch - 761ms/step
Epoch 2/50
7/7 - 3s - loss: 21.1887 - accuracy: 0.2745 - auc_3: 0.5376 - precision_3: 0.2730 - recall_3: 0.2696 - f1_score: 0.2457 - val_loss: 171.1971 - val_accuracy: 0.0588 - val_auc_3: 0.3725 - val_precision_3: 0.0588 - val_recall_3: 0.0588 - val_f1_score: 0.0278 - 3s/epoch - 482ms/step
Epoch 3/50
7/7 - 3s - loss: 9.3186 - accuracy: 0.3260 - auc_3: 0.5737 - precision_3: 0.3263 - recall_3: 0.3015 - f1_score: 0.2950 - val_loss: 28.4222 - val_accuracy: 0.4216 - val_auc_3: 0.6144 - val_precision_3: 0.4216 - val_recall_3: 0.4216 - val_f1_score: 0.1483 - 3s/epoch - 476ms/step
Epoch 4/50
7/7 - 3s - loss: 8.2074 - accuracy: 0.3284 - auc_3: 0.5954 - precision_3: 0.3342 - recall_3: 0.3064 - f1_score: 0.2772

### 1.2 Face modality (Action units)

#### Apply Data Augmentation

In [26]:
face_X_train, face_X_test, face_y_train, face_y_test, face_feature_names, c = load_dataset('processed', 'AUs')
face_X = face_X_train.copy()
face_y = face_y_train.copy()

print(face_X_train.shape, face_y_train.shape)
print(face_X_test.shape, face_y_test.shape)

n_length, face_features, face_outputs = face_X_train.shape[2], face_X_train.shape[-1], face_y_train.shape[1]

(495, 1, 350, 10) (495, 4)
(102, 1, 350, 10) (102, 4)


In [27]:
jitter_X = jitter(face_X, sigma=0.03)

In [28]:
reshaped_X = face_X.reshape((face_X.shape[0], face_X.shape[2], face_X.shape[-1]))
rotated_X = rotation(reshaped_X)
rotated_X = rotated_X.reshape((rotated_X.shape[0], 1, rotated_X.shape[1], rotated_X.shape[-1]))

In [29]:
augmented_X = np.concatenate((rotated_X[0:250], jitter_X[0:250]), axis=0)
add_y = np.concatenate((face_y[0:250], face_y[0:250]), axis=0)
full_X = np.concatenate((augmented_X, face_X), axis=0)
full_y = np.concatenate((add_y, face_y), axis=0)
full_X.shape

(495, 1, 350, 10)

In [30]:
umild, lmoderate, lmild, umoderate = np.sum(full_y, axis=0)
total = umild + lmoderate + lmild + umoderate

weight_for_0 = (1 / umild) * (total / 2.0)
weight_for_1 = (1 / lmoderate) * (total / 2.0)
weight_for_2 = (1 / lmild) * (total / 2.0)
weight_for_3 = (1 / umoderate) * (total / 2.0)

class_weight = {0: weight_for_0, 1: weight_for_1, 2: weight_for_2, 3: weight_for_3}

print('Weight for Upper Body Pain Mild Level: {:.2f}'.format(weight_for_0))
print('Weight for Upper Body Pain Moderate Level: {:.2f}'.format(weight_for_3))
print('Weight for Lower Body Pain Mild Level: {:.2f}'.format(weight_for_2))
print('Weight for Lower Body Pain Moderate Level: {:.2f}'.format(weight_for_1))

Weight for Upper Body Pain Mild Level: 0.99
Weight for Upper Body Pain Moderate Level: 7.73
Weight for Lower Body Pain Mild Level: 1.60
Weight for Lower Body Pain Moderate Level: 4.19


In [31]:
for train_index, val_index in kf.split(full_X):
    continue

#### Train BiLSTM

In [32]:
def face_biLSTM(n_features, n_length, n_outputs):
    input = Input(shape=(1, n_length, n_features))
    bilstm_1 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='tanh', return_sequences=True, kernel_initializer='he_normal'))(input)
    bilstm_2 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='tanh', return_sequences=True, kernel_initializer='he_normal'))(bilstm_1)
    bilstm_3 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='tanh', kernel_initializer='he_normal'))(bilstm_2)
    flatten = Flatten()(bilstm_3)
    dense = Dense(100, activation='tanh')(flatten)
    dense = Dense(100, activation='tanh')(flatten)
    output = Dense(units=n_outputs, activation='softmax')(dense)
    model = Model(inputs=input, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [33]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
face_biLSTM = face_biLSTM(face_features, n_length, face_outputs)
history = train(face_biLSTM, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=32, class_weight=class_weight)

Epoch 1/50
13/13 - 24s - loss: 3.7930 - accuracy: 0.2702 - auc_4: 0.5162 - precision_4: 0.2576 - recall_4: 0.0859 - f1_score: 0.2432 - val_loss: 1.4804 - val_accuracy: 0.3232 - val_auc_4: 0.5805 - val_precision_4: 0.4359 - val_recall_4: 0.1717 - val_f1_score: 0.2352 - 24s/epoch - 2s/step
Epoch 2/50
13/13 - 11s - loss: 2.9361 - accuracy: 0.3207 - auc_4: 0.6084 - precision_4: 0.4127 - recall_4: 0.1313 - f1_score: 0.2707 - val_loss: 1.6289 - val_accuracy: 0.2525 - val_auc_4: 0.5557 - val_precision_4: 0.1667 - val_recall_4: 0.0505 - val_f1_score: 0.2237 - 11s/epoch - 869ms/step
Epoch 3/50
13/13 - 11s - loss: 2.7028 - accuracy: 0.2449 - auc_4: 0.5830 - precision_4: 0.3772 - recall_4: 0.1086 - f1_score: 0.2472 - val_loss: 1.4301 - val_accuracy: 0.2323 - val_auc_4: 0.5843 - val_precision_4: 0.3429 - val_recall_4: 0.1212 - val_f1_score: 0.2128 - 11s/epoch - 855ms/step
Epoch 4/50
13/13 - 11s - loss: 2.2874 - accuracy: 0.4268 - auc_4: 0.7311 - precision_4: 0.6396 - recall_4: 0.1793 - f1_score: 0

#### Train CNN+LSTM

In [34]:
def face_CNN(n_features, n_length, n_outputs):
    input = Input(shape=(1, n_length, n_features))
    conv1d_1 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation='relu', kernel_initializer='glorot_normal'))(input)
    conv1d_2 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation='relu', kernel_initializer='glorot_normal'))(conv1d_1)
    maxpool_1 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(conv1d_2)
    conv1d_3 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation='relu', kernel_initializer='glorot_normal'))(maxpool_1)
    conv1d_4 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation='relu', kernel_initializer='glorot_normal'))(conv1d_3)
    maxpool_2 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(conv1d_4)
    flatten = TimeDistributed(Flatten())(maxpool_2)
    lstm_1 = LSTM(250, return_sequences=True)(flatten)
    lstm_2 = LSTM(250)(lstm_1)
    dense_1 = Dense(250, activation='relu')(lstm_2)
    dense_2 = Dense(250, activation='relu')(dense_1)
    output = Dense(units=n_outputs, activation='softmax')(dense_2)
    model = Model(inputs=input, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [35]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
face_cnn = face_CNN(face_features, n_length, face_outputs)
history = train(face_cnn, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=128, class_weight=class_weight)

Epoch 1/50
4/4 - 8s - loss: 2.7594 - accuracy: 0.3384 - auc_5: 0.5600 - precision_5: 0.0000e+00 - recall_5: 0.0000e+00 - f1_score: 0.2445 - val_loss: 1.3813 - val_accuracy: 0.2929 - val_auc_5: 0.5795 - val_precision_5: 0.0000e+00 - val_recall_5: 0.0000e+00 - val_f1_score: 0.1550 - 8s/epoch - 2s/step
Epoch 2/50
4/4 - 4s - loss: 2.7557 - accuracy: 0.1540 - auc_5: 0.5025 - precision_5: 0.0000e+00 - recall_5: 0.0000e+00 - f1_score: 0.1033 - val_loss: 1.3477 - val_accuracy: 0.3737 - val_auc_5: 0.5907 - val_precision_5: 0.0000e+00 - val_recall_5: 0.0000e+00 - val_f1_score: 0.1827 - 4s/epoch - 938ms/step
Epoch 3/50
4/4 - 4s - loss: 2.7543 - accuracy: 0.3157 - auc_5: 0.5742 - precision_5: 0.0000e+00 - recall_5: 0.0000e+00 - f1_score: 0.1910 - val_loss: 1.3275 - val_accuracy: 0.4242 - val_auc_5: 0.6585 - val_precision_5: 0.0000e+00 - val_recall_5: 0.0000e+00 - val_f1_score: 0.2238 - 4s/epoch - 942ms/step
Epoch 4/50
4/4 - 4s - loss: 2.7605 - accuracy: 0.3561 - auc_5: 0.6639 - precision_5: 0.0000

#### Train ConvLSTM

In [36]:
def face_convLSTM(n_features, n_length, n_outputs):
    input = Input(shape=(1, n_length, n_features))
    convlstm_1 = ConvLSTM1D(filters=250, kernel_size=(5), activation='relu', return_sequences=True, kernel_initializer='he_uniform')(input)
    convlstm_2 = ConvLSTM1D(filters=250, kernel_size=(5), activation='relu', kernel_initializer='he_uniform')(convlstm_1)
    flatten = Flatten()(convlstm_2)
    dense_1 = Dense(250, activation='relu')(flatten)
    dense_2 = Dense(250, activation='relu')(dense_1)
    output = Dense(units=n_outputs, activation='softmax')(dense_2)
    model = Model(inputs=input, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [37]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
face_convlstm = face_convLSTM(face_features, n_length, face_outputs)
history = train(face_convlstm, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=64, class_weight=class_weight)

Epoch 1/50
7/7 - 18s - loss: 2.8416 - accuracy: 0.2929 - auc_6: 0.5818 - precision_6: 0.5000 - recall_6: 0.0076 - f1_score: 0.2281 - val_loss: 1.3104 - val_accuracy: 0.3030 - val_auc_6: 0.6792 - val_precision_6: 0.2000 - val_recall_6: 0.0101 - val_f1_score: 0.1521 - 18s/epoch - 3s/step
Epoch 2/50
7/7 - 14s - loss: 2.7070 - accuracy: 0.3485 - auc_6: 0.6753 - precision_6: 0.5000 - recall_6: 0.0101 - f1_score: 0.2503 - val_loss: 1.3337 - val_accuracy: 0.2424 - val_auc_6: 0.6433 - val_precision_6: 0.3333 - val_recall_6: 0.0303 - val_f1_score: 0.1297 - 14s/epoch - 2s/step
Epoch 3/50
7/7 - 14s - loss: 2.6173 - accuracy: 0.3384 - auc_6: 0.6831 - precision_6: 0.6154 - recall_6: 0.0202 - f1_score: 0.2937 - val_loss: 1.3845 - val_accuracy: 0.2424 - val_auc_6: 0.5620 - val_precision_6: 0.2667 - val_recall_6: 0.0404 - val_f1_score: 0.1809 - 14s/epoch - 2s/step
Epoch 4/50
7/7 - 14s - loss: 2.4998 - accuracy: 0.3687 - auc_6: 0.6834 - precision_6: 0.7273 - recall_6: 0.0606 - f1_score: 0.3104 - val_lo

#### Train RCNN

In [38]:
def face_RCNN(n_features, n_length, n_outputs):
   input = Input(shape=(1, n_length, n_features))
   conv1d_1 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation=PReLU(alpha_initializer=Constant(value=0.25)), kernel_initializer='he_normal'))(input)
   bn_1 = TimeDistributed(BatchNormalization())(conv1d_1)
   maxpool_1 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(bn_1)
   conv1d_2 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation=PReLU(alpha_initializer=Constant(value=0.25)), kernel_initializer='he_normal'))(maxpool_1)
   bn_2 = TimeDistributed(BatchNormalization())(conv1d_2)
   maxpool_2 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(bn_2)
   flatten = Flatten()(maxpool_2)
   dense_1 = Dense(256, activation='tanh')(flatten)
   dense_2 = Dense(256, activation='tanh')(dense_1)
   output = Dense(units=n_outputs, activation='softmax')(dense_2)
   model = Model(inputs=input, outputs=output)
   model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
   return model

In [39]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
face_rcnn = face_RCNN(face_features, n_length, face_outputs)
history = train(face_rcnn, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=256, class_weight=class_weight)

Epoch 1/50
2/2 - 5s - loss: 5.1975 - accuracy: 0.2702 - auc_7: 0.4829 - precision_7: 0.2438 - recall_7: 0.1237 - f1_score: 0.2027 - val_loss: 1.5436 - val_accuracy: 0.4141 - val_auc_7: 0.6426 - val_precision_7: 0.5000 - val_recall_7: 0.2626 - val_f1_score: 0.2409 - 5s/epoch - 2s/step
Epoch 2/50
2/2 - 3s - loss: 5.7187 - accuracy: 0.2500 - auc_7: 0.4762 - precision_7: 0.2349 - recall_7: 0.1970 - f1_score: 0.2311 - val_loss: 1.3333 - val_accuracy: 0.3131 - val_auc_7: 0.6675 - val_precision_7: 0.2727 - val_recall_7: 0.0303 - val_f1_score: 0.2411 - 3s/epoch - 1s/step
Epoch 3/50
2/2 - 3s - loss: 3.9639 - accuracy: 0.4040 - auc_7: 0.5642 - precision_7: 0.4028 - recall_7: 0.2146 - f1_score: 0.3143 - val_loss: 1.3573 - val_accuracy: 0.4141 - val_auc_7: 0.6160 - val_precision_7: 0.5882 - val_recall_7: 0.1010 - val_f1_score: 0.2375 - 3s/epoch - 1s/step
Epoch 4/50
2/2 - 3s - loss: 3.1503 - accuracy: 0.4520 - auc_7: 0.5807 - precision_7: 0.5743 - recall_7: 0.1465 - f1_score: 0.3946 - val_loss: 1.3

## 2. Early Fusion - Fusion is performed at feature level

Early fusion concatenates features at the input level, then all the work is on the classifier. 

My initial assumption is that perhaps early fusion is not the best idea because the face modality is missing about half the times (OpenFace could not capture facial action units from the videos), which makes the dataset smaller. 
I did not replace the missing facial values because I thought null values will bring no benefit (I assume?) and adding for example zero would mean the absence of action units which I'm afraid might actually "trick" the model in the case where a person has pain and the facial modality was not available because of technical issues, not because of an actual lack of pain.

### About the data

In [40]:
early_X_train, early_X_test, early_y_train, early_y_test = load_earlyfusion_dataset('processed')
early_X = early_X_train.copy()
early_y = early_y_train.copy()

print(early_X_train.shape, early_y_train.shape)
print(early_X_test.shape, early_y_test.shape)

n_length, early_features, early_outputs = early_X_train.shape[2], early_X_train.shape[-1], early_y_train.shape[1]

(495, 1, 350, 48) (495, 4)
(102, 1, 350, 48) (102, 4)


In [41]:
jitter_X = jitter(early_X, sigma=0.03)

In [42]:
reshaped_X = early_X.reshape((early_X.shape[0], early_X.shape[2], early_X.shape[-1]))
rotated_X = rotation(reshaped_X)
rotated_X = rotated_X.reshape((rotated_X.shape[0], 1, rotated_X.shape[1], rotated_X.shape[-1]))

In [43]:
augmented_X = np.concatenate((rotated_X[0:250], jitter_X[0:250]), axis=0)
add_y = np.concatenate((early_y[0:250], early_y[0:250]), axis=0)
full_X = np.concatenate((augmented_X, early_X), axis=0)
full_y = np.concatenate((add_y, early_y), axis=0)
full_X.shape

(495, 1, 350, 48)

In [44]:
umild, lmoderate, lmild, umoderate = np.sum(early_y, axis=0)
total = umild + lmoderate + lmild + umoderate

weight_for_0 = (1 / umild) * (total / 2.0)
weight_for_1 = (1 / lmoderate) * (total / 2.0)
weight_for_2 = (1 / lmild) * (total / 2.0)
weight_for_3 = (1 / umoderate) * (total / 2.0)

class_weight = {0: weight_for_0, 1: weight_for_1, 2: weight_for_2, 3: weight_for_3}

print('Weight for Upper Body Pain Mild Level: {:.2f}'.format(weight_for_0))
print('Weight for Upper Body Pain Moderate Level: {:.2f}'.format(weight_for_3))
print('Weight for Lower Body Pain Mild Level: {:.2f}'.format(weight_for_2))
print('Weight for Lower Body Pain Moderate Level: {:.2f}'.format(weight_for_1))

Weight for Upper Body Pain Mild Level: 0.99
Weight for Upper Body Pain Moderate Level: 7.73
Weight for Lower Body Pain Mild Level: 1.60
Weight for Lower Body Pain Moderate Level: 4.19


In [45]:
for train_index, val_index in kf.split(full_X):
    continue

### Train BiLSTM (with early fusion)

In [46]:
def early_biLSTM(n_features, n_length, n_outputs):
    input = Input(shape=(1, n_length, n_features))
    bilstm_1 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='relu', return_sequences=True, kernel_initializer='glorot_normal'))(input)
    bilstm_2 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='relu', return_sequences=True, kernel_initializer='glorot_normal'))(bilstm_1)
    bilstm_3 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='relu', kernel_initializer='glorot_normal'))(bilstm_2)
    flatten = Flatten()(bilstm_3)
    dense = Dense(100, activation='relu')(flatten)
    dense = Dense(100, activation='relu')(flatten)
    output = Dense(units=n_outputs, activation='softmax')(dense)
    model = Model(inputs=input, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [47]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
early_bilstm = early_biLSTM(early_features, n_length, early_outputs)
history = train(early_bilstm, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=64, class_weight=class_weight)

Epoch 1/50
7/7 - 25s - loss: 22.7472 - accuracy: 0.2753 - auc_8: 0.5077 - precision_8: 0.2595 - recall_8: 0.2424 - f1_score: 0.2454 - val_loss: 2.3256 - val_accuracy: 0.2121 - val_auc_8: 0.6297 - val_precision_8: 0.2088 - val_recall_8: 0.1919 - val_f1_score: 0.1656 - 25s/epoch - 4s/step
Epoch 2/50
7/7 - 11s - loss: 4.8231 - accuracy: 0.3359 - auc_8: 0.6149 - precision_8: 0.3260 - recall_8: 0.2626 - f1_score: 0.3091 - val_loss: 1.9159 - val_accuracy: 0.2424 - val_auc_8: 0.6134 - val_precision_8: 0.2703 - val_recall_8: 0.2020 - val_f1_score: 0.1729 - 11s/epoch - 2s/step
Epoch 3/50
7/7 - 11s - loss: 2.8630 - accuracy: 0.4040 - auc_8: 0.7000 - precision_8: 0.4106 - recall_8: 0.2551 - f1_score: 0.3696 - val_loss: 1.5425 - val_accuracy: 0.3535 - val_auc_8: 0.6032 - val_precision_8: 0.3208 - val_recall_8: 0.1717 - val_f1_score: 0.2786 - 11s/epoch - 2s/step
Epoch 4/50
7/7 - 11s - loss: 2.5951 - accuracy: 0.4091 - auc_8: 0.7067 - precision_8: 0.4349 - recall_8: 0.2955 - f1_score: 0.3775 - val_l

### Train CNN+LSTM (with early fusion)

In [48]:
def early_CNN(n_features, n_length, n_outputs):
    input = Input(shape=(1, n_length, n_features))
    conv1d_1 = TimeDistributed(Conv1D(filters=250, kernel_size=3, activation='tanh', kernel_initializer='glorot_normal'))(input)
    conv1d_2 = TimeDistributed(Conv1D(filters=250, kernel_size=3, activation='tanh', kernel_initializer='glorot_normal'))(conv1d_1)
    maxpool_1 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(conv1d_2)
    conv1d_3 = TimeDistributed(Conv1D(filters=250, kernel_size=3, activation='tanh', kernel_initializer='glorot_normal'))(maxpool_1)
    conv1d_4 = TimeDistributed(Conv1D(filters=250, kernel_size=3, activation='tanh', kernel_initializer='glorot_normal'))(conv1d_3)
    maxpool_2 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(conv1d_4)
    flatten = TimeDistributed(Flatten())(maxpool_2)
    lstm_1 = LSTM(250, return_sequences=True)(flatten)
    lstm_2 = LSTM(250)(lstm_1)
    dense_1 = Dense(250, activation='tanh')(lstm_2)
    dense_2 = Dense(250, activation='tanh')(dense_1)
    output = Dense(units=n_outputs, activation='softmax')(dense_2)
    model = Model(inputs=input, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [49]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
early_cnn = early_CNN(early_features, n_length, early_outputs)
history = train(early_cnn, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=32, class_weight=class_weight)

Epoch 1/50
13/13 - 11s - loss: 2.8380 - accuracy: 0.2601 - auc_9: 0.5120 - precision_9: 0.0000e+00 - recall_9: 0.0000e+00 - f1_score: 0.1974 - val_loss: 1.3616 - val_accuracy: 0.2727 - val_auc_9: 0.6455 - val_precision_9: 0.0000e+00 - val_recall_9: 0.0000e+00 - val_f1_score: 0.1071 - 11s/epoch - 846ms/step
Epoch 2/50
13/13 - 7s - loss: 2.7634 - accuracy: 0.2955 - auc_9: 0.6125 - precision_9: 0.0000e+00 - recall_9: 0.0000e+00 - f1_score: 0.1652 - val_loss: 1.3595 - val_accuracy: 0.3636 - val_auc_9: 0.6819 - val_precision_9: 0.0000e+00 - val_recall_9: 0.0000e+00 - val_f1_score: 0.1932 - 7s/epoch - 529ms/step
Epoch 3/50
13/13 - 7s - loss: 2.7662 - accuracy: 0.2904 - auc_9: 0.5854 - precision_9: 0.0000e+00 - recall_9: 0.0000e+00 - f1_score: 0.2019 - val_loss: 1.3522 - val_accuracy: 0.2727 - val_auc_9: 0.6357 - val_precision_9: 0.0000e+00 - val_recall_9: 0.0000e+00 - val_f1_score: 0.1071 - 7s/epoch - 534ms/step
Epoch 4/50
13/13 - 7s - loss: 2.7668 - accuracy: 0.3535 - auc_9: 0.6790 - precis

### Train ConvLSTM (with early fusion)

In [50]:
def early_convLSTM(n_features, n_length, n_outputs):
    input = Input(shape=(1, n_length, n_features))
    convlstm_1 = ConvLSTM1D(filters=250, kernel_size=(5), activation='relu', return_sequences=True, kernel_initializer='glorot_uniform')(input)
    convlstm_2 = ConvLSTM1D(filters=250, kernel_size=(5), activation='relu', kernel_initializer='glorot_uniform')(convlstm_1)
    flatten = Flatten()(convlstm_2)
    dense_1 = Dense(250, activation='relu')(flatten)
    dense_2 = Dense(250, activation='relu')(dense_1)
    output = Dense(units=n_outputs, activation='softmax')(dense_2)
    model = Model(inputs=input, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [51]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
early_convlstm = early_convLSTM(early_features, n_length, early_outputs)
history = train(early_convlstm, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=64, class_weight=class_weight)

Epoch 1/50
7/7 - 19s - loss: 20.1145 - accuracy: 0.3056 - auc_10: 0.5820 - precision_10: 0.3071 - recall_10: 0.3056 - f1_score: 0.2278 - val_loss: 2.5398 - val_accuracy: 0.1616 - val_auc_10: 0.4670 - val_precision_10: 0.1429 - val_recall_10: 0.1313 - val_f1_score: 0.0892 - 19s/epoch - 3s/step
Epoch 2/50
7/7 - 13s - loss: 3.2289 - accuracy: 0.1338 - auc_10: 0.4657 - precision_10: 0.1508 - recall_10: 0.0682 - f1_score: 0.1254 - val_loss: 1.2205 - val_accuracy: 0.4242 - val_auc_10: 0.7144 - val_precision_10: 0.4286 - val_recall_10: 0.0909 - val_f1_score: 0.3078 - 13s/epoch - 2s/step
Epoch 3/50
7/7 - 13s - loss: 2.7084 - accuracy: 0.3636 - auc_10: 0.6858 - precision_10: 0.5472 - recall_10: 0.0732 - f1_score: 0.2670 - val_loss: 1.3969 - val_accuracy: 0.2323 - val_auc_10: 0.5137 - val_precision_10: 0.7500 - val_recall_10: 0.0303 - val_f1_score: 0.1879 - 13s/epoch - 2s/step
Epoch 4/50
7/7 - 13s - loss: 2.6001 - accuracy: 0.1818 - auc_10: 0.5121 - precision_10: 0.4571 - recall_10: 0.0404 - f1_

### Train RCNN (with early fusion)

In [52]:
def early_RCNN(n_features, n_length, n_outputs):
   input = Input(shape=(1, n_length, n_features))
   conv1d_1 = TimeDistributed(Conv1D(filters=100, kernel_size=5, activation=PReLU(alpha_initializer=Constant(value=0.25)), kernel_initializer='he_uniform'))(input)
   bn_1 = TimeDistributed(BatchNormalization())(conv1d_1)
   maxpool_1 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(bn_1)
   conv1d_2 = TimeDistributed(Conv1D(filters=100, kernel_size=5, activation=PReLU(alpha_initializer=Constant(value=0.25)), kernel_initializer='he_uniform'))(maxpool_1)
   bn_2 = TimeDistributed(BatchNormalization())(conv1d_2)
   maxpool_2 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(bn_2)
   flatten = Flatten()(maxpool_2)
   dense_1 = Dense(256, activation='relu')(flatten)
   dense_2 = Dense(256, activation='relu')(dense_1)
   output = Dense(units=n_outputs, activation='softmax')(dense_2)
   model = Model(inputs=input, outputs=output)
   model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
   return model

In [53]:
X_train, X_val = full_X[train_index], full_X[val_index]
y_train, y_val = full_y[train_index], full_y[val_index]
early_rcnn = early_RCNN(early_features, n_length, early_outputs)
history = train(early_rcnn, X_train, y_train, X_val, y_val, epochs=epochs, batch_size=256, class_weight=class_weight)

Epoch 1/50
2/2 - 3s - loss: 15.7989 - accuracy: 0.3232 - auc_11: 0.5508 - precision_11: 0.3714 - recall_11: 0.1970 - f1_score: 0.2820 - val_loss: 104.7394 - val_accuracy: 0.2828 - val_auc_11: 0.5172 - val_precision_11: 0.2828 - val_recall_11: 0.2828 - val_f1_score: 0.1798 - 3s/epoch - 1s/step
Epoch 2/50
2/2 - 1s - loss: 7.3623 - accuracy: 0.2576 - auc_11: 0.5297 - precision_11: 0.2622 - recall_11: 0.2449 - f1_score: 0.2325 - val_loss: 100.8942 - val_accuracy: 0.1414 - val_auc_11: 0.4312 - val_precision_11: 0.1414 - val_recall_11: 0.1414 - val_f1_score: 0.0684 - 1s/epoch - 547ms/step
Epoch 3/50
2/2 - 1s - loss: 4.7721 - accuracy: 0.3813 - auc_11: 0.6531 - precision_11: 0.3844 - recall_11: 0.3359 - f1_score: 0.3422 - val_loss: 83.9243 - val_accuracy: 0.5253 - val_auc_11: 0.6835 - val_precision_11: 0.5253 - val_recall_11: 0.5253 - val_f1_score: 0.1722 - 1s/epoch - 543ms/step
Epoch 4/50
2/2 - 1s - loss: 4.2352 - accuracy: 0.4091 - auc_11: 0.6911 - precision_11: 0.4185 - recall_11: 0.3889 -

## 3. Late Fusion - Fusion is performed at decision level

In late fusion, I test two approaches, in both uni-modal classifiers are trained individually, and then either: 

- Their final dense (decision) layer is concatenated to produce the final prediction; or 

- Ensemble the uni-modal classifiers.

Just as with early fusion, we have the issue that the face modality is missing about half of the times and therefore, I can only use late fusion if both modalities are present.

### Apply Data Augmentation

In [54]:
body_X_train, body_X_test, y_train, y_test = load_latefusion_dataset('processed', 'skeleton')
body_X = body_X_train.copy()
y = y_train.copy()

print(body_X_train.shape, y_train.shape)
print(body_X_test.shape, y_test.shape)

n_length, body_features, body_outputs = body_X_train.shape[2], body_X_train.shape[-1], y_train.shape[1]

(495, 1, 350, 38) (495, 4)
(102, 1, 350, 38) (102, 4)


In [55]:
jitter_X = jitter(body_X, sigma=0.03)

In [56]:
reshaped_X = body_X.reshape((body_X.shape[0], body_X.shape[2], body_X.shape[-1]))
rotated_X = rotation(reshaped_X)
rotated_X = rotated_X.reshape((rotated_X.shape[0], 1, rotated_X.shape[1], rotated_X.shape[-1]))

In [57]:
augmented_X = np.concatenate((rotated_X[0:250], jitter_X[0:250]), axis=0)
add_y = np.concatenate((y[0:250], y[0:250]), axis=0)
body_X = np.concatenate((augmented_X, body_X), axis=0)
y = np.concatenate((add_y, y), axis=0) 
body_X.shape

(495, 1, 350, 38)

In [58]:
jitter_X = jitter(face_X, sigma=0.03)

In [59]:
reshaped_X = face_X.reshape((face_X.shape[0], face_X.shape[2], face_X.shape[-1]))
rotated_X = rotation(reshaped_X)
rotated_X = rotated_X.reshape((rotated_X.shape[0], 1, rotated_X.shape[1], rotated_X.shape[-1]))

In [60]:
augmented_X = np.concatenate((rotated_X[0:250], jitter_X[0:250]), axis=0)
face_X = np.concatenate((augmented_X, face_X), axis=0)
face_X.shape

(495, 1, 350, 10)

In [61]:
umild, lmoderate, lmild, umoderate = np.sum(y, axis=0)
total = umild + lmoderate + lmild + umoderate

weight_for_0 = (1 / umild) * (total / 2.0)
weight_for_1 = (1 / lmoderate) * (total / 2.0)
weight_for_2 = (1 / lmild) * (total / 2.0)
weight_for_3 = (1 / umoderate) * (total / 2.0)

class_weight = {0: weight_for_0, 1: weight_for_1, 2: weight_for_2, 3: weight_for_3}

print('Weight for Upper Body Pain Mild Level: {:.2f}'.format(weight_for_0))
print('Weight for Upper Body Pain Moderate Level: {:.2f}'.format(weight_for_3))
print('Weight for Lower Body Pain Mild Level: {:.2f}'.format(weight_for_2))
print('Weight for Lower Body Pain Moderate Level: {:.2f}'.format(weight_for_1))

Weight for Upper Body Pain Mild Level: 0.99
Weight for Upper Body Pain Moderate Level: 7.73
Weight for Lower Body Pain Mild Level: 1.60
Weight for Lower Body Pain Moderate Level: 4.19


In [62]:
for train_index, val_index in kf.split(body_X):
    continue

### BiLSTM (concatenate decision layer)

In [63]:
def build_fusioned_biLSTM(body_features, face_features, n_length, n_outputs):
    input_1 = Input(shape=(1, n_length, body_features))
    bilstm_1 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='relu', return_sequences=True, kernel_initializer='glorot_normal'))(input_1)
    bilstm_11 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='relu', return_sequences=True, kernel_initializer='glorot_normal'))(bilstm_1)
    bilstm_111 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='relu'))(bilstm_11)
    flatten_1 = Flatten()(bilstm_111)
    dense_1 = Dense(100, activation='relu')(flatten_1)
    dense_11 = Dense(100, activation='relu')(dense_1)

    input_2 = Input(shape=(1, n_length, face_features))
    bilstm_2 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='tanh', return_sequences=True, kernel_initializer='he_normal'))(input_2)
    bilstm_22 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='tanh', return_sequences=True, kernel_initializer='he_normal'))(bilstm_2)
    bilstm_222 = Bidirectional(ConvLSTM1D(filters=100, kernel_size=(3), activation='tanh'))(bilstm_22)
    flatten_2 = Flatten()(bilstm_222)
    dense_2 = Dense(100, activation='tanh')(flatten_2)
    dense_22 = Dense(100, activation='tanh')(dense_2)

    concat = Concatenate()([dense_11, dense_22])
    output = Dense(units=n_outputs, activation='softmax')(concat)
    model = Model(inputs=[input_1, input_2], outputs=[output])
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [64]:
body_X_train, body_X_val = body_X[train_index], body_X[val_index]
face_X_train, face_X_val = face_X[train_index], face_X[val_index]
y_train, y_val = y[train_index], y[val_index]
late_biLSTM = build_fusioned_biLSTM(body_features, face_features, n_length, body_outputs)
history = train_fusioned(late_biLSTM, body_X_train, face_X_train, y_train, body_X_val, face_X_val, y_val, epochs=epochs, batch_size=32, class_weight=class_weight)

Epoch 1/50
13/13 - 49s - loss: 4.1068 - accuracy: 0.2626 - auc_12: 0.5278 - precision_12: 0.2799 - recall_12: 0.1894 - f1_score: 0.2363 - val_loss: 1.3854 - val_accuracy: 0.3131 - val_auc_12: 0.5868 - val_precision_12: 0.4000 - val_recall_12: 0.0404 - val_f1_score: 0.2865 - 49s/epoch - 4s/step
Epoch 2/50
13/13 - 21s - loss: 2.6522 - accuracy: 0.3838 - auc_12: 0.6715 - precision_12: 0.5106 - recall_12: 0.1212 - f1_score: 0.3249 - val_loss: 1.5114 - val_accuracy: 0.2323 - val_auc_12: 0.5560 - val_precision_12: 0.2750 - val_recall_12: 0.1111 - val_f1_score: 0.1928 - 21s/epoch - 2s/step
Epoch 3/50
13/13 - 21s - loss: 2.3135 - accuracy: 0.4470 - auc_12: 0.7152 - precision_12: 0.5308 - recall_12: 0.1742 - f1_score: 0.4185 - val_loss: 1.3937 - val_accuracy: 0.3434 - val_auc_12: 0.5996 - val_precision_12: 0.3939 - val_recall_12: 0.1313 - val_f1_score: 0.3102 - 21s/epoch - 2s/step
Epoch 4/50
13/13 - 21s - loss: 2.0944 - accuracy: 0.4646 - auc_12: 0.7571 - precision_12: 0.6047 - recall_12: 0.262

### CNN+LSTM (concatenate decision layer)

In [65]:
def build_fusioned_CNN(body_features, face_features, n_length, n_outputs):
    input_1 = Input(shape=(1, n_length, body_features))
    conv1d_1 = TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='tanh', kernel_initializer='he_uniform'))(input_1)
    conv1d_11 = TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='tanh', kernel_initializer='he_uniform'))(conv1d_1)
    maxpool_1 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(conv1d_11)
    conv1d_111 = TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='tanh', kernel_initializer='he_uniform'))(maxpool_1)
    conv1d_1111 = TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='tanh', kernel_initializer='he_uniform'))(conv1d_111)
    maxpool_11 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(conv1d_1111)
    flatten_1 = TimeDistributed(Flatten())(maxpool_11)
    lstm_1 = LSTM(100, return_sequences=True)(flatten_1)
    lstm_11 = LSTM(100)(lstm_1)
    dense_1 = Dense(100, activation='tanh')(lstm_11)
    dense_11 = Dense(100, activation='tanh')(dense_1)

    input_2 = Input(shape=(1, n_length, face_features))
    conv1d_2 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation='relu', kernel_initializer='glorot_normal'))(input_2)
    conv1d_22 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation='relu', kernel_initializer='glorot_normal'))(conv1d_2)
    maxpool_2 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(conv1d_22)
    conv1d_222 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation='relu', kernel_initializer='glorot_normal'))(maxpool_2)
    conv1d_2222 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation='relu', kernel_initializer='glorot_normal'))(conv1d_222)
    maxpool_22 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(conv1d_2222)
    flatten_2 = TimeDistributed(Flatten())(maxpool_22)
    lstm_2 = LSTM(250, return_sequences=True)(flatten_2)
    lstm_22 = LSTM(250)(lstm_2)
    dense_2 = Dense(250, activation='relu')(lstm_22)
    dense_22 = Dense(250, activation='relu')(dense_2)

    concat = Concatenate()([dense_11, dense_22])
    output = Dense(units=n_outputs, activation='softmax')(concat)
    model = Model(inputs=[input_1, input_2], outputs=[output])
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [66]:
body_X_train, body_X_val = body_X[train_index], body_X[val_index]
face_X_train, face_X_val = face_X[train_index], face_X[val_index]
y_train, y_val = y[train_index], y[val_index]
late_cnn = build_fusioned_CNN(body_features, face_features, n_length, body_outputs)
history = train_fusioned(late_cnn, body_X_train, face_X_train, y_train, body_X_val, face_X_val, y_val, epochs=epochs, batch_size=128, class_weight=class_weight)

Epoch 1/50
4/4 - 12s - loss: 2.7644 - accuracy: 0.3712 - auc_13: 0.6045 - precision_13: 0.0000e+00 - recall_13: 0.0000e+00 - f1_score: 0.2209 - val_loss: 1.3572 - val_accuracy: 0.4343 - val_auc_13: 0.6675 - val_precision_13: 0.0000e+00 - val_recall_13: 0.0000e+00 - val_f1_score: 0.2381 - 12s/epoch - 3s/step
Epoch 2/50
4/4 - 5s - loss: 2.7499 - accuracy: 0.3712 - auc_13: 0.6571 - precision_13: 0.0000e+00 - recall_13: 0.0000e+00 - f1_score: 0.1993 - val_loss: 1.3652 - val_accuracy: 0.3232 - val_auc_13: 0.6134 - val_precision_13: 0.0000e+00 - val_recall_13: 0.0000e+00 - val_f1_score: 0.1929 - 5s/epoch - 1s/step
Epoch 3/50
4/4 - 5s - loss: 2.7304 - accuracy: 0.2854 - auc_13: 0.6188 - precision_13: 0.0000e+00 - recall_13: 0.0000e+00 - f1_score: 0.2437 - val_loss: 1.3335 - val_accuracy: 0.3838 - val_auc_13: 0.6578 - val_precision_13: 0.0000e+00 - val_recall_13: 0.0000e+00 - val_f1_score: 0.2237 - 5s/epoch - 1s/step
Epoch 4/50
4/4 - 5s - loss: 2.7044 - accuracy: 0.2778 - auc_13: 0.6059 - prec

### ConvLSTM (concatenate decision layer)

In [67]:
def build_fusioned_convLSTM(body_features, face_features, n_length, n_outputs):
    input_1 = Input(shape=(1, n_length, body_features))
    convlstm_1 = ConvLSTM1D(filters=250, kernel_size=(5), activation='relu', return_sequences=True, kernel_initializer='glorot_uniform')(input_1)
    convlstm_11 = ConvLSTM1D(filters=250, kernel_size=(5), activation='relu', kernel_initializer='glorot_uniform')(convlstm_1)
    flatten_1 = Flatten()(convlstm_11)
    dense_1 = Dense(250, activation='relu')(flatten_1)
    dense_11 = Dense(250, activation='relu')(dense_1)

    input_2 = Input(shape=(1, n_length, face_features))
    convlstm_2 = ConvLSTM1D(filters=250, kernel_size=(5), activation='relu', return_sequences=True, kernel_initializer='he_uniform')(input_2)
    convlstm_22 = ConvLSTM1D(filters=250, kernel_size=(5), activation='relu', kernel_initializer='he_uniform')(convlstm_2)
    flatten_2 = Flatten()(convlstm_22)
    dense_2 = Dense(250, activation='relu')(flatten_2)
    dense_22 = Dense(250, activation='relu')(dense_2)

    concat = Concatenate()([dense_11, dense_22])
    output = Dense(units=n_outputs, activation='softmax')(concat)
    model = Model(inputs=[input_1, input_2], outputs=[output])
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
    return model

In [68]:
body_X_train, body_X_val = body_X[train_index], body_X[val_index]
face_X_train, face_X_val = face_X[train_index], face_X[val_index]
y_train, y_val = y[train_index], y[val_index]
late_convLSTM = build_fusioned_convLSTM(body_features, face_features, n_length, body_outputs)
history = train_fusioned(late_convLSTM, body_X_train, face_X_train, y_train, body_X_val, face_X_val, y_val, epochs=epochs, batch_size=64, class_weight=class_weight)

Epoch 1/50
7/7 - 34s - loss: 9.9612 - accuracy: 0.2273 - auc_14: 0.4884 - precision_14: 0.2197 - recall_14: 0.1970 - f1_score: 0.1964 - val_loss: 1.5695 - val_accuracy: 0.3838 - val_auc_14: 0.6706 - val_precision_14: 0.4474 - val_recall_14: 0.1717 - val_f1_score: 0.2513 - 34s/epoch - 5s/step
Epoch 2/50
7/7 - 25s - loss: 3.4643 - accuracy: 0.3460 - auc_14: 0.6294 - precision_14: 0.3981 - recall_14: 0.1086 - f1_score: 0.2674 - val_loss: 1.4628 - val_accuracy: 0.2626 - val_auc_14: 0.5056 - val_precision_14: 0.3158 - val_recall_14: 0.0606 - val_f1_score: 0.2282 - 25s/epoch - 4s/step
Epoch 3/50
7/7 - 25s - loss: 2.6177 - accuracy: 0.2727 - auc_14: 0.5516 - precision_14: 0.4242 - recall_14: 0.0707 - f1_score: 0.2628 - val_loss: 1.3807 - val_accuracy: 0.2828 - val_auc_14: 0.5713 - val_precision_14: 0.2353 - val_recall_14: 0.0404 - val_f1_score: 0.2230 - 25s/epoch - 4s/step
Epoch 4/50
7/7 - 25s - loss: 2.3382 - accuracy: 0.4444 - auc_14: 0.7065 - precision_14: 0.6824 - recall_14: 0.1465 - f1_s

### RCNN (concatenate decision layer)

In [69]:
def build_fusioned_RCNN(body_features, face_features, n_length, n_outputs):
   input_1 = Input(shape=(1, n_length, body_features))
   conv1d_1 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation=PReLU(alpha_initializer=Constant(value=0.25)), kernel_initializer='glorot_normal'))(input_1)
   bn_1 = TimeDistributed(BatchNormalization())(conv1d_1)
   maxpool_1 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(bn_1)
   conv1d_11 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation=PReLU(alpha_initializer=Constant(value=0.25)), kernel_initializer='glorot_normal'))(maxpool_1)
   bn_11 = TimeDistributed(BatchNormalization())(conv1d_11)
   maxpool_11 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(bn_11)
   flatten_1 = Flatten()(maxpool_11)
   dense_1 = Dense(256, activation='relu')(flatten_1)
   dense_11 = Dense(256, activation='relu')(dense_1)

   input_2 = Input(shape=(1, n_length, face_features))
   conv1d_2 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation=PReLU(alpha_initializer=Constant(value=0.25)), kernel_initializer='glorot_normal'))(input_2)
   bn_2 = TimeDistributed(BatchNormalization())(conv1d_2)
   maxpool_2 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(bn_2)
   conv1d_22 = TimeDistributed(Conv1D(filters=250, kernel_size=5, activation=PReLU(alpha_initializer=Constant(value=0.25)), kernel_initializer='glorot_normal'))(maxpool_2)
   bn_22 = TimeDistributed(BatchNormalization())(conv1d_22)
   maxpool_22 = TimeDistributed(MaxPooling1D(pool_size=2, data_format='channels_first'))(bn_22)
   flatten_2 = Flatten()(maxpool_22)
   dense_2 = Dense(256, activation='tanh')(flatten_2)
   dense_22 = Dense(256, activation='tanh')(dense_2)

   concat = Concatenate()([dense_11, dense_22])
   output = Dense(units=n_outputs, activation='softmax')(concat)
   model = Model(inputs=[input_1, input_2], outputs=[output])
   model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001), metrics=['accuracy', tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), tfa.metrics.F1Score(num_classes=n_outputs, average='macro')])
   return model

In [70]:
body_X_train, body_X_val = body_X[train_index], body_X[val_index]
face_X_train, face_X_val = face_X[train_index], face_X[val_index]
y_train, y_val = y[train_index], y[val_index]
late_RCNN = build_fusioned_RCNN(body_features, face_features, n_length, body_outputs)
history = train_fusioned(late_RCNN, body_X_train, face_X_train, y_train, body_X_val, face_X_val, y_val, epochs=epochs, batch_size=128, class_weight=class_weight)

Epoch 1/50
4/4 - 10s - loss: 81.9631 - accuracy: 0.2803 - auc_15: 0.5345 - precision_15: 0.3000 - recall_15: 0.2348 - f1_score: 0.2335 - val_loss: 197.8969 - val_accuracy: 0.5253 - val_auc_15: 0.6835 - val_precision_15: 0.5253 - val_recall_15: 0.5253 - val_f1_score: 0.1722 - 10s/epoch - 3s/step
Epoch 2/50
4/4 - 5s - loss: 31.5954 - accuracy: 0.2803 - auc_15: 0.5308 - precision_15: 0.2803 - recall_15: 0.2803 - f1_score: 0.2415 - val_loss: 122.8204 - val_accuracy: 0.1515 - val_auc_15: 0.4370 - val_precision_15: 0.1515 - val_recall_15: 0.1515 - val_f1_score: 0.1094 - 5s/epoch - 1s/step
Epoch 3/50
4/4 - 5s - loss: 18.2768 - accuracy: 0.2626 - auc_15: 0.5228 - precision_15: 0.2621 - recall_15: 0.2601 - f1_score: 0.2285 - val_loss: 318.3914 - val_accuracy: 0.0707 - val_auc_15: 0.3805 - val_precision_15: 0.0707 - val_recall_15: 0.0707 - val_f1_score: 0.0333 - 5s/epoch - 1s/step
Epoch 4/50
4/4 - 5s - loss: 13.0919 - accuracy: 0.3005 - auc_15: 0.5499 - precision_15: 0.2961 - recall_15: 0.2879 -

### Convert Uni-modal Approaches to Ensemble Members

In [71]:
biLSTM_body_member = KerasMember(name="Body Model", keras_model=body_biLSTM, train_batches=(body_X_train, y_train), val_batches=(body_X_test, y_test))
biLSTM_face_member = KerasMember(name="Face Model", keras_model=face_biLSTM, train_batches=(face_X_train, y_train), val_batches=(face_X_test, y_test))

In [72]:
cnn_body_member = KerasMember(name="Body Model", keras_model=body_cnn, train_batches=(body_X_train, y_train), val_batches=(body_X_test, y_test))
cnn_face_member = KerasMember(name="Face Model", keras_model=face_cnn, train_batches=(face_X_train, y_train), val_batches=(face_X_test, y_test))

In [73]:
convLSTM_body_member = KerasMember(name="Body Model", keras_model=body_convlstm, train_batches=(body_X_train, y_train), val_batches=(body_X_test, y_test))
convLSTM_face_member = KerasMember(name="Face Model", keras_model=face_convlstm, train_batches=(face_X_train, y_train), val_batches=(face_X_test, y_test))

In [74]:
rcnn_body_member = KerasMember(name="Body Model", keras_model=body_rcnn, train_batches=(body_X_train, y_train), val_batches=(body_X_test, y_test))
rcnn_face_member = KerasMember(name="Face Model", keras_model=face_rcnn, train_batches=(face_X_train, y_train), val_batches=(face_X_test, y_test))

### Dirichlet Markov Ensemble with weighted soft voting

In [75]:
biLSTM_dirichletEnsemble = DirichletEnsemble()
biLSTM_dirichletEnsemble.add_member(biLSTM_body_member)
biLSTM_dirichletEnsemble.add_member(biLSTM_face_member)
biLSTM_dirichletEnsemble.fit()

In [76]:
cnn_dirichletEnsemble = DirichletEnsemble()
cnn_dirichletEnsemble.add_member(cnn_body_member)
cnn_dirichletEnsemble.add_member(cnn_face_member)
cnn_dirichletEnsemble.fit()

In [77]:
convLSTM_dirichletEnsemble = DirichletEnsemble()
convLSTM_dirichletEnsemble.add_member(convLSTM_body_member)
convLSTM_dirichletEnsemble.add_member(convLSTM_face_member)
convLSTM_dirichletEnsemble.fit()

In [78]:
rcnn_dirichletEnsemble = DirichletEnsemble()
rcnn_dirichletEnsemble.add_member(rcnn_body_member)
rcnn_dirichletEnsemble.add_member(rcnn_face_member)
rcnn_dirichletEnsemble.fit()

## Performance Evaluation on Test set

### Uni-Modal (only Body/Skeleton data)

In [79]:
_, acc, auc, precision, recall, f1 = body_biLSTM.evaluate(full_body_X_test, full_body_y_test, verbose=0)
print('Bidirectional LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

Bidirectional LSTM:
Accuracy: 24.76% - ROC/AUC: 0.55 - Precision: 0.26 - Recall: 0.14, F1 score: 0.16


In [80]:
_, acc, auc, precision, recall, f1 = body_cnn.evaluate(full_body_X_test, full_body_y_test, verbose=0)
print('CNN+LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

CNN+LSTM:
Accuracy: 46.67% - ROC/AUC: 0.64 - Precision: 0.00 - Recall: 0.00, F1 score: 0.16


In [81]:
_, acc, auc, precision, recall, f1 = body_convlstm.evaluate(full_body_X_test, full_body_y_test, verbose=0)
print('Convolutional LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

Convolutional LSTM:
Accuracy: 45.71% - ROC/AUC: 0.49 - Precision: 0.00 - Recall: 0.00, F1 score: 0.16


In [82]:
_, acc, auc, precision, recall, f1 = body_rcnn.evaluate(full_body_X_test, full_body_y_test, verbose=0)
print('RCNN:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

RCNN:
Accuracy: 21.90% - ROC/AUC: 0.63 - Precision: 0.43 - Recall: 0.03, F1 score: 0.13


### Uni-Modal (only Facial Expression/Action Units)

In [83]:
_, acc, auc, precision, recall, f1 = face_biLSTM.evaluate(face_X_test, face_y_test, verbose=0)
print('Bidirectional LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

Bidirectional LSTM:
Accuracy: 32.35% - ROC/AUC: 0.62 - Precision: 0.44 - Recall: 0.24, F1 score: 0.23


In [84]:
_, acc, auc, precision, recall, f1 = face_cnn.evaluate(face_X_test, face_y_test, verbose=0)
print('CNN+LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

CNN+LSTM:
Accuracy: 38.24% - ROC/AUC: 0.62 - Precision: 0.26 - Recall: 0.12, F1 score: 0.23


In [85]:
_, acc, auc, precision, recall, f1 = face_convlstm.evaluate(face_X_test, face_y_test, verbose=0)
print('Convolutional LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

Convolutional LSTM:
Accuracy: 49.02% - ROC/AUC: 0.74 - Precision: 0.50 - Recall: 0.23, F1 score: 0.26


In [86]:
_, acc, auc, precision, recall, f1 = face_rcnn.evaluate(face_X_test, face_y_test, verbose=0)
print('RCNN:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

RCNN:
Accuracy: 29.41% - ROC/AUC: 0.60 - Precision: 0.44 - Recall: 0.15, F1 score: 0.15


### Early Fusion (Feature Level)

In [87]:
_, acc, auc, precision, recall, f1 = early_bilstm.evaluate(early_X_test, early_y_test, verbose=0)
print('Bidirectional LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

Bidirectional LSTM:
Accuracy: 32.35% - ROC/AUC: 0.66 - Precision: 0.30 - Recall: 0.24, F1 score: 0.22


In [88]:
_, acc, auc, precision, recall, f1 = early_cnn.evaluate(early_X_test, early_y_test, verbose=0)
print('CNN+LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

CNN+LSTM:
Accuracy: 39.22% - ROC/AUC: 0.67 - Precision: 0.39 - Recall: 0.28, F1 score: 0.25


In [89]:
_, acc, auc, precision, recall, f1 = early_convlstm.evaluate(early_X_test, early_y_test, verbose=0)
print('Convolutional LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

Convolutional LSTM:
Accuracy: 18.63% - ROC/AUC: 0.43 - Precision: 0.29 - Recall: 0.06, F1 score: 0.15


In [90]:
_, acc, auc, precision, recall, f1 = early_rcnn.evaluate(early_X_test, early_y_test, verbose=0)
print('RCNN:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

RCNN:
Accuracy: 48.04% - ROC/AUC: 0.75 - Precision: 0.48 - Recall: 0.48, F1 score: 0.16


### Late Fusion (Decision Level)

In [91]:
_, acc, auc, precision, recall, f1 = late_biLSTM.evaluate([body_X_test, face_X_test], y_test, verbose=0)
print('Bidirectional LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

Bidirectional LSTM:
Accuracy: 34.31% - ROC/AUC: 0.61 - Precision: 0.36 - Recall: 0.32, F1 score: 0.21


In [92]:
_, acc, auc, precision, recall, f1 = late_cnn.evaluate([body_X_test, face_X_test], y_test, verbose=0)
print('CNN+LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

CNN+LSTM:
Accuracy: 29.41% - ROC/AUC: 0.63 - Precision: 0.37 - Recall: 0.16, F1 score: 0.18


In [93]:
_, acc, auc, precision, recall, f1 = late_convLSTM.evaluate([body_X_test, face_X_test], y_test, verbose=0)
print('Convolutional LSTM:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

Convolutional LSTM:
Accuracy: 37.25% - ROC/AUC: 0.64 - Precision: 0.51 - Recall: 0.31, F1 score: 0.27


In [94]:
_, acc, auc, precision, recall, f1 = late_RCNN.evaluate([body_X_test, face_X_test], y_test, verbose=0)
print('RCNN:')
print('Accuracy: {:.2f}% - ROC/AUC: {:.2f} - Precision: {:.2f} - Recall: {:.2f}, F1 score: {:.2f}'.format(acc * 100, auc, precision, recall, f1))

RCNN:
Accuracy: 8.82% - ROC/AUC: 0.37 - Precision: 0.08 - Recall: 0.08, F1 score: 0.07


In [95]:
print('BiLSTM Dirichlet Markov Ensemble:')
d = biLSTM_dirichletEnsemble.describe()

BiLSTM Dirichlet Markov Ensemble:
Body Model (weight: 0.0008)
Face Model (weight: 0.9992)
Accuracy: 32.35% - ROC/AUC: 0.60 - Precision: 0.33 - Recall: 0.29 - F1 score: 0.23


In [96]:
print('CNN+LSTM Dirichlet Markov Ensemble:')
d = cnn_dirichletEnsemble.describe()

CNN+LSTM Dirichlet Markov Ensemble:
Body Model (weight: 0.9944)
Face Model (weight: 0.0056)
Accuracy: 46.08% - ROC/AUC: 0.61 - Precision: 0.28 - Recall: 0.25 - F1 score: 0.23


In [97]:
print('ConvLSTM Dirichlet Markov Ensemble:')
d = convLSTM_dirichletEnsemble.describe()

ConvLSTM Dirichlet Markov Ensemble:
Body Model (weight: 0.2806)
Face Model (weight: 0.7194)
Accuracy: 49.02% - ROC/AUC: 0.63 - Precision: 0.26 - Recall: 0.26 - F1 score: 0.26


In [98]:
print('RCNN Dirichlet Markov Ensemble:')
d = rcnn_dirichletEnsemble.describe()

RCNN Dirichlet Markov Ensemble:
Body Model (weight: 0.0000)
Face Model (weight: 1.0000)
Accuracy: 36.27% - ROC/AUC: 0.57 - Precision: 0.26 - Recall: 0.25 - F1 score: 0.23
