# **A Residual Deep Learning Method for Accurate and Efficient Recognition of Gym Exercise Activities**

In [None]:
import os
import numpy as np
import pickle
from tqdm import tqdm
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.models import load_model
from keras.layers import Input, Layer, Conv1D, MaxPool1D, ReLU, BatchNormalization, LayerNormalization, Dropout, Add, Dense, GlobalMaxPooling1D, Bidirectional, GRU

In [None]:
#You MUST run this command before reading in any data from Google Drive
from google.colab import files
from google.colab import drive
import pandas as pd
# Mount Google Drive under /content/drive (force_remount=True asks Colab to
# re-mount even if a mount is already present).
drive.mount('/content/drive', force_remount=True)
# Work from the benchmarks folder so that the relative '../data' and
# '../models' paths used in later cells resolve inside the Thesis directory.
os.chdir('/content/drive/My Drive/Colab Notebooks/Thesis/benchmarks')

# IPython magic: execute the shared configuration notebook.
# NOTE(review): what sys_configs.ipynb defines is not visible from this file.
%run ../sys_configs.ipynb

Mounted at /content/drive


This paper is the only paper considered in this research that is tested on the MyoGym dataset. It introduces the CNN-ResBiGRU architecture which integrates *convolutional neural networks, residual connections,
and bidirectional gated recurrent units*. The paper tests this method and a range of other benchmark methods on various permutations of the sensors available in the MyoGym data.

The network begins with a convolutional block comprising a 1D convolutional layer, then a batch normalisation layer, then a max-pooling layer and finally a dropout layer.

Next, the time series is read into the ResBiGRU block. The Gated Recurrent Unit (GRU) component of the block was originally developed for tackling the vanishing gradient problem by modelling long-range temporal contexts. The paper uses GRUs rather than LSTMs because LSTMs have high memory usage and require dedicated memory cells, whereas GRUs only have update and reset gates that determine which information is kept and discarded. Because the value of the time series can be assumed to be dependent on both previous and succeeding events, this GRU model is *Bi*-directional.

Whilst GRUs do tackle the vanishing gradient problem, the problem returns when several ResBiGRU blocks are stacked. Therefore, a residual component is included, which adds together the data before and after the GRU to help gradients flow. There are 2 GRU layers in a ResBiGRU block, and the second layer is followed by a layer normalisation layer.

After the ResBiGRU blocks, the network applies a dense layer and a softmax function to output the final probabilities for each class.

In [None]:
# Each .npy file holds two arrays written back to back; they are read in the
# same order: first the samples (x), then the labels (y).
with open('../data/1s_train.npy', 'rb') as train_file:
    x1s_train, y1s_train = np.load(train_file), np.load(train_file)

# Per-sample shape, later used as the model input shape.
# NOTE(review): assumes x is (samples, time steps, channels) - confirm.
sz, dim = x1s_train.shape[1:]

with open('../data/1s_val.npy', 'rb') as val_file:
    x1s_val, y1s_val = np.load(val_file), np.load(val_file)

In [None]:
# One-hot encode the integer labels of both splits (depth 31, int32).
train_labels_tf, val_labels_tf = (
    tf.one_hot(labels, 31, dtype=tf.int32) for labels in (y1s_train, y1s_val)
)

In [None]:
# Turn the sample arrays of both splits into float32 tensors.
train_data_tf, val_data_tf = (
    tf.convert_to_tensor(samples, dtype=tf.float32) for samples in (x1s_train, x1s_val)
)

In [None]:
# Pair every sample with its one-hot label; each dataset element is (x, y).
train_ds = tf.data.Dataset.from_tensor_slices(tensors=(train_data_tf, train_labels_tf))
val_ds = tf.data.Dataset.from_tensor_slices(tensors=(val_data_tf, val_labels_tf))

In [None]:
# Shuffle with a buffer as large as the dataset itself. A smaller buffer only
# mixes samples within a sliding window, so data stored in a sorted order
# would reach the model in nearly the same order every epoch.
train_ds = train_ds.shuffle(buffer_size=train_ds.cardinality())

# Group both splits into mini-batches of 64 samples.
train_ds = train_ds.padded_batch(64)
val_ds = val_ds.padded_batch(64)

In [None]:
# Number of classes. Taken from the depth of the one-hot training labels so
# that the width of the softmax output always matches the targets, even if a
# class happens to be absent from the training split.
C = int(train_labels_tf.shape[-1])

### **Architecture**

<div style="display: flex; justify-content: center; align-items: center;">
    <figure style="margin: 10px;">
        <img src="../documentation/CNNBiResGRU.png" alt="Alt text" width="500" />
        <figcaption style="text-align: center;">CNN-ResBiGRU Network</figcaption>
    </figure>
    <figure style="margin: 10px;">
        <img src="../documentation/ResBiGRULayer.png" alt="Alt text" width="500" />
        <figcaption style="text-align: center;">ResBiGRU Layer</figcaption>
    </figure>
</div>

We first define the convolutional block. The paper does not specify appropriate hyperparameters to use, but there are 2 obvious hyperparameters in this block: `num_filters` and `kernel_size`.

In [None]:
class ConvBlock(Layer):
    """Convolutional front-end: Conv1D -> BatchNormalization -> MaxPool1D -> Dropout.

    Both the convolution and the pooling use stride 1 with "same" padding, so
    the length of the time axis is preserved.

    Args:
        num_filters: Number of filters of the 1D convolution.
        kernel_size: Width of the convolution kernel.
        pool_size: Window size of the max-pooling layer.
        dropout_rate: Dropout rate applied after pooling.
        **kwargs: Forwarded to the base ``Layer`` constructor (e.g. ``name``).
    """

    def __init__(self, num_filters, kernel_size=10, pool_size=3, dropout_rate=0.25, **kwargs):
        super().__init__(**kwargs)
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.pool_size = pool_size
        self.dropout_rate = dropout_rate

    def build(self, input_shape):
        # Keras calls build() automatically the first time the layer is
        # applied to an input, not when the object is constructed.
        self.conv = Conv1D(self.num_filters, kernel_size=self.kernel_size, strides=1, padding="same")
        self.batch_norm = BatchNormalization()
        self.max_pool = MaxPool1D(pool_size=self.pool_size, strides=1, padding="same")
        self.dropout = Dropout(self.dropout_rate)

    def call(self, input):
        """Apply convolution, batch normalisation, max-pooling and dropout in order."""
        x = self.conv(input)
        x = self.batch_norm(x)
        x = self.max_pool(x)
        output = self.dropout(x)
        return output

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild this layer."""
        config = super().get_config()
        config.update({
            "num_filters": self.num_filters,
            "kernel_size": self.kernel_size,
            "pool_size": self.pool_size,
            "dropout_rate": self.dropout_rate,
        })
        return config

Define the ResBiGRU block which is the centerpiece idea of this benchmark method.

In [None]:
class ResBiGRU(Layer):
    """Residual bidirectional GRU block.

    A bidirectional GRU (``merge_mode=None``) yields separate forward and
    backward sequences. Each sequence is fed through its own second GRU, the
    second-layer output is added back onto the first-layer output (residual
    connection), the sum is layer-normalised, and the two directions are
    concatenated along the feature axis (axis 2).

    Args:
        h1_units: Hidden units per direction of the first (bidirectional) GRU.
        h2_units: Hidden units of each second-layer GRU. Its output is added
            to the first-layer output, so the two widths must be compatible
            for addition (every model in this notebook uses equal values).
        **kwargs: Forwarded to the base ``Layer`` constructor (e.g. ``name``).
    """

    def __init__(self, h1_units, h2_units, **kwargs):
        super().__init__(**kwargs)
        self.h1_units = h1_units
        self.h2_units = h2_units

    def build(self, input_shape):
        # Keras calls build() automatically the first time the layer is
        # applied to an input, not when the object is constructed.
        self.gru_1 = Bidirectional(GRU(self.h1_units, activation=None, return_sequences=True), merge_mode=None)
        self.gru_2a = GRU(self.h2_units, activation=None, return_sequences=True)
        self.gru_2b = GRU(self.h2_units, activation=None, return_sequences=True)
        # A single LayerNormalization instance, i.e. its parameters are
        # shared between the forward and the backward branch.
        self.layer_norm = LayerNormalization()

    def call(self, input):
        """Run the residual bidirectional GRU block on a batch of sequences."""
        # First recurrent layer: forward and backward GRUs run together and
        # their outputs are returned separately (merge_mode=None).
        z_forward, z_backward = self.gru_1(input)

        # Second recurrent layer: one GRU per direction.
        # NOTE(review): gru_2b is constructed as an ordinary GRU (no
        # go_backwards=True); confirm whether a reversed pass over the
        # backward branch is intended.
        z2_forward = self.gru_2a(z_forward)
        z2_backward = self.gru_2b(z_backward)

        # Residual connection: first-layer output + second-layer output.
        # Plain tensor addition avoids constructing new Add layers on every
        # forward pass.
        z_forward = z_forward + z2_forward
        z_backward = z_backward + z2_backward

        z_forward = self.layer_norm(z_forward)
        z_backward = self.layer_norm(z_backward)

        output = tf.concat([z_forward, z_backward], axis=2)
        return output

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild this layer."""
        config = super().get_config()
        config.update({"h1_units": self.h1_units, "h2_units": self.h2_units})
        return config

Now, put together the network from the convolutional block, the ResBiGRU blocks and dense layers.

In [None]:
# Early stopping: CNNResBiGRU is a high-parameter network that needs many
# epochs, so training is halted once the validation accuracy has gone
# 5 epochs without improving.
earlystopping = EarlyStopping(patience=5, monitor='val_accuracy')

# As in the previous benchmark paper, ReduceLROnPlateau halves the learning
# rate when the validation loss has plateaued for 2 epochs, down to a floor
# of 1e-4.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=2, factor=0.5, min_lr=1e-4)

We will test a few different models with slightly different configurations.

#### **Model 1**

This model has 3 ResBiGRU blocks, each with the same number of hidden units. Each block has 32 hidden units.

In [None]:
def CNNResBiGRU_1(shape):
    """Build model 1: a 32-filter ConvBlock followed by three 32-unit ResBiGRU blocks.

    Args:
        shape: Shape of one input sample, passed to ``Input``.

    Returns:
        An uncompiled Keras ``Model`` ending in a ``C``-way softmax.
    """
    inputs = Input(shape=shape)

    features = ConvBlock(num_filters=32, name="ConvBlock")(inputs)

    # The ResBiGRU block is repeated three times; its residual connections
    # are there to counter vanishing gradients in the stacked recurrent layers.
    for block_name in ("ResBiGRU1", "ResBiGRU2", "ResBiGRU3"):
        features = ResBiGRU(h1_units=32, h2_units=32, name=block_name)(features)

    # Collapse the time axis, then classify.
    pooled = GlobalMaxPooling1D()(features)
    probabilities = Dense(C, activation="softmax")(pooled)
    return Model(inputs=inputs, outputs=probabilities)

In [None]:
# Instantiate model 1 and print its layer / parameter overview.
CNNResBiGRU_model_1 = CNNResBiGRU_1((sz, dim))
CNNResBiGRU_model_1.summary()

In [None]:
# Build a fresh model so that re-running this cell restarts training from
# newly initialised weights.
CNNResBiGRU_model_1 = CNNResBiGRU_1(shape=(sz, dim))
CNNResBiGRU_model_1.compile(
    optimizer=Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# The EarlyStopping and ReduceLROnPlateau callbacks only take effect when
# they are handed to fit().
history_1 = CNNResBiGRU_model_1.fit(
    train_ds,
    validation_data=val_ds,
    epochs=25,
    callbacks=[earlystopping, reduce_lr],
    verbose=1,
)

Epoch 1/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 2s/step - accuracy: 0.2199 - loss: 3.1893 - val_accuracy: 0.5595 - val_loss: 1.7117
Epoch 2/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 1s/step - accuracy: 0.5890 - loss: 1.4328 - val_accuracy: 0.5524 - val_loss: 1.2529
Epoch 3/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 1s/step - accuracy: 0.7501 - loss: 0.9393 - val_accuracy: 0.6270 - val_loss: 0.9927
Epoch 4/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 1s/step - accuracy: 0.8495 - loss: 0.6236 - val_accuracy: 0.6764 - val_loss: 0.8877
Epoch 5/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 1s/step - accuracy: 0.9168 - loss: 0.4150 - val_accuracy: 0.7157 - val_loss: 0.7739
Epoch 6/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 1s/step - accuracy: 0.9555 - loss: 0.2728 - val_accuracy: 0.6683 - val_loss: 0.8218
Epoch 7/25
[1m48/48[0m [32m━━━━━━━━━━

In [None]:
# Make sure the output folder exists; open(..., 'wb') does not create
# missing directories.
os.makedirs('../models/CNNResBiGRU_model', exist_ok=True)
CNNResBiGRU_model_1.save('../models/CNNResBiGRU_model/CNNResBiGRU_model_1.keras')
with open('../models/CNNResBiGRU_model/train_history_1.pkl', 'wb') as f:
    # history_1 is a History object after fit() but a plain dict once the
    # load cell has run; store the metrics dict in either case.
    pickle.dump(getattr(history_1, 'history', history_1), f)

In [None]:
# Load the model. The custom layer classes are supplied so Keras can
# resolve them when rebuilding the network.
custom_objects = dict(ConvBlock=ConvBlock, ResBiGRU=ResBiGRU)
CNNResBiGRU_model_1 = load_model(
    '../models/CNNResBiGRU_model/CNNResBiGRU_model_1.keras',
    custom_objects=custom_objects,
)

# Load the training history (the pickled metrics dict); history_1 is a
# plain dict from here on.
with open('../models/CNNResBiGRU_model/train_history_1.pkl', 'rb') as history_file:
    history_1 = pickle.load(history_file)

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# val_ds is already batched, so no batch_size is passed: Keras expects
# batch_size to be left unset for dataset inputs.
CNNResBiGRU_1_results = CNNResBiGRU_model_1.evaluate(val_ds)
print("Validation Loss: {}\nValidation Accuracy: {}".format(*CNNResBiGRU_1_results))

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 319ms/step - accuracy: 0.7212 - loss: 1.0287
Validation Loss: 1.0162723064422607
Validation Accuracy: 0.725806474685669


#### **Model 2**

This model has just 2 ResBiGRU blocks, but each block has GRUs with more dimensions.

In [None]:
def CNNResBiGRU_2(shape):
    """Build model 2: a 64-filter ConvBlock followed by two 64-unit ResBiGRU blocks.

    Args:
        shape: Shape of one input sample, passed to ``Input``.

    Returns:
        An uncompiled Keras ``Model`` ending in a ``C``-way softmax.
    """
    inputs = Input(shape=shape)

    features = ConvBlock(num_filters=64, name="ConvBlock")(inputs)

    # Two stacked ResBiGRU blocks; their residual connections are there to
    # counter vanishing gradients in the stacked recurrent layers.
    for block_name in ("ResBiGRU1", "ResBiGRU2"):
        features = ResBiGRU(h1_units=64, h2_units=64, name=block_name)(features)

    # Collapse the time axis, then classify.
    pooled = GlobalMaxPooling1D()(features)
    probabilities = Dense(C, activation="softmax")(pooled)
    return Model(inputs=inputs, outputs=probabilities)

In [None]:
# Instantiate model 2 and print its layer / parameter overview.
CNNResBiGRU_model_2 = CNNResBiGRU_2((sz, dim))
CNNResBiGRU_model_2.summary()

In [None]:
# Build a fresh model so that re-running this cell restarts training from
# newly initialised weights.
CNNResBiGRU_model_2 = CNNResBiGRU_2(shape=(sz, dim))
CNNResBiGRU_model_2.compile(
    optimizer=Adam(learning_rate=0.001, beta_1=0.90, beta_2=0.99, epsilon=1e-08),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# The EarlyStopping and ReduceLROnPlateau callbacks only take effect when
# they are handed to fit().
history_2 = CNNResBiGRU_model_2.fit(
    train_ds,
    validation_data=val_ds,
    epochs=25,
    callbacks=[earlystopping, reduce_lr],
    verbose=1,
)

Epoch 1/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 1s/step - accuracy: 0.3101 - loss: 2.6855 - val_accuracy: 0.5897 - val_loss: 1.2033
Epoch 2/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 844ms/step - accuracy: 0.7103 - loss: 0.9726 - val_accuracy: 0.7308 - val_loss: 0.7938
Epoch 3/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 842ms/step - accuracy: 0.8744 - loss: 0.5283 - val_accuracy: 0.7550 - val_loss: 0.7165
Epoch 4/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 842ms/step - accuracy: 0.9456 - loss: 0.2787 - val_accuracy: 0.7611 - val_loss: 0.6662
Epoch 5/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 847ms/step - accuracy: 0.9854 - loss: 0.1465 - val_accuracy: 0.7651 - val_loss: 0.6680
Epoch 6/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 842ms/step - accuracy: 0.9892 - loss: 0.0914 - val_accuracy: 0.7530 - val_loss: 0.6543
Epoch 7/25
[1m48/48[0m 

In [None]:
# Make sure the output folder exists; open(..., 'wb') does not create
# missing directories.
os.makedirs('../models/CNNResBiGRU_model', exist_ok=True)
CNNResBiGRU_model_2.save('../models/CNNResBiGRU_model/CNNResBiGRU_model_2.keras')
with open('../models/CNNResBiGRU_model/train_history_2.pkl', 'wb') as f:
    # history_2 is a History object after fit() but a plain dict once the
    # load cell has run; store the metrics dict in either case.
    pickle.dump(getattr(history_2, 'history', history_2), f)

In [None]:
# Load the model. The custom layer classes are supplied so Keras can
# resolve them when rebuilding the network.
custom_objects = dict(ConvBlock=ConvBlock, ResBiGRU=ResBiGRU)
CNNResBiGRU_model_2 = load_model(
    '../models/CNNResBiGRU_model/CNNResBiGRU_model_2.keras',
    custom_objects=custom_objects,
)

# Load the training history (the pickled metrics dict); history_2 is a
# plain dict from here on.
with open('../models/CNNResBiGRU_model/train_history_2.pkl', 'rb') as history_file:
    history_2 = pickle.load(history_file)

In [None]:
# val_ds is already batched, so no batch_size is passed: Keras expects
# batch_size to be left unset for dataset inputs.
CNNResBiGRU_2_results = CNNResBiGRU_model_2.evaluate(val_ds)
print("Validation Loss: {}\nValidation Accuracy: {}".format(*CNNResBiGRU_2_results))

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 255ms/step - accuracy: 0.7827 - loss: 0.7457
Validation Loss: 0.7390094995498657
Validation Accuracy: 0.7731854915618896


#### **Model 3**

This model has fewer parameters than either model 1 or model 2. The smaller capacity is intended to encourage the model to learn a more compact feature representation.

In [None]:
def CNNResBiGRU_3(shape):
    """Build model 3: a 32-filter ConvBlock followed by a single 32-unit ResBiGRU block.

    Args:
        shape: Shape of one input sample, passed to ``Input``.

    Returns:
        An uncompiled Keras ``Model`` ending in a ``C``-way softmax.
    """
    inputs = Input(shape=shape)

    features = ConvBlock(num_filters=32, name="ConvBlock")(inputs)

    # Only one ResBiGRU block in this variant.
    features = ResBiGRU(h1_units=32, h2_units=32, name="ResBiGRU1")(features)

    # Collapse the time axis, then classify.
    pooled = GlobalMaxPooling1D()(features)
    probabilities = Dense(C, activation="softmax")(pooled)
    return Model(inputs=inputs, outputs=probabilities)

In [None]:
# Instantiate model 3 and print its layer / parameter overview.
CNNResBiGRU_model_3 = CNNResBiGRU_3((sz, dim))
CNNResBiGRU_model_3.summary()

In [None]:
# Build a fresh model so that re-running this cell restarts training from
# newly initialised weights.
CNNResBiGRU_model_3 = CNNResBiGRU_3(shape=(sz, dim))
CNNResBiGRU_model_3.compile(
    optimizer=Adam(learning_rate=0.001, beta_1=0.90, beta_2=0.99, epsilon=1e-08),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# The EarlyStopping and ReduceLROnPlateau callbacks only take effect when
# they are handed to fit().
history_3 = CNNResBiGRU_model_3.fit(
    train_ds,
    validation_data=val_ds,
    epochs=25,
    callbacks=[earlystopping, reduce_lr],
    verbose=1,
)

Epoch 1/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 544ms/step - accuracy: 0.1499 - loss: 3.6189 - val_accuracy: 0.4758 - val_loss: 2.1063
Epoch 2/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 400ms/step - accuracy: 0.4465 - loss: 1.8864 - val_accuracy: 0.6129 - val_loss: 1.5812
Epoch 3/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 399ms/step - accuracy: 0.5535 - loss: 1.4457 - val_accuracy: 0.6492 - val_loss: 1.2530
Epoch 4/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 400ms/step - accuracy: 0.6525 - loss: 1.1604 - val_accuracy: 0.6250 - val_loss: 1.0859
Epoch 5/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 396ms/step - accuracy: 0.7567 - loss: 0.9049 - val_accuracy: 0.6865 - val_loss: 0.9350
Epoch 6/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 397ms/step - accuracy: 0.8212 - loss: 0.7233 - val_accuracy: 0.7077 - val_loss: 0.8356
Epoch 7/25
[1m48/48[

In [None]:
# Make sure the output folder exists; open(..., 'wb') does not create
# missing directories.
os.makedirs('../models/CNNResBiGRU_model', exist_ok=True)
CNNResBiGRU_model_3.save('../models/CNNResBiGRU_model/CNNResBiGRU_model_3.keras')
with open('../models/CNNResBiGRU_model/train_history_3.pkl', 'wb') as f:
    # history_3 is a History object after fit() but a plain dict once the
    # load cell has run; store the metrics dict in either case.
    pickle.dump(getattr(history_3, 'history', history_3), f)

In [None]:
# Load the model. The custom layer classes are supplied so Keras can
# resolve them when rebuilding the network.
custom_objects = dict(ConvBlock=ConvBlock, ResBiGRU=ResBiGRU)
CNNResBiGRU_model_3 = load_model(
    '../models/CNNResBiGRU_model/CNNResBiGRU_model_3.keras',
    custom_objects=custom_objects,
)

# Load the training history (the pickled metrics dict); history_3 is a
# plain dict from here on.
with open('../models/CNNResBiGRU_model/train_history_3.pkl', 'rb') as history_file:
    history_3 = pickle.load(history_file)

In [None]:
# val_ds is already batched, so no batch_size is passed: Keras expects
# batch_size to be left unset for dataset inputs.
CNNResBiGRU_3_results = CNNResBiGRU_model_3.evaluate(val_ds)
print("Validation Loss: {}\nValidation Accuracy: {}".format(*CNNResBiGRU_3_results))

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 123ms/step - accuracy: 0.7603 - loss: 0.7365
Validation Loss: 0.732425332069397
Validation Accuracy: 0.7641128897666931


#### **Model 4**

The final model tested is the largest of the 4 models considered, and has 5 ResBiGRU blocks and an additional residual connection *between* consecutive blocks.

In [None]:
def CNNResBiGRU_4(shape):
    """Build model 4: a 64-filter ConvBlock and five 32-unit ResBiGRU blocks with skip connections.

    The output of every ResBiGRU block is added to that block's input, giving
    an extra residual connection between consecutive blocks.

    Args:
        shape: Shape of one input sample, passed to ``Input``.

    Returns:
        An uncompiled Keras ``Model`` ending in a ``C``-way softmax.
    """
    inputs = Input(shape=shape)

    z = ConvBlock(num_filters=64, name="ConvBlock")(inputs)

    # Five ResBiGRU blocks; each block's output is summed with its own input,
    # which gives gradients a direct path around the block.
    for block_index in range(1, 6):
        block_output = ResBiGRU(h1_units=32, h2_units=32, name=f"ResBiGRU{block_index}")(z)
        z = Add()([z, block_output])

    # Collapse the time axis, then classify.
    z = GlobalMaxPooling1D()(z)
    probabilities = Dense(C, activation="softmax")(z)
    return Model(inputs=inputs, outputs=probabilities)

In [None]:
# Instantiate model 4 and print its layer / parameter overview.
CNNResBiGRU_model_4 = CNNResBiGRU_4((sz, dim))
CNNResBiGRU_model_4.summary()

In [None]:
# Build a fresh model so that re-running this cell restarts training from
# newly initialised weights.
CNNResBiGRU_model_4 = CNNResBiGRU_4(shape=(sz, dim))
CNNResBiGRU_model_4.compile(
    optimizer=Adam(learning_rate=0.001, beta_1=0.95, beta_2=0.99, epsilon=1e-08),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# The EarlyStopping and ReduceLROnPlateau callbacks only take effect when
# they are handed to fit().
history_4 = CNNResBiGRU_model_4.fit(
    train_ds,
    validation_data=val_ds,
    epochs=25,
    callbacks=[earlystopping, reduce_lr],
    verbose=1,
)

Epoch 1/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 2s/step - accuracy: 0.2178 - loss: 4.9431 - val_accuracy: 0.5071 - val_loss: 1.4391
Epoch 2/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 2s/step - accuracy: 0.7074 - loss: 0.8923 - val_accuracy: 0.6694 - val_loss: 0.8974
Epoch 3/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 2s/step - accuracy: 0.8927 - loss: 0.4118 - val_accuracy: 0.7137 - val_loss: 0.7776
Epoch 4/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 2s/step - accuracy: 0.9291 - loss: 0.2676 - val_accuracy: 0.7268 - val_loss: 0.6832
Epoch 5/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 2s/step - accuracy: 0.9833 - loss: 0.1284 - val_accuracy: 0.7712 - val_loss: 0.7113
Epoch 6/25
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 2s/step - accuracy: 0.9901 - loss: 0.0828 - val_accuracy: 0.7571 - val_loss: 0.7371
Epoch 7/25
[1m48/48[0m [32m━━━━━━━━━

In [None]:
# Make sure the output folder exists; open(..., 'wb') does not create
# missing directories.
os.makedirs('../models/CNNResBiGRU_model', exist_ok=True)
CNNResBiGRU_model_4.save('../models/CNNResBiGRU_model/CNNResBiGRU_model_4.keras')
with open('../models/CNNResBiGRU_model/train_history_4.pkl', 'wb') as f:
    # history_4 is a History object after fit() but a plain dict once the
    # load cell has run; store the metrics dict in either case.
    pickle.dump(getattr(history_4, 'history', history_4), f)

In [None]:
# Load the model. The custom layer classes are supplied so Keras can
# resolve them when rebuilding the network.
custom_objects = dict(ConvBlock=ConvBlock, ResBiGRU=ResBiGRU)
CNNResBiGRU_model_4 = load_model(
    '../models/CNNResBiGRU_model/CNNResBiGRU_model_4.keras',
    custom_objects=custom_objects,
)

# Load the training history (the pickled metrics dict); history_4 is a
# plain dict from here on.
with open('../models/CNNResBiGRU_model/train_history_4.pkl', 'rb') as history_file:
    history_4 = pickle.load(history_file)

In [None]:
# val_ds is already batched, so no batch_size is passed: Keras expects
# batch_size to be left unset for dataset inputs.
CNNResBiGRU_4_results = CNNResBiGRU_model_4.evaluate(val_ds)
print("Validation Loss: {}\nValidation Accuracy: {}".format(*CNNResBiGRU_4_results))

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 531ms/step - accuracy: 0.7723 - loss: 0.9675
Validation Loss: 0.9277955293655396
Validation Accuracy: 0.7752016186714172


The takeaway from models 1-4 is that the larger models do not generalise better than the smaller models. Stacking more than 2 ResBiGRU blocks brings no obvious benefit in terms of validation loss or accuracy.

## **References**

[1] Mekruksavanich, Sakorn and Anuchit Jitpattanakul. “A Residual Deep Learning Method for Accurate and Efficient Recognition of Gym Exercise Activities Using Electromyography and IMU Sensors.” Applied System Innovation (2024): n. pag.