In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.neural_network import MLPRegressor
from math import sqrt
import tensorflow as tf
from keras import models, layers
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error

2023-08-11 13:17:14.843717: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-11 13:17:14.879318: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-11 13:17:14.880113: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
df_standardised = pd.read_csv('../gait_standardised.csv')
df_standardised.head()

Unnamed: 0,subject,condition,replication,leg,joint,time,angle,angle_scaled
0,1,1,1,1,1,0,4.682881,-0.465902
1,1,1,1,1,1,1,5.073127,-0.441551
2,1,1,1,1,1,2,5.229774,-0.431776
3,1,1,1,1,1,3,5.083273,-0.440918
4,1,1,1,1,1,4,4.652399,-0.467804


## Coverting to correct data types


In [4]:
print(f"Data types before:\n{df_standardised.dtypes}\n")
categorical_columns = ['subject', 'condition', 'replication', 'leg', 'joint', 'time']
df_standardised[categorical_columns] = df_standardised[categorical_columns].astype('category')
df_standardised['time'] = df_standardised['time'].cat.set_categories(list(range(0, 101)), ordered=True)
print(f"Data types after:\n{df_standardised.dtypes}")

Data types before:
subject           int64
condition         int64
replication       int64
leg               int64
joint             int64
time              int64
angle           float64
angle_scaled    float64
dtype: object

Data types after:
subject         category
condition       category
replication     category
leg             category
joint           category
time            category
angle            float64
angle_scaled     float64
dtype: object


## Split the dataset with KFold

In [5]:
kf = KFold(n_splits=10, random_state=42, shuffle=True)
kf.get_n_splits(df_standardised)

10

## Feedforward Neural Network (FNN)

In [6]:
results = {
    'Fold': [],
    'MSE': [],
    'R²': [],
    'RMSE': [],
    'MAE': [],
    'MAPE': [],
    'Adjusted R²': []
}

for i, (train_index, test_index) in enumerate(kf.split(df_standardised)):
    x_train = df_standardised.loc[train_index, ['subject', 'condition', 'replication', 'leg', 'joint','time']]
    y_train = df_standardised.loc[train_index, 'angle_scaled']
    x_test = df_standardised.loc[test_index, ['subject', 'condition', 'replication', 'leg', 'joint', 'time']]
    y_test = df_standardised.loc[test_index, 'angle_scaled']

    model = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(x_train.shape[1],)),
        layers.Dense(32, activation='relu'),
        layers.Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    

    model.fit(x_train, y_train, epochs=50, batch_size=32, verbose='0')

    y_pred = model.predict(x_test)
    y_pred = y_pred.reshape(y_pred.shape[0])

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    rmse = sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
    
    n = x_test.shape[0]
    p = x_test.shape[1] 
    adjusted_r2 = 1 - ((1 - r2) * (n - 1)) / (n - p - 1)

    results['Fold'].append(i + 1)
    results['MSE'].append(mse)
    results['R²'].append(r2)
    results['RMSE'].append(rmse)
    results['MAE'].append(mae)
    results['MAPE'].append(mape)
    results['Adjusted R²'].append(adjusted_r2)

Epoch 1/50


2023-08-11 13:17:18.532660: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-08-11 13:17:18.564101: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
              0
0     -0.491617
1     -0.294479
2     -0.148091
3     -0.151156
4     -0.147109
...         ...
18175 -0.908866
18176  0.073994
18177  0.267497
18178  0.848008
18179  1.316959

[18180 rows x 1 columns]
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 1

<IPython.core.display.Javascript object>

In [7]:
FNN_results_df = pd.DataFrame(results)
FNN_results_df.loc['Average'] = FNN_results_df.mean()
FNN_results_df['Fold'] = FNN_results_df['Fold'].astype(int)
FNN_results_df.iloc[-1, 0] = ''
print('Results:\n============\n')
print(FNN_results_df)

Results:

        Fold       MSE        R²      RMSE       MAE        MAPE  Adjusted R²
0          1  0.036612  0.964354  0.191344  0.147019  109.064156     0.964342
1          2  0.044536  0.954499  0.211036  0.159855  116.403657     0.954484
2          3  0.041201  0.958882  0.202981  0.153147  203.250190     0.958869
3          4  0.034624  0.965247  0.186075  0.141599  125.263692     0.965236
4          5  0.028786  0.971619  0.169666  0.129435  127.509171     0.971609
5          6  0.032337  0.967846  0.179826  0.139678  112.608491     0.967835
6          7  0.048589  0.950424  0.220429  0.165448  134.027151     0.950408
7          8  0.035359  0.964633  0.188040  0.144420  314.826948     0.964621
8          9  0.028163  0.971957  0.167817  0.126242   89.346811     0.971948
9         10  0.038979  0.960683  0.197430  0.149490  181.648805     0.960670
Average       0.036919  0.963014  0.191464  0.145633  151.394907     0.963002


## Recurrent Neural Networks (RNNs)

In [20]:
results = {
    'Fold': [],
    'MSE': [],
    'R²': [],
    'RMSE': [],
    'MAE': [],
    'MAPE': [],
    'Adjusted R²': []
}
for i, (train_index, test_index) in enumerate(kf.split(df_standardised)):
    x_train = df_standardised.loc[train_index, ['subject', 'condition', 'replication', 'leg', 'joint', 'time']]
    y_train = df_standardised.loc[train_index, 'angle_scaled']
    x_test = df_standardised.loc[test_index, ['subject', 'condition', 'replication', 'leg', 'joint', 'time']]
    y_test = df_standardised.loc[test_index, 'angle_scaled']
    
    x_train = x_train.values.reshape((x_train.shape[0], 1, x_train.shape[1]))
    x_test = x_test.values.reshape((x_test.shape[0], 1, x_test.shape[1]))

    model = models.Sequential([
        layers.LSTM(64, activation='relu', input_shape=(x_train.shape[1], x_train.shape[2])),
        layers.Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')

    model.fit(x_train, y_train, epochs=50, batch_size=32, verbose='0')

    y_pred = model.predict(x_test)
    y_pred = y_pred.reshape(y_pred.shape[0])

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    rmse = sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
    
    n = x_test.shape[0]
    p = x_test.shape[1] 
    adjusted_r2 = 1 - ((1 - r2) * (n - 1)) / (n - p - 1)

    results['Fold'].append(i + 1)
    results['MSE'].append(mse)
    results['R²'].append(r2)
    results['RMSE'].append(rmse)
    results['MAE'].append(mae)
    results['MAPE'].append(mape)
    results['Adjusted R²'].append(adjusted_r2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [21]:
RNN_results_df = pd.DataFrame(results)
RNN_results_df.loc['Average'] = RNN_results_df.mean()
RNN_results_df['Fold'] = RNN_results_df['Fold'].astype(int)
RNN_results_df.iloc[-1, 0] = ''
print('Results:\n============\n')
print(RNN_results_df)

Results:

        Fold       MSE        R²      RMSE       MAE        MAPE  Adjusted R²
0          1  0.050567  0.950768  0.224871  0.170397  145.567483     0.950765
1          2  0.056151  0.942632  0.236962  0.182055  149.453231     0.942629
2          3  0.054052  0.946058  0.232491  0.177480  170.755205     0.946055
3          4  0.055365  0.944429  0.235297  0.182244  152.778886     0.944426
4          5  0.076582  0.924496  0.276734  0.212993  241.100627     0.924492
5          6  0.097067  0.903482  0.311556  0.225169  166.190088     0.903477
6          7  0.056875  0.941970  0.238485  0.182404  187.523676     0.941966
7          8  0.094072  0.905906  0.306711  0.222706  523.481730     0.905901
8          9  0.081245  0.919100  0.285035  0.218917  161.944380     0.919096
9         10  0.054477  0.945050  0.233403  0.180638  172.189340     0.945047
Average       0.067645  0.932389  0.258154  0.195500  207.098465     0.932385


## Convolutional Neural Network (CNN)

In [22]:
results = {
    'Fold': [],
    'MSE': [],
    'R²': [],
    'RMSE': [],
    'MAE': [],
    'MAPE': [],
    'Adjusted R²': []
}

for i, (train_index, test_index) in enumerate(kf.split(df_standardised)):
    x_train = df_standardised.loc[train_index, ['subject', 'condition', 'replication', 'leg', 'joint', 'time']]
    y_train = df_standardised.loc[train_index, 'angle_scaled']
    x_test = df_standardised.loc[test_index, ['subject', 'condition', 'replication', 'leg', 'joint', 'time']]
    y_test = df_standardised.loc[test_index, 'angle_scaled']

    x_train = x_train.values.reshape(-1, 6, 1)
    x_test = x_test.values.reshape(-1, 6, 1)

    model = models.Sequential([
        layers.Conv1D(64, kernel_size=3, activation='relu', input_shape=(6, 1)),
        layers.Flatten(),
        layers.Dense(32, activation='relu'),
        layers.Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')

    model.fit(x_train, y_train, epochs=50, batch_size=32, verbose='0')
    y_pred = model.predict(x_test).flatten()

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    rmse = sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)

    results['Fold'].append(i + 1)
    results['MSE'].append(mse)
    results['R²'].append(r2)
    results['RMSE'].append(rmse)
    results['MAE'].append(mae)
    results['MAPE'].append(mape)
    results['Adjusted R²'].append(adjusted_r2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [23]:
CNN_results_df = pd.DataFrame(results)
CNN_results_df.loc['Average'] = CNN_results_df.mean()
CNN_results_df['Fold'] = CNN_results_df['Fold'].astype(int)
CNN_results_df.iloc[-1, 0] = ''
print('Results:\n============\n')
print(CNN_results_df)

Results:

        Fold       MSE        R²      RMSE       MAE       MAPE  Adjusted R²
0          1  0.040495  0.960574  0.201233  0.154520  172.18934     0.945047
1          2  0.038482  0.960684  0.196169  0.149946  172.18934     0.945047
2          3  0.038403  0.961674  0.195968  0.148378  172.18934     0.945047
3          4  0.038948  0.960907  0.197352  0.153385  172.18934     0.945047
4          5  0.034873  0.965618  0.186743  0.144012  172.18934     0.945047
5          6  0.032094  0.968088  0.179147  0.135288  172.18934     0.945047
6          7  0.032171  0.967175  0.179363  0.139118  172.18934     0.945047
7          8  0.041443  0.958547  0.203575  0.157975  172.18934     0.945047
8          9  0.036496  0.963659  0.191039  0.146235  172.18934     0.945047
9         10  0.040960  0.958684  0.202385  0.157205  172.18934     0.945047
Average       0.037436  0.962561  0.193297  0.148606  172.18934     0.945047
