In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.ensemble import GradientBoostingRegressor
from math import sqrt

In [2]:
# Read the standardised gait measurements from the CSV one directory up,
# then show the first five rows as the cell output.
df_standardised = pd.read_csv(filepath_or_buffer='../gait_standardised.csv')
df_standardised.head(n=5)

Unnamed: 0,subject,condition,replication,leg,joint,time,angle,angle_scaled
0,1,1,1,1,1,0,4.682881,-0.465902
1,1,1,1,1,1,1,5.073127,-0.441551
2,1,1,1,1,1,2,5.229774,-0.431776
3,1,1,1,1,1,3,5.083273,-0.440918
4,1,1,1,1,1,4,4.652399,-0.467804


In [3]:
# Ten shuffled folds; the fixed seed keeps the row-to-fold assignment
# reproducible across runs.
kf = KFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)
# Echo the configured number of splits as the cell output.
kf.get_n_splits(df_standardised)

10

In [4]:
# Accumulator for the cross-validation metrics: one independent, initially
# empty list per reported column, in display order.
results = {
    column: []
    for column in ('Fold', 'MSE', 'R²', 'RMSE', 'MAE', 'MAPE', 'Adjusted R²')
}

In [5]:
# Cross-validate a gradient-boosting regressor on the standardised joint
# angle and record one set of metrics per fold in `results`.
feature_columns = ['subject', 'condition', 'replication', 'leg', 'joint']
target_column = 'angle_scaled'

# Select the predictor and target columns once; each fold only picks rows.
features = df_standardised[feature_columns]
target = df_standardised[target_column]

# Start from empty metric lists so that re-running this cell does not append
# a second set of folds to `results`.
for metric_values in results.values():
    metric_values.clear()

for i, (train_index, test_index) in enumerate(kf.split(df_standardised)):
    # KFold yields positional row numbers, so rows are selected with .iloc;
    # .loc is only equivalent while the frame keeps its default 0..n-1 index.
    x_train = features.iloc[train_index]
    y_train = target.iloc[train_index]
    x_test = features.iloc[test_index]
    y_test = target.iloc[test_index]

    # Create and fit the Gradient Boosting Regressor
    # (the variable keeps the name `clf` although the model is a regressor).
    clf = GradientBoostingRegressor(random_state=42)
    clf.fit(x_train, y_train)

    y_pred = clf.predict(x_test)

    # Calculate evaluation metrics on the held-out fold
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    rmse = sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    # MAPE divides by the true value; target values close to zero inflate
    # it, so treat it as a rough indicator only.
    mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100

    # Adjusted R² penalises R² for the number of predictors, using the size
    # of the test fold as the sample count.
    n = x_test.shape[0]  # Number of samples
    p = x_test.shape[1]  # Number of features
    adj_r2 = 1 - ((1 - r2) * (n - 1)) / (n - p - 1)

    # Store the results in the dictionary (folds are reported 1-based)
    results['Fold'].append(i + 1)
    results['MSE'].append(mse)
    results['R²'].append(r2)
    results['RMSE'].append(rmse)
    results['MAE'].append(mae)
    results['MAPE'].append(mape)
    results['Adjusted R²'].append(adj_r2)

In [6]:
# Tabulate the per-fold metrics and append an 'Average' summary row.
fold_results = pd.DataFrame(results)

# Average only the metric columns; a mean of the fold numbers is meaningless,
# so the summary row gets a blank fold label instead.
average_row = fold_results.drop(columns='Fold').mean().to_frame().T
average_row.index = ['Average']
average_row.insert(0, 'Fold', '')

# Hold 'Fold' as object before combining so the integer fold numbers and the
# blank label share one column without writing a string into an int column.
results_df = pd.concat([fold_results.astype({'Fold': object}), average_row])

# The model evaluated above is a regressor, so the heading says so.
title = 'Gradient Boosting Regressor Results:'
print(f"{title}\n{'=' * len(title)}\n")
print(results_df)

Gradient Boosting Classifier Results:

        Fold       MSE        R²      RMSE       MAE         MAPE  Adjusted R²
0          1  0.748078  0.271669  0.864915  0.656933   436.033323     0.271468
1          2  0.723489  0.260834  0.850582  0.648852   418.546389     0.260631
2          3  0.733305  0.268182  0.856332  0.652072   574.936809     0.267981
3          4  0.728913  0.268376  0.853764  0.648979   446.560261     0.268175
4          5  0.742898  0.267559  0.861916  0.656961   527.998457     0.267358
5          6  0.739273  0.264913  0.859810  0.653289   419.226212     0.264711
6          7  0.721958  0.263374  0.849681  0.646621   462.915888     0.263171
7          8  0.731075  0.268752  0.855030  0.650949  1505.414291     0.268551
8          9  0.736114  0.267011  0.857971  0.651924   387.604427     0.266809
9         10  0.731166  0.262482  0.855082  0.652031   484.731366     0.262279
Average       0.733627  0.266315  0.856508  0.651861   566.396742     0.266113
