# Decision Tree

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn import tree
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from math import sqrt

In [2]:
# Read the standardised gait data; the path is relative to the notebook's
# working directory.
gait_csv_path = '../gait_standardised.csv'
df_standardised = pd.read_csv(gait_csv_path)

# Preview the first rows as the cell output.
df_standardised.head()

Unnamed: 0,subject,condition,replication,leg,joint,time,angle,angle_scaled
0,1,1,1,1,1,0,4.682881,-0.465902
1,1,1,1,1,1,1,5.073127,-0.441551
2,1,1,1,1,1,2,5.229774,-0.431776
3,1,1,1,1,1,3,5.083273,-0.440918
4,1,1,1,1,1,4,4.652399,-0.467804


In [3]:
# Show the dtypes exactly as they were parsed from the CSV.
print(f"Data types before:\n{df_standardised.dtypes}\n")

# Identifier-like integer columns are recast as pandas categoricals.
categorical_columns = ['subject', 'condition', 'replication', 'leg', 'joint', 'time']
for column_name in categorical_columns:
    df_standardised[column_name] = df_standardised[column_name].astype('category')

# `time` additionally gets an explicit, ordered list of levels 0..100.
time_levels = list(range(0, 101))
df_standardised['time'] = df_standardised['time'].cat.set_categories(time_levels, ordered=True)

# Show the dtypes again to confirm the conversion.
print(f"Data types after:\n{df_standardised.dtypes}")

Data types before:
subject           int64
condition         int64
replication       int64
leg               int64
joint             int64
time              int64
angle           float64
angle_scaled    float64
dtype: object

Data types after:
subject         category
condition       category
replication     category
leg             category
joint           category
time            category
angle            float64
angle_scaled     float64
dtype: object


In [4]:
# 10-fold splitter; rows are shuffled with a fixed seed so the fold
# membership is the same on every run.
kf = KFold(shuffle=True, random_state=42, n_splits=10)

# Cell output: the number of folds the splitter will produce.
kf.get_n_splits(df_standardised)

10

In [5]:
# Per-fold metric accumulator: one independent, initially empty list per
# column of the final results table. The cross-validation loop appends one
# value to each list per fold.
metric_names = ('Fold', 'MSE', 'R²', 'RMSE', 'MAE', 'MAPE', 'Adjusted R²')
results = {metric_name: [] for metric_name in metric_names}

In [6]:
# Predictor columns shared by the training and test selections of every fold.
# NOTE(review): `time` is recast as a category earlier in the notebook but is
# not used as a predictor here — confirm that leaving it out is intended.
feature_columns = ['subject', 'condition', 'replication', 'leg', 'joint']

# Empty the accumulators first so that re-running this cell replaces the
# previous fold rows instead of appending a duplicate set.
for metric_values in results.values():
    metric_values.clear()

# Fit and score one decision-tree regressor per cross-validation fold.
for i, (train_index, test_index) in enumerate(kf.split(df_standardised)):
    # kf.split yields positional row numbers; selecting them with .loc relies
    # on the frame still having the default 0..n-1 index from read_csv.
    x_train = df_standardised.loc[train_index, feature_columns]
    y_train = df_standardised.loc[train_index, 'angle_scaled']

    # Seeded so the fitted tree (and therefore every metric) is repeatable;
    # the seed matches the one used for the KFold splitter.
    clf = tree.DecisionTreeRegressor(random_state=42)
    clf = clf.fit(x_train, y_train)

    x_test = df_standardised.loc[test_index, feature_columns]
    y_test = df_standardised.loc[test_index, 'angle_scaled']

    y_pred = clf.predict(x_test)

    # Calculate metrics on the held-out fold
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    rmse = sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    # sklearn returns MAPE as a fraction and guards against a zero
    # denominator; scale by 100 to keep the percentage units of the table.
    # The target is standardised (centred near zero), so individual ratios can
    # be very large — interpret this column with care.
    mape = mean_absolute_percentage_error(y_test, y_pred) * 100

    # Adjusted R² = 1 - (1 - R²)(n - 1) / (n - p - 1)
    n = x_test.shape[0]  # Number of samples in the test fold
    p = x_test.shape[1]  # Number of features
    adj_r2 = 1 - ((1 - r2) * (n - 1)) / (n - p - 1)

    # Record this fold's row (folds are reported 1-based).
    results['Fold'].append(i + 1)
    results['MSE'].append(mse)
    results['R²'].append(r2)
    results['RMSE'].append(rmse)
    results['MAE'].append(mae)
    results['MAPE'].append(mape)
    results['Adjusted R²'].append(adj_r2)

In [7]:
# Tabulate the per-fold metrics and append a summary row of column means.
results_df = pd.DataFrame(results)
results_df.loc['Average'] = results_df.mean()

# Appending the mean row turns `Fold` into floats; restore whole numbers, then
# hold them as Python objects so the blank label below is a valid value for
# the column (writing a string into an int64 column is an incompatible-dtype
# assignment that pandas has deprecated).
results_df['Fold'] = results_df['Fold'].astype(int).astype(object)

# The averaged fold number is meaningless, so blank it out on the summary row.
results_df.loc['Average', 'Fold'] = ''

print('Decision Tree Results:\n======================')
print(results_df)

Decision Tree Results:
        Fold       MSE        R²      RMSE       MAE         MAPE  Adjusted R²
0          1  0.743741  0.275891  0.862404  0.654472   441.031484     0.275692
1          2  0.719386  0.265026  0.848166  0.646619   453.024693     0.264824
2          3  0.729595  0.271884  0.854164  0.649366   589.536283     0.271684
3          4  0.726959  0.270338  0.852619  0.647323   456.933480     0.270137
4          5  0.735144  0.275205  0.857405  0.653848   556.983081     0.275006
5          6  0.735076  0.269087  0.857366  0.650741   426.806065     0.268885
6          7  0.720701  0.264656  0.848941  0.645590   443.831678     0.264454
7          8  0.727010  0.272819  0.852649  0.647862  1224.183739     0.272619
8          9  0.730633  0.272469  0.854771  0.648425   405.311376     0.272268
9         10  0.727458  0.266223  0.852911  0.649672   567.611925     0.266021
Average       0.729570  0.270360  0.854140  0.649392   556.525380     0.270159
