# Linear Regression

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, cohen_kappa_score
from sklearn.linear_model import LinearRegression
from math import sqrt

In [2]:
# Read the gait dataset from the CSV one directory above the notebook,
# then preview its first five rows.
df_standardised = pd.read_csv(filepath_or_buffer='../gait_standardised.csv')
df_standardised.head(n=5)

Unnamed: 0,subject,condition,replication,leg,joint,time,angle,angle_scaled
0,1,1,1,1,1,0,4.682881,-0.465902
1,1,1,1,1,1,1,5.073127,-0.441551
2,1,1,1,1,1,2,5.229774,-0.431776
3,1,1,1,1,1,3,5.083273,-0.440918
4,1,1,1,1,1,4,4.652399,-0.467804


In [3]:
# 10-fold splitter. Rows are shuffled before splitting and the seed is fixed,
# so the fold assignment is the same on every run.
kf = KFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)
kf.get_n_splits(X=df_standardised)

10

In [4]:
# Cast the identifier columns to pandas categoricals and report the dtypes
# before and after the conversion.
print(f"Data types before:\n{df_standardised.dtypes}\n")

categorical_columns = ['subject', 'condition', 'replication', 'leg', 'joint', 'time']
df_standardised[categorical_columns] = df_standardised[categorical_columns].astype(
    {name: 'category' for name in categorical_columns}
)

# `time` gets an explicit, ordered category set covering 0..100 inclusive.
df_standardised['time'] = df_standardised['time'].cat.set_categories(
    list(range(101)),
    ordered=True,
)

print(f"Data types after:\n{df_standardised.dtypes}")

Data types before:
subject           int64
condition         int64
replication       int64
leg               int64
joint             int64
time              int64
angle           float64
angle_scaled    float64
dtype: object

Data types after:
subject         category
condition       category
replication     category
leg             category
joint           category
time            category
angle            float64
angle_scaled     float64
dtype: object


In [5]:
# Per-fold metric store: one independent, initially empty list per reported
# column. The cross-validation loop appends one value to each list per fold.
results = {
    name: []
    for name in ('Fold', 'MSE', 'R²', 'RMSE', 'MAE', 'MAPE', 'Adjusted R²')
}

In [6]:
# Cross-validated evaluation of a LinearRegression model on `angle_scaled`.
# For every KFold split the model is refit on the training rows and scored on
# the held-out rows; the per-fold metrics are appended to `results`.

# Start from empty lists so that re-running this cell does not append a second
# set of folds on top of the metrics collected by a previous run.
for metric_values in results.values():
    metric_values.clear()

# NOTE(review): these columns were cast to `category` earlier in the notebook,
# but they are handed to LinearRegression as-is (no one-hot encoding), so the
# model presumably treats the codes as plain numbers — confirm this is intended.
feature_columns = ['subject', 'condition', 'replication', 'leg', 'joint']
target_column = 'angle_scaled'

for i, (train_index, test_index) in enumerate(kf.split(df_standardised)):
    # KFold yields positional row numbers, so rows are selected with .iloc;
    # label-based .loc only matches when the index is a default RangeIndex.
    train_rows = df_standardised.iloc[train_index]
    test_rows = df_standardised.iloc[test_index]

    x_train = train_rows[feature_columns]
    y_train = train_rows[target_column]
    x_test = test_rows[feature_columns]
    y_test = test_rows[target_column]

    # Create and fit the Linear Regression model
    clf = LinearRegression()
    clf.fit(x_train, y_train)

    y_pred = clf.predict(x_test)

    # Calculate evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    rmse = sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)

    # MAPE divides by the true value, so rows whose target is exactly zero are
    # skipped instead of turning the whole fold into inf.
    # NOTE(review): the target looks standardised (values close to zero), which
    # makes MAPE very sensitive to small denominators — interpret with care.
    y_true = y_test.to_numpy()
    nonzero = y_true != 0
    if nonzero.any():
        mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
    else:
        mape = np.nan

    # Adjusted R² computed from the size of the held-out fold.
    n = x_test.shape[0]  # Number of samples
    p = x_test.shape[1]  # Number of features
    adj_r2 = 1 - ((1 - r2) * (n - 1)) / (n - p - 1)

    # Store the results in the dictionary
    results['Fold'].append(i + 1)
    results['MSE'].append(mse)
    results['R²'].append(r2)
    results['RMSE'].append(rmse)
    results['MAE'].append(mae)
    results['MAPE'].append(mape)
    results['Adjusted R²'].append(adj_r2)

In [7]:
# Build the per-fold table and append an 'Average' row holding the mean of
# every metric column.
fold_results = pd.DataFrame(results)
metric_means = fold_results.drop(columns='Fold').mean()

# The summary row has no fold number, so its 'Fold' cell is blank. The row is
# built separately and concatenated onto an object-typed 'Fold' column rather
# than writing '' into an integer column, which newer pandas versions reject
# as an incompatible-dtype assignment.
average_row = pd.DataFrame(
    {'Fold': [''], **{name: [value] for name, value in metric_means.items()}},
    index=['Average'],
)
results_df = pd.concat([fold_results.astype({'Fold': object}), average_row])

print('Linear Regression Results:\n==========================')
print(results_df)

Linear Regression Results:
        Fold       MSE        R²      RMSE       MAE        MAPE  Adjusted R²
0          1  0.992714  0.033490  0.996350  0.693341  182.045424     0.033225
1          2  0.946672  0.032816  0.972970  0.680207  186.154575     0.032550
2          3  0.966859  0.035102  0.983290  0.685757  233.748001     0.034837
3          4  0.960965  0.035462  0.980288  0.681012  182.986056     0.035197
4          5  0.979794  0.033998  0.989846  0.689577  260.850475     0.033733
5          6  0.974421  0.031097  0.987128  0.686465  170.625891     0.030830
6          7  0.949438  0.031272  0.974391  0.680261  197.264056     0.031006
7          8  0.966131  0.033641  0.982919  0.684985  926.595779     0.033376
8          9  0.970786  0.033335  0.985285  0.685243  158.572453     0.033069
9         10  0.959541  0.032123  0.979562  0.682454  254.028812     0.031856
Average       0.966732  0.033234  0.983203  0.684930  275.287152     0.032968
