In [1]:
import pandas as pd
import numpy as np
import os
import glob
import json
from joblib import dump, load
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.svm import SVR

## Configuration

In [2]:
JOINT = 'Knee'
FORCE_CELLS_PER_JOINT = {
    'Hip': [5, 6],
    'Knee': [3, 4, 7, 8],
    'Ankle': [1, 2]
}

CELLS = FORCE_CELLS_PER_JOINT[JOINT]

# Path where the results are stored
RESULTS_PATH = '../../../../results'
# ID of the training and validation data resulting from this notebook, stored in RESULTS_PATH
DATA_ID = '0010_09082021'
# Hyperparameters search date
HS_DATE = '23082021'
# Number of folds in cross-validation
CV = 6

print('Model trained with data: ' + DATA_ID)

pd.set_option('display.max_columns', None)

Model trained with data: 0010_09082021


## Hyperparameters seach analysis

In [8]:
results_files_ls = glob.glob(os.path.join(RESULTS_PATH, DATA_ID, '{}_SVM_{}'.format(JOINT, HS_DATE), '{}_SVM_{}_*.json'.format(JOINT, HS_DATE)))

print('Number of results files: {}'.format(len(results_files_ls)))

Number of results files: 2


In [9]:
# Load all the results and generates a pandas dataframe
results_ls = []
for results_file in results_files_ls:
    with open(results_file) as json_file:
        results_dict = json.load(json_file)
        
    dict_aux = {}
    dict_aux['params_ID'] = results_dict['id']
    for key, value in results_dict['parameters'].items():
        dict_aux['param_' + key] = value
    for key, value in results_dict['cv_results'].items():
        dict_aux['__'.join([key, 'mean'])] = np.mean(value)
        dict_aux['__'.join([key, 'std'])] = np.std(value)

    results_ls.append(dict_aux)
        
results_df = pd.DataFrame(results_ls)
results_df

Unnamed: 0,params_ID,param_C,param_epsilon,param_kernel,fit_time__mean,fit_time__std,Train_Fx_MAE_mean__mean,Train_Fx_MAE_mean__std,Train_Fx_MAE_std__mean,Train_Fx_MAE_std__std,Train_Fx_MSE_mean__mean,Train_Fx_MSE_mean__std,Train_Fx_MSE_std__mean,Train_Fx_MSE_std__std,Train_Fx_R2_mean__mean,Train_Fx_R2_mean__std,Train_Fx_R2_std__mean,Train_Fx_R2_std__std,Train_Fy_MAE_mean__mean,Train_Fy_MAE_mean__std,Train_Fy_MAE_std__mean,Train_Fy_MAE_std__std,Train_Fy_MSE_mean__mean,Train_Fy_MSE_mean__std,Train_Fy_MSE_std__mean,Train_Fy_MSE_std__std,Train_Fy_R2_mean__mean,Train_Fy_R2_mean__std,Train_Fy_R2_std__mean,Train_Fy_R2_std__std,Valid_Fx_MAE_mean__mean,Valid_Fx_MAE_mean__std,Valid_Fx_MAE_std__mean,Valid_Fx_MAE_std__std,Valid_Fx_MSE_mean__mean,Valid_Fx_MSE_mean__std,Valid_Fx_MSE_std__mean,Valid_Fx_MSE_std__std,Valid_Fx_R2_mean__mean,Valid_Fx_R2_mean__std,Valid_Fx_R2_std__mean,Valid_Fx_R2_std__std,Valid_Fy_MAE_mean__mean,Valid_Fy_MAE_mean__std,Valid_Fy_MAE_std__mean,Valid_Fy_MAE_std__std,Valid_Fy_MSE_mean__mean,Valid_Fy_MSE_mean__std,Valid_Fy_MSE_std__mean,Valid_Fy_MSE_std__std,Valid_Fy_R2_mean__mean,Valid_Fy_R2_mean__std,Valid_Fy_R2_std__mean,Valid_Fy_R2_std__std
0,RZPN3DRWPG,1.2,0.5,poly,173.922353,33.703604,7.784519,0.09108,1.957698,0.123541,142.478158,6.201144,63.356511,7.162342,0.608028,0.013751,0.057119,0.01055,7.863635,0.079577,3.284827,0.062713,141.001356,1.798333,88.434492,2.11018,0.432833,0.004989,0.17607,0.00653,8.734623,0.543506,2.3678,0.60307,176.97161,27.385427,98.050431,34.792542,0.501343,0.090172,0.121173,0.058188,8.419054,0.408431,3.49951,0.235493,160.133823,7.957346,101.519287,4.843686,0.324526,0.054666,0.195021,0.05143
1,S03TD0HLXZ,0.3,0.3,rbf,138.097628,27.349773,7.6244,0.104425,1.867361,0.12937,135.428983,6.604526,61.49641,6.58457,0.629447,0.013212,0.055987,0.009997,8.074813,0.089167,3.442786,0.068831,153.888173,2.891384,99.384426,2.920233,0.409753,0.005828,0.153242,0.010371,8.323312,0.7474,2.071444,0.684524,153.602643,38.749409,77.424218,38.817307,0.560568,0.083821,0.115225,0.061854,8.518255,0.529435,3.614128,0.240606,166.470319,14.392772,111.210498,14.70622,0.336121,0.026932,0.164085,0.029141


In [10]:
# Sum up the scores by force axis in only one sortable score
for subset in ['Train', 'Valid']:
    for loss in ['MAE', 'MSE', 'R2']:
        results_df[subset + '_' + loss] = results_df[[subset + '_' + force + '_' + loss + '_mean__mean' for force in ['Fx', 'Fy']]].mean(axis=1)

In [11]:
# Sort the dataframe by the most relevant score
results_df = results_df.sort_values(['Valid_R2'], ascending=False)
results_df

Unnamed: 0,params_ID,param_C,param_epsilon,param_kernel,fit_time__mean,fit_time__std,Train_Fx_MAE_mean__mean,Train_Fx_MAE_mean__std,Train_Fx_MAE_std__mean,Train_Fx_MAE_std__std,Train_Fx_MSE_mean__mean,Train_Fx_MSE_mean__std,Train_Fx_MSE_std__mean,Train_Fx_MSE_std__std,Train_Fx_R2_mean__mean,Train_Fx_R2_mean__std,Train_Fx_R2_std__mean,Train_Fx_R2_std__std,Train_Fy_MAE_mean__mean,Train_Fy_MAE_mean__std,Train_Fy_MAE_std__mean,Train_Fy_MAE_std__std,Train_Fy_MSE_mean__mean,Train_Fy_MSE_mean__std,Train_Fy_MSE_std__mean,Train_Fy_MSE_std__std,Train_Fy_R2_mean__mean,Train_Fy_R2_mean__std,Train_Fy_R2_std__mean,Train_Fy_R2_std__std,Valid_Fx_MAE_mean__mean,Valid_Fx_MAE_mean__std,Valid_Fx_MAE_std__mean,Valid_Fx_MAE_std__std,Valid_Fx_MSE_mean__mean,Valid_Fx_MSE_mean__std,Valid_Fx_MSE_std__mean,Valid_Fx_MSE_std__std,Valid_Fx_R2_mean__mean,Valid_Fx_R2_mean__std,Valid_Fx_R2_std__mean,Valid_Fx_R2_std__std,Valid_Fy_MAE_mean__mean,Valid_Fy_MAE_mean__std,Valid_Fy_MAE_std__mean,Valid_Fy_MAE_std__std,Valid_Fy_MSE_mean__mean,Valid_Fy_MSE_mean__std,Valid_Fy_MSE_std__mean,Valid_Fy_MSE_std__std,Valid_Fy_R2_mean__mean,Valid_Fy_R2_mean__std,Valid_Fy_R2_std__mean,Valid_Fy_R2_std__std,Train_MAE,Train_MSE,Train_R2,Valid_MAE,Valid_MSE,Valid_R2
1,S03TD0HLXZ,0.3,0.3,rbf,138.097628,27.349773,7.6244,0.104425,1.867361,0.12937,135.428983,6.604526,61.49641,6.58457,0.629447,0.013212,0.055987,0.009997,8.074813,0.089167,3.442786,0.068831,153.888173,2.891384,99.384426,2.920233,0.409753,0.005828,0.153242,0.010371,8.323312,0.7474,2.071444,0.684524,153.602643,38.749409,77.424218,38.817307,0.560568,0.083821,0.115225,0.061854,8.518255,0.529435,3.614128,0.240606,166.470319,14.392772,111.210498,14.70622,0.336121,0.026932,0.164085,0.029141,7.849606,144.658578,0.5196,8.420784,160.036481,0.448344
0,RZPN3DRWPG,1.2,0.5,poly,173.922353,33.703604,7.784519,0.09108,1.957698,0.123541,142.478158,6.201144,63.356511,7.162342,0.608028,0.013751,0.057119,0.01055,7.863635,0.079577,3.284827,0.062713,141.001356,1.798333,88.434492,2.11018,0.432833,0.004989,0.17607,0.00653,8.734623,0.543506,2.3678,0.60307,176.97161,27.385427,98.050431,34.792542,0.501343,0.090172,0.121173,0.058188,8.419054,0.408431,3.49951,0.235493,160.133823,7.957346,101.519287,4.843686,0.324526,0.054666,0.195021,0.05143,7.824077,141.739757,0.52043,8.576839,168.552717,0.412934


In [12]:
best_params = dict(results_df.iloc[0][[col for col in results_df.columns if 'param_' in col]])
best_params = {key.replace('param_', ''): value for key, value in best_params.items()}
print('Best parameters: {}'.format(best_params))

Best parameters: {'C': 0.3, 'epsilon': 0.3, 'kernel': 'rbf'}


## Best model

In [8]:
# Load data
X_train = np.load(os.path.join(RESULTS_PATH, DATA_ID, 'data', '{}_X_train_{}.npy'.format(JOINT, DATA_ID)))
X_test = np.load(os.path.join(RESULTS_PATH, DATA_ID, 'data', '{}_X_test_{}.npy'.format(JOINT, DATA_ID)))
Y_train = np.load(os.path.join(RESULTS_PATH, DATA_ID, 'data', '{}_Y_train_{}.npy'.format(JOINT, DATA_ID)))
Y_test = np.load(os.path.join(RESULTS_PATH, DATA_ID, 'data', '{}_Y_test_{}.npy'.format(JOINT, DATA_ID)))

In [9]:

model.fit(X_train, Y_train)

# Save the model
dump(model, os.path.join(RESULTS_PATH, DATA_ID, '{}_SVM_{}'.format(JOINT, HS_DATE), '{}SVM_best_model_{}_{}.joblib'.format(JOINT, HS_DATE, DATA_ID))) 

results = defaultdict(list)
tr_time = []
for target in range(Y_train.shape[1]):
    # Setup the model with the best parameters
    model = SVR(**best_params, verbose=0)

    t_start = time.time()
    model.fit(X_train, Y_train[:, target])
    tr_time.append(time.time() - t_start)
    
    # Save the model
    dump(model, os.path.join(RESULTS_PATH, DATA_ID, '{}_SVM_{}'.format(JOINT, HS_DATE), '{}_SVM_best_model_{}_{}_{}.joblib'.format(JOINT, target, HS_DATE, DATA_ID))) 
    
    train_preds = model.predict(X_train)
    test_preds = model.predict(X_test)

    results['Train_MAE'].append(mean_absolute_error(Y_train[:, target], train_preds))
    results['Train_MSE'].append(mean_squared_error(Y_train[:, target], train_preds))
    results['Train_R2'].append(r2_score(Y_train[:, target], train_preds))
    results['Test_MAE'].append(mean_absolute_error(Y_test[:, target], test_preds))
    results['Test_MSE'].append(mean_squared_error(Y_test[:, target], test_preds))
    results['Test_R2'].append(r2_score(Y_test[:, target], test_preds))

print('Training time: {:.4f}'.format(sum(tr_time)))

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done 1234 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done 1784 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done 2434 tasks      | elapsed:    5.3s
[Parallel(n_jobs=-1)]: Done 2500 out of 2500 | elapsed:    5.5s finished


['../../../../results/0013_09082021/Hip_RF_13082021/Hip_RF_best_model_13082021_0013_09082021.joblib']

In [10]:
# Display the score mean and standard deviation of each axis
for subset in ['Train', 'Test']:
    for f, force in enumerate(['Fx', 'Fy']):
        for loss in ['MAE', 'MSE', 'R2']:
            scores = [results['_'.join([subset, loss])][i + f] for i in range(0, len(CELLS) * 2, 2)]
            print(' '.join([subset, force, loss]) + ': {:.4f} ± {:.4f}'.format(np.mean(scores), np.std(scores)))

[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 784 tasks      | elapsed:    0.2s
[Parallel(n_jobs=8)]: Done 1234 tasks      | elapsed:    0.3s
[Parallel(n_jobs=8)]: Done 1784 tasks      | elapsed:    0.5s
[Parallel(n_jobs=8)]: Done 2434 tasks      | elapsed:    0.7s
[Parallel(n_jobs=8)]: Done 2500 out of 2500 | elapsed:    0.7s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 784 tasks      | elapsed:    0.1s
[Parallel(n_jobs=8)]: Done 1234 tasks      | elapsed:    0.2s


Train Fx MAE: 1.4383 ± 0.1453
Train Fx MSE: 4.1693 ± 0.6342
Train Fx R2: 0.9814 ± 0.0078
Train Fy MAE: 2.2358 ± 1.4441
Train Fy MSE: 19.0880 ± 17.6000
Train Fy R2: 0.9614 ± 0.0008
Test Fx MAE: 5.8537 ± 0.5953
Test Fx MSE: 70.5377 ± 19.4663
Test Fx R2: 0.7938 ± 0.0597
Test Fy MAE: 9.6492 ± 6.9066
Test Fy MSE: 376.1152 ± 359.6698
Test Fy R2: 0.6109 ± 0.0379


[Parallel(n_jobs=8)]: Done 1784 tasks      | elapsed:    0.3s
[Parallel(n_jobs=8)]: Done 2434 tasks      | elapsed:    0.4s
[Parallel(n_jobs=8)]: Done 2500 out of 2500 | elapsed:    0.4s finished


In [11]:
model.coef_

array([0.06663225, 0.05709593, 0.06912625, 0.06582222, 0.05898258,
       0.07084185, 0.06822455, 0.05788318, 0.07201463, 0.06903547,
       0.0604749 , 0.07193388, 0.07474517, 0.06273663, 0.0744505 ])

In [None]:
# plt.figure(figsize=(20,15))
# plt.scatter(Y_train[:, 3], Y_train[:, 4], label='true', alpha=0.3)
# plt.scatter(train_preds[:, 3], train_preds[:, 4], label='preds', alpha=0.3)
# plt.legend()
# plt.show()

# plt.figure(figsize=(20,15))
# plt.scatter(Y_train[:100, 3], Y_train[:100, 4], label='true', alpha=0.3)
# plt.scatter(train_preds[:100, 3], train_preds[:100, 4], label='preds', alpha=0.3)
# plt.legend()
# plt.show()

# plt.figure(figsize=(20,15))
# plt.scatter(Y_test[:, 3], Y_test[:, 4], label='true', alpha=0.3)
# plt.scatter(test_preds[:, 3], test_preds[:, 4], label='preds', alpha=0.3)
# plt.legend()
# plt.show()