In [None]:
import numpy as np
import os
import gc
import sys
import time
import random
import string
from time import strftime
from joblib import dump, load
import json
from collections import defaultdict
from sklearn.model_selection import ParameterGrid, cross_validate
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.utils import shuffle

In [None]:
from IPython.utils.io import Tee

# Redirect all the output messages to the terminal and to a timestamped log file
logs_dir = './logs'
logfilename = ''.join([logs_dir, strftime('/ipython_%Y-%m-%d_%H:%M:%S'), '.log'])

# The log directory must exist before the log file can be opened in it
if not os.path.exists(logs_dir):
    os.makedirs(logs_dir)

# Rebind stdout to the process's /dev/stdout stream, then attach the log file
# to the 'stdout' channel through IPython's Tee
sys.stdout = open('/dev/stdout', 'w')
Tee(logfilename, mode='w', channel='stdout')


## Configuration

In [None]:
# Joint selected for this run
JOINT = 'Knee'

# Force cells listed for every joint
FORCE_CELLS_PER_JOINT = dict(
    Hip=[5, 6],
    Knee=[3, 4, 7, 8],
    Ankle=[1, 2],
)

# Force cells of the selected joint
CELLS = FORCE_CELLS_PER_JOINT[JOINT]

# Path where the results are stored
RESULTS_PATH = '../../../../results'
# ID of the training and validation data resulting from this notebook, stored in RESULTS_PATH
DATA_ID = '0010_09082021'
# Number of folds for cross-validation
CV = 6

print('Model training with data: ', DATA_ID, sep='')

## Support Vector Machine hyperparameter search

In [None]:
# Hyperparameters search date (used to name the output directory and result files)
hs_date = '24082021'

# Parameters grid
param_grid = {
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'C': [0.2, 0.3, 0.5, 0.8, 1.0, 1.2],
    'epsilon': [0.1, 0.2, 0.3, 0.5, 0.7, 0.9],
}

# Expand the grid into explicit combinations and visit them in random order
param_grid_ls = list(ParameterGrid(param_grid))
random.shuffle(param_grid_ls)
param_grid_len = len(param_grid_ls)
print('Number of parameters combinations: {}'.format(param_grid_len))

# Input and output locations do not depend on the parameter combination,
# so they are resolved (and the output directory created) once up front
data_dir = os.path.join(RESULTS_PATH, DATA_ID, 'data')
save_dir = os.path.join(RESULTS_PATH, DATA_ID, '{}_SVM_{}'.format(JOINT, hs_date))
os.makedirs(save_dir, exist_ok=True)

for idx, params in enumerate(param_grid_ls):
    print(strftime('%Y-%m-%d %H:%M:%S'))
    # Random 10-character identifier used to name this combination's result file
    params_id = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
    print('Parameters ({}) {}/{}'.format(params_id, idx + 1, param_grid_len))
    print(params)

    # Train the model with cross-validation; each key collects one value per fold
    cv_results = defaultdict(list)
    for fold_id in range(CV):
        print('Fold {}'.format(fold_id + 1))

        # Load the four arrays of this fold (file names use 1-based fold numbers)
        X_train, X_valid, Y_train, Y_valid = (
            np.load(os.path.join(data_dir, '{}_{}_cv{}_{}.npy'.format(JOINT, part, fold_id + 1, DATA_ID)))
            for part in ('X_train', 'X_valid', 'Y_train', 'Y_valid')
        )

        # Setup the model: the SVR is wrapped so that it can be fitted on multi-column targets
        model = MultiOutputRegressor(SVR(**params, verbose=0), n_jobs=-1)

        # Train the model and record the wall-clock fit time
        t_start = time.time()
        model.fit(X_train, Y_train)
        t_end = time.time()

        cv_results['fit_time'].append(t_end - t_start)
        print('Training time: {:.4f}'.format(cv_results['fit_time'][-1]))

        # Get the per-output scores on both subsets
        train_preds = model.predict(X_train)
        valid_preds = model.predict(X_valid)

        results = {
            subset: {
                'MAE': mean_absolute_error(y_true, y_pred, multioutput='raw_values'),
                'MSE': mean_squared_error(y_true, y_pred, multioutput='raw_values'),
                'R2': r2_score(y_true, y_pred, multioutput='raw_values'),
            }
            for subset, y_true, y_pred in (
                ('Train', Y_train, train_preds),
                ('Valid', Y_valid, valid_preds),
            )
        }

        # Aggregate the scores over the force cells: output columns f, f + 2, f + 4, ...
        # are grouped under the same force label
        # (assumes the targets hold one Fx/Fy column pair per cell - TODO confirm)
        for subset in ['Train', 'Valid']:
            for f, force in enumerate(['Fx', 'Fy']):
                for loss in ['MAE', 'MSE', 'R2']:
                    scores = [results[subset][loss][i + f] for i in range(0, len(CELLS) * 2, 2)]
                    # Cast to built-in float so the values are always JSON serializable,
                    # whatever numpy scalar type the metrics return
                    cv_results['_'.join([subset, force, loss, 'mean'])].append(float(np.mean(scores)))
                    cv_results['_'.join([subset, force, loss, 'std'])].append(float(np.std(scores)))

    # Save the obtained results and its parameters into a JSON file
    rd = {
        'id': params_id,
        'parameters': params,
        'cv_results': dict(cv_results),
    }

    with open(os.path.join(save_dir, '{}_SVM_{}_{}.json'.format(JOINT, hs_date, params_id)), 'w') as fp:
        json.dump(rd, fp)

    print('\n\n')
    # Release the fitted model and the per-combination results before the next iteration
    del model, results, cv_results, rd
    gc.collect()

In [None]:
# Path where the results are stored
RESULTS_PATH = '../../../../results'

# Hyperparameters search date (used to name the output directories and result files)
hs_date = '24082021'

# Parameters grid
param_grid = {
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'C': [0.2, 0.3, 0.5, 0.8, 1.0, 1.2],
    'epsilon': [0.1, 0.2, 0.3, 0.5, 0.7, 0.9],
}

# Expand the grid into explicit combinations; the same shuffled order is reused
# for every data ID and joint below
param_grid_ls = list(ParameterGrid(param_grid))
random.shuffle(param_grid_ls)
param_grid_len = len(param_grid_ls)
print('Number of parameters combinations: {}'.format(param_grid_len))

# Force cells listed for every joint
FORCE_CELLS_PER_JOINT = {
    'Hip': [5, 6],
    'Knee': [3, 4, 7, 8],
    'Ankle': [1, 2]
}

# Number of cross-validation folds to run for each data ID
cv_data_id = {
    '0010_09082021': 6,
    '0011_09082021': 4,
    '0012_09082021': 6,
    '0013_09082021': 4,
}


# NOTE: the loop variables DATA_ID, CV, JOINT and CELLS are upper-case notebook-level
# names; after this cell runs they hold the values of the last iteration.
for DATA_ID, CV in cv_data_id.items():
    print('Model training with data: ' + DATA_ID)
    # Folder holding the fold arrays of this data ID
    data_dir = os.path.join(RESULTS_PATH, DATA_ID, 'data')

    for JOINT in ['Hip', 'Knee', 'Ankle']:
        CELLS = FORCE_CELLS_PER_JOINT[JOINT]

        # The output directory only depends on the data ID and the joint,
        # so it is created once here instead of on every grid iteration
        save_dir = os.path.join(RESULTS_PATH, DATA_ID, '{}_SVM_{}'.format(JOINT, hs_date))
        os.makedirs(save_dir, exist_ok=True)

        for idx, params in enumerate(param_grid_ls):
            print(strftime('%Y-%m-%d %H:%M:%S'))
            # Random 10-character identifier used to name this combination's result file
            params_id = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
            print('Parameters ({}) {}/{}'.format(params_id, idx + 1, param_grid_len))
            print(params)

            # Train the model with cross-validation; each key collects one value per fold
            cv_results = defaultdict(list)
            for fold_id in range(CV):
                print('Fold {}'.format(fold_id + 1))

                # Load the four arrays of this fold (file names use 1-based fold numbers)
                X_train, X_valid, Y_train, Y_valid = (
                    np.load(os.path.join(data_dir, '{}_{}_cv{}_{}.npy'.format(JOINT, part, fold_id + 1, DATA_ID)))
                    for part in ('X_train', 'X_valid', 'Y_train', 'Y_valid')
                )

                # Setup the model: the SVR is wrapped so that it can be fitted on multi-column targets
                model = MultiOutputRegressor(SVR(**params, verbose=0), n_jobs=-1)

                # Train the model and record the wall-clock fit time
                t_start = time.time()
                model.fit(X_train, Y_train)
                t_end = time.time()

                cv_results['fit_time'].append(t_end - t_start)
                print('Training time: {:.4f}'.format(cv_results['fit_time'][-1]))

                # Get the per-output scores on both subsets
                train_preds = model.predict(X_train)
                valid_preds = model.predict(X_valid)

                results = {
                    subset: {
                        'MAE': mean_absolute_error(y_true, y_pred, multioutput='raw_values'),
                        'MSE': mean_squared_error(y_true, y_pred, multioutput='raw_values'),
                        'R2': r2_score(y_true, y_pred, multioutput='raw_values'),
                    }
                    for subset, y_true, y_pred in (
                        ('Train', Y_train, train_preds),
                        ('Valid', Y_valid, valid_preds),
                    )
                }

                # Aggregate the scores over the force cells: output columns f, f + 2, f + 4, ...
                # are grouped under the same force label
                # (assumes the targets hold one Fx/Fy column pair per cell - TODO confirm)
                for subset in ['Train', 'Valid']:
                    for f, force in enumerate(['Fx', 'Fy']):
                        for loss in ['MAE', 'MSE', 'R2']:
                            scores = [results[subset][loss][i + f] for i in range(0, len(CELLS) * 2, 2)]
                            # Cast to built-in float so the values are always JSON serializable,
                            # whatever numpy scalar type the metrics return
                            cv_results['_'.join([subset, force, loss, 'mean'])].append(float(np.mean(scores)))
                            cv_results['_'.join([subset, force, loss, 'std'])].append(float(np.std(scores)))

            # Save the obtained results and its parameters into a JSON file
            rd = {
                'id': params_id,
                'parameters': params,
                'cv_results': dict(cv_results),
            }

            with open(os.path.join(save_dir, '{}_SVM_{}_{}.json'.format(JOINT, hs_date, params_id)), 'w') as fp:
                json.dump(rd, fp)

            print('\n\n')
            # Release the fitted model and the per-combination results before the next iteration
            del model, results, cv_results, rd
            gc.collect()