In [1]:
import numpy as np
import os
import gc
import sys
from time import strftime
from joblib import dump, load

from sklearn.model_selection import GridSearchCV, ParameterGrid, train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor

In [None]:
# Disabled cell: everything below sits inside a bare triple-quoted string, so
# it is evaluated as a plain string expression and none of it is executed.
# The text it holds is a snippet that would create ./logs if missing, reopen
# stdout and attach an IPython Tee writing to a time-stamped log file there.
# NOTE(review): the snippet opens '/dev/stdout' and puts ':' in the log file
# name, so it presumably only works on POSIX systems -- confirm before enabling.
'''
from IPython.utils.io import Tee

# Redirect all the outputs messages to the terminal and to a log file
logs_dir = './logs'
logfilename = logs_dir + strftime('/ipython_%Y-%m-%d_%H:%M:%S') + '.log' 
if not os.path.exists(logs_dir):
    os.makedirs(logs_dir)
    
sys.stdout = open('/dev/stdout', 'w')
Tee(logfilename, mode='w', channel='stdout')
'''

## Configuration

In [2]:
# --- Data locations -------------------------------------------------------
# Root folder where the data is stored
SOURCE_PATH = '../../../data'
# Sub-folder of SOURCE_PATH holding the derived data. It keeps its own leading
# slash because the other cells build paths as SOURCE_PATH + DERIVED_DATA_DIR
# (plain string concatenation) before handing the result to os.path.join.
DERIVED_DATA_DIR = '/derived_data'

# --- Experiment selection -------------------------------------------------
# Folder, inside the derived data directory, that holds this experiment's files
DATE_EXPERIMENT = '24022021'
# Identifier embedded in the names of the data and model files
DATA_ID = '0003_08042021'

# --- Hardware -------------------------------------------------------------
# Number of force cells in the robotic leg
N_CELLS = 8

print('Model training with data: ' + DATA_ID)

Model training with data: 0003_08042021


## Load data

In [3]:
def _load_split(name):
    """Load one array of the current experiment.

    Parameters
    ----------
    name : str
        File name prefix of the array, e.g. ``'X_train'``.

    Returns
    -------
    numpy.ndarray
        Content of ``<name>_<DATA_ID>.npy`` read from the
        ``SOURCE_PATH + DERIVED_DATA_DIR / DATE_EXPERIMENT`` folder.
    """
    # DERIVED_DATA_DIR is concatenated (not joined) onto SOURCE_PATH on purpose:
    # it starts with a slash, and os.path.join would drop SOURCE_PATH if it
    # received it as a separate component.
    data_dir = os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENT)
    return np.load(os.path.join(data_dir, name + '_' + DATA_ID + '.npy'))


X_train = _load_split('X_train')
X_test = _load_split('X_test')
Y_train = _load_split('Y_train')
Y_test = _load_split('Y_test')

# Fail fast on mismatched files instead of deep inside model fitting/scoring
assert X_train.shape[0] == Y_train.shape[0], 'X_train and Y_train have a different number of samples'
assert X_test.shape[0] == Y_test.shape[0], 'X_test and Y_test have a different number of samples'
assert X_train.shape[1:] == X_test.shape[1:], 'X_train and X_test have a different number of features'
assert Y_train.shape[1:] == Y_test.shape[1:], 'Y_train and Y_test have a different number of targets'

print('X train: {}, Y train: {}'.format(X_train.shape, Y_train.shape))
print('X test: {}, Y test: {}'.format(X_test.shape, Y_test.shape))

X train: (196835, 15), Y train: (196835, 24)
X test: (71711, 15), Y test: (71711, 24)


## Train model

In [None]:
# Hyperparameters search: 5-fold grid search of a random forest, scored with
# the negated mean squared error.
# NOTE(review): 'mae' / 'mse' are the criterion names of older scikit-learn
# releases; newer releases name them 'absolute_error' / 'squared_error'.
# Confirm against the installed version before running this cell.
param_grid = {
    'n_estimators': [10],
    'criterion': ['mae', 'mse'],
    'max_depth': [2, 5, 10, None],
}
# ParameterGrid implements __len__, so the grid does not have to be
# materialised into a list just to count its combinations.
print('Number of parameters combinations: {}'.format(len(ParameterGrid(param_grid))))


rf = RandomForestRegressor(random_state=0, verbose=1)

cv = GridSearchCV(rf, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, pre_dispatch=8, verbose=1)
cv.fit(X_train, Y_train)

print('Best params: {}'.format(cv.best_params_))

# Save the model
# NOTE(review): the following cell rebinds `model` and writes to this very
# same file, so running both cells keeps only the later model.
dump(cv.best_estimator_, os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENT, 'model_' + DATA_ID + '.joblib'))
# Best estimator found by the search, used by the evaluation cell
model = cv.best_estimator_

In [16]:
# Fit a forest with a fixed configuration (no hyperparameter search) on the
# whole training split; fit() returns the estimator itself, so `model` ends up
# bound to the fitted forest.
# NOTE(review): the output recorded below this cell reports 1000 tasks, which
# does not seem to match n_estimators=100 -- confirm which setting produced
# the stored results.
model = RandomForestRegressor(
    n_estimators=100,
    max_depth=5,
    n_jobs=-1,
    random_state=0,
    verbose=1,
).fit(X_train, Y_train)

# Persist the fitted forest next to the derived data of this experiment
dump(
    model,
    os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENT, 'model_' + DATA_ID + '.joblib'),
)


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   23.0s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:  4.4min
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:  7.9min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 10.0min finished


['../../../data/derived_data/24022021/model_0003_08042021.joblib']

In [17]:
def _score_per_output(y_true, y_pred):
    """Score predictions separately for every output column.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground truth and predicted targets, passed straight to the
        scikit-learn metric functions.

    Returns
    -------
    dict
        Maps 'MAE', 'MSE' and 'R2' to what the corresponding metric returns
        with ``multioutput='raw_values'`` (one score per output column).
    """
    return {
        'MAE': mean_absolute_error(y_true, y_pred, multioutput='raw_values'),
        'MSE': mean_squared_error(y_true, y_pred, multioutput='raw_values'),
        'R2': r2_score(y_true, y_pred, multioutput='raw_values'),
    }


# Set to True to also print the score of each axis of each force cell
SHOW_PER_CELL_SCORES = False

train_preds = model.predict(X_train)
test_preds = model.predict(X_test)

# The reports below read column 3 * cell + axis of every score vector, i.e.
# they rely on exactly three target columns (Fx, Fy, Fz) per force cell.
assert Y_train.shape[1] == N_CELLS * 3, 'Y_train must have 3 columns per force cell'
assert Y_test.shape[1] == N_CELLS * 3, 'Y_test must have 3 columns per force cell'

results = {
    'Train': _score_per_output(Y_train, train_preds),
    'Test': _score_per_output(Y_test, test_preds),
}

subsets = ['Train', 'Test']
axes = ['Fx', 'Fy', 'Fz']
losses = ['MAE', 'MSE', 'R2']

# Display the score for each axis of each force cell (off by default)
if SHOW_PER_CELL_SCORES:
    for subset in subsets:
        for f, force in enumerate(axes):
            for c in range(N_CELLS):
                for loss in losses:
                    print('{} {}{}{} {}: {:.4f}'.format(
                        subset, force[0], c + 1, force[-1], loss, results[subset][loss][3 * c + f]))

print('\n')

# Display the score mean and standard deviation of each axis
for subset in subsets:
    for f, force in enumerate(axes):
        for loss in losses:
            # Every third column starting at f: the same axis across all force cells
            scores = results[subset][loss][f:N_CELLS * 3:3]
            print(' '.join([subset, force, loss]) + ': {:.4f} ± {:.4f}'.format(np.mean(scores), np.std(scores)))

[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    1.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    3.6s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    8.1s
[Parallel(n_jobs=8)]: Done 784 tasks      | elapsed:   14.4s
[Parallel(n_jobs=8)]: Done 1000 out of 1000 | elapsed:   18.2s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.2s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    1.2s
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:    2.8s
[Parallel(n_jobs=8)]: Done 784 tasks      | elapsed:    5.0s
[Parallel(n_jobs=8)]: Done 1000 out of 1000 | elapsed:    6.4s finished




Train Fx MAE: 9.4044 ± 2.8857
Train Fx MSE: 182.2424 ± 119.3611
Train Fx R2: 0.5802 ± 0.0614
Train Fy MAE: 8.2783 ± 5.3926
Train Fy MSE: 204.3028 ± 220.8775
Train Fy R2: 0.4743 ± 0.1304
Train Fz MAE: 12.6520 ± 4.4852
Train Fz MSE: 345.3635 ± 264.2377
Train Fz R2: 0.5795 ± 0.0906
Test Fx MAE: 12.2157 ± 3.8706
Test Fx MSE: 347.5893 ± 239.3705
Test Fx R2: -0.2756 ± 0.2874
Test Fy MAE: 12.3361 ± 9.1578
Test Fy MSE: 529.0750 ± 723.1517
Test Fy R2: -0.1454 ± 0.4550
Test Fz MAE: 16.0123 ± 5.7524
Test Fz MSE: 623.6688 ± 491.2103
Test Fz R2: -0.7265 ± 0.8100
