In [1]:
import numpy as np
import os
import gc
import sys
from time import strftime
from joblib import dump, load

from sklearn.model_selection import GridSearchCV, ParameterGrid, train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor

In [None]:
# Disabled cell: everything below is wrapped in a string literal, so none of it
# executes. Per its own comment, the wrapped code is meant to mirror output to a
# timestamped log file under ./logs in addition to the terminal.
# NOTE(review): the wrapped code opens '/dev/stdout' and puts ':' in the log file
# name, so it looks POSIX-only -- confirm the target platform before re-enabling.
'''
from IPython.utils.io import Tee

# Redirect all the outputs messages to the terminal and to a log file
logs_dir = './logs'
logfilename = logs_dir + strftime('/ipython_%Y-%m-%d_%H:%M:%S') + '.log' 
if not os.path.exists(logs_dir):
    os.makedirs(logs_dir)
    
sys.stdout = open('/dev/stdout', 'w')
Tee(logfilename, mode='w', channel='stdout')
'''

## Configuration

In [2]:
# --- Data locations ---------------------------------------------------------
# Root folder of the data tree
SOURCE_PATH = '../../../data'
# Sub-folder of SOURCE_PATH with the derived data; it is appended to SOURCE_PATH
# by plain string concatenation, hence the leading '/'
DERIVED_DATA_DIR = '/derived_data'

# --- Experiment selection ---------------------------------------------------
# Folder name (under the derived-data directory) of the experiment to use
DATE_EXPERIMENT = '24022021'
# Suffix identifying which saved train/test arrays (and model file) to use
DATA_ID = '0002_31032021'

# --- Hardware ---------------------------------------------------------------
# Number of force cells in the robotic leg
N_CELLS = 8

print('Model training with data: ', DATA_ID, sep='')

Model training with data: 0002_31032021


## Load data

In [3]:
# Folder that holds the saved arrays of the selected experiment.
# SOURCE_PATH and DERIVED_DATA_DIR are concatenated (not joined) because
# DERIVED_DATA_DIR starts with '/'.
data_dir = os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENT)

# Each array is stored as '<prefix><DATA_ID>.npy'; load them in a fixed order.
X_train, X_test, Y_train, Y_test = (
    np.load(os.path.join(data_dir, prefix + DATA_ID + '.npy'))
    for prefix in ('X_train_', 'X_test_', 'Y_train_', 'Y_test_')
)

# Report the array shapes as a quick sanity check
for shapes_line in (
    'X train: {}, Y train: {}'.format(X_train.shape, Y_train.shape),
    'X test: {}, Y test: {}'.format(X_test.shape, Y_test.shape),
):
    print(shapes_line)

X train: (187982, 15), Y train: (187982, 24)
X test: (80564, 15), Y test: (80564, 24)


## Train model

In [4]:
# Hyper-parameter grid handed to the grid search below. It is currently empty,
# which yields a single candidate (the estimator as constructed).
#
# Disabled candidate grid, kept for reference:
# param_grid = {
#     'n_estimators': [10, 100, 1000],
#     'criterion': ['mae', 'mse'],
#     'max_depth': [2, 5, 10, None],
# }
# NOTE(review): newer scikit-learn releases spell these criteria
# 'absolute_error' / 'squared_error' -- check the installed version before
# re-enabling the grid.
param_grid = dict()

# ParameterGrid supports len() directly; no need to materialise the combinations
n_combinations = len(ParameterGrid(param_grid))
print('Number of parameters combinations: {}'.format(n_combinations))

Number of parameters combinations: 1


In [5]:
# Base regressor: fixed seed, and verbose=10 so per-tree progress is logged
rf = RandomForestRegressor(random_state=0, verbose=10)

# 5-fold grid search over param_grid, ranked by negative mean squared error.
# n_jobs=-1 runs the fits in parallel; pre_dispatch=2 limits how many jobs are
# dispatched up front.
model = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
    pre_dispatch=2,
    verbose=10,
)
model.fit(X_train, Y_train)

print('Best params: {}'.format(model.best_params_))

# Save the best estimator found by the search in the experiment's data folder
model_path = os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENT, 'model_' + DATA_ID + '.joblib')
dump(model.best_estimator_, model_path)

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 100


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.7s remaining:    0.0s


building tree 2 of 100


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    9.4s remaining:    0.0s


building tree 3 of 100


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   14.0s remaining:    0.0s


building tree 4 of 100


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   18.9s remaining:    0.0s


building tree 5 of 100


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   23.7s remaining:    0.0s


building tree 6 of 100


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   28.3s remaining:    0.0s


building tree 7 of 100


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:   33.1s remaining:    0.0s


building tree 8 of 100


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:   37.6s remaining:    0.0s


building tree 9 of 100


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:   42.2s remaining:    0.0s


building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
building tree 43 of 100
building tree 44 of 100
building tree 45 of 100
building tree 46 of 100
building tree 47 of 100
building tree 48 of 100
building tree 49 of 100
building tree 50 of 100
building tree 51

[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:  8.4min finished


Best params: {}


['../../../data/derived_data/24022021/model_0002_31032021.joblib']

In [6]:
def _per_output_metrics(y_true, y_pred):
    """Return MAE, MSE and R2 computed separately for every target column.

    Each entry is the array returned by the corresponding scikit-learn metric
    with ``multioutput='raw_values'`` (one value per output).
    """
    return {
        'MAE': mean_absolute_error(y_true, y_pred, multioutput='raw_values'),
        'MSE': mean_squared_error(y_true, y_pred, multioutput='raw_values'),
        'R2': r2_score(y_true, y_pred, multioutput='raw_values'),
    }


def _cell_scores(per_output, force_idx, n_forces=3):
    """Select, for one force axis, the N_CELLS per-output scores (one per cell).

    Picks columns ``force_idx, force_idx + n_forces, ...`` below
    ``N_CELLS * n_forces``.
    NOTE(review): this indexing implies the targets are interleaved per cell
    as [F1x, F1y, F1z, F2x, ...] -- confirm against the data preparation step.
    """
    columns = np.arange(force_idx, N_CELLS * n_forces, n_forces)
    # Integer-array indexing raises IndexError if a column is missing, so a
    # target matrix with too few columns is not silently truncated.
    return per_output[columns]


# Predict with the best estimator found by the search
train_preds = model.best_estimator_.predict(X_train)
test_preds = model.best_estimator_.predict(X_test)

# Same metric set for both subsets, computed by a single helper
results = {
    'Train': _per_output_metrics(Y_train, train_preds),
    'Test': _per_output_metrics(Y_test, test_preds),
}

subsets = ['Train', 'Test']
forces = ['Fx', 'Fy', 'Fz']
losses = ['MAE', 'MSE', 'R2']

# Per-cell report: one line per (subset, force, cell, metric)
for subset in subsets:
    for f, force in enumerate(forces):
        # The selection depends only on (subset, force, metric), so it is done
        # once here instead of once per cell.
        scores_by_loss = {loss: _cell_scores(results[subset][loss], f) for loss in losses}
        for c in range(N_CELLS):
            for loss in losses:
                # Label reads e.g. 'F1x': force letter, 1-based cell number, axis
                print('{} {}{}{} {}: {:.4f}'.format(subset, force[0], c + 1, force[-1], loss, scores_by_loss[loss][c]))

print('\n')

# Summary report: mean and standard deviation across cells for each force axis
for subset in subsets:
    for f, force in enumerate(forces):
        for loss in losses:
            scores = _cell_scores(results[subset][loss], f)
            print(' '.join([subset, force, loss]) + ': {:.4f} ± {:.4f}'.format(np.mean(scores), np.std(scores)))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    8.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elap

Train F1x MAE: 0.4092
Train F1x MSE: 1.1648
Train F1x R2: 0.9957
Train F2x MAE: 0.2973
Train F2x MSE: 0.3598
Train F2x R2: 0.9983
Train F3x MAE: 0.7045
Train F3x MSE: 1.9891
Train F3x R2: 0.9982
Train F4x MAE: 0.4447
Train F4x MSE: 1.2629
Train F4x R2: 0.9974
Train F5x MAE: 0.5354
Train F5x MSE: 0.8772
Train F5x R2: 0.9972
Train F6x MAE: 0.5385
Train F6x MSE: 1.2260
Train F6x R2: 0.9972
Train F7x MAE: 0.4292
Train F7x MSE: 1.0982
Train F7x R2: 0.9968
Train F8x MAE: 0.3377
Train F8x MSE: 0.6069
Train F8x R2: 0.9964
Train F1y MAE: 0.2180
Train F1y MSE: 0.2279
Train F1y R2: 0.9950
Train F2y MAE: 0.2263
Train F2y MSE: 0.1896
Train F2y R2: 0.9957
Train F3y MAE: 1.1662
Train F3y MSE: 3.7859
Train F3y R2: 0.9940
Train F4y MAE: 0.8536
Train F4y MSE: 4.1688
Train F4y R2: 0.9966
Train F5y MAE: 0.8263
Train F5y MSE: 3.6380
Train F5y R2: 0.9981
Train F6y MAE: 0.2940
Train F6y MSE: 0.2600
Train F6y R2: 0.9946
Train F7y MAE: 0.3027
Train F7y MSE: 0.3912
Train F7y R2: 0.9927
Train F8y MAE: 0.3863
Tra

[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    3.4s finished
