In [1]:
import numpy as np
import os
import gc
import sys
from time import strftime
from joblib import dump, load

from sklearn.model_selection import GridSearchCV, ParameterGrid, train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor

In [None]:
'''
from IPython.utils.io import Tee

# Redirect all the outputs messages to the terminal and to a log file
logs_dir = './logs'
logfilename = logs_dir + strftime('/ipython_%Y-%m-%d_%H:%M:%S') + '.log' 
if not os.path.exists(logs_dir):
    os.makedirs(logs_dir)
    
sys.stdout = open('/dev/stdout', 'w')
Tee(logfilename, mode='w', channel='stdout')
'''

In [2]:
# Path where the data is stored
SOURCE_PATH = '../../../data'
# Directory inside SOURCE_PATH where the derived data is stored
DERIVED_DATA_DIR = '/derived_data'
# Experiment params
DATE_EXPERIMENT = '24022021'

# Number of force cells in the robotic leg
N_CELLS = 8

# Experiment params
DATA_ID = '0001_26032021'

print('Model training with data: ' + DATA_ID)

Model training with data: 0001_26032021


In [3]:
X_train = np.load(os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENT, 'X_train_' + DATA_ID + '.npy'))
X_test = np.load(os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENT, 'X_test_' + DATA_ID + '.npy'))
Y_train = np.load(os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENT, 'Y_train_' + DATA_ID + '.npy'))
Y_test = np.load(os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENT, 'Y_test_' + DATA_ID + '.npy'))

print('X train: {}, Y train: {}'.format(X_train.shape, Y_train.shape))
print('X test: {}, Y test: {}'.format(X_test.shape, Y_test.shape))

X train: (187982, 12), Y train: (187982, 24)
X test: (80564, 12), Y test: (80564, 24)


In [4]:
# param_grid = {
#     'n_estimators': [10, 100, 1000],
#     'criterion': ['mae', 'mse'],
#     'max_depth': [2, 5, 10, None],
# }
param_grid = {}

print('Number of parameters combinations: {}'.format(len(list(ParameterGrid(param_grid)))))

Number of parameters combinations: 1


In [5]:
rf = RandomForestRegressor(random_state=0, verbose=10)

clf = GridSearchCV(rf, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, pre_dispatch=2, verbose=10)
clf.fit(X_train, Y_train)

print('Best params: {}'.format(clf.best_params_))

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


building tree 1 of 100


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    3.5s remaining:    0.0s


building tree 2 of 100


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    7.2s remaining:    0.0s


building tree 3 of 100


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   10.8s remaining:    0.0s


building tree 4 of 100


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   14.3s remaining:    0.0s


building tree 5 of 100


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   17.9s remaining:    0.0s


building tree 6 of 100


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   21.5s remaining:    0.0s


building tree 7 of 100


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:   25.2s remaining:    0.0s


building tree 8 of 100


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:   28.8s remaining:    0.0s


building tree 9 of 100


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:   32.3s remaining:    0.0s


building tree 10 of 100
building tree 11 of 100
building tree 12 of 100
building tree 13 of 100
building tree 14 of 100
building tree 15 of 100
building tree 16 of 100
building tree 17 of 100
building tree 18 of 100
building tree 19 of 100
building tree 20 of 100
building tree 21 of 100
building tree 22 of 100
building tree 23 of 100
building tree 24 of 100
building tree 25 of 100
building tree 26 of 100
building tree 27 of 100
building tree 28 of 100
building tree 29 of 100
building tree 30 of 100
building tree 31 of 100
building tree 32 of 100
building tree 33 of 100
building tree 34 of 100
building tree 35 of 100
building tree 36 of 100
building tree 37 of 100
building tree 38 of 100
building tree 39 of 100
building tree 40 of 100
building tree 41 of 100
building tree 42 of 100
building tree 43 of 100
building tree 44 of 100
building tree 45 of 100
building tree 46 of 100
building tree 47 of 100
building tree 48 of 100
building tree 49 of 100
building tree 50 of 100
building tree 51

[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:  6.0min finished


In [12]:
train_preds = clf.best_estimator_.predict(X_train)
test_preds = clf.best_estimator_.predict(X_test)

results = {
    'Train': {
        'MAE': mean_absolute_error(Y_train, train_preds, multioutput='raw_values'),
        'MSE': mean_squared_error(Y_train, train_preds, multioutput='raw_values'),
        'R2': r2_score(Y_train, train_preds, multioutput='raw_values')
    },
    'Test': {
        'MAE': mean_absolute_error(Y_test, test_preds, multioutput='raw_values'),
        'MSE': mean_squared_error(Y_test, test_preds, multioutput='raw_values'),
        'R2': r2_score(Y_test, test_preds, multioutput='raw_values')
    }       
    
}

for subset in ['Train', 'Test']:
    for f, force in enumerate(['Fx', 'Fy', 'Fz']):
        for c in range(N_CELLS):
            for loss in ['MAE', 'MSE', 'R2']:
                scores = [results[subset][loss][i + f] for i in range(0, N_CELLS * 3, 3)]
                print('{} {}{}{} {}: {:.4f}'.format(subset, force[0], c + 1, force[-1], loss, scores[c]))
            
print('\n')

for subset in ['Train', 'Test']:
    for f, force in enumerate(['Fx', 'Fy', 'Fz']):
        for loss in ['MAE', 'MSE', 'R2']:
            scores = [results[subset][loss][i + f] for i in range(0, N_CELLS * 3, 3)]
            print(' '.join([subset, force, loss]) + ': {:.4f} ± {:.4f}'.format(np.mean(scores), np.std(scores)))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    7.6s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elap

Train F1x MAE: 0.7051
Train F1x MSE: 2.9155
Train F1x R2: 0.9893
Train F2x MAE: 0.5305
Train F2x MSE: 1.2135
Train F2x R2: 0.9942
Train F3x MAE: 1.3953
Train F3x MSE: 8.3894
Train F3x R2: 0.9925
Train F4x MAE: 0.8278
Train F4x MSE: 3.7786
Train F4x R2: 0.9921
Train F5x MAE: 0.8482
Train F5x MSE: 2.8462
Train F5x R2: 0.9910
Train F6x MAE: 0.8957
Train F6x MSE: 4.0078
Train F6x R2: 0.9908
Train F7x MAE: 0.7467
Train F7x MSE: 3.1684
Train F7x R2: 0.9907
Train F8x MAE: 0.6402
Train F8x MSE: 1.9187
Train F8x R2: 0.9886
Train F1y MAE: 0.3245
Train F1y MSE: 0.4457
Train F1y R2: 0.9902
Train F2y MAE: 0.3338
Train F2y MSE: 0.4469
Train F2y R2: 0.9899
Train F3y MAE: 1.6683
Train F3y MSE: 8.4832
Train F3y R2: 0.9866
Train F4y MAE: 1.3202
Train F4y MSE: 9.2200
Train F4y R2: 0.9925
Train F5y MAE: 1.4666
Train F5y MSE: 13.1140
Train F5y R2: 0.9933
Train F6y MAE: 0.4139
Train F6y MSE: 0.6250
Train F6y R2: 0.9869
Train F7y MAE: 0.4133
Train F7y MSE: 0.7396
Train F7y R2: 0.9861
Train F8y MAE: 0.6115
Tr

[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    3.1s finished


In [None]:
# Save the model
dump(clf, os.path.join(SOURCE_PATH + DERIVED_DATA_DIR, DATE_EXPERIMENT, 'model_' + DATA_ID + '.joblib')) 

In [None]:
'''

param_grid = {}
#     'n_estimators': [10, 20, 50, 100, 500],
#     'criterion': ['mse', 'mae'],
#     'max_depth': [1, 2, 5, 10],

# }

for t in range(Y_train.shape[1]):
    target = targets_names[t]
    rf = RandomForestRegressor(random_state=0)

    clf = GridSearchCV(rf, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=8)
    clf.fit(X_train, Y_train[:, t])

    #print('Best params: {}'.format(clf.best_params_))


    test_preds = clf.best_estimator_.predict(X_test)

    # print('{} train MAE: {}'.format(target, mean_absolute_error(Y_train[:, t], clf.best_estimator_.predict(X_train_norm))))
    # print('{} train MSE: {}'.format(target, mean_squared_error(Y_train[:, t], clf.best_estimator_.predict(X_train_norm))))
    # print('{} train R2: {}'.format(target, r2_score(Y_train[:, t], clf.best_estimator_.predict(X_train_norm))))

    print('{} test MAE: {}'.format(target, mean_absolute_error(Y_test[:, t], test_preds)))
    print('{} test MSE: {}'.format(target, mean_squared_error(Y_test[:, t], test_preds)))
    print('{} test R2: {}'.format(target, r2_score(Y_test[:, t], test_preds)))
    
    print()
    print('///////////////////////////////////////////////////')
    print()
    gc.collect()
    
'''