In [1]:
import numpy as np
import os
import gc

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor

In [2]:
# Root directory of the data, given as a relative path
SOURCE_PATH = '../../data'
# Directory inside SOURCE_PATH where the derived data is stored.
# It is appended to SOURCE_PATH by plain string concatenation in the loading
# cell, which is why it carries a leading slash.
MODEL_DATA_DIR = '/models_data'

# Experiment params
# Name of the sub-directory (under SOURCE_PATH + MODEL_DATA_DIR) from which the
# X/Y train and test .npy arrays are loaded.
# NOTE(review): looks like "<id>_<ddmmyyyy>" - confirm the naming convention.
DATA_CODE = '0001_16032021'

In [3]:
# Load the four arrays of the selected experiment in a fixed order:
# train features, test features, train targets, test targets.
X_train, X_test, Y_train, Y_test = (
    np.load(os.path.join(SOURCE_PATH + MODEL_DATA_DIR, DATA_CODE, array_file))
    for array_file in ('X_train.npy', 'X_test.npy', 'Y_train.npy', 'Y_test.npy')
)

In [4]:
# Number of cells; three target labels are generated per cell (suffixes x, y, z).
N_CELLS = 8
# Labels in cell-major order: F1x, F1y, F1z, F2x, ..., F8z.
targets_names = [
    'F{}{}'.format(cell, axis)
    for cell in range(1, N_CELLS + 1)
    for axis in 'xyz'
]

In [5]:
# An empty grid gives GridSearchCV a single candidate (the estimator's default
# hyper-parameters), so this is a 5-fold cross-validation followed by a refit.
param_grid = {}

rf = RandomForestRegressor(random_state=0, verbose=1)

clf = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=8,
)
clf.fit(X_train, Y_train)

# Predict all target columns at once with the refitted estimator.
test_preds = clf.best_estimator_.predict(X_test)

# multioutput='raw_values' keeps one score per target column instead of averaging.
for template, metric_fn in (
    ('Test MAE: {}', mean_absolute_error),
    ('Test MSE: {}', mean_squared_error),
    ('Test R2: {}', r2_score),
):
    print(template.format(metric_fn(Y_test, test_preds, multioutput='raw_values')))

NameError: name 'target' is not defined

In [9]:
# Report each metric per target column (one value per output, no averaging).
report = (
    ('Test MAE: {}', mean_absolute_error),
    ('Test MSE: {}', mean_squared_error),
    ('Test R2: {}', r2_score),
)
for template, metric_fn in report:
    print(template.format(metric_fn(Y_test, test_preds, multioutput='raw_values')))

Test MAE: [1.28694339 0.77339846 1.05485009 1.11485601 1.82559902 1.28491778
 2.53563567 3.71303626 3.66258364 1.63073189 1.64968317 3.24157045
 2.37733029 2.2583988  3.11246761 1.4190731  0.81697227 2.92157832
 1.32897959 0.80830422 2.51804885 1.69890944 1.88767976 2.41202313]
Test MSE: [ 4.9138306   1.90000064  3.68335857  3.05650634  9.98354209  4.6130214
 16.94511184 39.47145444 37.57713399  7.16417488  6.80970275 38.83396075
 13.83451015 13.29925191 25.48228908  5.42941154  1.92040576 25.13511472
  4.99711889  1.66397013 18.06365393  7.86217717  8.99565273 17.06833691]
Test R2: [0.92275832 0.93405181 0.930773   0.88829958 0.93191741 0.95662695
 0.91886427 0.91680455 0.91390282 0.88947001 0.91359503 0.92959217
 0.91973503 0.94572267 0.92324048 0.91100526 0.89680181 0.91965442
 0.88814497 0.88303888 0.91422313 0.88611902 0.92312002 0.92213738]


In [20]:
# Metric name -> scoring function; insertion order fixes the key order below.
metric_fns = {
    'MAE': mean_absolute_error,
    'MSE': mean_squared_error,
    'R2': r2_score,
}

# Metric name -> array with one score per target column.
results_test = {
    metric: score_fn(Y_test, test_preds, multioutput='raw_values')
    for metric, score_fn in metric_fns.items()
}

# Re-key the same numbers by target: target name -> {metric name -> score}.
scores_test = {
    target_name: {metric: per_target[col] for metric, per_target in results_test.items()}
    for col, target_name in enumerate(targets_names)
}

scores_test

{'F1x': {'MAE': 1.2869433945481026,
  'MSE': 4.9138306012360005,
  'R2': 0.9227583193609529},
 'F1y': {'MAE': 0.7733984627383887,
  'MSE': 1.9000006399083045,
  'R2': 0.934051805076811},
 'F1z': {'MAE': 1.0548500862842647,
  'MSE': 3.683358569421289,
  'R2': 0.9307729973443367},
 'F2x': {'MAE': 1.1148560138123222,
  'MSE': 3.0565063425338224,
  'R2': 0.8882995818102599},
 'F2y': {'MAE': 1.8255990198835783,
  'MSE': 9.983542090008012,
  'R2': 0.9319174085753112},
 'F2z': {'MAE': 1.2849177841835189,
  'MSE': 4.613021396634134,
  'R2': 0.9566269525693342},
 'F3x': {'MAE': 2.535635672610846,
  'MSE': 16.945111838617745,
  'R2': 0.9188642711442028},
 'F3y': {'MAE': 3.713036263061726,
  'MSE': 39.47145444107257,
  'R2': 0.9168045510735037},
 'F3z': {'MAE': 3.6625836449063316,
  'MSE': 37.57713399208916,
  'R2': 0.913902821447683},
 'F4x': {'MAE': 1.630731887883112,
  'MSE': 7.164174884652132,
  'R2': 0.8894700137581021},
 'F4y': {'MAE': 1.6496831680389434,
  'MSE': 6.8097027548959455,
  'R2'

In [7]:
# Train and evaluate one independent random forest per target column.
#
# No hyper-parameter search is performed yet: with an empty grid GridSearchCV
# only evaluates the estimator's default settings. Candidate grid kept for
# reference (scikit-learn 1.0 renamed the criteria 'mse'/'mae' to
# 'squared_error'/'absolute_error'):
#     'n_estimators': [10, 20, 50, 100, 500],
#     'criterion': ['squared_error', 'absolute_error'],
#     'max_depth': [1, 2, 5, 10],
param_grid = {}

# Fail before any (slow) fit if a target column has no label, instead of
# hitting an IndexError part-way through the loop.
n_targets = Y_train.shape[1]
if n_targets > len(targets_names):
    raise ValueError(
        'Y_train has {} target columns but only {} target names are defined'.format(
            n_targets, len(targets_names)
        )
    )

for t, target in enumerate(targets_names[:n_targets]):
    # Loop-specific names are used on purpose: rebinding `rf`, `clf` or
    # `test_preds` here would silently replace the multi-output model and its
    # predictions that the metric cells of this notebook read.
    target_rf = RandomForestRegressor(random_state=0)

    target_clf = GridSearchCV(target_rf, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=8)
    target_clf.fit(X_train, Y_train[:, t])

    # Predictions of the refitted single-target model on the held-out set.
    target_test_preds = target_clf.best_estimator_.predict(X_test)

    print('{} test MAE: {}'.format(target, mean_absolute_error(Y_test[:, t], target_test_preds)))
    print('{} test MSE: {}'.format(target, mean_squared_error(Y_test[:, t], target_test_preds)))
    print('{} test R2: {}'.format(target, r2_score(Y_test[:, t], target_test_preds)))

    print()
    print('///////////////////////////////////////////////////')
    print()
    # Release the previous iteration's search object and forests before the next fit.
    gc.collect()

F1x test MAE: 1.3078272866194836
F1x test MSE: 5.242127154866028
F1x test R2: 0.9175977471702847

///////////////////////////////////////////////////

F1y test MAE: 0.7878041500044975
F1y test MSE: 1.954654049148908
F1y test R2: 0.9321548090389622

///////////////////////////////////////////////////

F1z test MAE: 1.0709921831749392
F1z test MSE: 3.826090311946997
F1z test R2: 0.9280904209585075

///////////////////////////////////////////////////

F2x test MAE: 1.12513744871997
F2x test MSE: 3.0569377190744427
F2x test R2: 0.8882838170989904

///////////////////////////////////////////////////

F2y test MAE: 1.9078619159852903
F2y test MSE: 10.566982221293094
F2y test R2: 0.9279386487603146

///////////////////////////////////////////////////

F2z test MAE: 1.3288542642290186
F2z test MSE: 4.835605789305866
F2z test R2: 0.9545341455800324

///////////////////////////////////////////////////

F3x test MAE: 2.553784725738174
F3x test MSE: 17.2091129221441
F3x test R2: 0.917600194486898


KeyboardInterrupt: 