In [1]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from joblib import dump

save_base = "../preprocessed_data/"
models_base = "./models/"


In [2]:
UV_x = np.loadtxt(save_base + "UV_x.csv", delimiter=',')
y = np.loadtxt(save_base + "y.csv", delimiter=',')

parameters = {
    'n_estimators': [5, 10, 30, 50],
    'max_depth': [2, 5, 10, 15, 20],
    'min_samples_split': [2, 5, 10, 15, 20, 30],
    'min_samples_leaf': [2, 5, 10, 15, 20, 30],
}

UV_forest = RandomForestRegressor()
UV_regr = GridSearchCV(UV_forest, parameters, scoring='neg_root_mean_squared_error', cv=7, n_jobs=-1, verbose=1)
UV_regr.fit(UV_x, y)

print(UV_regr.best_params_, UV_regr.best_score_)

Fitting 7 folds for each of 720 candidates, totalling 5040 fits
{'max_depth': 15, 'min_samples_leaf': 2, 'min_samples_split': 20, 'n_estimators': 10} -1.766901570296046


In [3]:
print(UV_regr.best_estimator_)

RandomForestRegressor(max_depth=15, min_samples_leaf=2, min_samples_split=20,
                      n_estimators=10)


In [4]:
dump(UV_regr.best_estimator_, models_base + "UV_random_forest.joblib")

['./models/UV_random_forest.joblib']

In [5]:
VIS_x = np.loadtxt(save_base + "VIS_x.csv", delimiter=',')
y = np.loadtxt(save_base + "y.csv", delimiter=',')

parameters = {
    'n_estimators': [5, 10, 30, 50],
    'max_depth': [2, 5, 10, 15, 20],
    'min_samples_split': [2, 5, 10, 15, 20, 30],
    'min_samples_leaf': [2, 5, 10, 15, 20, 30],
}

VIS_forest = RandomForestRegressor()
VIS_regr = GridSearchCV(VIS_forest, parameters, scoring='neg_root_mean_squared_error', cv=7, n_jobs=-1, verbose=1)
VIS_regr.fit(VIS_x, y)

print(VIS_regr.best_params_, VIS_regr.best_score_)

Fitting 7 folds for each of 720 candidates, totalling 5040 fits
{'max_depth': 5, 'min_samples_leaf': 10, 'min_samples_split': 10, 'n_estimators': 5} -1.7585439378872658


In [6]:
dump(VIS_regr.best_estimator_, models_base + "VIS_random_forest.joblib")

['./models/VIS_random_forest.joblib']

In [7]:
IR_x = np.loadtxt(save_base + "IR_x.csv", delimiter=',')
y = np.loadtxt(save_base + "y.csv", delimiter=',')

parameters = {
    'n_estimators': [5, 10, 30, 50],
    'max_depth': [2, 5, 10, 15, 20],
    'min_samples_split': [2, 5, 10, 15, 20, 30],
    'min_samples_leaf': [2, 5, 10, 15, 20, 30],
}

IR_forest = RandomForestRegressor()
IR_regr = GridSearchCV(IR_forest, parameters, scoring='neg_root_mean_squared_error', cv=7, n_jobs=-1, verbose=1)
IR_regr.fit(IR_x, y)

print(IR_regr.best_params_, IR_regr.best_score_)

Fitting 7 folds for each of 720 candidates, totalling 5040 fits
{'max_depth': 20, 'min_samples_leaf': 5, 'min_samples_split': 2, 'n_estimators': 5} -1.655483499010262


In [8]:
dump(IR_regr.best_estimator_, models_base + "IR_random_forest.joblib")

['./models/IR_random_forest.joblib']