In [6]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from joblib import dump

save_base = "../preprocessed_data/"


In [4]:
# Hyper-parameter search for a random forest regressor on the UV features.
# Features and targets are read from comma-separated files under `save_base`.
UV_x = np.loadtxt(save_base + "UV_x.csv", delimiter=',')
y = np.loadtxt(save_base + "y.csv", delimiter=',')

# Search grid: three candidates per hyper-parameter (3**4 = 81 combinations).
parameters = {
    'n_estimators': [425, 450, 475],
    'max_depth': [30, 35, 40],
    'min_samples_split': [5, 10, 20],
    'min_samples_leaf': [4, 6, 8],
}

# Seed the forest: without a fixed random_state every run builds different
# trees, so the selected parameters, the printed score and the model persisted
# afterwards could not be reproduced.
UV_forest = RandomForestRegressor(random_state=42)
# 7-fold cross-validation scored by negated RMSE, using all available cores.
UV_regr = GridSearchCV(
    UV_forest,
    parameters,
    scoring='neg_root_mean_squared_error',
    cv=7,
    n_jobs=-1,
    verbose=1,
)
UV_regr.fit(UV_x, y)

# Winning parameter combination and its mean cross-validated score.
print(UV_regr.best_params_, UV_regr.best_score_)

Fitting 7 folds for each of 81 candidates, totalling 567 fits
{'max_depth': 40, 'min_samples_leaf': 8, 'min_samples_split': 5, 'n_estimators': 475} -1.2795036914887492


In [7]:
# Persist the best UV estimator found by the grid search to the working
# directory; the call stays the last expression so the written file list is
# shown as the cell output.
dump(
    value=UV_regr.best_estimator_,
    filename="UV_random_forest.joblib",
)

['UV_random_forest.joblib']

In [8]:
# Hyper-parameter search for a random forest regressor on the VIS features.
# Features and targets are read from comma-separated files under `save_base`.
VIS_x = np.loadtxt(save_base + "VIS_x.csv", delimiter=',')
y = np.loadtxt(save_base + "y.csv", delimiter=',')

# Search grid: three candidates per hyper-parameter (3**4 = 81 combinations).
parameters = {
    'n_estimators': [350, 400, 450],
    'max_depth': [30, 40, 50],
    'min_samples_split': [30, 40, 50],
    'min_samples_leaf': [4, 6, 8],
}

# Seed the forest: without a fixed random_state every run builds different
# trees, so the selected parameters, the printed score and the model persisted
# afterwards could not be reproduced.
VIS_forest = RandomForestRegressor(random_state=42)
# 7-fold cross-validation scored by negated RMSE, using all available cores.
VIS_regr = GridSearchCV(
    VIS_forest,
    parameters,
    scoring='neg_root_mean_squared_error',
    cv=7,
    n_jobs=-1,
    verbose=1,
)
VIS_regr.fit(VIS_x, y)

# Winning parameter combination and its mean cross-validated score.
print(VIS_regr.best_params_, VIS_regr.best_score_)

Fitting 7 folds for each of 81 candidates, totalling 567 fits
{'max_depth': 40, 'min_samples_leaf': 8, 'min_samples_split': 30, 'n_estimators': 400} -1.2940708610780935


In [9]:
# Persist the best VIS estimator found by the grid search to the working
# directory; the call stays the last expression so the written file list is
# shown as the cell output.
dump(
    value=VIS_regr.best_estimator_,
    filename="VIS_random_forest.joblib",
)

['VIS_random_forest.joblib']

In [10]:
# Hyper-parameter search for a random forest regressor on the IR features.
# Features and targets are read from comma-separated files under `save_base`.
IR_x = np.loadtxt(save_base + "IR_x.csv", delimiter=',')
y = np.loadtxt(save_base + "y.csv", delimiter=',')

# Search grid: three candidates per hyper-parameter (3**4 = 81 combinations).
parameters = {
    'n_estimators': [250, 300, 350],
    'max_depth': [20, 30, 40],
    'min_samples_split': [30, 40, 50],
    'min_samples_leaf': [8, 10, 12],
}

# Seed the forest: without a fixed random_state every run builds different
# trees, so the selected parameters, the printed score and the model persisted
# afterwards could not be reproduced.
IR_forest = RandomForestRegressor(random_state=42)
# 7-fold cross-validation scored by negated RMSE, using all available cores.
IR_regr = GridSearchCV(
    IR_forest,
    parameters,
    scoring='neg_root_mean_squared_error',
    cv=7,
    n_jobs=-1,
    verbose=1,
)
IR_regr.fit(IR_x, y)

# Winning parameter combination and its mean cross-validated score.
print(IR_regr.best_params_, IR_regr.best_score_)

Fitting 7 folds for each of 81 candidates, totalling 567 fits
{'max_depth': 30, 'min_samples_leaf': 12, 'min_samples_split': 40, 'n_estimators': 300} -1.2930039377510076


In [11]:
# Persist the best IR estimator found by the grid search to the working
# directory; the call stays the last expression so the written file list is
# shown as the cell output.
dump(
    value=IR_regr.best_estimator_,
    filename="IR_random_forest.joblib",
)

['IR_random_forest.joblib']

In [48]:
# Hyper-parameter search for a random forest regressor on the UV, VIS and IR
# features stacked column-wise.
# NOTE: relies on UV_x, VIS_x and IR_x already being defined by the earlier
# cells of this notebook; run those first on a fresh kernel.
ALL_x = np.hstack((UV_x, VIS_x, IR_x))
y = np.loadtxt(save_base + "y.csv", delimiter=',')

# Search grid: three candidates per hyper-parameter (3**4 = 81 combinations).
parameters = {
    'n_estimators': [150, 200, 250],
    'max_depth': [20, 30, 40],
    'min_samples_split': [20, 30, 40],
    'min_samples_leaf': [12, 14, 16],
}

# Seed the forest: without a fixed random_state every run builds different
# trees, so the selected parameters and the printed score could not be
# reproduced.
ALL_forest = RandomForestRegressor(random_state=42)
# 7-fold cross-validation scored by negated RMSE, using all available cores.
ALL_regr = GridSearchCV(
    ALL_forest,
    parameters,
    scoring='neg_root_mean_squared_error',
    cv=7,
    n_jobs=-1,
    verbose=1,
)
ALL_regr.fit(ALL_x, y)

# Winning parameter combination and its mean cross-validated score.
print(ALL_regr.best_params_, ALL_regr.best_score_)

Fitting 7 folds for each of 81 candidates, totalling 567 fits
{'max_depth': 40, 'min_samples_leaf': 14, 'min_samples_split': 20, 'n_estimators': 200} -1.361203224869112
