In [1]:
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.multioutput import MultiOutputRegressor
import pandas as pd
from os import path

In [2]:
df = pd.read_hdf(path.join(path.pardir, 'data', 'spinning_data.hf5'))
features = df.iloc[:,1:-5]
targets = df.iloc[:,-5:]
categorical_features = [0,2,3,4,6,10,17]

In [None]:
seed = 1234
loss = 'squared_error'
early_stopping = True
n_splits = 10

# CV parameters
learning_rate = [.001, .01, .1, 1]
max_leaf_nodes = [15, 30, 50]
max_iter = [5, 10, 50, 100, 300, 500]
l2_regularization = [0, .001, .01, .1]



In [5]:
model = MultiOutputRegressor(HistGradientBoostingRegressor(
        random_state=seed, 
        early_stopping=early_stopping, 
        loss=loss,
        categorical_features=categorical_features
    ))
cv = KFold(n_splits=n_splits, random_state=seed, shuffle=True)
param_grid = {"estimator__learning_rate": learning_rate,
              "estimator__max_leaf_nodes": max_leaf_nodes,
              "estimator__max_iter": max_iter,
              "estimator__l2_regularization": l2_regularization}

grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    return_train_score=True,
    cv=cv,
    verbose=5
).fit(features,targets)
    

Fitting 10 folds for each of 288 candidates, totalling 2880 fits
[CV 1/10] END estimator__l2_regularization=0.001, estimator__learning_rate=0.001, estimator__max_iter=10, estimator__max_leaf_nodes=15;, score=(train=0.008, test=0.005) total time=   0.5s
[CV 2/10] END estimator__l2_regularization=0.001, estimator__learning_rate=0.001, estimator__max_iter=10, estimator__max_leaf_nodes=15;, score=(train=0.008, test=0.007) total time=   0.4s
[CV 3/10] END estimator__l2_regularization=0.001, estimator__learning_rate=0.001, estimator__max_iter=10, estimator__max_leaf_nodes=15;, score=(train=0.008, test=-0.010) total time=   0.4s
[CV 4/10] END estimator__l2_regularization=0.001, estimator__learning_rate=0.001, estimator__max_iter=10, estimator__max_leaf_nodes=15;, score=(train=0.008, test=0.001) total time=   0.4s
[CV 5/10] END estimator__l2_regularization=0.001, estimator__learning_rate=0.001, estimator__max_iter=10, estimator__max_leaf_nodes=15;, score=(train=0.008, test=0.004) total time=  

KeyboardInterrupt: 

In [10]:
results = pd.DataFrame(grid_search.cv_results_)
print(results.to_string())

   mean_fit_time  std_fit_time  mean_score_time  std_score_time  param_estimator__max_iter                        params  split0_test_score  split1_test_score  split2_test_score  split3_test_score  split4_test_score  split5_test_score  split6_test_score  split7_test_score  split8_test_score  split9_test_score  mean_test_score  std_test_score  rank_test_score  split0_train_score  split1_train_score  split2_train_score  split3_train_score  split4_train_score  split5_train_score  split6_train_score  split7_train_score  split8_train_score  split9_train_score  mean_train_score  std_train_score
0       0.359088      0.052146         0.051859        0.020097                         10   {'estimator__max_iter': 10}           0.365103           0.380213           0.365464           0.359583           0.339080           0.357643           0.384020           0.363196           0.387124           0.389518         0.369094        0.015083                6            0.406509            0.400763    