In [5]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, KFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score, make_scorer, mean_absolute_error
from xgboost import XGBRegressor
from sklearn.compose import ColumnTransformer
from sklearn.neural_network import MLPRegressor
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.ensemble import RandomForestRegressor
import itertools

In [2]:
df = pd.read_csv("ploeg_final_df.csv")
print(df.head())

   ervaring  500_split  2k tijd  binary_trainingtype  binary_geslacht  \
0         1      104.6    379.9                    0                0   
1         1      104.7    379.9                    0                0   
2         1      104.3    379.9                    0                0   
3         1      104.0    379.9                    0                0   
4         1      104.1    379.9                    0                0   

   binary_gewichtsklasse  binary_ploeg  
0                      1             0  
1                      1             0  
2                      1             0  
3                      1             0  
4                      1             0  


In [3]:
### splitting the data

df = df.sample(frac=1, random_state=42).reset_index(drop=True)

train_data = pd.DataFrame()
val_data = pd.DataFrame()
test_data = pd.DataFrame()

for ervaring in df['ervaring'].unique():
    for geslacht in df['binary_geslacht'].unique():
        for gewichtsklasse in df['binary_gewichtsklasse'].unique():
            subset = df[(df['ervaring'] == ervaring) & (df['binary_geslacht'] == geslacht) & (df['binary_gewichtsklasse'] == gewichtsklasse)]
            if not subset.empty:
                temp_train, temp_temp = train_test_split(subset, test_size=0.3, random_state=42)
                temp_val, temp_test = train_test_split(temp_temp, test_size=0.3, random_state=42)
                train_data = pd.concat([train_data, temp_train])
                val_data = pd.concat([val_data, temp_val])
                test_data = pd.concat([test_data, temp_test])

train_data.reset_index(drop=True, inplace=True)
val_data.reset_index(drop=True, inplace=True)
test_data.reset_index(drop=True, inplace=True)

print(f"Trainingsdata: {len(train_data)} rijen")
print(f"Validatiedata: {len(val_data)} rijen")
print(f"Testdata: {len(test_data)} rijen")

X_train = train_data.drop(columns=['2k tijd'])
y_train = train_data['2k tijd']

X_val = val_data.drop(columns=['2k tijd'])
y_val = val_data['2k tijd']

X_test = test_data.drop(columns=['2k tijd'])
y_test = test_data['2k tijd']

Trainingsdata: 3018 rijen
Validatiedata: 905 rijen
Testdata: 391 rijen


In [6]:
# RIDGE REGRESSION

ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)

y_val_pred = ridge_model.predict(X_val)

val_mse = mean_squared_error(y_val, y_val_pred)
val_r2 = r2_score(y_val, y_val_pred)
val_mae = mean_absolute_error(y_val, y_val_pred)

print(f"Validatie MSE RR (zonder tuning): {val_mse:.3f}")
print(f"Validatie R-squared RR (zonder tuning): {val_r2:.3f}")
print(f"Validatie MAE RR (zonder tuning): {val_mae:.3f}")

poss_lam = [0.001, 0.01, 0.02, 0.25, 0.03, 0.04, 0.1, 1.0, 10.0, 50.0, 100.0]
ridge_cv_model = RidgeCV(alphas=poss_lam, store_cv_values=True)
ridge_cv_model.fit(X_train, y_train)

print(f"Beste alpha na tuning: {ridge_cv_model.alpha_}")

tuned_ridge_model = Ridge(alpha=ridge_cv_model.alpha_)
tuned_ridge_model.fit(X_train, y_train)

y_test_pred = tuned_ridge_model.predict(X_test)

test_mse_rr = mean_squared_error(y_test, y_test_pred)
test_rmse_rr = test_mse_rr ** 0.5
test_r2_rr = r2_score(y_test, y_test_pred)
test_mae_rr = mean_absolute_error(y_test, y_test_pred)

print('\n')
print(f"Test MSE RR (na tuning): {test_mse_rr:.3f}")
print(f"Test RMSE RR (na tuning): {test_rmse_rr:.3f}")
print(f"Test R-squared RR (na tuning): {test_r2_rr:.3f}")
print(f"Test MAE RR (na tuning): {test_mae_rr:.3f}")

Validatie MSE RR (zonder tuning): 111.010
Validatie R-squared RR (zonder tuning): 0.891
Validatie MAE RR (zonder tuning): 8.179
Beste alpha na tuning: 0.03


Test MSE RR (na tuning): 116.108
Test RMSE RR (na tuning): 10.775
Test R-squared RR (na tuning): 0.887
Test MAE RR (na tuning): 8.475




In [7]:
# RANDOM FOREST

rf_model = RandomForestRegressor(max_depth=10, random_state=0)
rf_model.fit(X_train, y_train)

val_pred_rf = rf_model.predict(X_val)

val_mse_rf = mean_squared_error(y_val, val_pred_rf)
val_r2_rf = r2_score(y_val, val_pred_rf)
val_mae_rf = mean_absolute_error(y_val, val_pred_rf)

print(f"Validatie MSE RF (zonder tuning): {val_mse_rf:.3f}")
print(f"Validatie R-squared RF (zonder tuning): {val_r2_rf:.3f}")
print(f"Validatie MAE RF (zonder tuning): {val_mae_rf:.3f}")

param_grid = {'max_depth': [3, 5, 7, 10, 13, 16],
              'n_estimators': [100, 200, 300],
              'min_samples_split': [2, 5, 10, 13, 16]}

grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, 
                           scoring='neg_mean_squared_error', cv=5, n_jobs=-1)

grid_search.fit(X_train, y_train)

best_maxdepth = grid_search.best_params_['max_depth']
best_nestimators = grid_search.best_params_['n_estimators']
best_samples_split = grid_search.best_params_['min_samples_split']

tuned_random_forest = RandomForestRegressor(max_depth=best_maxdepth, n_estimators=best_nestimators, min_samples_split=best_samples_split, random_state=0)
tuned_random_forest.fit(X_train, y_train)

y_test_pred_rf = tuned_ridge_model.predict(X_test)

test_mse_rf = mean_squared_error(y_test, y_test_pred_rf)
test_rmse_rf = test_mse_rf ** 0.5
test_r2_rf = r2_score(y_test, y_test_pred_rf)
test_mae_rf = mean_absolute_error(y_test, y_test_pred_rf)

print('\n')
print(f"Test MSE RF (na tuning): {test_mse_rf:.3f}")
print(f"Test RMSE RF (na tuning): {test_rmse_rf:.3f}")
print(f"Test R-squared RF (na tuning): {test_r2_rf:.3f}")
print(f"Test MAE RF (na tuning): {test_mae_rf:.3f}")

Validatie MSE RF (zonder tuning): 62.208
Validatie R-squared RF (zonder tuning): 0.939
Validatie MAE RF (zonder tuning): 5.973


  _data = np.array(data, dtype=dtype, copy=copy,




Test MSE RF (na tuning): 116.108
Test RMSE RF (na tuning): 10.775
Test R-squared RF (na tuning): 0.887
Test MAE RF (na tuning): 8.475


In [9]:
# XGBOOST

xgb = XGBRegressor()
xgb.fit(X_train, y_train)

y_val_pred = xgb.predict(X_val)

val_mse = mean_squared_error(y_val, y_val_pred)
val_r2 = r2_score(y_val, y_val_pred)
val_mae = mean_absolute_error(y_val, y_val_pred)

print(f"Validation MSE (zonder tuning): {val_mse:.3f}")
print(f"Validation R-squared (zonder tuning): {val_r2:.3f}")
print(f"Validation MAE (zonder tuning): {val_mae:.3f}")

param_grid = {'reg_lambda': [0.1, 1, 10, 100], 
              'min_child_weight': [1, 3, 5, 7], 
              'colsample_bytree': [0.6, 0.8, 1.0],
              'learning_rate': [0.01, 0.05, 0.1, 0.2],
              'max_depth': [5, 7, 9],
              'n_estimators': [200, 300, 400]}

grid_search = GridSearchCV(estimator=XGBRegressor(), 
                           param_grid=param_grid, 
                           scoring='neg_mean_squared_error', 
                           cv=3, 
                           n_jobs=-1)

grid_search.fit(X_train, y_train)

print("Best parameters from GridSearchCV:")
print(grid_search.best_params_)

best_xgb = grid_search.best_estimator_

y_test_pred = best_xgb.predict(X_test)

test_mse_xgb = mean_squared_error(y_test, y_test_pred)
test_rmse_xgb = test_mse_xgb ** 0.5
test_r2_xgb = r2_score(y_test, y_test_pred)
test_mae_xgb = mean_absolute_error(y_test, y_test_pred)

print('\n')
print(f"Test MSE XGB (na tuning): {test_mse_xgb:.3f}")
print(f"Test RMSE XGB (na tuning): {test_rmse_xgb:.3f}")
print(f"Test R-squared XGB (na tuning): {test_r2_xgb:.3f}")
print(f"Test MAE XGB (na tuning): {test_mae_xgb:.3f}")

Validation MSE (zonder tuning): 65.748
Validation R-squared (zonder tuning): 0.936
Validation MAE (zonder tuning): 6.211
Best parameters from GridSearchCV:
{'colsample_bytree': 0.8, 'learning_rate': 0.2, 'max_depth': 5, 'min_child_weight': 7, 'n_estimators': 400, 'reg_lambda': 100}


Test MSE XGB (na tuning): 48.139
Test RMSE XGB (na tuning): 6.938
Test R-squared XGB (na tuning): 0.953
Test MAE XGB (na tuning): 5.445


In [None]:
# NEURAL NETWORK

mlp = MLPRegressor()
mlp.fit(X_train, y_train)

y_val_pred = mlp.predict(X_val)

val_mse = mean_squared_error(y_val, y_val_pred)
val_r2 = r2_score(y_val, y_val_pred)
val_mae = mean_absolute_error(y_val, y_val_pred)

print(f"Validation MSE (zonder tuning): {val_mse:.3f}")
print(f"Validation R-squared (zonder tuning): {val_r2:.3f}")
print(f"Validation MAE (zonder tuning): {val_mae:.3f}")

param_grid = {
    'hidden_layer_sizes': [(50, 25, 20), (100,), (64, 32), (5, 5)],
    'activation': ['relu', 'identity'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001, 0.01],
    'max_iter': [200, 1000, 2000]
}

grid_search = GridSearchCV(estimator=MLPRegressor(random_state=42),
                           param_grid=param_grid,
                           scoring='neg_mean_squared_error',
                           cv=3,
                           verbose=2)

grid_search.fit(X_train, y_train)

print("Best parameters from GridSearchCV:")
print(grid_search.best_params_)

best_mlp = grid_search.best_estimator_

y_test_pred = best_mlp.predict(X_test)
test_mse_nn = mean_squared_error(y_test, y_test_pred)
test_rmse_nn = test_mse_nn ** 0.5
test_r2_nn = r2_score(y_test, y_test_pred)
test_mae_nn = mean_absolute_error(y_test, y_test_pred)

print('\n')
print(f"Test MSE NN (na tuning): {test_mse_nn:.3f}")
print(f"Test RMSE NN (na tuning): {test_rmse_nn:.3f}")
print(f"Test R-squared NN (na tuning): {test_r2_nn:.3f}")
print(f"Test MAE NN (na tuning): {test_mae_nn:.3f}")

Validation MSE (zonder tuning): 760.815
Validation R-squared (zonder tuning): 0.256
Validation MAE (zonder tuning): 19.805
Fitting 3 folds for each of 144 candidates, totalling 432 fits
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=adam; total time=   0.3s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=adam; total time=   0.3s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=adam; total time=   0.4s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=sgd; total time=   0.7s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=sgd; total time=   0.7s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=sgd; total time=   0.6s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=adam; total time=   0.2s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=adam; total time=   0.3s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=adam; total time=   0.4s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=sgd; total time=   3.2s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=sgd; total time=   3.4s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=sgd; total time=   3.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=adam; total time=   0.2s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=adam; total time=   0.3s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=adam; total time=   0.4s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=sgd; total time=   6.2s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=sgd; total time=   6.7s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=sgd; total time=   6.3s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), max_iter=200, solver=adam; total time=   0.9s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), max_iter=200, solver=adam; total time=   0.8s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), max_iter=200, solver=adam; total time=   0.5s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), max_iter=1000, solver=adam; total time=   0.7s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), max_iter=1000, solver=ada

  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=200, solver=sgd; total time=   1.4s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=200, solver=sgd; total time=   1.5s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=200, solver=sgd; total time=   1.4s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=adam; total time=   0.4s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=adam; total time=   0.5s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=sgd; total time=   6.7s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=sgd; total time=   6.6s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=sgd; total time=   6.6s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=adam; total time=   0.4s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=adam; total time=   0.5s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=sgd; total time=  13.6s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=sgd; total time=  13.5s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=sgd; total time=  14.0s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(5, 5), max_iter=200, solver=adam; total time=   0.7s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(5, 5), max_iter=200, solver=adam; total time=   0.6s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(5, 5), max_iter=200, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(5, 5), max_iter=200, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(5, 5), max_iter=200, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(5, 5), max_iter=200, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(5, 5), max_iter=1000, solver=adam; total time=   0.7s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(5, 5), max_iter=1000, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(5, 5), max_iter=1000, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(5, 5), max_iter=1000, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(5, 5), max_iter=1000, solver=sgd; to

  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=sgd; total time=   1.0s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=sgd; total time=   1.0s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=sgd; total time=   1.0s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=adam; total time=   0.3s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=adam; total time=   0.3s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=adam; total time=   0.4s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=sgd; total time=   3.1s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=sgd; total time=   3.3s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=sgd; total time=   3.3s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=adam; total time=   0.2s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=adam; total time=   0.3s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=adam; total time=   0.4s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=sgd; total time=   7.4s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=sgd; total time=   7.0s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=sgd; total time=   6.6s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100,), max_iter=200, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100,), max_iter=200, solver=adam; total time=   0.7s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100,), max_iter=200, solver=adam; total time=   0.5s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100,), max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100,), max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100,), max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100,), max_iter=1000, solver=adam; total time=   0.7s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100,), max_iter=1000, solver=adam; total 

  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=200, solver=sgd; total time=   1.9s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=200, solver=sgd; total time=   2.3s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=200, solver=sgd; total time=   2.2s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=adam; total time=   1.0s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=adam; total time=   0.8s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=adam; total time=   0.8s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=sgd; total time=   8.4s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=sgd; total time=   8.5s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=1000, solver=sgd; total time=   7.8s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=adam; total time=   0.8s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=adam; total time=   0.7s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=adam; total time=   0.6s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=sgd; total time=  17.4s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=sgd; total time=  20.2s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(64, 32), max_iter=2000, solver=sgd; total time=  16.2s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(5, 5), max_iter=200, solver=adam; total time=   0.6s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(5, 5), max_iter=200, solver=adam; total time=   0.6s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(5, 5), max_iter=200, solver=adam; total time=   0.7s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(5, 5), max_iter=200, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(5, 5), max_iter=200, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(5, 5), max_iter=200, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(5, 5), max_iter=1000, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(5, 5), max_iter=1000, solver=adam; total time=   0.7s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(5, 5), max_iter=1000, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(5, 5), max_iter=1000, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(5, 5), max_iter=1000, solver=sgd; total time=

  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=sgd; total time=   1.0s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=sgd; total time=   1.0s




[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=200, solver=sgd; total time=   1.0s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=adam; total time=   0.4s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=adam; total time=   0.5s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=adam; total time=   0.6s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=sgd; total time=   3.4s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=sgd; total time=   4.5s




[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=1000, solver=sgd; total time=   5.0s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=adam; total time=   0.4s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=adam; total time=   0.4s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=adam; total time=   0.5s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=sgd; total time=   7.3s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=sgd; total time=   6.9s




[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50, 25, 20), max_iter=2000, solver=sgd; total time=   6.2s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100,), max_iter=200, solver=adam; total time=   0.4s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100,), max_iter=200, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100,), max_iter=200, solver=adam; total time=   0.5s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100,), max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100,), max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100,), max_iter=200, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100,), max_iter=1000, solver=adam; total time=   0.5s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100,), max_iter=1000, solver=adam; total time=   0

  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=200, solver=sgd; total time=   1.3s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=200, solver=sgd; total time=   2.2s




[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=200, solver=sgd; total time=   1.9s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=1000, solver=adam; total time=   0.9s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=1000, solver=adam; total time=   0.5s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=1000, solver=adam; total time=   0.7s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=1000, solver=sgd; total time=   8.1s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=1000, solver=sgd; total time=   9.0s




[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=1000, solver=sgd; total time=   8.9s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=2000, solver=adam; total time=   1.3s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=2000, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(64, 32), max_iter=2000, solver=adam; total time=   0.7s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
