In [9]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, KFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score, make_scorer
from xgboost import XGBRegressor
from sklearn.compose import ColumnTransformer
from sklearn.neural_network import MLPRegressor
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.ensemble import RandomForestRegressor

In [10]:
df = pd.read_csv("final_df.csv")
print(df.head())

   ervaring  500_split  2k tijd  binary_trainingtype  binary_geslacht  \
0         1      104.6    379.9                    0                0   
1         1      104.7    379.9                    0                0   
2         1      104.3    379.9                    0                0   
3         1      104.0    379.9                    0                0   
4         1      104.1    379.9                    0                0   

   binary_gewichtsklasse  
0                      1  
1                      1  
2                      1  
3                      1  
4                      1  


In [11]:
from sklearn.model_selection import train_test_split

# Eerst de data opschudden om bias te voorkomen
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Zorg ervoor dat elke ervaring, geslacht en gewichtsklasse in elke set vertegenwoordigd zijn
train_data = pd.DataFrame()
val_data = pd.DataFrame()
test_data = pd.DataFrame()

for ervaring in df['ervaring'].unique():
    for geslacht in df['binary_geslacht'].unique():
        for gewichtsklasse in df['binary_gewichtsklasse'].unique():
            subset = df[(df['ervaring'] == ervaring) & (df['binary_geslacht'] == geslacht) & (df['binary_gewichtsklasse'] == gewichtsklasse)]
            if not subset.empty:
                temp_train, temp_temp = train_test_split(subset, test_size=0.3, random_state=42)
                temp_val, temp_test = train_test_split(temp_temp, test_size=0.3, random_state=42)
                train_data = pd.concat([train_data, temp_train])
                val_data = pd.concat([val_data, temp_val])
                test_data = pd.concat([test_data, temp_test])

# Reset indexen
train_data.reset_index(drop=True, inplace=True)
val_data.reset_index(drop=True, inplace=True)
test_data.reset_index(drop=True, inplace=True)

# Controleren op juiste verdeling
print(f"Trainingsdata: {len(train_data)} rijen")
print(f"Validatiedata: {len(val_data)} rijen")
print(f"Testdata: {len(test_data)} rijen")

X_train = train_data.drop(columns=['2k tijd'])
y_train = train_data['2k tijd']

X_val = val_data.drop(columns=['2k tijd'])
y_val = val_data['2k tijd']

X_test = test_data.drop(columns=['2k tijd'])
y_test = test_data['2k tijd']

Trainingsdata: 3018 rijen
Validatiedata: 905 rijen
Testdata: 391 rijen


In [12]:
# RIDGE REGRESSION



In [None]:
# NEURAL NETWORK

mlp = MLPRegressor()
mlp.fit(X_train, y_train)

# Validate the model
y_val_pred = mlp.predict(X_val)
val_mse = mean_squared_error(y_val, y_val_pred)
val_r2 = r2_score(y_val, y_val_pred)

print(f"Validation MSE (zonder tuning): {val_mse}")
print(f"Validation R-squared (zonder tuning): {val_r2}")

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'hidden_layer_sizes': [(50, 25, 20), (100,), (100, 50)],
    'activation': ['relu', 'identity'],
    'solver': ['adam', 'sgd'],
    'learning_rate': ['constant', 'adaptive'],
    'alpha': [0.0001, 0.001, 0.01],
    'max_iter': [1000, 2000, 3000]
}

# Perform GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=MLPRegressor(random_state=42),
                           param_grid=param_grid,
                           scoring='neg_mean_squared_error',
                           cv=3,
                           verbose=2)

grid_search.fit(X_train, y_train)

# Best parameters from grid search
print("Best parameters from GridSearchCV:")
print(grid_search.best_params_)

# Use the best model from grid search
best_mlp = grid_search.best_estimator_

# Test the final model on the test data
y_test_pred = best_mlp.predict(X_test)
test_mse_nn = mean_squared_error(y_test, y_test_pred)
test_rmse_nn = test_mse_nn ** 0.5
test_r2_nn = r2_score(y_test, y_test_pred)

print(f"Test MSE (na tuning): {test_mse_nn}")
print(f"Test RMSE (na tuning): {test_rmse_nn}")
print(f"Test R-squared (na tuning): {test_r2_nn}")

Validation MSE (zonder tuning): 488.1280127159293
Validation R-squared (zonder tuning): 0.5228237960506925
Fitting 3 folds for each of 216 candidates, totalling 648 fits
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   0.4s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   0.3s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   0.3s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.0s
[CV] END ac

  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=  10.8s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=  13.6s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=  19.3s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=   1.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=   1.2s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=   1.3s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=sgd; total time=  31.1s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=sgd; total time=  22.5s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=sgd; total time=  24.8s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=adam; total time=   0.8s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=adam; total time=   0.9s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=adam; total time=   0.9s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=sgd; total time=  40.3s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=sgd; total time=  32.2s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=sgd; total time=  35.0s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   0.7s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   0.8s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   0.8s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=  12.8s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=  13.1s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=  11.7s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   0.7s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   0.7s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=sgd; total time=  22.0s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=sgd; total time=  24.2s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=sgd; total time=  25.7s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=adam; total time=   0.7s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=adam; total time=   0.8s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=adam; total time=   0.7s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=sgd; total time=  36.3s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=sgd; total time=  35.3s




[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=sgd; total time=  25.5s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   0.5s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   0.4s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   0.3s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.0s
[CV] END activation=relu, alpha=0.001, hidden_l

  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   9.0s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   9.0s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   9.0s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=   0.9s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=   0.4s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=   0.7s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=sgd; total time=  15.1s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=sgd; total time=  14.8s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=sgd; total time=  17.1s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=adam; total time=   0.9s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=adam; total time=   0.3s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=adam; total time=   0.7s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=sgd; total time=  38.7s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=sgd; total time=  32.1s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=sgd; total time=  41.4s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   1.7s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   0.9s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   1.5s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=  20.0s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=  15.6s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=  14.1s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   1.3s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   0.5s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   1.0s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=sgd; total time=  29.8s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=sgd; total time=  30.2s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=sgd; total time=  36.9s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=adam; total time=   1.6s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=adam; total time=   1.1s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=sgd; total time=  55.4s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=sgd; total time=  38.9s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=sgd; total time=  39.7s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   0.6s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   0.5s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   0.7s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.1s
[CV] END activation=relu, alpha=0.01, hidden_layer_siz

  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=  13.6s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=  12.6s




[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=  12.9s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=   1.1s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=   0.8s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=   0.9s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=sgd; total time=  26.5s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=sgd; total time=  26.1s




[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=sgd; total time=  26.2s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=adam; total time=   1.1s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=adam; total time=   0.8s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=adam; total time=   0.9s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=sgd; total time=  38.3s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=sgd; total time=  38.9s




[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=3000, solver=sgd; total time=  37.8s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   1.0s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   0.9s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   0.9s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=  12.6s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=  12.8s




[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=  12.8s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   1.2s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   0.9s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   1.0s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=sgd; total time=  26.5s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=sgd; total time=  25.8s




[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=sgd; total time=  25.8s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=adam; total time=   1.0s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=adam; total time=   0.9s
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=adam; total time=   1.1s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=sgd; total time=  32.2s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=sgd; total time=  26.9s




[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=3000, solver=sgd; total time=  27.1s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   4.6s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   4.6s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   4.6s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=adam; total time=   5.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=adam; total time=   5.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=adam; total time=   4.6s
[CV] END activation=tanh, alpha=0.0001, hidd



[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   5.0s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   7.4s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   7.8s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   0.7s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   0.6s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   9.3s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   8.7s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   8.1s
[CV] END activation=tanh, alpha=0.0001, hidd



[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=adam; total time=   8.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=adam; total time=   6.7s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=adam; total time=   7.5s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.4s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.3s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.3s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=2000, solver=adam; total time=   8.9s
[CV] END activation=tanh, alpha=0.000



[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   7.9s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   6.7s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   7.5s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   0.9s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   9.5s
[CV] END activation=tanh, alpha=0.000



[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=   1.6s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=  18.9s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=  17.7s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.6s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.4s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.5s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=  18.6s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=  25.4s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=  21.1s
[CV] END activat



[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=  12.6s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=  13.1s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=  13.7s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.4s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=  14.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=  19.4s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=  15.3s
[CV] END activat



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   5.4s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   7.0s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   5.5s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.4s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.9s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=adam; total time=   7.8s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=adam; total time=   6.6s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=adam; total time=   6.9s
[CV] END activation=tanh, alpha=0.001, hidden_layer



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   5.4s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   4.9s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   5.8s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   0.7s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   0.9s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   7.4s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   8.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   9.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=  10.7s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=  13.9s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=  12.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.3s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.3s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=  14.4s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=  15.7s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=  18.9s
[CV] END activation=tan



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=  17.1s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=  18.2s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=  11.9s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=  13.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=  13.8s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=  13.2s
[CV] END activation=tan



[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   4.9s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   5.1s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=adam; total time=   5.0s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.2s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.2s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.3s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=adam; total time=   7.2s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=adam; total time=   5.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=2000, solver=adam; total time=   7.0s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(



[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   8.1s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   8.1s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=   8.3s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   0.9s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   0.9s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   0.9s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=2000, solver=adam; total time=  11.7s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=2000, solver=adam; total time=   8.5s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=2000, solver=adam; total time=  11.5s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(



[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=  17.6s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=  17.7s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=  18.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.4s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.4s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   0.4s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=  20.0s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=  25.0s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=2000, solver=adam; total time=  21.6s
[CV] END activation=tanh, alph



[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=  10.4s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=  10.7s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=  10.7s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.0s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.0s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=   1.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=  11.9s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=  15.3s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=2000, solver=adam; total time=  13.1s
[CV] END activation=tanh, alph

54 fits failed out of a total of 648.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
54 fits failed with the following error:
Traceback (most recent call last):
  File "/home/imme/miniconda3/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/imme/miniconda3/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/imme/miniconda3/lib/python3.12/site-packages/sklearn/neural_network/_multilayer_perceptron.py", line 751, in fit
    return self._fit(X, y, incremental=False)
           ^^^^^^^^^^^^^^^^^^^^^^^^

Best parameters from GridSearchCV:
{'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (50,), 'learning_rate': 'constant', 'max_iter': 1000, 'solver': 'adam'}
Test MSE (na tuning): 2346.616002117587
Test RMSE (na tuning): 48.4418827268056
Test R-squared (na tuning): -1.2851088246488604


In [15]:
# XGBOOST

xgb = XGBRegressor()
xgb.fit(X_train, y_train)

# Validate the model
y_val_pred = xgb.predict(X_val)
val_mse = mean_squared_error(y_val, y_val_pred)
val_r2 = r2_score(y_val, y_val_pred)

print(f"Validation MSE (zonder tuning): {val_mse}")
print(f"Validation R-squared (zonder tuning): {val_r2}")

param_grid = {'reg_lambda': [0.1, 1, 10, 100, 150, 500], 
              'min_child_weight': [1, 3, 5, 7, 9, 12], 
              'colsample_bytree': [0.6, 0.8, 1.0],
              'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.5, 0.8, 1, 1.2, 1.5],
              'max_depth': [3, 5, 7],
              'n_estimators': [100, 200, 300, 400, 500]}

grid_search = GridSearchCV(estimator=XGBRegressor(), 
                           param_grid=param_grid, 
                           scoring='neg_mean_squared_error', 
                           cv=3, 
                           n_jobs=-1)

grid_search.fit(X_train, y_train)

# Best parameters from grid search
print("Best parameters from GridSearchCV:")
print(grid_search.best_params_)

# Use the best model from grid search
best_xgb = grid_search.best_estimator_

# Test the final model on the test data
y_test_pred = best_xgb.predict(X_test)
test_mse_xgb = mean_squared_error(y_test, y_test_pred)
test_rmse_xgb = test_mse_xgb ** 0.5
test_r2_xgb = r2_score(y_test, y_test_pred)

print(f"Test MSE (na tuning): {test_mse_xgb}")
print(f"Test RMSE (na tuning): {test_rmse_xgb}")
print(f"Test R-squared (na tuning): {test_r2_xgb}")

Validation MSE (zonder tuning): 66.50744379765271
Validation R-squared (zonder tuning): 0.9349847401931329
Best parameters from GridSearchCV:
{'colsample_bytree': 1.0, 'learning_rate': 1.5, 'max_depth': 5, 'min_child_weight': 7, 'n_estimators': 100, 'reg_lambda': 500}
Test MSE (na tuning): 53.433444833608675
Test RMSE (na tuning): 7.309818385815661
Test R-squared (na tuning): 0.9479670997681414
