In [None]:
import pandas as pd
features = pd.read_csv('../../0 - ModelData/50MostImp+CombinedFeatures.csv')
target = pd.read_csv('../../0 - ModelData/TargetCutto50MostImpFeatures_DF.csv')

In [21]:
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
import matplotlib.pyplot as plt
from joblib import parallel_backend

# Ensure the features and target dataframes are correctly assigned
features = features.drop(columns=['Datum'])
target = target['PM10_Combined_Trend_Residual']

# Split the data into train, test, and validate sets
train_data, temp_data, train_target, temp_target = train_test_split(features, target, test_size=0.3, random_state=42)
test_data, validate_data, test_target, validate_target = train_test_split(temp_data, temp_target, test_size=0.3333, random_state=42)

# Define the parameter grid for GridSearchCV
param_grid = {
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'eta0': [0.001, 0.01, 0.1, 1],
    'learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'],
    'penalty': ['l2', 'l1', 'elasticnet', None]
}

# Initialize the SGDRegressor
sgd = SGDRegressor(max_iter=1000, tol=1e-3)

# Initialize GridSearchCV with joblib parallel backend
with parallel_backend('threading', n_jobs=-1):
    grid_search = GridSearchCV(sgd, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2)
    grid_search.fit(train_data, train_target.values.ravel())

# Get the best parameters and best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Plot the progress bar
progress_bar = tqdm(total=100)
for i in range(100):
    progress_bar.update(1)
progress_bar.close()

# Print the best parameters and best score
print("Best Parameters:", best_params)
print("Best Score:", best_score)

# Evaluate the model on the test set
predictions = grid_search.predict(test_data)
predictions = np.clip(predictions, 0, None)  # Ensure predictions are not lower than zero
mse = mean_squared_error(test_target, predictions)
print("Mean Squared Error on Test Set:", mse)

# Evaluate the model on the validation set
validate_predictions = grid_search.predict(validate_data)
validate_predictions = np.clip(validate_predictions, 0, None)  # Ensure predictions are not lower than zero
validate_mse = mean_squared_error(validate_target, validate_predictions)
print("Mean Squared Error on Validation Set:", validate_mse)

# Expanding CV Fold Sizes
initial_train_size = int(0.5 * len(train_data))
test_size = int(0.1 * len(train_data))
num_folds = (len(train_data) - initial_train_size) // test_size

for fold in range(num_folds):
    start_train_size = initial_train_size + fold * test_size
    end_train_size = start_train_size + test_size

    X_train_fold = train_data.iloc[:end_train_size]
    y_train_fold = train_target.iloc[:end_train_size]
    X_test_fold = train_data.iloc[end_train_size:end_train_size + test_size]
    y_test_fold = train_target.iloc[end_train_size:end_train_size + test_size]

    model = SGDRegressor(**best_params, max_iter=1000, tol=1e-3)
    model.fit(X_train_fold, y_train_fold)
    fold_predictions = model.predict(X_test_fold)
    fold_predictions = np.clip(fold_predictions, 0, None)  # Ensure predictions are not lower than zero
    fold_mse = mean_squared_error(y_test_fold, fold_predictions)
    print(f"Fold {fold + 1}/{num_folds}, MSE: {fold_mse}")

Fitting 5 folds for each of 256 candidates, totalling 1280 fits
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=None; total time=   0.0s
[CV] END alpha=0.0001, eta0=0.001,



[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   4.8s




[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.0s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   4.9s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   4.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.7s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   4.2s
[



[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=None; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=None; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=None; total time=   0.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscalin



[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=None; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=None; total time=   5.5s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.0s




[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=None; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.5s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=None; total time=   0.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=None; total time=   0.3s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=None; total time=   0.0s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=elasticnet; total time=   0.1s
[CV] END alpha=



[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   2.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   2.5s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   2.5s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   2.6s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   3.1s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   3.7s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   3.3s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   4.0s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.7s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   5.7s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.3s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, eta0=0



[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=None; total time=   5.4s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=None; total time=   5.6s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.6s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.5s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=None; total time=   5.7s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.5s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=None; total time=   5.7s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=elasticnet; total time=   0.5s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=elasticnet; total time=   0.5s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=elasticnet; total time=   0.5s
[CV] END alpha=0.01, eta0=0.1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.01, eta0=0.1, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=None; total time=   0.3s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=None; total time=   0.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=None; total time=   0.3s
[CV] E



[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=None; total time=   5.1s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   0.9s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   1.8s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   2.1s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   2.5s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   3.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   3.4s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   6.3s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   6.3s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   6.3s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   3.6s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   5.9s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   5.4s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   5.9s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   3.4s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1,



[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.4s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=None; total time=   4.3s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=None; total time=   4.4s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.4s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.4s




[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.7s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.7s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=elasticnet; total time=   0.5s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.6s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.7s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=None; total time=   4.5s




[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.8s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=elasticnet; total time=   0.8s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=elasticnet; total time=   0.5s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=None; total time=   0.4s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=None; total time=   0.4s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=elasticnet; total time=   0.7s
[CV] END alpha=0.01, eta0=1, learnin



[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l1; total time=   5.6s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l1; total time=   5.7s




[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l1; total time=   5.9s




[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.0s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   5.2s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   3.9s




[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.0s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   3.5s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.6s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.6s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.4s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.7s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.6s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.4s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=None; total time=   4.2s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, p



[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.9s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.7s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   0.6s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   0.6s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   0.8s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=None; total time=   0.3s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   5.5s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=None; total time=   4.5s




[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=None; total time=   0.5s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=None; total time=   4.3s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=None; total time=   0.4s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=None; total time=   4.4s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   0.9s




[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=None; total time=   0.3s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=None; total time=   0.4s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.4s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.1s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.4s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.6s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.4s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.4s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.7s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.8s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.7s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penal



[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l2; total time=   4.0s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l2; total time=   4.4s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l2; total time=   4.5s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l2; total time=   4.6s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l2; total time=   4.6s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   5.8s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   5.9s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.2s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   5.6s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   5.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   5.5s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   5.8s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   5.6s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=None; total time=   4.4s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.1s




[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   5.6s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   5.8s




[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.5s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=None; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=None; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=None; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=None; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, pena



[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=None; total time=   4.4s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=None; total time=   4.5s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   5.5s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   6.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   6.1s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   6.4s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   6.4s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   7.8s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   7.6s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   7.6s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.5s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.2s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.0s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.0s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=None; total time=   4.5s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s




[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=None; total tim



[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.01, learning_



[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=None; total time=   4.6s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=None; total time=   5.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=None; total time=   5.2s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=None; total time=   5.1s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   5.6s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   5.7s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   6.0s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   6.0s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   5.9s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   6.8s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   7.0s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   7.5s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   6.0s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   6.1s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   6.5s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   6.6s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   6.3s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=None; total time=   4.8s




[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.1s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   5.9s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   6.0s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=None; total time=   0.1s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=None; total ti



[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=None; total time=   4.4s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=None; total time=   4.2s




[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=None; total time=   0.5s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=elasticnet; total time=   0.7s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=None; total time=   0.3s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=None; total time=   4.5s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=None; total time=   0.5s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=None; total time=   0.6s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalt



[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=None; total time=   0.0s[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=elasticnet; total time=   0.1s

[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=None; total time=   0.0s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=None; total time=   0.1s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=None; total time=   0.1s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.6s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.6s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.8s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.8s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.9s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.0s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.2s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.4s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l1; total time=   5.7s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l1; total time=   5.8s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   5.7s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   5.9s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=None; total time=   4.2s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   5.8s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.4s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.4s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.4s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.4s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   5.9s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.5s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   5.9s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.9s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.7s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.8s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.9s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   0.6s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.0s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.0s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.0s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   0.8s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=None; total time=   0.5s
[CV] END alpha=0.1



[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=None; total time=   4.8s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=None; total time=   4.7s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.6s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=None; total time=   5.1s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=None; total time=   5.3s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.9s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.8s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.8s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=elasticnet; total time=   0.7s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l1; total time=   1.0s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=elasticnet; total time=   1.0s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l1; total time=   1.1s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=elasticnet; total time=   0.9s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=None; total time=   0.6s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty

100%|██████████| 100/100 [00:00<00:00, 703742.28it/s]


Best Parameters: {'alpha': 0.01, 'eta0': 0.001, 'learning_rate': 'adaptive', 'penalty': 'l1'}
Best Score: -39.481603806220846
Mean Squared Error on Test Set: 55.73167469595163
Mean Squared Error on Validation Set: 41.124494124892415
Fold 1/5, MSE: 43.8145976488323
Fold 2/5, MSE: 53.55301705583083
Fold 3/5, MSE: 35.50436426532961
Fold 4/5, MSE: 37.69351284002041
Fold 5/5, MSE: 7.511772661689007


In [24]:
# Define a more focused parameter grid for GridSearchCV based on the best parameters
refined_param_grid = {
    'alpha': [best_params['alpha'] * 0.5, best_params['alpha'], best_params['alpha'] * 5],
    'eta0': [best_params['eta0'] * 0.5, best_params['eta0'], best_params['eta0'] * 5],
    'learning_rate': [best_params['learning_rate']],
    'penalty': [best_params['penalty']]
}

# Initialize a new GridSearchCV with the refined parameter grid
refined_grid_search = GridSearchCV(sgd, refined_param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, verbose=2)

# Fit the model with the refined parameter grid
refined_grid_search.fit(train_data, train_target.values.ravel())

# Get the best parameters and best score from the refined search
refined_best_params = refined_grid_search.best_params_
refined_best_score = refined_grid_search.best_score_

# Print the refined best parameters and best score
print("Refined Best Parameters:", refined_best_params)
print("Refined Best Score:", refined_best_score)

# Evaluate the refined model on the test set
refined_predictions = refined_grid_search.predict(test_data)
refined_mse = mean_squared_error(test_target, refined_predictions)
print("Refined Mean Squared Error on Test Set:", refined_mse)

# Evaluate the refined model on the validation set
refined_validate_predictions = refined_grid_search.predict(validate_data)
refined_validate_mse = mean_squared_error(validate_target, refined_validate_predictions)
print("Refined Mean Squared Error on Validation Set:", refined_validate_mse)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] END alpha=0.005, eta0=0.0005, learning_rate=adaptive, penalty=l1; total time=   0.3s
[CV] END alpha=0.005, eta0=0.0005, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.005, eta0=0.0005, learning_rate=adaptive, penalty=l1; total time=   0.3s
[CV] END alpha=0.005, eta0=0.0005, learning_rate=adaptive, penalty=l1; total time=   0.3s
[CV] END alpha=0.005, eta0=0.0005, learning_rate=adaptive, penalty=l1; total time=   0.3s
[CV] END alpha=0.005, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.005, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.5s
[CV] END alpha=0.005, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.5s
[CV] END alpha=0.005, eta0=0.005, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.005, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.01, eta0=0.0005, learning_ra