In [8]:
import pandas as pd
features = pd.read_csv('/Users/johannessolibieda/Documents/GitHub/Its-Wekk/5 - Models/4 - LinReg/0 - ModelData/50MostImp+CombinedFeatures.csv')
target = pd.read_csv('/Users/johannessolibieda/Documents/GitHub/Its-Wekk/5 - Models/4 - LinReg/0 - ModelData/TargetCutto50MostImpFeatures_DF.csv')

In [9]:
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
import matplotlib.pyplot as plt
from joblib import parallel_backend

# Ensure the features and target dataframes are correctly assigned
features = features.drop(columns=['Datum'])
target = target['PM10_Combined_Trend_Residual']

# Split the data into train, test, and validate sets
train_data, temp_data, train_target, temp_target = train_test_split(features, target, test_size=0.3, random_state=42)
test_data, validate_data, test_target, validate_target = train_test_split(temp_data, temp_target, test_size=0.3333, random_state=42)

# Define the parameter grid for GridSearchCV
param_grid = {
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'eta0': [0.001, 0.01, 0.1, 1],
    'learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'],
    'penalty': ['l2', 'l1', 'elasticnet']
}

# Initialize the SGDRegressor
sgd = SGDRegressor(max_iter=1000, tol=1e-3)

# Initialize GridSearchCV with joblib parallel backend
with parallel_backend('threading', n_jobs=-1):
    grid_search = GridSearchCV(sgd, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2)
    grid_search.fit(train_data, train_target.values.ravel())

# Get the best parameters and best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Plot the progress bar
progress_bar = tqdm(total=100)
for i in range(100):
    progress_bar.update(1)
progress_bar.close()

# Print the best parameters and best score
print("Best Parameters:", best_params)
print("Best Score:", best_score)

# Evaluate the model on the test set
predictions = grid_search.predict(test_data)
mse = mean_squared_error(test_target, predictions)
print("Mean Squared Error on Test Set:", mse)

# Evaluate the model on the validation set
validate_predictions = grid_search.predict(validate_data)
validate_mse = mean_squared_error(validate_target, validate_predictions)
print("Mean Squared Error on Validation Set:", validate_mse)

# Expanding CV Fold Sizes
initial_train_size = int(0.5 * len(train_data))
test_size = int(0.1 * len(train_data))
num_folds = (len(train_data) - initial_train_size) // test_size

for fold in range(num_folds):
    start_train_size = initial_train_size + fold * test_size
    end_train_size = start_train_size + test_size

    X_train_fold = train_data.iloc[:end_train_size]
    y_train_fold = train_target.iloc[:end_train_size]
    X_test_fold = train_data.iloc[end_train_size:end_train_size + test_size]
    y_test_fold = train_target.iloc[end_train_size:end_train_size + test_size]

    model = SGDRegressor(**best_params, max_iter=1000, tol=1e-3)
    model.fit(X_train_fold, y_train_fold)
    fold_predictions = model.predict(X_test_fold)
    fold_mse = mean_squared_error(y_test_fold, fold_predictions)
    print(f"Fold {fold + 1}/{num_folds}, MSE: {fold_mse}")

Fitting 5 folds for each of 192 candidates, totalling 960 fits
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=l1; total time=   0.2s
[CV] END alpha=0.0001, eta0=0.001, learning_rate=constant, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.0001



[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.0s




[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.5s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alph



[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   6.2s




[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   6.4s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.4s




[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] E



[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   6.3s
[CV] END alpha=0.01, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   6.5s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   4.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   4.6s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   4.6s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   4.8s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   4.7s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.2s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   5.9s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.3s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta



[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.0s




[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.2s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.3s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.3s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.5s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.5s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.5s
[CV] END alpha=0.01, eta0=0.1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.01, eta0=0.1, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.01, eta0=0.1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.01, eta0=0.01, learning_rate=adaptive, penalty=elasticnet; total time=   0.5s
[CV] END alpha=0.01, eta0



[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.1s




[CV] END alpha=0.01, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.5s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   4.5s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   4.5s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   4.7s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   4.7s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   5.0s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   6.2s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   6.0s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   6.0s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.3s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.4s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, eta0=0.1, lear



[CV] END alpha=0.01, eta0=0.1, learning_rate=invscaling, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.4s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   6.2s
[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   6.3s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.4s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.6s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   6.7s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.7s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.7s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.7s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.8s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=elasticnet; total time=   0.7s




[CV] END alpha=0.01, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   6.2s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=elasticnet; total time=   0.8s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   1.1s
[CV] END alpha=0.01, eta0=0.1, learning_rate=adaptive, penalty=elasticnet; total time=   0.8s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, eta0=1, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.01, eta0=1, learning_rate=co



[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.4s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.5s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.6s




[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.7s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.7s




[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l1; total time=   5.9s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.1s




[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.3s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.7s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.6s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.4s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.8s




[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.3s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   6.1s




[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   6.5s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l1; total time=   1.2s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.7s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   6.5s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.9s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   0.9s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l1; total time=   1.2s
[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   6.1s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.0s




[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=l1; total time=   1.2s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.2s




[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.4s[CV] END alpha=0.01, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   6.3s

[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.1s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.1s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.8s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.8s
[CV] END alpha=0.01, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.8s
[CV] END alpha=0.01, eta0=1, learning_rate=adapti



[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l2; total time=   4.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l2; total time=   4.4s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l2; total time=   4.5s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l2; total time=   4.6s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l2; total time=   4.8s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   5.8s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.1s




[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.8s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.1, eta0=0.001, learning_rate=invscaling, penalty=l1; total time=   0.3s
[CV] END alpha=0.1, eta0



[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   6.1s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=l1; total time=   6.4s




[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   6.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   6.2s




[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=l1; total time=   0.5s
[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.1, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.001, learning_rate=adaptive, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.1, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha



[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   6.6s
[CV] END alpha=0.1, eta0=0.001, learning_rate=optimal, penalty=elasticnet; total time=   6.3s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   4.4s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   4.7s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   4.7s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   4.8s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l2; total time=   4.7s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   5.9s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.2s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.7s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.1, eta0=0.01, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.1, eta0=0.01, lear



[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.3s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=l1; total time=   6.5s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.4s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.6s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   6.8s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.6s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.7s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.6s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=elasticnet; total time=   0.5s
[CV] END alpha=0.1, eta0=0.01, learning_rate=adaptive, penalty=l1; total time=   0.7s
[CV] END alpha=0.1, eta0=0.1, learning_rate=constant, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.1, lea



[CV] END alpha=0.1, eta0=0.1, learning_rate=constant, penalty=elasticnet; total time=   0.1s[CV] END alpha=0.1, eta0=0.1, learning_rate=constant, penalty=elasticnet; total time=   0.1s

[CV] END alpha=0.1, eta0=0.1, learning_rate=constant, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=constant, penalty=elasticnet; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=constant, penalty=elasticnet; total time=   0.2s




[CV] END alpha=0.1, eta0=0.01, learning_rate=optimal, penalty=elasticnet; total time=   7.0s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   4.9s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   5.1s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   5.2s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   5.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l2; total time=   5.4s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   7.0s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   7.0s




[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=l1; total time=   6.8s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.1s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.4s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.6s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=l1; total time=   0.2s
[CV] END alpha=0.1, eta0=0.1, learning_rate=in



[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   6.9s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   7.0s
[CV] END alpha=0.1, eta0=0.1, learning_rate=optimal, penalty=elasticnet; total time=   6.8s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.1, eta0=0.1, learning_rate=invscaling, penalty=elasticnet; total time=   0.3s




[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.4s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.3s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.9s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.9s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.9s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.9s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=l1; total time=   0.9s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=l2; total time=   0.0s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=l2; tot



[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=elasticnet; total time=   0.7s[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=l1; total time=   0.1s

[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=l1; total time=   0.1s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=elasticnet; total time=   0.1s
[CV] END alpha=0.1, eta0=1, learning_rate=constant, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=elasticnet; total time=   0.8s
[CV] END alpha=0.1, eta0=0.1, learning_rate=adaptive, penalty=elasticnet; total time=   0.8s
[CV] END alpha=0.



[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.5s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.5s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.6s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.9s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l2; total time=   4.9s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.2s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.3s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.2s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.6s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.4s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.5s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.4s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l2; total time=   0.6s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l1; total time=   1.0s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.2s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   6.4s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=l1; total time=   6.6s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   6.3s




[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   6.2s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.9s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l1; total time=   1.7s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l1; total time=   0.8s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=l1; total time=   1.0s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.1s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.3s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.6s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   6.2s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.4s
[CV] END alpha=0.1, eta0=1, learning_rate=optimal, penalty=elasticnet; total time=   6.4s
[CV] END alpha=0.1, eta0=1, learning_r



[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.0s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.4s
[CV] END alpha=0.1, eta0=1, learning_rate=invscaling, penalty=elasticnet; total time=   1.3s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.5s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l2; total time=   0.6s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.9s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.9s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l1; total time=   1.1s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l1; total time=   0.8s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=elasticnet; total time=   0.8s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty=l1; total time=   1.0s
[CV] END alpha=0.1, eta0=1, learning_rate=adaptive, penalty

100%|██████████| 100/100 [00:00<00:00, 690989.13it/s]

Best Parameters: {'alpha': 0.01, 'eta0': 0.001, 'learning_rate': 'invscaling', 'penalty': 'l2'}
Best Score: -36.3968963224596
Mean Squared Error on Test Set: 53.125979228667305
Mean Squared Error on Validation Set: 38.588034969820654





Fold 1/5, MSE: 40.66508794968743
Fold 2/5, MSE: 50.17157026145789
Fold 3/5, MSE: 32.586867044822604
Fold 4/5, MSE: 35.19258041705723
Fold 5/5, MSE: 9.073422377772957


In [10]:
# Define a more focused parameter grid for GridSearchCV based on the best parameters
refined_param_grid = {
    'alpha': [best_params['alpha'] * 0.5, best_params['alpha'], best_params['alpha'] * 5],
    'eta0': [best_params['eta0'] * 0.5, best_params['eta0'], best_params['eta0'] * 5],
    'learning_rate': [best_params['learning_rate']],
    'penalty': [best_params['penalty']]
}

# Initialize a new GridSearchCV with the refined parameter grid
refined_grid_search = GridSearchCV(sgd, refined_param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, verbose=2)

# Fit the model with the refined parameter grid
refined_grid_search.fit(train_data, train_target.values.ravel())

# Get the best parameters and best score from the refined search
refined_best_params = refined_grid_search.best_params_
refined_best_score = refined_grid_search.best_score_

# Print the refined best parameters and best score
print("Refined Best Parameters:", refined_best_params)
print("Refined Best Score:", refined_best_score)

# Evaluate the refined model on the test set
refined_predictions = refined_grid_search.predict(test_data)
refined_mse = mean_squared_error(test_target, refined_predictions)
print("Refined Mean Squared Error on Test Set:", refined_mse)

# Evaluate the refined model on the validation set
refined_validate_predictions = refined_grid_search.predict(validate_data)
refined_validate_mse = mean_squared_error(validate_target, refined_validate_predictions)
print("Refined Mean Squared Error on Validation Set:", refined_validate_mse)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] END alpha=0.005, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.005, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.005, eta0=0.0005, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.005, eta0=0.0005, learning_rate=invscaling, penalty=l2; total time=   0.2s
[CV] END alpha=0.005, eta0=0.0005, learning_rate=invscaling, penalty=l2; total time=   0.3s
[CV] END alpha=0.005, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.3s
[CV] END alpha=0.005, eta0=0.0005, learning_rate=invscaling, penalty=l2; total time=   0.3s
[CV] END alpha=0.005, eta0=0.0005, learning_rate=invscaling, penalty=l2; total time=   0.3s
[CV] END alpha=0.005, eta0=0.001, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.005, eta0=0.005, learning_rate=invscaling, penalty=l2; total time=   0.1s
[CV] END alpha=0.005, eta