In [57]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error, r2_score, median_absolute_error, mean_squared_log_error, explained_variance_score

#Regression algorythms
from sklearn.linear_model import Ridge, Lasso
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

In [58]:
#Data split
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=345)

In [59]:
#Data standardization
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [68]:
models = [
    ('Ridge Regression', Ridge()),
    ('Lasso Regression', Lasso()),
    ('SVR', SVR()),
    ('Decision Tree', DecisionTreeRegressor()),
    ('Random Forest', RandomForestRegressor()),
    ('k-NN', KNeighborsRegressor()),
    ('Gradient Boosting', GradientBoostingRegressor()),
    ('Neural Network', MLPRegressor(max_iter=10000))
]

In [69]:
#Cross-validation parameters
cv = KFold(n_splits=5, shuffle=True, random_state=345)

In [70]:
scoring = {
    'neg_mean_squared_error': make_scorer(mean_squared_error, greater_is_better=False),
    'neg_mean_absolute_error': make_scorer(mean_absolute_error, greater_is_better=False),
    'r2': make_scorer(r2_score),
    'neg_median_absolute_error': make_scorer(median_absolute_error, greater_is_better=False),
    'neg_mean_squared_log_error': make_scorer(mean_squared_log_error, greater_is_better=False),
    'explained_variance': make_scorer(explained_variance_score)
}

In [71]:
#Default hyperparameters
results = []
names = []

for name, model in models:
    scores = {metric: cross_val_score(model, X_train, y_train, cv=cv, scoring=scorer).mean() for metric, scorer in scoring.items()}
    results.append(scores)
    names.append(name)

results_df = pd.DataFrame(results, index=names)



In [72]:
results_df.style.background_gradient(cmap="RdYlGn")

Unnamed: 0,neg_mean_squared_error,neg_mean_absolute_error,r2,neg_median_absolute_error,neg_mean_squared_log_error,explained_variance
Ridge Regression,-3017.384479,-44.988392,0.455947,-39.391947,-0.171642,0.470067
Lasso Regression,-3002.962386,-44.841481,0.460822,-39.582718,-0.168514,0.473699
SVR,-5048.65862,-60.023966,0.118126,-56.62803,-0.259974,0.144811
Decision Tree,-6182.290382,-63.26841,-0.163793,-52.0,-0.338953,-0.138531
Random Forest,-3531.504059,-48.874558,0.377519,-46.012,-0.19084,0.378257
k-NN,-3685.557257,-48.579219,0.340087,-41.08,-0.200909,0.349525
Gradient Boosting,-3468.666471,-47.753218,0.369805,-42.706008,-0.187251,0.374789
Neural Network,-3430.32956,-46.440053,0.394945,-39.387247,-0.19429,0.362251


In [73]:
#Defining hyperparameter grids for individual models

param_grids = {
    'Ridge Regression': {
        'alpha': [0.01, 0.1, 1.0, 10.0, 100.0]
    },
    'Lasso Regression': {
        'alpha': [0.01, 0.1, 1.0, 10.0, 100.0]
    },
    'SVR': {
        'C': [0.1, 1, 10, 100],
        'gamma': [0.001, 0.01, 0.1, 1],
        'kernel': ['rbf', 'linear']
    },
    'Decision Tree': {
        'max_depth': [None, 10, 20, 30, 40, 50],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    },
    'Random Forest': {
        'n_estimators': [10, 50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    },
    'k-NN': {
        'n_neighbors': [3, 5, 7, 9],
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
    },
    'Gradient Boosting': {
        'n_estimators': [10, 50, 100, 200],
        'learning_rate': [0.001, 0.01, 0.1, 1],
        'max_depth': [3, 5, 7, 9]
    },
    'Neural Network': {
        'hidden_layer_sizes': [(50,), (100,), (100, 50), (100, 100)],
        'activation': ['relu', 'tanh'],
        'solver': ['adam', 'lbfgs'],
        'alpha': [0.0001, 0.001, 0.01],
        'learning_rate': ['constant', 'adaptive']
    }
}

In [74]:
best_models = []
for name, model in models:
    print(f"Tuning {name}...")
    grid_search = GridSearchCV(estimator=model, param_grid=param_grids[name], cv=cv, scoring='neg_mean_squared_error', n_jobs=-1)
    grid_search.fit(X_train, y_train)
    best_models.append((name, grid_search.best_estimator_, grid_search.best_params_))
    print(f"Best parameters for {name}: {grid_search.best_params_}")

Tuning Ridge Regression...
Best parameters for Ridge Regression: {'alpha': 10.0}
Tuning Lasso Regression...
Best parameters for Lasso Regression: {'alpha': 1.0}
Tuning SVR...
Best parameters for SVR: {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
Tuning Decision Tree...
Best parameters for Decision Tree: {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10}
Tuning Random Forest...
Best parameters for Random Forest: {'max_depth': 20, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 50}
Tuning k-NN...
Best parameters for k-NN: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'distance'}
Tuning Gradient Boosting...
Best parameters for Gradient Boosting: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50}
Tuning Neural Network...
Best parameters for Neural Network: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (100,), 'learning_rate': 'adaptive', 'solver': 'adam'}




In [88]:
HPT_results = []
HPT_names = []

for name, model, params in best_models:
    scores = {metric: cross_val_score(model, X_train, y_train, cv=cv, scoring=scorer).mean() for metric, scorer in scoring.items()}
    HPT_results.append(scores)
    HPT_names.append(name)

HPT_results_df = pd.DataFrame(results, index=HPT_names)



NameError: name 'HTP_names' is not defined

In [None]:
HPT_names

['Ridge Regression',
 'Lasso Regression',
 'SVR',
 'Decision Tree',
 'Random Forest',
 'k-NN',
 'Gradient Boosting',
 'Neural Network']

In [None]:
HPT_results_df.style.background_gradient(cmap="RdYlGn")

Unnamed: 0,neg_mean_squared_error,neg_mean_absolute_error,r2,neg_median_absolute_error,neg_mean_squared_log_error,explained_variance
Ridge Regression,-3017.384479,-44.988392,0.455947,-39.391947,-0.171642,0.470067
Lasso Regression,-3002.962386,-44.841481,0.460822,-39.582718,-0.168514,0.473699
SVR,-5048.65862,-60.023966,0.118126,-56.62803,-0.259974,0.144811
Decision Tree,-6182.290382,-63.26841,-0.163793,-52.0,-0.338953,-0.138531
Random Forest,-3531.504059,-48.874558,0.377519,-46.012,-0.19084,0.378257
k-NN,-3685.557257,-48.579219,0.340087,-41.08,-0.200909,0.349525
Gradient Boosting,-3468.666471,-47.753218,0.369805,-42.706008,-0.187251,0.374789
Neural Network,-3430.32956,-46.440053,0.394945,-39.387247,-0.19429,0.362251
Ridge Regression,-3015.866314,-44.883232,0.457486,-39.976686,-0.169126,0.47052
Lasso Regression,-3002.962386,-44.841481,0.460822,-39.582718,-0.168514,0.473699
