In [42]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error, r2_score, median_absolute_error, mean_squared_log_error, explained_variance_score

#Regression algorythms
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

In [43]:
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=345)

In [44]:
#Data standardization
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [45]:
models = [
    ('Linear Regression', LinearRegression()),
    ('Ridge Regression', Ridge()),
    ('Lasso Regression', Lasso()),
    ('SVR', SVR()),
    ('Decision Tree', DecisionTreeRegressor()),
    ('Random Forest', RandomForestRegressor()),
    ('k-NN', KNeighborsRegressor()),
    ('Gradient Boosting', GradientBoostingRegressor()),
    ('Neural Network', MLPRegressor())
]

In [46]:
#Cross-validation parameters
cv = KFold(n_splits=5, shuffle=True, random_state=345)

In [47]:
scoring = {
    'neg_mean_squared_error': make_scorer(mean_squared_error, greater_is_better=False),
    'neg_mean_absolute_error': make_scorer(mean_absolute_error, greater_is_better=False),
    'r2': make_scorer(r2_score),
    'neg_median_absolute_error': make_scorer(median_absolute_error, greater_is_better=False),
    'neg_mean_squared_log_error': make_scorer(mean_squared_log_error, greater_is_better=False),
    'explained_variance': make_scorer(explained_variance_score)
}

In [48]:
#Default hyperparameters
results = []
names = []

for name, model in models:
    scores = {metric: cross_val_score(model, X_train, y_train, cv=cv, scoring=scorer).mean() for metric, scorer in scoring.items()}
    results.append(scores)
    names.append(name)

results_df = pd.DataFrame(results, index=names)



In [49]:
results_df.style.background_gradient(cmap="RdYlGn")

Unnamed: 0,neg_mean_squared_error,neg_mean_absolute_error,r2,neg_median_absolute_error,neg_mean_squared_log_error,explained_variance
Linear Regression,-3025.608956,-45.100531,0.453889,-39.197501,-0.174682,0.46847
Ridge Regression,-3017.384479,-44.988392,0.455947,-39.391947,-0.171642,0.470067
Lasso Regression,-3002.962386,-44.841481,0.460822,-39.582718,-0.168514,0.473699
SVR,-5048.65862,-60.023966,0.118126,-56.62803,-0.259974,0.144811
Decision Tree,-6355.598873,-64.312475,-0.15572,-51.0,-0.334452,-0.157616
Random Forest,-3424.368146,-49.663988,0.375704,-46.237,-0.193881,0.39924
k-NN,-3685.557257,-48.579219,0.340087,-41.08,-0.200909,0.349525
Gradient Boosting,-3478.978257,-47.490537,0.370439,-42.376927,-0.187542,0.380152
Neural Network,-12074.404244,-92.176479,-1.179266,-77.066341,-1.339049,0.221679
