In [1]:
# ------------------------------------------------------- #
# --- (6) Hyperparameter Tuning for Regression Models --- #

In [2]:
# --- Hyperparameter Tuning for Lasso Regression Using Grid Search --- #

In [10]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import Lasso
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

from sklearn.model_selection import GridSearchCV

import warnings
# Installs a blanket "ignore" filter: warnings of every category, from every
# module, are suppressed for code executed after this line.
# NOTE(review): this also hides any warnings raised while fitting the models
# below -- consider narrowing the filter to a specific category if that matters.
warnings.filterwarnings("ignore")

In [7]:
# Read the processed auto-mpg CSV (path is relative to the notebook's working directory)
csv_path = 'datasets/auto-mpg/processed.csv'
automobile_df = pd.read_csv(csv_path)

# Preview the first five rows to confirm the columns loaded as expected
automobile_df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
0,31.0,4,71.0,65,1773,19.0,52
1,26.0,4,156.0,92,2585,14.5,41
2,39.0,4,86.0,64,1875,16.4,42
3,13.0,8,350.0,145,4055,12.0,47
4,36.0,4,79.0,58,1825,18.6,46


In [8]:
# Feature matrix: every column except the target ('mpg') and 'age',
# which this notebook leaves out of the model.
X = automobile_df.drop(columns=['mpg', 'age'])

# Regression target: fuel efficiency in the 'mpg' column.
Y = automobile_df['mpg']

# Hold out 20% of the rows for the final evaluation.
# random_state is pinned so the split -- and therefore the grid-search ranking
# and the train/test scores further down -- is identical on every
# Restart Kernel -> Run All instead of changing from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [11]:
# Candidate values of the Lasso regularisation strength (alpha) to compare
parameters = {'alpha': [0.2, 0.4, 0.6, 0.8, 1.0]}

# cv=3 -> 3-fold cross-validation: the training data is split into three parts,
#         and each candidate is fitted on two parts and scored on the third.
# No `scoring` argument is passed, so candidates are ranked with the estimator's
# default score (R-squared for regressors); a different metric could be specified.
# return_train_score=True additionally records the scores on the training folds
# in cv_results_ (it does not affect which metric is used).
grid_search = GridSearchCV(
    estimator=Lasso(),
    param_grid=parameters,
    cv=3,
    return_train_score=True,
)
grid_search.fit(x_train, y_train)

# Display the best alpha found by the search (1.0 in the recorded run below)
grid_search.best_params_

{'alpha': 1.0}

In [14]:
# Show the mean cross-validated score and the rank of every candidate searched.
# The loop walks cv_results_ itself rather than range(len(parameters['alpha'])),
# so it reports exactly the candidates that were evaluated even when the grid
# contains more than one hyperparameter.
cv_results = grid_search.cv_results_
for params, mean_score, rank in zip(
    cv_results['params'],
    cv_results['mean_test_score'],
    cv_results['rank_test_score'],
):
    print('Parameters: ', params)
    print('Mean test score: ', mean_score)
    print('Rank: ', rank)

Parameters:  {'alpha': 0.2}
Mean test score:  0.6982694296611637
Rank:  5
Parameters:  {'alpha': 0.4}
Mean test score:  0.6997843120319306
Rank:  4
Parameters:  {'alpha': 0.6}
Mean test score:  0.7004607791574653
Rank:  3
Parameters:  {'alpha': 0.8}
Mean test score:  0.7006095535335217
Rank:  2
Parameters:  {'alpha': 1.0}
Mean test score:  0.7007332244204139
Rank:  1


In [18]:
# Fit a standalone Lasso on the full training split using the alpha
# that the grid search ranked first
best_alpha = grid_search.best_params_['alpha']
lasso_model = Lasso(alpha=best_alpha).fit(x_train, y_train)

In [20]:
# Predict on the held-out rows
y_pred = lasso_model.predict(x_test)

# R-squared on the data the model was fitted on vs. on the unseen test split;
# a large gap between the two hints at over-fitting
train_score = lasso_model.score(x_train, y_train)
test_score = r2_score(y_test, y_pred)

print('Training score: ', train_score)
print('Test score: ', test_score)

Training score:  0.7147820380287512
Test score:  0.63588407576538


In [None]:
# The tutorial goes on to do this for additional estimators and their hyperparameters