In [8]:
#importing libraries

import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [9]:
# Load the California housing dataset.
# A single call with return_X_y=True yields the feature matrix and the target
# vector together, so the dataset is not fetched and parsed twice.
X, y = fetch_california_housing(return_X_y=True)


In [10]:
# Hold out 20% of the samples as a test set.
# random_state is pinned so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [11]:
# Sanity check on the split: print the shape of each array,
# features first (train, test), then targets (train, test).
for split_array in (X_train, X_test, y_train, y_test):
    print(split_array.shape)

(16512, 8)
(4128, 8)
(16512,)
(4128,)


In [12]:
# Baseline model: standardise the features, then fit a Ridge regressor
# with its default settings. The step names ('scaler', 'regressor') are
# what later cells use to address the steps' parameters.
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('regressor', Ridge()),
    ]
)

# Kept as the last expression so the fitted pipeline is displayed.
pipe.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('regressor',
                 Ridge(alpha=1.0, copy_X=True, fit_intercept=True,
                       max_iter=None, normalize=False, random_state=None,
                       solver='auto', tol=0.001))],
         verbose=False)

In [13]:
# Score the baseline pipeline on the held-out test split
# (for scikit-learn regressors the default score is R^2).
baseline_score = pipe.score(X_test, y_test)
baseline_score

0.5943141338604156

In [16]:
# Tune the Ridge regularisation strength with a cross-validated grid search.
# numpy (np) comes from the notebook's import cell rather than a mid-notebook import.

# 'regressor__alpha' targets the `alpha` parameter of the pipeline step named
# 'regressor'; the candidates are 10 log-spaced values between 1e-2 and 1e2.
grid_parameters = {'regressor__alpha': np.logspace(-2, 2, 10)}

grid_search = GridSearchCV(
    pipe,
    grid_parameters,
    cv=5,        # 5-fold cross-validation for every candidate
    n_jobs=2,    # run the fits on two parallel workers
    verbose=1,   # print a short progress report
)

# Kept as the last expression so the fitted search object is displayed.
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  50 out of  50 | elapsed:    0.3s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('scaler',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('regressor',
                                        Ridge(alpha=1.0, copy_X=True,
                                              fit_intercept=True, max_iter=None,
                                              normalize=False,
                                              random_state=None, solver='auto',
                                              tol=0.001))],
                                verbose=False),
             iid='warn', n_jobs=2,
             param_grid={'regressor__alpha': array([1.00000000e-02, 2.78255940e-02, 7.74263683e-02, 2.15443469e-01,
       5.99484250e-01, 1.66810054e+00,

In [19]:
# Report the hyper-parameter combination selected by the grid search.
best_params = grid_search.best_params_
print(f'The best value for parameter is: {best_params}')

The best value for parameter is: {'regressor__alpha': 12.915496650148826}


In [24]:
# Report the score of the winning candidate. Per the scikit-learn docs this is
# the mean cross-validated score on the training data, not a test-set score.
best_score = grid_search.best_score_
print(f'The best value for score is: {best_score}')

The best value for score is: 0.6053958277988015
