### Practice

#### Q1

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import explained_variance_score

In [2]:
# Explained-variance score for the four-sample toy example; the arguments are
# passed by keyword so the target/prediction roles are visible at the call site.
explained_variance_score(y_true=[7, 4, 9, 4], y_pred=[8, 7, 12, 5])

0.7777777777777778

#### Q2

In [3]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the California housing data as pandas objects and hold out 30% for testing.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
ss = StandardScaler(with_mean=True, with_std=True)
ss.fit(X_train)
X_train_norm = ss.transform(X_train)
X_test_norm = ss.transform(X_test)

In [4]:
from sklearn.linear_model import SGDRegressor

# Squared-error SGD regressor with an L1 penalty; fit() returns the estimator
# itself, so `sgd` is bound to the fitted model.
sgd = SGDRegressor(
    loss='squared_error',
    penalty='l1',
    random_state=1,
).fit(X_train_norm, y_train)

# Score the fitted model on the scaled held-out split.
sgd.score(X_test_norm, y_test)

0.5996851134711898

#### Alternatively, using a pipeline

In [5]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import Pipeline

# Same experiment as the manual version, with scaling and the regressor
# bundled into a single pipeline that is fitted on the raw training split.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

sgd_grid_pipeline = Pipeline(steps=[
    ('feature_scaling', StandardScaler(with_mean=True, with_std=True)),
    ('sgd', SGDRegressor(loss='squared_error', penalty='l1', random_state=1)),
])

# fit() returns the pipeline, so the held-out score can be chained onto it.
sgd_grid_pipeline.fit(X_train, y_train).score(X_test, y_test)

0.5996851134711898

### Graded

#### Q1

In [6]:
# Shuffle the four rows in place twice, after seeding NumPy's global RNG so
# the resulting order is reproducible.
X = [[1, 2], [3, 4], [1, 2], [3, 4]]
np.random.seed(1)
for shuffle_pass in range(2):
    np.random.shuffle(X)
X

[[3, 4], [1, 2], [1, 2], [3, 4]]

#### Q2

In [7]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Reload the housing data and recreate the 70/30 split used for the SGD grid search.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [8]:
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

In [9]:
# Pipeline to be tuned: standardise the features, then fit an SGD regressor.
sgd_grid_pipeline = Pipeline([
    ('feature_scaling', StandardScaler(with_mean=True, with_std=True)),
    ('sgd', SGDRegressor(random_state=1)),
])

# Candidate values for the grid search. Each key is '<step name>__<parameter>',
# so 'sgd__alpha' addresses the `alpha` parameter of the 'sgd' step.
param_grid = {
    'sgd__loss': ['squared_error', 'huber'],
    'sgd__penalty': ['l1', 'l2'],
    'sgd__alpha': [0.1, 0.01, 0.001],
    'sgd__max_iter': [1000, 2000, 5000],
}

# List the tunable parameter names of the pipeline's own SGD step. This reads
# them from the pipeline defined in this cell instead of the separate `sgd`
# estimator fitted in the Practice section, so the cell no longer depends on
# that earlier cell having been run.
sgd_grid_pipeline.named_steps['sgd'].get_params().keys()

dict_keys(['alpha', 'average', 'early_stopping', 'epsilon', 'eta0', 'fit_intercept', 'l1_ratio', 'learning_rate', 'loss', 'max_iter', 'n_iter_no_change', 'penalty', 'power_t', 'random_state', 'shuffle', 'tol', 'validation_fraction', 'verbose', 'warm_start'])

In [10]:
# 4-fold grid search over the SGD pipeline, scored by negated mean absolute
# error; training-fold scores are recorded alongside the validation scores.
sgd_grid_search = GridSearchCV(
    estimator=sgd_grid_pipeline,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',
    cv=4,
    return_train_score=True,
)

In [11]:
# Run the search on the training split; fit() returns the search object, so
# the index of the winning candidate can be read straight off the result.
sgd_grid_search.fit(X_train, y_train).best_index_

31

In [12]:
# Mean cross-validated score of the winning candidate, i.e. the entry of
# cv_results_['mean_test_score'] at best_index_.
sgd_grid_search.best_score_

-0.5377906495811515

In [13]:
# Report the hyper-parameter combination selected by the SGD grid search.
sgd_best_params = sgd_grid_search.best_params_
print(f"Best value of hyper-parameters is {sgd_best_params}")

Best value of hyper-parameters is {'sgd__alpha': 0.001, 'sgd__loss': 'huber', 'sgd__max_iter': 1000, 'sgd__penalty': 'l2'}


#### Q3

In [14]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Reload the housing data and recreate the 70/30 split used for the ridge grid search.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [15]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

In [16]:
# Pipeline to be tuned: standardise the features, then fit a ridge regressor.
ridge_grid_pipeline = Pipeline(steps=[
    ('feature_scaling', StandardScaler(with_mean=True, with_std=True)),
    ('ridge', Ridge()),
])

# Candidate regularisation strengths for the 'ridge' step.
param_grid = {
    'ridge__alpha': [0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
}

In [17]:
# 4-fold grid search over the ridge pipeline, scored by negated mean absolute
# error; training-fold scores are recorded alongside the validation scores.
ridge_grid_search = GridSearchCV(
    estimator=ridge_grid_pipeline,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',
    cv=4,
    return_train_score=True,
)

In [18]:
# Run the search on the training split; fit() returns the search object, so
# the index of the winning candidate can be read straight off the result.
ridge_grid_search.fit(X_train, y_train).best_index_

0

In [19]:
# Mean cross-validated score of the winning candidate, i.e. the entry of
# cv_results_['mean_test_score'] at best_index_.
ridge_grid_search.best_score_

-0.531706237021487

In [20]:
# Report the hyper-parameter combination selected by the ridge grid search.
ridge_best_params = ridge_grid_search.best_params_
print(f"Best value of hyper-parameters is {ridge_best_params}")

Best value of hyper-parameters is {'ridge__alpha': 0.5}


#### Q4

In [21]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Reload the housing data; this question holds out 40% of the rows for testing.
X, y = fetch_california_housing(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=1,
)

In [22]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

In [23]:
# Pipeline to be tuned: standardise the features, then fit a lasso regressor.
lasso_grid_pipeline = Pipeline(steps=[
    ('feature_scaling', StandardScaler(with_mean=True, with_std=True)),
    ('lasso', Lasso()),
])

# Candidate regularisation strengths for the 'lasso' step, each tried with
# and without an intercept term.
param_grid = {
    'lasso__alpha': [0.5, 0.1, 0.05, 0.01, 0.005, 0.001],
    'lasso__fit_intercept': [True, False],
}

In [24]:
# 6-fold grid search over the lasso pipeline, scored by negated mean absolute
# error; training-fold scores are recorded alongside the validation scores.
lasso_grid_search = GridSearchCV(
    estimator=lasso_grid_pipeline,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',
    cv=6,
    return_train_score=True,
)

In [25]:
# Run the search on the training split; fit() returns the search object, so
# the index of the winning candidate can be read straight off the result.
lasso_grid_search.fit(X_train, y_train).best_index_

10

In [26]:
# Mean cross-validated score of the winning candidate, i.e. the entry of
# cv_results_['mean_test_score'] at best_index_.
lasso_grid_search.best_score_

-0.5336947321320945

In [27]:
# Report the hyper-parameter combination selected by the lasso grid search.
lasso_best_params = lasso_grid_search.best_params_
print(f"Best value of hyper-parameters is {lasso_best_params}")

Best value of hyper-parameters is {'lasso__alpha': 0.001, 'lasso__fit_intercept': True}
