<a href="https://colab.research.google.com/github/fealmutairi79-0/CS-220P/blob/main/notebooks/lecture-4-part-1e.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import compose, datasets, linear_model, metrics, model_selection
from sklearn import preprocessing, pipeline

# Regularized Linear Models

In [2]:
features, targets = datasets.load_diabetes(
    return_X_y=True,
    as_frame=True,
    scaled=False
)

In [4]:
features.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,59.0,2.0,32.1,101.0,157.0,93.2,38.0,4.0,4.8598,87.0
1,48.0,1.0,21.6,87.0,183.0,103.2,70.0,3.0,3.8918,69.0
2,72.0,2.0,30.5,93.0,156.0,93.6,41.0,4.0,4.6728,85.0
3,24.0,1.0,25.3,84.0,198.0,131.4,40.0,5.0,4.8903,89.0
4,50.0,1.0,23.0,101.0,192.0,125.4,52.0,4.0,4.2905,80.0


In [5]:
targets.head()

Unnamed: 0,target
0,151.0
1,75.0
2,141.0
3,206.0
4,135.0


### Train-test split

In [3]:
prng = np.random.RandomState(42)

train_features, test_features, train_targets, test_targets = (
    model_selection.train_test_split(
        features,
        targets,
        random_state=prng,
        test_size=0.1
    )
)

## Feature Preprocessing

In [6]:
transformer_1 = compose.make_column_transformer(
    (
        preprocessing.OneHotEncoder(
            drop="first",
            dtype=np.uint8,
            sparse_output=False,
        ),
        ["sex"]
    ),
    remainder="drop",
    verbose=True,
    verbose_feature_names_out=False
)


transformer_2 = compose.make_column_transformer(
    (
        preprocessing.StandardScaler(),
        ["age", "bmi", "bp", "s1", "s2", "s3", "s4", "s5"]
    ),
    remainder="drop",
    verbose=True,
    verbose_feature_names_out=False
)

features_preprocessor = pipeline.make_union(
    transformer_1,
    transformer_2,
    verbose=True,
    n_jobs=-1
).set_output(transform="pandas")

In [7]:
features_preprocessor

## Target Preprocessing

In [8]:
target_preprocessor = preprocessing.FunctionTransformer(
    func=np.log,
    inverse_func=np.exp
)

In [9]:
target_preprocessor

## Feature Engineering

In [10]:
feature_engineering = preprocessing.PolynomialFeatures(
    degree=2,
    include_bias=False,#constant term
    interaction_only=False
).set_output(transform="pandas")

In [11]:
feature_engineering

## Model training

### Using ElasticNet

In [12]:
_regressor = compose.TransformedTargetRegressor(
    linear_model.ElasticNet(
        alpha=1e-3,
        l1_ratio=0.5,
        max_iter=4096,
        fit_intercept=True,
        random_state=prng,
    ),
    transformer=target_preprocessor
)

elastic_net_pipeline = pipeline.make_pipeline(
    features_preprocessor,
    feature_engineering,
    _regressor
)

In [13]:
_ = elastic_net_pipeline.fit(train_features, train_targets)

In [14]:
train_predictions = elastic_net_pipeline.predict(train_features)
train_rmse = metrics.root_mean_squared_error(
    train_targets,
    train_predictions,
)
print(f"Training rmse: {train_rmse}")

Training rmse: 52.10772518068729


### Using SGDRegressor

In [15]:
linear_model.SGDRegressor?

In [16]:
_regressor = compose.TransformedTargetRegressor(
    linear_model.SGDRegressor(
        penalty="elasticnet",
        alpha=1e-3,
        l1_ratio=0.5,
        fit_intercept=True,
    ),
    transformer=target_preprocessor
)

sgd_regressor_pipeline = pipeline.make_pipeline(
    features_preprocessor,
    feature_engineering,
    _regressor
)

In [17]:
_ = sgd_regressor_pipeline.fit(train_features, train_targets)

In [21]:
train_predictions = sgd_regressor_pipeline.predict(train_features)
train_rmse = metrics.root_mean_squared_error(
    train_targets,
    train_predictions,
)
print(f"Training rmse: {train_rmse}")

Training rmse: 55.88367768217637


## Model assessment

In [22]:
cv_neg_mses = model_selection.cross_val_score(
    elastic_net_pipeline,
    train_features,
    train_targets,
    cv=5,
    n_jobs=-1,
    scoring="neg_root_mean_squared_error",
    verbose=1
)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.3s finished


In [23]:
cv_rmse = np.mean(-cv_neg_mses)
print(f"ElasticNet CV rmse: {cv_rmse}")

ElasticNet CV rmse: 58.95329225659996


In [24]:
cv_neg_mses = model_selection.cross_val_score(
    sgd_regressor_pipeline,
    train_features,
    train_targets,
    cv=5,
    n_jobs=-1,
    scoring="neg_root_mean_squared_error",
    verbose=1
)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.4s finished


In [25]:
cv_rmse = np.mean(-cv_neg_mses)
print(f"SGDRegressor CV rmse: {cv_rmse}")

SGDRegressor CV rmse: 69.86024253499669


### Exercise:

Is our current model under-fitting or over-fitting? How can you tell? What can you do to fix the problem?

## Tuning model performance

### Using ElasticNetCV

[ElasticNetCV](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNetCV.html) is an example of a [cross-validation estimator](https://scikit-learn.org/stable/glossary.html#term-cross-validation-estimator). Cross-validation estimators are named `EstimatorCV` and tend to be roughly equivalent to `GridSearchCV(Estimator(), ...)`. The advantage of using a cross-validation estimator over the canonical estimator class along with grid search is that they can take advantage of warm-starting by reusing precomputed results in the previous steps of the cross-validation process.

When calling `fit`, once the best parameters `l1_ratio` and `alpha` are found through cross-validation, the model is `fit` again using the entire training set.

In [None]:
linear_model.ElasticNetCV?

In [26]:
regressor_cv = compose.TransformedTargetRegressor(
    linear_model.ElasticNetCV(
        cv=5,
        eps=1e-3,
        fit_intercept=True,
        l1_ratio=np.logspace(-1, 0, 10),
        max_iter=8192,
        alphas=np.logspace(-4, 0, 10),
        n_jobs=-1,
        random_state=prng,
        selection="random",
        verbose=0,
    ),
    transformer=target_preprocessor
)

tuned_elastic_net_pipeline = pipeline.make_pipeline(
    features_preprocessor,
    feature_engineering,
    regressor_cv,
    verbose=True
)

In [27]:
tuned_elastic_net_pipeline

In [28]:
_ = tuned_elastic_net_pipeline.fit(train_features, train_targets)

[Pipeline] ...... (step 1 of 3) Processing featureunion, total=   3.2s
[Pipeline]  (step 2 of 3) Processing polynomialfeatures, total=   0.0s


  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

[Pipeline]  (step 3 of 3) Processing transformedtargetregressor, total=   5.8s


  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


In [29]:
(
    tuned_elastic_net_pipeline.named_steps["transformedtargetregressor"]
                              .regressor_
                              .alpha_
)

np.float64(0.016681005372000592)

In [30]:
(
    tuned_elastic_net_pipeline.named_steps["transformedtargetregressor"]
                              .regressor_
                              .l1_ratio_
)

np.float64(0.5994842503189409)

### Using GridSearchCV and SGDRegressor

In [31]:
tuned_sgd_regressor_pipeline = model_selection.GridSearchCV(
    sgd_regressor_pipeline,
    cv=5,
    param_grid={
        "transformedtargetregressor__regressor__alpha": np.logspace(-4, 0, 10),
        "transformedtargetregressor__regressor__l1_ratio": np.logspace(-1, 0, 10)
    },
    n_jobs=-1,
    refit=True,
    scoring="neg_root_mean_squared_error",
    verbose=1
)

In [32]:
tuned_sgd_regressor_pipeline

In [33]:
_ = tuned_sgd_regressor_pipeline.fit(train_features, train_targets)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


In [34]:
tuned_sgd_regressor_pipeline.best_params_

{'transformedtargetregressor__regressor__alpha': np.float64(0.12915496650148828),
 'transformedtargetregressor__regressor__l1_ratio': np.float64(0.16681005372000587)}

## Assessing performance of the tuned model

In [35]:
train_predictions = tuned_elastic_net_pipeline.predict(train_features)
train_rmse = metrics.root_mean_squared_error(
    train_targets,
    train_predictions,
)
print(f"ElasticNetCV training rmse: {train_rmse}")

ElasticNetCV training rmse: 53.790943395498914


In [36]:
test_predictions = tuned_elastic_net_pipeline.predict(test_features)
test_rmse = metrics.root_mean_squared_error(
    test_targets,
    test_predictions,
)
print(f"ElasticNetCV testing rmse: {test_rmse}")

ElasticNetCV testing rmse: 53.88215698076754


In [37]:
train_predictions = tuned_sgd_regressor_pipeline.predict(train_features)
train_rmse = metrics.root_mean_squared_error(
    train_targets,
    train_predictions,
)
print(f"GridSearchCV + SGDRegressor training rmse: {train_rmse}")

GridSearchCV + SGDRegressor training rmse: 57.52091081582808


In [38]:
test_predictions = tuned_sgd_regressor_pipeline.predict(test_features)
test_rmse = metrics.root_mean_squared_error(
    test_targets,
    test_predictions,
)
print(f"GridSearchCV + SGDRegressor Testing rmse: {test_rmse}")

GridSearchCV + SGDRegressor Testing rmse: 55.57813519134998


### Exercise

Which of the two models should you prefer? Why?