**Exercise 7: Ridge Regression and Polynomial Feature Expansion**

*CPSC 381/581: Machine Learning*

*Yale University*

*Instructor: Alex Wong*


**Prerequisites**:

1. Enable Google Colaboratory as an app on your Google Drive account

2. Create a new Google Colab notebook; this will also create a "Colab Notebooks" directory under "MyDrive", i.e.
```
/content/drive/MyDrive/Colab Notebooks
```

3. Create the following directory structure in your Google Drive
```
/content/drive/MyDrive/Colab Notebooks/CPSC 381-581: Machine Learning/Exercises
```

4. Move 07_exercise_ridge_regression_poly_expansion.ipynb into
```
/content/drive/MyDrive/Colab Notebooks/CPSC 381-581: Machine Learning/Exercises
```
so that its absolute path is
```
/content/drive/MyDrive/Colab Notebooks/CPSC 381-581: Machine Learning/Exercises/07_exercise_ridge_regression_poly_expansion.ipynb
```

In this exercise, we will fit linear regression and ridge regression models with polynomial feature expansion to experiment with overfitting and underfitting.


**Submission**:

1. Implement all TODOs in the code blocks below.

2. Report your training and testing scores.

```
Report training and testing scores here.

```

3. List any collaborators.

```
Collaborators: Doe, Jane (Please write names in <Last Name, First Name> format)

Collaboration details: Discussed ... implementation details with Jane Doe.
```

Import packages

In [None]:
import numpy as np
import sklearn.datasets as skdata
import sklearn.metrics as skmetrics
import sklearn.preprocessing as skpreprocess
from sklearn.linear_model import LinearRegression as LinearRegressionSciKit
import warnings
from matplotlib import pyplot as plt

# Silence all warnings to keep the notebook output readable
warnings.filterwarnings(action='ignore')

# Seed NumPy's global random number generator so that the shuffling below is
# reproducible. np.random.seed is a function: it must be called, not assigned
# to (assignment would replace the function and leave the generator unseeded).
np.random.seed(1)

Implementation of Ridge Regression with Normal Equation solver

In [None]:
class RidgeRegression(object):
    '''
    Ridge regression model intended to be fit via the normal equation

    This is an exercise skeleton: the bodies marked TODO are placeholders
    that still need to be implemented. Until then, fit() leaves the weights
    unset (None) and predict() returns the constant 0.0.
    '''

    def __init__(self):
        # Define private variables
        # Model weights; None until the model has been fit
        self.__weights = None

    def __fit_normal_equation(self, X, y, weight_decay=0):
        '''
        Fits the model to X and y via normal equation

        Arg(s):
            X : numpy
                N x d feature vector
            y : numpy
                N x 1 ground-truth label
            weight_decay : float
                weight of weight decay term
        '''

        # TODO: Implement the __fit_normal_equation function
        # Placeholder: the weights are reset to None until this is implemented
        self.__weights = None

    def fit(self, X, y, weight_decay=0, solver='normal_equation'):
        '''
        Fits the model to X and y by solving least squares
        using normal equation

        Arg(s):
            X : numpy[float32]
                N x d feature vector
            y : numpy[float32]
                N ground-truth label (expanded to N x 1 inside this method)
            weight_decay : float
                weight of weight decay term
            solver : str
                solver types: normal_equation
        Raises:
            ValueError : if solver is not 'normal_equation'
        '''

        # Add a trailing axis to the labels (N -> N x 1 for a 1-D input)
        y = np.expand_dims(y, axis=1)

        # TODO: Implement the fit function

        if solver == 'normal_equation':
            # Placeholder: no fitting is performed yet for this solver
            pass
        else:
            raise ValueError('Encountered unsupported solver: {}'.format(solver))

    def predict(self, X):
        '''
        Predicts the real value for each feature vector in X

        Arg(s):
            X : numpy[float32]
                N x d feature vector
        Returns:
            numpy[float32] : N x 1 real value vector (y_hat)
        '''

        # TODO: Implement the predict function

        # Placeholder: constant prediction until the TODO above is implemented
        return 0.0

Helper function for plotting

In [None]:
def plot_results(axis,
                 x_values,
                 y_values,
                 labels,
                 colors,
                 x_limits,
                 y_limits,
                 x_label,
                 y_label):
    '''
    Plots each pair of x and y values as a line plot with its label and color

    Series are paired up position by position; if the lists differ in length,
    the extra entries of the longer lists are ignored.

    Arg(s):
        axis : pyplot.ax
            matplotlib subplot axis
        x_values : list[numpy[float32]]
            list of numpy array of x values
        y_values : list[numpy[float32]]
            list of numpy array of y values
        labels : list[str]
            list of names for legend
        colors : list[str]
            colors for each line
        x_limits : list[float32]
            min and max values of x axis
        y_limits : list[float32]
            min and max values of y axis
        x_label : str
            name of x axis
        y_label : str
            name of y axis
    '''

    # Draw one line (with point markers) per series
    has_series = False

    for x, y, label, color in zip(x_values, y_values, labels, colors):
        axis.plot(x, y, marker='o', color=color, label=label)
        has_series = True

    # Build the legend once, after all series are drawn, rather than once per
    # series; skip it when nothing was plotted so no empty legend is requested
    if has_series:
        axis.legend(loc='best')

    # Set x and y limits
    axis.set_xlim(x_limits)
    axis.set_ylim(y_limits)

    # Set x and y labels
    axis.set_xlabel(x_label)
    axis.set_ylabel(y_label)

Load dataset

In [None]:
# Generate a synthetic regression dataset (2000 samples, 8 features, noisy targets)
X, y = skdata.make_friedman1(n_samples=2000, n_features=8, noise=6)

n_samples = X.shape[0]

# Draw a random ordering of the sample indices
shuffled_indices = np.random.permutation(n_samples)

# The first 80% of the shuffled indices form the training set,
# the remaining 20% form the testing set
train_split_idx = int(0.80 * n_samples)
train_indices, test_indices = np.split(shuffled_indices, [train_split_idx])

# Gather the corresponding rows of X and entries of y for the training and testing sets
X_train, X_test = X[train_indices], X[test_indices]
y_train, y_test = y[train_indices], y[test_indices]

Experiment 1: Demonstrate that linear regression will overfit if we use high degrees of polynomial expansion

In [None]:
# Experiment 1 scaffold: for each polynomial degree, expand the features, fit a
# linear regression model, and record training/testing MSE for plotting.
# As written, `degrees` is empty, so the loop body does not run until the
# TODOs below are filled in.
print('Experiment 1: Overfitting Linear Regression with Polynomial Expansion')

# TODO: Initialize a list containing the integers 1 to 6 as the degrees for polynomial expansion
degrees = []

# Initialize empty lists to store the MSE score of each degree
scores_mse_linear_overfit_train = []
scores_mse_linear_overfit_test = []

for degree in degrees:

    # TODO: Initialize polynomial expansion for the current degree
    # (presumably skpreprocess.PolynomialFeatures; confirm against the course material)
    poly_transform = None

    # TODO: Compute the polynomial terms needed for the data


    # TODO: Transform the training and testing data by the nonlinear mapping
    X_poly_train = None
    X_poly_test = None

    # TODO: Initialize sci-kit linear regression model
    model_linear_overfit = None

    # TODO: Train linear regression model


    print('Results for linear regression model with degree-{} polynomial expansion'.format(degree))

    # TODO: Test model on training set
    # (0.0 is a placeholder for the training mean squared error)
    predictions_train = None
    score_mse_linear_overfit_train = 0.0
    print('Training set mean squared error: {:.4f}'.format(score_mse_linear_overfit_train))

    # TODO: Save MSE training scores


    # TODO: Test model on testing set
    # (0.0 is a placeholder for the testing mean squared error)
    predictions_test = None
    score_mse_linear_overfit_test = 0.0
    print('Testing set mean squared error: {:.4f}'.format(score_mse_linear_overfit_test))

    # TODO: Save MSE testing scores


# Convert the lists of scores to NumPy arrays
scores_mse_linear_overfit_train = np.array(scores_mse_linear_overfit_train)
scores_mse_linear_overfit_test = np.array(scores_mse_linear_overfit_test)

# Create figure for training and testing scores for each polynomial degree
# (one experiment per recorded training score)
n_experiments = scores_mse_linear_overfit_train.shape[0]

labels = ['Training', 'Testing']
colors = ['blue', 'red']

# TODO: Create a subplot of a 1 by 1 figure to plot MSE for training and testing
fig = None
ax = None

# TODO: Set x and y values
x_values = []
y_values = []

# TODO: Plot MSE scores for training and testing sets
# Set labels to ['Training', 'Testing'] and colors based on colors defined above
# Set x limits to 0 to number of experiments + 1 and y limits between 0 and 100
# Set x label to 'p-degree' and y label to 'MSE'


# TODO: Create plot title of 'Overfitting Linear Regression with Various Degrees of Polynomial Expansions'


Experiment 2: Demonstrate that ridge regression will underfit if we use large weight decay ($\lambda$)

In [None]:
# Experiment 2 scaffold: for each weight decay value, fit a ridge regression
# model and record training/testing MSE for plotting.
# As written, `weight_decays` is empty, so the loop body does not run until
# the TODOs below are filled in.
print('Experiment 2: Underfitting Ridge Regression with Large Weight Decay')

# TODO: Initialize a list of weight decay values ranging from 1 to 2^15
# (presumably powers of two, since the plot below uses a log2 x axis; confirm)
weight_decays = []

# Initialize empty lists to store the MSE score of each weight decay
scores_mse_ridge_underfit_train = []
scores_mse_ridge_underfit_test = []

for weight_decay in weight_decays:

    # TODO: Initialize ridge regression model
    model_ridge_underfit = None

    # TODO: Train ridge regression model


    print('Results for ridge regression model with weight decay of {}'.format(weight_decay))

    # TODO: Test model on training set
    # (0.0 is a placeholder for the training mean squared error)
    predictions_train = None
    score_mse_ridge_underfit_train = 0.0
    print('Training set mean squared error: {:.4f}'.format(score_mse_ridge_underfit_train))

    # TODO: Save MSE training scores


    # TODO: Test model on testing set
    # (0.0 is a placeholder for the testing mean squared error)
    predictions_test = None
    score_mse_ridge_underfit_test = 0.0
    print('Testing set mean squared error: {:.4f}'.format(score_mse_ridge_underfit_test))

    # TODO: Save MSE testing scores


# Convert the lists of scores to NumPy arrays
scores_mse_ridge_underfit_train = np.array(scores_mse_ridge_underfit_train)
scores_mse_ridge_underfit_test = np.array(scores_mse_ridge_underfit_test)

# Create figure for training and testing scores for each weight decay
# (one experiment per recorded training score)
n_experiments = scores_mse_ridge_underfit_train.shape[0]

labels = ['Training', 'Testing']
colors = ['blue', 'red']

# TODO: Create a subplot of a 1 by 1 figure to plot MSE for training and testing
fig = None
ax = None

# TODO: Set x values (weight_decays in log base2 scale) and y values (MSE)
x_values = []
y_values = []

# TODO: Plot MSE scores for training and testing sets
# Set labels to ['Training', 'Testing'] and colors based on colors defined above
# Set x limits to 0 to log of highest weight_decays + 1 and y limits between 0 and 100
# Set x label to r'$\lambda$ (log2 scale)' and y label to 'MSE'


# TODO: Create plot title of r'Underfitting Ridge Regression with Various $\lambda$'


Experiment 3: Demonstrate that ridge regression with various $\lambda$ prevents overfitting when using polynomial expansion

In [None]:
# Experiment 3 scaffold: expand the features once with a fixed polynomial
# degree, then for each weight decay value fit a ridge regression model and
# record training/testing MSE for plotting.
# As written, `weight_decays` is empty, so the loop body does not run until
# the TODOs below are filled in.
print(r'Experiment 3: Ridge Regression with Weight Decay and Polynomial Expansion')

# Set polynomial expansion
degree = 6

# TODO: Initialize a list of weight decay values ranging from 1 to 2^15
# (presumably powers of two, since the plot below uses a log2 x axis; confirm)
weight_decays = []

# TODO: Initialize polynomial expansion
poly_transform = None

# TODO: Compute the polynomial terms needed for the data


# TODO: Transform the training and testing data by the nonlinear mapping
# (named X_poly_* to match the upper-case X convention used for feature
# matrices in the rest of this notebook)
X_poly_train = None
X_poly_test = None

# Initialize empty lists to store the MSE score of each weight decay
scores_mse_ridge_poly_train = []
scores_mse_ridge_poly_test = []

for weight_decay in weight_decays:

    # TODO: Initialize ridge regression model
    model_ridge_poly = None

    # TODO: Train ridge regression model


    print('Results for ridge regression model with weight decay of {} for degree-{} polynomial expansion'.format(weight_decay, degree))

    # TODO: Test model on training set
    # (0.0 is a placeholder for the training mean squared error)
    predictions_train = None
    score_mse_ridge_poly_train = 0.0
    print('Training set mean squared error: {:.4f}'.format(score_mse_ridge_poly_train))

    # TODO: Save MSE training scores


    # TODO: Test model on testing set
    # (0.0 is a placeholder for the testing mean squared error)
    predictions_test = None
    score_mse_ridge_poly_test = 0.0
    print('Testing set mean squared error: {:.4f}'.format(score_mse_ridge_poly_test))

    # TODO: Save MSE testing scores


# Convert the lists of scores to NumPy arrays
scores_mse_ridge_poly_train = np.array(scores_mse_ridge_poly_train)
scores_mse_ridge_poly_test = np.array(scores_mse_ridge_poly_test)

# Create figure for training and testing scores for each weight decay
# (one experiment per recorded training score)
n_experiments = scores_mse_ridge_poly_train.shape[0]

labels = ['Training', 'Testing']
colors = ['blue', 'red']

# TODO: Create a subplot of a 1 by 1 figure to plot MSE for training and testing
fig = None
ax = None

# TODO: Set x values (weight_decays in log base2 scale) and y values (MSE)
x_values = []
y_values = []

# TODO: Plot MSE scores for training and testing sets
# Set labels to ['Training', 'Testing'] and colors based on colors defined above
# Set x limits to 0 to log of highest weight_decays + 1 and y limits between 0 and 100
# Set x label to r'$\lambda$ (log2 scale)' and y label to 'MSE'


# TODO: Create plot title of r'Ridge Regression with various $\lambda$ for Degree-{} Polynomial Expansion'.format(degree)
