# Exercise 5 | Regularized Linear Regression and Bias-Variance

In [None]:
%matplotlib inline

from scipy.io import loadmat
from matplotlib import pyplot as plt
import numpy as np
from scipy.optimize import minimize

## Part 1: Loading and Visualizing Data

We start the exercise by first loading and visualizing the
dataset. The following code will load the dataset into your
environment and plot the data.

In [None]:
# Read the exercise dataset and pull out the arrays stored under each key.
data1 = loadmat('ex5data1.mat')

X, y = data1['X'], data1['y']
Xval, yval = data1['Xval'], data1['yval']
Xtest, ytest = data1['Xtest'], data1['ytest']

# Number of rows in X.
m = len(X)

# Plot y against X using red 'x' markers.
plt.plot(X, y, 'rx', markersize=10, linewidth=1.5)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')

## Part 2: Regularized Linear Regression Cost

You should now implement the cost function for regularized linear
regression.

In [None]:
def linear_reg_cost_function(theta, X, y, lambda_):
    """Compute cost and gradient for regularized linear regression.

    Parameters
    ----------
    theta : array_like
        Parameter vector with ``X.shape[1]`` elements; any shape holding
        exactly that many values is accepted (e.g. ``(n,)`` or ``(n, 1)``).
    X : ndarray, shape (m, n)
        Design matrix. The first column is treated as the bias column, so
        the first parameter is excluded from the regularization term.
    y : array_like
        Targets, one per row of ``X``; either ``(m,)`` or ``(m, 1)``.
    lambda_ : float
        Regularization strength.

    Returns
    -------
    J : float
        Regularized squared-error cost, as a plain scalar.
    grad : ndarray, shape (n,)
        Gradient of ``J`` with respect to ``theta``.
    """
    # Work with flat vectors throughout: a 1-D y subtracted from an (m, 1)
    # prediction would otherwise broadcast into an (m, m) matrix.
    theta = np.asarray(theta).reshape(X.shape[1])
    y = np.asarray(y).reshape(-1)
    m = y.size

    residual = X.dot(theta) - y
    J = residual.dot(residual) / (2 * m) + lambda_ / (2 * m) * np.sum(theta[1:] ** 2)

    grad = X.T.dot(residual) / m
    # The bias term (index 0) is not regularized.
    grad[1:] += lambda_ / m * theta[1:]

    # Return a true scalar: optimizers expect a float objective, not a 1x1 array.
    return float(J), grad

In [None]:
# Evaluate the cost at theta = [1; 1] with lambda = 1 on the bias-augmented X.
theta = np.array([1, 1]).reshape((2, 1))
J, _ = linear_reg_cost_function(theta, np.c_[np.ones((m, 1)), X], y, 1)

# J may come back as a 1x1 array; collapse it to a number so the output reads
# as a single value that can be compared against the reference below.
print(f'''Cost at theta = [1 ; 1]: {float(np.squeeze(J)):.6f}
(this value should be about 303.993192)''')

## Part 3: Regularized Linear Regression Gradient

You should now implement the gradient for regularized linear regression.

In [None]:
# Evaluate cost and gradient at theta = [1; 1] with lambda = 1; a column of
# ones is prepended to X before the call.
theta = np.array([[1], [1]])
J, grad = linear_reg_cost_function(
    theta,
    np.column_stack((np.ones((m, 1)), X)),
    y,
    1,
)

print(f'''Gradient at theta = [1 ; 1]:  [{grad[0]}; {grad[1]}]
(this value should be about [-15.303016; 598.250744])''')

## Part 4: Train Linear Regression

Once you have implemented the cost and gradient correctly, the
`train_linear_reg` function will use your cost function to train
regularized linear regression.

Write Up Note: The data is non-linear, so this will not give a great fit.

In [None]:
def train_linear_reg(X, y, lambda_, maxiter=200):
    """Train regularized linear regression on a dataset (X, y).

    Minimizes ``linear_reg_cost_function`` with SciPy's conjugate-gradient
    method, starting from an all-zero parameter vector.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        Design matrix, passed through unchanged to the cost function.
    y : ndarray
        Targets, passed through unchanged to the cost function.
    lambda_ : float
        Regularization parameter.
    maxiter : int, optional
        Maximum number of optimizer iterations (default 200).

    Returns
    -------
    ndarray, shape (n,)
        The parameter vector found by the optimizer.
    """
    # minimize() expects a one-dimensional x0; recent SciPy releases reject a
    # column vector with "'x0' must only have one dimension".
    initial_theta = np.zeros(X.shape[1])
    result = minimize(linear_reg_cost_function,
                      initial_theta,
                      args=(X, y, lambda_),
                      method='CG',
                      jac=True,  # the cost function returns (cost, gradient)
                      options={'maxiter': maxiter})

    return result.x

In [None]:
# Fit the model with regularization switched off (lambda = 0).
lambda_ = 0
theta = train_linear_reg(np.column_stack((np.ones((m, 1)), X)), y, lambda_)

# Draw the training points, then overlay the fitted line as a dashed curve.
plt.plot(X, y, 'rx', markersize=10, linewidth=1.5)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.plot(X, np.column_stack((np.ones((m, 1)), X)) @ theta, '--', linewidth=2)