<img src="logo.png" width="200" align="center">

<center> <h1>Ridge Regression (L2 Regularization)</h1> </center>


## Ridge Regression Solution
In the Ridge Regression (L2 Regularization) theory [video](https://youtu.be/skOcLw_fXDs) we derived the solution to be:

\begin{equation}
\hat{\theta} = (X^TX + \lambda I)^{-1}X^TY
\end{equation}

where
\begin{equation}
X = [\bar{x}_1, \bar{x}_2, \ldots , \bar{x}_n]^T
\end{equation}

\begin{equation}
Y = [y_1, y_2, ... , y_n]^T
\end{equation}

This solution minimizes the following cost function

\begin{equation}
J(x, \theta, y) = \sum_{i=1}^{n}(\theta^T\bar{x}_i - y_i)^2 + \lambda ||\theta||^2
\end{equation}


In [None]:
# import necessary packages
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

In [None]:
class Ridge:
    """Linear least squares with L2 regularization.

    Each training input ``x`` is expanded to the feature vector
    ``x_bar = [1, x, x**2]`` and the coefficients are obtained from the
    closed-form ridge solution ``theta = (X^T X + lam * I)^-1 X^T Y``.

    The penalty ``lam * I`` spans every column of ``X``, so the constant
    (intercept) coefficient is regularized together with the others.
    """

    def __init__(self, lam):
        """Initialize a Ridge object.

        Args:
            lam: the regularization factor (``lambda`` in the formula above)
        """
        self._lambda = lam

    @staticmethod
    def _x_bar(x):
        """Create the feature vector x_bar for one sample.

        Args:
            x: input value (scalar or 1-D array)

        Returns:
            1-D array made of ``1.0``, ``x`` and the element-wise square
            of ``x``, concatenated in that order.
        """
        return np.hstack(([1.0], x, np.square(x)))

    def fit(self, x_train, y_train):
        """Generate a fit for the data.

        The fitted coefficients are stored in ``self._coeff_hat`` with one
        row per feature (constant, linear, quadratic for scalar inputs).

        Args:
            x_train: the input values of the training data
            y_train: the output values of the training data

        Raises:
            numpy.linalg.LinAlgError: if ``X^T X + lam * I`` is singular.
        """
        # One row per sample: X holds the x_bar vectors, Y the targets.
        X = np.vstack([self._x_bar(x) for x in x_train])
        Y = np.vstack(list(y_train))

        # Solve (X^T X + lam * I) theta = X^T Y directly. This gives the same
        # theta as multiplying by the explicit inverse, but is more accurate
        # and cheaper than forming the inverse first.
        XT = X.T
        regularized_gram = XT @ X + self._lambda * np.identity(X.shape[1])
        self._coeff_hat = np.linalg.solve(regularized_gram, XT @ Y)


In [None]:
"""Generate fake data"""
c2 = 0.01
c1 = 1.3
c0 = 3.456
x_in = np.linspace(-10.0, 20.2, 200.0)
y_out = c1 * x_in ** 2 + c1 * x_in + c0 + 500.0 * np.random.rand(len(x_in))

%matplotlib notebook
plt.figure()
plt.scatter(x_in, y_out)
plt.xlabel('x')
plt.ylabel('y')
plt.show()

In [None]:
"""Train using the custom Ridge class"""
# Hold out 20% of the samples for testing; the rest is used for fitting.
x_train, x_test, y_train, y_test = train_test_split(x_in, y_out, test_size=0.20)

# Fit the custom closed-form model with a small regularization factor.
lam = 0.1
ridge = Ridge(lam)
ridge.fit(x_train, y_train)

In [None]:
"""Train using Sklearn ridge model"""

from sklearn import linear_model

# Design matrix with one row per training sample and columns [1, x, x^2],
# i.e. the same features the custom model builds for itself.
features = np.column_stack((np.ones_like(x_train), x_train, x_train ** 2))

# Use the same regularization factor as the custom model.
reg = linear_model.Ridge(alpha=lam)
reg.fit(features, y_train)

In [None]:
"""Plot test data and model predictions"""

# Evaluate both fitted quadratics on the sorted test inputs so that each
# curve is drawn from left to right.
x_test_sorted = np.sort(x_test)
custom_curve = (ridge._coeff_hat[0]
                + ridge._coeff_hat[1] * x_test_sorted
                + ridge._coeff_hat[2] * x_test_sorted ** 2)
# For the sklearn model the constant term is read from ``intercept_``;
# ``coef_[1]`` and ``coef_[2]`` are the linear and quadratic coefficients.
sklearn_curve = (reg.intercept_
                 + reg.coef_[1] * x_test_sorted
                 + reg.coef_[2] * x_test_sorted ** 2)

# Test samples with both model curves overlaid.
plt.figure()
plt.scatter(x_test, y_test)
plt.plot(x_test_sorted, custom_curve, '-r', label='custom')
plt.plot(x_test_sorted, sklearn_curve, '--g', label='sklearn')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()

# Print the fitted coefficients of both models for a side-by-side comparison.
print(f'custom: {ridge._coeff_hat[0]}, {ridge._coeff_hat[1]}, {ridge._coeff_hat[2]}')
print(f'sklearn: {reg.intercept_ }, {reg.coef_[1]}, {reg.coef_[2]}')

In [None]:
"""Effect of regularization factor"""

# Refit the custom model for ten regularization factors evenly spaced in
# [0, 1], recording the coefficient vector and its norm for each factor.
factors = np.linspace(0.0, 1.0, 10)
coeff_store = []
norm_store = []
for factor in factors:
    ridge = Ridge(factor)
    ridge.fit(x_train, y_train)

    fitted = ridge._coeff_hat
    coeff_store.append(fitted)
    norm_store.append(np.linalg.norm(fitted))

In [None]:
# Pull each coefficient out of the stored fits, one series per coefficient.
coeff_0 = [coeffs[0] for coeffs in coeff_store]
coeff_1 = [coeffs[1] for coeffs in coeff_store]
coeff_2 = [coeffs[2] for coeffs in coeff_store]

# Four stacked panels sharing the lambda axis: c0, c1, c2, and the norm of
# the coefficient vector at the bottom.
plt.figure()

plt.subplot(4, 1, 1)
plt.plot(factors, coeff_0, 'or')
plt.ylabel('c0')

plt.subplot(4, 1, 2)
plt.plot(factors, coeff_1, 'og')
plt.ylabel('c1')

plt.subplot(4, 1, 3)
plt.plot(factors, coeff_2, 'ob')
plt.ylabel('c2')

plt.subplot(4, 1, 4)
plt.plot(factors, norm_store, 'o')
plt.xlabel('lambda')
plt.ylabel('Norm')

plt.show()