In [1]:
#importing dependencies
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets

# Load the Boston housing data and split it into predictors and target.
# NOTE(review): load_boston is not available in recent scikit-learn releases -
# confirm the pinned version before re-running this cell.
boston = datasets.load_boston()
X, y = boston['data'], boston['target']

Before building the RegularizedRegression class, let’s define a few helper functions. 
The first function standardizes the data by removing the mean and dividing by the standard deviation. 
This is the equivalent of the StandardScaler from scikit-learn.

In [2]:
def standard_scaler(X):
    """Standardize each column of ``X`` to zero mean and unit standard deviation.

    Parameters
    ----------
    X : array exposing ``mean(0)`` and ``std(0)`` (e.g. a NumPy array)
        Statistics are taken along axis 0, i.e. one mean/std per column.

    Returns
    -------
    ``(X - mean) / std`` computed column-wise. A column whose standard
    deviation is exactly zero is only centred (divided by 1 instead of 0),
    so a constant feature becomes a column of zeros rather than NaN.
    """
    mean = X.mean(0)
    stds = X.std(0)
    # Dividing by a zero std would yield NaN/inf and poison every later
    # matrix product; fall back to a unit scale for constant columns.
    safe_stds = np.where(stds == 0, 1.0, stds)
    return (X - mean) / safe_stds

The sign function simply returns the sign of each element in an array. 
This is useful for calculating the gradient in Lasso regression. 
The first_element_zero option makes the function return a 0 (rather than a -1 or 1) for the first element. 
As discussed in the concept section, this prevents Lasso regression from penalizing the magnitude of the intercept.



In [3]:
def sign(x, first_element_zero=False):
    """Return the element-wise sign of ``x`` as a new array.

    Parameters
    ----------
    x : array-like of numbers
        Values whose signs are wanted; ``x`` itself is not modified.
    first_element_zero : bool, default False
        When True, the first entry of the result is forced to 0 regardless
        of the sign of ``x[0]``.

    Returns
    -------
    Array with -1 for negative entries, 0 for zeros and 1 for positive
    entries (subject to ``first_element_zero``).
    """
    # np.sign maps an exact zero to 0, so a coefficient sitting at zero does
    # not contribute a spurious +1 to whatever consumes these signs.
    signs = np.sign(x)
    if first_element_zero:
        signs[0] = 0
    return signs

The RegularizedRegression class below contains methods for fitting Ridge and Lasso regression. The first method, record_info, handles standardization, adds an intercept to the predictors, and records the necessary values. The second, fit_ridge, fits Ridge regression using

$$\hat{\beta} = \left(X^\top X + \lambda I'\right)^{-1} X^\top y.$$

The third method, fit_lasso, estimates the regression parameters using gradient descent. The gradient is the derivative of the Lasso loss function:

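$$\frac{\partial L(\hat{\beta})}{\partial \hat{\beta}} = -X^\top \left(y - X\hat{\beta}\right) + \lambda I' \, \text{sign}(\hat{\beta}).$$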

The gradient descent used here simply adjusts the parameters a fixed number of times (determined by n_iters). There are many more efficient ways to implement gradient descent, though we use a simple implementation here to keep the focus on Lasso regression.



In [8]:
class RegularizedRegression:
    """Linear regression with an L2 (Ridge) or L1 (Lasso) penalty.

    The first column of the design matrix is treated as the intercept and is
    never penalized. After ``fit_ridge`` or ``fit_lasso`` the instance holds:

    * ``X`` - design matrix actually used (intercept column first)
    * ``y`` - targets
    * ``lam`` - penalty strength
    * ``N``, ``D`` - number of rows / columns of ``X``
    * ``beta_hats`` - estimated coefficients (intercept first)
    * ``y_hat`` - fitted values ``X @ beta_hats``
    """

    def record_info(self, X, y, lam, intercept, standardize):
        """Prepare the design matrix and store the fitting inputs on ``self``.

        Parameters
        ----------
        X : 2-D array-like of predictors.
        y : array-like of targets.
        lam : penalty strength.
        intercept : bool
            ``True`` means ``X`` ALREADY has a column of ones as its first
            column; ``False`` means one is prepended here.
        standardize : bool
            Whether to pass the predictors through ``standard_scaler``.
        """
        X = np.asarray(X)

        # standardize
        if standardize:
            if intercept:
                # Column 0 is the caller's constant intercept column. Scaling
                # it would divide by a zero standard deviation, so only the
                # remaining predictor columns are standardized.
                X = np.concatenate((X[:, :1], standard_scaler(X[:, 1:])), axis=1)
            else:
                X = standard_scaler(X)

        # add intercept
        if not intercept:
            ones = np.ones((len(X), 1))  # column of ones
            X = np.concatenate((ones, X), axis=1)  # ones first, then predictors

        # record values
        self.X = np.array(X)
        self.y = np.array(y)
        self.lam = lam
        self.N, self.D = self.X.shape

    def fit_ridge(self, X, y, lam=0, intercept=False, standardize=True):
        """Fit Ridge regression in closed form.

        Solves ``(X'X + lam * I') beta = X'y`` where ``I'`` is the identity
        with its top-left entry zeroed so the intercept is not penalized.
        Stores ``beta_hats`` and ``y_hat`` on the instance. See
        ``record_info`` for the meaning of ``intercept`` and ``standardize``.

        Raises
        ------
        numpy.linalg.LinAlgError
            If the penalized normal-equation matrix is singular.
        """
        # record data and dimensions
        self.record_info(X, y, lam, intercept, standardize)

        # estimate parameters
        XtX = self.X.T @ self.X
        I_prime = np.eye(self.D)
        I_prime[0, 0] = 0  # leave the intercept unpenalized
        Xty = self.X.T @ self.y
        # Solving the linear system avoids forming an explicit inverse.
        self.beta_hats = np.linalg.solve(XtX + self.lam * I_prime, Xty)

        # get fitted values
        self.y_hat = self.X @ self.beta_hats

    def fit_lasso(self, X, y, lam=0, n_iters=2000,
                  lr=0.0001, intercept=False, standardize=True, seed=None):
        """Fit Lasso regression with a fixed number of gradient-descent steps.

        Parameters
        ----------
        X, y, lam, intercept, standardize : see ``record_info``.
        n_iters : number of parameter updates to perform.
        lr : step size applied to the gradient at every update.
        seed : optional seed for the random starting coefficients. ``None``
            (the default) draws from NumPy's global random state; any other
            value makes the fit reproducible.

        Stores ``beta_hats`` and ``y_hat`` on the instance.
        """
        # record data and dimensions
        self.record_info(X, y, lam, intercept, standardize)

        # random starting point
        if seed is None:
            beta_hats = np.random.randn(self.D)
        else:
            beta_hats = np.random.default_rng(seed).standard_normal(self.D)

        # estimate parameters
        for _ in range(n_iters):
            residuals = self.y - self.X @ beta_hats
            # sign(..., True) zeroes the first entry so the intercept carries
            # no L1 penalty.
            dL_dbeta = -self.X.T @ residuals + self.lam * sign(beta_hats, True)
            beta_hats -= lr * dL_dbeta
        self.beta_hats = beta_hats

        # get fitted values
        self.y_hat = self.X @ self.beta_hats
    

In [9]:
# penalty strength shared by both fits
lam = 10

# Ridge: closed-form solution
ridge_model = RegularizedRegression()
ridge_model.fit_ridge(X, y, lam=lam)

# Lasso: gradient descent
lasso_model = RegularizedRegression()
lasso_model.fit_lasso(X, y, lam=lam)

The below graphic shows the coefficient estimates using Ridge and Lasso regression with a changing value of λ. Note that λ=0 is identical to ordinary linear regression. As expected, the magnitude of the coefficient estimates decreases as λ increases.

In [17]:
#using python lib
from sklearn.linear_model import Ridge, Lasso

# penalty strength passed to the scikit-learn estimators
alpha = 1
# fit() returns the estimator itself, so construction and fitting can be chained
ridge_model = Ridge(alpha=alpha).fit(X, y)

In [19]:
# fit() returns the estimator itself, so construction and fitting can be chained
lasso_model = Lasso(alpha=alpha).fit(X, y)

In practice, however, we want to choose alpha through cross validation. This is easily implemented in scikit-learn by designating a set of alpha values to try and fitting the model with RidgeCV or LassoCV.

In [20]:
from sklearn.linear_model import RidgeCV, LassoCV
# candidate penalty strengths to compare by cross validation
alphas = [0.01, 1, 100]

# fit both cross-validated estimators on the same grid
ridgeCV_model = RidgeCV(alphas=alphas).fit(X, y)
lassoCV_model = LassoCV(alphas=alphas).fit(X, y)

# report the penalty each search selected
print('Ridge alpha:', ridgeCV_model.alpha_)
print('Lasso alpha:', lassoCV_model.alpha_)


Ridge alpha: 0.01
Lasso alpha: 1.0
