## CMSC 35300 Final Project: Lasso Models
Shweta Kamath <br>
Nivedita Vatsa <br>
Carolyn Vilter

#### Setup

Source: https://www.geeksforgeeks.org/implementation-of-lasso-regression-from-scratch-using-python/

In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [63]:
# Import data
df = pd.read_csv("data/all_data_standardized.csv")

In [64]:
# Separate out Xs
X = df.loc[:, ~df.columns.isin(["child_id", "mother_id", "treat_alike_scale", "treat_alike_binary"])]
X = X.to_numpy()

# add columns of 1s
X_ = np.hstack((np.ones((len(X), 1)), X))

# Separate out two prospective ys
y_scale = df.loc[:, df.columns == "treat_alike_scale"]
y_scale = y_scale.to_numpy()

y_binary = df.loc[:, df.columns == "treat_alike_binary"]
y_binary = y_binary.to_numpy()

### Lasso Regression
Predict repeatedly using cross validation; plot test error.

In [65]:
class LassoRegression():
    '''
    '''
    def __init__(self, learning_rate, iterations, l1_penalty):
        '''
        '''
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.l1_penalty = l1_penalty
        
    def fit(self, X, Y):
        '''
        '''
        self.m, self.n = X.shape
        
        self.w = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y
        
        for i in range(self.iterations):
            self.update_weights()
        return self
    
    def update_weights(self):
        '''
        '''
        Y_pred = self.predict(self.X).reshape(self.m, 1)
        
        dw = np.zeros(self.n)
        for j in range(self.n):
            X_j = self.X[:, j].reshape(self.m, 1).T
            if self.w[j] > 0:
                dw[j] = (-(2*(X_j).dot(self.Y - Y_pred)) + self.l1_penalty) / self.m
            else:
#                 print("self.X[:, j]:", self.X[:, j].shape, X_j.shape)
#                 print("self.Y:", self.Y.shape)
#                 print("Y_pred:", Y_pred.shape)
#                 print("self.l1_penalty:", self.l1_penalty)
                
                dw[j] = (-(2*(X_j).dot(self.Y - Y_pred)) - self.l1_penalty) / self.m
                
        db = - 2 * np.sum(self.Y - Y_pred) / self.m
        
        self.w = self.w - self.learning_rate*dw
        self.b = self.b - self.learning_rate*db
        
        return self
    
    def predict(self, X):
        '''
        '''
        return X.dot(self.w) + self.b

In [66]:
# class LassoRegression():
#     def __init__(self, learning_rate, iterations, l1_penalty):
#         self.learning_rate = learning_rate
#         self.iterations = iterations
#         self.l1_penality = l1_penality
        
#     # Function for model training          
#     def fit(self, X, Y):
#         # no_of_training_examples, no_of_features
#         self.m, self.n = X.shape
        
#         # weight initialization
#         self.W = np.zeros(self.n)
#         self.b = 0
#         self.X = X
#         self.Y = Y
        
#         # gradient descent learning
#         for i in range(self.iterations):
#             self.update_weights()
#         return self
    
#     # update weights in gradient descent
#     def update_weights(self):
#         Y_pred = self.predict(self.X)
        
#         # calculate gradients
#         dW = np.zeros(self.n)
#         for j in range(self.n):
#             if self.W[j] > 0 :
#                 dW[j] = (-(2*(self.X[:, j]).dot(self.Y - Y_pred)) + self.l1_penality) / self.m
#             else:
#                 dW[j] = (-(2*(self.X[:, j]).dot(self.Y - Y_pred)) - self.l1_penality ) / self.m
        
#         db = - 2 * np.sum(self.Y - Y_pred) / self.m
        
#         # update weights
#         self.W = self.W - self.learning_rate * dW
#         self.b = self.b - self.learning_rate * db
        
#         return self
    
#     # Hypothetical function  h(x)
#     def predict(self, X):
#         return X.dot(self.W) + self.b

In [81]:
lasso = LassoRegression(iterations = 10, learning_rate = 0.00001, l1_penalty = 0.01)

In [82]:
lasso.fit(X, y_scale)

<__main__.LassoRegression at 0x1d85d766340>

In [83]:
y_scale_pred = lasso.predict(X)
display(y_scale_pred)

array([-4.78683766e+49, -6.25319160e+49, -8.52162755e+49, ...,
       -1.20159021e+50, -1.21702750e+50, -6.45381256e+49])

In [84]:
lasso.w

array([-4.23157156e+43, -2.84029978e+41, -4.45005647e+42, -4.20316856e+43,
       -1.70753772e+40, -7.77370443e+40, -3.08331185e+41, -4.25450489e+40,
       -5.69621271e+40, -4.50852113e+40, -5.57327623e+40, -5.41315325e+40,
       -5.64677971e+40, -4.64180603e+40, -5.43254297e+40, -6.11111392e+40,
       -5.51654042e+40, -4.81484327e+40, -5.61266742e+40, -6.39280066e+40,
       -5.05140669e+40, -4.92805948e+40, -5.65845708e+40, -3.02908381e+43,
       -2.15207689e+41, -1.80176913e+39, -2.37254623e+45, -3.69921041e+39,
       -1.37663646e+40, -1.05878359e+40, -1.63827569e+39, -1.29141953e+39,
       -8.86609790e+39, -8.76977288e+39, -3.67120658e+38, -3.69921041e+39,
       -1.37663646e+40, -9.73844010e+39, -8.46073627e+39, -2.35497859e+39,
       -1.09581251e+40, -6.50960553e+39, -2.05226208e+39])

### Sklearn: Lasso Regression

In [78]:
from sklearn.linear_model import Lasso

In [79]:
lasso_sklearn = Lasso(alpha=0.0)
lasso_sklearn.fit(X, y_scale)

# predict the value
y_pred_sklearn = lasso_sklearn.predict(X)

  lasso_sklearn.fit(X, y_scale)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [80]:
lasso.w

array([[0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.00999999],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ],
       [0.01      ]])

In [81]:
lasso_sklearn.coef_

array([ 5.94995187e-04,  1.35634886e-02, -1.08825776e-03,  4.85721094e-05,
        1.39836672e-02,  1.50124826e-02, -8.63994678e-04,  8.40426453e-03,
       -6.40107564e-03,  1.18806410e-02, -1.52450446e-02, -9.55468468e-03,
        1.14055111e-02,  2.43584433e-04, -3.77610187e-03, -9.36740753e-03,
       -2.46391568e-03,  6.86193072e-03, -2.16529119e-02,  1.80446112e-02,
       -1.23826492e-02,  1.41983590e-02,  1.78011922e-02,  1.20634171e-05,
       -3.10229225e-03,  6.94608577e-02, -4.42510590e-07,  9.94312398e-03,
       -7.29117216e-02, -6.52263996e-02, -2.19961582e-01, -2.89226854e-01,
       -3.03937689e-01, -3.62091654e-01, -3.10130511e-01, -5.78312703e-02,
       -2.22576257e-02,  4.34554012e-02, -2.86737738e-02,  1.43151504e-02,
        8.09763506e-03, -3.30367233e-03, -1.42992769e-03])

In [82]:
np.max(y_pred_sklearn)

2.038457258659307