Building a lasso regression model from scratch

In [4]:
# importing the library
import numpy as np 

In [3]:
class Lasso_Regression():
    """Linear regression with an L1 (lasso) penalty, trained by full-batch gradient descent.

    Each call to ``update_weights`` takes one step down the (sub)gradient of

        (1 / m) * (sum((Y - (X.w + b)) ** 2) + lambda_parameter * sum(abs(w)))

    where ``m`` is the number of training rows. The bias ``b`` is not penalised.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    no_of_iterations : int
        Number of full-batch updates performed by ``fit``.
    lambda_parameter : float
        Strength of the L1 penalty on the weights.

    Attributes
    ----------
    w : numpy.ndarray of shape (n,)
        Learned weights, one per feature (set by ``fit``).
    b : float
        Learned bias / intercept (set by ``fit``).
    m, n : int
        Number of training rows and features (set by ``fit``).
    X, Y : numpy.ndarray
        Training data kept for use by ``update_weights`` (set by ``fit``).
    """

    def __init__(self, learning_rate, no_of_iterations, lambda_parameter):
        # hyperparameters are stored as given; nothing is fitted here
        self.learning_rate = learning_rate
        self.no_of_iterations = no_of_iterations
        self.lambda_parameter = lambda_parameter

    def fit(self, X, Y):
        """Learn ``w`` and ``b`` from the training data.

        Weights and bias are reset to zero on every call, then
        ``no_of_iterations`` gradient steps are taken.

        Parameters
        ----------
        X : array-like of shape (m, n)
            Feature matrix, one row per sample.
        Y : array-like of shape (m,)
            Target values, one per row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-dimensional, has no rows, or ``Y`` does not
            hold exactly one value per row of ``X``.
        """
        # normalise to float arrays so lists / DataFrames work like ndarrays
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float)

        if X.ndim != 2:
            raise ValueError('X must be 2-dimensional (samples, features)')
        if X.shape[0] == 0:
            raise ValueError('X must contain at least one sample')
        if Y.shape != (X.shape[0],):
            raise ValueError('Y must be 1-dimensional with one value per row of X')

        self.m, self.n = X.shape  # no. of samples and features

        # start from an all-zero model
        self.w = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y

        # full-batch gradient descent
        for _ in range(self.no_of_iterations):
            self.update_weights()

    def update_weights(self):
        """Apply one gradient-descent step to ``w`` and ``b`` using the stored training data."""
        residual = self.Y - self.predict(self.X)

        # Subgradient of the L1 term is sign(w): +1 for positive weights,
        # -1 for negative ones and 0 for a weight that is exactly zero, so the
        # penalty never pushes a zero weight away from zero.
        l1_subgradient = self.lambda_parameter * np.sign(self.w)

        dw = (-2 * self.X.T.dot(residual) + l1_subgradient) / self.m
        db = -2 * np.sum(residual) / self.m

        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    def predict(self, X):
        """Return the linear prediction ``X.dot(w) + b`` for each row of ``X``.

        ``X`` must provide a ``dot`` method (e.g. a NumPy array) with one
        column per fitted weight.
        """
        return X.dot(self.w) + self.b


In [2]:
# importing the libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics

Data Collection and Processing

In [None]:
# NOTE(review): absolute, machine-specific path — this cell only runs where the file exists at this location.
salary_csv_path = 'C:/Users/DELL/OneDrive/Desktop/Project/ML/Supervised_Model/salary_data.csv'
salary_data = pd.read_csv(salary_csv_path)

In [6]:
# preview the first rows of the loaded frame
salary_data.head()

Unnamed: 0,YearsExperience,Salary
0,1.1,39343
1,1.3,46205
2,1.5,37731
3,2.0,43525
4,2.2,39891


In [8]:
# Analysis of the data: (number of rows, number of columns)
salary_data.shape

(30, 2)

In [9]:
# count the missing values in each column
salary_data.isnull().sum()

YearsExperience    0
Salary             0
dtype: int64

Splitting the features and target

In [10]:
# features: every column except the last, kept 2-D (rows x features)
features = salary_data.iloc[:, :-1].values
# target: the last column, so the two slices always partition the frame
# (a hard-coded index 1 only matches "the last column" for a two-column frame)
target = salary_data.iloc[:, -1].values

In [11]:
# show the full feature matrix and target vector
print(features)
print(target)

[[ 1.1]
 [ 1.3]
 [ 1.5]
 [ 2. ]
 [ 2.2]
 [ 2.9]
 [ 3. ]
 [ 3.2]
 [ 3.2]
 [ 3.7]
 [ 3.9]
 [ 4. ]
 [ 4. ]
 [ 4.1]
 [ 4.5]
 [ 4.9]
 [ 5.1]
 [ 5.3]
 [ 5.9]
 [ 6. ]
 [ 6.8]
 [ 7.1]
 [ 7.9]
 [ 8.2]
 [ 8.7]
 [ 9. ]
 [ 9.5]
 [ 9.6]
 [10.3]
 [10.5]]
[ 39343  46205  37731  43525  39891  56642  60150  54445  64445  57189
  63218  55794  56957  57081  61111  67938  66029  83088  81363  93940
  91738  98273 101302 113812 109431 105582 116969 112635 122391 121872]


Train Test Split

In [12]:
# hold out 33% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    target,
    test_size=0.33,
    random_state=2,
)

In [13]:
# shapes of the full and training feature matrices, then the element count of X_test
# NOTE(review): X_test is reported via .size rather than .shape, unlike the other two — confirm intent
print(features.shape, X_train.shape, X_test.size)

(30, 1) (20, 1) 10


Training a Lasso Regression Model


In [22]:
# hyperparameters for the from-scratch lasso model
model = Lasso_Regression(
    learning_rate=0.01,
    no_of_iterations=1000,
    lambda_parameter=0.01,
)

In [15]:
# train the from-scratch model on the training split
model.fit(X_train, Y_train)

Predict the salary for test data

In [16]:
# predictions of the from-scratch model for the held-out rows
test_pred = model.predict(X_test)

print(test_pred)

[ 35680.78115726  33764.72092403  66337.74488907  58673.50395612
  91246.52792117  80708.19663836 101784.85920397  51967.29313979
  42386.9919736   88372.43757131]


In [17]:
# R-squared score (coefficient of determination; higher is better)
# descriptive names keep these results available after the sklearn cell runs
r2_scratch = metrics.r2_score(Y_test, test_pred)

# mean absolute error (lower is better)
mae_scratch = metrics.mean_absolute_error(Y_test, test_pred)

print('R-squared error : ', r2_scratch)
print('Mean Absolute error : ', mae_scratch)

R-squared error :  0.9017125710304092
Mean Absolute error :  6778.544031571366


Comparing it with scikit-learn's Lasso regression

In [18]:
from sklearn.linear_model import Lasso
# scikit-learn reference model, constructed with no arguments (library defaults)
# NOTE(review): the parameter table below shows alpha=1.0, whereas the from-scratch
# model was built with lambda_parameter=0.01 — confirm the comparison is intended as-is
sk_model = Lasso()

In [19]:
# fit the scikit-learn model on the same training split
sk_model.fit(X_train, Y_train)

0,1,2
,alpha,1.0
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [20]:
# predictions of the scikit-learn model for the same held-out rows
sk_test_pred = sk_model.predict(X_test)

In [21]:
# R-squared score (coefficient of determination; higher is better)
# descriptive names avoid overwriting the metrics computed for the from-scratch model
r2_sklearn = metrics.r2_score(Y_test, sk_test_pred)

# mean absolute error (lower is better)
mae_sklearn = metrics.mean_absolute_error(Y_test, sk_test_pred)

print('R-squared error : ', r2_sklearn)
print('Mean Absolute error : ', mae_sklearn)

R-squared error :  0.9041817142395875
Mean Absolute error :  6673.480440148538
