# Lasso Regression on Boston Housing Dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_boston

Loading the dataset

In [None]:
# Fetch the Boston housing bunch, then wrap its feature matrix in a DataFrame
# whose columns are labelled with the dataset's own feature names.
# NOTE(review): `load_boston` is not available in recent scikit-learn releases —
# confirm the pinned version before re-running this notebook.
boston_data = load_boston()
boston_df = pd.DataFrame(
    boston_data.data,
    columns=boston_data.feature_names,
)

In [None]:
# Preview the first five rows of the feature table.
boston_df.head(5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [None]:
# Predictors: every column of the feature frame.
X = boston_df
# Response: the dataset's target vector as a one-column frame named "MEDV".
Y = pd.DataFrame({"MEDV": boston_data.target})

## Lasso Regression model with linear features

In [None]:
from sklearn.preprocessing import scale

# Standardize predictors and response; both names are rebound to the scaled
# arrays, so the original DataFrames are no longer reachable through X / Y.
# NOTE(review): scaling is applied to the full dataset before the train/test
# split that follows, so test rows contribute to the scaling statistics.
X, Y = scale(X), scale(Y)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=5
)

# Report the shape of each split, one per line.
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape, sep="\n")

(404, 13)
(102, 13)
(404, 1)
(102, 1)


Training the model

In [None]:
from sklearn.linear_model import Lasso

# Fit an L1-regularized linear model (alpha=0.1) on the training split and
# show the learned coefficient for each feature.
lasso = Lasso(alpha=0.1).fit(X_train, Y_train)
print("Lasso model:", lasso.coef_)

Lasso model: [-0.          0.         -0.          0.00809581 -0.          0.2531233
 -0.         -0.         -0.         -0.         -0.15975457  0.02359289
 -0.44222622]


Predictions on training and test data

In [None]:
# Score both splits with the fitted model.
y_train_predict, y_test_predict = lasso.predict(X_train), lasso.predict(X_test)

Calculating metrics on training and test set predictions

In [None]:
 from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

rmse = (mean_squared_error(Y_train, y_train_predict)**(0.5))
r2 = r2_score(Y_train, y_train_predict)
 
print("Lasso Regression Model (Linear Features)")
print("-"*40)

print("\nThe model performance for training set")
print("-"*40)
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
print('i.e. The lasso reg. model explains the house price for training set {:.2f}% accurately'.format(r2*100))

rmse = (mean_squared_error(Y_test, y_test_predict)**(0.5))
r2 = r2_score(Y_test, y_test_predict)

print("\nThe model performance for testing set")
print("-"*40)
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
print('i.e. The lasso reg. model explains the house price for test set {:.2f}% accurately'.format(r2*100))

Lasso Regression Model (Linear Features)
----------------------------------------

The model performance for training set
----------------------------------------
RMSE is 0.5867948987422895
R2 score is 0.6616127605179004
i.e. The lasso reg. model explains the house price for training set 66.16% accurately

The model performance for testing set
----------------------------------------
RMSE is 0.5281022470742474
R2 score is 0.6992875963240206
i.e. The lasso reg. model explains the house price for test set 69.93% accurately


## Lasso Regression model with polynomial features

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the (already scaled) predictors with degree-2 polynomial terms.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)

In [None]:
# Re-split using the polynomial design matrix with the same test fraction and
# seed as the linear section. This rebinds X_train / X_test / Y_train / Y_test,
# so the linear-feature splits are no longer available after this cell.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_poly, Y, test_size=0.2, random_state=5
)

# Report the shape of each split, one per line.
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape, sep="\n")

(404, 105)
(102, 105)
(404, 1)
(102, 1)


Training the model

In [None]:
# Refit Lasso on the polynomial features with a smaller penalty (alpha=0.003);
# `lasso` now refers to this model rather than the linear-feature one.
lasso = Lasso(alpha=0.003).fit(X_train, Y_train)
print("Lasso model:", lasso.coef_)

Lasso model: [ 0.         -0.         -0.         -0.          0.         -0.09818647
  0.35386341 -0.11876554 -0.29487792  0.06674189 -0.09891912 -0.09610695
  0.08154434 -0.39960645  0.00705009  0.         -0.          0.35038616
 -0.          0.00807639 -0.          0.         -0.03976869 -0.
 -0.         -0.00074956  0.          0.01037978 -0.          0.
  0.          0.04049998 -0.         -0.01360355 -0.          0.07197171
 -0.         -0.         -0.01760927  0.02350806 -0.          0.11881144
  0.0577246   0.          0.         -0.          0.         -0.06008952
  0.         -0.05036924  0.04316307 -0.07795339 -0.08979969  0.02420571
  0.         -0.          0.         -0.00760077  0.         -0.05084448
 -0.08706271 -0.03819966 -0.0883225   0.01162432 -0.02822902 -0.
 -0.05286692 -0.          0.05167499  0.00519466 -0.06222832  0.
 -0.11662252 -0.0626971  -0.10626387 -0.01851066 -0.0843394   0.00517232
  0.          0.15774741 -0.         -0.         -0.13104637 -0.094199

Predictions on training and test set

In [None]:
# Score both polynomial-feature splits with the refitted model.
y_train_predict, y_test_predict = lasso.predict(X_train), lasso.predict(X_test)

Calculating metrics of predictions on training and test set

In [None]:
# Training-split metrics for the polynomial-feature model: RMSE is the square
# root of the mean squared error, R2 is the coefficient of determination.
rmse = mean_squared_error(Y_train, y_train_predict) ** 0.5
r2 = r2_score(Y_train, y_train_predict)

print("Lasso Regression Model (Polynomial features)")
print("-"*40)

print("\nThe model performance for training set")
print("-"*40)
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
print('i.e. The lasso reg. model explains the house price for training set {:.2f}% accurately'.format(r2*100))

# Test-split metrics; `rmse` and `r2` are rebound to the test-set values.
rmse = mean_squared_error(Y_test, y_test_predict) ** 0.5
r2 = r2_score(Y_test, y_test_predict)

print("\nThe model performance for testing set")
print("-"*40)
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
# This summary line reports the test split, so it is labelled "test set".
print('i.e. The lasso reg. model explains the house price for test set {:.2f}% accurately'.format(r2*100))

Lasso Regression Model (Polynomial features)
----------------------------------------

The model performance for training set
----------------------------------------
RMSE is 0.2952125643173813
R2 score is 0.9143532292999902
i.e. The lasso reg. model explains the house price for training set 91.44% accurately

The model performance for testing set
----------------------------------------
RMSE is 0.33296235859809953
R2 score is 0.8804621088652422
i.e. The lasso reg. model explains the house price for training set 88.05% accurately
