In [1]:
import numpy as np
import matplotlib.pyplot as plt 

import pandas as pd  
import seaborn as sns 

%matplotlib inline

In [2]:
# Load the Boston housing data.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where the
# import below raises ImportError. In that case rebuild the same three pieces the
# notebook uses (data, target, feature_names) from the original StatLib file,
# following the recipe given in scikit-learn's deprecation notice.
# NOTE: scikit-learn dropped this dataset over ethical concerns about the `B`
# feature; prefer another dataset (e.g. fetch_california_housing) for new work.
try:
    from sklearn.datasets import load_boston
    boston_dataset = load_boston()
except ImportError:
    from types import SimpleNamespace

    # Each record in the raw file is wrapped over two physical lines, which is
    # why even and odd rows are stitched back together. Needs network access.
    boston_raw = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    boston_dataset = SimpleNamespace(
        data=np.hstack([boston_raw.values[::2, :], boston_raw.values[1::2, :2]]),
        target=boston_raw.values[1::2, 2],
        feature_names=np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
            'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
    )

In [3]:
# Wrap the raw feature matrix in a DataFrame with one named column per feature.
boston = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)
# Keep the feature column labels as they are now, before the MEDV target column is added.
names = boston.columns
# The preview must be the cell's last expression: a bare `boston.head()` in the
# middle of a cell is evaluated and then discarded without being displayed.
boston.head()

In [4]:
# Attach the dataset's target values to the frame as a MEDV column.
boston['MEDV'] = boston_dataset.target
# `names` was read from boston.columns before this assignment, so it lists only the feature columns.
print(names)

Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT'],
      dtype='object')


In [5]:
# Model inputs: the two predictors, selected by label. Plain column selection
# replaces the np.c_ round-trip through a NumPy array and keeps the same index
# as Y; .copy() keeps X independent of `boston`.
X = boston[['LSTAT', 'RM']].copy()
# Regression target.
Y = boston['MEDV']

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=5
)

# Report the shape of each piece, one per line, in the order they were returned.
print(*(split.shape for split in (X_train, X_test, Y_train, Y_test)), sep="\n")

(404, 2)
(102, 2)
(404,)
(102,)


### Using Lasso

In [7]:
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score

# Lasso regression; a higher value of alpha restricts the coefficients further.
ls = Lasso(alpha=0.1)
ls.fit(X_train, Y_train)

# Predictions on the training data and on the testing data, in that order.
Y_pred_train, Y_pred = ls.predict(X_train), ls.predict(X_test)


In [8]:
# Coefficients of the fitted Lasso model.
lscoeficent = ls.coef_
# Echo the array so the cell actually shows it, matching the Ridge section's `model.coef_` cell.
lscoeficent

In [9]:
# Evaluate the Lasso model on the training and the testing split.
#
# Both sets of predictions are taken from `ls` inside this cell. The shared name
# Y_pred_train is reassigned by the Ridge section further down, so reading it
# here would report Ridge numbers under the Lasso heading whenever this cell is
# re-run after the Ridge cells.
lasso_pred_train = ls.predict(X_train)
rmsetraining = np.sqrt(mean_squared_error(Y_train, lasso_pred_train))
lasso_r2_training = r2_score(Y_train, lasso_pred_train)

print("The model performance for training set")
print("--------------------------------------")
print('RMSE is {}'.format(rmsetraining))
print('R2 score is {}'.format(lasso_r2_training))
print("\n")

# model evaluation for testing set
y_test_predict = ls.predict(X_test)
mse = mean_squared_error(Y_test, y_test_predict)
rmse = np.sqrt(mse)  # reuse the MSE instead of computing it a second time
r2 = r2_score(Y_test, y_test_predict)

print("The model performance for testing set")
print("--------------------------------------")
print('MSE {}'.format(mse))
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))


# Positive value: the training RMSE is larger than the testing RMSE.
print("Difference between training and Testing set")
print("--------------------------------------")
print('Difference of RMSE between training and testing for Lasso is {}'.format(rmsetraining-rmse))


The model performance for training set
--------------------------------------
RMSE is 5.639633479359648
R2 score is 0.6297457828825357


The model performance for testing set
--------------------------------------
MSE 26.80986506219231
RMSE is 5.17782435605847
R2 score is 0.657573887893668
Difference between training and Testing set
--------------------------------------
Difference of RMSE between training and testing for Lasso is 0.46180912330117785


### Using Ridge

In [10]:
from sklearn.linear_model import Ridge

# Ridge regression; a higher value of alpha restricts the coefficients further.
rr = Ridge(alpha=1)
rr.fit(X_train, Y_train)
# fit() returns the estimator it was called on, so `model` is the same fitted object as `rr`.
model = rr

# Predictions on the training data and on the testing data, in that order.
Y_pred_train, Y_pred = rr.predict(X_train), rr.predict(X_test)


In [11]:
# Coefficients of the fitted Ridge model (`model` is what rr.fit returned).
model.coef_

array([-0.71920853,  4.55417009])

In [12]:
# Intercept term of the fitted Ridge model.
model.intercept_

2.98240259036276

In [13]:
# Evaluate the Ridge model on the training and the testing split.
#
# Both sets of predictions are taken from `rr` inside this cell. The shared name
# Y_pred_train is also assigned by the Lasso section, so reading it here would
# report Lasso numbers under the Ridge heading if the Lasso cell happened to be
# the one executed most recently.
ridge_pred_train = rr.predict(X_train)
rmseRidgeTraining = np.sqrt(mean_squared_error(Y_train, ridge_pred_train))
ridge_r2_training = r2_score(Y_train, ridge_pred_train)

print("The model performance for training set")
print("--------------------------------------")
print('RMSE is {}'.format(rmseRidgeTraining))
print('R2 score is {}'.format(ridge_r2_training))
print("\n")

# model evaluation for testing set
y_test_predict = rr.predict(X_test)
mse = mean_squared_error(Y_test, y_test_predict)
rmse = np.sqrt(mse)  # reuse the MSE instead of computing it a second time
r2 = r2_score(Y_test, y_test_predict)

print("The model performance for testing set")
print("--------------------------------------")
print('MSE {}'.format(mse))
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))

# Positive value: the training RMSE is larger than the testing RMSE.
print("Difference between training and Testing set")
print("--------------------------------------")
print("Difference between training and Testing Dataset RMS Calculated for Ridge {}".format(rmseRidgeTraining-rmse))

The model performance for training set
--------------------------------------
RMSE is 5.637164235841213
R2 score is 0.630069934330712


The model performance for testing set
--------------------------------------
MSE 26.44419839700965
RMSE is 5.142392283462012
R2 score is 0.6622443259654345
Difference between training and Testing set
--------------------------------------
Difference between training and Testing Dataset RMS Calculated for Ridge 0.49477195237920046


### Here we observed that the testing RMSE is lower for Ridge than for Lasso (5.14 vs 5.18), so Ridge performed slightly better on the test set. However, when we calculated the difference between the training and testing RMSE, the gap was smaller for Lasso (0.46 vs 0.49), so by that measure Lasso performed better.