# Ridge Regression on Boston Housing Dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_boston

Loading the dataset

In [None]:
# Load the Boston housing data. The returned object exposes .data,
# .feature_names and .target, which are the attributes this notebook reads.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the environment pins an older release, or swap the data source.
boston_data = load_boston()

# Tabular view of the feature matrix, one labelled column per feature name.
boston_df = pd.DataFrame(
    boston_data.data,
    columns=boston_data.feature_names,
)

In [None]:
# Peek at the first five rows to sanity-check the column layout.
boston_df.head(n=5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [None]:
# Target: the dataset's target vector wrapped as a one-column frame named "MEDV".
Y = pd.DataFrame(data=boston_data.target, columns=["MEDV"])
# Features: every column of boston_df (this binds the same object, not a copy).
X = boston_df

## Ridge Regression model with linear features

In [None]:
from sklearn.preprocessing import scale
 
# Standardise features and target with sklearn's scale(). Both names are
# rebound to the scaled result, so every later cell (including the reported
# RMSE) works in scaled units rather than original price units.
# NOTE(review): scaling happens before the train/test split, so held-out rows
# contribute to the scaling statistics — consider fitting a scaler on the
# training split only.
X, Y = scale(X), scale(Y)

In [None]:
from sklearn.model_selection import train_test_split
 
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=5
)

# Report the shape of each split, one per line.
print(*(part.shape for part in (X_train, X_test, Y_train, Y_test)), sep="\n")

(404, 13)
(102, 13)
(404, 1)
(102, 1)


Training the model

In [None]:
from sklearn.linear_model import Ridge
 
# Fit a ridge regressor (penalty strength alpha=0.1) on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
ridge = Ridge(alpha=0.1).fit(X_train, Y_train)
print("Ridge model:", ridge.coef_)

Ridge model: [[-0.12221504  0.12505241  0.00046378  0.07477056 -0.20068959  0.2609502
   0.00333226 -0.34147204  0.34379568 -0.24021542 -0.22407009  0.11660369
  -0.46105328]]


Predictions on training and test data

In [None]:
# Predict on both splits with the fitted linear-feature model.
y_train_predict, y_test_predict = ridge.predict(X_train), ridge.predict(X_test)

Calculating metrics on training and test set predictions

In [None]:
 from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
 
rmse = (mean_squared_error(Y_train, y_train_predict)**(0.5))
r2 = r2_score(Y_train, y_train_predict)
 
print("Ridge Regression Model (Linear Features)")
print("-"*40)
 
print("\nThe model performance for training set")
print("-"*40)
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
print('i.e. The ridge reg. model explains the house price for training set {:.2f}% accurately'.format(r2*100))
 
rmse = (mean_squared_error(Y_test, y_test_predict)**(0.5))
r2 = r2_score(Y_test, y_test_predict)
 
print("\nThe model performance for testing set")
print("-"*40)
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
print('i.e. The ridge reg. model explains the house price for test set {:.2f}% accurately'.format(r2*100))

Ridge Regression Model (Linear Features)
----------------------------------------

The model performance for training set
----------------------------------------
RMSE is 0.5159988350756246
R2 score is 0.738339142398241
i.e. The ridge reg. model explains the house price for training set 73.83% accurately

The model performance for testing set
----------------------------------------
RMSE is 0.4970909218215834
R2 score is 0.7335676379772185
i.e. The ridge reg. model explains the house price for test set 73.36% accurately


## Ridge Regression model with polynomial features

In [None]:
from sklearn.preprocessing import PolynomialFeatures
 
# Expand the 13 scaled features with all degree-2 terms (squares and pairwise
# products) plus a constant column, giving 105 columns in total.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)

In [None]:
# Re-split using the polynomial design matrix, with the same test fraction and
# seed as the linear split. This rebinds X_train / X_test / Y_train / Y_test,
# so the linear-feature splits are no longer available after this cell.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_poly,
    Y,
    test_size=0.2,
    random_state=5,
)

# One shape per line, in the order: X_train, X_test, Y_train, Y_test.
print("\n".join(str(part.shape) for part in (X_train, X_test, Y_train, Y_test)))

(404, 105)
(102, 105)
(404, 1)
(102, 1)


Training the model on the polynomial features

In [None]:
# Refit ridge on the polynomial features with a smaller penalty (alpha=0.003).
# This rebinds `ridge`, replacing the linear-feature model fitted earlier.
ridge = Ridge(alpha=0.003).fit(X_train, Y_train)
print("Ridge model:", ridge.coef_)

Ridge model: [[ 0.00000000e+00  1.37436818e+00  6.03287003e-01  5.15552072e-01
   1.79263984e-02 -1.86197009e-01  3.58492912e-01 -2.51912457e-01
  -2.50081579e-01 -2.85710968e-01  3.00128781e-02 -3.06826027e-02
   1.79850262e-01 -3.80548617e-01  1.72806736e-02  2.57794528e+00
   1.32591869e+00  5.77728253e-01 -2.06522044e-03  5.91017178e-02
  -1.38791071e-01 -1.10678549e-01 -3.97624587e-01 -1.03224907e+00
   6.18127908e-01 -2.69589912e-02  1.60534947e-01 -4.11354648e-02
  -1.19546797e-01 -6.83341940e-02 -3.58973762e-01  4.60466980e-02
   3.19700732e-03 -7.82723427e-02 -8.56686674e-02  2.55206105e-01
  -3.95815044e-02  4.17463914e-01 -6.41771201e-02  1.33489440e-01
  -4.61025023e-02  1.34943042e-01  1.74389783e-01  1.26008887e-02
   1.14345706e-01 -2.10943801e-02  6.33237880e-02  5.86705401e-03
   4.26833213e-01  1.26375951e-02  6.08744511e-02 -5.61826423e-02
  -1.35213906e-01  3.24495194e-02  1.84529099e-01 -1.13857722e-01
   1.24344982e-01 -4.79267609e-02  6.57975194e-02 -9.71834530e-

Predictions on training and test set

In [None]:
# Predict on the training split first, then the test split, using the
# polynomial-feature model; the results overwrite the earlier predictions.
y_train_predict, y_test_predict = map(ridge.predict, (X_train, X_test))

Calculating metrics for the predictions on the training and test sets

In [None]:
# Evaluate the polynomial-feature ridge model on both splits.
# The target was standardised earlier in the notebook, so RMSE is expressed in
# scaled units, not in original price units.

# --- Training split ---
rmse = mean_squared_error(Y_train, y_train_predict) ** 0.5
r2 = r2_score(Y_train, y_train_predict)

print("Ridge Regression Model (Polynomial features)")
print("-"*40)

print("\nThe model performance for training set")
print("-"*40)
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
# R2 is the fraction of target variance explained by the model, shown here as a percentage.
print('i.e. The ridge reg. model explains the house price for training set {:.2f}% accurately'.format(r2*100))

# --- Test split ---
# rmse / r2 are deliberately rebound, so after this cell they hold the test-set values.
rmse = mean_squared_error(Y_test, y_test_predict) ** 0.5
r2 = r2_score(Y_test, y_test_predict)

print("\nThe model performance for testing set")
print("-"*40)
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
# This summary line reports the held-out split, so it must say "test set".
print('i.e. The ridge reg. model explains the house price for test set {:.2f}% accurately'.format(r2*100))

Ridge Regression Model (Polynomial features)
----------------------------------------

The model performance for training set
----------------------------------------
RMSE is 0.26416073249415045
R2 score is 0.9314231016653638
i.e. The ridge reg. model explains the house price for training set 93.14% accurately

The model performance for testing set
----------------------------------------
RMSE is 0.3499142284932476
R2 score is 0.8679803684954606
i.e. The ridge reg. model explains the house price for training set 86.80% accurately
