# Polynomial Regression on Boston dataset

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

Loading the dataset and selecting the LSTAT and RM features (target: MEDV)

In [2]:
from sklearn.datasets import load_boston

# NOTE(review): load_boston is deprecated in recent scikit-learn releases and
# is reported as removed from 1.2 onwards — confirm the installed version
# before re-running this notebook.
boston_dataset = load_boston()
boston_df = pd.DataFrame(data=boston_dataset.data, columns=boston_dataset.feature_names)

# Keep only the two predictors used by the model; the target column is MEDV.
feature_cols = ['LSTAT', 'RM']
x = boston_df.loc[:, feature_cols].copy()
y = pd.DataFrame({'MEDV': boston_dataset.target})

# Sanity check: both frames should have the same number of rows.
for frame in (x, y):
    print(frame.shape)

(506, 2)
(506, 1)


Generating degree-2 polynomial features from the selected predictors

In [3]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the two predictors (LSTAT, RM) into every polynomial term up to
# degree 2.
#
# The expansion is built from boston_df rather than from `x` itself: the
# original `x = features.fit_transform(x)` consumed its own output, so running
# the cell a second time expanded the already-expanded matrix. Reading from
# boston_df makes the cell idempotent while producing the same `x` on a
# fresh top-to-bottom run.
features = PolynomialFeatures(degree=2)
x = features.fit_transform(boston_df[['LSTAT', 'RM']])

Splitting the Dataset into Training set and Test set

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# partition the same on every run.
train_x, test_x, train_y, test_y = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=5,
)

In [5]:
# Show the dimensions of each piece of the split, in the order:
# training features, test features, training target, test target.
for split in (train_x, test_x, train_y, test_y):
    print(split.shape)

(404, 6)
(102, 6)
(404, 1)
(102, 1)


Training the model

In [6]:
from sklearn.linear_model import LinearRegression

# Fit a linear model on the polynomial-expanded training features.
# The fit call is left as the final expression so the notebook displays the
# fitted estimator.
model = LinearRegression()
model.fit(X=train_x, y=train_y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

Predictions

In [7]:
# Run the fitted model over both splits; the training predictions come first,
# then the test predictions.
train_y_predict, test_y_predict = (
    model.predict(split_features) for split_features in (train_x, test_x)
)

Calculating metrics on Training and Test set predictions

In [8]:
from sklearn.metrics import r2_score, mean_squared_error


def report_performance(split_name, y_true, y_pred):
    """Print RMSE and R2 for one data split and return both values.

    Parameters
    ----------
    split_name : str
        Label inserted into the printed messages (e.g. "training", "test").
    y_true : array-like
        Observed target values for the split.
    y_pred : array-like
        Model predictions for the same rows.

    Returns
    -------
    tuple
        ``(rmse, score)`` where ``rmse`` is the square root of the mean
        squared error and ``score`` is the R2 score.
    """
    # Same RMSE formula for every split (the original mixed **0.5 and np.sqrt).
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    score = r2_score(y_true, y_pred)

    print("\nThe model performance for {} set".format(split_name))
    print("-"*40)
    print("RMSE is {}".format(rmse))
    print("R2 score is {}".format(score))
    # NOTE(review): score*100 is R2 expressed as a percentage, not an accuracy
    # figure. The wording is kept unchanged so the printed text matches the
    # output already recorded in the notebook.
    print("i.e. The polynomial model explains the house price for {} set by {:.2f}% accuracy".format(split_name, score*100))
    return rmse, score


# `rmse` and `score` are reassigned per split, so after this cell they hold
# the test-set values, exactly as before.
rmse, score = report_performance("training", train_y, train_y_predict)

rmse, score = report_performance("test", test_y, test_y_predict)
# The original test-set message ended with an extra newline; emit it here.
print()


The model performance for training set
----------------------------------------
RMSE is 4.703071027847756
R2 score is 0.7425094297364765
i.e. The polynomial model explains the house price for training set by 74.25% accuracy

The model performance for test set
----------------------------------------
RMSE is 3.784819884545044
R2 score is 0.8170372495892174
i.e. The polynomial model explains the house price for test set by 81.70% accuracy

