## Importing the Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
# loading data
from sklearn.datasets import load_boston
boston = load_boston()

# Evaluating the data
print(boston.keys())

# printing the information about the data
print(boston.DESCR)

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename', 'data_module'])
.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highway

## Loading the data

In [3]:
# loading data
from sklearn.datasets import load_boston
boston = load_boston()
boston

{'data': array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
         4.9800e+00],
        [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
         9.1400e+00],
        [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
         4.0300e+00],
        ...,
        [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         5.6400e+00],
        [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
         6.4800e+00],
        [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         7.8800e+00]]),
 'target': array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
        18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
        15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
        13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
        21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
        35.4, 24.7, 3

In [4]:
# Convert the data into proper data frame
df_boston = pd.DataFrame(boston.data,columns=boston.feature_names)
df_boston['price'] = pd.Series(boston.target)
df_boston.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


## Dividing the data into X and y

In [5]:
X = df_boston.drop('price', axis = 1)
y = df_boston['price']
X.shape, y.shape

((506, 13), (506,))

## Check for missing value

In [6]:
df_boston.isnull().sum()

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
price      0
dtype: int64

## Dividing the data into train test split

In [7]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.25,random_state = 3)
X_train.shape, X_test.shape

((379, 13), (127, 13))

## Perform Feature Scaling on X_train and X_test

In [8]:
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()

ss.fit_transform(X_train)

array([[-4.06684342e-01,  3.50644266e+00, -1.43713107e+00, ...,
        -2.41162804e-01,  4.25613228e-01, -1.09420816e+00],
       [ 5.05466785e-01, -4.77726531e-01,  1.02084908e+00, ...,
         8.23789658e-01, -3.78641020e+00,  5.84333234e-01],
       [-4.04461749e-01, -4.77726531e-01, -8.59039766e-01, ...,
         8.23789658e-01,  4.27867197e-01, -5.82204602e-01],
       ...,
       [-3.86896021e-01,  4.96181492e-01, -7.61477758e-01, ...,
         3.14464567e-01,  4.02107555e-01, -8.52044314e-01],
       [-2.82634676e-01, -4.77726531e-01,  1.23635919e+00, ...,
        -1.72283579e+00, -1.39488913e-01, -8.26551846e-02],
       [ 1.63889075e-03, -4.77726531e-01,  1.02084908e+00, ...,
         8.23789658e-01,  2.63112821e-01, -3.49727311e-01]])

In [9]:
ss.fit_transform(X_test)

array([[-0.42959569, -0.5182122 , -0.7456859 , ..., -0.53445004,
         0.31962857, -1.21534028],
       [-0.42454808, -0.5182122 ,  1.558248  , ...,  1.21776185,
         0.42578634,  0.31089325],
       [ 0.65260631, -0.5182122 ,  1.00172095, ...,  0.75665346,
         0.21170738,  0.62490302],
       ...,
       [-0.43310582, -0.5182122 , -0.23321112, ...,  0.29554506,
         0.45893861,  1.26752766],
       [-0.45452133, -0.5182122 , -0.39914135, ...,  1.07942933,
         0.42543366,  0.58546923],
       [-0.44193368, -0.02263276, -0.50046157, ..., -1.54888851,
         0.4074468 ,  1.16675243]])

## Applying the model on train data

In [10]:
from sklearn.neighbors import KNeighborsRegressor
model = KNeighborsRegressor(n_neighbors = 4, metric = 'minkowski', p = 2)
model.fit(X_train,y_train)

KNeighborsRegressor(n_neighbors=4)

## Perform prediction on test data

In [11]:
pred = model.predict(X_test)
pred

array([32.975, 17.5  , 16.325, 29.95 , 25.925, 29.   , 12.7  , 11.425,
       30.4  , 24.325, 19.875, 20.075, 25.925, 40.85 , 19.6  , 24.925,
       18.925, 16.025, 22.5  , 31.2  , 20.125, 35.375, 21.55 , 28.85 ,
       14.1  , 23.55 , 17.675, 32.225, 21.7  , 36.425, 14.975, 34.3  ,
       15.1  , 16.575, 19.875, 23.425, 20.6  , 28.125, 25.45 , 21.275,
       22.5  , 10.925, 21.4  , 14.75 , 16.075, 21.125, 30.25 , 24.05 ,
       36.025, 21.625, 23.9  , 18.525, 34.65 , 19.45 , 20.5  , 13.3  ,
       36.45 , 15.925, 29.95 , 17.5  , 32.25 , 21.375, 31.9  , 37.6  ,
       13.3  , 26.925, 34.3  , 25.4  , 24.525, 32.675, 22.8  , 24.75 ,
       16.575, 32.175, 12.6  , 21.4  , 30.875, 20.075, 19.55 , 17.05 ,
       21.375, 32.65 , 23.625, 20.95 , 18.925, 12.05 , 26.15 , 12.05 ,
       22.225, 22.35 , 27.025, 20.85 , 21.025, 30.475, 24.95 , 16.75 ,
       39.1  , 13.2  , 23.675, 28.5  , 17.75 , 22.325, 11.95 , 17.05 ,
       31.875, 24.925, 21.625, 30.175, 20.95 , 32.675, 14.225, 11.5  ,
      

## Comparing the prediction and test set results

In [12]:
y_pred = model.predict(X_test)
y_test = y_test.values
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[32.975 44.8  ]
 [17.5   17.1  ]
 [16.325 17.8  ]
 [29.95  33.1  ]
 [25.925 21.9  ]
 [29.    21.   ]
 [12.7   18.4  ]
 [11.425 10.4  ]
 [30.4   23.1  ]
 [24.325 20.   ]
 [19.875 15.7  ]
 [20.075 41.3  ]
 [25.925 33.3  ]
 [40.85  30.7  ]
 [19.6    8.5  ]
 [24.925  6.3  ]
 [18.925 21.2  ]
 [16.025 16.2  ]
 [22.5   15.6  ]
 [31.2   24.1  ]
 [20.125 23.9  ]
 [35.375 50.   ]
 [21.55  23.2  ]
 [28.85  23.4  ]
 [14.1   12.8  ]
 [23.55  24.6  ]
 [17.675 18.8  ]
 [32.225 16.1  ]
 [21.7   18.2  ]
 [36.425 24.3  ]
 [14.975 14.8  ]
 [34.3   37.3  ]
 [15.1   21.4  ]
 [16.575 18.6  ]
 [19.875 18.8  ]
 [23.425 13.9  ]
 [20.6   24.5  ]
 [28.125 31.5  ]
 [25.45  18.2  ]
 [21.275 20.7  ]
 [22.5   19.4  ]
 [10.925  9.7  ]
 [21.4   21.5  ]
 [14.75  14.9  ]
 [16.075 21.7  ]
 [21.125 26.5  ]
 [30.25  20.7  ]
 [24.05  19.3  ]
 [36.025 24.5  ]
 [21.625 19.3  ]
 [23.9   26.6  ]
 [18.525 23.   ]
 [34.65  45.4  ]
 [19.45  19.8  ]
 [20.5   22.7  ]
 [13.3   23.1  ]
 [36.45  50.   ]
 [15.925 17.8  ]
 [29.95  29.1 

## Evaluation of the model

In [16]:
from sklearn.metrics import mean_squared_error
error = mean_squared_error(y_test, pred, squared = False)
print('RMSE : ', error)

RMSE :  7.053721375006279


In [17]:
from sklearn.metrics import mean_squared_error
error = mean_squared_error(y_test, pred)
print('MSE : ', error)

MSE :  49.75498523622047


In [19]:
from sklearn.metrics import r2_score
r2 = r2_score(y_test, y_pred)
r2

0.38620603397108455