## Import Necessary Modules 

In [1]:
# Ignore  the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

# data visualisation and manipulation
import numpy as np
import pandas as pd

In [2]:
#regression
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor


#model selection
from sklearn.model_selection import train_test_split,cross_validate

#evaluation metrics
from sklearn.metrics import mean_squared_log_error,mean_squared_error, r2_score,mean_absolute_error # for regression

### Loading the Data File  


In [3]:
boston = pd.read_csv("BostonHousingData.csv")

boston.shape

(506, 14)

In [4]:
boston.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [5]:
boston.columns

Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
       'ptratio', 'b', 'lstat', 'medv'],
      dtype='object')

### Renaming columns

In [6]:
boston=boston.rename(columns= {"nox" : "nitrogen oxides concentration" , 
                        "rm" : "room",
                        "dis" : "distance",
                        "rad" : "radial highways",
                        "ptratio" : "pupil-teacher ratio ",
                        "b" :  "Black",
                        "lstat" : "lower status in %",
                        "medv" : "prices"})

### Checking for null values

In [7]:
boston.isnull().any()

crim                             False
zn                               False
indus                            False
chas                             False
nitrogen oxides concentration    False
room                             False
age                              False
distance                         False
radial highways                  False
tax                              False
pupil-teacher ratio              False
Black                            False
lower status in %                False
prices                           False
dtype: bool

In [8]:
boston.head()

Unnamed: 0,crim,zn,indus,chas,nitrogen oxides concentration,room,age,distance,radial highways,tax,pupil-teacher ratio,Black,lower status in %,prices
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


### Checking for Duplicates 

In [9]:
boston.columns.duplicated()

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False])

In [10]:
boston = boston.round(3)


In [11]:
boston.head()

Unnamed: 0,crim,zn,indus,chas,nitrogen oxides concentration,room,age,distance,radial highways,tax,pupil-teacher ratio,Black,lower status in %,prices
0,0.006,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.027,0.0,7.07,0,0.469,6.421,78.9,4.967,2,242,17.8,396.9,9.14,21.6
2,0.027,0.0,7.07,0,0.469,7.185,61.1,4.967,2,242,17.8,392.83,4.03,34.7
3,0.032,0.0,2.18,0,0.458,6.998,45.8,6.062,3,222,18.7,394.63,2.94,33.4
4,0.069,0.0,2.18,0,0.458,7.147,54.2,6.062,3,222,18.7,396.9,5.33,36.2


In [12]:
set(boston['chas'])

{0, 1}

In [13]:
features = boston.drop('prices', axis = 1)

### Splitting data

In [14]:
from sklearn.model_selection import train_test_split
from sklearn import metrics


train,test = train_test_split(boston, test_size=0.3)

print(train.shape, test.shape)

(354, 14) (152, 14)


In [15]:
boston.columns

Index(['crim', 'zn', 'indus', 'chas', 'nitrogen oxides concentration', 'room',
       'age', 'distance', 'radial highways', 'tax', 'pupil-teacher ratio ',
       'Black', 'lower status in %', 'prices'],
      dtype='object')

In [16]:
train_x = train[['crim', 'zn', 'indus', 'chas', 'nitrogen oxides concentration', 'room','age', 'distance', 'radial highways', 'tax', 'pupil-teacher ratio ','Black', 'lower status in %']]
train_y =train.prices

test_x = test[['crim', 'zn', 'indus', 'chas', 'nitrogen oxides concentration', 'room','age', 'distance', 'radial highways', 'tax', 'pupil-teacher ratio ','Black', 'lower status in %']]
test_y = test.prices

## Applying Machine Learning Algorithms 


In [17]:
model = LinearRegression()
model.fit(train_x, train_y)
y_pred = model.predict(test_x)

mse = mean_squared_error(test_y, y_pred)
rmse = np.sqrt(mse)
print('RMSE :', rmse )

RMSE : 5.080269461289716


In [18]:
model = KNeighborsRegressor(n_neighbors=20)
model.fit(train_x, train_y)
y_pred = model.predict(test_x)

mse = mean_squared_error(test_y, y_pred)
rmse = np.sqrt(mse)
print('RMSE :', rmse )

RMSE : 6.70024403031632


In [19]:
model = SVR(kernel='rbf')
model.fit(train_x, train_y)
y_pred = model.predict(test_x)

mse = mean_squared_error(test_y, y_pred)
rmse = np.sqrt(mse)
print('RMSE :', rmse )

RMSE : 8.34285628043754


In [20]:
model = DecisionTreeRegressor(max_depth=15, min_samples_leaf=100)
model.fit(train_x, train_y)
y_pred = model.predict(test_x)

mse = mean_squared_error(test_y, y_pred)
rmse = np.sqrt(mse)
print('RMSE :', rmse )

RMSE : 5.95002972864001
