In [17]:
# Import packages
import warnings
warnings.filterwarnings('ignore')
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, cross_val_predict
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

In [18]:
columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
dataset = pd.read_csv('housing.csv', header=None, delimiter=r"\s+", names=columns)
dataset.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [19]:
x = dataset.drop(columns=['MEDV'])
y = dataset['MEDV']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=101)

In [20]:
x.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33


In [21]:
y.head()

0    24.0
1    21.6
2    34.7
3    33.4
4    36.2
Name: MEDV, dtype: float64

In [22]:
kfold = KFold(shuffle=True, random_state=101, n_splits=5)

## Using different Regression Algorithms


### Linear Regression

In [23]:
lr = LinearRegression()
lr.fit(x_train, y_train)
lr_pred = lr.predict(x_test)
lr_accuracy = lr.score(x_test, y_test)
print("Linear Regression Accuracy: ", lr_accuracy)

# Cross Validating
# Score is the difference between the samples in the dataset and the predictions made by the model
lr_score = cross_val_predict(lr, x, y, cv=kfold)
print("Linear Regression Cross Validation Score: ", r2_score(y, lr_score))

Linear Regression Accuracy:  0.7123963332666878
Linear Regression Cross Validation Score:  0.712407644501657


### KNeighborsRegressor 

In [24]:
kn = KNeighborsRegressor()
kn.fit(x_train, y_train)
kn_pred = kn.predict(x_test)
kn_accuracy = kn.score(x_test, y_test)
print("KNeighborsRegressor Accuracy: ", kn_accuracy)

# Cross Validating
# Score is the difference between the samples in the dataset and the predictions made by the model
kn_score = cross_val_predict(kn, x, y, cv=kfold)
print("KNeighborsRegressor Cross Validation Score: ", r2_score(y, kn_score))

KNeighborsRegressor Accuracy:  0.5139991753741371
KNeighborsRegressor Cross Validation Score:  0.5047827112703703


### SVR

In [25]:
svr = SVR()
svr.fit(x_train, y_train)
svr_pred = svr.predict(x_test)
svr_accuracy = svr.score(x_test, y_test)
print("SVR Accuracy: ", svr_accuracy)

# Cross Validating
# Score is the difference between the samples in the dataset and the predictions made by the model
svr_score = cross_val_predict(svr, x, y, cv=kfold)
print("SVR Cross Validation Score: ", r2_score(y, svr_score))

SVR Accuracy:  0.13979861118195425
SVR Cross Validation Score:  0.19809353181777167


## Regularization
### Using Lasso and Ridge with alpha values = [0.1, 0.3, 0.5]

In [26]:
lasso1 = Lasso(alpha=0.1)
lasso1.fit(x_train,y_train)
print("Regularized Linear Score using Lasso with alpha = 0.1 : ",lasso1.score(x_test, y_test))

lasso3 = Lasso(alpha=0.3)
lasso3.fit(x_train,y_train)
print("Regularized Linear Score using Lasso with alpha = 0.3 :",lasso3.score(x_test, y_test))

lasso5 = Lasso(alpha=0.5)
lasso5.fit(x_train,y_train)
print("Regularized Linear Score using Lasso with alpha = 0.5 :",lasso5.score(x_test, y_test))

ridge1 = Ridge(alpha=0.1)
ridge1.fit(x_train,y_train)
print("Regularized Linear Score using Ridge with alpha = 0.1 : ", ridge1.score(x_test, y_test))

ridge3 = Ridge(alpha=0.3)
ridge3.fit(x_train,y_train)
print("Regularized Linear Score using Ridge with alpha = 0.3 : ", ridge3.score(x_test, y_test))

ridge5 = Ridge(alpha=0.5)
ridge5.fit(x_train,y_train)
print("Regularized Linear Score using Ridge with alpha = 0.5 : ", ridge5.score(x_test, y_test))

Regularized Linear Score using Lasso with alpha = 0.1 :  0.7039522461898744
Regularized Linear Score using Lasso with alpha = 0.3 : 0.6944741183759
Regularized Linear Score using Lasso with alpha = 0.5 : 0.6775198121307752
Regularized Linear Score using Ridge with alpha = 0.1 :  0.7122925098276602
Regularized Linear Score using Ridge with alpha = 0.3 :  0.7118931677849827
Regularized Linear Score using Ridge with alpha = 0.5 :  0.7114081364970004
