Estimate the cost of a house in Boston using Linear Regression with Gradient Descent

In [53]:
import math

import pandas as pd
from sklearn import datasets
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

try:
    # scikit-learn >= 0.18 provides the splitter in model_selection.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older releases only ship the legacy cross_validation module.
    from sklearn.cross_validation import train_test_split

In [54]:
# Load the Boston housing dataset bundled with scikit-learn and print its
# human-readable description (attribute list, instance count, target).
bean = datasets.load_boston()
print(bean.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [55]:
def load_boston(random_state=None):
    """Load the Boston housing data and return a standardized train/test split.

    The data is split first and the ``StandardScaler`` is fitted on the
    training features only; the test features are then transformed with the
    training statistics. Fitting the scaler on the full dataset before
    splitting would leak test-set mean/variance into the training features.

    Parameters
    ----------
    random_state : int, RandomState instance or None, optional
        Forwarded to ``train_test_split``. Pass an integer to make the split
        reproducible; the default ``None`` keeps the original behaviour of a
        different random split on every call.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` with the feature arrays
        standardized and the target arrays left unscaled.
    """
    boston = datasets.load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=random_state
    )

    # Fit on the training split only, then reuse those statistics for the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Return a list so the result has the same shape as train_test_split's own output.
    return [X_train, X_test, y_train, y_test]

In [56]:
# Unpack the four arrays returned by the notebook's load_boston() helper:
# scaled train/test features and the matching train/test targets.
X_train, X_test, y_train, y_test = load_boston()

In [57]:
# Inspect the dimensions (rows, columns) of the training feature matrix.
X_train.shape

(379, 13)

<li> Fitting a Linear Regression

In [58]:
# Plain linear regression with default settings, fitted on the training split.
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

<li> Making a Prediction

In [59]:
# Pair every held-out target with the linear model's prediction for that row.
[(actual, predicted) for actual, predicted in zip(y_test, lr.predict(X_test))]

[(27.5, 24.636077139509332),
 (31.600000000000001, 33.030876011375206),
 (23.800000000000001, 24.705065015490579),
 (23.300000000000001, 28.239001098631839),
 (21.0, 20.528717705167992),
 (19.0, 13.925653199089609),
 (15.1, 18.450218073767026),
 (19.100000000000001, 24.974972022098779),
 (23.800000000000001, 25.531530716787543),
 (13.4, 14.886800461058911),
 (32.700000000000003, 29.999333436950586),
 (23.699999999999999, 27.043828353256345),
 (18.800000000000001, 19.697925920733653),
 (48.799999999999997, 40.072573923393691),
 (17.199999999999999, 16.188666965517914),
 (11.800000000000001, 9.0127718853675045),
 (41.299999999999997, 33.626059534006821),
 (22.399999999999999, 22.984601635623534),
 (26.600000000000001, 27.573012023367973),
 (17.100000000000001, 17.796573854702888),
 (20.5, 20.129662586137457),
 (19.800000000000001, 22.945701195948484),
 (16.100000000000001, 18.943735703503577),
 (50.0, 39.644262789936029),
 (37.299999999999997, 33.618853489950268),
 (50.0, 40.722896562344

In [60]:
# Keep the linear model's test-set predictions for the metric cells below.
y_lrPred=lr.predict(X_test)

<li> Coefficient of Determination - $R^2$

In [61]:
# R^2 of the linear model on the held-out split.
r2Value = r2_score(y_true=y_test, y_pred=y_lrPred)
r2Value

0.72856722203722468

<li> Mean Squared Error - MSE

In [62]:
# Mean squared error of the linear model on the held-out split.
mseValue = mean_squared_error(y_true=y_test, y_pred=y_lrPred)
mseValue

23.844851383249011

<li> Root Mean Squared Error - RMSE

In [63]:
# Square root of the linear model's MSE, which puts the error back in the
# units of the target.
smseValue=math.sqrt(mseValue)
smseValue

4.88311902202363

<li> Fitting a Ridge Linear Regression

In [64]:
# Ridge regression with regularization strength alpha=1, fitted on the training split.
rlr = Ridge(alpha=1)
rlr.fit(X=X_train, y=y_train)

Ridge(alpha=1, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

<li> Making a Prediction with Ridge

In [65]:
# Pair every held-out target with the Ridge model's prediction for that row.
[(actual, predicted) for actual, predicted in zip(y_test, rlr.predict(X_test))]

[(27.5, 24.661236513306132),
 (31.600000000000001, 32.962001702166219),
 (23.800000000000001, 24.707386413046006),
 (23.300000000000001, 28.217286194173234),
 (21.0, 20.567805719606941),
 (19.0, 13.904876797102801),
 (15.1, 18.428244311570204),
 (19.100000000000001, 24.929353708824774),
 (23.800000000000001, 25.526546172366388),
 (13.4, 14.986173861087812),
 (32.700000000000003, 30.047763204727978),
 (23.699999999999999, 27.043255375482985),
 (18.800000000000001, 19.578587089732828),
 (48.799999999999997, 40.067786568904467),
 (17.199999999999999, 16.158021362886934),
 (11.800000000000001, 9.0867093194853474),
 (41.299999999999997, 33.570343650853367),
 (22.399999999999999, 23.000225023304822),
 (26.600000000000001, 27.566614241667946),
 (17.100000000000001, 17.819791703164647),
 (20.5, 20.141543913244249),
 (19.800000000000001, 22.959062261419476),
 (16.100000000000001, 18.923820094005258),
 (50.0, 39.630535424550985),
 (37.299999999999997, 33.536408845154774),
 (50.0, 40.704802550006

In [66]:
# Keep the Ridge model's test-set predictions for the metric cells below.
y_rlrPred=rlr.predict(X_test)

<li> $R^2$ of Ridge

In [67]:
# R^2 of the Ridge model on the held-out split.
r2rlrValue = r2_score(y_true=y_test, y_pred=y_rlrPred)
r2rlrValue

0.7285850633437454

<li> MSE of Ridge

In [68]:
# Mean squared error of the Ridge model on the held-out split.
mserlrValue = mean_squared_error(y_true=y_test, y_pred=y_rlrPred)
mserlrValue

23.843284058529925

<li> Root Mean Squared Error (RMSE) of Ridge

In [69]:
# Square root of the Ridge model's MSE, which puts the error back in the
# units of the target.
smserlrValue=math.sqrt(mserlrValue)
smserlrValue

4.882958535409647

<li> Fitting a Lasso Linear Regression

In [70]:
# Lasso regression with regularization strength alpha=0.1, fitted on the training split.
llr = Lasso(alpha=0.1)
llr.fit(X=X_train, y=y_train)

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

<li> Making a Prediction with Lasso

In [71]:
# Pair every held-out target with the Lasso model's prediction for that row.
[(actual, predicted) for actual, predicted in zip(y_test, llr.predict(X_test))]

[(27.5, 24.99987274909499),
 (31.600000000000001, 31.586248942694603),
 (23.800000000000001, 24.874152000441061),
 (23.300000000000001, 27.898049974993548),
 (21.0, 20.972242850880196),
 (19.0, 13.671641950195593),
 (15.1, 17.943043871667761),
 (19.100000000000001, 24.694408893617997),
 (23.800000000000001, 25.468351900517085),
 (13.4, 15.993136128682085),
 (32.700000000000003, 30.705089060682148),
 (23.699999999999999, 27.293090631437011),
 (18.800000000000001, 18.11638780032682),
 (48.799999999999997, 39.662104190379353),
 (17.199999999999999, 15.791092498525565),
 (11.800000000000001, 9.9117729594642885),
 (41.299999999999997, 33.333041265048067),
 (22.399999999999999, 23.37706265455893),
 (26.600000000000001, 27.557536761997191),
 (17.100000000000001, 17.918733396172083),
 (20.5, 20.097803766707621),
 (19.800000000000001, 22.961596735072252),
 (16.100000000000001, 18.955630909351896),
 (50.0, 39.210990086657119),
 (37.299999999999997, 32.195572915160596),
 (50.0, 40.803429134740277

In [72]:
# Keep the Lasso model's test-set predictions for the metric cells below.
y_llrPred=llr.predict(X_test)

<li> $R^2$ of Lasso

In [73]:
# R^2 of the Lasso model on the held-out split.
r2llrValue = r2_score(y_true=y_test, y_pred=y_llrPred)
r2llrValue

0.72111147832279898

<li> MSE of Lasso

In [74]:
# Mean squared error of the Lasso model on the held-out split.
msellrValue = mean_squared_error(y_true=y_test, y_pred=y_llrPred)
msellrValue

24.499824235667202

<li> Root Mean Squared Error (RMSE) of Lasso

In [75]:
# Square root of the Lasso model's MSE, which puts the error back in the
# units of the target.
smsellrValue=math.sqrt(msellrValue)
smsellrValue

4.949729713395187