In [1]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression



In [2]:
bean = datasets.load_boston()
print(bean.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [3]:
def load_boston():
    scaler = StandardScaler()
    boston = datasets.load_boston()
    X=boston.data
    y=boston.target
    X = scaler.fit_transform(X)
    return train_test_split(X,y)

In [4]:
X_train, X_test, y_train, y_test = load_boston()

In [5]:
X_train.shape

(379, 13)

In [6]:

clf = LinearRegression()
clf.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [7]:
list(zip (y_test, clf.predict(X_test)))

[(17.800000000000001, 17.334046475610823),
 (30.100000000000001, 25.658688427932702),
 (34.700000000000003, 30.149383284559331),
 (26.5, 25.341276128263917),
 (36.399999999999999, 32.84452563082796),
 (21.699999999999999, 20.475332312949231),
 (20.100000000000001, 23.564201295492538),
 (26.199999999999999, 24.326199055564445),
 (30.100000000000001, 29.023169115046397),
 (12.6, 18.053202073713894),
 (16.699999999999999, 19.478724678224186),
 (39.799999999999997, 34.023382479162812),
 (13.0, 17.401952971712518),
 (17.800000000000001, 20.297380481858156),
 (13.4, 13.576240146284924),
 (23.100000000000001, 9.9197380497400971),
 (20.699999999999999, 26.033946508358746),
 (33.100000000000001, 35.30041842869776),
 (11.699999999999999, 15.722052619628752),
 (24.300000000000001, 23.81048703954249),
 (14.300000000000001, 14.194298967665883),
 (19.5, 19.565896209526844),
 (50.0, 24.714208682260516),
 (24.399999999999999, 24.070201600650993),
 (20.300000000000001, 19.657964716217013),
 (18.5, 13.6

In [13]:
r21 = r2_score(y_test, clf.predict(X_test)) 

In [14]:
mn1 = mean_squared_error(y_test, clf.predict(X_test))

In [18]:
list((r21 ,mn1))

[0.73459474571578265, 26.940013568557806]

In [20]:
from sklearn import linear_model

In [89]:
clfLS = linear_model.Lasso(alpha=0.1)

In [23]:
y_test.shape

(127,)

In [25]:
X_test.shape

(127, 13)

In [90]:
clfLS.fit(X_train,y_train)

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [91]:
print(clfLS.coef_)

[-0.84794939  0.76155608 -0.          0.85885331 -1.78098849  3.01786841
 -0.         -2.25546501  1.16176325 -0.43799713 -1.78826563  0.7719711
 -3.42652713]


In [40]:
print(clfLS.intercept_)

22.3363742692


In [42]:
clfLS.predict(X_train)

array([ 20.22019733,  14.23178364,  24.54804179,  25.04357848,
        15.18481916,  35.25117417,  17.67046839,  20.89157719,
        25.56206913,  22.48935856,  24.0124221 ,  36.97678147,
        24.61130069,  25.70657542,  17.36744933,  24.72501265,
        21.3585873 ,  16.67379625,  24.20106475,   6.85647533,
        30.08842716,  30.91841107,  16.59714997,  30.48078481,
        21.87087648,  18.46927984,  23.05692111,  11.640559  ,
        16.91663922,  17.24792318,  33.07419932,  18.94303416,
        19.682926  ,  24.03409454,  17.26882821,  19.3982878 ,
         6.08543965,  23.27514058,  11.15946078,  17.97192801,
        16.05513751,  32.67372488,  19.98850816,  26.45402381,
        37.13880333,  19.83940214,  33.29883779,  19.34591467,
        16.25741791,  21.82688859,  33.05613427,  17.80774829,
        18.39852983,  29.15800084,  19.51887668,  17.79432146,
        24.65107212,  28.03307597,  19.80746607,  20.76143691,
        25.2188626 ,  23.66079788,  32.32451858,  31.78

In [43]:
r2LS = r2_score(y_test, clfLS.predict(X_test))

In [44]:
mnLS = mean_squared_error(y_test, clfLS.predict(X_test))

In [45]:
list((r2LS ,mnLS))

[0.73038389531813119, 27.367436782743372]

In [46]:
clfLS2 = linear_model.Lasso(alpha=0.8)

In [47]:
clfLS2.fit(X_train,y_train)

Lasso(alpha=0.8, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [48]:
r2LS2 = r2_score(y_test, clfLS2.predict(X_test))

In [49]:
mnLS2 = mean_squared_error(y_test, clfLS2.predict(X_test))

In [50]:
list((r2LS2 ,mnLS2))

[0.68405752520217744, 32.069804273061898]

In [52]:
clfLS3 = linear_model.Lasso(alpha=0.0)

In [53]:
clfLS3.fit(X_train,y_train)

  if __name__ == '__main__':
  positive)


Lasso(alpha=0.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [54]:
r2LS3 = r2_score(y_test, clfLS3.predict(X_test))

In [55]:
mnLS3 = mean_squared_error(y_test, clfLS3.predict(X_test))

In [56]:
list((r2LS3 ,mnLS3))

[0.73459474571578265, 26.940013568557799]

In [76]:
clfLS4 = linear_model.Lasso(alpha=-0.5)

In [69]:
clfLS4.fit(X_train,y_train)

Lasso(alpha=-0.5, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [77]:
clfLS4.fit(X_train,y_train)

Lasso(alpha=-0.5, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [78]:
r2LS4 = r2_score(y_test, clfLS4.predict(X_test))

In [79]:
mnLS4 = mean_squared_error(y_test, clfLS4.predict(X_test))

In [80]:
list((r2LS4 ,mnLS4))

[0.60956673171208742, 39.631007206921808]

In [81]:
clfLS5 = linear_model.Lasso(alpha=2.5)

In [82]:
clfLS5.fit(X_train,y_train)

Lasso(alpha=2.5, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [83]:
r2LS5 = r2_score(y_test, clfLS5.predict(X_test))

In [84]:
mnLS5 = mean_squared_error(y_test, clfLS5.predict(X_test))

In [85]:
list((r2LS5 ,mnLS5))

[0.55166280478183205, 45.508556923791232]