### Linear Regression

In [104]:
from sklearn import datasets
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
import math

In [106]:
def load_boston():
    scaler = StandardScaler()
    boston = datasets.load_boston()
    X=boston.data
    y=boston.target
    X = scaler.fit_transform(X)
    return train_test_split(X,y)
    

In [107]:
X_train, X_test, y_train, y_test = load_boston()

In [108]:
X_train.shape

(379L, 13L)

In [109]:

clf = LinearRegression()
clf.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

### Predicting values

In [110]:
zip (y_test, clf.predict(X_test))

[(19.600000000000001, 20.667620920079024),
 (35.399999999999999, 34.063577107343463),
 (15.300000000000001, 20.367556946237837),
 (37.299999999999997, 34.500362031012791),
 (18.699999999999999, 20.7270798255568),
 (19.300000000000001, 21.144969619303609),
 (16.100000000000001, 19.366277540715103),
 (28.399999999999999, 30.822627019089943),
 (18.5, 12.688607380557398),
 (13.5, 13.124341083491567),
 (7.2000000000000002, 17.322368235949344),
 (19.899999999999999, 18.409100044994901),
 (21.699999999999999, 20.665830240379332),
 (21.0, 23.319822748238039),
 (37.600000000000001, 36.935629606461006),
 (26.600000000000001, 27.847381609527055),
 (19.5, 19.201553397134649),
 (36.200000000000003, 27.276875644413629),
 (18.899999999999999, 15.480104874250969),
 (18.199999999999999, 13.912411268056804),
 (15.0, 16.203246954136958),
 (24.300000000000001, 29.223876189420178),
 (17.800000000000001, 21.455382681760522),
 (31.699999999999999, 32.810453904580115),
 (14.0, 13.357283437267824),
 (27.100000

### Measuring Performance

### RMSE

In [111]:
y_pred= clf.predict(X_test)

In [112]:
mse= mean_squared_error(y_test, y_pred)

In [113]:
mse

19.159592740728751

In [114]:
rmse= math.sqrt(mse)
rmse

4.377167205022987

### R2 score

In [124]:
r2_score(y_test, y_pred)

0.77104391627820967

### Implementing Lasso Regression Model

In [125]:
from sklearn.linear_model import Lasso

In [126]:
lasso = Lasso(alpha = 0.)

In [127]:
lasso.fit(X_train, y_train)

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

### Predecting Values

In [133]:
zip(y_test, lasso.predict(X_test))

[(19.600000000000001, 20.45607761300343),
 (35.399999999999999, 33.221190056197059),
 (15.300000000000001, 20.962932533056893),
 (37.299999999999997, 33.123116358653164),
 (18.699999999999999, 21.098998657673871),
 (19.300000000000001, 21.388592281568478),
 (16.100000000000001, 18.793820731576822),
 (28.399999999999999, 29.140448078209339),
 (18.5, 13.310643352092146),
 (13.5, 13.505765497290227),
 (7.2000000000000002, 17.106693295966686),
 (19.899999999999999, 18.42428511921959),
 (21.699999999999999, 20.712180452146534),
 (21.0, 22.930888926424309),
 (37.600000000000001, 36.591432975173831),
 (26.600000000000001, 27.854659820884063),
 (19.5, 19.18182096744826),
 (36.200000000000003, 27.77848271610484),
 (18.899999999999999, 16.680578421778797),
 (18.199999999999999, 15.04809085900588),
 (15.0, 17.001611239846142),
 (24.300000000000001, 29.283555616481269),
 (17.800000000000001, 21.682988143315793),
 (31.699999999999999, 32.47643031445989),
 (14.0, 14.023072490278906),
 (27.1000000000

### RMSE

In [134]:
mse_lasso= mean_squared_error(y_test, lasso.predict(X_test))

In [130]:
mse_lasso

19.346865863649935

In [131]:
math.sqrt(mse_lasso)

4.398507231283125

### R2 score

In [132]:
r2_score(y_test, lasso.predict(X_test))

0.76880601271780469