### Boston Housing Assignment
<li> We are going to use Boston Housing data to estimate the cost of houses in Boston using the number of bedrooms and the square feet available.


In [1]:
from sklearn import datasets
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

### Getting the boston housing data


In [3]:
bean=datasets.load_boston()
print (bean.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [4]:
def load_boston():
    scaler = StandardScaler()
    boston = datasets.load_boston()
    X=boston.data
    y=boston.target
    X = scaler.fit_transform(X)
    return train_test_split(X,y)

In [5]:
X_train, X_test, y_train, y_test = load_boston()

In [6]:
X_train.shape

(379, 13)

### Fitting a Linear Regression
<LI>Instantiating a new object(line 1).
<li> Give your regression object your training data.


In [7]:
clf=LinearRegression()
clf.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [8]:
zip(y_test, clf.predict(X_test))

<zip at 0x113f6b248>

### Making predictions

#### Implementing Sklearn's r2 and mse methods
<li> r2: Coefficient of determination
<li>mse : Mean squared error regression loss

In [9]:
r2_score(y_test, clf.predict(X_test))

0.75590255558606567

In [10]:
mean_squared_error(y_test,clf.predict(X_test))

25.339804284981064

### Sklearn's linear model Ridge & linear model Lasso
<li> Linear model Ridge : Linear least squares with l2 regularization: This model solves a regression model where the loss function is the linear least squares function and regularization is given by the l2-norm
<li >Linear Model trained with L1 prior as regularizer (aka the Lasso)

In [11]:

n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = Ridge(alpha=1.0)
clf.fit(X, y) 
Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)


Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [12]:
clf = linear_model.Lasso(alpha = 0.1)
clf.fit([[0, 0], [1, 1]], [0, 1])
Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)
clf.predict([[1, 1]])


array([ 0.8])

### Adjusting the regularization parameter

In [13]:
clf = linear_model.Lasso(alpha = 1.0)
clf.fit([[0, 0], [1, 1]], [0, 1])
Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)
clf.predict([[1, 1]])

array([ 0.5])