### Imports

In [355]:
from sklearn import datasets
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

In [356]:
# Load scikit-learn's bundled Boston housing data and print its description text.
bean = datasets.load_boston()
print(bean.DESCR)

Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [357]:
def load_boston(test_size=None, random_state=None):
    """Return a standardized train/test split of the Boston housing data.

    The rows are split first and the ``StandardScaler`` is fitted on the
    training rows only; the held-out rows are then transformed with those
    same training statistics.  Fitting the scaler on the full data set
    before splitting would let test-set means and variances leak into the
    training features.

    Parameters
    ----------
    test_size : float, int or None, optional
        Passed straight to ``train_test_split``; ``None`` keeps its default
        split proportion.
    random_state : int, RandomState or None, optional
        Passed straight to ``train_test_split``; supply an int for a
        repeatable split.  ``None`` keeps the unseeded behaviour.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]``
    """
    boston = datasets.load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target,
        test_size=test_size, random_state=random_state)

    # Learn the scaling parameters from the training rows only, then reuse
    # them unchanged on the held-out rows.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return [X_train, X_test, y_train, y_test]
    

In [358]:
# Unpack the four arrays returned by load_boston(): train/test features and targets.
X_train, X_test, y_train, y_test = load_boston()

In [359]:
# Show the dimensions of the training feature matrix.
X_train.shape

(379, 13)

### Fitting a Linear Regression

It's as easy as instantiating a new regression object (line 1) and giving your regression object your training data
(line 2) by calling .fit(independent variables, dependent variable)



In [360]:
clf_LR = LinearRegression()  # new regression object, constructed with default arguments
clf_LR.fit(X_train, y_train)  # train on the training split; the fitted estimator is the cell's displayed value

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

### Making a Prediction
X_test is our holdout set of data.  We know the answer (y_test) but the computer does not.   

Using the command below, I create a tuple for each observation, combining the real value (y_test) with
the value our regressor predicts (clf_LR.predict(X_test)).

Use a similar format to get your r2 and mse metrics working.  Use the [scikit learn api](http://scikit-learn.org/stable/modules/model_evaluation.html) if you need help!

In [361]:
# Pair each held-out target with the linear model's prediction for the same
# row, then pull the predictions back out of those (actual, predicted) pairs.
output_LR = [(actual, predicted)
             for actual, predicted in zip(y_test, clf_LR.predict(X_test))]
y_pred_LR = [predicted for _, predicted in output_LR]

In [362]:
# Coefficient of determination of the linear model on the held-out split.
R2_LR = r2_score(y_true=y_test, y_pred=y_pred_LR)
print(R2_LR)

0.746871434181


In [363]:
# Root-mean-squared error of the linear model on the held-out split.
RMSE_LR = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred_LR))
print(RMSE_LR)

4.81279917919


### Implementing a New Model with L2 Regularization: sklearn.linear_model.Ridge

In [364]:
# Build the L2-regularized model and train it in one chained call; ``fit``
# hands back the estimator itself, which is then shown as the cell's value.
clf_Ridge = Ridge(alpha=1).fit(X_train, y_train)
clf_Ridge

Ridge(alpha=1, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [365]:
# Pair each held-out target with the Ridge prediction for the same row, then
# pull the predictions back out of those (actual, predicted) pairs.
output_Ridge = [(actual, predicted)
                for actual, predicted in zip(y_test, clf_Ridge.predict(X_test))]
y_pred_Ridge = [predicted for _, predicted in output_Ridge]

In [366]:
# Coefficient of determination of the Ridge model on the held-out split.
R2_Ridge = r2_score(y_true=y_test, y_pred=y_pred_Ridge)
print(R2_Ridge)

0.747812990757


In [367]:
# Root-mean-squared error of the Ridge model on the held-out split.
RMSE_Ridge = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred_Ridge))
print(RMSE_Ridge)

4.80383981005


### Implementing a New Model with L1 Regularization: sklearn.linear_model.Lasso

In [368]:
# Build the L1-regularized model and train it in one chained call; ``fit``
# hands back the estimator itself, which is then shown as the cell's value.
clf_Lasso = Lasso(alpha=1).fit(X_train, y_train)
clf_Lasso

Lasso(alpha=1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [369]:
# Pair each held-out target with the Lasso prediction for the same row, then
# pull the predictions back out of those (actual, predicted) pairs.
output_Lasso = [(actual, predicted)
                for actual, predicted in zip(y_test, clf_Lasso.predict(X_test))]
y_pred_Lasso = [predicted for _, predicted in output_Lasso]

In [370]:
# Coefficient of determination of the Lasso model on the held-out split.
R2_Lasso = r2_score(y_true=y_test, y_pred=y_pred_Lasso)
print(R2_Lasso)

0.714470790016


In [371]:
# Root-mean-squared error of the Lasso model on the held-out split.
RMSE_Lasso = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred_Lasso))
print(RMSE_Lasso)

5.11154787896


### Averaging R2 and RMSE over 100 random samples and multiple alpha values

In [372]:
# Seed NumPy's global RNG.  load_boston() is called without a random_state,
# so each split is drawn from that global state; without a seed the table
# below changes on every run.
np.random.seed(0)


def _fit_and_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the held-out split.

    Returns a ``(R2, RMSE)`` tuple computed from the test-set predictions.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return r2_score(y_test, y_pred), np.sqrt(mean_squared_error(y_test, y_pred))


data = []

# Take 100 random samples
for i in range(0, 100):

    # Load data (a fresh random train/test split on every iteration)
    X_train, X_test, y_train, y_test = load_boston()

    # Linear Regression takes no alpha, so it is fitted once per split; its
    # scores are repeated on every alpha row so the grouped table can show
    # all three models side by side.
    clf_LR = LinearRegression()
    R2_LR, RMSE_LR = _fit_and_score(clf_LR, X_train, X_test, y_train, y_test)

    # Increase alpha by factors of 10 for Ridge and Lasso Regression
    for j in range(0, 6):

        # Set alpha value: 0.01, 0.1, 1, 10, 100, 1000
        alpha = 0.01 * 10**j

        # Ridge Regression
        clf_Ridge = Ridge(alpha=alpha)
        R2_Ridge, RMSE_Ridge = _fit_and_score(
            clf_Ridge, X_train, X_test, y_train, y_test)

        # Lasso Regression
        clf_Lasso = Lasso(alpha=alpha)
        R2_Lasso, RMSE_Lasso = _fit_and_score(
            clf_Lasso, X_train, X_test, y_train, y_test)

        # Save data: one row per (split, alpha) combination
        data.append((alpha, R2_LR, R2_Ridge, R2_Lasso, RMSE_LR, RMSE_Ridge, RMSE_Lasso))

# Create dataframe
dataFrame = pd.DataFrame(data, columns=['alpha', 'R2_LR', 'R2_Ridge', 'R2_Lasso', 'RMSE_LR', 'RMSE_Ridge', 'RMSE_Lasso'])

# Group means by alpha level
print(dataFrame.groupby('alpha').mean())

            R2_LR  R2_Ridge  R2_Lasso   RMSE_LR  RMSE_Ridge  RMSE_Lasso
alpha                                                                  
0.01     0.718871  0.718874  0.719197  4.819348    4.819322    4.816543
0.10     0.718871  0.718901  0.714853  4.819348    4.819096    4.853237
1.00     0.718871  0.719138  0.652303  4.819348    4.817081    5.372725
10.00    0.718871  0.719703 -0.013045  4.819348    4.812537    9.182803
100.00   0.718871  0.699378 -0.013045  4.819348    4.990290    9.182803
1000.00  0.718871  0.506703 -0.013045  4.819348    6.414147    9.182803


### Conclusions

**Ridge Regression:** Regarding R2 performance, Ridge Regression only slightly outperformed Linear Regression on average where alpha <= 10.  There was a significant decrease in Ridge Regression R2 performance where alpha was >= 100.  Regarding RMSE performance, Ridge Regression likewise slightly outperformed Linear Regression where alpha <= 10.  Again, there was a sharp decline in performance where alpha was >= 100.

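**Lasso Regression:** Regarding R2 performance, Lasso Regression slightly outperformed Linear Regression where alpha == 0.01.  Regarding RMSE performance, Lasso again only slightly outperformed Linear Regression where alpha == 0.01.  Performance declined for every larger alpha; where alpha was >= 10 the average R2 was slightly negative and identical across alpha values, which is consistent with all coefficients having been shrunk to zero so that the model predicts a constant.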