# Linear Regression via SGD, QR, SVD, LU, etc.

## Imports

In [None]:
%matplotlib inline

from IPython.display import display, Math, Latex
import matplotlib.pyplot as plt
import numpy as np
from numpy import dot
from numpy.linalg import inv
import scipy
from scipy.stats import t
from sklearn import datasets, linear_model
from sklearn.cross_validation import train_test_split

## Boston Real Estate Prices

### Features

In [None]:
# Load the Boston housing dataset bundled with scikit-learn and print the
# description text that ships with it.
boston = datasets.load_boston()
print(boston.DESCR)

### Exploratory Data Analysis

#### Effect of % of lower-status population on price

In [None]:
# Look the column up by name as a plain integer index. The previous
# np.where(...) call returns a tuple of index arrays; used as a column index
# it adds singleton axes, so the plot only worked because scatter flattens
# its inputs.
lstat_col = list(boston.feature_names).index('LSTAT')

plt.scatter(boston.data[:, lstat_col], boston.target)
plt.title('% lower-status population vs. price', color='black')
plt.xlabel('LSTAT', fontsize='large')
plt.ylabel('PRICE', fontsize='large');  # trailing ';' hides the Text(...) repr


#### Effect of number of rooms on price

In [None]:
# Look the column up by name as a plain integer index. The previous
# np.where(...) call returns a tuple of index arrays; used as a column index
# it adds singleton axes, so the plot only worked because scatter flattens
# its inputs.
rm_col = list(boston.feature_names).index('RM')

plt.scatter(boston.data[:, rm_col], boston.target)
plt.title('avg. number of rooms vs. price', color='black')
plt.xlabel('RM', fontsize='large')
plt.ylabel('PRICE', fontsize='large');  # trailing ';' hides the Text(...) repr

#### Relationship between % lower-status and avg. number of rooms

In [None]:
# Resolve both columns by name as plain integer indices. The previous
# np.where(...) calls return tuples of index arrays; used as column indices
# they add singleton axes, so the plot only worked because scatter flattens
# its inputs.
feature_names = list(boston.feature_names)
lstat_col = feature_names.index('LSTAT')
rm_col = feature_names.index('RM')

plt.scatter(boston.data[:, lstat_col], boston.data[:, rm_col])
plt.title('% lower-status vs. avg. number of rooms', color='black')
plt.xlabel('LSTAT', fontsize='large')
plt.ylabel('RM', fontsize='large');  # trailing ';' hides the Text(...) repr

## Linear regression model

### Training / Test Set Split

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
split = train_test_split(
    boston.data,
    boston.target,
    test_size=0.3,
    random_state=42,
)
X_train, X_test, y_train, y_test = split

### Model Training

In [None]:
# Fit a linear regression with an intercept on the training split, then
# report R^2 on both splits followed by the learned weights.
bos_regr = linear_model.LinearRegression(fit_intercept=True, normalize=True)
bos_regr.fit(X_train, y_train)

score_line = "%-17s: %.3f"
train_r2 = bos_regr.score(X_train, y_train)
test_r2 = bos_regr.score(X_test, y_test)
print("\n" + score_line % ("Training set R^2", train_r2))
print(score_line % ("Test set R^2", test_r2))

weight_line = "%-10s: %8.3f"
print("\nModel weights:\n")
print(weight_line % ("Intercept", bos_regr.intercept_))
# One row per coefficient, labelled with the feature name at the same index.
for i, weight in enumerate(bos_regr.coef_):
    print(weight_line % (boston.feature_names[i], weight))

feature_notes = """

Feature description:

CRIM     per capita crime rate by town
ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
INDUS    proportion of non-retail business acres per town
CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
NOX      nitric oxides concentration (parts per 10 million)
RM       average number of rooms per dwelling
AGE      proportion of owner-occupied units built prior to 1940
DIS      weighted distances to five Boston employment centres
RAD      index of accessibility to radial highways
TAX      full-value property-tax rate per $10,000
PTRATIO  pupil-teacher ratio by town
B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
LSTAT    % lower status of the population
MEDV     Median value of owner-occupied homes in $1000's
"""
print(feature_notes)

## Completely Optional - Manual Weight Computation

In [None]:
diabetes = datasets.load_diabetes()

# Single-feature problem: keep only the first column of the data.
X_no_int = np.array(diabetes.data[:, 0])

# Column-vector shape (n_samples, 1), reused by later cells.
shape = (diabetes.data.shape[0], 1)

# Design matrix with an explicit intercept: a column of ones followed by
# the feature column.
X = np.hstack([np.ones(shape), X_no_int.reshape(shape)])
y = diabetes.target

### Scikit-learn result for comparison

In [None]:
# Reference fit: scikit-learn estimates the intercept itself, so it is given
# the feature column without the column of ones.
single_feature = X_no_int.reshape(shape)
diab_regr = linear_model.LinearRegression(fit_intercept=True).fit(single_feature, y)

# Displayed as [intercept, slope] for comparison with the manual results.
[diab_regr.intercept_, diab_regr.coef_[0]]

### 1) "Linear algebra 101" method: $(X^TX)^{-1}X^TY$

In [None]:
# Normal equations written out literally: w = (X^T X)^{-1} X^T y.
gram_inverse = inv(dot(X.T, X))
manual_weights = dot(dot(gram_inverse, X.T), y)
print(manual_weights)

### 2) Via QR factorization

In [None]:
# Factor X = Q R.
(q, r) = np.linalg.qr(X)

# With X = Q R the least-squares problem reduces to R w = Q^T y.
# Solve that system directly: rebuilding (R^T R)^{-1} R^T, as before,
# re-squares the condition number and throws away the benefit of QR.
np.linalg.solve(r, dot(q.T, y))

### 3) Via Cholesky factorization

In [None]:
# Factor the Gram matrix: X^T X = L L^T.
L = np.linalg.cholesky(dot(X.T, X))

# The normal equations become L (L^T w) = X^T y, solved in two steps:
# first L z = X^T y, then L^T w = z. The previous version multiplied L L^T
# back together and inverted it, so the factorization was never used.
np.linalg.solve(L.T, np.linalg.solve(L, dot(X.T, y)))


### 4) Via SVD

In [None]:
# NOTE: np.linalg.svd returns (U, s, V^H) -- the third value is already
# transposed, i.e. X = U[:, :k] * diag(s) * V. The name `V` is kept so that
# any later cell referring to it still works.
U, s, V = np.linalg.svd(X)

# Pseudo-inverse solution: w = V_true * diag(1/s) * U_k^T * y, where
# V_true = V.T. The previous version used V without the transpose, which is
# only correct when V happens to be symmetric. Dividing by s replaces the
# hard-coded np.ones(2), so this works for any number of columns.
dot(V.T, dot(U[:, :s.size].T, y) / s)

### 5) Via LU factorization