## Linear Regression: Review

The Boston Housing example.

| CRIM  |ZN |INDUS|CHAS| NOX | RM  |AGE | DIS |RAD|TAX|PTRATIO|  B  |LSTAT|price|
|------:|--:|----:|---:|----:|----:|---:|----:|--:|--:|------:|----:|----:|----:|
|0.00632| 18| 2.31|   0|0.538|6.575|65.2|4.090|  1|296|   15.3|396.9| 4.98| 24.0|
|0.02731|  0| 7.07|   0|0.469|6.421|78.9|4.967|  2|242|   17.8|396.9| 9.14| 21.6|
|0.02729|  0| 7.07|   0|0.469|7.185|61.1|4.967|  2|242|   17.8|392.8| 4.03| 34.7|
|0.03237|  0| 2.18|   0|0.458|6.998|45.8|6.062|  3|222|   18.7|394.6| 2.94| 33.4|
|0.06905|  0| 2.18|   0|0.458|7.147|54.2|6.062|  3|222|   18.7|396.9| 5.33| 36.2|

In [None]:
%matplotlib inline
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#import seaborn as sns
from pandas.plotting import scatter_matrix
from sklearn.linear_model import LinearRegression
import warnings
from sklearn.preprocessing import StandardScaler

In [None]:
warnings.filterwarnings('ignore')

In [None]:
boston = load_boston()

In [None]:
X = boston.data
y = boston.target

In [None]:
bdf = pd.DataFrame(X, columns=boston.feature_names)
bdf['price'] = y

In [None]:
#fitting a scikitlearn model
lr = LinearRegression()
lr.fit(bdf[['LSTAT']], y)
prds= lr.predict(bdf[['LSTAT']])

In [None]:
# Scatter the observed prices against LSTAT and overlay the fitted regression line.
plt.figure(figsize=(12, 7))
plt.scatter(bdf.LSTAT, bdf.price, label='Actual Price')
plt.plot(bdf.LSTAT, prds, color='red', linewidth=3, label='Predicted Price')
plt.title('LSTAT vs. Price in Boston Housing', loc='left')
plt.xlabel('LSTAT')
plt.ylabel('Price')
# frameon has to be a real boolean: the string 'false' is truthy, so the frame stayed on.
plt.legend(frameon=False)
# The path is relative to the working directory; the 'presentation' folder must already exist.
plt.savefig('presentation/img1.png')

## Model Expression

$$w_{LS} = (X^TX)^{-1}X^Ty$$



In [None]:
#calculate product inside parenthesis
xtx = X.T@X

In [None]:
#find inverse
inv = np.linalg.inv(xtx)

In [None]:
#compute weights
wLS = inv@X.T@y

## Writing a Function

```
take in X and y

compute xtx

compute inverse

compute weights

return weights
```

In [None]:
def wLS(X, y):
    '''
    This function provides an
    ordinary least squares fit of a dataset X on
    a target variable y by solving the normal
    equations (X^T X) w = X^T y.
    No intercept column is added; include a column
    of ones in X if an intercept is wanted.
    ----
    X = 2-D input array of feature variables; when it has
        fewer rows than columns it is transposed first
    y = array of target values, one entry (or row) per sample
    returns
    array of weights for basic linear regression
    raises
    numpy.linalg.LinAlgError when X^T X is singular
    '''
    X = np.asarray(X)
    y = np.asarray(y)
    # check dimensions: work with samples as rows
    if X.shape[0] < X.shape[1]:
        X = X.T
    # both sides of the normal equations
    xtx = X.T @ X
    xty = X.T @ y
    # solve the system directly rather than forming the explicit inverse,
    # which is slower and less accurate for ill-conditioned X^T X
    return np.linalg.solve(xtx, xty)

In [None]:
wLS(X, y)

## Classes in Python

```python
class ClassName:
    <statement-1>
    .
    .
    .
    <statement-N>
```

```python
class MyClass:
    """A simple example class"""
    i = 12345

    def f(self):
        return 'hello world'
```

In [None]:
class MyClass:
    """Minimal class with one class attribute and one method."""

    # class-level attribute, readable from the class and from every instance
    i = 12345

    def f(self):
        """Return a fixed greeting string."""
        greeting = 'hello world'
        return greeting
#create an instance
class1 = MyClass()

In [None]:
#use our method
class1.f()

### `__init__ ` and `self`

From the Python docs:


<div class="alert alert-info" role="alert">
The instantiation operation (“calling” a class object) creates an empty object. Many classes like to create objects with instances customized to a specific initial state. Therefore a class may define a special method named __init__(), like this:
<br> 
 
```
def __init__(self):
    self.data = []
```
</div>

```python
class Complex:
    def __init__(self, realpart, imagpart):
        self.r = realpart
        self.i = imagpart

x = Complex(3.0, -4.5)
x.r, x.i
```

In [None]:
class Complex:
    '''
    Simple container for a complex number,
    stored as separate real and imaginary parts.
    ----
    realpart = value kept in the attribute r
    imagpart = value kept in the attribute i
    '''
    def __init__(self, realpart, imagpart):
        # keep both parts on the instance under short attribute names
        self.r, self.i = realpart, imagpart

x = Complex(3.0, -4.5)
x.r, x.i

In [None]:
#a different class instance
x2 = Complex(2.3, 5.6)

In [None]:
#read the real and imaginary parts stored on the second instance
x2.r, x2.i

## Our Regression Class

```python
class Regression:
    '''
    This class contains basic linear
    regression capabilities.  
    - OLS fit a linear regression model
    - make predictions with the model
    '''
    def __init__(self, coefs_, intercept_):
        self.coefs_ = None
        self.intercept_ = None
        
    def OLS(self, X, y):
        ...
        self.coefs_ = wLS
        return wLS
    
    def predict(self, X):
        return predictions
```
    
    
    
    



In [None]:
class Regression:
    '''
    Linear regression fitted by ordinary least squares.
    ----
    fit_intercept = when True (default) a column of ones is
                    prepended to X so the first fitted weight
                    acts as the intercept
    attributes set by OLS
    coefs_ = weights of the feature columns
    intercept_ = length-one slice holding the intercept,
                 or 0.0 when fit_intercept is False
    '''

    def __init__(self, fit_intercept = True):
        self.coefs_ = None
        self.intercept_ = None
        self._fit_intercept = fit_intercept

    def OLS(self, X, y):
        '''
        This function provides an
        ordinary least squares fit of a dataset X on
        a target variable y by solving the normal
        equations (X^T X) w = X^T y.
        ----
        X = 2-D input array of feature variables; when it has
            fewer rows than columns it is transposed first
        y = array of target values; may be 1-D, a column,
            or a row (a row is transposed to a column)
        returns
        array of weights for basic linear regression
        (intercept first when fit_intercept is True)
        raises
        numpy.linalg.LinAlgError when X^T X is singular
        '''
        X = np.asarray(X)
        y = np.asarray(y)
        # Check shapes of input matrices: samples go on the rows.
        if X.shape[0] < X.shape[1]:
            X = X.T
        # Only a 2-D target can be a row vector; a 1-D target has no
        # second axis to compare against and is used unchanged.
        if y.ndim > 1 and y.shape[0] < y.shape[1]:
            y = y.T

        # Prepend ones to x matrix
        if self._fit_intercept:
            ones = np.ones((X.shape[0], 1))
            X = np.concatenate((ones, X), axis=1)

        # fit the model: solve the system instead of inverting X^T X
        w_ls = np.linalg.solve(X.T @ X, X.T @ y)

        # add intercepts and coefs
        if self._fit_intercept:
            self.intercept_ = w_ls[:1]
            self.coefs_ = w_ls[1:]
        else:
            # without the ones column every weight belongs to a feature
            self.intercept_ = 0.0
            self.coefs_ = w_ls
        return w_ls


In [None]:
#some test cases
lr = Regression()

In [None]:
X = np.array([[0, 2], [3, 7], [5, 9], [3.4,6]])
y = np.array([[2.1, 3.2, 4, 5.6]])

In [None]:
lr.OLS(X, y)

In [None]:
#model without intercept
lr2 = Regression(fit_intercept=False)

In [None]:
lr2.OLS(X, y)

### Predict

$$\hat{y} = \beta_0 + X\beta$$



```python
def predict(self, X):
    return self.intercept_ + X@self.coefs_
```

In [None]:
del(Regression)

In [None]:
class Regression:
    '''
    Linear regression fitted by ordinary least squares,
    with a predict method for new data.
    ----
    fit_intercept = when True (default) a column of ones is
                    prepended to X so the first fitted weight
                    acts as the intercept
    attributes set by wLS
    coefs_ = weights of the feature columns
    intercept_ = fitted intercept, or 0.0 when
                 fit_intercept is False
    '''

    def __init__(self, fit_intercept = True):
        self.coefs_ = None
        self.intercept_ = None
        self._fit_intercept = fit_intercept

    def wLS(self, X, y):
        '''
        This function provides an
        ordinary least squares fit of a dataset X on
        a target variable y by solving the normal
        equations (X^T X) w = X^T y.
        ----
        X = 2-D input array of feature variables; when it has
            fewer rows than columns it is transposed first
        y = array of target values; may be 1-D, a column,
            or a row (a row is transposed to a column)
        returns
        array of weights for basic linear regression
        (intercept first when fit_intercept is True)
        raises
        numpy.linalg.LinAlgError when X^T X is singular
        '''
        X = np.asarray(X)
        y = np.asarray(y)
        # Check shapes of input matrices: samples go on the rows.
        if X.shape[0] < X.shape[1]:
            X = X.T
        # Only a 2-D target can be a row vector; testing ndim replaces the
        # former catch-all except that silenced every error in this check.
        if y.ndim > 1 and y.shape[0] < y.shape[1]:
            y = y.T

        # Prepend ones to x matrix
        if self._fit_intercept:
            ones = np.ones((X.shape[0], 1))
            X = np.concatenate((ones, X), axis=1)

        # fit the model: solve the system instead of inverting X^T X
        w_ls = np.linalg.solve(X.T @ X, X.T @ y)

        # add intercepts and coefs
        if self._fit_intercept:
            self.intercept_ = w_ls[0]
            self.coefs_ = w_ls[1:]
        else:
            # without the ones column every weight belongs to a feature,
            # so predict must use all of them and add nothing
            self.intercept_ = 0.0
            self.coefs_ = w_ls
        return w_ls

    def predict(self, X):
        '''
        Apply the fitted model to new data.
        ----
        X = array with samples on the rows and one column per
            fitted feature (no automatic transpose is done here)
        returns
        intercept_ + X @ coefs_
        '''
        return self.intercept_ + X@self.coefs_


In [None]:
del(lr)

In [None]:
lr = Regression()
lr.wLS(X, y)

In [None]:
lr.coefs_

In [None]:
lr.intercept_

In [None]:
lr.predict(X)

In [None]:
#testing on bigger data
X = boston.data
y = boston.target

In [None]:
bos_reg = Regression()

In [None]:
bos_reg.wLS(X, y)

### Error

**Sum of squared errors**: $\sum_{i = 1}^n (\hat{y}_i - y_i)^2$

**Total sum of squares**: $\sum_{i = 1}^n (\bar{y} - y_i)^2$

**r$^2$** = $1 - \frac{sse}{tss}$

**Mean Squared Error**: $\frac{1}{n} \sum_{i = 1}^n (\hat{y}_i - y_i)^2$

In [None]:
def r2(actual_y, predicted_y):
    '''
    Coefficient of determination of a set of predictions.
    ----
    actual_y = array of observed target values
    predicted_y = array of predicted values, same shape
    returns
    1 minus the ratio of the sum of squared prediction errors
    to the sum of squared deviations of actual_y from its mean
    '''
    residuals = predicted_y - actual_y
    unexplained = np.sum(residuals**2)
    deviations = actual_y - np.mean(actual_y)
    total = np.sum(deviations**2)
    return 1 - unexplained/total

In [None]:
def mse(actual_y, predicted_y):
    '''
    Mean squared error of a set of predictions.
    ----
    actual_y = array of observed target values
    predicted_y = array of predicted values, same shape
    returns
    mean of the squared differences between the two arrays
    '''
    errors = actual_y - predicted_y
    squared = errors**2
    return np.mean(squared)

In [None]:
class Metrics:
    '''
    Error measures for a model evaluated on one dataset.
    ----
    X = feature data passed to model.predict
    y = array of observed target values
    model = object exposing a predict(X) method
    '''

    def __init__(self, X, y, model):
        self.data, self.target, self.model = X, y, model

    def r2(self):
        '''Return 1 - sse/tse for the model's predictions on the stored data.'''
        residuals = self.target - self.model.predict(self.data)
        unexplained = np.sum(residuals**2)
        centered = self.target - np.mean(self.target)
        total = np.sum(centered**2)
        return 1 - unexplained/total

    def mse(self):
        '''Return the mean of the squared prediction errors.'''
        residuals = self.target - self.model.predict(self.data)
        return np.mean(residuals**2)

    def rmse(self):
        '''Return the square root of the mean squared error.'''
        mean_squared = self.mse()
        return mean_squared**0.5

    def summary_printed(self):
        '''Print r2, MSE and RMSE, each formatted with 4 significant digits.'''
        template = 'The r2 score is {:.4}\nThe Mean Squared Error is {:.4}\nand the RMSE is {:.4}'
        print(template.format(self.r2(), self.mse(), self.rmse()))
        

In [None]:
lr = Regression()
lr.wLS(X, y)
performance = Metrics(X, y, lr)

In [None]:
performance.summary_printed()

$$ y = mx + b$$

### Polynomial Features

$$ y = a + bx_i + cx_i^2 $$



In [None]:
from sklearn.preprocessing import PolynomialFeatures
x = np.array([2, 3, 4])
poly = PolynomialFeatures(3, include_bias=False)
poly.fit_transform(x[:, None])

### sklearn pipelines

In [None]:
from sklearn.pipeline import make_pipeline
poly_model = make_pipeline(PolynomialFeatures(7),
                           LinearRegression())

In [None]:
from sklearn.preprocessing import PolynomialFeatures
x = np.array([2, 3, 4])
poly = PolynomialFeatures(3, include_bias=False)
poly.fit_transform(x[:, None])

In [None]:
from sklearn.pipeline import make_pipeline
poly_model = make_pipeline(PolynomialFeatures(7),
                           LinearRegression())

In [None]:
#fitting a high order polynomial to sin with noise
rng = np.random.RandomState(1)  # fixed seed so the sample is reproducible
x = 10 * rng.rand(50)  # 50 inputs drawn uniformly from [0, 10)
y = np.sin(x) + 0.1 * rng.randn(50)  # sine signal plus Gaussian noise scaled by 0.1
xfit = np.linspace(0, 10, 1000)  # evenly spaced grid used to draw the fitted curve

# x and xfit are 1-D, so a trailing axis is added to hand them over as single-column 2-D arrays
poly_model.fit(x[:, np.newaxis], y)
yfit = poly_model.predict(xfit[:, np.newaxis])

# noisy samples as points, fitted polynomial as a line
plt.scatter(x, y)
plt.plot(xfit, yfit);

### Any Basis

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin

#a class for generating features from gaussian basis
class GaussianFeatures(BaseEstimator, TransformerMixin):
    """Uniformly spaced Gaussian features for one-dimensional input

    N = number of Gaussian centers placed between the data minimum and maximum
    width_factor = multiple of the spacing between neighbouring centers
                   used as the width of every Gaussian
    """

    def __init__(self, N, width_factor=2.0):
        self.N = N
        self.width_factor = width_factor

    @staticmethod
    def _gauss_basis(x, y, width, axis=None):
        # unnormalised Gaussian of the scaled offset, squared offsets summed over `axis`
        scaled = (x - y) / width
        squared_total = np.sum(scaled ** 2, axis)
        return np.exp(-0.5 * squared_total)

    def fit(self, X, y=None):
        # spread N centers evenly across the range of the data
        centers = np.linspace(X.min(), X.max(), self.N)
        spacing = centers[1] - centers[0]
        self.centers_ = centers
        self.width_ = self.width_factor * spacing
        return self

    def transform(self, X):
        # the trailing axis lets each sample broadcast against every center;
        # summing over axis 1 then leaves one column per center
        expanded = X[:, :, np.newaxis]
        return self._gauss_basis(expanded, self.centers_, self.width_, axis=1)
    
gauss_model = make_pipeline(GaussianFeatures(20),
                            LinearRegression())
gauss_model.fit(x[:, np.newaxis], y)
yfit = gauss_model.predict(xfit[:, np.newaxis])

plt.scatter(x, y)
plt.plot(xfit, yfit)
plt.xlim(0, 10);

## Regularization

![](https://upload.wikimedia.org/wikipedia/commons/b/bd/Tychonoff.jpg)
> "Tikhonov regularization, named for Andrey Tikhonov, is the most commonly used method of regularization of ill-posed problems. In statistics, the method is known as ridge regression, in machine learning it is known as weight decay, and with multiple independent discoveries, it is also variously known as the Tikhonov–Miller method, the Phillips–Twomey method, the constrained linear inversion method, and the method of linear regularization. It is related to the Levenberg–Marquardt algorithm for non-linear least-squares problems."

In [None]:
#projecting 30 gaussian basis
model = make_pipeline(GaussianFeatures(30),
                      LinearRegression())
model.fit(x[:, np.newaxis], y)

plt.scatter(x, y)
plt.plot(xfit, model.predict(xfit[:, np.newaxis]))

plt.xlim(0, 10)
plt.ylim(-1.5, 1.5);

In [None]:
def basis_plot(model, title=None):
    '''
    Fit a two-step pipeline to the notebook's x and y and draw two
    stacked panels: the data with the fitted curve on top, and the
    fitted coefficients against the basis centers underneath.
    ----
    model = pipeline whose first step exposes centers_ and whose
            second step exposes coef_ once fitted
    title = optional title for the top panel
    '''
    fig, (fit_ax, coef_ax) = plt.subplots(2, sharex=True)
    model.fit(x[:, np.newaxis], y)

    # top panel: samples and the model evaluated on the xfit grid
    fit_ax.scatter(x, y)
    curve = model.predict(xfit[:, np.newaxis])
    fit_ax.plot(xfit, curve)
    fit_ax.set(xlabel='x', ylabel='y', ylim=(-1.5, 1.5))
    if title:
        fit_ax.set_title(title)

    # bottom panel: one coefficient per basis center
    basis_step = model.steps[0][1]
    regressor_step = model.steps[1][1]
    coef_ax.plot(basis_step.centers_, regressor_step.coef_)
    coef_ax.set(xlabel='basis location', ylabel='coefficient', xlim=(0, 10))
    
model = make_pipeline(GaussianFeatures(30), LinearRegression())
basis_plot(model)

In [None]:
from sklearn.linear_model import Ridge
model = make_pipeline(GaussianFeatures(30), Ridge(alpha=0.1))
basis_plot(model, title='Ridge Regression')

### Intuition

![](images/reg.png)

### Mathematics

$$\displaystyle \hat{\beta}^{ridge} = (X^TX + \lambda I)^{-1}X^Ty$$

In [None]:
from sklearn.datasets import load_boston
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import BayesianRidge
X = boston.data
y  = boston.target

In [None]:
def ridge(X, y, alpha):
    '''
    Closed-form ridge regression weights, obtained by
    solving (X^T X + alpha*I) w = X^T y.
    No intercept column is added, and every column of X
    is penalised.
    ----
    X = 2-D array of feature variables, samples on the rows
    y = array of target values, one entry (or row) per sample
    alpha = regularisation strength multiplying the identity
    returns
    array of ridge regression weights
    raises
    numpy.linalg.LinAlgError when X^T X + alpha*I is singular
    '''
    n_features = X.shape[1]
    penalised = alpha*np.eye(n_features) + X.T@X
    # solve the system directly rather than forming the explicit inverse
    return np.linalg.solve(penalised, X.T@y)

In [None]:
ridge(X, y, 0.1)

### The `sklearn` way

In [None]:
from sklearn.linear_model import Ridge

In [None]:
Ridge(alpha = 0.1).fit(X, y).coef_

In [None]:
pipe = make_pipeline(StandardScaler(), Ridge())

In [None]:
params = {'ridge__alpha': [0.1, 1.0, 4.0, 10, 100]}

In [None]:
grid = GridSearchCV(pipe, param_grid=params, cv = 5)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [None]:
grid.fit(X_train, y_train)

In [None]:
br = BayesianRidge()
pipe = make_pipeline(StandardScaler(), br)
pipe.fit(X_train, y_train)

In [None]:
bayes_pred = pipe.predict(X_test)

In [None]:
pipe.score(X_test, y_test)

In [None]:
br.alpha_

In [None]:
grid.best_estimator_

In [None]:
best = grid.best_estimator_

In [None]:
best.named_steps

In [None]:
from sklearn.dummy import DummyRegressor

In [None]:
dum = DummyRegressor()
dum.fit(X_train, y_train)

In [None]:
from sklearn.linear_model import Lasso

In [None]:
lasso = Lasso()
lasso.fit(X_train, y_train)

In [None]:
plt.figure(figsize = (14, 7))
plt.plot(lr.coefs_, 'bo', label = 'linear regression')
plt.plot(best.named_steps['ridge'].coef_, 'rx', label = 'ridge')
plt.plot(lasso.coef_, '^', label = 'Lasso')
plt.axhline(color = 'black', alpha = 0.4)
plt.legend()

### Further Reading

- Blog post on writing Machine Learning classes in Python: https://dziganto.github.io/classes/data%20science/linear%20regression/machine%20learning/object-oriented%20programming/python/Understanding-Object-Oriented-Programming-Through-Machine-Learning/

- The Python Data Science Handbook, *Linear Regression in Depth*: https://jakevdp.github.io/PythonDataScienceHandbook/05.06-linear-regression.html

- The Elements of Statistical Learning (see chapter 3): https://web.stanford.edu/~hastie/ElemStatLearn/

- Andrew Ng on Regularized Methods: https://www.youtube.com/watch?v=u73PU6Qwl1I