## Linear Regression Model

### Generate the data for the model

In [169]:
import numpy as np
import plotly.plotly as plotly
import plotly.graph_objs as graph_objs

# generate the data set: y = 4 + 3 * X + Gaussian noise, with X uniform on [0, 2)
np.random.seed(42)  # fix the global RNG so a fresh "Restart & Run All" reproduces the same data
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

# plot the dataset
# X and y are (100, 1) column vectors; flatten them so every trace coordinate
# is a plain 1-D sequence of numbers
trace = graph_objs.Scatter(x = X.ravel(), y = y.ravel(), mode = 'markers')
data = [trace]

# both axis titles use the same font settings, so define them once
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)
layout = graph_objs.Layout(
    title='Linear Model',
    xaxis=dict(title='X', titlefont=axis_title_font),
    yaxis=dict(title='y', titlefont=axis_title_font)
)
fig = graph_objs.Figure(data = data, layout = layout)
plotly.iplot(fig)

### Normal Equation minimization

The normal equation is given by:
$$\hat{\Theta} = (\textbf{X}^{T} \cdot \textbf{X})^{-1} \cdot \textbf{X}^{T} \cdot \textbf{y}$$

where: 
* **X** is the feature matrix, with one row per observation (including a leading column of ones for the bias term)

* **y** is the vector of target values

* $\hat{\Theta}$ is the vector of model parameters that minimizes the squared-error cost function

In [170]:
# append ones to the vector of X for the bias unit
# (the row count is taken from X instead of being hard-coded to 100)
X_b = np.c_[np.ones((len(X), 1)), X]

# use the normal equation to find the least-squares parameters:
# solve (X_b^T . X_b) . theta = X_b^T . y directly instead of forming the
# explicit inverse; this is the same equation, with better numerical behaviour
theta_best = np.linalg.solve(X_b.T.dot(X_b), X_b.T.dot(y))

Note that the true parameters used to generate the data are $\Theta_0 = 4$ and $\Theta_1 = 3$. Because of the added noise, the estimated parameters will be close to, but not exactly equal to, these values.

In [171]:
# our minimized estimated parameters: the first row is the bias (intercept)
# estimate and the second row is the slope estimate, because X_b has the
# column of ones first
theta_best

array([[ 3.82181384],
       [ 3.05455978]])

In [176]:
# get the linear regression line
X_new = np.array([0, 2])  # two points are enough to draw a straight line
# add a column of 1's for the bias unit (row count follows X_new rather than a hard-coded 2)
X_new_b = np.c_[np.ones((len(X_new), 1)), X_new]
y_predict = X_new_b.dot(theta_best)  # get the predictions h(X) = X_new_b . theta_best
print(y_predict)

[[ 3.82181384]
 [ 9.9309334 ]]


In [175]:
# plot the linear regression line
# X, y and y_predict are column vectors; flatten them so every trace
# coordinate is a plain 1-D sequence of numbers (X_new is already 1-D)
trace_1 = graph_objs.Scatter(x = X.ravel(), y = y.ravel(), mode = 'markers', name = 'observations')
trace_2 = graph_objs.Scatter(x = X_new, y = y_predict.ravel(), mode = 'lines', name = 'linear regression line')
data = [trace_1, trace_2]

# both axis titles use the same font settings, so define them once
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)
layout = graph_objs.Layout(
    title='Linear Model',
    xaxis=dict(title='X', titlefont=axis_title_font),
    yaxis=dict(title='y', titlefont=axis_title_font)
)
fig = graph_objs.Figure(data = data, layout = layout)
plotly.iplot(fig)

In [187]:
# sklearn alternative to the hand-written normal equation
from sklearn.linear_model import LinearRegression

# build the estimator and fit it on the raw feature column and targets
linear_regression_model = LinearRegression()
linear_regression_model.fit(X, y)

# `l` refers to the same fitted estimator (fit() returns the estimator itself)
l = linear_regression_model

# learned bias term followed by the learned feature weight
print(l.intercept_, l.coef_)

[ 3.82181384] [[ 3.05455978]]


### Gradient Descent Minimization

In [198]:
# stochastic gradient descent
from sklearn.linear_model import SGDRegressor

# `n_iter` was deprecated in scikit-learn 0.19 and later removed; `max_iter`
# together with `tol = None` keeps the original meaning of running exactly
# 50 passes over the data with no early stopping.
# penalty = None disables regularization; eta0 is the initial learning rate.
sgd_reg = SGDRegressor(max_iter = 50, tol = None, penalty = None, eta0 = 0.1)
sr = sgd_reg.fit(X, y.ravel())  # ravel: the estimator expects a 1-D target array
print(sr.intercept_, sr.coef_)

[ 3.78369464] [ 3.01373211]


## Polynomial Regression Model

### Generate data for the model

In [202]:
# quadratic data set: y = 0.5 * X^2 + X + 2 + Gaussian noise, with X uniform on [-3, 3)
m = 100  # number of observations
X = 6 * np.random.rand(m, 1) - 3
y = 0.5 * X**2 + X + 2 + np.random.randn(m, 1)

# X and y are (m, 1) column vectors; flatten them so every trace coordinate
# is a plain 1-D sequence of numbers
trace = graph_objs.Scatter(x = X.ravel(), y = y.ravel(), mode = 'markers')
data = [trace]

# both axis titles use the same font settings, so define them once
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)
layout = graph_objs.Layout(
    title='Polynomial Model',
    xaxis=dict(title='X', titlefont=axis_title_font),
    yaxis=dict(title='y', titlefont=axis_title_font)
)
fig = graph_objs.Figure(data = data, layout = layout)
plotly.iplot(fig)

### Add the polynomial (2nd-degree) features

In [208]:
from sklearn.preprocessing import PolynomialFeatures

# expand every sample x into the pair [x, x^2]; include_bias = False leaves
# out the constant column of ones
poly_features = PolynomialFeatures(
    degree = 2,
    include_bias = False,
)
X_poly = poly_features.fit_transform(X)

# show the first sample before and after the expansion
for label, row in (("Original feature: {}", X[0]),
                   ("Polynomial feature added: {}", X_poly[0])):
    print(label.format(row))

Original feature: [-1.82367467]
Polynomial feature added: [-1.82367467  3.3257893 ]


### Fit Linear Regression

In [209]:
from sklearn.linear_model import LinearRegression

# ordinary least squares on the expanded [x, x^2] features
linear_model = LinearRegression()
linear_model.fit(X_poly, y)

# `lm` refers to the same fitted estimator (fit() returns the estimator itself)
lm = linear_model

# learned bias term, then the weights for x and x^2
print(lm.intercept_, lm.coef_)

[ 2.00128005] [[ 0.8979747   0.51545163]]


In [234]:
# plot the resulting regression
# evaluate the fitted quadratic at every sample; lm.coef_[0] holds the
# weights for the [x, x^2] columns of X_poly
y_fit = lm.coef_[0][0] * X + lm.coef_[0][1] * X**2 + lm.intercept_

# X, y and y_fit are column vectors; flatten them so every trace coordinate
# is a plain 1-D sequence of numbers. Traces are named so the legend tells
# observations and fitted values apart, like in the linear model plot.
trace_1 = graph_objs.Scatter(x = X.ravel(), y = y.ravel(), mode = 'markers', name = 'observations')
# markers rather than lines: X is not sorted, so joining the points in
# sample order would zig-zag across the curve
trace_2 = graph_objs.Scatter(x = X.ravel(),
                             y = y_fit.ravel(),
                             mode = 'markers',
                             name = 'polynomial regression fit')
data = [trace_1, trace_2]

# both axis titles use the same font settings, so define them once
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)
layout = graph_objs.Layout(
    title='Polynomial Model',
    xaxis=dict(title='X', titlefont=axis_title_font),
    yaxis=dict(title='y', titlefont=axis_title_font)
)
fig = graph_objs.Figure(data = data, layout = layout)
plotly.iplot(fig)

## Ridge Regression Model

**Ridge Regression** is a Linear Regression model whose feature weights are constrained by adding the regularization term $\alpha \sum_{i=1}^{n}\Theta_{i}^{2}$ to the cost function. The sum starts at $i = 1$, so the bias term $\Theta_0$ is not regularized.

### Normal Equation

In [236]:
from sklearn.linear_model import Ridge

# ridge regression with regularization strength alpha = 1, solved with the
# 'cholesky' solver; note it is fitted on the single raw feature X, not on X_poly
ridge_reg = Ridge(solver = 'cholesky', alpha = 1)
ridge_reg = ridge_reg.fit(X, y)  # fit() returns the estimator itself

# learned bias term followed by the learned feature weight
print(ridge_reg.intercept_, ridge_reg.coef_)

[ 3.60274349] [[ 0.9714163]]


### Stochastic Gradient Descent

In [237]:
from sklearn.linear_model import SGDRegressor

# SGD with an l2 penalty on the weights; all other settings are left at the
# library defaults
sgd_reg = SGDRegressor(penalty = 'l2')

# the target is flattened to 1-D before fitting
y_flat = y.ravel()
sgd_reg.fit(X, y_flat)

# learned bias term followed by the learned feature weight
print(sgd_reg.intercept_, sgd_reg.coef_)

[ 2.73229359] [ 1.02553078]
