In [119]:
import numpy as np
import pandas as pd

### Generate some data

In [188]:
# Ground-truth parameters of the simulated model y = intercept + coefficient * x + eps
intercept = 10
coefficient = 2

# Seeded generator so the synthetic data is identical on every
# Restart Kernel -> Run All; change SEED to draw a different sample.
SEED = 0
N_SAMPLES = 100
rng = np.random.default_rng(SEED)

x = rng.normal(0, 1, N_SAMPLES)    # single feature, standard normal
eps = rng.normal(0, 1, N_SAMPLES)  # additive noise, standard normal
y = (coefficient * x) + intercept + eps
# Add intercept term to x for convenience of notation: a leading column of
# ones makes beta[0] the intercept and beta[1] the slope.
i = np.repeat(1, x.shape[0])
X = np.vstack([i, x]).T

### Analytical Solution - Normal Equation

We seek coefficients to minimize the Residual Sum of Squares (RSS):

\begin{align}
RSS(\beta) &= (\mathbf{y} - \mathbf{X} \beta)^T (\mathbf{y} - \mathbf{X} \beta) \\
\end{align}

Differentiating RSS with respect to the parameters (β) yields:

\begin{align}
\frac{\partial RSS}{\partial \beta} &= -2 \mathbf{X}^T (\mathbf{y} - \mathbf{X} \beta) \\
\end{align}

Setting the gradient to zero and solving for β (assuming $\mathbf{X}^T \mathbf{X}$ is invertible):

\begin{align}
&\mathbf{X}^T(\mathbf{y} - \mathbf{X} \beta) = 0 \\
&\beta = (\mathbf{X}^T \mathbf{X})^{-1}\mathbf{X}^T \mathbf{y}
\end{align}

In [189]:
# Closed-form least-squares estimate: solve the normal equations
# (X^T X) beta = X^T y. A linear solve yields the same beta as multiplying by
# the explicit inverse, but avoids forming the inverse, which is cheaper and
# numerically more stable.
beta = np.linalg.solve(X.T @ X, X.T @ y)
print('intercept: {}'.format(beta[0]))
print('coefficient: {}'.format(beta[1]))

intercept: 10.07667837435177
coefficient: 1.8834880009210118


### Numerical Solution - Gradient Descent

In [187]:
def partial_rss_wrt_beta(X, y_hat, beta, targets=None):
    """Return the gradient of the residual sum of squares with respect to beta.

    Evaluates ``-2 * X.T @ (targets - y_hat)``.

    Parameters
    ----------
    X : array
        Design matrix; only ``X.T`` and ``@`` are used on it.
    y_hat : array or None
        Current predictions. When None they are computed as ``X @ beta``.
    beta : array
        Coefficient vector. Only read when ``y_hat`` is None.
    targets : array or None, optional
        Observed responses. When None, the notebook-level variable ``y`` is
        used, which keeps existing three-argument calls working.

    Returns
    -------
    array
        The gradient ``-2 * X.T @ (targets - y_hat)``.
    """
    if y_hat is None:
        y_hat = X @ beta
    if targets is None:
        # Backward-compatible fallback: read the response vector from the
        # notebook namespace, as the three-argument form always has.
        targets = y
    return -2 * X.T @ (targets - y_hat)

# Batch gradient descent on the RSS: repeatedly step against the gradient.
beta = np.repeat(0.0, 2)   # start from zeros; float so beta has one dtype throughout
alpha = 0.001              # learning rate (step size)
n_iterations = 10 ** 5     # fixed number of gradient steps, no early stopping

# A named loop variable is used instead of `_` (IPython's last-output variable)
# or `i` (already used for the intercept column in this notebook).
for step in range(n_iterations):
    y_hat = X @ beta
    partial = partial_rss_wrt_beta(X, y_hat, beta)
    beta = beta - (alpha * partial)

print('intercept: {}'.format(beta[0]))
print('coefficient: {}'.format(beta[1]))

intercept: 10.07111418683654
coefficient: 1.962890447104283
