In [2]:
import numpy as np

### Generate some data

In [3]:
# True parameters of the model  y = intercept + coefficient * x + eps
intercept = 10
coefficient = 2

# Seed the (legacy, global) NumPy RNG so the simulated data -- and therefore
# every estimate printed further down -- is identical on each fresh run.
SEED = 0
N_SAMPLES = 100
np.random.seed(SEED)

x = np.random.normal(0, 1, N_SAMPLES)    # predictor, standard normal draws
eps = np.random.normal(0, 1, N_SAMPLES)  # additive noise, standard normal draws
y = (coefficient * x) + intercept + eps

# Prepend a column of ones to x so the intercept is estimated as beta[0]
# and the slope as beta[1]; X has shape (N_SAMPLES, 2).
ones = np.ones(x.shape[0])
X = np.vstack([ones, x]).T

### Analytical Solution - Normal Equation

We seek coefficients to minimize the Residual Sum of Squares (RSS):

$RSS(\beta) = (\mathbf{y} - \mathbf{X} \beta)^T (\mathbf{y} - \mathbf{X} \beta)$

Differentiating *RSS* with respect to the parameters (β) yields:

$\frac{\partial RSS}{\partial \beta} = -2 \mathbf{X}^T (\mathbf{y} - \mathbf{X} \beta)$

Setting the gradient to zero and solving for β:


$\mathbf{X}^T(\mathbf{y} - \mathbf{X} \beta) = 0$

$\beta = (\mathbf{X}^T \mathbf{X})^{-1}\mathbf{X}^T \mathbf{y}$

In [4]:
# Solve the normal equations (X^T X) beta = X^T y as a linear system.
# This yields the same beta as (X^T X)^{-1} X^T y without explicitly forming
# the inverse, which is the numerically preferable way to evaluate it.
beta = np.linalg.solve(X.T @ X, X.T @ y)
print('intercept: {}'.format(beta[0]))
print('coefficient: {}'.format(beta[1]))

intercept: 10.036901190841963
coefficient: 2.002203706392211


### Numerical Solution - Gradient Descent

In [5]:
def partial_rss_wrt_beta(X, beta, y=None):
    """Return the gradient of RSS(beta) = (y - X beta)^T (y - X beta) w.r.t. beta.

    Parameters
    ----------
    X : array
        Design matrix; it is multiplied with ``beta`` as ``X @ beta``.
    beta : array
        Coefficient vector at which the gradient is evaluated.
    y : array, optional
        Response vector. When omitted, the module-level variable ``y`` is
        used, which keeps existing two-argument calls working.

    Returns
    -------
    array
        ``-2 * X.T @ (y - X @ beta)``.
    """
    if y is None:
        # Backward-compatible fallback: earlier callers relied on the
        # notebook-level `y` instead of passing it in.
        y = globals()["y"]
    return -2 * X.T @ (y - (X @ beta))

# Gradient-descent settings
alpha = 0.001           # step size multiplying the RSS gradient
n_iterations = 10 ** 5  # fixed number of update steps (no early stopping)

# Start from beta = (0, 0) as floats and repeatedly step against the gradient.
beta = np.zeros(2)
for _ in range(n_iterations):
    beta = beta - (alpha * partial_rss_wrt_beta(X, beta))

print('intercept: {}'.format(beta[0]))
print('coefficient: {}'.format(beta[1]))

intercept: 10.03690119084196
coefficient: 2.0022037063922093
