In [2]:
import matplotlib.pyplot as plt
from mpl_style import lightbg
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns

# Find coefficients to linear equation.

1.) Given set of $n$ points $(x_i, y_i)$ on a scatter plot.

2.) Find the best-fit line, $\hat{y}_i= \alpha + \beta x_i$.

3.) Such that the sum of squared errors, $\Sigma (y_i - \hat{y}_i)^2$, is minimized.

# Numerical Example

$ S_x = \Sigma x_i, \ \ S_y = \Sigma y_i, \ \ S_{xy} = \Sigma x_i y_i \\ S_{xx} = \Sigma x_i^2, \ \ S_{yy} = \Sigma y_i^2$

In [3]:
df = pd.read_csv('table1.dat', sep=' ', usecols=['x', 'y'])
Sx = df.x.sum()
Sy = df.y.sum()
Sxy = df.x @ df.y
Sxx = df.x @ df.x
Syy = df.y @ df.y

$ \beta = \frac{n S_{xx} - S_x S_y}{n S_{xx} - S_x^2}, \ \ \alpha = \frac{1}{n} S_y - \beta \frac{1}{n} S_x$

In [4]:
n = len(df)
beta = (n * Sxy - Sx * Sy) / (n * Sxx - Sx ** 2)
alpha = (1/n) * (Sy - beta * Sx)

$ S_{\epsilon}^2 = \frac{1}{n(n-2)} [n S_{yy} - S_y^2 - \beta^2 (n S_{xx} - S_x^2)]$

$ S_{\beta}^2 = \frac{n S_{\epsilon}^2}{n S_{xx} - S_x^2} $

$ S_{\alpha}^2 = S_{\beta}^2 \frac{1}{n} S_{xx} $

In [5]:
Se2 = pow(n * (n - 2), -1) * (n * Syy - pow(Sy, 2) - pow(beta, 2) * (n * Sxx - Sxx ** 2))
Sb2 = (n * Se2 ** 2) / (n * Sxx - Sx ** 2)
Sa2 = Se2 * Sxx / n

ne = pd.Series([alpha, beta, Sa2, Sb2],
               index=['a', 'b', 'ea', 'eb'],
               name='Numerical Example')

In [6]:
df.cov()

Unnamed: 0,x,y
x,3168.134211,2002.297368
y,2002.297368,10476.892105


# Linear Least Squares

$ \begin{bmatrix} \alpha \\ \beta \end{bmatrix} = (X^T X)^{-1} X^T Y$ 

In [7]:
Y = df.y.values.reshape((len(df), 1))
X = np.matrix([np.ones(len(df)), df.x]).T
m = (np.linalg.inv(X.T @ X) @ X.T @ Y)
alpha, beta = m.item(0), m.item(1)
alpha, beta

(310.0172025540511, 0.6320115359280902)

In [8]:
m = np.linalg.inv(X.T @ X)
lls = pd.Series([alpha, beta], index=['a', 'b'], name='Linear Least Squares')

In [9]:
pd.DataFrame([ne, lls])

Unnamed: 0,a,b,ea,eb
Numerical Example,310.017203,0.632012,15935910000000.0,3876278000000.0
Linear Least Squares,310.017203,0.632012,,
