In [1]:
import numpy as np
from sklearn.datasets import make_regression

# Synthetic regression problem: 100 samples, 10 features (5 informative), intercept 0.5.
# The seed is fixed so a fresh kernel regenerates exactly the same data set;
# re-run the notebook to refresh any saved outputs that depend on X and y.
X, y = make_regression(n_samples=100, n_features=10, n_informative=5, bias=0.5, random_state=0)

In [18]:
def fit_linear_regression(X, y):
    """Fit ordinary least squares with an intercept term.

    A column of ones is prepended to ``X``, so the first returned coefficient
    is the intercept and the remaining ones follow the column order of ``X``.

    The coefficients are obtained with ``np.linalg.lstsq`` instead of
    explicitly inverting ``X.T @ X``: forming the normal equations squares the
    condition number of the problem, and the inverse becomes unreliable when
    columns are (nearly) collinear. For a rank-deficient design ``lstsq``
    returns the minimum-norm least-squares solution.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features) or (n_samples,)
        Regressors without an intercept column. A 1-D input is treated as a
        single feature.
    y : array-like of shape (n_samples,) or (n_samples, n_targets)
        Response values.

    Returns
    -------
    numpy.ndarray
        Coefficients of shape (n_features + 1,) for 1-D ``y`` (one column per
        target for 2-D ``y``), intercept first.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        # Accept a single feature passed as a flat vector.
        X = X.reshape(-1, 1)
    design = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
    # rcond=None selects the machine-precision based cutoff for small singular values.
    w, _, _, _ = np.linalg.lstsq(design, np.asarray(y, dtype=float), rcond=None)
    return w

In [19]:
# Fit the hand-written solver on the current X, y; w[0] is the intercept
# because fit_linear_regression prepends the column of ones.
w = fit_linear_regression(X, y)

In [20]:
# Display the fitted coefficients (intercept first, then one per feature).
w

array([ 5.00000000e-01, -6.21724894e-15, -6.21724894e-15,  4.13874805e+01,
       -6.21724894e-15,  5.35499790e+01,  2.22044605e-16,  7.84880737e+00,
        6.67551421e+00, -4.66293670e-15,  4.27780039e+00])

In [65]:
# https://en.wikipedia.org/wiki/Akaike_information_criterion

def log_likeli_hood(n, k, errs):
    """Gaussian log-likelihood of a fit, evaluated at the ML variance estimate.

    Computes ``-(n/2) * (1 + log(2*pi)) - (n/2) * log(mean(errs**2))``.

    Parameters
    ----------
    n : int
        Number of observations.
    k : int
        Number of fitted parameters. Not used in the computation; accepted so
        the signature matches ``AIC`` and ``BIC``.
    errs : numpy.ndarray or pandas.Series
        Residuals of the fit (must support ``**``).

    Returns
    -------
    float
        The log-likelihood value.
    """
    half_n = n / 2
    mean_sq_err = np.mean(errs**2)
    constant_term = -half_n * (1 + np.log(2*np.pi))
    return constant_term - half_n * np.log(mean_sq_err)


def AIC(n, k, errs):
    """Akaike information criterion, ``2*k - 2*log_likelihood``.

    Parameters
    ----------
    n : int
        Number of observations.
    k : int
        Number of fitted parameters used as the penalty term.
    errs : numpy.ndarray or pandas.Series
        Residuals of the fit.

    Returns
    -------
    float
        The AIC value (lower is better).
    """
    log_lik = log_likeli_hood(n, k, errs)
    return 2 * (k - log_lik)


def BIC(n, k, errs):
    """Bayesian information criterion, ``k*log(n) - 2*log_likelihood``.

    Parameters
    ----------
    n : int
        Number of observations.
    k : int
        Number of fitted parameters used in the penalty term.
    errs : numpy.ndarray or pandas.Series
        Residuals of the fit.

    Returns
    -------
    float
        The BIC value (lower is better).
    """
    log_lik = log_likeli_hood(n, k, errs)
    complexity_penalty = k * np.log(n)
    return complexity_penalty - 2*log_lik


def adjust_r2(n, k, r_2):
    """Adjusted R-squared, ``1 - (1 - r_2) * (n - 1) / (n - k)``.

    Parameters
    ----------
    n : int
        Number of observations.
    k : int
        Number of fitted parameters; ``summary`` passes the feature count
        plus one for the constant.
    r_2 : float
        Plain (unadjusted) R-squared.

    Returns
    -------
    float
        R-squared penalised for the number of parameters.
    """
    unexplained_share = 1 - r_2
    total_dof = n - 1
    residual_dof = n - k
    return 1 - unexplained_share * total_dof / residual_dof


def f_statistic(n, k, sst, sse):
    """F-statistic: explained mean square divided by residual mean square.

    Parameters
    ----------
    n : int
        Number of observations.
    k : int
        Number of fitted parameters; ``summary`` passes the feature count
        plus one for the constant.
    sst : float
        Total sum of squares.
    sse : float
        Sum of squared residuals.

    Returns
    -------
    float
        ``((sst - sse) / (k - 1)) / (sse / (n - k))``.
    """
    explained_mean_sq = (sst - sse) / (k - 1)
    residual_mean_sq = sse / (n - k)
    return explained_mean_sq / residual_mean_sq


def summary(X, y, w):
    """Goodness-of-fit statistics for a linear model with an intercept.

    A column of ones is prepended to ``X`` before predicting, so ``w`` must
    hold the intercept first, followed by one coefficient per column of ``X``
    (the layout returned by ``fit_linear_regression``).

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Regressors without an intercept column.
    y : numpy.ndarray or pandas.Series
        Observed responses (must provide ``.mean()``).
    w : numpy.ndarray
        Fitted coefficients, intercept first.

    Returns
    -------
    tuple
        ``(r_2, adjusted_r_2, f_statistic, aic, bic)``.
    """
    intercept_col = np.ones((X.shape[0], 1))
    design = np.concatenate((intercept_col, X), axis=1)
    # k counts the features plus the constant column.
    n, k = design.shape

    errs = y - design.dot(w)
    sse = np.sum(errs**2)
    sst = np.sum((y - y.mean())**2)
    r_2 = (sst - sse) / sst

    return (
        r_2,
        adjust_r2(n, k, r_2),
        f_statistic(n, k, sst, sse),
        AIC(n, k, errs),
        BIC(n, k, errs),
    )


In [66]:
# Returns (R^2, adjusted R^2, F-statistic, AIC, BIC) for the current X, y, w.
# NOTE(review): the saved output below is identical to the Spector-data result
# further down, so this cell appears to have been re-run after X and y were
# rebound -- confirm by running the notebook top to bottom on a fresh kernel.
summary(X, y, w)

(0.4158997124492362,
 0.3533175387830829,
 6.645657830101385,
 33.95649234217083,
 39.81943595336974)

In [29]:
import statsmodels.api as sm

# Load the Spector dataset bundled with statsmodels (a Dataset object with endog/exog).
data = sm.datasets.spector.load()

<class 'statsmodels.datasets.utils.Dataset'>

In [30]:
# Rebind y to the Spector response (GRADE); this replaces the synthetic y
# created with make_regression at the top of the notebook.
y = data.endog
y

0     0.0
1     0.0
2     0.0
3     0.0
4     1.0
5     0.0
6     0.0
7     0.0
8     0.0
9     1.0
10    0.0
11    0.0
12    0.0
13    1.0
14    0.0
15    0.0
16    0.0
17    0.0
18    0.0
19    1.0
20    0.0
21    1.0
22    0.0
23    0.0
24    1.0
25    1.0
26    1.0
27    0.0
28    1.0
29    1.0
30    0.0
31    1.0
Name: GRADE, dtype: float64

In [31]:
# Rebind X to the Spector regressors (GPA, TUCE, PSI); this replaces the
# synthetic X. X is captured before data.exog gets its constant column, so X
# itself carries no intercept column.
X = data.exog
X

Unnamed: 0,GPA,TUCE,PSI
0,2.66,20.0,0.0
1,2.89,22.0,0.0
2,3.28,24.0,0.0
3,2.92,12.0,0.0
4,4.0,21.0,0.0
5,2.86,17.0,0.0
6,2.76,17.0,0.0
7,2.87,21.0,0.0
8,3.03,25.0,0.0
9,3.92,29.0,0.0


In [32]:
# Check which container type holds the regressors.
type(X)

pandas.core.frame.DataFrame

In [33]:
# Add the intercept column for statsmodels; prepend=False puts `const` last.
# This reassigns data.exog on the dataset object rather than creating a new name.
data.exog = sm.add_constant(data.exog, prepend=False)
data.exog

Unnamed: 0,GPA,TUCE,PSI,const
0,2.66,20.0,0.0,1.0
1,2.89,22.0,0.0,1.0
2,3.28,24.0,0.0,1.0
3,2.92,12.0,0.0,1.0
4,4.0,21.0,0.0,1.0
5,2.86,17.0,0.0,1.0
6,2.76,17.0,0.0,1.0
7,2.87,21.0,0.0,1.0
8,3.03,25.0,0.0,1.0
9,3.92,29.0,0.0,1.0


In [34]:
# Reference model: statsmodels OLS of GRADE on the regressors plus constant.
mod = sm.OLS(y, data.exog)

In [36]:
# Estimate the reference model; res holds the fitted results.
res = mod.fit()

In [37]:
# statsmodels' summary table, used as the ground truth for the hand-written summary().
res.summary()

0,1,2,3
Dep. Variable:,GRADE,R-squared:,0.416
Model:,OLS,Adj. R-squared:,0.353
Method:,Least Squares,F-statistic:,6.646
Date:,"Fri, 27 May 2022",Prob (F-statistic):,0.00157
Time:,17:30:09,Log-Likelihood:,-12.978
No. Observations:,32,AIC:,33.96
Df Residuals:,28,BIC:,39.82
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
GPA,0.4639,0.162,2.864,0.008,0.132,0.796
TUCE,0.0105,0.019,0.539,0.594,-0.029,0.050
PSI,0.3786,0.139,2.720,0.011,0.093,0.664
const,-1.4980,0.524,-2.859,0.008,-2.571,-0.425

0,1,2,3
Omnibus:,0.176,Durbin-Watson:,2.346
Prob(Omnibus):,0.916,Jarque-Bera (JB):,0.167
Skew:,0.141,Prob(JB):,0.92
Kurtosis:,2.786,Cond. No.,176.0


In [43]:
# Refit with the hand-written solver on the Spector data. X has no constant
# column (the solver adds its own as the first column), so w is ordered
# [const, GPA, TUCE, PSI] whereas the statsmodels table lists const last.
w = fit_linear_regression(X, y)

In [67]:
# (R^2, adjusted R^2, F-statistic, AIC, BIC) from the hand-written functions;
# compare with the corresponding entries in the statsmodels summary table.
summary(X, y, w)

(0.4158997124492362,
 0.3533175387830829,
 6.645657830101385,
 33.95649234217083,
 39.81943595336974)

In [59]:
# Coefficients from the hand-written solver, intercept first.
w

array([-1.49801712,  0.46385168,  0.01049512,  0.37855479])