In [95]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from statsmodels import api
from scipy import stats
from scipy.optimize import minimize 

In [35]:
np.random.uniform(1.0, 15.0, size=2)

array([ 6.44477303, 11.29663461])

In [79]:
# generate an independent variable 
x = np.random.rand(2, 200)

# generate a normally distributed residual
e = np.random.normal(10, 5, 200)

# generate ground truth
y = np.dot(x.T, np.random.uniform(1.0, 15.0, size=2))  + e
df = pd.DataFrame(x).T.rename(columns={0: 'x1', 1: 'x2'})
df = pd.concat([df, pd.DataFrame(y).rename(columns={0: 'y'})], axis=1)
df 

Unnamed: 0,x1,x2,y
0,0.561839,0.137006,22.667721
1,0.103264,0.449916,11.096745
2,0.191169,0.510447,10.983833
3,0.416060,0.070333,9.559195
4,0.837296,0.403765,15.153511
...,...,...,...
195,0.565630,0.384011,12.722500
196,0.518060,0.932836,21.971939
197,0.657818,0.404406,16.460986
198,0.800280,0.820438,17.155573


In [82]:
features = api.add_constant(df[['x1', 'x2']])
model = api.OLS(y, features).fit()
model.summary() 

0,1,2,3
Dep. Variable:,y,R-squared:,0.198
Model:,OLS,Adj. R-squared:,0.189
Method:,Least Squares,F-statistic:,24.24
Date:,"Thu, 06 Apr 2023",Prob (F-statistic):,3.87e-10
Time:,22:12:53,Log-Likelihood:,-614.14
No. Observations:,200,AIC:,1234.0
Df Residuals:,197,BIC:,1244.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,10.4687,0.953,10.980,0.000,8.588,12.349
x1,6.4306,1.350,4.763,0.000,3.768,9.093
x2,5.9951,1.258,4.764,0.000,3.514,8.477

0,1,2,3
Omnibus:,0.893,Durbin-Watson:,2.087
Prob(Omnibus):,0.64,Jarque-Bera (JB):,0.565
Skew:,-0.001,Prob(JB):,0.754
Kurtosis:,3.26,Cond. No.,5.18


In [83]:
res = model.resid
standard_dev = np.std(res)
standard_dev

5.216258634043617

Suppose we have $n$ observations and $p$ features (i.e., $p$ coefficients) and denote $X_i = (x_{i1}, \dots, x_{ip})^T$

$$f(y_i|\beta, \sigma^2) = N (X_i^T\beta, \sigma^2), \text{ where } \beta = (\beta_1, \dots, \beta_j, \dots, \beta_p)^T$$
$$ \beta_j \sim N(\mu_j, \eta_j^2), \text{ where } \mu = (\mu_1, \dots, \mu_p), \text{ and } \eta = (\eta_1, \dots, \eta_p)$$

Then the joint distribution is 
$$L(\beta, \sigma, \mu, \eta) = (\sqrt{2\pi}\sigma)^{-n}\exp\{\frac{\sum_{i=1}^n(y_i - X_i^T\beta)^2}{2\sigma^2}\} \times \prod_{j=1}^p (\sqrt{2\pi}\eta_j)^{-1} \exp\{\frac{(\beta_j - \mu_j)^2}{2\eta_j^2}\}$$

In [92]:
# MLE function
def MLE_Norm(parameters):
       
    beta1, beta2, std_dev = parameters

    # predict the output
    pred = np.dot(x.T, np.array([beta1, beta2]))
    
    # Calculate the log-likelihood for normal distribution
    LL = np.sum(stats.norm.logpdf(y, pred, std_dev))
    
    # Calculate the negative log-likelihood
    neg_LL = -1*LL
    return neg_LL 

In [94]:
 # minimize arguments: function, intial_guess_of_parameters, method
 mle_model = minimize(MLE_Norm, np.array([2, 2, 2]), method='L-BFGS-B')
 mle_model.x

array([16.42887523, 14.04450062,  6.62267384])