In [1]:
import pandas
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM
from statsmodels.base.model import GenericLikelihoodModel

In [2]:
# Read the sample from disk; fields in the file are separated by two spaces.
data1 = np.genfromtxt('data1.dat', delimiter='  ')

# Column 0 is the response. Column 1, with a constant term added by
# statsmodels, becomes the regressor matrix.
y, x = data1[:, 0], sm.add_constant(data1[:, 1])

In [3]:
# Part (a): fit y on x by ordinary least squares and show the full report.
part_a = sm.OLS(endog=y, exog=x).fit()
print(part_a.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.799
Model:                            OLS   Adj. R-squared:                  0.799
Method:                 Least Squares   F-statistic:                     1979.
Date:                Thu, 22 Jun 2023   Prob (F-statistic):          1.33e-175
Time:                        21:15:24   Log-Likelihood:                -362.72
No. Observations:                 500   AIC:                             729.4
Df Residuals:                     498   BIC:                             737.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0055      0.022      0.245      0.8

In [4]:
# MLE
def loglike(params, *args):
    """Negative Gaussian log-likelihood of ``y = theta0 + theta1 * x + e``.

    Parameters
    ----------
    params : sequence of three numbers
        ``(theta0, theta1, sigma)``: intercept, slope and the standard
        deviation of the error term, in that order.
    *args
        Exactly two positional values ``(y, x)``: the response and the
        regressor. Array-likes (including plain lists) are accepted and
        converted to float arrays.

    Returns
    -------
    float
        The log-likelihood multiplied by -1, so the function can be handed
        directly to a minimiser.
    """
    y, x = args
    theta0, theta1, sigma = params
    y = np.asarray(y, dtype=float)
    x = np.asarray(x, dtype=float)
    n = len(y)
    resid = y - theta0 - theta1 * x
    # np.sum reduces in compiled code; the builtin sum would walk the array
    # one Python object at a time.
    log_lik = -n / 2 * np.log(2 * np.pi * sigma**2) - 1 / (2 * sigma**2) * np.sum(resid**2)
    return -log_lik

In [13]:
from scipy.optimize import minimize

# loglike carries its own intercept (theta0), so it is given the bare
# regressor column rather than a matrix with a constant.
# NOTE(review): this rebinds the global `x`, which until now held the
# constant-augmented matrix used by the OLS cell; re-running that cell after
# this one would fit on the bare column instead.
x = data1[:, 1]

# Minimise the negative log-likelihood starting from
# (theta0, theta1, sigma) = (0, 0, 1). Only sigma is bounded (below, at 1e-10).
res = minimize(
    loglike,
    x0=[0, 0, 1],
    args=(y, x),
    method='L-BFGS-B',
    bounds=((None, None), (None, None), (1e-10, None)),
)

In [14]:
# Point estimates, in the order loglike unpacks them: theta0, theta1, sigma.
print(res.x)

[0.00551029 1.01045935 0.49982024]


In [16]:
# Dense form of the inverse-Hessian object attached to the optimiser result.
# NOTE(review): with L-BFGS-B this is presumably a limited-memory approximation
# built from the optimiser's iterates rather than the exact inverse Hessian --
# confirm before reading standard errors off its diagonal.
print(res.hess_inv.todense())

[[0.00231849 0.01538678 0.00730256]
 [0.01538678 0.13129851 0.06196704]
 [0.00730256 0.06196704 0.02968299]]


In [17]:
def bootstrap(n = 100, seed = None):
    """Nonparametric bootstrap of the maximum-likelihood estimates.

    Each replication draws ``len(y)`` observation indices with replacement
    from the module-level ``y`` and ``x``, then refits the model by
    minimising ``loglike`` with L-BFGS-B from the start point ``(0, 0, 1)``.

    Parameters
    ----------
    n : int, default 100
        Number of bootstrap replications.
    seed : int or None, default None
        When given, indices are drawn from ``np.random.default_rng(seed)`` so
        the replications are reproducible. When ``None``, the global
        ``np.random`` stream is used, exactly as before this parameter
        existed.

    Returns
    -------
    tuple of three lists
        ``(theta0, theta1, sigma)``, each holding ``n`` refitted estimates.
    """
    # Both the legacy module and a Generator expose a compatible `choice`.
    rng = np.random if seed is None else np.random.default_rng(seed)
    n_obs = len(y)
    # Only sigma is constrained (kept strictly positive).
    bounds = ((None, None), (None, None), (1e-10, None))

    theta0 = []
    theta1 = []
    sigma = []
    for _ in range(n):
        sample = rng.choice(n_obs, size=n_obs, replace=True)
        res = minimize(loglike, [0,0,1], args=(y[sample],x[sample]), method='L-BFGS-B', bounds=bounds)
        theta0.append(res.x[0])
        theta1.append(res.x[1])
        sigma.append(res.x[2])
    return theta0, theta1, sigma

In [18]:
# Run 100 bootstrap replications and keep the per-parameter estimate lists.
theta0_list, theta1_list, sigma_list = bootstrap(n = 100)

In [19]:
# Bootstrap standard error of theta0 (standard deviation with the default ddof=0).
np.std(theta0_list)

0.022970895927376313

In [20]:
# Bootstrap standard error of theta1 (standard deviation with the default ddof=0).
np.std(theta1_list)

0.02244009293218294

In [21]:
# Bootstrap standard error of sigma (standard deviation with the default ddof=0).
np.std(sigma_list)

0.016430467489376977