In [1]:
import pandas
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy.stats import norm
from statsmodels.sandbox.regression.gmm import GMM
from statsmodels.base.model import GenericLikelihoodModel

In [2]:
# read the two-space-delimited file and flip it so each row of data1 is one
# column of the file
data1 = np.genfromtxt('data1.dat', delimiter='  ').T

# y is the file's first column; x is its second column with a constant added
y, x = data1[0], sm.add_constant(data1[1])

In [3]:
#part a - estimate using OLS
part_a = sm.OLS(y,x).fit()
# print() in call form runs on both Python 2 and Python 3 (the bare
# `print expr` statement is a SyntaxError on Python 3)
print(part_a.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.799
Model:                            OLS   Adj. R-squared:                  0.799
Method:                 Least Squares   F-statistic:                     1979.
Date:                Thu, 27 Sep 2018   Prob (F-statistic):          1.33e-175
Time:                        17:53:51   Log-Likelihood:                -362.72
No. Observations:                 500   AIC:                             729.4
Df Residuals:                     498   BIC:                             737.9
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0055      0.022      0.245      0.8

In [4]:
#part b - MLE

class part_b(GenericLikelihoodModel):
    """Maximum-likelihood estimator for question 1 part b.

    The residual ``eps = endog - theta_1 - theta_2 * exog`` is modelled as
    normal with mean 0 and standard deviation ``sigma``. The parameter vector
    is ``(theta_1, theta_2, sigma)``.
    """

    def nloglikeobs(self, params):
        """Return the negative log-likelihood of every observation.

        Parameters
        ----------
        params : sequence of 3 floats
            ``(theta_1, theta_2, sigma)``.

        Returns
        -------
        array
            ``-log f(eps_i)`` for each residual ``eps_i``, where ``f`` is the
            normal density with mean 0 and standard deviation ``sigma``.
        """
        t1, t2, sigma = params
        endog, exog = self.endog, self.exog.squeeze()
        eps = endog - t1 - t2*exog
        # Return one value per observation rather than their sum: the base
        # class sums these itself to form the objective, and its
        # per-observation derivatives need the unsummed vector.
        return -norm.logpdf(eps, loc=0, scale=sigma)

    def fit(self, start_params=None, maxiter=10000, maxfun=5000, **kwds):
        """Fit the model, defaulting every starting value to 0.5.

        Parameters
        ----------
        start_params : sequence of 3 floats, optional
            Starting values for ``(theta_1, theta_2, sigma)``. When omitted,
            ``[.5, .5, .5]`` is used.
        maxiter, maxfun : int
            Forwarded unchanged to the base-class ``fit``.
        **kwds
            Any further keyword arguments for the base-class ``fit``.

        Returns
        -------
        The results object produced by the base-class ``fit``.
        """
        # `is None` rather than `== None`: comparing an array of starting
        # values with `==` is elementwise and cannot be used in an `if`.
        if start_params is None:
            start_params = [.5, .5, .5]
        return super(part_b, self).fit(start_params=start_params,
                                       maxiter=maxiter, maxfun=maxfun, **kwds)

    
# build the part b model from the two data rows, fit it with the default
# starting values, and print the summary with readable parameter names
model_b = part_b(data1[0], data1[1])
result_b = model_b.fit()
summary_b = result_b.summary(xname=['theta_1', 'theta_2', 'sigma'])
print(summary_b)


#sources: 
#http://www.statsmodels.org/0.6.1/examples/notebooks/generated/generic_mle.html
#http://rlhick.people.wm.edu/posts/estimating-custom-mle.html

Optimization terminated successfully.
         Current function value: 0.725432
         Iterations: 69
         Function evaluations: 131
                                part_b Results                                
Dep. Variable:                      y   Log-Likelihood:                -362.72
Model:                         part_b   AIC:                             727.4
Method:            Maximum Likelihood   BIC:                             731.6
Date:                Thu, 27 Sep 2018                                         
Time:                        17:53:52                                         
No. Observations:                 500                                         
Df Residuals:                     499                                         
Df Model:                           0                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------

In [5]:
#part c - GMM

class part_c(GMM):
    """GMM estimator for question 1 part c.

    With ``eps = endog - theta_1 - theta_2 * exog``, the two moment
    conditions are ``eps`` and ``eps * exog``; the parameter vector is
    ``(theta_1, theta_2)``.
    """

    def __init__(self, *args, **kwds):
        """Forward all arguments to ``GMM``, defaulting to 2 moments and 2 parameters."""
        # set appropriate counts for moment conditions and parameters
        kwds.setdefault('k_moms', 2)
        kwds.setdefault('k_params', 2)
        super(part_c, self).__init__(*args, **kwds)

    def fit(self, start_params=None, maxiter=10000, **kwds):
        """Fit the model, defaulting both starting values to 0.5.

        Parameters
        ----------
        start_params : array-like of 2 floats, optional
            Starting values for ``(theta_1, theta_2)``. When omitted,
            ``np.array([.5, .5])`` is used.
        maxiter : int
            Forwarded unchanged to ``GMM.fit``.
            NOTE(review): in statsmodels' ``GMM.fit`` this appears to count
            iterated-GMM steps rather than optimizer iterations - confirm the
            10000 default is intended.
        **kwds
            Any further keyword arguments for ``GMM.fit``.

        Returns
        -------
        The results object produced by ``GMM.fit``.
        """
        # `is None` rather than `== None`: a NumPy array of starting values
        # compares elementwise with `==`, which cannot be used in an `if`.
        if start_params is None:
            start_params = np.array([.5, .5])
        return super(part_c, self).fit(start_params=start_params,
                                       maxiter=maxiter, **kwds)

    def momcond(self, params):
        """Return the per-observation moment conditions as a 2-column array.

        Column 0 is the residual ``eps``; column 1 is ``eps * exog``.
        """
        t1, t2 = params  #unwrap parameters
        endog, exog = self.endog, self.exog.squeeze()
        eps = endog - t1 - t2*exog
        return np.column_stack((eps, eps*exog))

    
# build the part c model (the third positional argument, the instrument, is None)
model_c = part_c(data1[0], data1[1], None)

# fit options: maxiter=2, the 'nm' optimizer, and uncentered weights
fit_options_c = dict(maxiter=2, optim_method='nm', wargs=dict(centered=False))
result_c = model_c.fit(**fit_options_c)
print(result_c.summary(xname=['theta_1', 'theta_2']))


#sources:
#https://github.com/josef-pkt/misc/blob/master/notebooks/ex_gmm_gamma.ipynb
#https://www.statsmodels.org/dev/generated/statsmodels.sandbox.regression.gmm.GMM.html#statsmodels.sandbox.regression.gmm.GMM
#https://gist.github.com/josef-pkt/6895915

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 37
         Function evaluations: 70
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 13
         Function evaluations: 26
                                part_c Results                                
Dep. Variable:                      y   Hansen J:                    1.288e-06
Model:                         part_c   Prob (Hansen J):                   nan
Method:                           GMM                                         
Date:                Thu, 27 Sep 2018                                         
Time:                        17:53:53                                         
No. Observations:                 500                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
theta_1      