In [1]:
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy

from statsmodels.sandbox.regression.gmm import GMM
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

In [2]:
# Read the Stata file and key every row by its (id, year) pair.
data = pd.read_stata('chile.dta').set_index(['id', 'year'])

In [3]:
# Columns that are replaced by their natural logarithm.
log_columns = ['routput', 'totlab', 'renerg', 'realmats', 'rcapstock']

# Work on a copy so the raw frame `data` stays untouched.
ldata = data.copy()
ldata[log_columns] = np.log(data[log_columns])

# Drop every row that has a missing value in any column.
# NOTE(review): dropna() removes NaN but not -inf, so a zero in one of the
# logged columns would survive this step -- confirm the data has none.
ldata = ldata.dropna()

Do a non-parametric regression of $y_{it}$ on $l_{it}$, $m_{it}$, $k_{it}$ and save the residuals and the fitted values.

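Given a guess of $\beta_1$ and $\beta_2$, compute the implied $\omega_{it}$, i.e. the productivity shock.
Get $\omega_{it-1}$ by lagging the implied shock (i.e. without estimating a function).
The residuals from regressing $\omega_{it}$ on $\omega_{it-1}$ should be exogenous.
These are the moment conditions.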

https://www.aeaweb.org/content/file?id=3015

In [4]:
# (output, labour block, remaining inputs) as a 3-tuple.
# NOTE(review): this binding of `d` is never read in the visible cells; `d`
# is rebound to a 2-tuple before its first use.
d = (
    ldata['routput'],
    ldata[['totlab']],
    ldata[['rcapstock', 'realmats', 'renerg']],
)

def np_resids(y, x, degree=3, alpha=0.0):
    """Residuals of ``y`` after a polynomial series regression on ``x``.

    ``x`` is expanded with ``PolynomialFeatures(degree=degree)`` and ``y`` is
    regressed on that expansion with ``Ridge(alpha=alpha)``. With the default
    ``alpha=0.0`` the ridge penalty vanishes, so the fit is plain least
    squares on the polynomial terms (it is not a lasso).

    Parameters
    ----------
    y : array-like
        Regression target. The notebook passes a pandas Series or a
        one-column DataFrame.
    x : array-like, 2-D
        Regressors to be expanded into polynomial terms.
    degree : int, optional
        Degree of the polynomial expansion. Defaults to 3.
    alpha : float, optional
        Ridge penalty strength. Defaults to 0.0 (no penalty).

    Returns
    -------
    The result of ``y - fitted``; for pandas input this keeps the type and
    index of ``y``.
    """
    poly = PolynomialFeatures(degree=degree)
    x_poly = poly.fit_transform(x)
    clf = Ridge(alpha=alpha)
    clf.fit(x_poly, y)
    # Residuals are evaluated in-sample, on the same rows used for the fit.
    resid = y - clf.predict(x_poly)
    return resid

# First stage: residual of log output after the polynomial regression on all
# four logged inputs.
stage1_resids = np_resids(
    ldata['routput'],
    ldata[['totlab', 'rcapstock', 'realmats', 'renerg']],
)

In [7]:
def stage2obj(params, stage1_resids, dta):
    """Per-observation moment contributions for the second stage.

    Parameters
    ----------
    params : array-like
        One coefficient per column of the input frame in ``dta``.
    stage1_resids : Series
        First-stage residuals, subtracted from the output series.
    dta : tuple
        ``(y, x)`` pair: the output Series and the DataFrame of inputs whose
        coefficients are being estimated. Both are expected to carry an
        index level named ``'id'``.

    Returns
    -------
    DataFrame
        The columns of ``x`` multiplied row-wise by the innovation in the
        implied shock, with rows containing missing values removed.
    """
    output, inputs = dta

    # Output net of the first-stage residual.
    fitted = output - stage1_resids

    # Implied shock: what remains once the input terms, weighted by the
    # candidate coefficients, are taken out.
    input_part = inputs.mul(params, axis=1).sum(axis=1)
    omega = fitted - input_part

    # Previous observation of the shock within each 'id'.
    omega_lag = omega.groupby('id').shift(1)
    pair = pd.concat([omega, omega_lag], axis=1)
    pair = pair.dropna()

    # Innovation: residual from regressing the current shock (column 0) on
    # its lag (column 1).
    innovation = np_resids(pair[[0]], pair[[1]])[0]

    # Interact every input with the innovation to form the moments.
    # Alternative instrument set (disabled in the original): 'rcapstock'
    # together with the within-id lag of 'totlab'.
    scaled = inputs.mul(innovation, axis=0)
    return scaled.dropna()
    
# Try the moment function at an arbitrary coefficient guess; the last
# expression is what the cell displays.
d = (ldata['routput'], ldata[['rcapstock', 'totlab']])
stage2obj(np.array([0.1, 0.1]), stage1_resids, d)

Unnamed: 0_level_0,Unnamed: 1_level_0,rcapstock,totlab
id,year,Unnamed: 2_level_1,Unnamed: 3_level_1
18.0,1980,1.145648,0.356509
18.0,1981,-5.070534,-1.357924
54.0,1980,0.879464,0.370622
54.0,1981,-4.311159,-1.599216
54.0,1982,0.866176,0.265023
54.0,1983,-3.241297,-0.945828
54.0,1984,-0.285848,-0.085370
54.0,1985,-1.034154,-0.357779
54.0,1986,1.320648,0.463455
54.0,1987,-2.971930,-0.922297


In [8]:
class ACF(GMM):
    """GMM model whose moment conditions are produced by ``stage2obj``."""

    def __init__(self, dta, stage1_resid, *args, **kwds):
        """Set up the model.

        Parameters
        ----------
        dta : tuple
            ``(y, x)`` pair: the output Series and the DataFrame of inputs.
        stage1_resid : Series
            First-stage residuals. They are handed to the base class in the
            instrument slot and kept on ``self.instr`` for ``momcond``.
        *args, **kwds
            Forwarded unchanged to ``GMM.__init__``.
        """
        y, x = dta
        # x.shape[1] goes into GMM's fourth positional argument (k_moms):
        # one moment condition per column of x.
        super(ACF, self).__init__(y, x, stage1_resid, x.shape[1], *args, **kwds)
        # Keep the objects exactly as passed in: stage2obj calls pandas
        # methods on them (``.mul``, ``.groupby('id')``).
        self.endog = y
        self.exog = x
        # Store the constructor argument itself, not a module-level global.
        self.instr = stage1_resid

    def momcond(self, params):
        """Return the ``stage2obj`` moment contributions at ``params`` as an array."""
        d = self.endog, self.exog
        return np.array(stage2obj(params, self.instr, d))
        
        
# Second stage: estimate the coefficients on 'rcapstock' and 'totlab' by GMM.
d = ldata['routput'], ldata[['rcapstock', 'totlab']]
acfmodel = ACF(d, stage1_resids)
# Starting values follow the column order of d[1]; 'nm' selects the
# Nelder-Mead optimiser.
acfresult = acfmodel.fit(np.array([.1, .5]), optim_method='nm')
# Parenthesised so the statement is valid on both Python 2 and Python 3.
print(acfresult.summary())

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 44
         Function evaluations: 84
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 19
         Function evaluations: 39
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 19
         Function evaluations: 39
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 19
         Function evaluations: 39
                                 ACF Results                                  
Dep. Variable:                routput   Hansen J:                    2.283e-08
Model:                            ACF   Prob (Hansen J):                   nan
Method:                           GMM                                         
Date:                Tue, 14 May 2019                                         
Time:                        16:22:01                