In [1]:
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy

from statsmodels.sandbox.regression.gmm import GMM
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

In [2]:
# Read the Stata file and key every row by the ('id', 'year') pair so that
# later per-id group operations can rely on the index.
data = pd.read_stata('chile.dta').set_index(['id', 'year'])

In [None]:
# Start from a copy so `data` keeps the untransformed levels.
ldata = data.copy()

# Replace output and the four inputs by their natural logs, then drop every
# row that has a missing value in any column.
ldata = ldata.assign(**{
    col: np.log(ldata[col])
    for col in ('routput', 'totlab', 'renerg', 'realmats', 'rcapstock')
}).dropna()

Do a non-parametric regression of $y_{it}$ on $l_{it}$, $m_{it}$, $k_{it}$ and save the residuals and the fitted values.

Given a guess of $\beta_1$ and $\beta_2$, compute the implied $\omega_{it}$, i.e. the productivity shock.
Get $\omega_{it-1}$ the same way from the lagged values (i.e. without estimating a function).
The resulting residuals should be exogenous;
these are the moment conditions.

https://www.aeaweb.org/content/file?id=3015

In [41]:
# (log output, labour, other inputs) as a 3-tuple.
# NOTE(review): in the visible cells `d` is rebound before anything reads
# this value -- confirm whether it is still needed.
d = (
    ldata['routput'],
    ldata[['totlab']],
    ldata[['rcapstock', 'realmats', 'renerg']],
)

def np_resids(y, x, degree=3, alpha=0.0):
    """Residuals of ``y`` from a polynomial series regression on ``x``.

    ``x`` is expanded into every polynomial combination of its columns up to
    ``degree`` and ``y`` is regressed on that expansion with ``Ridge``.  With
    the default ``alpha=0.0`` the penalty term is zero, so the fit is an
    unpenalised least-squares fit rather than a shrinkage estimator.

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        Dependent variable.
    x : array-like of shape (n_samples, n_features)
        Regressors to expand.
    degree : int, optional
        Highest total degree of the polynomial expansion (default 3).
    alpha : float, optional
        Ridge penalty strength (default 0.0, i.e. no penalty).

    Returns
    -------
    Same type as ``y``
        ``y`` minus the fitted values, element by element.
    """
    basis = PolynomialFeatures(degree=degree).fit_transform(x)
    model = Ridge(alpha=alpha).fit(basis, y)
    return y - model.predict(basis)


# First stage: residual of log output after the polynomial fit on labour,
# capital, energy and materials.
stage1_resids = np_resids(
    ldata['routput'],
    ldata[['totlab', 'rcapstock', 'renerg', 'realmats']],
)

In [44]:
def stage2obj(params, stage1_resids, dta):
    """Second-stage moment contributions, one row per usable observation.

    Parameters
    ----------
    params : 1-D array-like
        Coefficient on the constant, then one coefficient per column of
        ``x`` (in column order), then ``rho`` as the last element.
    stage1_resids : pandas.Series
        First-stage residuals; ``y - stage1_resids`` is used as the control
        function.  Assumed to share ``y``'s index -- TODO confirm at callers.
    dta : tuple
        ``(y, x)``.  Both are grouped by ``'id'`` to build one-period lags,
        so they must be pandas objects exposing ``'id'`` as a group key.

    Returns
    -------
    pandas.DataFrame
        The lagged regressors (constant included) and the lagged control
        function, each multiplied row-wise by the shock.  Rows containing
        any missing value (e.g. where no lag exists) are dropped.
    """
    y, x = dta
    beta, rho = params[:-1], params[-1]

    x = sm.add_constant(x)
    xlag = x.groupby('id').shift(1)

    # Value of the control function from stage 1, and its within-id lag.
    control = y - stage1_resids
    controllag = control.groupby('id').shift(1)

    # Linear index x'beta for the current and the previous period.
    xb = x.mul(beta, axis=1).sum(axis=1)
    xb_lag = xlag.mul(beta, axis=1).sum(axis=1)

    # Shock: current residual minus rho times the lagged implied residual.
    shock = y - xb - rho * (controllag - xb_lag)

    # Interact the shock with the lagged instruments to get the moments.
    moments = pd.concat((xlag, controllag), axis=1)
    return moments.mul(shock, axis=0).dropna()
    
# Smoke check: evaluate the moments at an all-ones parameter vector
# (constant, rcapstock, totlab, rho) and display the resulting frame.
d = (ldata['routput'], ldata[['rcapstock', 'totlab']])
stage2obj(np.ones(4), stage1_resids, d)

Unnamed: 0_level_0,Unnamed: 1_level_0,const,rcapstock,totlab,routput
id,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
18.0,1980,0.195252,2.051434,0.662472,2.081874
18.0,1981,-0.142531,-1.483868,-0.461758,-1.533066
54.0,1980,0.248446,2.059740,0.853627,2.527602
54.0,1981,-0.387994,-3.163360,-1.333096,-4.001083
54.0,1982,0.265327,2.127548,0.789210,2.583670
54.0,1983,-0.313420,-2.601060,-0.795843,-3.115470
54.0,1984,0.095895,0.813253,0.237312,0.924306
54.0,1985,-0.389332,-3.262285,-0.974299,-3.789595
54.0,1986,1.228018,10.144449,3.509606,11.966046
54.0,1987,-0.045909,-0.373876,-0.131204,-0.459534


In [46]:
class ACF(GMM):
    """GMM model whose moment conditions are produced by ``stage2obj``."""

    def __init__(self, dta, stage1_resid, *args, **kwds):
        """Set up the model.

        Parameters
        ----------
        dta : tuple
            ``(y, x)`` as expected by ``stage2obj``.
        stage1_resid : pandas.Series
            First-stage residuals, forwarded to ``stage2obj`` on every
            evaluation of the moment conditions.
        *args, **kwds
            Passed through to ``GMM.__init__`` after the moment count.
        """
        y, x = dta
        # stage2obj returns one column per lagged regressor (the constant
        # plus each column of x) and one for the lagged control function.
        k_moms = x.shape[1] + 2
        super(ACF, self).__init__(y, x, stage1_resid, k_moms, *args, **kwds)
        # Re-assign the objects as passed in: stage2obj calls pandas methods
        # (groupby/shift) on them.  Presumably the base class keeps array
        # versions -- verify against statsmodels.
        self.endog = y
        self.exog = x
        # Use the constructor argument, not a notebook-level global, so the
        # model always evaluates moments with the residuals it was given.
        self.instr = stage1_resid

    def momcond(self, params):
        """Return the moment contributions at ``params`` as a NumPy array."""
        d = self.endog, self.exog
        return np.array(stage2obj(params, self.instr, d))
        
        
d = ldata['routput'], ldata[['rcapstock', 'totlab']]
acfmodel = ACF(d, stage1_resids)
# Start values in stage2obj's parameter order: constant, one slope per
# column of x (rcapstock, totlab), then rho.
acfresult = acfmodel.fit(np.array([1.3] + 2*[.1] + [.5]))
# Function-call form prints identically on Python 2 and also parses on 3.
print(acfresult.summary())

         Current function value: 0.000411
         Iterations: 33
         Function evaluations: 208
         Gradient evaluations: 190
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 72
         Function evaluations: 82
         Gradient evaluations: 82
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 0
         Function evaluations: 1
         Gradient evaluations: 1
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 0
         Function evaluations: 1
         Gradient evaluations: 1
                                 ACF Results                                  
Dep. Variable:                routput   Hansen J:                    5.656e-11
Model:                            ACF   Prob (Hansen J):                   nan
Method:                           GMM                                         
Date:                Wed, 15 May 2019 