In [1]:
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy

from statsmodels.sandbox.regression.gmm import GMM
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

In [2]:
data = pd.read_stata('chile.dta')

In [3]:
ldata = data.copy()

ldata[['routput', 'totlab', 'renerg','realmats',
       'rcapstock']] = np.log(data[['routput','totlab', 'renerg','realmats','rcapstock']])

ldata = ldata.dropna()

do a non-parametric regression with $l_{it}$, $m_{it}$, $k_{it}$ on $y_{it}$ and save residual, and fitted value

given a guess of beta1 and beta2 compute an implied omega i.e. productive shock
get wit-1 (i.e. without estimating a function)
the residuals should be exogenous
these are the moment conditions

https://www.aeaweb.org/content/file?id=3015

In [19]:
d = ldata['routput'],ldata[['totlab']],ldata[['rcapstock','realmats','renerg']],ldata[['id','year']]

def np_resids(y,x):
    """residuals from lasso"""
    poly = PolynomialFeatures(degree=4)
    x_poly = poly.fit_transform(x)
    clf = Ridge(alpha=1.0)
    clf.fit(x_poly, y) 
    resid = y-clf.predict(x_poly)
    return resid

stage1_resids = np_resids(ldata['routput'],ldata[['totlab','rcapstock','realmats','renerg']])

In [12]:
def stage2obj(params, stage1_resids, dta):
    
    y,x,label = dta
    t = label['year']
    
    #compute value addeed to production by shock and all terms
    value = y - stage1_resids
    
    #compute implied value of endogenous shock
    params_tile = np.tile(params,x.shape[0]).reshape(x.shape[0],x.shape[1])
    shock = value - (params_tile*x).sum(axis=1)
    shock = np.array(shock).reshape((shock.shape[0],1))
    
    #calc shock (mostly formating correctly)
    shocklag = zvalue_predict - (zparams_tile*z).sum(axis=1)
    shocklag = np.array(shocklag).reshape((shocklag.shape[0],1))
    
    shocklag = np.concatenate((shocklag,label[['id','year']]),axis=1)
    shocklag = pd.DataFrame(shocklag,columns=['shock2','id','year'])
    shocklag = shocklag.set_index(['id','year'])
    shocklag = shocklag.groupby('id').shift(1)
    
    #clean 'exogenous' portion of production shock
    both_shocks = np.concatenate((shocklag,shock),axis=1)
    both_shocks = both_shocks[~np.isnan(both_shocks).any(axis=1)]
    shocklag = both_shocks[:,0].reshape(both_shocks.shape[0],1)
    npshocklag = np.concatenate((shocklag,shocklag**2,shocklag**3),axis=1)
    npshocklag = sm.add_constant(npshocklag)
    
    return sm.OLS(both_shocks[:,1],npshocklag).fit().resid
    
    
d = ldata['routput'],ldata[['rcapstock','totlab']],ldata[['id','year']]

stage2obj(np.array([.5,.5]),stage1_resids, d).shape

(18044,)

In [13]:
class ACF(GMM):
    
    def __init__(self, dta, stage1_resids, *args, **kwds):
        # set appropriate counts for moment conditions and parameters
        y,x,z,label = dta
        super(ACF, self).__init__(y,x,z, x.shape[1],*args, **kwds)
        self.endog = y
        self.exog = x
        self.instr = z
        self.ids = label
        self.stage1_resids =  stage1_resids
        
        #self up a lag variable
        exogs = np.concatenate((x, label[['id','year']]),axis=1)
        colnames = ['x'+str(i) for i in range(self.nmoms)]
        colnames = colnames + ['id','year']
        exogs = pd.DataFrame(exogs, columns= colnames )
        exogs = exogs.set_index(['id','year'])
        exogs = exogs.groupby('id').shift(1)
        print exogs.shape
        self.exoglag = exogs.dropna()
        self.data.xnames = [col for col in x.columns]
        
        
    def momcond(self, params):
        d = self.endog, self.exog, self.ids
        resids = stage2obj(params, self.stage1_resids , d)
        resids_tile = np.repeat(resids,self.nmoms)
        resids_tile = resids_tile.reshape((resids.shape[0],self.nmoms))
        return resids_tile*self.exoglag
        
    
d = ldata['routput'],ldata[['rcapstock','totlab']],ldata[['realmats','renerg']],ldata[['id','year']]
acfmodel = ACF(d, stage1_resids)
acfresult = acfmodel.fit(np.array([.5,.5]))
print acfresult.summary()

(20717, 2)
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 6
         Function evaluations: 8
         Gradient evaluations: 8
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 0
         Function evaluations: 1
         Gradient evaluations: 1
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 0
         Function evaluations: 1
         Gradient evaluations: 1
Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 0
         Function evaluations: 1
         Gradient evaluations: 1
                                 ACF Results                                  
Dep. Variable:                routput   Hansen J:                    1.378e-07
Model:                            ACF   Prob (Hansen J):                   nan
Method:                           GMM                                       