In [60]:
from functools import partial
import numpy as np
import os
import pandas as pd
import scipy

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt


import sys
sys.path.append('../')
import util

# Pre-bind save=True so that every save_fig(...) call in this notebook forwards
# save=True to util.save_fig by default; a call site can still pass save=...
# explicitly to override it (standard functools.partial keyword semantics).
save_fig = partial(util.save_fig, save=True)

In [72]:
# --- Notebook configuration -------------------------------------------------
# These names are handed to the R cells through the `%get` magic, so keep the
# variable names stable.

# Location of the combined dataset CSV. The default is the original author's
# path; set the EM2020_DATASET_FILE environment variable to run the notebook
# on another machine without editing this cell.
dataset_file = os.environ.get(
    'EM2020_DATASET_FILE',
    '/home/drake/workspace/em-2020/data/combined_dataset_all_final.csv',
)

# Panel index columns (unit, time) and the data columns kept from the CSV.
index_fields = ['Country', 'Year']
data_fields = ['ENI', 'POP', 'URB', 'AFL', 'TI', 'CTS', 'KAOPEN']

# NOTE(review): `prefix` is not used by any cell visible in this part of the
# notebook; presumably a file-name prefix for saved artifacts -- confirm.
prefix = 'template'

# Sample window; both bounds are inclusive in the R row filter.
start_year = 2000
end_year = 2015

# Values of the Country column that are dropped from the sample.
exclude_countries = ['VEN', 'DZA']

In [73]:
%get dataset_file index_fields data_fields start_year end_year exclude_countries
# Load the combined dataset, restrict it to the configured countries, years and
# columns, show the year coverage per country, and build the plm panel object.
suppressMessages({
    library(dplyr)
    library(plm)
})
#lag <- plm::lag # dplyr overwrites lag
`%notin%` <- Negate(`%in%`)

# Row filter: keep countries that are not excluded and years inside the
# inclusive [start_year, end_year] window. which() drops rows where the
# condition is NA, matching what subset() does.
raw.df <- read.csv(dataset_file)
in.sample <- (raw.df$Country %notin% exclude_countries) &
    (raw.df$Year >= start_year) & (raw.df$Year <= end_year)
data.df <- raw.df[which(in.sample), c(index_fields, data_fields), drop = FALSE]

# Displayed as the cell output: first and last observed year for each country.
summarize(
    group_by(data.df, Country),
    min = min(Year), max = max(Year), .groups = 'drop'
)

# Panel version of data.df; the index columns are moved out of the data
# columns (drop.index) and used to build the row names (row.names).
data.panel <- pdata.frame(
    data.df,
    index = c("Country", "Year"),
    drop.index = TRUE,
    row.names = TRUE
)

Country,min,max
<fct>,<int>,<int>
ALB,2000,2011
ARG,2000,2014
ARM,2005,2014
AUS,2000,2015
AUT,2000,2015
AZE,2001,2014
BEL,2000,2015
BGD,2000,2014
BGR,2000,2014
BLR,2000,2014


In [82]:
# Two-step, two-way-effects dynamic panel GMM for ENI (plm::pgmm), with two
# lags of ENI as regressors and lags 2..99 of ENI as GMM instruments.
#
# Alternative specifications tried by the author (kept for reference):
# z0 <- pgmm(ENI ~ lag(ENI, 1:2) + lag(URB, 0:1) + lag(AFL, 0:2) + lag(POP, 0:2) | lag(ENI, 2:99),
# z0 <- pgmm(ENI ~ lag(ENI, 1:2) + lag(URB, 0:1) | lag(ENI, 2:99),
#
# URB used to appear twice in the formula; R drops repeated terms, so listing
# it once gives the same model and the same coefficient table.
#
# NOTE(review): the recorded output warns that the second-step matrix is
# singular and reports a Sargan statistic with 102 degrees of freedom on
# n = 88 countries. The instrument set `lag(ENI, 2:99)` is probably too large
# for this panel -- consider capping the lag depth or collapsing the
# instruments; TODO confirm with the author before changing the specification.
z0 <- pgmm(
    ENI ~ lag(ENI, 1:2) + POP + URB + AFL + I(AFL^2) + CTS | lag(ENI, 2:99),
    data = data.panel, effect = "twoways", model = "twosteps"
)
summary(z0, robust = TRUE)

“the second-step matrix is singular, a general inverse is used”
“a general inverse is used”


Twoways effects Two steps model

Call:
pgmm(formula = ENI ~ lag(ENI, 1:2) + POP + URB + AFL + I(AFL^2) + 
    URB + CTS | lag(ENI, 2:99), data = data.panel, effect = "twoways", 
    model = "twosteps")

Unbalanced Panel: n = 88, T = 3-16, N = 1276

Number of Observations Used: 1012

Residuals:
      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.4380978 -0.0238947  0.0000000  0.0004094  0.0241333  0.2472561 

Coefficients:
                Estimate Std. Error z-value  Pr(>|z|)    
lag(ENI, 1:2)1  0.443332   0.106316  4.1699 3.047e-05 ***
lag(ENI, 1:2)2  0.069366   0.049588  1.3988   0.16186    
POP            -0.208796   0.194060 -1.0759   0.28196    
URB             0.277250   0.343584  0.8069   0.41970    
AFL            -0.229496   0.441804 -0.5195   0.60345    
I(AFL^2)       -0.020558   0.021983 -0.9352   0.34969    
CTS             0.040037   0.019270  2.0777   0.03774 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Sargan test: chisq(102) 

In [68]:
library(pdynmc)

# Two-step dynamic panel estimate of ENI with pdynmc, using the filtered data
# frame (not the plm panel object). Arguments are grouped by concern; they are
# all passed by name, so their order does not matter.
m1 <- pdynmc(
    # data and panel identifiers
    dat = data.df,
    varname.i = "Country",
    varname.t = "Year",
    # moment conditions: differences only
    use.mc.diff = TRUE,
    use.mc.lev = FALSE,
    use.mc.nonlin = FALSE,
    # dependent variable and its lags (L1.ENI, L2.ENI in the summary)
    include.y = TRUE,
    varname.y = "ENI",
    lagTerms.y = 2,
    # further controls (L0.URB, L1.URB in the summary)
    fur.con = TRUE,
    fur.con.diff = TRUE,
    fur.con.lev = FALSE,
    varname.reg.fur = c("URB"),
    lagTerms.reg.fur = c(1),
    # alternative control set tried by the author:
    #   varname.reg.fur = c("URB", 'AFL', 'POP'), lagTerms.reg.fur = c(1, 2, 2),
    # year dummies
    include.dum = TRUE,
    dum.diff = TRUE,
    dum.lev = FALSE,
    varname.dum = "Year",
    # weighting matrix, standard errors and estimation procedure
    w.mat = "iid.err",
    std.err = "corrected",
    estimation = "twostep",
    opt.meth = "none"
)
summary(m1)


Dynamic linear panel estimation (twostep)
Estimation steps: 2

Coefficients:
       Estimate Std.Err.rob z-value.rob Pr(>|z.rob|)    
L1.ENI  0.51832     0.10454       4.958      < 2e-16 ***
L2.ENI  0.05580     0.05692       0.980      0.32709    
L0.URB  3.93964     7.68457       0.513      0.60795    
L1.URB -3.57779     7.36517      -0.486      0.62697    
2008   -0.01412     0.01130      -1.250      0.21130    
2009   -0.01049     0.01737      -0.604      0.54584    
2010   -0.01861     0.02080      -0.895      0.37079    
2011   -0.05690     0.02141      -2.658      0.00786 ** 
2012   -0.06247     0.02627      -2.378      0.01741 *  
2013   -0.10044     0.03287      -3.055      0.00225 ** 
2014   -0.10186     0.03987      -2.555      0.01062 *  
2015   -0.10021     0.04206      -2.382      0.01722 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

 54 total instruments are employed to estimate 12 parameters
 44 linear (DIF) 
 2 further controls (DIF) 
 8 time

In [70]:
# Second pdynmc run; it overwrites `m1` from the previous pdynmc cell.
#
# NOTE(review): the code in this cell is identical to the previous pdynmc cell
# (use.mc.lev = FALSE), yet the output saved with this cell reports
# "9 linear (LEV)" instruments and 65 instruments in total, whereas the
# previous cell's output reports 54 instruments and no LEV line. The cell
# appears to have been edited after it was last executed, so the saved output
# cannot be reproduced from this code as written -- TODO: confirm the intended
# settings (presumably a run with level moment conditions) and re-run.
library(pdynmc)
m1 <- pdynmc(dat = data.df, varname.i = "Country", varname.t = "Year",
             use.mc.diff = TRUE, use.mc.lev = FALSE, use.mc.nonlin = FALSE,
             include.y = TRUE, varname.y = "ENI", lagTerms.y = 2, 
             fur.con = TRUE, fur.con.diff = TRUE, fur.con.lev = FALSE, 
             varname.reg.fur = c("URB"), lagTerms.reg.fur = c(1), 
#              varname.reg.fur = c("URB", 'AFL', 'POP'), lagTerms.reg.fur = c(1, 2, 2), 
             include.dum = TRUE, dum.diff = TRUE, dum.lev = FALSE, varname.dum = "Year",
             w.mat = "iid.err", std.err = "corrected", estimation = "twostep", opt.meth = "none")
summary(m1)


Dynamic linear panel estimation (twostep)
Estimation steps: 2

Coefficients:
        Estimate Std.Err.rob z-value.rob Pr(>|z.rob|)    
L1.ENI  1.056448    0.061512      17.175      < 2e-16 ***
L2.ENI -0.017574    0.066185      -0.266      0.79024    
L0.URB  0.105146    0.456783       0.230      0.81809    
L1.URB -0.101727    0.453951      -0.224      0.82276    
2008    0.015762    0.009361       1.684      0.09218 .  
2009    0.045823    0.009617       4.765      < 2e-16 ***
2010    0.048587    0.011059       4.393        1e-05 ***
2011    0.019737    0.009369       2.107      0.03512 *  
2012    0.039493    0.009542       4.139        3e-05 ***
2013    0.017536    0.011284       1.554      0.12018    
2014    0.044968    0.012450       3.612      0.00030 ***
2015    0.056813    0.016475       3.448      0.00056 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

 65 total instruments are employed to estimate 12 parameters
 44 linear (DIF) 9 linear (LEV) 
 2 fur