In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Append the wave-7 data to the wave 1-6 data.
longterm = 'GHA_2008_MGFERE_v01_M_Stata8/r7tomerge.dta'
baseline = 'GHA_2008_MGFERE_v01_M_Stata8/ReplicationDataGhanaJDE.dta'
data_longterm = pd.read_stata(longterm)
data_baseline = pd.read_stata(baseline)
# Build the 0/1 indicator in one assignment. The previous chained form
# (df['male'][mask] = 1) raised SettingWithCopyWarning and is silently a no-op
# under pandas copy-on-write.
data_baseline['male'] = (data_baseline.gender == 'male').astype(float)
# DataFrame.append was removed in pandas 2.0; pd.concat is the direct replacement.
data_all_waves = pd.concat([data_baseline, data_longterm], ignore_index=True)
data_all_waves = data_all_waves.sort_values(by=['sheno','wave'], ascending=True)

# Fill in useful information/baseline covariates from wave 1-6 into wave 7
# (we replicate the same Stata commands from Table 5 in JDEreplicationfilesGhana.do).
baseline_columns = ['atreatcash', 'atreatequip', 'male', 'groupnum', 'male_male', 'female_female',
                    'male_mixed', 'female_mixed', 'highcapture','highcapital','highgroup', 'mlowgroup']

n_rows = len(data_all_waves)
# 'sheno' is never written below, so it can be read once up front.
sheno_values = data_all_waves['sheno'].to_numpy()
# Positional column indices let us write with .iat on the frame itself instead of
# through a chained df[col].iloc[...] assignment (which may hit a temporary copy).
column_positions = {col: data_all_waves.columns.get_loc(col) for col in baseline_columns}

for i in range(1, n_rows):
    # Row i starts a new 'sheno', so row i-1 is the last (highest-wave) row of
    # the previous one; its baseline covariates are expected to be missing.
    if sheno_values[i] != sheno_values[i-1]:
        for col in baseline_columns:
            pos = column_positions[col]
            if np.isnan(data_all_waves.iat[i-1, pos]):
                # Take the value one row back, falling back to two rows back.
                previous = data_all_waves.iat[i-2, pos]
                data_all_waves.iat[i-1, pos] = previous if not np.isnan(previous) \
                    else data_all_waves.iat[i-3, pos]
            else:
                raise RuntimeError("It should be nan.")

# The very last row is never followed by a 'sheno' change, so fill it separately
# (same rule: one row back, else two rows back).
if n_rows > 1:
    last = n_rows - 1
    for col in baseline_columns:
        pos = column_positions[col]
        previous = data_all_waves.iat[last-1, pos]
        data_all_waves.iat[last, pos] = previous if not np.isnan(previous) \
            else data_all_waves.iat[last-2, pos]

data_all_waves

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_baseline['male'][data_baseline.gender=='male'] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Unnamed: 0,gender,male_male,female_female,male_mixed,female_mixed,highcapture,highcapital,groupnum,cashtreat,equiptreat,...,household_5h,belongf,drink,noimpulse1,noimpulse2,jointown,numsib,sibsAccra,accept,surviver7
0,female,0.0,1.0,0.0,0.0,0.0,1.0,1108.0,0.0,0.0,...,Never,0.0,1.0,1.0,1.0,0.0,0.191877,3.30703,,
5,female,0.0,1.0,0.0,0.0,0.0,1.0,1108.0,0.0,0.0,...,Never,0.0,1.0,1.0,1.0,0.0,0.191877,3.30703,,
2,female,0.0,1.0,0.0,0.0,0.0,1.0,1108.0,0.0,0.0,...,Never,0.0,1.0,1.0,1.0,0.0,0.191877,3.30703,,
4,female,0.0,1.0,0.0,0.0,0.0,1.0,1108.0,0.0,0.0,...,Never,0.0,1.0,1.0,1.0,0.0,0.191877,3.30703,,
1,female,0.0,1.0,0.0,0.0,0.0,1.0,1108.0,0.0,0.0,...,Never,0.0,1.0,1.0,1.0,0.0,0.191877,3.30703,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4756,female,0.0,1.0,0.0,0.0,1.0,1.0,906.0,0.0,0.0,...,More than once a week,0.0,0.0,0.0,0.0,0.0,-2.808123,-1.69297,,
4755,female,0.0,1.0,0.0,0.0,1.0,1.0,906.0,0.0,0.0,...,More than once a week,0.0,0.0,0.0,0.0,0.0,-2.808123,-1.69297,,
4753,female,0.0,1.0,0.0,0.0,1.0,1.0,906.0,0.0,0.0,...,More than once a week,0.0,0.0,0.0,0.0,0.0,-2.808123,-1.69297,,
4754,female,0.0,1.0,0.0,0.0,1.0,1.0,906.0,0.0,0.0,...,More than once a week,0.0,0.0,0.0,0.0,0.0,-2.808123,-1.69297,,


### Replicate Column (1) of Table 5

In [15]:
from empirical import Inference
from scipy.stats import norm

# Recover the large strata from the baseline covariates and add a variable
# indicating treatment status in {0, 1, 2}.
columns_needed = ['realfinalprofit', 'atreatcash', 'atreatequip', 'wave', 'male', 'groupnum',
                  'sheno', 'male_male', 'female_female', 'male_mixed', 'female_mixed',
                  'highcapture','highcapital', 'highgroup', 'mlowgroup']
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of data_all_waves (avoids SettingWithCopyWarning).
df_wave7 = data_all_waves.loc[data_all_waves.wave == 7, columns_needed].copy()
# Encode the six stratification dummies as one number, one decimal digit each.
df_wave7['strata'] = df_wave7.male_male*100000 + df_wave7.female_female*10000 + df_wave7.male_mixed*1000 \
    + df_wave7.female_mixed*100 + df_wave7.highcapture*10 + df_wave7.highcapital
# 0 = neither flag set, 1 = atreatcash, 2 = atreatequip (equipment wins if both are set).
treatment = np.zeros(len(df_wave7))
treatment[(df_wave7['atreatcash'] == 1).to_numpy()] = 1
treatment[(df_wave7['atreatequip'] == 1).to_numpy()] = 2
df_wave7['treatment'] = treatment
df_wave7 = df_wave7.sort_values(by=['strata','groupnum','treatment'], ascending=True)

# Keep the relevant variables and create dummy variables for group fixed effects.
# dtype=float: pandas >= 2.0 returns bool dummies by default, and a design matrix
# that mixes bool and float columns is cast to object dtype, which statsmodels rejects.
dummies = pd.get_dummies(df_wave7.groupnum, dtype=float)
df_wave7 = pd.concat([df_wave7, dummies], axis=1, join='inner')

# replicate regression from Table 5 of GhanaJDE paper
def reg(df, column, fixed_effects=True):
    """OLS of ``realfinalprofit`` on the two treatment dummies for one column of Table 5.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``realfinalprofit``, ``atreatcash``, ``atreatequip`` and,
        depending on ``column``, ``male``, ``highgroup`` and ``mlowgroup``.
        With ``fixed_effects=True`` it must also contain every column of the
        notebook-level ``dummies`` frame.
    column : int
        Subsample selector, 1-5:
        1 = all rows with non-missing profit, 2 = ``male == 1``, 3 = ``male == 0``,
        4 = ``male == 0`` and ``highgroup == 1``, 5 = ``male == 0`` and ``mlowgroup == 1``.
    fixed_effects : bool, default True
        Whether to add the ``dummies`` columns as regressors.

    Returns
    -------
    tuple
        ``(coefficients, HC0 standard errors, p-value)`` where the first two are
        arrays ordered (atreatcash, atreatequip) and the p-value is for the
        t-test of ``atreatcash - atreatequip = 0``.

    Raises
    ------
    RuntimeError
        If ``column`` is not one of 1-5.
    """
    treatment_cols = ['atreatcash', 'atreatequip']
    Y = df.realfinalprofit
    if fixed_effects:
        # NOTE: reads the notebook-level `dummies` frame created with df_wave7.
        X = df[treatment_cols + list(dummies.columns)]
    else:
        X = df[treatment_cols]

    observed = Y.notna()  # rows with a non-missing outcome
    if column == 1:
        print("*************(1) of Table 5*************")
        idx = observed
    elif column == 2:
        print("*************(2) of Table 5*************")
        idx = observed & (df.male==1)
    elif column == 3:
        print("*************(3) of Table 5*************")
        idx = observed & (df.male==0)
    elif column == 4:
        print("*************(4) of Table 5*************")
        idx = observed & (df.male==0) & (df.highgroup==1)
    elif column == 5:
        print("*************(5) of Table 5*************")
        idx = observed & (df.male==0) & (df.mlowgroup==1)
    else:
        raise RuntimeError("Wrong column")

    Y = Y[idx]
    X = X[idx]
    print(len(Y))
    X = sm.add_constant(X)
    results = sm.OLS(Y, X).fit(cov_type='HC0')

    # Select the treatment coefficients by label rather than by position, so the
    # result does not depend on where (or whether) add_constant put the intercept.
    coefficients = results.params[treatment_cols]
    print(coefficients)
    print(results.bse[treatment_cols])

    # Restriction vector for H0: atreatcash - atreatequip = 0.
    r = np.zeros(len(results.params))
    r[results.params.index.get_loc('atreatcash')] = 1
    r[results.params.index.get_loc('atreatequip')] = -1
    T_test = results.t_test(r)
    print(T_test.pvalue)
    return coefficients.values, results.HC0_se[treatment_cols].values, T_test.pvalue

# inference based on matched-tuples
def reg_MT(df, column):
    """Run the matched-tuples ``Inference`` on one Table 5 subsample.

    Rows of ``df.realfinalprofit`` are filtered according to ``column``
    (1 = profit >= 0, 2 = male == 1, 3 = male == 0, 4 = male == 0 and
    highgroup == 1, 5 = male == 0 and mlowgroup == 1), reshaped into rows of
    four, and handed to ``Inference``.

    Returns ``[tau1, se_tau1, tau2, se_tau2, pval]`` where ``pval`` is the
    two-sided normal p-value of ``|tau12| / se_tau12``.

    Raises ``RuntimeError`` if ``column`` is not one of 1-5.
    """
    profits = df.realfinalprofit.to_numpy()
    if column not in (1, 2, 3, 4, 5):
        raise RuntimeError("Wrong column")

    if column == 1:
        # NOTE(review): `>= 0` is False for NaN, so this drops missing profits,
        # but it would drop negative profits too. Columns 2-5 apply no such
        # filter; confirm this asymmetry is intended.
        keep = profits >= 0
    else:
        keep = df.male == (1 if column == 2 else 0)
        if column == 4:
            keep = keep & (df.highgroup == 1)
        elif column == 5:
            keep = keep & (df.mlowgroup == 1)

    # One row per group of four consecutive observations; presumably relies on
    # df being sorted so each quadruplet is contiguous - TODO confirm.
    quads = profits[keep].reshape(-1, 4)
    # NOTE(review): the same array is passed for both arguments; verify against
    # the signature of empirical.Inference.
    inf = Inference(quads, quads)
    z_score = np.abs(inf.tau12) / inf.se_tau12
    two_sided_p = (1 - norm.cdf(z_score)) * 2
    return [inf.tau1, inf.se_tau1, inf.tau2, inf.se_tau2, two_sided_p]


def print_results(results):
    """Print five result columns as LaTeX-style table rows.

    ``results[i]`` is indexed as ``[estimate, se, estimate, se, p-value]`` for
    table column ``i`` (0-4). Five rows are printed:

    * rows 0 and 2: the estimate with two decimals, followed by ``^{*}``,
      ``^{**}`` or ``^{***}`` when ``|estimate| / se`` exceeds the two-sided
      normal critical value at the 10%, 5% or 1% level;
    * rows 1 and 3: the standard error in parentheses;
    * row 4: the p-value with three decimals (no stars).

    Each row ends with a LaTeX line break (two backslashes).
    """
    critical_values = [norm.ppf(0.95), norm.ppf(0.975), norm.ppf(0.995)]

    print("  (1)   (2)   (3)   (4)   (5)")
    for r in range(5):
        for i in range(5):
            value = results[i][r]
            if r == 1 or r == 3:
                cell = " & & ({:.2f})".format(value)
            elif r == 4:
                # The p-value row has never carried stars in the printed output;
                # the unused star computation that used to live here was dropped.
                cell = " & & {:.3f}".format(value)
            else:
                # The standard error sits immediately after its estimate.
                tstat = np.abs(value) / results[i][r+1]
                star = ''.join('*' for c in critical_values if tstat > c)
                star = "^{" + star + "}" if len(star) > 0 else ''
                cell = " & & {:.2f}".format(value) + star
            print(cell, end=' ')
        print("\\\\")

### Re-analyze after dropping the 4 non-quadruplet groups

In [16]:
# Remove the four groups that are not quadruplets.
bad_groups = [991,992,993,994]
in_bad_group = df_wave7['groupnum'].isin(bad_groups)
df_wave7_quad = df_wave7[~in_bad_group]

# Table 5 regressions, columns (1)-(5), including the group dummies as fixed effects.
print("*************** With fixed effects ***************")
for table_column in range(1, 6):
    reg(df_wave7_quad, table_column, fixed_effects=True)

*************** With fixed effects ***************
*************(1) of Table 5*************
542
atreatcash     20.445134
atreatequip    34.044968
dtype: float64
atreatcash     26.771470
atreatequip    17.448152
dtype: float64
0.6164895506839856
*************(2) of Table 5*************
218
atreatcash     79.310288
atreatequip    64.843760
dtype: float64
atreatcash     62.057182
atreatequip    37.124677
dtype: float64
0.8065781659078999
*************(3) of Table 5*************
324
atreatcash    -17.950761
atreatequip    14.633233
dtype: float64
atreatcash     16.315373
atreatequip    13.600014
dtype: float64
0.0574527574843884
*************(4) of Table 5*************
119
atreatcash    -41.142430
atreatequip    56.825492
dtype: float64
atreatcash     33.828448
atreatequip    26.020638
dtype: float64
0.008572234227103405
*************(5) of Table 5*************
205
atreatcash    -4.682495
atreatequip   -7.535105
dtype: float64
atreatcash     16.419757
atreatequip    15.016202
dtype: float6

In [8]:
# Table 5 regressions, columns (1)-(5), on the treatment dummies only (no fixed effects).
print("*************** Without fixed effects ***************")
for table_column in range(1, 6):
    reg(df_wave7_quad, table_column, fixed_effects=False)

*************** Without fixed effects ***************
*************(1) of Table 5*************
atreatcash     18.022939
atreatequip    31.586360
dtype: float64
atreatcash     29.655387
atreatequip    21.643799
dtype: float64
0.6799260665604147
*************(2) of Table 5*************
atreatcash     56.169328
atreatequip    62.015151
dtype: float64
atreatcash     67.949012
atreatequip    40.601521
dtype: float64
0.9375586492873343
*************(3) of Table 5*************
atreatcash    -8.434517
atreatequip    4.632629
dtype: float64
atreatcash     18.246672
atreatequip    20.966571
dtype: float64
0.4842253313024657
*************(4) of Table 5*************
atreatcash    -15.324402
atreatequip    42.102704
dtype: float64
atreatcash     38.994530
atreatequip    48.811936
dtype: float64
0.1707530054152282
*************(5) of Table 5*************
atreatcash     -3.842612
atreatequip   -13.398631
dtype: float64
atreatcash     17.137212
atreatequip    16.077904
dtype: float64
0.553559071475459

In [12]:
from empirical import Inference
from scipy.stats import norm

# Matched-tuples inference for columns (1)-(5) of Table 5.
# The five subsamples differ only in the row filter, so they are expressed as
# (column number, mask builder) pairs and processed by one loop instead of five
# copy-pasted stanzas. A builder of None means "use every row".
# NOTE(review): the saved output of this cell is NaN throughout - presumably
# because realfinalprofit contains missing values that are not filtered here;
# confirm how Inference is meant to handle them.
profit_all = df_wave7_quad.realfinalprofit.to_numpy()
subsamples = [
    (1, None),
    (2, lambda d: d.male == 1),
    (3, lambda d: d.male == 0),
    (4, lambda d: (d.male == 0) & (d.highgroup == 1)),
    (5, lambda d: (d.male == 0) & (d.mlowgroup == 1)),
]

for table_column, build_mask in subsamples:
    print("*************({}) of Table 5*************".format(table_column))
    Y = profit_all
    if build_mask is not None:
        Y = Y[build_mask(df_wave7_quad).to_numpy()]
    # One row per group of four consecutive observations.
    Y = Y.reshape(-1,4)
    inf = Inference(Y, Y)
    tstats = np.abs(inf.tau12)/inf.se_tau12
    pval = (1-norm.cdf(tstats))*2
    # Point estimates first, then their standard errors (same labels, as before).
    print("atreatcash    ", inf.tau1)
    print("atreatequip    ", inf.tau2)
    print("atreatcash    ", inf.se_tau1)
    print("atreatequip    ", inf.se_tau2)
    print("pval of same effects", pval)

*************(1) of Table 5*************
atreatcash     nan
atreatequip     nan
atreatcash     nan
atreatequip     nan
pval of same effects nan
*************(2) of Table 5*************
atreatcash     nan
atreatequip     nan
atreatcash     nan
atreatequip     nan
pval of same effects nan
*************(3) of Table 5*************
atreatcash     nan
atreatequip     nan
atreatcash     nan
atreatequip     nan
pval of same effects nan
*************(4) of Table 5*************
atreatcash     nan
atreatequip     nan
atreatcash     nan
atreatequip     nan
pval of same effects nan
*************(5) of Table 5*************
atreatcash     nan
atreatequip     nan
atreatcash     nan
atreatequip     nan
pval of same effects nan
