In [38]:
# --- Simulation configuration -------------------------------------------
# Chain settings: passed to sample_network_chain as R / burnin_R.
num_sample = 5_000
num_burn = 2_000

# First argument of get_graph; also embedded in the result file names.
sample_size = 800

# Worker-process count handed to run_pll(..., processes=n_cpu).
n_cpu = 10

# Degree bounds passed to get_graph; min_degree also scales the first two
# entries of beta.
min_degree, max_degree = 2, 5

In [None]:
from dgp import sample_network_chain, get_graph, sample_Y1, sample_Y2, agcEffect
import numpy as np

# 1. Simulate or load data
adj1 = get_graph(sample_size, min_degree, max_degree, seed=1)

# Sanity check: smallest and largest row sum of adj1 (the node degrees if
# adj1 is a 0/1 adjacency matrix -- TODO confirm against get_graph).
row_sums = adj1.sum(axis=1)
print(row_sums.min(), row_sums.max())

# Parameters handed to sample_network_chain below.
tau = np.array([-1.0, 0.50, -0.50])       # shape (3,)
rho = np.array([[0,0.1,0.2],
                [0.1,0,0.1],
                [0.2,0.1,0]])      # shape (3, 3), with 0s on the diagonal
nu = np.array([0.1,0,0,0.1,0,0,0.1,0,0]).reshape(3,3)       # shape (3, 3)
gamma = np.array([-1,2,0.1,-2,0.1,2,0.1,0.1])    # shape (8,)
beta = np.array([-1*min_degree,2*min_degree,-2,2,0.1,-2,0.1,2,0.1,0])  # shape (10,)

Y_chain, A_chain, L_chain = sample_network_chain(
    adj1, tau, rho, nu, gamma, beta,
    R=num_sample, burnin_R=num_burn, seed=0,
    sample_Y_func=sample_Y1, Atype=('gen', 0.7),
)

# Thin the chain: keep every `thin_every`-th draw along the first axis.
thin_every = 5
Y_chain = Y_chain[::thin_every]
A_chain = A_chain[::thin_every]
L_chain = L_chain[::thin_every]

# Per-draw descriptive summaries. Descriptive names replace the former
# dir/dir2/dir3/dir4 lists, the first of which shadowed the builtin `dir`.
diff_Y_by_A = []    # mean(Y | A == 1) - mean(Y | A == 0)
mean_A = []         # mean of A
diff_A_by_L0 = []   # mean(A | L[:, 0] == 1) - mean(A | L[:, 0] == 0)
diff_Y_by_L0 = []   # mean(Y | L[:, 0] == 1) - mean(Y | L[:, 0] == 0)
for Y, A, L in zip(Y_chain, A_chain, L_chain):
    first_cov = L[:, 0]
    # Difference of the two group means; numerically equal to the former
    # np.mean(np.mean(Y[A==1]) - Y[A==0]), but with the parenthesis where
    # the intent is readable.
    diff_Y_by_A.append(np.mean(Y[A == 1]) - np.mean(Y[A == 0]))
    mean_A.append(np.mean(A))
    diff_A_by_L0.append(np.mean(A[first_cov == 1]) - np.mean(A[first_cov == 0]))
    diff_Y_by_L0.append(np.mean(Y[first_cov == 1]) - np.mean(Y[first_cov == 0]))

# Averages over the thinned draws (same order as before).
np.mean(diff_Y_by_A), np.mean(mean_A), np.mean(Y_chain), np.mean(diff_A_by_L0), np.mean(diff_Y_by_L0)

2 5


 47%|████▋     | 3263/7000 [00:08<00:09, 397.99it/s]

In [None]:
# Ground-truth effects from agcEffect under the two treatment allocations
# (0.7 first, then 0.3); every other argument is shared between the calls.
res_truth1, res_truth2 = (
    agcEffect(adj1, tau, rho, nu, beta,
              treatment_allocation=allocation,
              R=100, burnin_R=100, seed=0)
    for allocation in (0.7, 0.3)
)

# Reference values used by the later comparison cells. All entries come from
# the 0.7 run except 'indirect', which contrasts psi_0_gamma between the 0.7
# and the 0.3 run.
ground_truth1 = {
    'average': res_truth1['average'],
    'direct': res_truth1['direct_effect'],
    'indirect': res_truth1['psi_0_gamma'] - res_truth2['psi_0_gamma'],
    'spillover_effect': res_truth1['spillover_effect'],
    'psi_1_gamma': res_truth1['psi_1_gamma'],
    'psi_0_gamma': res_truth1['psi_0_gamma'],
    'psi_zero': res_truth1['psi_zero'],
}

ground_truth1

100%|██████████| 200/200 [00:00<00:00, 527.66it/s]
100%|██████████| 200/200 [00:00<00:00, 588.90it/s]
100%|██████████| 200/200 [00:00<00:00, 593.21it/s]
100%|██████████| 200/200 [00:00<00:00, 590.65it/s]
100%|██████████| 200/200 [00:00<00:00, 591.71it/s]
100%|██████████| 200/200 [00:00<00:00, 589.65it/s]
100%|██████████| 200/200 [00:00<00:00, 592.19it/s]
100%|██████████| 200/200 [00:00<00:00, 589.07it/s]


{'average': 0.1577125,
 'direct': 0.1908875,
 'indirect': -0.09575,
 'spillover_effect': -0.280975,
 'psi_1_gamma': 0.21544999999999997,
 'psi_0_gamma': 0.024562499999999998,
 'psi_zero': 0.30553749999999996}

In [None]:
import numpy as np
from utils import run_pll, delete_all_files_in_folder
from run_pll import run_dr_em_raw
# NOTE(review): star import kept because compute_stats (used below) is only
# brought into scope this way; prefer explicit names once the full list of
# utilities used by the notebook is known.
from utils import *

mispec = 'outcome'

# Run the DR-EM estimator on every thinned draw, once per treatment
# allocation, and persist each set of raw results.
# NOTE(review): the file name does not encode `mispec`, so a run with a
# different `mispec` value overwrites these files.
for treat_p in (0.7, 0.3):
    args = [{'Y_chain': Y_chain,
             'A_chain': A_chain,
             'L_chain': L_chain,
             'adj': adj1,
             'i': i,
             'treatment_allocation': treat_p,
             'mispec': mispec} for i in range(len(Y_chain))]

    res_list_dr = run_pll(run_dr_em_raw, args, processes=n_cpu)
    res_list_array_dr = np.array(res_list_dr)
    # save results
    np.save(f'run/sim_results/sim_dr_em_raw_{sample_size}_{min_degree}_{max_degree}_{treat_p}.npy', res_list_array_dr)

    # Clear the scratch folder before the next allocation is processed.
    delete_all_files_in_folder(f'run/run_dr_em_raw')

# read results
# allow_pickle=True unpickles arbitrary objects: only load files produced by
# this notebook.
results1, results2 = (
    np.load(f'run/sim_results/sim_dr_em_raw_{sample_size}_{min_degree}_{max_degree}_{allocation}.npy', allow_pickle=True)
    for allocation in (0.7, 0.3)
)

compute_stats(results1, results2, ground_truth1, adj1)

Multiprocessing <function run_dr_em_raw at 0x13ce6ce50> in 1000 tasks, with 10 processes...
Multiprocessing finished.
Multiprocessing <function run_dr_em_raw at 0x13ce6ce50> in 1000 tasks, with 10 processes...
Multiprocessing finished.


100%|██████████| 1000/1000 [01:08<00:00, 14.67it/s]


{'columns': ['average',
  'direct',
  'indirect',
  'spillover_effect',
  'psi_1_gamma',
  'psi_0_gamma',
  'psi_zero'],
 'coverage_rate': array([0.979, 0.97 , 0.726, 0.641, 0.978, 0.738, 0.643]),
 'bias': array([-0.00318387, -0.00440931,  0.01433309,  0.0575684 , -0.00497791,
        -0.0005686 , -0.058137  ]),
 'mse': array([0.00071228, 0.00179551, 0.00351644, 0.03875563, 0.00135891,
        0.00047109, 0.03970892]),
 'var': array([0.00070215, 0.00177607, 0.003311  , 0.03544151, 0.00133413,
        0.00047077, 0.03632901]),
 'ci_length': array([0.11056199, 0.16963323, 0.19037711, 0.60420251, 0.15357295,
        0.05518101, 0.61309872]),
 'true_effect': array([ 0.1577125,  0.1908875, -0.09575  , -0.280975 ,  0.21545  ,
         0.0245625,  0.3055375])}

In [None]:
# --- Disabled diagnostic (kept for reference) ---------------------------
# For column `idx` of the saved 0.7-allocation DR results, this compares
# three variance figures: the squared `se_hac` returned by
# compute_avg_effects_std_from_raw (times sample_size), np.var of the raw
# per-draw values, and the variance of the point estimates across draws
# (times sample_size).
# from tqdm import tqdm
# from drnet import compute_avg_effects_std_from_raw

# results1 = np.load(f'run/sim_results/sim_dr_em_raw_{sample_size}_{min_degree}_{max_degree}_{0.7}.npy', allow_pickle=True)

# var = []
# var_sim = []
# est = []
# idx = 0
# for i in tqdm(range(Y_chain.shape[0])):
#     avg_effects, se_hac = compute_avg_effects_std_from_raw(results1[i,:,idx],adj1,h=2)
#     var.append(se_hac**2*sample_size)
#     var_sim.append(np.var(results1[i,:,idx]))
#     est.append(avg_effects)

# var_true = np.var(est)
# print(np.mean(var), np.mean(var_sim), var_true*sample_size)

In [None]:
# --- Disabled variant of the DR simulation with mispec = None ------------
# NOTE(review): this writes to the same sim_dr_em_raw_*.npy paths as the
# active mispec = 'outcome' cell, so enabling it overwrites those results.
# from utils import run_pll, delete_all_files_in_folder
# from run_pll import run_dr_em_raw

# treat_p = 0.7
# mispec = None
# args = [{'Y_chain': Y_chain,
#          'A_chain': A_chain,
#          'L_chain': L_chain,
#          'adj': adj1,
#          'i': i,
#          'treatment_allocation': treat_p,
#          'mispec': mispec} for i in range(len(Y_chain))]

# res_list_dr = run_pll(run_dr_em_raw, args, processes=n_cpu)
# res_list_array_dr = np.array(res_list_dr)
# # save results
# np.save(f'run/sim_results/sim_dr_em_raw_{sample_size}_{min_degree}_{max_degree}_{treat_p}.npy', res_list_array_dr)

# delete_all_files_in_folder(f'run/run_dr_em_raw')

# from utils import run_pll
# from run_pll import run_dr_em_raw

# treat_p = 0.3
# args = [{'Y_chain': Y_chain,
#          'A_chain': A_chain,
#          'L_chain': L_chain,
#          'adj': adj1,
#          'i': i,
#          'treatment_allocation': treat_p,
#          'mispec': mispec} for i in range(len(Y_chain))]

# res_list_dr = run_pll(run_dr_em_raw, args, processes=n_cpu)
# res_list_array_dr = np.array(res_list_dr)
# # save results
# np.save(f'run/sim_results/sim_dr_em_raw_{sample_size}_{min_degree}_{max_degree}_{treat_p}.npy', res_list_array_dr)

# delete_all_files_in_folder(f'run/run_dr_em_raw')


# # read results
# import numpy as np
# from utils import *


# results1 = np.load(f'run/sim_results/sim_dr_em_raw_{sample_size}_{min_degree}_{max_degree}_{0.7}.npy', allow_pickle=True)
# results2 = np.load(f'run/sim_results/sim_dr_em_raw_{sample_size}_{min_degree}_{max_degree}_{0.3}.npy', allow_pickle=True)

# compute_stats(results1, results2, ground_truth1, adj1)

## Compare with AUTOG (the `run_autognet_raw` baseline)

In [None]:
import numpy as np
from utils import run_pll, delete_all_files_in_folder
from run_pll import run_autognet_raw

mispec = 'outcome'

if mispec == 'outcome':
    # Replace the covariates passed to the estimator with Bernoulli(0.2)
    # noise of the same shape as L_chain. The noise is stored under a new
    # name so that the simulated L_chain is NOT overwritten: re-running an
    # earlier cell after this one must still see the original covariates.
    np.random.seed(0)
    L_chain_noise = np.random.binomial(n=1, p=0.2, size=L_chain.shape)

    # Same pipeline for both treatment allocations: run, save, clean up.
    for treat_p in (0.7, 0.3):
        args = [{'Y_chain': Y_chain,
                 'A_chain': A_chain,
                 'L_chain': L_chain_noise,
                 'adj': adj1,
                 'i': i,
                 'treatment_allocation': treat_p} for i in range(len(Y_chain))]

        res_list_dr = run_pll(run_autognet_raw, args, processes=n_cpu)
        res_list_array_dr = np.array(res_list_dr)
        # save results
        np.save(f'run/sim_results/sim_ag_raw_{sample_size}_{min_degree}_{max_degree}_{treat_p}_{mispec}.npy', res_list_array_dr)
        delete_all_files_in_folder(f'run/run_autog_raw')


# Load the saved results for both allocations (0.7 first, then 0.3).
results1, results2 = (
    np.load(f'run/sim_results/sim_ag_raw_{sample_size}_{min_degree}_{max_degree}_{allocation}_{mispec}.npy')
    for allocation in (0.7, 0.3)
)

# compute_stats_ag is not imported in this cell; it must already be in scope
# (presumably via an earlier `from utils import *` -- TODO import explicitly).
compute_stats_ag(results1, results2, ground_truth1)

Multiprocessing <function run_autognet_raw at 0x13ce6cee0> in 1000 tasks, with 10 processes...


100%|██████████| 60/60 [00:00<00:00, 344.23it/s]
100%|██████████| 60/60 [00:00<00:00, 338.57it/s]
100%|██████████| 60/60 [00:00<00:00, 310.59it/s]
100%|██████████| 60/60 [00:00<00:00, 323.71it/s]
100%|██████████| 60/60 [00:00<00:00, 361.28it/s]
 57%|█████▋    | 34/60 [00:00<00:00, 331.51it/s]
100%|██████████| 60/60 [00:00<00:00, 342.83it/s]
100%|██████████| 60/60 [00:00<00:00, 360.32it/s]
100%|██████████| 60/60 [00:00<00:00, 325.71it/s]
100%|██████████| 60/60 [00:00<00:00, 361.78it/s]
100%|██████████| 60/60 [00:00<00:00, 340.43it/s]
100%|██████████| 60/60 [00:00<00:00, 336.58it/s]
100%|██████████| 60/60 [00:00<00:00, 334.90it/s]
100%|██████████| 60/60 [00:00<00:00, 340.56it/s]
100%|██████████| 60/60 [00:00<00:00, 380.75it/s]
100%|██████████| 60/60 [00:00<00:00, 345.03it/s]
100%|██████████| 60/60 [00:00<00:00, 364.27it/s]
100%|██████████| 60/60 [00:00<00:00, 305.36it/s]
100%|██████████| 60/60 [00:00<00:00, 374.34it/s]
100%|██████████| 60/60 [00:00<00:00, 338.51it/s]
100%|██████████| 60/

Multiprocessing finished.
Multiprocessing <function run_autognet_raw at 0x13ce6cee0> in 1000 tasks, with 10 processes...


100%|██████████| 60/60 [00:00<00:00, 308.71it/s]
100%|██████████| 60/60 [00:00<00:00, 347.16it/s]
100%|██████████| 60/60 [00:00<00:00, 299.40it/s]
100%|██████████| 60/60 [00:00<00:00, 298.17it/s]
100%|██████████| 60/60 [00:00<00:00, 267.02it/s]
100%|██████████| 60/60 [00:00<00:00, 264.65it/s]
100%|██████████| 60/60 [00:00<00:00, 280.62it/s]
100%|██████████| 60/60 [00:00<00:00, 290.17it/s]
100%|██████████| 60/60 [00:00<00:00, 289.65it/s]
100%|██████████| 60/60 [00:00<00:00, 265.09it/s]
100%|██████████| 60/60 [00:00<00:00, 266.02it/s]
100%|██████████| 60/60 [00:00<00:00, 358.01it/s]
100%|██████████| 60/60 [00:00<00:00, 318.34it/s]
100%|██████████| 60/60 [00:00<00:00, 337.67it/s]
100%|██████████| 60/60 [00:00<00:00, 312.22it/s]
100%|██████████| 60/60 [00:00<00:00, 326.91it/s]
100%|██████████| 60/60 [00:00<00:00, 340.66it/s]
100%|██████████| 60/60 [00:00<00:00, 309.28it/s]
100%|██████████| 60/60 [00:00<00:00, 295.62it/s]
100%|██████████| 60/60 [00:00<00:00, 297.46it/s]
100%|██████████| 60/

Multiprocessing finished.


{'columns': ['average',
  'direct_effect',
  'indirect',
  'spillover_effect',
  'psi_1_gamma',
  'psi_0_gamma',
  'psi_zero'],
 'bias': array([ 0.060423  ,  0.10493673, -0.27061035,  0.16353052,  0.09040158,
        -0.01453515, -0.17806567]),
 'mse': array([0.00384264, 0.01148138, 0.0789698 , 0.02839629, 0.00861828,
        0.00022421, 0.03354434]),
 'var': array([1.91704010e-04, 4.69661795e-04, 5.73984160e-03, 1.65405600e-03,
        4.45831376e-04, 1.29359070e-05, 1.83695118e-03]),
 'ground_truth': array([ 0.1577125,  0.1908875, -0.09575  , -0.280975 ])}

'bias': array([-0.00253708, -0.00264493,  0.01453212,  0.05778765, -0.00326931,
        -0.00062438, -0.05841203]),
 'mse': array([0.00026221, 0.00082486, 0.00332812, 0.03858815, 0.0004195 ,
        0.00047158, 0.03959085]),

'bias': array([-0.00318387, -0.00440931,  0.01433309,  0.0575684 , -0.00497791,
        -0.0005686 , -0.058137  ]),
 'mse': array([0.00071228, 0.00179551, 0.00351644, 0.03875563, 0.00135891,
        0.00047109, 0.03970892]),