In [1]:
# Chain length settings: passed as R and burnin_R to sample_network_chain.
num_sample, num_burn = 300, 20
# First argument to get_graph (presumably the number of nodes -- confirm in dgp).
sample_size = 800
# Number of worker processes handed to run_pll.
n_cpu = 10

# Degree bounds forwarded to get_graph; min_degree also scales beta[0].
min_degree, max_degree = 1, 2

In [2]:
from dgp import sample_network_chain, get_graph, sample_Y1, sample_Y2, agcEffect
import numpy as np

# 1. Simulate the graph and one chain of (Y, A, L) draws.
adj = get_graph(sample_size, min_degree, max_degree, seed=1)
# Sanity check: smallest and largest row sum of the adjacency matrix.
print(adj.sum(axis=1).min(), adj.sum(axis=1).max())

tau = np.array([-1.0, 0.50, -0.50])       # shape (3,)
rho = np.array([[0,0.1,0.2],
                [0.1,0,0.1],
                [0.2,0.1,0]])      # shape (3, 3), with 0s on the diagonal
nu = np.array([0.1,0,0,0.1,0,0,0.1,0,0]).reshape(3,3)       # shape (3, 3)
gamma = np.array([-1,2,0.1,-2,0.1,2,0.1,0.1])    # shape (8,)
beta = np.array([-1*min_degree,2,-0.2,2,0.1,-2,0.1,2,0.1,0.1])  # shape (10,)

Y_chain, A_chain, L_chain = sample_network_chain(adj, tau, rho, nu, gamma, beta, R=num_sample,
    burnin_R=num_burn, seed=0, sample_Y_func=sample_Y1, Atype=('gen', 0.7))

# Thin the chain: keep every third draw along the first axis.
thin_every = 3
Y_chain = Y_chain[::thin_every]
A_chain = A_chain[::thin_every]
L_chain = L_chain[::thin_every]

# Per-draw descriptive summaries. Descriptive names are used instead of
# `dir`, `dir2`, ... so the builtin dir() is not shadowed in the kernel.
naive_effect = []       # mean(Y | A == 1) - mean(Y | A == 0)
treated_share = []      # mean(A)
treat_gap_by_L0 = []    # mean(A | L[:, 0] == 1) - mean(A | L[:, 0] == 0)
outcome_gap_by_L0 = []  # mean(Y | L[:, 0] == 1) - mean(Y | L[:, 0] == 0)
for Y, A, L in zip(Y_chain, A_chain, L_chain):
    cov_on = L[:, 0] == 1
    cov_off = L[:, 0] == 0
    # Explicit difference of two group means (the original wrapped the
    # subtraction inside a single np.mean, which is the same quantity).
    naive_effect.append(np.mean(Y[A == 1]) - np.mean(Y[A == 0]))
    treated_share.append(np.mean(A))
    treat_gap_by_L0.append(np.mean(A[cov_on]) - np.mean(A[cov_off]))
    outcome_gap_by_L0.append(np.mean(Y[cov_on]) - np.mean(Y[cov_off]))

# Averages over the retained draws; displayed as the cell output.
(np.mean(naive_effect), np.mean(treated_share), np.mean(Y_chain),
 np.mean(treat_gap_by_L0), np.mean(outcome_gap_by_L0))

1 2


100%|██████████| 320/320 [00:02<00:00, 120.13it/s]


(0.617298367173841,
 0.4164124999999999,
 0.51045,
 0.3439820610459721,
 0.37428683819220365)

In [3]:
# Evaluate agcEffect on the simulated graph with the same tau/rho/nu/beta
# used for the chain above; gamma is not an argument of this call.
agcEffect(
    adj, tau, rho, nu, beta,
    sample_Y_func=sample_Y1,
    treatment_allocation=0.7,
    R=100, burnin_R=100,
    seed=0,
)

100%|██████████| 200/200 [00:00<00:00, 241.81it/s]
100%|██████████| 200/200 [00:00<00:00, 314.30it/s]
100%|██████████| 200/200 [00:00<00:00, 321.22it/s]
100%|██████████| 200/200 [00:00<00:00, 280.25it/s]


{'average': 0.59675,
 'direct_effect': 0.32411250000000014,
 'spillover_effect': -0.04371249999999999,
 'psi_1_gamma': 0.6959375000000001,
 'psi_0_gamma': 0.37182499999999996,
 'psi_zero': 0.41553749999999995}

In [4]:
from utils import run_pll
from run_pll import run_autognet

# Every task gets the same chains and graph; only the draw index 'i' differs.
shared_inputs = {'Y_chain': Y_chain, 'A_chain': A_chain, 'L_chain': L_chain, 'adj': adj}
args = [{**shared_inputs, 'i': draw} for draw in range(len(Y_chain))]

# Run run_autognet once per task with n_cpu processes, then stack the results.
res_list_ag = run_pll(run_autognet, args, processes=n_cpu)
res_list_array_ag = np.array(res_list_ag)


Multiprocessing <function run_autognet at 0x12da00dc0> in 100 tasks, with 10 processes...


100%|██████████| 60/60 [00:00<00:00, 197.26it/s]
100%|██████████| 60/60 [00:00<00:00, 194.73it/s]
100%|██████████| 60/60 [00:00<00:00, 173.72it/s]
100%|██████████| 60/60 [00:00<00:00, 188.95it/s]
100%|██████████| 60/60 [00:00<00:00, 180.02it/s]
100%|██████████| 60/60 [00:00<00:00, 185.96it/s]
100%|██████████| 60/60 [00:00<00:00, 181.65it/s]
100%|██████████| 60/60 [00:00<00:00, 181.95it/s]
100%|██████████| 60/60 [00:00<00:00, 205.75it/s]
100%|██████████| 60/60 [00:00<00:00, 209.97it/s]
100%|██████████| 60/60 [00:00<00:00, 198.41it/s]
100%|██████████| 60/60 [00:00<00:00, 199.74it/s]
100%|██████████| 60/60 [00:00<00:00, 187.52it/s]
100%|██████████| 60/60 [00:00<00:00, 196.09it/s]
100%|██████████| 60/60 [00:00<00:00, 189.30it/s]
100%|██████████| 60/60 [00:00<00:00, 210.90it/s]
100%|██████████| 60/60 [00:00<00:00, 191.51it/s]
100%|██████████| 60/60 [00:00<00:00, 192.66it/s]
100%|██████████| 60/60 [00:00<00:00, 172.12it/s]
100%|██████████| 60/60 [00:00<00:00, 163.51it/s]
100%|██████████| 60/

Multiprocessing finished.


In [5]:
from run_pll import column_names

# Column-wise mean and standard deviation over the stacked task results.
ret_mean_ag = np.mean(res_list_array_ag, axis=0)
ret_std_ag = np.std(res_list_array_ag, axis=0)
for col, label in enumerate(column_names):
    print(f"{label}: {ret_mean_ag[col]:.5f} ± {ret_std_ag[col]:.5f}")

average: 0.60438 ± 0.03733
direct_effect: 0.32285 ± 0.05224
spillover_effect: -0.04929 ± 0.05108
psi_1_gamma: 0.70079 ± 0.04406
psi_0_gamma: 0.37794 ± 0.04525
psi_zero: 0.42724 ± 0.03714


In [6]:
from drnet import expit
from sklearn.linear_model import LogisticRegression

def get_prob(L, Y, A):
    """Return inverse-probability-weighted outcome means for A == 1 and A == 0.

    A logistic regression of ``A`` on columns 0, 1 and 2 of ``L`` is fitted
    (the remaining four regressors are all-zero placeholders), the fitted
    linear predictor is passed through ``expit`` (imported from drnet;
    presumably the logistic sigmoid -- confirm), and the result is used to
    weight ``Y``.

    Parameters
    ----------
    L : 2-D array indexed as ``L[:, k]`` for k in 0..2.
    Y : array of outcomes, one entry per row of ``L``.
    A : array compared against 1 and 0, one entry per row of ``L``.

    Returns
    -------
    (gamma_1, gamma_0) : tuple
        ``mean(Y / p * (A == 1))`` and ``mean(Y / (1 - p) * (A == 0))``,
        where ``p`` is the fitted probability for each row.
    """
    blank = np.zeros(L.shape[0])
    # Seven regressors: L columns 0-2 interleaved with zero placeholders.
    regressors = [L[:, 0], blank, L[:, 1], blank, L[:, 2], blank, blank]

    # Train an unpenalised logistic regression of A on the regressors.
    clf = LogisticRegression(penalty=None, solver='lbfgs', max_iter=10000)
    clf.fit(np.array(regressors).T, A)

    # coef[0] is the intercept, coef[1:] are the seven fitted slopes.
    coef = np.zeros(8)
    coef[0], coef[1:] = clf.intercept_[0], clf.coef_[0]

    # Accumulate the linear predictor left to right; placeholder slots
    # contribute a literal zero, so only the three L columns matter.
    slot_values = (L[:, 0], 0, L[:, 1], 0, L[:, 2], 0, 0)
    linpred = coef[0]
    for weight, value in zip(coef[1:], slot_values):
        linpred = linpred + weight * value

    probs = expit(linpred)

    treated = A == 1
    untreated = A == 0
    gamma_1 = np.mean(Y / probs * treated)
    gamma_0 = np.mean(Y / (1 - probs) * untreated)

    return gamma_1, gamma_0

# Apply get_prob to every retained draw, then summarise across draws.
res = np.array([get_prob(L_i, Y_i, A_i)
                for L_i, Y_i, A_i in zip(L_chain, Y_chain, A_chain)])
print(np.mean(res, axis=0))
print(np.std(res, axis=0))
# ret_tmp = get_prob(L_chain[-1], Y_chain[-1], A_chain[-1])
# ret_tmp

[0.71745048 0.39484257]
[0.04283857 0.03921884]


In [7]:
from drnet import *

# Raw doubly_robust output for the last retained draw only.
last_A, last_L, last_Y = A_chain[-1], L_chain[-1], Y_chain[-1]
ret_raw = doubly_robust(last_A, last_L, last_Y, adj_matrix=adj, return_raw=True)
psi_1_mean = ret_raw['psi_1_gamma'].mean()
psi_0_mean = ret_raw['psi_0_gamma'].mean()
print(psi_1_mean, psi_0_mean)

0.7247145595687462 0.3805592920202106


In [8]:
from run_pll import *
from utils import run_pll

# Every task gets the same chains and graph; 'i' selects the draw and
# 'mispec' is None for every task in this run.
shared_inputs = {'Y_chain': Y_chain, 'A_chain': A_chain, 'L_chain': L_chain, 'adj': adj}
args = [{**shared_inputs, 'i': draw, 'mispec': None} for draw in range(len(Y_chain))]

# Run run_dr once per task with n_cpu processes, then stack the results.
res_list_dr = run_pll(run_dr, args, processes=n_cpu)
res_list_array_dr = np.array(res_list_dr)

from run_pll import column_names

# Column-wise mean and standard deviation over the stacked task results.
ret_mean_dr = np.mean(res_list_array_dr, axis=0)
ret_std_dr = np.std(res_list_array_dr, axis=0)
for col, label in enumerate(column_names):
    print(f"{label}: {ret_mean_dr[col]:.5f} ± {ret_std_dr[col]:.5f}")

Multiprocessing <function run_dr at 0x10a8a0550> in 100 tasks, with 10 processes...
Multiprocessing finished.
average: 0.59900 ± 0.05303
direct_effect: 0.31239 ± 0.07429
spillover_effect: -0.04624 ± 0.08150
psi_1_gamma: 0.69191 ± 0.06625
psi_0_gamma: 0.37952 ± 0.05804
psi_zero: 0.42576 ± 0.06978


Multiprocessing <function run_dr at 0x1183a04c0> in 100 tasks, with 10 processes...
Multiprocessing finished.
average: 0.74576 ± 0.04771
direct_effect: 0.35930 ± 0.08317
spillover_effect: 0.07362 ± 0.21426
psi_1_gamma: 0.85530 ± 0.06057
psi_0_gamma: 0.49600 ± 0.06966
psi_zero: 0.42238 ± 0.19960

In [9]:
from utils import run_pll
from run_pll import run_autognet

# Swap the simulated covariates for Bernoulli(0.2) draws of the same shape,
# seeded through the legacy global NumPy RNG.
np.random.seed(0)
L_chain_noise = np.random.binomial(1, 0.2, size=L_chain.shape)

# Same task layout as the earlier run_autognet cell, but with the noise covariates.
shared_inputs = {'Y_chain': Y_chain, 'A_chain': A_chain, 'L_chain': L_chain_noise, 'adj': adj}
args = [{**shared_inputs, 'i': draw} for draw in range(len(Y_chain))]

# NOTE: res_list_ag / res_list_array_ag are also assigned by the earlier
# run_autognet cell; running this cell overwrites those results.
res_list_ag = run_pll(run_autognet, args, processes=n_cpu)
res_list_array_ag = np.array(res_list_ag)


Multiprocessing <function run_autognet at 0x12da00dc0> in 100 tasks, with 10 processes...


100%|██████████| 60/60 [00:00<00:00, 130.51it/s]
100%|██████████| 60/60 [00:00<00:00, 151.65it/s]
100%|██████████| 60/60 [00:00<00:00, 179.59it/s]
100%|██████████| 60/60 [00:00<00:00, 172.52it/s]
100%|██████████| 60/60 [00:00<00:00, 154.08it/s]
100%|██████████| 60/60 [00:00<00:00, 148.35it/s]
100%|██████████| 60/60 [00:00<00:00, 143.96it/s]
100%|██████████| 60/60 [00:00<00:00, 140.48it/s]
100%|██████████| 60/60 [00:00<00:00, 144.39it/s]
100%|██████████| 60/60 [00:00<00:00, 138.90it/s]
100%|██████████| 60/60 [00:00<00:00, 136.47it/s]
100%|██████████| 60/60 [00:00<00:00, 148.16it/s]
100%|██████████| 60/60 [00:00<00:00, 174.76it/s]
100%|██████████| 60/60 [00:00<00:00, 141.97it/s]
100%|██████████| 60/60 [00:00<00:00, 178.40it/s]
100%|██████████| 60/60 [00:00<00:00, 149.22it/s]
100%|██████████| 60/60 [00:00<00:00, 167.68it/s]
100%|██████████| 60/60 [00:00<00:00, 193.33it/s]
100%|██████████| 60/60 [00:00<00:00, 190.13it/s]
100%|██████████| 60/60 [00:00<00:00, 194.97it/s]
100%|██████████| 60/

Multiprocessing finished.


In [10]:
from run_pll import column_names

# Column-wise mean and standard deviation of res_list_array_ag, i.e. of
# whichever run_autognet cell was executed most recently.
ret_mean_ag = np.mean(res_list_array_ag, axis=0)
ret_std_ag = np.std(res_list_array_ag, axis=0)
for col, label in enumerate(column_names):
    print(f"{label}: {ret_mean_ag[col]:.5f} ± {ret_std_ag[col]:.5f}")

average: 0.67500 ± 0.01901
direct_effect: 0.62461 ± 0.05074
spillover_effect: -0.03887 ± 0.05549
psi_1_gamma: 0.86272 ± 0.02286
psi_0_gamma: 0.23811 ± 0.04386
psi_zero: 0.27698 ± 0.02775


In [11]:
# from run_pll import *
# from utils import run_pll

# args = [{'Y_chain': Y_chain,
#          'A_chain': A_chain,
#          'L_chain': L_chain,
#          'adj': adj,
#          'i': i,
#          'mispec': 'outcome'} for i in range(len(Y_chain))]

# res_list_dr = run_pll(run_dr, args, processes=n_cpu)
# res_list_array_dr = np.array(res_list_dr)

# from run_pll import column_names

# ret_mean_dr = res_list_array_dr.mean(axis=0)
# ret_std_dr = res_list_array_dr.std(axis=0)
# for i in range(len(column_names)):
#     print(f"{column_names[i]}: {ret_mean_dr[i]:.5f} ± {ret_std_dr[i]:.5f}")

In [12]:
# from run_pll import *
# from utils import run_pll

# args = [{'Y_chain': Y_chain,
#          'A_chain': A_chain,
#          'L_chain': L_chain,
#          'adj': adj,
#          'i': i,
#          'mispec': 'treatment'} for i in range(len(Y_chain))]

# res_list_dr = run_pll(run_dr, args, processes=n_cpu)
# res_list_array_dr = np.array(res_list_dr)

# from run_pll import column_names

# ret_mean_dr = res_list_array_dr.mean(axis=0)
# ret_std_dr = res_list_array_dr.std(axis=0)
# for i in range(len(column_names)):
#     print(f"{column_names[i]}: {ret_mean_dr[i]:.5f} ± {ret_std_dr[i]:.5f}")