In [37]:
# read rds file
import pyreadr
import pandas as pd
import numpy as np

# pyreadr.read_r returns a dict-like of data frames; an .rds file holds a single
# unnamed object, which pyreadr stores under the key None.
# NOTE(review): absolute, machine-specific path — this cell only runs where that path exists.
df = pyreadr.read_r('/Users/ljz/Desktop/causal inference/dr-chain-graph/empirical/code/inputs/nnahray_final_nomiss.rds')[None]

In [38]:
# Network adjacency data, read from an .rds file (single object stored under key None).
# NOTE(review): presumably its rows/columns follow the same unit order as the rows of df — confirm.
# NOTE(review): absolute, machine-specific path — this cell only runs where that path exists.
adj = pyreadr.read_r('/Users/ljz/Desktop/causal inference/dr-chain-graph/empirical/code/inputs/adj_full.rds')[None]

In [39]:
# Working copy restricted to the listed columns; .copy() so that the column
# assignments made on df_use in later cells do not write back into df.
df_use = df[['hiv_or_sti','incarcerated','age','latino',
             'educ','illicit_ever','male', 'employed', 'income_cat',
             'homeless', 'household_income']].copy()

In [45]:
df_use['household_income'].unique()

array(['', '$75,000 - $99,999', 'Less than $5,000', '$200,000 - $249,999',
       '$5,000 - $9,999', '$30,000 - $39,999', '$50,000 - $74,999',
       '$15,000 - $19,999', '$40,000 - $49,999', '$20,000-$24,999',
       '$250,000 or more', '$10,000 - $14,999', '$100,000 - $124,999',
       '$25,000 - $29,999'], dtype=object)

In [60]:
# 0/1 indicator: 1 when the household income bracket is one of the two lowest
# ('Less than $5,000' or '$5,000 - $9,999'), 0 for every other value.
# NOTE(review): the empty-string bracket shown by unique() is therefore coded 0 as
# well — presumably a missing response; confirm that is intended.
df_use['low_income_hh'] = (
    df_use['household_income']
    .isin(['Less than $5,000', '$5,000 - $9,999'])
    .astype(int)
)

In [44]:
# Recode homelessness to a 0/1 indicator (1 where the raw answer is 'Yes').
# The indicator is derived from the untouched `df` rather than from
# df_use['homeless'] itself: overwriting the column with its own recode made the
# cell non-idempotent — a second run compared integers to 'Yes' and silently
# turned every value into 0. df_use shares df's index, so the assignment aligns.
df_use['homeless'] = (df['homeless'] == 'Yes').astype(int)
df_use['homeless'].describe()

count    465.000000
mean       0.187097
std        0.390409
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max        1.000000
Name: homeless, dtype: float64

In [70]:
# Inputs for the employment analysis as plain arrays:
# Y = outcome (employed), A = treatment (incarcerated), L = covariate matrix.
Y = df_use['employed'].values
A = df_use['incarcerated'].values
L = df_use[['age','latino','educ','illicit_ever','male']].values
# NOTE(review): assumes the rows/columns of adj are ordered like the rows of df_use — confirm.
adj_matrix = adj.values.astype(int)

# Row sums of the adjacency data; not referenced again in the visible cells.
degree = adj.sum(axis=1)

In [6]:
adj_matrix.sum(axis=1).max(), adj_matrix.sum(axis=1).min()

(12, 0)

In [71]:
np.mean(Y[A == 1]) - np.mean(Y[A == 0])

-0.09778371756320303

In [72]:
from drnet_em import doubly_robust_em

doubly_robust_em(A, L, Y, adj_matrix, treatment_allocation=0.5, seed=1, return_raw=False)

{'average': 0.18838147001367717,
 'direct_effect': -0.15883710856792882,
 'spillover_effect': -0.16407298787332847,
 'psi_1_gamma': 0.1089629157297128,
 'psi_0_gamma': 0.2678000242976416,
 'psi_zero': 0.43187301217097007}

In [73]:
doubly_robust_em(A, L, Y, adj_matrix, treatment_allocation=0.2, seed=1, return_raw=False)

{'average': 0.29152812062956734,
 'direct_effect': -0.2345578516984965,
 'spillover_effect': -0.09343332120170333,
 'psi_1_gamma': 0.10388183927077024,
 'psi_0_gamma': 0.33843969096926674,
 'psi_zero': 0.43187301217097007}

In [74]:
# Same estimator calls as in the previous cells (allocations 0.5 and 0.2, seed 1),
# now with return_raw=True. The results are indexed by key further down and the
# entries are passed to compute_avg_effects_std_from_raw as psi_vec.
# NOTE(review): presumably return_raw=True yields per-unit values instead of averages — confirm in drnet_em.
ret_5 = doubly_robust_em(A, L, Y, adj_matrix, treatment_allocation=0.5, seed=1, return_raw=True)
ret_2 = doubly_robust_em(A, L, Y, adj_matrix, treatment_allocation=0.2, seed=1, return_raw=True)

In [75]:
import numpy as np
import networkx as nx

def compute_avg_effects_std_from_raw(psi_vec, adj_matrix, h=2):
    """
    Compute the average effect and its network-HAC standard error using a Bartlett kernel.

    Every pair of units (i, j) whose shortest-path distance d_ij in the network
    is at most h adds ``max(1 - d_ij / h, 0) * g_i * g_j`` to the variance sum,
    where g is psi_vec centered at its mean. The i == j terms have d_ij = 0 and
    weight 1; pairs at distance exactly h get weight 0; pairs further apart (or
    not connected) are never visited because the path search stops at h.

    Parameters:
        psi_vec: array-like of shape (N,), raw influence function values
        adj_matrix: np.ndarray of shape (N, N), adjacency matrix of the network
        h: positive number, bandwidth of the Bartlett kernel and cutoff of the
            shortest-path search

    Returns:
        avg_effects: float, average of psi_vec
        se_hac: float, standard error adjusted for network dependence,
            sqrt(weighted_sum / N) / sqrt(N). It is NaN when the kernel-weighted
            sum is negative, because np.sqrt of a negative number is NaN.

    Raises:
        ValueError: if h is not strictly positive, psi_vec is empty, or
            adj_matrix is not of shape (N, N).
    """
    # Validate up front: h == 0 would otherwise surface as a ZeroDivisionError in
    # the kernel weight, and a negative h would silently drop all pairs.
    if not h > 0:
        raise ValueError(f"h must be a positive bandwidth, got {h!r}")

    # Positional indexing below (g[i], g[j]) must not depend on a pandas index.
    psi = np.asarray(psi_vec)
    N = len(psi)
    if N == 0:
        raise ValueError("psi_vec must contain at least one value")
    if np.shape(adj_matrix) != (N, N):
        raise ValueError(
            f"adj_matrix must have shape ({N}, {N}) to match psi_vec, "
            f"got {np.shape(adj_matrix)}"
        )

    avg_effects = np.mean(psi)

    # Centered residuals
    g = psi - avg_effects

    # Build graph and compute pairwise distances (hop counts, truncated at h)
    G = nx.from_numpy_array(adj_matrix)
    dist = dict(nx.all_pairs_shortest_path_length(G, cutoff=h))

    # Network HAC estimator: kernel-weighted sum of cross products over nearby pairs
    hac_var = 0.0
    for i in range(N):
        for j, dij in dist[i].items():
            weight = max(1 - dij / h, 0)  # Bartlett kernel
            hac_var += weight * g[i] * g[j]

    hac_var /= N
    # Variance of the mean is hac_var / N, hence the extra sqrt(N).
    se_hac = np.sqrt(hac_var) / np.sqrt(N)

    return avg_effects, se_hac

In [76]:
# Contrasts built from the raw outputs at allocation 0.5 (ret_5) and 0.2 (ret_2):
#   direct    = psi_1_gamma - psi_0_gamma, both at allocation 0.5
#   indirect  = psi_0_gamma at 0.5 minus psi_0_gamma at 0.2
#   spillover = psi_0_gamma at 0.5 minus psi_zero
psi_gamma = ret_5['psi_gamma']
direct_effect = ret_5['psi_1_gamma'] - ret_5['psi_0_gamma']
indirect_effect = ret_5['psi_0_gamma'] - ret_2['psi_0_gamma']
spillover_effect = ret_5['psi_0_gamma'] - ret_5['psi_zero']

# Mean and network-HAC standard error of each contrast; h=3 overrides the function default of 2.
avg_psi_gamma, se_psi_gamma = compute_avg_effects_std_from_raw(psi_gamma, adj_matrix, h=3)
avg_direct_effect, se_direct_effect = compute_avg_effects_std_from_raw(direct_effect, adj_matrix, h=3)
avg_indirect_effect, se_indirect_effect = compute_avg_effects_std_from_raw(indirect_effect, adj_matrix, h=3)
avg_spillover_effect, se_spillover_effect = compute_avg_effects_std_from_raw(spillover_effect, adj_matrix, h=3)

# Intervals are estimate +/- 1.96 standard errors (normal approximation, 95%).
print('Average psi_gamma:', avg_psi_gamma, 'Confidence interval:', (avg_psi_gamma - 1.96 * se_psi_gamma, avg_psi_gamma + 1.96 * se_psi_gamma))
print('Average direct effect:', avg_direct_effect, 'Confidence interval:', (avg_direct_effect - 1.96 * se_direct_effect, avg_direct_effect + 1.96 * se_direct_effect))
print('Average indirect effect:', avg_indirect_effect, 'Confidence interval:', (avg_indirect_effect - 1.96 * se_indirect_effect, avg_indirect_effect + 1.96 * se_indirect_effect))
print('Average spillover effect:', avg_spillover_effect, 'Confidence interval:', (avg_spillover_effect - 1.96 * se_spillover_effect, avg_spillover_effect + 1.96 * se_spillover_effect))


Average psi_gamma: 0.18838147001367717 Confidence interval: (0.13418035640461648, 0.24258258362273785)
Average direct effect: -0.15883710856792882 Confidence interval: (-0.26221777921617173, -0.05545643791968592)
Average indirect effect: -0.07063966667162508 Confidence interval: (-0.13916790622753245, -0.002111427115717726)
Average spillover effect: -0.1640729878733285 Confidence interval: (-0.30121822175146407, -0.026927753995192927)


In [68]:
from autognet import evaluate_autognet_via_agc_effect


def get_autog(adj_matrix, Y, A, L, s, R=30, burnin=10):
    """
    Run evaluate_autognet_via_agc_effect at allocations 0.5 and 0.2 and collect four summaries.

    Parameters:
        adj_matrix, Y, A, L: passed through unchanged to evaluate_autognet_via_agc_effect
        s: seed passed to both calls
        R, burnin: forwarded unchanged to evaluate_autognet_via_agc_effect; the
            defaults reproduce the previously hard-coded values (presumably the
            number of sampler iterations and burn-in draws — confirm in autognet)

    Returns:
        tuple (avg_psi_gamma, avg_direct_effect, avg_indirect_effect, avg_spillover_effect):
            'average', 'direct_effect' and 'spillover_effect' are read from the
            allocation-0.5 result; the indirect effect is 'psi_0_gamma' at 0.5
            minus 'psi_0_gamma' at 0.2.
    """
    sampler_kwargs = dict(R=R, burnin=burnin, seed=s)
    ret_ag_5 = evaluate_autognet_via_agc_effect(adj_matrix, Y, A, L, treatment_allocation=0.5, **sampler_kwargs)
    ret_ag_2 = evaluate_autognet_via_agc_effect(adj_matrix, Y, A, L, treatment_allocation=0.2, **sampler_kwargs)

    avg_indirect_effect = ret_ag_5['psi_0_gamma'] - ret_ag_2['psi_0_gamma']

    return (
        ret_ag_5['average'],
        ret_ag_5['direct_effect'],
        avg_indirect_effect,
        ret_ag_5['spillover_effect'],
    )

get_autog(adj_matrix, Y, A, L, s=1)

(0.13419354838709677,
 0.013763440860215082,
 0.001577060931899632,
 0.0025089605734766735)

In [None]:
from tqdm import tqdm

# Resampling loop used to build confidence intervals for the get_autog estimates.
# NOTE(review): each replicate keeps 90% of the units drawn WITHOUT replacement
# (subsampling), not a with-replacement bootstrap; percentile intervals taken
# directly from such replicates may need rescaling — confirm the intended procedure.
num_bootstrap = 1000
np.random.seed(42)  # For reproducibility
bootstrap_results = {'psi_gamma': [],
                     'direct_effect': [],
                     'indirect_effect': [],
                     'spillover_effect': []}
for s in tqdm(range(num_bootstrap)):
    # The loop index s is also the seed handed to get_autog for this replicate.
    indices = np.random.choice(len(Y), size=int(len(Y)*0.9), replace=False)
    Y_boot = Y[indices]
    A_boot = A[indices]
    L_boot = L[indices]
    
    # Induced sub-network: keep only the rows and columns of the sampled units.
    adj_matrix_boot = adj_matrix[indices][:, indices]
    
    results = get_autog(adj_matrix_boot, Y_boot, A_boot, L_boot, s)
    bootstrap_results['psi_gamma'].append(results[0])
    bootstrap_results['direct_effect'].append(results[1])
    bootstrap_results['indirect_effect'].append(results[2])
    bootstrap_results['spillover_effect'].append(results[3])

# Calculate confidence intervals
def compute_confidence_intervals(data, alpha=0.05):
    """Return the (lower, upper) percentile interval of `data` covering 1 - alpha."""
    tail = alpha / 2
    lower_bound, upper_bound = (
        np.percentile(data, 100 * level) for level in (tail, 1 - tail)
    )
    return lower_bound, upper_bound

# Percentile intervals (default alpha=0.05, i.e. 2.5th and 97.5th percentiles)
# of the replicate estimates, one per effect.
ci_psi_gamma = compute_confidence_intervals(bootstrap_results['psi_gamma'])
ci_direct_effect = compute_confidence_intervals(bootstrap_results['direct_effect'])
ci_indirect_effect = compute_confidence_intervals(bootstrap_results['indirect_effect'])
ci_spillover_effect = compute_confidence_intervals(bootstrap_results['spillover_effect'])

# Full-sample estimates (seed 1) printed next to each interval; note that the
# 'Average:' label refers to these full-sample values, not to a mean over replicates.
r1, r2, r3, r4 = get_autog(adj_matrix, Y, A, L, s=1)
print('Bootstrap confidence intervals for psi_gamma:', ci_psi_gamma, 'Average:', r1)
print('Bootstrap confidence intervals for direct effect:', ci_direct_effect , 'Average:', r2)
print('Bootstrap confidence intervals for indirect effect:', ci_indirect_effect, 'Average:', r3)
print('Bootstrap confidence intervals for spillover effect:', ci_spillover_effect, 'Average:', r4)
    

100%|██████████| 1000/1000 [11:27<00:00,  1.45it/s]

Bootstrap confidence intervals for psi_gamma: (0.44661084529505585, 0.6288177830940987)
Bootstrap confidence intervals for direct effect: (0.15334130781499183, 0.2846232057416267)
Bootstrap confidence intervals for indirect effect: (-0.014360047846890026, 0.03405502392344506)
Bootstrap confidence intervals for spillover effect: (-0.025442583732057385, 0.053989234449760864)





# Household Income

In [77]:
# Inputs for the household-income analysis as plain arrays:
# Y = outcome (low_income_hh), A = treatment (incarcerated), L = covariate matrix.
# These names overwrite the Y/A/L/adj_matrix globals set for the previous outcome.
Y = df_use['low_income_hh'].values
A = df_use['incarcerated'].values
L = df_use[['age','latino','educ','illicit_ever','male']].values
# NOTE(review): assumes the rows/columns of adj are ordered like the rows of df_use — confirm.
adj_matrix = adj.values.astype(int)

# Row sums of the adjacency data; not referenced again in the visible cells.
degree = adj.sum(axis=1)

In [78]:
np.mean(Y[A == 1]) - np.mean(Y[A == 0])

-0.020046775810223877

In [79]:
# Doubly robust estimator at allocations 0.5 and 0.2 (seed 1) with return_raw=True;
# the entries read below are passed to compute_avg_effects_std_from_raw as psi_vec.
ret_5 = doubly_robust_em(A, L, Y, adj_matrix, treatment_allocation=0.5, seed=1, return_raw=True)
ret_2 = doubly_robust_em(A, L, Y, adj_matrix, treatment_allocation=0.2, seed=1, return_raw=True)

# Contrasts:
#   direct    = psi_1_gamma - psi_0_gamma, both at allocation 0.5
#   indirect  = psi_0_gamma at 0.5 minus psi_0_gamma at 0.2
#   spillover = psi_0_gamma at 0.5 minus psi_zero
psi_gamma = ret_5['psi_gamma']
direct_effect = ret_5['psi_1_gamma'] - ret_5['psi_0_gamma']
indirect_effect = ret_5['psi_0_gamma'] - ret_2['psi_0_gamma']
spillover_effect = ret_5['psi_0_gamma'] - ret_5['psi_zero']

# Mean and network-HAC standard error of each contrast; h=3 overrides the function default of 2.
avg_psi_gamma, se_psi_gamma = compute_avg_effects_std_from_raw(psi_gamma, adj_matrix, h=3)
avg_direct_effect, se_direct_effect = compute_avg_effects_std_from_raw(direct_effect, adj_matrix, h=3)
avg_indirect_effect, se_indirect_effect = compute_avg_effects_std_from_raw(indirect_effect, adj_matrix, h=3)
avg_spillover_effect, se_spillover_effect = compute_avg_effects_std_from_raw(spillover_effect, adj_matrix, h=3)

# Intervals are estimate +/- 1.96 standard errors (normal approximation, 95%).
print('Average psi_gamma:', avg_psi_gamma, 'Confidence interval:', (avg_psi_gamma - 1.96 * se_psi_gamma, avg_psi_gamma + 1.96 * se_psi_gamma))
print('Average direct effect:', avg_direct_effect, 'Confidence interval:', (avg_direct_effect - 1.96 * se_direct_effect, avg_direct_effect + 1.96 * se_direct_effect))
print('Average indirect effect:', avg_indirect_effect, 'Confidence interval:', (avg_indirect_effect - 1.96 * se_indirect_effect, avg_indirect_effect + 1.96 * se_indirect_effect))
print('Average spillover effect:', avg_spillover_effect, 'Confidence interval:', (avg_spillover_effect - 1.96 * se_spillover_effect, avg_spillover_effect + 1.96 * se_spillover_effect))


Average psi_gamma: 0.15068698129897684 Confidence interval: (0.09379550470362993, 0.20757845789432375)
Average direct effect: 0.04844788623380378 Confidence interval: (-0.04231729220868945, 0.13921306467629702)
Average indirect effect: 0.0406400837727338 Confidence interval: (-0.0078109188441487926, 0.0890910863896164)
Average spillover effect: 0.043394593235455685 Confidence interval: (-0.036541438944671926, 0.12333062541558329)


In [80]:
get_autog(adj_matrix, Y, A, L, s=1)

(0.13419354838709677,
 0.013763440860215082,
 0.001577060931899632,
 0.0025089605734766735)

In [None]:
from tqdm import tqdm

# Resampling loop used to build confidence intervals for the get_autog estimates.
# NOTE(review): each replicate keeps 90% of the units drawn WITHOUT replacement
# (subsampling), not a with-replacement bootstrap; percentile intervals taken
# directly from such replicates may need rescaling — confirm the intended procedure.
num_bootstrap = 1000
np.random.seed(42)  # For reproducibility
bootstrap_results = {'psi_gamma': [],
                     'direct_effect': [],
                     'indirect_effect': [],
                     'spillover_effect': []}
for s in tqdm(range(num_bootstrap)):
    # The loop index s is also the seed handed to get_autog for this replicate.
    indices = np.random.choice(len(Y), size=int(len(Y)*0.9), replace=False)
    Y_boot = Y[indices]
    A_boot = A[indices]
    L_boot = L[indices]
    
    # Induced sub-network: keep only the rows and columns of the sampled units.
    adj_matrix_boot = adj_matrix[indices][:, indices]
    
    results = get_autog(adj_matrix_boot, Y_boot, A_boot, L_boot, s)
    bootstrap_results['psi_gamma'].append(results[0])
    bootstrap_results['direct_effect'].append(results[1])
    bootstrap_results['indirect_effect'].append(results[2])
    bootstrap_results['spillover_effect'].append(results[3])

# Calculate confidence intervals
def compute_confidence_intervals(data, alpha=0.05):
    """Return the (lower, upper) percentile interval of `data` covering 1 - alpha."""
    # NOTE: an identical definition also appears in the other resampling cells of this notebook.
    tail = alpha / 2
    lower_bound, upper_bound = (
        np.percentile(data, 100 * level) for level in (tail, 1 - tail)
    )
    return lower_bound, upper_bound

# Percentile intervals (default alpha=0.05, i.e. 2.5th and 97.5th percentiles)
# of the replicate estimates, one per effect.
ci_psi_gamma = compute_confidence_intervals(bootstrap_results['psi_gamma'])
ci_direct_effect = compute_confidence_intervals(bootstrap_results['direct_effect'])
ci_indirect_effect = compute_confidence_intervals(bootstrap_results['indirect_effect'])
ci_spillover_effect = compute_confidence_intervals(bootstrap_results['spillover_effect'])

# Full-sample estimates (seed 1) printed next to each interval; note that the
# 'Average:' label refers to these full-sample values, not to a mean over replicates.
r1, r2, r3, r4 = get_autog(adj_matrix, Y, A, L, s=1)
print('Bootstrap confidence intervals for psi_gamma:', ci_psi_gamma, 'Average:', r1)
print('Bootstrap confidence intervals for direct effect:', ci_direct_effect , 'Average:', r2)
print('Bootstrap confidence intervals for indirect effect:', ci_indirect_effect, 'Average:', r3)
print('Bootstrap confidence intervals for spillover effect:', ci_spillover_effect, 'Average:', r4)
    

# Homeless

In [81]:
# Inputs for the homelessness analysis as plain arrays:
# Y = outcome (homeless), A = treatment (incarcerated), L = covariate matrix.
# These names overwrite the Y/A/L/adj_matrix globals set for the previous outcome.
Y = df_use['homeless'].values
A = df_use['incarcerated'].values
L = df_use[['age','latino','educ','illicit_ever','male']].values
# NOTE(review): assumes the rows/columns of adj are ordered like the rows of df_use — confirm.
adj_matrix = adj.values.astype(int)

# Row sums of the adjacency data; not referenced again in the visible cells.
degree = adj.sum(axis=1)

In [83]:
np.mean(Y[A == 1]) - np.mean(Y[A == 0])

0.1383227530905446

In [84]:
# Doubly robust estimator at allocations 0.5 and 0.2 (seed 1) with return_raw=True;
# the entries read below are passed to compute_avg_effects_std_from_raw as psi_vec.
ret_5 = doubly_robust_em(A, L, Y, adj_matrix, treatment_allocation=0.5, seed=1, return_raw=True)
ret_2 = doubly_robust_em(A, L, Y, adj_matrix, treatment_allocation=0.2, seed=1, return_raw=True)

# Contrasts:
#   direct    = psi_1_gamma - psi_0_gamma, both at allocation 0.5
#   indirect  = psi_0_gamma at 0.5 minus psi_0_gamma at 0.2
#   spillover = psi_0_gamma at 0.5 minus psi_zero
psi_gamma = ret_5['psi_gamma']
direct_effect = ret_5['psi_1_gamma'] - ret_5['psi_0_gamma']
indirect_effect = ret_5['psi_0_gamma'] - ret_2['psi_0_gamma']
spillover_effect = ret_5['psi_0_gamma'] - ret_5['psi_zero']

# Mean and network-HAC standard error of each contrast; h=3 overrides the function default of 2.
avg_psi_gamma, se_psi_gamma = compute_avg_effects_std_from_raw(psi_gamma, adj_matrix, h=3)
avg_direct_effect, se_direct_effect = compute_avg_effects_std_from_raw(direct_effect, adj_matrix, h=3)
avg_indirect_effect, se_indirect_effect = compute_avg_effects_std_from_raw(indirect_effect, adj_matrix, h=3)
avg_spillover_effect, se_spillover_effect = compute_avg_effects_std_from_raw(spillover_effect, adj_matrix, h=3)

# Intervals are estimate +/- 1.96 standard errors (normal approximation, 95%).
print('Average psi_gamma:', avg_psi_gamma, 'Confidence interval:', (avg_psi_gamma - 1.96 * se_psi_gamma, avg_psi_gamma + 1.96 * se_psi_gamma))
print('Average direct effect:', avg_direct_effect, 'Confidence interval:', (avg_direct_effect - 1.96 * se_direct_effect, avg_direct_effect + 1.96 * se_direct_effect))
print('Average indirect effect:', avg_indirect_effect, 'Confidence interval:', (avg_indirect_effect - 1.96 * se_indirect_effect, avg_indirect_effect + 1.96 * se_indirect_effect))
print('Average spillover effect:', avg_spillover_effect, 'Confidence interval:', (avg_spillover_effect - 1.96 * se_spillover_effect, avg_spillover_effect + 1.96 * se_spillover_effect))


Average psi_gamma: 0.1838923393352411 Confidence interval: (0.11899259142337647, 0.24879208724710572)
Average direct effect: 0.16720881409071311 Confidence interval: (0.0160734159055336, 0.3183442122758926)
Average indirect effect: -0.016000990773829958 Confidence interval: (-0.06054012459275883, 0.028538143045098913)
Average spillover effect: -0.07561214995789191 Confidence interval: (-0.1612597290264518, 0.010035429110667973)


In [85]:
get_autog(adj_matrix, Y, A, L, s=1)

(0.12473118279569895,
 0.05756272401433696,
 0.003369175627240137,
 0.005663082437276007)

In [None]:
from tqdm import tqdm

# Resampling loop used to build confidence intervals for the get_autog estimates.
# NOTE(review): each replicate keeps 90% of the units drawn WITHOUT replacement
# (subsampling), not a with-replacement bootstrap; percentile intervals taken
# directly from such replicates may need rescaling — confirm the intended procedure.
num_bootstrap = 1000
np.random.seed(42)  # For reproducibility
bootstrap_results = {'psi_gamma': [],
                     'direct_effect': [],
                     'indirect_effect': [],
                     'spillover_effect': []}
for s in tqdm(range(num_bootstrap)):
    # The loop index s is also the seed handed to get_autog for this replicate.
    indices = np.random.choice(len(Y), size=int(len(Y)*0.9), replace=False)
    Y_boot = Y[indices]
    A_boot = A[indices]
    L_boot = L[indices]
    
    # Induced sub-network: keep only the rows and columns of the sampled units.
    adj_matrix_boot = adj_matrix[indices][:, indices]
    
    results = get_autog(adj_matrix_boot, Y_boot, A_boot, L_boot, s)
    bootstrap_results['psi_gamma'].append(results[0])
    bootstrap_results['direct_effect'].append(results[1])
    bootstrap_results['indirect_effect'].append(results[2])
    bootstrap_results['spillover_effect'].append(results[3])

# Calculate confidence intervals
def compute_confidence_intervals(data, alpha=0.05):
    """Return the (lower, upper) percentile interval of `data` covering 1 - alpha."""
    # NOTE: an identical definition also appears in the other resampling cells of this notebook.
    tail = alpha / 2
    lower_bound, upper_bound = (
        np.percentile(data, 100 * level) for level in (tail, 1 - tail)
    )
    return lower_bound, upper_bound

# Percentile intervals (default alpha=0.05, i.e. 2.5th and 97.5th percentiles)
# of the replicate estimates, one per effect.
ci_psi_gamma = compute_confidence_intervals(bootstrap_results['psi_gamma'])
ci_direct_effect = compute_confidence_intervals(bootstrap_results['direct_effect'])
ci_indirect_effect = compute_confidence_intervals(bootstrap_results['indirect_effect'])
ci_spillover_effect = compute_confidence_intervals(bootstrap_results['spillover_effect'])

# Full-sample estimates (seed 1) printed next to each interval; note that the
# 'Average:' label refers to these full-sample values, not to a mean over replicates.
r1, r2, r3, r4 = get_autog(adj_matrix, Y, A, L, s=1)
print('Bootstrap confidence intervals for psi_gamma:', ci_psi_gamma, 'Average:', r1)
print('Bootstrap confidence intervals for direct effect:', ci_direct_effect , 'Average:', r2)
print('Bootstrap confidence intervals for indirect effect:', ci_indirect_effect, 'Average:', r3)
print('Bootstrap confidence intervals for spillover effect:', ci_spillover_effect, 'Average:', r4)
    