In [None]:
!pip install dowhy==0.12 econml==0.15 networkx==3.3 "numpy<2.0" ananke-causal

In [None]:
from ananke.graphs import ADMG
from ananke.identification import OneLineID
from ananke.estimation import CausalEffect
from ananke.datasets import load_afixable_data
from ananke.estimation import AutomatedIF
import numpy as np

import re
import dowhy
from dowhy import CausalModel
import dowhy.datasets
from dowhy.utils.regression import create_polynomial_function
from sklearn.ensemble import GradientBoostingRegressor

# Sensitivity Analysis and Advanced Identification

## Sensitivity Analysis with DoWhy

In [None]:
# Generate the data
# Fix NumPy's global RNG first so that Restart & Run All reproduces the same
# synthetic dataset (and therefore the same true ATE and estimates).
# NOTE(review): assumes the DoWhy generator draws from NumPy's global RNG — confirm.
np.random.seed(42)  # arbitrary fixed seed

data = dowhy.datasets.partially_linear_dataset(
    beta=10,
    num_common_causes=7,
    num_unobserved_common_causes=1,
    strength_unobserved_confounding=10,
    num_samples=1000,
    num_treatments=1,
    stddev_treatment_noise=10,
    stddev_outcome_noise=5
)

In [None]:
# True effect: the "ate" entry of the dict returned by the data generator,
# shown as the cell output for comparison with the estimates computed later.
data["ate"]

In [None]:
# Drop W0 -> unobserved confounder
# The analyst-facing data and graph must not contain the hidden column(s).
dropped_cols = ["W0"]
user_data = data["df"].drop(columns=dropped_cols)

# Strip each hidden node, and the edges leaving it, from the GML graph text.
user_graph = data["gml_graph"]
for col in dropped_cols:
    user_graph = user_graph.replace('node[ id "{0}" label "{0}"]'.format(col), '')
    # Raw string: in a normal literal "\[" and "\]" are invalid escape
    # sequences (a warning today, an error in a future Python).
    # re.escape keeps the column name from being interpreted as regex syntax.
    # The pattern matches edges whose target is "v"/"y" followed by zeros
    # (presumably the treatment and outcome nodes — verify against the graph).
    edge_pattern = r'edge\[ source "{}" target "[vy][0]*"\]'.format(re.escape(col))
    user_graph = re.sub(edge_pattern, "", user_graph)

In [None]:
# Assemble the DoWhy model from the observed columns and the edited graph.
treatment_name = data["treatment_name"]
outcome_name = data["outcome_name"]

model = CausalModel(
    data=user_data,
    treatment=treatment_name,
    outcome=outcome_name,
    graph=user_graph,
    test_significance=None,
)

# Render the graph the model was given.
model.view_model()

In [None]:
# Identify the estimand from the graph. The flag lets identification proceed
# without stopping when the effect is reported as unidentifiable.
estimand = model.identify_effect(
    proceed_when_unidentifiable=True,
)
print(estimand)

In [None]:
# Estimate the effect with EconML's LinearDML, using gradient-boosted
# regressors for both the outcome and the treatment models.
dml_init_params = {
    'model_y': GradientBoostingRegressor(),
    'model_t': GradientBoostingRegressor(),
    'linear_first_stages': False,
}
dml_fit_params = {'cache_values': True}

linear_dml_estimate = model.estimate_effect(
    estimand,
    method_name="backdoor.econml.dml.dml.LinearDML",
    method_params={
        'init_params': dml_init_params,
        'fit_params': dml_fit_params,
    },
)

In [None]:
# Sensitivity analysis: simulate an unobserved common cause over a grid of
# partial R^2 values (0.0 to 0.7 in steps of 0.1) with the treatment and with
# the outcome.
r2_with_treatment = np.arange(0, 0.8, 0.1)
r2_with_outcome = np.arange(0, 0.8, 0.1)

refutation_sensitivity = model.refute_estimate(
    estimand,
    linear_dml_estimate,
    method_name="add_unobserved_common_cause",
    simulation_method="non-parametric-partial-R2",
    partial_r2_confounder_treatment=r2_with_treatment,
    partial_r2_confounder_outcome=r2_with_outcome,
)
print(refutation_sensitivity)

In [None]:
# Same refuter, this time benchmarked against the observed covariate W1
# with effect fractions of 0.2 on both treatment and outcome.
benchmark_settings = {
    "benchmark_common_causes": ["W1"],
    "effect_fraction_on_treatment": 0.2,
    "effect_fraction_on_outcome": 0.2,
}

refute_bm = model.refute_estimate(
    estimand,
    linear_dml_estimate,
    method_name="add_unobserved_common_cause",
    simulation_method="non-parametric-partial-R2",
    **benchmark_settings,
)

## Advanced Identification with Ananke

In [None]:
# Read the data
# Loads the example dataset from ananke.datasets. Note that this rebinds
# `data`, which until this cell held the dict returned by the DoWhy generator.
data = load_afixable_data()

In [None]:
# Vertices of the graph.
nodes = [
    'Income', 'Insurance', 'ViralLoad', 'Education',
    'T', 'Toxicity', 'CD4',
]

# Directed edges, written as (source, target).
di_edges = [
    ('ViralLoad', 'Income'),
    ('ViralLoad', 'T'),
    ('ViralLoad', 'Toxicity'),
    ('Education', 'Income'),
    ('Education', 'T'),
    ('Education', 'Toxicity'),
    ('Income', 'Insurance'),
    ('Insurance', 'T'),
    ('T', 'Toxicity'),
    ('Toxicity', 'CD4'),
    ('T', 'CD4'),
]

# Bidirected edges.
bi_edges = [
    ('Income', 'T'),
    ('Insurance', 'ViralLoad'),
    ('Education', 'CD4'),
]

# Build the ADMG and draw it left-to-right.
G = ADMG(nodes, di_edges, bi_edges)

G.draw(direction="LR")

In [None]:
# Ask OneLineID whether the effect of T on CD4 is identified in G;
# the result of .id() is shown as the cell output.
effect_id = OneLineID(
    graph=G,
    treatments=['T'],
    outcomes=['CD4'],
)
effect_id.id()

In [None]:
# Instantiate the causal effect object for the effect of T on CD4 in G.
# It is bound to `ace_obj`, the name the estimation calls that follow use.
# Binding it to `model` left `ace_obj` undefined (NameError on a fresh kernel)
# and also overwrote the DoWhy CausalModel created earlier.
ace_obj = CausalEffect(
    graph=G,
    treatment='T',
    outcome='CD4',
)

In [None]:
# Run every estimator first (in the order IPW, g-formula, AIPW, efficient
# AIPW), then report the results together.
ate_ipw, ate_gformula, ate_aipw, ate_eff = (
    ace_obj.compute_effect(data, estimator)
    for estimator in ("ipw", "gformula", "aipw", "eff-aipw")
)

reports = (
    ("IPW", ate_ipw),
    ("g-formula", ate_gformula),
    ("AIPW", ate_aipw),
    ("efficient AIPW", ate_eff),
)
for label, ate in reports:
    print(f"ATE using {label} = ", ate)