In [None]:
import dowhy
import numpy as np
import pandas as pd
from dowhy import CausalModel

# Load data
This is the time to do any preprocessing of the data and/or to create variables derived from existing columns (e.g. mapping categorical data encoded as numbers to indicator variables).

In [None]:
# Read the CSV into a DataFrame; `filename` is resolved relative to the
# notebook's working directory.
filename = "lalonde.csv"
dataset = pd.read_csv(filepath_or_buffer=filename)

In [None]:
# Preview the first five rows to sanity-check the load.
dataset.head(n=5)

# Causal Model structure
In causal inference, we rely on prior knowledge of the causal model's structure. This knowledge can be provided in various forms, 
but in DoWhy we must define it as a **DAG** (Directed Acyclic Graph). 
A **Graph** is a network of **Nodes** (aka vertices) and **Edges** between them.
**Directed** means each edge has a direction, from the cause, to the effect. 
**Acyclic** means the graph must not have cycles (loops). 
In this case, the Nodes are the columns in the data file, and are also known as **Variables**. 
We must define the edges here as a string; if your graph is complex you can use the graphical editor [CausalWizard.app](https://CausalWizard.app). 

In [None]:
# Assumed causal graph, written as a DOT `digraph` string; it is passed to
# CausalModel via `graph=g` further down.
# Structure encoded below:
#   - Wage_1974, No_Degree, Education_years and Married each point to both
#     Treated and Wage_1978.
#   - Age points to Wage_1978 only.
#   - Treated points to Wage_1978.
# Node names are expected to match the dataset's column names (per the
# notebook text) -- verify against the loaded CSV.
g = """digraph {

Treated;
No_Degree;
Age;
Education_years;
Married;
Wage_1974;
Wage_1978;

Wage_1974 -> Treated;
No_Degree -> Treated;
Education_years -> Treated;
Married -> Treated;

Wage_1974 -> Wage_1978;
No_Degree -> Wage_1978;
Education_years -> Wage_1978;
Married -> Wage_1978;

Age -> Wage_1978;

Treated -> Wage_1978;

}"""

# DoWhy Step 1/4: Create Causal Model

In [None]:
# Names of the treatment and outcome variables; both appear as nodes in the
# graph string `g`.
treatment_col = 'Treated'
outcome_col = 'Wage_1978'

# Build the causal model from the data and the graph.
# `CausalModel` is imported in the notebook's top import cell.
model = CausalModel(
    data=dataset,
    treatment=treatment_col,
    outcome=outcome_col,
    graph=g,
)

# Render the graph so the assumed structure can be checked visually.
model.view_model()

# DoWhy Step 2/4: Identify effect

In [None]:
# Ask DoWhy which estimands the graph supports. Identification is told to
# proceed even when the effect is flagged as unidentifiable.
identified_estimand = model.identify_effect(
    proceed_when_unidentifiable=True,
)

# The printed summary lists the identification methods proposed.
print(str(identified_estimand))

# DoWhy Step 3/4: Estimate effect

In [None]:
# Estimator to use. Set this to PROPENSITY_WEIGHTING_METHOD to switch to
# propensity score weighting instead of linear regression.
PROPENSITY_WEIGHTING_METHOD = "backdoor.propensity_score_weighting"
method = "backdoor.linear_regression"

# Target population for the effect ("ate" is passed as target_units).
desired_effect = "ate"

# The inverse-propensity weighting scheme is only meaningful for the
# propensity score weighting estimator, so it is only passed for that method.
# For any other estimator no extra parameters are supplied.
method_params = (
    {"weighting_scheme": "ips_weight"}
    if method == PROPENSITY_WEIGHTING_METHOD
    else None
)

estimate = model.estimate_effect(
    identified_estimand,
    method_name=method,
    target_units=desired_effect,
    method_params=method_params,
)

print("Causal Estimate is " + str(estimate.value))


# DoWhy Step 4/4: Refute effect

In [None]:
# The placebo refuter with placebo_type="permute" is stochastic. Seed NumPy's
# global RNG first so that a fresh "Restart & Run All" reproduces the result.
# NOTE(review): assumes the refuter draws from NumPy's global RNG -- confirm
# for the installed DoWhy version.
REFUTER_SEED = 42
np.random.seed(REFUTER_SEED)

# Replace the treatment with a permuted placebo and re-estimate; the result
# is printed below for inspection.
refute_placebo_treatment = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="placebo_treatment_refuter",
    placebo_type="permute",
)

print(refute_placebo_treatment)

# Counterfactual outcomes
This step is optional, but interesting. You are likely to want to know how to estimate counterfactual scenarios such as "what would have happened if I had applied the treatment to all the samples?". You can easily obtain these estimates from your model, using DoWhy.

Note: Not all estimators support the do() operator.

In [None]:
# Work on a copy of the model's data: per the original author's note, DoWhy
# adds columns to the frame it is given.
dataset_copy = model._data.copy()

# Observed (factual) average of the outcome column, as a baseline.
mean_outcome = dataset_copy[outcome_col].mean()
print(f'Actual mean outcome: {mean_outcome}')

In [None]:
# Counterfactual means via the estimator's do() operator. The original
# notebook quotes its description as: "do(x): Given a value x for the
# treatment, returns the expected value of the outcome when the treatment is
# intervened to a value x."
treatment_value_treated, treatment_value_control = True, False

# Scenario 1: nobody receives the treatment.
cf_estimate_control = estimate.estimator.do(x=treatment_value_control, data_df=dataset_copy)
print(f'Mean outcome if all control: {cf_estimate_control}')

# Scenario 2: everybody receives the treatment.
cf_estimate_treated = estimate.estimator.do(x=treatment_value_treated, data_df=dataset_copy)
print(f'Mean outcome if all treated: {cf_estimate_treated}')
