# Causal Estimate

## How to use the notebook

The following cells:
- specify the objective, variables, and variable types,
- read the dataset,
- set up the causal structure,
- identify and estimate the causal effect,
- present the results of the refutation tests.

By default, the notebook is set up to run with an example (wine quality). To see how it works, run the notebook without changing the code.

For your project, adjust the code in the linked cells with your objectives, variables, dataset etc. and then execute all cells in order.

Please refer to causal_estimate.board for detailed instructions.

In [0]:
import numpy as np
import pandas as pd

from dowhy import CausalModel
import dowhy.causal_refuters as causal_refuters
import dowhy.datasets
import dowhy.api

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LassoCV
from sklearn.ensemble import GradientBoostingRegressor

from statsmodels.api import OLS

import matplotlib.pyplot as plt

import warnings
warnings.simplefilter('ignore')

### Project

In [0]:
experiment_name = '{{cookiecutter.use_case_name}}'  # please provide a name for the causal estimation experiment

### Dataset

In [0]:
# Set to True when the data is a time series with a 'date' column that should become the index.
time_series = False
path = '{{cookiecutter.data_path}}' # Specify the path of the data

# The value 'default example' selects the hosted wine quality example dataset.
if path == 'default example':
    path = 'https://raw.githubusercontent.com/erium/halerium-example-data/main/hypothesis_testing/WineQT.csv'

if time_series:
    # Parse the 'date' column as datetimes and use it as the index.
    df = pd.read_csv(path, parse_dates=['date'], index_col='date')
else:
    # sep=None asks pandas to detect the delimiter. Only the Python parser engine
    # supports this, so request it explicitly instead of relying on the implicit
    # fallback from the C engine (which raises a ParserWarning).
    df = pd.read_csv(path, sep=None, engine='python')
df

### Define the problem
- What is your treatment variable?
- What is your outcome variable?
- What are the confounders?

In [0]:
# Example: Does pH acidity levels affect the quality of the wine?
# Set to True only when the treatment column is binary: the propensity score
# estimation cells and the add_unobserved_common_cause refuter further down
# run only when this flag is True.
is_treatment_binary = False

In [0]:
# Simple setup using backdoor methods: treatment, outcome, common causes and
# effect modifiers are named directly rather than through an explicit graph.
model = CausalModel(
    data=df,
    treatment='pH',
    outcome='quality',
    common_causes=['fixed acidity', 'volatile acidity'],
    effect_modifiers=['residual sugar'],
)
# Render the causal model that was built from the arguments above.
model.view_model()

In [0]:
# More complex, using graph and iv
# NOTE(review): the node names below use underscores (e.g. fixed_acidity), while the
# column names used in the simpler model above contain spaces ('fixed acidity').
# Node names presumably have to match the DataFrame column names — confirm and
# rename the columns (or the nodes) before enabling this cell.
# causal_graph = """
# digraph {
# U[label="Unobserved Confounders"];
# sulphates->pH; residual_sugar->quality;
# fixed_acidity->pH; volatile_acidity -> pH;
# U->pH;U->quality;
# fixed_acidity->quality; volatile_acidity->quality; pH->quality; 
# }
# """
#
# model= CausalModel(
#         data = df,
#         graph=causal_graph.replace("\n", " "),
#         treatment='pH',
#         outcome='quality')
# model.view_model()

### Identify estimands

In [0]:
# Identify the estimand(s) for the causal model defined above and print them.
# proceed_when_unidentifiable=True skips the warning asking about unobserved confounders.
identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
print(identified_estimand)

### Estimation

In [0]:
# Registry of estimates added by the estimation cells below, keyed by a short
# method name (e.g. 'linear', 'iv'); the refutation cell iterates over this dict.
estimate_methods = {}

#### Backdoor methods

In [0]:
# Estimate the effect with the backdoor linear regression estimator.
linear_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.linear_regression",
)
# Register the estimate for refutation only if a value was produced. Compare
# against None explicitly: a truthiness check would silently drop a legitimate
# estimate of exactly 0.
if linear_estimate.value is not None:
    estimate_methods['linear'] = linear_estimate
print("Causal Estimate is " + str(linear_estimate.value))

Propensity score methods

*Only for binary treatments.*

In [0]:
# Propensity score stratification; runs only for binary treatments.
if is_treatment_binary:
    strat_estimate = model.estimate_effect(
        identified_estimand,
        method_name="backdoor.propensity_score_stratification",  # ok for linear
    )
    # Register the estimate for refutation only if a value was produced. Compare
    # against None explicitly: a truthiness check would silently drop a
    # legitimate estimate of exactly 0.
    if strat_estimate.value is not None:
        estimate_methods['strat'] = strat_estimate
    print("Causal Estimate is " + str(strat_estimate.value))

In [0]:
# Propensity score matching; runs only for binary treatments.
if is_treatment_binary:
    match_estimate = model.estimate_effect(
        identified_estimand,
        method_name="backdoor.propensity_score_matching",
    )
    # Register the estimate for refutation only if a value was produced. Compare
    # against None explicitly: a truthiness check would silently drop a
    # legitimate estimate of exactly 0.
    if match_estimate.value is not None:
        estimate_methods['match'] = match_estimate
    print("Causal Estimate is " + str(match_estimate.value))

In [0]:
# Propensity score weighting with the "ips_weight" scheme; runs only for binary treatments.
if is_treatment_binary:
    ipw_estimate = model.estimate_effect(
        identified_estimand,
        method_name="backdoor.propensity_score_weighting",
        method_params={"weighting_scheme": "ips_weight"},
    )
    # Register the estimate for refutation only if a value was produced. Compare
    # against None explicitly: a truthiness check would silently drop a
    # legitimate estimate of exactly 0.
    if ipw_estimate.value is not None:
        estimate_methods['ipw'] = ipw_estimate
    print("Causal Estimate is " + str(ipw_estimate.value))

### Instrumental Variable

In [0]:
# Estimate the effect with the instrumental variable estimator.
iv_estimate = model.estimate_effect(
    identified_estimand,
    method_name="iv.instrumental_variable",
)
# Register the estimate for refutation only if a value was produced. Compare
# against None explicitly: a truthiness check would silently drop a legitimate
# estimate of exactly 0.
if iv_estimate.value is not None:
    estimate_methods['iv'] = iv_estimate
print("Causal Estimate is " + str(iv_estimate.value))

Regression Discontinuity

In [0]:
# Estimate the effect with the regression discontinuity estimator.
# NOTE(review): the threshold and bandwidth below are example values for the
# 'residual sugar' column — presumably they need adjusting to the value range
# of your own running variable; confirm before relying on the result.
regdist_estimate = model.estimate_effect(
    identified_estimand,
    method_name="iv.regression_discontinuity",
    method_params={
        'rd_variable_name': 'residual sugar',
        'rd_threshold_value': 0.5,
        'rd_bandwidth': 0.1,
    },
)
# Register the estimate for refutation only if a value was produced. Compare
# against None explicitly: a truthiness check would silently drop a legitimate
# estimate of exactly 0.
if regdist_estimate.value is not None:
    estimate_methods['regdist'] = regdist_estimate
print("Causal Estimate is " + str(regdist_estimate.value))

### Refuting the estimate
Refutation methods provide tests that every correct estimator should pass. 

So if an estimator fails a refutation test (p-value < 0.05), it means that there is some problem with the estimator.

In [0]:
# Run the refuters against every registered estimate and collect the results
# per estimate name. A slot stays None when its refuter was skipped.
refute_data = {}
for estimate_name, estimate in estimate_methods.items():
    refute_methods = dict.fromkeys(
        ['random_common_cause', 'placebo_treatment', 'data_subset', 'unobserved_common_cause']
    )

    # Random common cause refuter: run for every estimate.
    res_random = model.refute_estimate(
        identified_estimand,
        estimate,
        method_name="random_common_cause",
    )
    refute_methods['random_common_cause'] = res_random

    # Placebo treatment refuter (permuted placebo): not run for the 'linear' estimate.
    if estimate_name != 'linear':
        res_placebo = model.refute_estimate(
            identified_estimand,
            estimate,
            method_name="placebo_treatment_refuter",
            placebo_type="permute",
        )
        refute_methods['placebo_treatment'] = res_placebo

    # Data subset refuter using a 0.9 subset fraction.
    res_subset = model.refute_estimate(
        identified_estimand,
        estimate,
        method_name="data_subset_refuter",
        subset_fraction=0.9,
    )
    refute_methods['data_subset'] = res_subset

    # Unobserved common cause refuter: only run when the treatment is binary.
    if is_treatment_binary:
        res_unobserved_auto = model.refute_estimate(
            identified_estimand,
            estimate,
            method_name="add_unobserved_common_cause",
            confounders_effect_on_treatment="binary_flip",
            confounders_effect_on_outcome="linear",
        )
        refute_methods['unobserved_common_cause'] = res_unobserved_auto

    refute_data[estimate_name] = refute_methods

In [0]:
# Print every refutation result per estimate and tally the outcomes: a result
# flagged as statistically significant counts as a failed test, any other
# result that carries a significance entry counts as passed.
for estimate_method, refutations in refute_data.items():
    tests_passed = 0
    tests_failed = 0
    print(estimate_method)
    for refute_method, refutation in refutations.items():
        print(refute_method)
        # Refuters that were skipped for this estimate are stored as None.
        if refutation is None:
            print("None \n")
            continue
        print(refutation)
        refutation_result = refutation.refutation_result
        # Only results that carry a significance test are counted.
        if refutation_result is not None:
            if refutation_result['is_statistically_significant']:
                tests_failed += 1
            else:
                tests_passed += 1
    print("Statistical tests passed: ", tests_passed)
    print("Statistical tests failed: ", tests_failed)
    print('______________________________')