In [None]:
# Example - https://github.com/cornelliusyudhawijaya/Churn_Causality_Analysis/blob/main/Causal%20Analysis%20Do%20Why.ipynb
    
import numpy as np
import pandas as pd

from dowhy import CausalModel
import dowhy.datasets 

# Avoid printing data-conversion warnings from sklearn.
# Only DataConversionWarning is silenced: a blanket filterwarnings('ignore')
# would also hide every other warning category (deprecations, estimator
# warnings, ...), which can matter when interpreting the results below.
# Add further explicit categories here if more noise needs to be suppressed.
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler,StandardScaler

# Load the raw table, discard the two columns the analysis does not use, and
# remove every row that pandas parsed as containing a missing value.
# NOTE(review): dropna() only catches values read in as NaN; if this CSV marks
# missing entries with a placeholder string they will pass through - confirm.
data = (
    pd.read_csv('../../../data/adult.data.csv')
    .drop(columns=['fnlwgt', 'education'])
    .dropna()
)

# Recode three columns as 0/1 integers (needed for dowhy later on).
# The string literals carry a leading space and are compared verbatim, so
# presumably the raw cell values carry that space too - verify against the file.
data['Probability'] = np.where(data['Probability'] == " <=50K", 0, 1)
for column, positive_label in (('sex', " Male"), ('race', " White")):
    data[column] = np.where(data[column] == positive_label, 1, 0)

# Integer-encode every column that is still non-numeric. fit_transform()
# refits the encoder on each call, so a single instance is reused.
non_numeric_columns = data.select_dtypes(exclude=[np.number]).columns.tolist()
le = LabelEncoder()
for column in non_numeric_columns:
    data[column] = le.fit_transform(data[column])

print(non_numeric_columns)

# Min-max scale all columns. The scaler hands back a bare array, so it is
# wrapped in a new DataFrame that reuses the original column labels.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(data)
data = pd.DataFrame(scaled_values, columns=data.columns)

print(data.head())

In [None]:
# Assumed causal structure, written as a Graphviz DOT digraph.
# It declares eight named nodes plus `U`, labelled "Unobserved Confounders",
# which points at sex, race and Probability. The remaining edges encode the
# hand-specified paths from sex and race to Probability.
# NOTE(review): two edge lines end without a trailing `;`. The DOT grammar
# makes the semicolon optional, so this is presumably harmless - confirm with
# the DOT parser that dowhy uses in this environment.
causal_graph = """
digraph {
marital_status;
workclass;
occupation;
relationship;
race;
sex;
education_num;
Probability;
U[label="Unobserved Confounders"];
sex -> Probability;
sex -> marital_status; marital_status -> Probability; workclass -> occupation;
marital_status -> occupation; occupation -> Probability
marital_status -> relationship; relationship -> Probability
race -> occupation; race -> Probability;
race -> education_num; education_num -> workclass;
workclass -> Probability;
U->sex;U->race;U->Probability;
}
"""

## Recast the outcome and the two 0/1-coded attribute columns as booleans.
## NOTE(review): the earlier remark here said dowhy works for binary inputs
## only; presumably it is the propensity-score estimator used further down that
## needs a binary treatment - confirm against the dowhy documentation.
data['Probability'] = data['Probability'].ne(0)
data['sex'] = data['sex'].eq(1)
data['race'] = data['race'].eq(1)

In [None]:
# Build the causal model from the hand-specified graph, with `race` as the
# treatment and `Probability` as the outcome.
# The DOT text is collapsed onto one line first: every newline becomes a space.
flattened_graph = " ".join(causal_graph.split("\n"))
model = CausalModel(
    data=data,
    treatment="race",
    outcome="Probability",
    graph=flattened_graph,
)

In [None]:
from IPython.display import Image, display

# Render the model's graph, then load "causal_model.png" (presumably the file
# view_model() has just written to the working directory) and show it inline.
model.view_model()
graph_image = Image(filename="causal_model.png")
display(graph_image)

In [None]:
# Derive the target estimand from the model's graph.
# Going by the flag's name, identification carries on instead of stopping when
# the effect is not identifiable - see the dowhy docs for the exact semantics.
identified_estimand = model.identify_effect(
    proceed_when_unidentifiable=True,
)
print(identified_estimand)

In [None]:
# Estimate the effect of the treatment on the outcome with propensity-score
# stratification over the backdoor estimand, then report the full estimate
# object followed by its point value.
causal_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification",
)
print(causal_estimate)
print(f"Causal Estimate is {causal_estimate.value}")

In [None]:
# Causal effect on the control group (ATC)
# The result is bound to `causal_estimate_atc` so that the variable name
# matches the requested target units ("atc"). The previous name,
# `causal_estimate_att`, suggested a different estimand (ATT); it is kept as
# an alias so any other cell that still refers to it keeps working.
causal_estimate_atc = model.estimate_effect(identified_estimand,
        method_name="backdoor.propensity_score_stratification",
        target_units = "atc")
causal_estimate_att = causal_estimate_atc
print(causal_estimate_atc)
print("Causal Estimate is " + str(causal_estimate_atc.value))

# Add Random Common Cause

In [None]:
# Refutation check using the "random_common_cause" refuter.
# Presumably this re-estimates with an extra randomly generated common cause;
# a robust estimate should change little - see the dowhy refuter docs.
refutel = model.refute_estimate(
    identified_estimand,
    causal_estimate,
    method_name="random_common_cause",
)
print(refutel)

# Replace Treatment with Placebo

In [None]:
# Refutation check using the placebo-treatment refuter with
# placebo_type="permute". Presumably the treatment column is shuffled, so the
# re-estimated effect is expected to be near zero - confirm in the dowhy docs.
refutel = model.refute_estimate(
    identified_estimand,
    causal_estimate,
    method_name="placebo_treatment_refuter",
    placebo_type="permute",
)
print(refutel)

# Remove Random Subset of Data

In [None]:
# Refutation check using the data-subset refuter.
# Presumably the effect is re-estimated on random subsets of the rows; a
# stable estimate should stay close to the original - see the dowhy docs.
refutel = model.refute_estimate(
    identified_estimand,
    causal_estimate,
    method_name="data_subset_refuter",
)
print(refutel)