In [None]:
# Example - https://github.com/cornelliusyudhawijaya/Churn_Causality_Analysis/blob/main/Causal%20Analysis%20Do%20Why.ipynb
    
import numpy as np
import pandas as pd

from dowhy import CausalModel
import dowhy.datasets 

# Silence all warnings for the rest of the session (the filter below is not limited to DataConversionWarning)
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings('ignore')

In [None]:
# Cell-local sklearn imports, kept in this cell so it still runs on its own.
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler,StandardScaler

# Load the COMPAS two-year recidivism table.
data = pd.read_csv('../../../data/compas-scores-two-years.csv')

# Discard every column that is not used downstream.
# NOTE(review): the earlier comment here said two duplicate columns
# ('decile_score', 'priors_count') were removed, but only 'decile_score' is in
# this list and 'priors_count' is kept -- confirm which was intended.
data = data.drop(columns=[
    'id', 'name', 'first', 'last', 'compas_screening_date',
    'dob', 'age', 'juv_fel_count', 'decile_score',
    'juv_misd_count', 'juv_other_count', 'days_b_screening_arrest',
    'c_jail_in', 'c_jail_out', 'c_case_number', 'c_offense_date', 'c_arrest_date',
    'c_days_from_compas', 'c_charge_desc', 'is_recid', 'r_case_number', 'r_charge_degree',
    'r_days_from_arrest', 'r_offense_date', 'r_charge_desc', 'r_jail_in', 'r_jail_out',
    'violent_recid', 'is_violent_recid', 'vr_case_number', 'vr_charge_degree', 'vr_offense_date',
    'vr_charge_desc', 'type_of_assessment', 'score_text', 'screening_date',
    'v_type_of_assessment', 'v_decile_score', 'v_score_text', 'v_screening_date', 'in_custody',
    'out_custody', 'start', 'end', 'event',
])

# Keep only complete rows.
data = data.dropna()

# The label column is called "Probability" from here on; index labels become strings.
data = data.rename(index=str, columns={"two_year_recid": "Probability"})

# Binary encodings: sex is 1 for 'Female', race is 1 for 'Caucasian', 0 otherwise.
data['sex'] = (data['sex'] == 'Female').astype(int)
data['race'] = (data['race'] == 'Caucasian').astype(int)

# Every column that is still non-numeric gets integer label codes.
non_numeric_columns = data.select_dtypes(exclude=[np.number]).columns.tolist()

le = LabelEncoder()
for col in non_numeric_columns:
    data[col] = le.fit_transform(data[col])

print(non_numeric_columns)

# Rescale every column with min-max scaling; the frame is rebuilt from the
# scaled array, so it gets a fresh default index.
scaler = MinMaxScaler()
data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)

protected_attribute = "race"

print(data.head(5))

# Transform data using LFR

In [None]:
from aif360.algorithms.preprocessing.lfr import LFR
from aif360.datasets import BinaryLabelDataset
from IPython.display import Markdown, display

In [None]:
# Group definitions for LFR. `race` was encoded earlier in the notebook as 1 for
# 'Caucasian' and 0 otherwise; the value-1 group is treated as privileged.
privileged_groups = [{protected_attribute: 1}]
unprivileged_groups = [{protected_attribute: 0}]

# Fixed seed so that Restart & Run All reproduces the same learned
# representation (and therefore the same causal estimates further down).
# Without it, every run of this cell can yield a different transformed dataset.
LFR_SEED = 42

# Learning Fair Representations pre-processor.
#   k          -- number of prototypes
#   Ax, Ay, Az -- weights of the reconstruction, prediction and fairness terms
TR = LFR(unprivileged_groups=unprivileged_groups,
         privileged_groups=privileged_groups,
         k=10, Ax=0.1, Ay=1.0, Az=2.0,
         verbose=1,
         seed=LFR_SEED)

# Wrap the preprocessed frame: "Probability" is the label column and `race`
# the protected attribute.
dataset_orig = BinaryLabelDataset(df=data,
                                  label_names=['Probability'],
                                  protected_attribute_names=[protected_attribute])

# Fit the representation, then apply it to the same dataset.
TR.fit(dataset_orig, maxiter=5000, maxfun=5000)

dataset_transf = TR.transform(dataset_orig)

In [None]:
# Convert the LFR-transformed dataset back into a DataFrame for the causal model.
# `yy` receives the second value returned by convert_to_dataframe(); it is not
# used in any of the cells visible in this notebook.
df, yy = dataset_transf.convert_to_dataframe()

In [None]:
# Assumed causal structure in DOT syntax. `U` stands for unobserved confounders
# acting on sex, race and the outcome.
# NOTE(review): `priors_count` is declared as a node but has no edges in this
# graph -- confirm that leaving it disconnected is intended.
causal_graph = """
digraph {
sex;
age_cat;
race;
priors_count;
c_charge_degree;
Probability;
U[label="Unobserved Confounders"];
sex -> Probability;
sex -> age_cat; age_cat -> Probability;
race -> Probability;
sex -> c_charge_degree; c_charge_degree -> Probability;
U->sex;U->race;U->Probability;
}
"""

# Cast outcome and treatment to booleans (original note: dowhy works for
# binary inputs only). Outcome is True for any non-zero value; treatment is
# True only where the protected attribute equals 1.
df['Probability'] = df['Probability'] != 0
df[protected_attribute] = df[protected_attribute] == 1

In [None]:
# Build the causal model from the explicit graph: the protected attribute is
# the treatment and "Probability" the outcome. The DOT text is flattened to a
# single line by turning every newline into a space.
model = CausalModel(
    data=df,
    treatment=protected_attribute,
    outcome="Probability",
    graph=" ".join(causal_graph.split("\n")),
)
model.view_model()

In [None]:
# Identify the estimand for the effect of the treatment on the outcome from the
# model's graph. NOTE(review): proceed_when_unidentifiable=True presumably lets
# identification continue without prompting despite the unobserved-confounder
# node U in the graph -- confirm against the dowhy documentation.
identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
print(identified_estimand)

In [None]:
# Estimate the effect with propensity-score stratification on the backdoor
# estimand, using the estimator's default target units.
causal_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification",
)
print(causal_estimate)
print(f"Causal Estimate is {causal_estimate.value!s}")

In [None]:
# Same estimator, restricted to the control group (target_units="atc").
# NOTE(review): the variable is named `_att` although the target units are
# "atc"; the name is kept as is so any later use keeps working.
causal_estimate_att = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification",
    target_units="atc",
)
print(causal_estimate_att)
print(f"Causal Estimate is {causal_estimate_att.value!s}")

# Add Random Common Cause

In [None]:
# Refutation check: re-estimate with the "random_common_cause" refuter and
# print its report.
refutel = model.refute_estimate(
    identified_estimand,
    causal_estimate,
    method_name="random_common_cause",
)
print(refutel)

# Replace Treatment with Placebo

In [None]:
# Refutation check: placebo treatment refuter with placebo_type="permute";
# print its report.
refutel = model.refute_estimate(
    identified_estimand,
    causal_estimate,
    method_name="placebo_treatment_refuter",
    placebo_type="permute",
)
print(refutel)

# Remove Random Subset of Data

In [None]:
# Refutation check: re-estimate with the "data_subset_refuter" refuter and
# print its report.
refutel = model.refute_estimate(
    identified_estimand,
    causal_estimate,
    method_name="data_subset_refuter",
)
print(refutel)