In [None]:
# Example - https://github.com/cornelliusyudhawijaya/Churn_Causality_Analysis/blob/main/Causal%20Analysis%20Do%20Why.ipynb
    
import numpy as np
import pandas as pd

from dowhy import CausalModel
import dowhy.datasets

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import sklearn.metrics as metrics


import sys,os
sys.path.append(os.path.abspath('..'))


from Measure import measure_final_score,calculate_recall,calculate_far,calculate_precision,calculate_accuracy
from Generate_Samples import generate_samples

# Silence ALL warnings (not only sklearn's DataConversionWarning) to keep the notebook output readable
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings('ignore')

In [None]:
# Preprocessing helpers used only by this cell.
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler,StandardScaler

# Load the credit-card default data and discard every row that has a missing value.
data = pd.read_csv('../../../data/default_of_credit_card_clients_first_row_removed.csv').dropna()

# Recode the protected attribute for dowhy: the value 2 becomes 0, anything else becomes 1.
data['sex'] = (data['sex'] != 2).astype(int)

# Integer-encode every column that is not already numeric.
non_numeric_columns = data.select_dtypes(exclude=[np.number]).columns.tolist()

le = LabelEncoder()
for col in non_numeric_columns:
    # fit_transform re-fits the shared encoder on each column in turn
    data[col] = le.fit_transform(data[col])

print(non_numeric_columns)

# Min-max scale every column; the result is rebuilt as a DataFrame with the
# original column names.
# NOTE(review): the scaler is fitted on the full data set before the
# train/test split below - confirm this is intended.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(data)
data = pd.DataFrame(scaled_values, columns=data.columns)

# Fixed-seed 80/20 shuffled split.
dataset_orig_train, dataset_orig_test = train_test_split(
    data,
    test_size=0.2,
    random_state=0,
    shuffle=True,
)

print(data.head(5))

In [None]:
protected_attribute = 'sex'

# Count training rows in each (class label, protected attribute) cell.
# Variable naming: <class value>_<protected attribute value>.
_train_label = dataset_orig_train['Probability']
_train_group = dataset_orig_train[protected_attribute]

# Summing a boolean mask counts its True entries; int() keeps plain Python ints.
zero_zero = int(((_train_label == 0) & (_train_group == 0)).sum())
zero_one = int(((_train_label == 0) & (_train_group == 1)).sum())
one_zero = int(((_train_label == 1) & (_train_group == 0)).sum())
one_one = int(((_train_label == 1) & (_train_group == 1)).sum())

print(zero_zero,zero_one,one_zero,one_one)

In [None]:
# Size of the largest (class, protected attribute) subgroup.
maximum = max(zero_zero,zero_one,one_zero,one_one)

# Report every subgroup that attains the maximum (ties print more than one line),
# in the fixed order zero_zero, zero_one, one_zero, one_one.
for _message, _count in (
    ("zero_zero is maximum", zero_zero),
    ("zero_one is maximum", zero_one),
    ("one_zero is maximum", one_zero),
    ("one_one is maximum", one_one),
):
    if _count == maximum:
        print(_message)

In [None]:
# Number of extra rows each subgroup needs to reach the size of the largest one.
# Order: (class 0, attribute 1), (class 1, attribute 0), (class 1, attribute 1).
zero_one_to_be_incresed, one_zero_to_be_incresed, one_one_to_be_incresed = (
    maximum - group_size for group_size in (zero_one, one_zero, one_one)
)

print(zero_one_to_be_incresed,one_zero_to_be_incresed,one_one_to_be_incresed)

In [None]:
# Select the three subgroups that will be oversampled.
# Naming: df_<class value>_<protected attribute value>.
# .copy() makes each subset an independent frame, so the column assignments
# below do not write to a slice of dataset_orig_train (the pattern that
# triggers pandas' SettingWithCopyWarning).
df_zero_one = dataset_orig_train[(dataset_orig_train['Probability'] == 0) & (dataset_orig_train[protected_attribute] == 1)].copy()
df_one_zero = dataset_orig_train[(dataset_orig_train['Probability'] == 1) & (dataset_orig_train[protected_attribute] == 0)].copy()
df_one_one = dataset_orig_train[(dataset_orig_train['Probability'] == 1) & (dataset_orig_train[protected_attribute] == 1)].copy()

# 'sex' is cast to str before sampling and cast back to float after the
# subgroups are recombined.
# NOTE(review): presumably generate_samples treats string columns as
# categorical - confirm against Generate_Samples.
df_zero_one['sex'] = df_zero_one['sex'].astype(str)
df_one_zero['sex'] = df_one_zero['sex'].astype(str)
df_one_one['sex'] = df_one_one['sex'].astype(str)


# Each subgroup is passed to generate_samples together with the number of rows
# it is short of the largest subgroup; the result replaces the subgroup frame.
df_zero_one = generate_samples(zero_one_to_be_incresed,df_zero_one,'Default')
df_one_zero = generate_samples(one_zero_to_be_incresed,df_one_zero,'Default')
df_one_one = generate_samples(one_one_to_be_incresed,df_one_one,'Default')

In [None]:
# Stack the three oversampled subgroups.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0; like
# append, concat keeps the original row order and index labels by default.
df = pd.concat([df_zero_one, df_one_zero, df_one_one])

# 'sex' was cast to str before sampling; restore it to float.
df['sex'] = df['sex'].astype(float)

# The (class 0, attribute 0) subgroup is taken unchanged from the training
# split and added last.
df_zero_zero = dataset_orig_train[(dataset_orig_train['Probability'] == 0) & (dataset_orig_train[protected_attribute] == 0)]
df = pd.concat([df, df_zero_zero])

In [None]:
# Recount the (class label, protected attribute) cells on the combined frame.
# Variable naming: <class value>_<protected attribute value>.
_balanced_label = df['Probability']
_balanced_group = df[protected_attribute]

# Summing a boolean mask counts its True entries; int() keeps plain Python ints.
zero_zero = int(((_balanced_label == 0) & (_balanced_group == 0)).sum())
zero_one = int(((_balanced_label == 0) & (_balanced_group == 1)).sum())
one_zero = int(((_balanced_label == 1) & (_balanced_group == 0)).sum())
one_one = int(((_balanced_label == 1) & (_balanced_group == 1)).sum())

print(zero_zero,zero_one,one_zero,one_one)

In [None]:
# Assumed causal structure in DOT format: sex affects Probability directly and
# through MARRIAGE and EDUCATION; AGE affects EDUCATION and MARRIAGE; U stands
# for unobserved confounders of sex and Probability.
causal_graph = """
digraph {
AGE;
sex;
MARRIAGE;
EDUCATION;
Probability;
U[label="Unobserved Confounders"];
sex -> Probability;
sex -> MARRIAGE; MARRIAGE -> Probability;
sex -> EDUCATION; EDUCATION -> Probability;
AGE -> EDUCATION; AGE -> MARRIAGE;
U->sex;U->Probability;
}
"""

# Convert outcome and treatment to booleans (the original author's note:
# dowhy works for binary inputs only).
# Outcome: exactly 0 -> False, anything else -> True.
df['Probability'] = (df['Probability'] != 0).to_numpy()
# Treatment: exactly 1 -> True, anything else -> False.
df['sex'] = (df['sex'] == 1).to_numpy()

In [None]:
# Build the causal model from the combined frame and the explicit graph.
# The DOT text is flattened to a single line (newlines become spaces) before
# it is handed to CausalModel.
_graph_one_line = " ".join(causal_graph.split("\n"))

model = CausalModel(
    data=df,
    treatment="sex",
    outcome="Probability",
    graph=_graph_one_line,
)
model.view_model()

In [None]:
# Identify the target estimand from the graph. proceed_when_unidentifiable=True
# is passed so identification continues even though the graph contains the
# unobserved confounder U.
identified_estimand = model.identify_effect(
    proceed_when_unidentifiable=True,
)
print(identified_estimand)

In [None]:
# Estimate the effect of sex on Probability with the backdoor
# propensity-score-stratification estimator (default target units).
causal_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification",
)
print(causal_estimate)
print(f"Causal Estimate is {causal_estimate.value!s}")

In [None]:
# Causal effect on the control group (ATC), same estimator as above.
# NOTE(review): the variable is named *_att although target_units is "atc";
# the name is kept unchanged in case other cells refer to it.
causal_estimate_att = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification",
    target_units="atc",
)
print(causal_estimate_att)
print(f"Causal Estimate is {causal_estimate_att.value!s}")

# Add Random Common Cause

In [None]:
# Refutation check using the "random_common_cause" refuter on the main estimate.
refutel = model.refute_estimate(
    identified_estimand,
    causal_estimate,
    method_name="random_common_cause",
)
print(refutel)

# Replace Treatment with Placebo

In [None]:
# Refutation check using the placebo-treatment refuter with placebo_type="permute".
refutel = model.refute_estimate(
    identified_estimand,
    causal_estimate,
    method_name="placebo_treatment_refuter",
    placebo_type="permute",
)
print(refutel)

# Remove Random Subset of Data

In [None]:
# Refutation check using the "data_subset_refuter" on the main estimate.
refutel = model.refute_estimate(
    identified_estimand,
    causal_estimate,
    method_name="data_subset_refuter",
)
print(refutel)