<a href="https://colab.research.google.com/github/aniketsharma00411/MultiFairGAN/blob/main/Experiment%20Notebooks/MIMIC_comparison_experiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initialization

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, average_precision_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

In [2]:
! pip install -q fairlearn
from fairlearn.metrics import demographic_parity_ratio, demographic_parity_difference

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/231.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.5/231.5 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25h

# Loading Datasets

In [3]:
# Mount Google Drive so the CSV files read in the next cell are reachable;
# force_remount=True re-mounts even when a mount already exists.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point, force_remount=True)

Mounted at /content/drive


In [4]:
# All input CSVs live in one shared-drive folder; keep the prefix in a single place.
data_dir = '/content/drive/Shareddrives/TRAM - FairSyntheticData/Experiments/Data/'


def _read_dataset(file_name):
    """Read one CSV from `data_dir` into a DataFrame."""
    return pd.read_csv(data_dir + file_name)


# Real data and baseline generators.
orig = _read_dataset('MIMIC_TAB.csv')
ctgan = _read_dataset('MIMIC_ctgan.csv')
tabfairgan = _read_dataset('MIMIC_tabfairgan_0.6_20.csv')
distcorrgan = _read_dataset('MIMIC_distcorrGAN_0.1.csv')

# Proposed method variants (file names without a suffix correspond to the
# "Ethnicity and Insurance" sections below).
proposed_dpr = _read_dataset('MIMIC_ctgan_dpr_0.2_30.csv')
proposed_dpd = _read_dataset('MIMIC_ctgan_dpd_0.2_40.csv')
proposed_additive_dpr = _read_dataset('MIMIC_ctgan_dpr_additive_0.3_200.csv')
proposed_additive_dpd = _read_dataset('MIMIC_ctgan_dpd_additive_0.1_200.csv')

# Proposed method variants whose file names carry the `_ethnicity` suffix.
proposed_dpr_ethn = _read_dataset('MIMIC_ctgan_dpr_0.4_30_ethnicity.csv')
proposed_dpd_ethn = _read_dataset('MIMIC_ctgan_dpd_0.4_30_ethnicity.csv')
proposed_additive_dpr_ethn = _read_dataset('MIMIC_ctgan_dpr_additive_0.5_200_ethnicity.csv')
proposed_additive_dpd_ethn = _read_dataset('MIMIC_ctgan_dpd_additive_0.4_200_ethnicity.csv')

# Metric Evaluation

In [5]:
# Number of random train/test splits each dataset is evaluated on.
iters = 20
# Maps a dataset label to its list of metric values; filled by the cells below.
metric_eval = dict()

In [6]:
# Columns that are label-encoded in every dataset before modelling.
cat_features = [
    'gender',
    'ethnicity',
    'insurance',
    'diagnosis_at_admission',
    'discharge_location',
    'admission_type',
]

##  Real Data

In [7]:
# Integer-encode each categorical column of `orig` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    orig[column_name] = encoder.fit_transform(orig[column_name])

In [8]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
orig = orig.drop(columns=excluded_cols)

In [9]:
# Evaluate `orig`: label-level demographic parity on the full frame, then decision-tree
# utility and fairness averaged over `iters` random splits with a 30% test share.
X = orig.drop(columns=['hospital_expire_flag'])
y = orig['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['Real Data'] = data_parity + np.mean(split_rows, axis=0).tolist()

##  CTGAN

In [10]:
# Integer-encode each categorical column of `ctgan` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    ctgan[column_name] = encoder.fit_transform(ctgan[column_name])

In [11]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
ctgan = ctgan.drop(columns=excluded_cols)

In [12]:
# Evaluate `ctgan`: label-level demographic parity on the full frame, then decision-tree
# utility and fairness averaged over `iters` random splits with a 30% test share.
X = ctgan.drop(columns=['hospital_expire_flag'])
y = ctgan['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['CTGAN'] = data_parity + np.mean(split_rows, axis=0).tolist()

##  TabFairGAN

In [13]:
# Integer-encode each categorical column of `tabfairgan` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    tabfairgan[column_name] = encoder.fit_transform(tabfairgan[column_name])

In [14]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
tabfairgan = tabfairgan.drop(columns=excluded_cols)

In [15]:
# Evaluate `tabfairgan`: label-level demographic parity on the full frame, then decision-tree
# utility and fairness averaged over `iters` random splits with a 30% test share.
X = tabfairgan.drop(columns=['hospital_expire_flag'])
y = tabfairgan['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['TabFairGAN'] = data_parity + np.mean(split_rows, axis=0).tolist()

##  Distance Correlation GAN

In [16]:
# Integer-encode each categorical column of `distcorrgan` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    distcorrgan[column_name] = encoder.fit_transform(distcorrgan[column_name])

In [17]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
distcorrgan = distcorrgan.drop(columns=excluded_cols)

In [18]:
# Evaluate `distcorrgan`: label-level demographic parity on the full frame, then decision-tree
# utility and fairness averaged over `iters` random splits with a 30% test share.
X = distcorrgan.drop(columns=['hospital_expire_flag'])
y = distcorrgan['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['Distance Correlation GAN'] = data_parity + np.mean(split_rows, axis=0).tolist()

##  MultiFairGAN w/ DPR (Ethnicity and Insurance) (Two-cycle Strategy)

In [19]:
# Integer-encode each categorical column of `proposed_dpr` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    proposed_dpr[column_name] = encoder.fit_transform(proposed_dpr[column_name])

In [20]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
proposed_dpr = proposed_dpr.drop(columns=excluded_cols)

In [21]:
# Evaluate `proposed_dpr`: label-level demographic parity on the full frame, then decision-tree
# utility and fairness averaged over `iters` random splits with a 30% test share.
X = proposed_dpr.drop(columns=['hospital_expire_flag'])
y = proposed_dpr['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['MultiFairGAN w/ DPR (Ethnicity and Insurance) (Two-cycle Strategy)'] = data_parity + np.mean(split_rows, axis=0).tolist()

##  MultiFairGAN w/ DPD (Ethnicity and Insurance) (Two-cycle Strategy)

In [22]:
# Integer-encode each categorical column of `proposed_dpd` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    proposed_dpd[column_name] = encoder.fit_transform(proposed_dpd[column_name])

In [23]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
proposed_dpd = proposed_dpd.drop(columns=excluded_cols)

In [24]:
# Evaluate `proposed_dpd`: label-level demographic parity on the full frame, then decision-tree
# utility and fairness averaged over `iters` random splits with a 30% test share.
X = proposed_dpd.drop(columns=['hospital_expire_flag'])
y = proposed_dpd['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['MultiFairGAN w/ DPD (Ethnicity and Insurance) (Two-cycle Strategy)'] = data_parity + np.mean(split_rows, axis=0).tolist()

##  MultiFairGAN w/ DPR (Ethnicity and Insurance) (Additive Fairness Loss Strategy)

In [25]:
# Integer-encode each categorical column of `proposed_additive_dpr` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    proposed_additive_dpr[column_name] = encoder.fit_transform(proposed_additive_dpr[column_name])

In [26]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
proposed_additive_dpr = proposed_additive_dpr.drop(columns=excluded_cols)

In [27]:
# Evaluate `proposed_additive_dpr`: label-level demographic parity on the full frame, then
# decision-tree utility and fairness averaged over `iters` random splits with a 30% test share.
X = proposed_additive_dpr.drop(columns=['hospital_expire_flag'])
y = proposed_additive_dpr['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['MultiFairGAN w/ DPR (Ethnicity and Insurance) (Additive Fairness Loss Strategy)'] = data_parity + np.mean(split_rows, axis=0).tolist()

##  MultiFairGAN w/ DPD (Ethnicity and Insurance) (Additive Fairness Loss Strategy)

In [28]:
# Integer-encode each categorical column of `proposed_additive_dpd` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    proposed_additive_dpd[column_name] = encoder.fit_transform(proposed_additive_dpd[column_name])

In [29]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
proposed_additive_dpd = proposed_additive_dpd.drop(columns=excluded_cols)

In [30]:
# Evaluate `proposed_additive_dpd`: label-level demographic parity on the full frame, then
# decision-tree utility and fairness averaged over `iters` random splits with a 30% test share.
X = proposed_additive_dpd.drop(columns=['hospital_expire_flag'])
y = proposed_additive_dpd['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['MultiFairGAN w/ DPD (Ethnicity and Insurance) (Additive Fairness Loss Strategy)'] = data_parity + np.mean(split_rows, axis=0).tolist()

##  MultiFairGAN w/ DPR (Ethnicity) (Two-cycle Strategy)

In [31]:
# Integer-encode each categorical column of `proposed_dpr_ethn` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    proposed_dpr_ethn[column_name] = encoder.fit_transform(proposed_dpr_ethn[column_name])

In [32]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
proposed_dpr_ethn = proposed_dpr_ethn.drop(columns=excluded_cols)

In [33]:
# Evaluate `proposed_dpr_ethn`: label-level demographic parity on the full frame, then
# decision-tree utility and fairness averaged over `iters` random splits with a 30% test share.
X = proposed_dpr_ethn.drop(columns=['hospital_expire_flag'])
y = proposed_dpr_ethn['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['MultiFairGAN w/ DPR (Ethnicity) (Two-cycle Strategy)'] = data_parity + np.mean(split_rows, axis=0).tolist()

##  MultiFairGAN w/ DPD (Ethnicity) (Two-cycle Strategy)

In [34]:
# Integer-encode each categorical column of `proposed_dpd_ethn` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    proposed_dpd_ethn[column_name] = encoder.fit_transform(proposed_dpd_ethn[column_name])

In [35]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
proposed_dpd_ethn = proposed_dpd_ethn.drop(columns=excluded_cols)

In [36]:
# Evaluate `proposed_dpd_ethn`: label-level demographic parity on the full frame, then
# decision-tree utility and fairness averaged over `iters` random splits with a 30% test share.
X = proposed_dpd_ethn.drop(columns=['hospital_expire_flag'])
y = proposed_dpd_ethn['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['MultiFairGAN w/ DPD (Ethnicity) (Two-cycle Strategy)'] = data_parity + np.mean(split_rows, axis=0).tolist()

##  MultiFairGAN w/ DPR (Ethnicity) (Additive Fairness Loss Strategy)

In [37]:
# Integer-encode each categorical column of `proposed_additive_dpr_ethn` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    proposed_additive_dpr_ethn[column_name] = encoder.fit_transform(proposed_additive_dpr_ethn[column_name])

In [38]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
proposed_additive_dpr_ethn = proposed_additive_dpr_ethn.drop(columns=excluded_cols)

In [39]:
# Evaluate `proposed_additive_dpr_ethn`: label-level demographic parity on the full frame, then
# decision-tree utility and fairness averaged over `iters` random splits with a 30% test share.
X = proposed_additive_dpr_ethn.drop(columns=['hospital_expire_flag'])
y = proposed_additive_dpr_ethn['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['MultiFairGAN w/ DPR (Ethnicity) (Additive Fairness Loss Strategy)'] = data_parity + np.mean(split_rows, axis=0).tolist()

##  MultiFairGAN w/ DPD (Ethnicity) (Additive Fairness Loss Strategy)

In [40]:
# Integer-encode each categorical column of `proposed_additive_dpd_ethn` in place, fitting a fresh encoder per column.
for column_name in cat_features:
    encoder = LabelEncoder()
    proposed_additive_dpd_ethn[column_name] = encoder.fit_transform(proposed_additive_dpd_ethn[column_name])

In [41]:
# Columns excluded from the model inputs (the notebook does not state why — confirm with the authors).
excluded_cols = ["dnr_first", "dnr", "fullcode_first", "discharge_location", "fullcode"]
proposed_additive_dpd_ethn = proposed_additive_dpd_ethn.drop(columns=excluded_cols)

In [42]:
# Evaluate `proposed_additive_dpd_ethn`: label-level demographic parity on the full frame, then
# decision-tree utility and fairness averaged over `iters` random splits with a 30% test share.
X = proposed_additive_dpd_ethn.drop(columns=['hospital_expire_flag'])
y = proposed_additive_dpd_ethn['hospital_expire_flag']

# The labels are passed as "predictions", so these four values describe the data, not a model.
# Order: DPR ethnicity, DPD ethnicity, DPR insurance, DPD insurance.
data_parity = []
for attr in ('ethnicity', 'insurance'):
    data_parity.append(demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X[attr]))
    data_parity.append(demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X[attr]))

split_rows = []  # one row per split: [AUC PR, F1, DPR eth, DPD eth, DPR ins, DPD ins]
for i in range(iters):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)
    # average_precision_score ranks samples by a continuous score; hard 0/1 predictions
    # reduce the precision-recall curve to a single operating point. Column 1 is taken as
    # the positive class (assumes 0/1 labels with both classes present in the training split).
    y_score = DT.predict_proba(X_test)[:, 1]

    row = [average_precision_score(y_test, y_score), f1_score(y_test, y_pred)]
    for attr in ('ethnicity', 'insurance'):
        row.append(demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
        row.append(demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test[attr]))
    split_rows.append(row)

# Data-level values first, then the per-split means, matching the row order of the results table.
metric_eval['MultiFairGAN w/ DPD (Ethnicity) (Additive Fairness Loss Strategy)'] = data_parity + np.mean(split_rows, axis=0).tolist()

## Results

In [43]:
# Build the comparison table: one column per dataset label in `metric_eval`, one row per
# metric. The row labels follow the order in which each dataset's value list was assembled.
metric_row_labels = [
    'DPR - Ethnicity',
    'DPD - Ethnicity',
    'DPR - Insurance',
    'DPD - Insurance',
    'AUC PR',
    'F1-Score',
    'DPR Classifier - Ethnicity',
    'DPD Classifier - Ethnicity',
    'DPR Classifier - Insurance',
    'DPD Classifier - Insurance',
]
metric_eval_df = pd.DataFrame(metric_eval, index=metric_row_labels)
metric_eval_df

Unnamed: 0,Real Data,CTGAN,TabFairGAN,Distance Correlation GAN,MultiFairGAN w/ DPR (Ethnicity and Insurance) (Two-cycle Strategy),MultiFairGAN w/ DPD (Ethnicity and Insurance) (Two-cycle Strategy),MultiFairGAN w/ DPR (Ethnicity and Insurance) (Additive Fairness Loss Strategy),MultiFairGAN w/ DPD (Ethnicity and Insurance) (Additive Fairness Loss Strategy),MultiFairGAN w/ DPR (Ethnicity) (Two-cycle Strategy),MultiFairGAN w/ DPD (Ethnicity) (Two-cycle Strategy),MultiFairGAN w/ DPR (Ethnicity) (Additive Fairness Loss Strategy),MultiFairGAN w/ DPD (Ethnicity) (Additive Fairness Loss Strategy)
DPR - Ethnicity,0.62391,0.912609,0.345568,0.019154,0.98414,0.991424,0.924501,0.682552,0.991377,0.964758,0.840568,0.965623
DPD - Ethnicity,0.052548,0.020701,0.195792,0.958727,0.007687,0.004335,0.01814,0.076932,0.004131,0.01752,0.030989,0.008233
DPR - Insurance,0.443404,0.275869,0.175038,0.495247,0.943503,0.964204,0.31556,0.47461,0.8685,0.919672,0.410019,0.404146
DPD - Insurance,0.086147,0.256274,0.256881,0.336502,0.027736,0.018321,0.285784,0.137637,0.06917,0.04113,0.149038,0.205735
AUC PR,0.208664,0.303903,0.199629,0.946718,0.485465,0.504939,0.276649,0.336339,0.482789,0.488397,0.239911,0.301934
F1-Score,0.357705,0.4096,0.312579,0.96427,0.483874,0.506668,0.355349,0.460148,0.485166,0.487096,0.342982,0.395431
DPR Classifier - Ethnicity,0.803316,0.927479,0.362373,0.010803,0.968202,0.973168,0.912367,0.747945,0.962754,0.96502,0.930947,0.92867
DPD Classifier - Ethnicity,0.029615,0.018689,0.194,0.974266,0.015484,0.013727,0.024091,0.063297,0.018135,0.017695,0.014512,0.018678
DPR Classifier - Insurance,0.465938,0.292037,0.163952,0.392177,0.886886,0.89183,0.366107,0.473924,0.853138,0.853317,0.429768,0.41811
DPD Classifier - Insurance,0.091702,0.258532,0.28816,0.476486,0.057028,0.057894,0.259343,0.169522,0.078729,0.07993,0.156535,0.224937
