<a href="https://colab.research.google.com/github/aniketsharma00411/MultiFairGAN/blob/main/Experiment%20Notebooks/EICU_comparison_experiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initialization

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, average_precision_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

In [2]:
! pip install -q fairlearn
from fairlearn.metrics import demographic_parity_ratio, demographic_parity_difference

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/231.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m112.6/231.5 kB[0m [31m2.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.5/231.5 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h

# Loading Datasets

In [3]:
from google.colab import drive

# Mount Google Drive at /content/drive so the CSV files read below are reachable;
# force_remount=True re-mounts even if a mount already exists.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [4]:
# Every CSV lives in the same shared-drive folder; only the file name varies.
DATA_DIR = '/content/drive/Shareddrives/TRAM - FairSyntheticData/Experiments/Data/'

# Real data and the three baseline generators.
orig = pd.read_csv(DATA_DIR + 'eicu_age.csv')
ctgan = pd.read_csv(DATA_DIR + 'EICU_ctgan.csv')
tabfairgan = pd.read_csv(DATA_DIR + 'EICU_tabfairgan_0.4_20.csv')
distcorrgan = pd.read_csv(DATA_DIR + 'EICU_distcorrGAN_0.3.csv')

# Proposed-method outputs (two-cycle and additive variants, DPR and DPD).
proposed_dpr = pd.read_csv(DATA_DIR + 'EICU_ctgan_dpr_0.5_30.csv')
proposed_dpd = pd.read_csv(DATA_DIR + 'EICU_ctgan_dpd_0.5_20.csv')
proposed_additive_dpr = pd.read_csv(DATA_DIR + 'EICU_ctgan_dpr_additive_0.5_200.csv')
proposed_additive_dpd = pd.read_csv(DATA_DIR + 'EICU_ctgan_dpd_additive_0.4_200.csv')

# Proposed-method outputs whose file names carry the "_ethnicity" suffix.
proposed_dpr_ethn = pd.read_csv(DATA_DIR + 'EICU_ctgan_dpr_0.7_20_ethnicity.csv')
proposed_dpd_ethn = pd.read_csv(DATA_DIR + 'EICU_ctgan_dpd_0.4_20_ethnicity.csv')
proposed_additive_dpr_ethn = pd.read_csv(DATA_DIR + 'EICU_ctgan_dpr_additive_0.8_200_ethnicity.csv')
proposed_additive_dpd_ethn = pd.read_csv(DATA_DIR + 'EICU_ctgan_dpd_additive_0.8_200_ethnicity.csv')

# Metric Evaluation

In [5]:
# Number of repeated train/test splits whose scores are averaged per dataset.
iters = 20
# Maps a dataset label to its list of ten metric values; filled in section by section.
metric_eval = dict()

In [6]:
# Columns that get label-encoded to integers before modelling.
cat_features = [
    'gender',
    'ethnicity',
    'apacheadmissiondx',
    'hospitaladmitsource',
    'unittype',
    'AgeGroup',
]

##  Real Data

In [7]:
# Replace every categorical column of the real data with integer codes,
# fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    orig[column] = encoder.fit_transform(orig[column])

In [8]:
# Real data: split into predictors and the label-encoded discharge status.
predictors = orig.drop(columns=['hospitaldischargestatus'])
outcome = LabelEncoder().fit_transform(orig['hospitaldischargestatus'])

sensitive_attrs = ('ethnicity', 'AgeGroup')
parity_metrics = (demographic_parity_ratio, demographic_parity_difference)

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values (DPR then DPD, per attribute) describe the data, not a model.
data_fairness = [
    metric(y_true=outcome, y_pred=outcome, sensitive_features=predictors[attr])
    for attr in sensitive_attrs
    for metric in parity_metrics
]

# Running sums over the repeated splits, ordered as:
# AUC PR, F1, DPR ethnicity, DPD ethnicity, DPR AgeGroup, DPD AgeGroup.
totals = [0] * 6
for seed in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_fit, X_eval, y_fit, y_eval = train_test_split(predictors, outcome, test_size=0.3, random_state=seed)

    tree = DecisionTreeClassifier(random_state=seed)
    tree.fit(X_fit, y_fit)
    predicted = tree.predict(X_eval)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    split_scores = [average_precision_score(y_eval, predicted), f1_score(y_eval, predicted)]
    split_scores += [
        metric(y_true=y_eval, y_pred=predicted, sensitive_features=X_eval[attr])
        for attr in sensitive_attrs
        for metric in parity_metrics
    ]
    totals = [total + score for total, score in zip(totals, split_scores)]

# Four dataset-level values followed by the six per-split averages.
metric_eval['Real Data'] = data_fairness + [total / iters for total in totals]

##  CTGAN

In [9]:
# Replace every categorical column of the CTGAN sample with integer codes,
# fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    ctgan[column] = encoder.fit_transform(ctgan[column])

In [10]:
# CTGAN: split into predictors and the label-encoded discharge status.
predictors = ctgan.drop(columns=['hospitaldischargestatus'])
outcome = LabelEncoder().fit_transform(ctgan['hospitaldischargestatus'])

sensitive_attrs = ('ethnicity', 'AgeGroup')
parity_metrics = (demographic_parity_ratio, demographic_parity_difference)

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values (DPR then DPD, per attribute) describe the data, not a model.
data_fairness = [
    metric(y_true=outcome, y_pred=outcome, sensitive_features=predictors[attr])
    for attr in sensitive_attrs
    for metric in parity_metrics
]

# Running sums over the repeated splits, ordered as:
# AUC PR, F1, DPR ethnicity, DPD ethnicity, DPR AgeGroup, DPD AgeGroup.
totals = [0] * 6
for seed in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_fit, X_eval, y_fit, y_eval = train_test_split(predictors, outcome, test_size=0.3, random_state=seed)

    tree = DecisionTreeClassifier(random_state=seed)
    tree.fit(X_fit, y_fit)
    predicted = tree.predict(X_eval)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    split_scores = [average_precision_score(y_eval, predicted), f1_score(y_eval, predicted)]
    split_scores += [
        metric(y_true=y_eval, y_pred=predicted, sensitive_features=X_eval[attr])
        for attr in sensitive_attrs
        for metric in parity_metrics
    ]
    totals = [total + score for total, score in zip(totals, split_scores)]

# Four dataset-level values followed by the six per-split averages.
metric_eval['CTGAN'] = data_fairness + [total / iters for total in totals]

##  TabFairGAN

In [11]:
# Replace every categorical column of the TabFairGAN sample with integer codes,
# fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    tabfairgan[column] = encoder.fit_transform(tabfairgan[column])

In [12]:
# TabFairGAN: split into predictors and the label-encoded discharge status.
predictors = tabfairgan.drop(columns=['hospitaldischargestatus'])
outcome = LabelEncoder().fit_transform(tabfairgan['hospitaldischargestatus'])

sensitive_attrs = ('ethnicity', 'AgeGroup')
parity_metrics = (demographic_parity_ratio, demographic_parity_difference)

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values (DPR then DPD, per attribute) describe the data, not a model.
data_fairness = [
    metric(y_true=outcome, y_pred=outcome, sensitive_features=predictors[attr])
    for attr in sensitive_attrs
    for metric in parity_metrics
]

# Running sums over the repeated splits, ordered as:
# AUC PR, F1, DPR ethnicity, DPD ethnicity, DPR AgeGroup, DPD AgeGroup.
totals = [0] * 6
for seed in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_fit, X_eval, y_fit, y_eval = train_test_split(predictors, outcome, test_size=0.3, random_state=seed)

    tree = DecisionTreeClassifier(random_state=seed)
    tree.fit(X_fit, y_fit)
    predicted = tree.predict(X_eval)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    split_scores = [average_precision_score(y_eval, predicted), f1_score(y_eval, predicted)]
    split_scores += [
        metric(y_true=y_eval, y_pred=predicted, sensitive_features=X_eval[attr])
        for attr in sensitive_attrs
        for metric in parity_metrics
    ]
    totals = [total + score for total, score in zip(totals, split_scores)]

# Four dataset-level values followed by the six per-split averages.
metric_eval['TabFairGAN'] = data_fairness + [total / iters for total in totals]

##  Distance Correlation GAN

In [13]:
# Replace every categorical column of the Distance Correlation GAN sample with
# integer codes, fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    distcorrgan[column] = encoder.fit_transform(distcorrgan[column])

In [14]:
# Distance Correlation GAN: split into predictors and the label-encoded discharge status.
predictors = distcorrgan.drop(columns=['hospitaldischargestatus'])
outcome = LabelEncoder().fit_transform(distcorrgan['hospitaldischargestatus'])

sensitive_attrs = ('ethnicity', 'AgeGroup')
parity_metrics = (demographic_parity_ratio, demographic_parity_difference)

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values (DPR then DPD, per attribute) describe the data, not a model.
data_fairness = [
    metric(y_true=outcome, y_pred=outcome, sensitive_features=predictors[attr])
    for attr in sensitive_attrs
    for metric in parity_metrics
]

# Running sums over the repeated splits, ordered as:
# AUC PR, F1, DPR ethnicity, DPD ethnicity, DPR AgeGroup, DPD AgeGroup.
totals = [0] * 6
for seed in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_fit, X_eval, y_fit, y_eval = train_test_split(predictors, outcome, test_size=0.3, random_state=seed)

    tree = DecisionTreeClassifier(random_state=seed)
    tree.fit(X_fit, y_fit)
    predicted = tree.predict(X_eval)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    split_scores = [average_precision_score(y_eval, predicted), f1_score(y_eval, predicted)]
    split_scores += [
        metric(y_true=y_eval, y_pred=predicted, sensitive_features=X_eval[attr])
        for attr in sensitive_attrs
        for metric in parity_metrics
    ]
    totals = [total + score for total, score in zip(totals, split_scores)]

# Four dataset-level values followed by the six per-split averages.
metric_eval['Distance Correlation GAN'] = data_fairness + [total / iters for total in totals]

## MultiFairGAN w/ DPR (Ethnicity and Insurance) (Two-cycle Strategy)

In [15]:
# Replace every categorical column of the two-cycle DPR sample with integer
# codes, fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    proposed_dpr[column] = encoder.fit_transform(proposed_dpr[column])

In [16]:
# Two-cycle DPR sample: split into predictors and the label-encoded discharge status.
predictors = proposed_dpr.drop(columns=['hospitaldischargestatus'])
outcome = LabelEncoder().fit_transform(proposed_dpr['hospitaldischargestatus'])

sensitive_attrs = ('ethnicity', 'AgeGroup')
parity_metrics = (demographic_parity_ratio, demographic_parity_difference)

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values (DPR then DPD, per attribute) describe the data, not a model.
data_fairness = [
    metric(y_true=outcome, y_pred=outcome, sensitive_features=predictors[attr])
    for attr in sensitive_attrs
    for metric in parity_metrics
]

# Running sums over the repeated splits, ordered as:
# AUC PR, F1, DPR ethnicity, DPD ethnicity, DPR AgeGroup, DPD AgeGroup.
totals = [0] * 6
for seed in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_fit, X_eval, y_fit, y_eval = train_test_split(predictors, outcome, test_size=0.3, random_state=seed)

    tree = DecisionTreeClassifier(random_state=seed)
    tree.fit(X_fit, y_fit)
    predicted = tree.predict(X_eval)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    split_scores = [average_precision_score(y_eval, predicted), f1_score(y_eval, predicted)]
    split_scores += [
        metric(y_true=y_eval, y_pred=predicted, sensitive_features=X_eval[attr])
        for attr in sensitive_attrs
        for metric in parity_metrics
    ]
    totals = [total + score for total, score in zip(totals, split_scores)]

# Four dataset-level values followed by the six per-split averages.
# NOTE(review): the label says "Insurance" but the second attribute evaluated
# above is AgeGroup - confirm which name is correct.
metric_eval['MultiFairGAN w/ DPR (Ethnicity and Insurance) (Two-cycle Strategy)'] = data_fairness + [total / iters for total in totals]

##  MultiFairGAN w/ DPD (Ethnicity and Insurance) (Two-cycle Strategy)

In [17]:
# Replace every categorical column of the two-cycle DPD sample with integer
# codes, fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    proposed_dpd[column] = encoder.fit_transform(proposed_dpd[column])

In [18]:
# Two-cycle DPD sample: split into predictors and the label-encoded discharge status.
predictors = proposed_dpd.drop(columns=['hospitaldischargestatus'])
outcome = LabelEncoder().fit_transform(proposed_dpd['hospitaldischargestatus'])

sensitive_attrs = ('ethnicity', 'AgeGroup')
parity_metrics = (demographic_parity_ratio, demographic_parity_difference)

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values (DPR then DPD, per attribute) describe the data, not a model.
data_fairness = [
    metric(y_true=outcome, y_pred=outcome, sensitive_features=predictors[attr])
    for attr in sensitive_attrs
    for metric in parity_metrics
]

# Running sums over the repeated splits, ordered as:
# AUC PR, F1, DPR ethnicity, DPD ethnicity, DPR AgeGroup, DPD AgeGroup.
totals = [0] * 6
for seed in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_fit, X_eval, y_fit, y_eval = train_test_split(predictors, outcome, test_size=0.3, random_state=seed)

    tree = DecisionTreeClassifier(random_state=seed)
    tree.fit(X_fit, y_fit)
    predicted = tree.predict(X_eval)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    split_scores = [average_precision_score(y_eval, predicted), f1_score(y_eval, predicted)]
    split_scores += [
        metric(y_true=y_eval, y_pred=predicted, sensitive_features=X_eval[attr])
        for attr in sensitive_attrs
        for metric in parity_metrics
    ]
    totals = [total + score for total, score in zip(totals, split_scores)]

# Four dataset-level values followed by the six per-split averages.
# NOTE(review): the label says "Insurance" but the second attribute evaluated
# above is AgeGroup - confirm which name is correct.
metric_eval['MultiFairGAN w/ DPD (Ethnicity and Insurance) (Two-cycle Strategy)'] = data_fairness + [total / iters for total in totals]

##  MultiFairGAN w/ DPR (Ethnicity and Insurance) (Additive Fairness Loss Strategy)

In [19]:
# Replace every categorical column of the additive-loss DPR sample with integer
# codes, fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    proposed_additive_dpr[column] = encoder.fit_transform(proposed_additive_dpr[column])

In [20]:
# Additive-loss DPR sample: split into predictors and the label-encoded discharge status.
predictors = proposed_additive_dpr.drop(columns=['hospitaldischargestatus'])
outcome = LabelEncoder().fit_transform(proposed_additive_dpr['hospitaldischargestatus'])

sensitive_attrs = ('ethnicity', 'AgeGroup')
parity_metrics = (demographic_parity_ratio, demographic_parity_difference)

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values (DPR then DPD, per attribute) describe the data, not a model.
data_fairness = [
    metric(y_true=outcome, y_pred=outcome, sensitive_features=predictors[attr])
    for attr in sensitive_attrs
    for metric in parity_metrics
]

# Running sums over the repeated splits, ordered as:
# AUC PR, F1, DPR ethnicity, DPD ethnicity, DPR AgeGroup, DPD AgeGroup.
totals = [0] * 6
for seed in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_fit, X_eval, y_fit, y_eval = train_test_split(predictors, outcome, test_size=0.3, random_state=seed)

    tree = DecisionTreeClassifier(random_state=seed)
    tree.fit(X_fit, y_fit)
    predicted = tree.predict(X_eval)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    split_scores = [average_precision_score(y_eval, predicted), f1_score(y_eval, predicted)]
    split_scores += [
        metric(y_true=y_eval, y_pred=predicted, sensitive_features=X_eval[attr])
        for attr in sensitive_attrs
        for metric in parity_metrics
    ]
    totals = [total + score for total, score in zip(totals, split_scores)]

# Four dataset-level values followed by the six per-split averages.
# NOTE(review): the label says "Insurance" but the second attribute evaluated
# above is AgeGroup - confirm which name is correct.
metric_eval['MultiFairGAN w/ DPR (Ethnicity and Insurance) (Additive Fairness Loss Strategy)'] = data_fairness + [total / iters for total in totals]

##  MultiFairGAN w/ DPD (Ethnicity and Insurance) (Additive Fairness Loss Strategy)

In [21]:
# Replace every categorical column of the additive-loss DPD sample with integer
# codes, fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    proposed_additive_dpd[column] = encoder.fit_transform(proposed_additive_dpd[column])

In [22]:
# Additive-loss DPD sample: split into predictors and the label-encoded discharge status.
predictors = proposed_additive_dpd.drop(columns=['hospitaldischargestatus'])
outcome = LabelEncoder().fit_transform(proposed_additive_dpd['hospitaldischargestatus'])

sensitive_attrs = ('ethnicity', 'AgeGroup')
parity_metrics = (demographic_parity_ratio, demographic_parity_difference)

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values (DPR then DPD, per attribute) describe the data, not a model.
data_fairness = [
    metric(y_true=outcome, y_pred=outcome, sensitive_features=predictors[attr])
    for attr in sensitive_attrs
    for metric in parity_metrics
]

# Running sums over the repeated splits, ordered as:
# AUC PR, F1, DPR ethnicity, DPD ethnicity, DPR AgeGroup, DPD AgeGroup.
totals = [0] * 6
for seed in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_fit, X_eval, y_fit, y_eval = train_test_split(predictors, outcome, test_size=0.3, random_state=seed)

    tree = DecisionTreeClassifier(random_state=seed)
    tree.fit(X_fit, y_fit)
    predicted = tree.predict(X_eval)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    split_scores = [average_precision_score(y_eval, predicted), f1_score(y_eval, predicted)]
    split_scores += [
        metric(y_true=y_eval, y_pred=predicted, sensitive_features=X_eval[attr])
        for attr in sensitive_attrs
        for metric in parity_metrics
    ]
    totals = [total + score for total, score in zip(totals, split_scores)]

# Four dataset-level values followed by the six per-split averages.
# NOTE(review): the label says "Insurance" but the second attribute evaluated
# above is AgeGroup - confirm which name is correct.
metric_eval['MultiFairGAN w/ DPD (Ethnicity and Insurance) (Additive Fairness Loss Strategy)'] = data_fairness + [total / iters for total in totals]

## MultiFairGAN w/ DPR (Ethnicity) (Two-cycle Strategy)

In [23]:
# Replace every categorical column of the ethnicity-only two-cycle DPR sample
# with integer codes, fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    proposed_dpr_ethn[column] = encoder.fit_transform(proposed_dpr_ethn[column])

In [24]:
# Ethnicity-only two-cycle DPR sample: split into predictors and the label-encoded discharge status.
predictors = proposed_dpr_ethn.drop(columns=['hospitaldischargestatus'])
outcome = LabelEncoder().fit_transform(proposed_dpr_ethn['hospitaldischargestatus'])

sensitive_attrs = ('ethnicity', 'AgeGroup')
parity_metrics = (demographic_parity_ratio, demographic_parity_difference)

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values (DPR then DPD, per attribute) describe the data, not a model.
data_fairness = [
    metric(y_true=outcome, y_pred=outcome, sensitive_features=predictors[attr])
    for attr in sensitive_attrs
    for metric in parity_metrics
]

# Running sums over the repeated splits, ordered as:
# AUC PR, F1, DPR ethnicity, DPD ethnicity, DPR AgeGroup, DPD AgeGroup.
totals = [0] * 6
for seed in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_fit, X_eval, y_fit, y_eval = train_test_split(predictors, outcome, test_size=0.3, random_state=seed)

    tree = DecisionTreeClassifier(random_state=seed)
    tree.fit(X_fit, y_fit)
    predicted = tree.predict(X_eval)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    split_scores = [average_precision_score(y_eval, predicted), f1_score(y_eval, predicted)]
    split_scores += [
        metric(y_true=y_eval, y_pred=predicted, sensitive_features=X_eval[attr])
        for attr in sensitive_attrs
        for metric in parity_metrics
    ]
    totals = [total + score for total, score in zip(totals, split_scores)]

# Four dataset-level values followed by the six per-split averages.
metric_eval['MultiFairGAN w/ DPR (Ethnicity) (Two-cycle Strategy)'] = data_fairness + [total / iters for total in totals]

##  MultiFairGAN w/ DPD (Ethnicity) (Two-cycle Strategy)

In [25]:
# Replace every categorical column of the ethnicity-only two-cycle DPD sample
# with integer codes, fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    proposed_dpd_ethn[column] = encoder.fit_transform(proposed_dpd_ethn[column])

In [26]:
# Ethnicity-only two-cycle DPD sample: split into predictors and the label-encoded discharge status.
predictors = proposed_dpd_ethn.drop(columns=['hospitaldischargestatus'])
outcome = LabelEncoder().fit_transform(proposed_dpd_ethn['hospitaldischargestatus'])

sensitive_attrs = ('ethnicity', 'AgeGroup')
parity_metrics = (demographic_parity_ratio, demographic_parity_difference)

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values (DPR then DPD, per attribute) describe the data, not a model.
data_fairness = [
    metric(y_true=outcome, y_pred=outcome, sensitive_features=predictors[attr])
    for attr in sensitive_attrs
    for metric in parity_metrics
]

# Running sums over the repeated splits, ordered as:
# AUC PR, F1, DPR ethnicity, DPD ethnicity, DPR AgeGroup, DPD AgeGroup.
totals = [0] * 6
for seed in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_fit, X_eval, y_fit, y_eval = train_test_split(predictors, outcome, test_size=0.3, random_state=seed)

    tree = DecisionTreeClassifier(random_state=seed)
    tree.fit(X_fit, y_fit)
    predicted = tree.predict(X_eval)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    split_scores = [average_precision_score(y_eval, predicted), f1_score(y_eval, predicted)]
    split_scores += [
        metric(y_true=y_eval, y_pred=predicted, sensitive_features=X_eval[attr])
        for attr in sensitive_attrs
        for metric in parity_metrics
    ]
    totals = [total + score for total, score in zip(totals, split_scores)]

# Four dataset-level values followed by the six per-split averages.
metric_eval['MultiFairGAN w/ DPD (Ethnicity) (Two-cycle Strategy)'] = data_fairness + [total / iters for total in totals]

##  MultiFairGAN w/ DPR (Ethnicity) (Additive Fairness Loss Strategy)

In [27]:
# Replace every categorical column of the ethnicity-only additive-loss DPR
# sample with integer codes, fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    proposed_additive_dpr_ethn[column] = encoder.fit_transform(proposed_additive_dpr_ethn[column])

In [28]:
# Ethnicity-only additive-loss DPR sample: split into predictors and the label-encoded discharge status.
predictors = proposed_additive_dpr_ethn.drop(columns=['hospitaldischargestatus'])
outcome = LabelEncoder().fit_transform(proposed_additive_dpr_ethn['hospitaldischargestatus'])

sensitive_attrs = ('ethnicity', 'AgeGroup')
parity_metrics = (demographic_parity_ratio, demographic_parity_difference)

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values (DPR then DPD, per attribute) describe the data, not a model.
data_fairness = [
    metric(y_true=outcome, y_pred=outcome, sensitive_features=predictors[attr])
    for attr in sensitive_attrs
    for metric in parity_metrics
]

# Running sums over the repeated splits, ordered as:
# AUC PR, F1, DPR ethnicity, DPD ethnicity, DPR AgeGroup, DPD AgeGroup.
totals = [0] * 6
for seed in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_fit, X_eval, y_fit, y_eval = train_test_split(predictors, outcome, test_size=0.3, random_state=seed)

    tree = DecisionTreeClassifier(random_state=seed)
    tree.fit(X_fit, y_fit)
    predicted = tree.predict(X_eval)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    split_scores = [average_precision_score(y_eval, predicted), f1_score(y_eval, predicted)]
    split_scores += [
        metric(y_true=y_eval, y_pred=predicted, sensitive_features=X_eval[attr])
        for attr in sensitive_attrs
        for metric in parity_metrics
    ]
    totals = [total + score for total, score in zip(totals, split_scores)]

# Four dataset-level values followed by the six per-split averages.
metric_eval['MultiFairGAN w/ DPR (Ethnicity) (Additive Fairness Loss Strategy)'] = data_fairness + [total / iters for total in totals]

## MultiFairGAN w/ DPD (Ethnicity) (Additive Fairness Loss Strategy)

In [29]:
# Replace every categorical column of the ethnicity-only additive-loss DPD
# sample with integer codes, fitting a fresh encoder per column.
for column in cat_features:
    encoder = LabelEncoder()
    proposed_additive_dpd_ethn[column] = encoder.fit_transform(proposed_additive_dpd_ethn[column])

In [30]:
# Ethnicity-only additive-loss DPD sample: split into predictors and the label-encoded discharge status.
X = proposed_additive_dpd_ethn.drop(columns=['hospitaldischargestatus'])
y = LabelEncoder().fit_transform(proposed_additive_dpd_ethn['hospitaldischargestatus'])

# Dataset-level fairness: the labels are passed as both y_true and y_pred, so
# these four values describe the data itself, not a classifier.
# The "_ins" suffix is only a variable name; the attribute evaluated is AgeGroup.
data_dpr_eth = demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X['ethnicity'])
data_dpd_eth = demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X['ethnicity'])
data_dpr_ins = demographic_parity_ratio(y_true=y, y_pred=y, sensitive_features=X['AgeGroup'])
data_dpd_ins = demographic_parity_difference(y_true=y, y_pred=y, sensitive_features=X['AgeGroup'])

# Running sums of each per-split score; divided by `iters` at the end.
taucpr = 0
tf1 = 0
tdpr_eth = 0
tdpd_eth = 0
tdpr_ins = 0
tdpd_ins = 0
for i in range(iters):
    # One seed drives both the 70/30 split and the decision tree.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    DT = DecisionTreeClassifier(random_state=i)
    DT.fit(X_train, y_train)
    y_pred = DT.predict(X_test)

    # NOTE(review): average precision receives hard class predictions here,
    # not probability scores - confirm this is intended.
    aucpr = average_precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Classifier-level fairness on the held-out split.
    dpr_eth = demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test['ethnicity'])
    dpd_eth = demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test['ethnicity'])
    dpr_ins = demographic_parity_ratio(y_true=y_test, y_pred=y_pred, sensitive_features=X_test['AgeGroup'])
    dpd_ins = demographic_parity_difference(y_true=y_test, y_pred=y_pred, sensitive_features=X_test['AgeGroup'])

    taucpr += aucpr
    tf1 += f1
    tdpr_eth += dpr_eth
    tdpd_eth += dpd_eth
    tdpr_ins += dpr_ins
    tdpd_ins += dpd_ins

# Four dataset-level values followed by the six per-split averages.
# The key previously carried a stray "Real Data" prefix, which mislabelled this
# column in the results table; it now matches the sibling "DPR (Ethnicity)
# (Additive Fairness Loss Strategy)" entry.
metric_eval['MultiFairGAN w/ DPD (Ethnicity) (Additive Fairness Loss Strategy)'] = [data_dpr_eth, data_dpd_eth, data_dpr_ins, data_dpd_ins, taucpr/iters, tf1/iters, tdpr_eth/iters, tdpd_eth/iters, tdpr_ins/iters, tdpd_ins/iters]

## Results

In [31]:
# Row labels, in the same order as the ten values stored per dataset:
# four dataset-level fairness values, two utility scores, four classifier-level fairness values.
row_labels = [
    'DPR - Ethnicity',
    'DPD - Ethnicity',
    'DPR - AgeGroup',
    'DPD - AgeGroup',
    'AUC PR',
    'F1-Score',
    'DPR Classifier - Ethnicity',
    'DPD Classifier - Ethnicity',
    'DPR Classifier - AgeGroup',
    'DPD Classifier - AgeGroup',
]

# One column per evaluated dataset, one row per metric.
metric_eval_df = pd.DataFrame(metric_eval, index=row_labels)
metric_eval_df

Unnamed: 0,Real Data,CTGAN,TabFairGAN,Distance Correlation GAN,MultiFairGAN w/ DPR (Ethnicity and Insurance) (Two-cycle Strategy),MultiFairGAN w/ DPD (Ethnicity and Insurance) (Two-cycle Strategy),MultiFairGAN w/ DPR (Ethnicity and Insurance) (Additive Fairness Loss Strategy),MultiFairGAN w/ DPD (Ethnicity and Insurance) (Additive Fairness Loss Strategy),MultiFairGAN w/ DPR (Ethnicity) (Two-cycle Strategy),MultiFairGAN w/ DPD (Ethnicity) (Two-cycle Strategy),MultiFairGAN w/ DPR (Ethnicity) (Additive Fairness Loss Strategy),Real DataMultiFairGAN w/ DPD (Ethnicity) (Additive Fairness Loss Strategy)
DPR - Ethnicity,0.796395,0.213889,0.413164,0.016543,0.975642,0.973087,0.479665,0.299724,0.979399,0.985512,0.447662,0.046849
DPD - Ethnicity,0.019411,0.125451,0.227186,0.967994,0.012243,0.013494,0.084903,0.115981,0.010243,0.007171,0.121428,0.281455
DPR - AgeGroup,0.320991,0.376878,0.826809,0.879699,0.998078,0.991983,0.480408,0.540335,0.99258,0.992515,0.24052,0.288024
DPD - AgeGroup,0.081463,0.113406,0.049046,0.067097,0.000958,0.003987,0.11792,0.081418,0.003664,0.003686,0.231758,0.05401
AUC PR,0.1155,0.321184,0.291605,0.950484,0.497592,0.495808,0.272344,0.264685,0.492181,0.491329,0.317751,0.361901
F1-Score,0.213864,0.506982,0.355944,0.966391,0.496999,0.496717,0.434989,0.417952,0.493085,0.492368,0.467184,0.58209
DPR Classifier - Ethnicity,0.706981,0.357607,0.591345,0.019459,0.959803,0.958974,0.521063,0.536583,0.962252,0.961213,0.543436,0.05924
DPD Classifier - Ethnicity,0.038208,0.106881,0.157899,0.962019,0.020454,0.020884,0.083004,0.085938,0.019105,0.019607,0.110973,0.284322
DPR Classifier - AgeGroup,0.406235,0.441384,0.784398,0.861498,0.978726,0.976845,0.531051,0.680077,0.96787,0.975436,0.303189,0.385738
DPD Classifier - AgeGroup,0.079297,0.106491,0.067574,0.079902,0.010707,0.011639,0.109549,0.060536,0.016169,0.012341,0.218364,0.050275
