In [None]:
import pandas as pd
import numpy as np
import ast
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from collections import defaultdict
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix, roc_auc_score, precision_recall_curve, auc
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import log_loss
from scipy.stats import entropy
import torch
import torch.nn as nn
from sklearn.multioutput import MultiOutputClassifier
from xgboost import XGBClassifier
import torch.optim as optim


### Read the CSV

In [19]:
test = pd.read_csv("../model_dev/densenet_data/densenet_test_embeddings.csv", quotechar='"', on_bad_lines='skip')
train = pd.read_csv("../model_dev/densenet_data/densenet_train_embeddings.csv", quotechar='"', on_bad_lines='skip')
valid = pd.read_csv("../model_dev/densenet_data/densenet_valid_embeddings.csv", quotechar='"', on_bad_lines='skip')

print(test.columns)
test.head()

Index(['path_to_image', 'path_to_dcm', 'age', 'sex', 'race', 'insurance_type',
       'No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
       'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia',
       'Atelectasis', 'Pneumothorax', 'Pleural Effusion', 'Pleural Other',
       'Fracture', 'Support Devices', 'embeddings'],
      dtype='object')


Unnamed: 0,path_to_image,path_to_dcm,age,sex,race,insurance_type,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,...,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,embeddings
0,train/patient47347/study3/view1_frontal.jpg,train/patient47347/study3/view1_frontal.dcm,78.0,1,0,1,0,0,1,0,...,1,0,0,1,0,1,0,1,1,"[0.0029132624622434378, 0.1020001769065857, 0...."
1,train/patient37527/study12/view1_frontal.jpg,train/patient37527/study12/view1_frontal.dcm,63.0,0,1,2,0,0,0,0,...,0,0,0,0,0,1,0,0,0,"[0.0014348188415169716, 0.0543656125664711, 0...."
2,train/patient41208/study9/view1_frontal.jpg,train/patient41208/study9/view1_frontal.dcm,70.0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,"[0.001982336398214102, 0.040021587163209915, 0..."
3,train/patient39357/study1/view1_frontal.jpg,train/patient39357/study1/view1_frontal.dcm,79.0,1,1,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,"[0.001741771469824016, 0.0560498870909214, 0.1..."
4,train/patient31982/study4/view1_frontal.jpg,train/patient31982/study4/view1_frontal.dcm,67.0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,"[9.678312198957428e-05, 0.12247737497091293, 0..."


### Convert embeddings from str to list (a bit long for large data sets)

In [20]:
test['embeddings'] = test['embeddings'].apply(ast.literal_eval)

In [21]:
train['embeddings'] = train['embeddings'].apply(ast.literal_eval)

In [22]:
valid['embeddings'] = valid['embeddings'].apply(ast.literal_eval)

### Remove columns

In [23]:
test = test.drop(columns=['path_to_image', 'path_to_dcm'])
train = train.drop(columns=['path_to_image', 'path_to_dcm'])
valid = valid.drop(columns=['path_to_image', 'path_to_dcm'])

### Remove rows that were not processed (embeddings = 0)

In [24]:
initial_size = test.shape[0] 

# The previous logic with transforming the list to string and filtering on the length of said string is not necessarily stable and misleading.
# Let's implement a more explicit test for what we actually care about: 

test = test[test['embeddings'].apply(type) == list]

final_size = test.shape[0] 

print(f'Number of test removed rows = {initial_size - final_size}')

initial_size = train.shape[0] 

train = train[train['embeddings'].apply(type) == list]

final_size = train.shape[0] 

print(f'Number of train removed rows = {initial_size - final_size}')


valid = valid[valid['embeddings'].apply(type) == list]

final_size = valid.shape[0] 

print(f'Number of train removed rows = {initial_size - final_size}')

Number of test removed rows = 51
Number of train removed rows = 67
Number of train removed rows = 62782


### Convert age to binary to study bias

In [25]:
a = 70
test['age'] = (test['age'] >= a).astype(int)
train['age'] = (train['age'] >= a).astype(int)

### Create artificial training distribution

In [26]:
print("Initial sex Distribution:")
print(train['sex'].value_counts())

print("\nInitial Race Distribution:")
print(train['race'].value_counts())

print("\nInitial Age Distribution:")
print(train['age'].value_counts())

print("\nInitial Health Distribution:")
print(train['insurance_type'].value_counts())

Initial sex Distribution:
sex
0    38998
1    28198
Name: count, dtype: int64

Initial Race Distribution:
race
0    52553
1     9844
2     4799
Name: count, dtype: int64

Initial Age Distribution:
age
0    42267
1    24929
Name: count, dtype: int64

Initial Health Distribution:
insurance_type
1    43076
2    18340
0     5780
Name: count, dtype: int64


### Train test

In [27]:
# diseases = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema', 
#             'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax', 
#             'Pleural Effusion', 'Pleural Other', 'Fracture']
diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Pleural Effusion']

In [29]:

# Extract the embeddings by turning the pandas series into a list and then into a tensor
train_embeddings = torch.tensor(np.array(train['embeddings'].tolist()), dtype=torch.float32)
test_embeddings = torch.tensor(np.array(test['embeddings'].tolist()), dtype=torch.float32)
valid_embeddings = torch.tensor(np.array(valid['embeddings'].tolist()), dtype=torch.float32)

# The TensorDataset stores the tensors and allows easy access to the data in batches.
#train_loader: Data is loaded from the train_dataset in batches of size 128. The order of the data is randomly shuffled (shuffle=True) at each epoch to ensure that the model does not learn dependencies on the order.
#valid_loader:Data is loaded from the valid_dataset in batches of size 128. The order of the data remains fixed (shuffle=False) because validation data is often evaluated in the original order.
train_dataset = torch.utils.data.TensorDataset(train_embeddings)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)

valid_dataset = torch.utils.data.TensorDataset(valid_embeddings)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=128, shuffle=False)

sex_train = train['sex']
age_train = train['age']
race_train = train['race']
health_train = train['insurance_type']


In [30]:
class Autoencoder(nn.Module): 
    def __init__(self, input_dim=1024, latent_dim=128):
        super(Autoencoder, self).__init__() # inherits arent class (nn.Module) from which your Autoencoder class inherits. It initializes PyTorch's internal mechanisms for managing model parameters and registering layers.
        # Encoder Here, we define the Encoder, which progressively transforms the input data (x) through multiple transformations into a compressed latent space (z). The encoder reduces the input dimensionality from 1024 to 128.
        self.encoder = nn.Sequential( #With nn.Sequential, you don't need to explicitly define how data moves from one layer to the next in the forward pass—it automatically handles this for you.
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, latent_dim)
        )
        # Decoder Here, we are defining the Decoder portion of the Autoencoder using nn.Sequential. The Decoder is responsible for reconstructing the input data x^ from the latent representation z. Essentially, it reverses the dimensionality reduction performed by the Encoder and attempts to recover the original structure of the data.
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, input_dim),
            nn.Sigmoid()  # For reconstruction in [0, 1]
        )
    
    def forward(self, x):
        latent = self.encoder(x)
        reconstructed = self.decoder(latent)
        return latent, reconstructed

# Modellinitialisierung
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
autoencoder = Autoencoder(input_dim=1024, latent_dim=128).to(device)


In [32]:
# Hyperparameter
learning_rate = 0.0005
epochs = 25

# Optimizer and loss function : MSE is a natural choice because it penalizes larger differences more heavily, encouraging the model to reduce significant reconstruction errors.
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=learning_rate)

# Training-loop
for epoch in range(epochs):
    autoencoder.train()
    train_loss = 0.0
    for batch in train_loader: #loop over batches
        data = batch[0].to(device)  # Input data: Moves the input data from the CPU to the device (GPU or CPU) where the model is located.
        optimizer.zero_grad() #Clears the previously accumulated gradients of the model's parameters.
        
        # forward propagatoin
        latent, reconstructed = autoencoder(data)
        loss = criterion(reconstructed, data)  # MSE-Loss
        
        # backwards propagation and optimizing
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    # Validating
    autoencoder.eval()
    valid_loss = 0.0
    with torch.no_grad(): #This disables gradient computation during validation.
        for batch in valid_loader: #Iterate Over Validation Batches
            data = batch[0].to(device)
            latent, reconstructed = autoencoder(data)
            loss = criterion(reconstructed, data)
            valid_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss/len(train_loader):.4f}, Valid Loss: {valid_loss/len(valid_loader):.4f}")


Epoch [1/25], Train Loss: 0.0006, Valid Loss: 0.0006
Epoch [2/25], Train Loss: 0.0006, Valid Loss: 0.0006
Epoch [3/25], Train Loss: 0.0006, Valid Loss: 0.0005
Epoch [4/25], Train Loss: 0.0006, Valid Loss: 0.0005
Epoch [5/25], Train Loss: 0.0006, Valid Loss: 0.0005
Epoch [6/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [7/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [8/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [9/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [10/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [11/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [12/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [13/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [14/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [15/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [16/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [17/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [18/25], Train Loss: 0.0005, Valid Loss: 0.0005
Epoch [19/25], Train Loss: 0.0005, Va

In [33]:
autoencoder.eval()
with torch.no_grad():
    train_latent = autoencoder.encoder(train_embeddings.to(device)).cpu().numpy()
    valid_latent = autoencoder.encoder(valid_embeddings.to(device)).cpu().numpy()
    test_latent = autoencoder.encoder(test_embeddings.to(device)).cpu().numpy()

# saving of the reduced embeddings
print(f"Train Latent Shape: {train_latent.shape}")
print(f"Valid Latent Shape: {valid_latent.shape}")
print(f"Test Latent Shape: {test_latent.shape}")


Train Latent Shape: (67196, 128)
Valid Latent Shape: (4481, 128)
Test Latent Shape: (40307, 128)


In [34]:
np.save("train_latent.npy", train_latent)
np.save("valid_latent.npy", valid_latent)
np.save("test_latent.npy", test_latent)

In [35]:
# Load latent embeddings
train_latent = np.load("train_latent.npy")
valid_latent = np.load("valid_latent.npy")
test_latent = np.load("test_latent.npy")

# Load labels
diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Pleural Effusion']
y_train = train[diseases]
y_valid = valid[diseases]
y_test = test[diseases]


# Combine latent embeddings with categorical variables
x_train_subset = pd.DataFrame(train_latent)
x_test_subset = pd.DataFrame(test_latent)

# Feature names
feat_names = list(x_train_subset.columns)


In [45]:
# Labels for train and test
y_train = train[diseases]
y_test = test[diseases]
y_valid = valid[diseases]
y_no_finding = test["No Finding"]
y_sex = test['sex']
y_race = test['race']
y_insurance = test['insurance_type']
y_age = test['age']


### Adversarial attacks

In [36]:
class AdversarialModel(BaseEstimator, ClassifierMixin):
    def __init__(self, main_model, adversary_model, alpha=0.1):
        """
        main_model: The primary model predicting diseases.
        adversary_model: The model trying to predict the sensitive attribute.
        alpha: Weight of the adversarial loss.
        """
        self.main_model = main_model
        self.adversary_model = adversary_model
        self.alpha = alpha

    def fit(self, X, Y, S1, S2):
        """
        X: Features
        Y: Target labels (diseases)
        S: Sensitive attribute (e.g., sex)
        """
        for _ in range(5):  # Number of training iterations
            # Update main model
            self.main_model.fit(X, Y)

            # Predict diseases to use as features for the adversary
            # if hasattr(self.main_model, "predict_proba"):
            Y_pred = self.main_model.predict_proba(X)
            # else:
            #     Y_pred = self.main_model.predict(X)

            # Train adversary on predicting sensitive attributes from disease predictions
            self.adversary_model.fit(Y_pred, S1)

            # Adversary predictions
            S_pred = self.adversary_model.predict_proba(Y_pred)

            # Calculate adversarial loss
            adv_loss = log_loss(S1, S_pred)

            # Update main model to decrease adversary success
            Y_grad = -self.alpha * (S_pred - S_pred - S1.values.reshape(-1, 1))
            self.main_model.fit(X, Y, sample_weight=Y_grad[:, 1])



            # Update main model
            self.main_model.fit(X, Y)

            # Predict diseases to use as features for the adversary
            # if hasattr(self.main_model, "predict_proba"):
            Y_pred = self.main_model.predict_proba(X) 
            # else:
            #     Y_pred = self.main_model.predict(X)

            # Train adversary on predicting sensitive attributes from disease predictions
            self.adversary_model.fit(Y_pred, S2)

            # Adversary predictions
            S_pred = self.adversary_model.predict_proba(Y_pred)

            # Calculate adversarial loss
            adv_loss = log_loss(S2, S_pred)

            # Update main model to decrease adversary success
            Y_grad = -self.alpha * (S_pred - S_pred - S2.values.reshape(-1, 1))
            self.main_model.fit(X, Y, sample_weight=Y_grad[:, 1])

    def predict(self, X):
        return self.main_model.predict_proba(X)



In [37]:

# Main model
xgb_model = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    learning_rate=0.1,
    random_state=42
)


adversary_model = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    learning_rate=0.1,
    random_state=42
)

# Adversarial wrapper
adv_model = AdversarialModel(main_model=xgb_model, adversary_model=adversary_model, alpha=0.3)

# Train with features (X_train), disease labels (y_train), and sensitive attribute (sex)
adv_model.fit(x_train_subset, y_train, age_train, sex_train)

adv_model.fit(x_train_subset, y_train, race_train, health_train)

# Predict with adversarially trained model
y_pred = adv_model.predict(x_test_subset)


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

In [38]:
predictions = y_pred
targets = y_test.values

In [39]:
predictions

array([[0.09719285, 0.65185285, 0.5167791 , 0.69201916],
       [0.06302026, 0.53982955, 0.2747421 , 0.5020819 ],
       [0.03063657, 0.37036383, 0.05537954, 0.15028714],
       ...,
       [0.02484785, 0.6623129 , 0.20355947, 0.5208331 ],
       [0.03472432, 0.45545825, 0.03103275, 0.13496989],
       [0.01718268, 0.4284143 , 0.03530766, 0.18701828]], dtype=float32)

In [40]:
diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Pleural Effusion']

In [46]:
def compute_metrics(predictions, targets):
    # Calculate metrics for each disease
    metrics = {}
    for idx, disease in enumerate(diseases):
        disease_pred = predictions[disease]
        disease_true = targets[disease]
        # disease_pred = predictions[:, idx]
        # disease_true = targets[:, idx]
        auc_roc = roc_auc_score(disease_true, disease_pred)
        f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
        accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
        tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
        tp_rate = tp / (tp + fn) if (tp + fn) > 0 else 0
        tn_rate = tn / (tn + fp) if (tn + fp) > 0 else 0
        fn_rate = fn / (fn + tp) if (fn + tp) > 0 else 0
        fp_rate = fp / (tn + fp) if (tn + fp) > 0 else 0
        
        # Calculate Precision-Recall AUC
        precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
        auprc = auc(recall, precision)

        metrics[disease] = {
            'Accuracy': accuracy,
            'AUC': auc_roc,
            'AUPRC': auprc,
            'F1 Score': f1,
            'TP Rate': tp_rate,
            'FN Rate': fn_rate,
            'TN Rate': tn_rate,
            'FP Rate': fp_rate
        }
    return metrics

metrics = compute_metrics(pd.DataFrame(y_pred, columns=diseases), pd.DataFrame(y_test.values, columns=diseases))
print(metrics)

{'Cardiomegaly': {'Accuracy': 0.8810628426824125, 'AUC': 0.8006438956128046, 'AUPRC': 0.3852840212747396, 'F1 Score': 0.19238544474393532, 'TP Rate': 0.11512096774193549, 'FN Rate': 0.8848790322580645, 'TN Rate': 0.9885421676521345, 'FP Rate': 0.011457832347865448}, 'Lung Opacity': {'Accuracy': 0.6462152975909892, 'AUC': 0.6931359452973284, 'AUPRC': 0.6406124393035012, 'F1 Score': 0.6726655036268478, 'TP Rate': 0.738359201773836, 'FN Rate': 0.2616407982261641, 'TN Rate': 0.5568587206177003, 'FP Rate': 0.44314127938229975}, 'Edema': {'Accuracy': 0.7762671496266157, 'AUC': 0.7838727960944124, 'AUPRC': 0.4977913122081279, 'F1 Score': 0.36671348314606744, 'TP Rate': 0.2695921528136293, 'FN Rate': 0.7304078471863706, 'TN Rate': 0.9365162301613219, 'FP Rate': 0.06348376983867808}, 'Pleural Effusion': {'Accuracy': 0.7395985808916565, 'AUC': 0.8101336246724882, 'AUPRC': 0.7015438033687654, 'F1 Score': 0.6766282580565653, 'TP Rate': 0.6911070551954183, 'FN Rate': 0.3088929448045818, 'TN Rate': 

### Compute metrics for different categories

In [47]:
def create_distributions(y_true, y_pred):
            P = np.array([1 - y_true, y_true]).T  # Probabiility distribution of True Labels
            Q = np.array([1 - y_pred, y_pred]).T  # Probabiility distribution of predicted diseases
            return P, Q

In [48]:
predictions

array([[0.09719285, 0.65185285, 0.5167791 , 0.69201916],
       [0.06302026, 0.53982955, 0.2747421 , 0.5020819 ],
       [0.03063657, 0.37036383, 0.05537954, 0.15028714],
       ...,
       [0.02484785, 0.6623129 , 0.20355947, 0.5208331 ],
       [0.03472432, 0.45545825, 0.03103275, 0.13496989],
       [0.01718268, 0.4284143 , 0.03530766, 0.18701828]], dtype=float32)

In [49]:

# Calculate metrics for each disease and for each class

metrics_female = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_sex == 1, idx]
    disease_true = targets[y_sex == 1, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)
    
    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)

    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_female[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_male = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_sex == 0, idx]
    disease_true = targets[y_sex == 0, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_male[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    

metrics_white = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_race == 0, idx]
    disease_true = targets[y_race == 0, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_white[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_black = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_race == 2, idx]
    disease_true = targets[y_race == 2, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_black[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_asian = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_race == 1, idx]
    disease_true = targets[y_race == 1, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_asian[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    

metrics_medicaid = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_insurance == 0, idx]
    disease_true = targets[y_insurance == 0, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_medicaid[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_medicare = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_insurance == 1, idx]
    disease_true = targets[y_insurance == 1, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_medicare[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_private = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_insurance == 2, idx]
    disease_true = targets[y_insurance == 2, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_private[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    

metrics_young = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_age == 0, idx]
    disease_true = targets[y_age == 0, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)
    
    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_young[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_old = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_age == 1, idx]
    disease_true = targets[y_age == 1, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_old[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }

In [50]:
metrics_female

{'Cardiomegaly': {'Accuracy': 0.8882714589737527,
  'AUC': 0.7975344355894879,
  'AUPRC': 0.36593611069900756,
  'F1 Score': 0.17682926829268295,
  'TP Rate': 0.10453141091658084,
  'FN Rate': 0.8954685890834192,
  'TN Rate': 0.9899158541471884,
  'FP Rate': 0.01008414585281154,
  'KL Div': 0.2936621446730282},
 'Lung Opacity': {'Accuracy': 0.6483802317332703,
  'AUC': 0.6926011141456913,
  'AUPRC': 0.6406214975607818,
  'F1 Score': 0.6776850547306816,
  'TP Rate': 0.7465377268385864,
  'FN Rate': 0.25346227316141356,
  'TN Rate': 0.5521077283372365,
  'FP Rate': 0.44789227166276346,
  'KL Div': 0.629681184968942},
 'Edema': {'Accuracy': 0.7751832584535351,
  'AUC': 0.7831146723972487,
  'AUPRC': 0.5069876585299515,
  'F1 Score': 0.35378079864061174,
  'TP Rate': 0.25193610842207165,
  'FN Rate': 0.7480638915779284,
  'TN Rate': 0.9443053817271589,
  'FP Rate': 0.05569461827284105,
  'KL Div': 0.45730355514750387},
 'Pleural Effusion': {'Accuracy': 0.742728777488768,
  'AUC': 0.8119110

In [51]:
# Initialize an empty list to store the data
data_sex = []

# Iterate over the diseases in the metrics dictionary
for disease, values in metrics.items():
    # Extract AUC and rates from dictionaries
    
    auprc_overall = values['AUPRC'] *100

    auc_overall = values['AUC'] *100
    auc_male = metrics_male[disease]['AUPRC'] *100
    auc_female = metrics_female[disease]['AUPRC'] *100
    tp_rate_male = metrics_male[disease]['TP Rate'] *100
    tp_rate_female = metrics_female[disease]['TP Rate'] *100
    fp_rate_male = metrics_male[disease]['FP Rate'] *100
    fp_rate_female = metrics_female[disease]['FP Rate'] *100

    kl1 = metrics_male[disease]['KL Div'] *100
    kl2 = metrics_female[disease]['KL Div'] *100

    
    delta_KL_sex = abs(kl1 - kl2)


    # Calculate delta AUC and equality of odds
    delta_auc_sex = abs(auc_male - auc_female) 
    eq_odds_sex = 0.5 * (abs(tp_rate_male - tp_rate_female) + abs(fp_rate_male - fp_rate_female))
    
    # Append to the data list
    data_sex.append([disease, auprc_overall, auc_overall, auc_male, auc_female, delta_auc_sex, eq_odds_sex, delta_KL_sex])

# Create a DataFrame
df_sex = pd.DataFrame(data_sex, columns=['Disease', 'AUPRC', 'AUC', 'AUC_Male', 'AUC_Female', 'Delta AUC', 'EqOdds', 'KL div'])


# Styling the DataFrame
styled_df = df_sex.style.format({
    'AUC': "{:.3f}", 
    'AUPRC': "{:.3f}",
    'AUC_Male': "{:.3f}", 
    'AUC_Female': "{:.3f}",
    'Delta AUC': "{:.3f}",
    'EqOdds': "{:.3f}",
    'KL div': "{:.3f}"
}).background_gradient(cmap='viridis', subset=['AUPRC', 'AUC', 'AUC_Male', 'AUC_Female', 'Delta AUC', 'EqOdds', 'KL div'])

# Display the styled DataFrame
styled_df


Unnamed: 0,Disease,AUPRC,AUC,AUC_Male,AUC_Female,Delta AUC,EqOdds,KL div
0,Cardiomegaly,38.528,80.064,39.792,36.594,3.198,0.989,1.985
1,Lung Opacity,64.061,69.314,64.073,64.062,0.01,1.115,0.028
2,Edema,49.779,78.387,49.222,50.699,1.476,2.208,0.782
3,Pleural Effusion,70.154,81.013,70.047,70.339,0.292,0.808,0.355


In [52]:
# Initialize an empty list to store the data
data_race = []

# Iterate over the diseases in the metrics dictionary
for disease, values in metrics.items():
    auprc_overall = values['AUPRC'] *100
    auc_overall = values['AUC'] *100
    auc_groups = [
        metrics_white[disease]['AUPRC'] *100,
        metrics_black[disease]['AUPRC'] *100,
        metrics_asian[disease]['AUPRC'] *100
    ]
    tp_rates = [
        metrics_white[disease]['TP Rate'] *100,
        metrics_black[disease]['TP Rate'] *100,
        metrics_asian[disease]['TP Rate'] *100
    ]
    fp_rates = [
        metrics_white[disease]['FP Rate'] *100,
        metrics_black[disease]['FP Rate'] *100,
        metrics_asian[disease]['FP Rate'] *100
    ]

    kl_rates = [
        metrics_white[disease]['KL Div'] *100,
        metrics_black[disease]['KL Div'] *100,
        metrics_asian[disease]['KL Div'] *100
    ]

    delta_kl_race = max(abs(kl_rates[i] - kl_rates[j]) for i in range(len(kl_rates)) for j in range(i + 1, len(kl_rates)))

    # Calculate the maximum delta AUC
    delta_auc_race = max(abs(auc_groups[i] - auc_groups[j]) for i in range(len(auc_groups)) for j in range(i + 1, len(auc_groups)))

    # Calculate the maximum equality of odds
    eq_odds_race = max(
        0.5 * (abs(tp_rates[i] - tp_rates[j]) + abs(fp_rates[i] - fp_rates[j]))
        for i in range(len(tp_rates)) for j in range(i + 1, len(tp_rates))
    )

    # Append to the data list
    data_race.append([disease, auprc_overall, auc_overall] + auc_groups + [delta_auc_race, eq_odds_race, delta_kl_race])

# Create a DataFrame
columns = ['Disease', 'AUPRC', 'AUC', 'AUC_White', 'AUC_Black', 'AUC_Asian', 'Max Delta AUC', 'Max EqOdds', 'KL div']
df_race = pd.DataFrame(data_race, columns=columns)

# Display the DataFrame with styling
# Styling the DataFrame
styled_df = df_race.style.format({
    'AUC': "{:.3f}", 
    'AUPRC': "{:.3f}", 
    'AUC_White': "{:.3f}", 
    'AUC_Black': "{:.3f}",
    'AUC_Asian': "{:.3f}",
    'Max Delta AUC': "{:.3f}",
    'Max EqOdds': "{:.3f}",
    'KL div': "{:.3f}"
}).background_gradient(cmap='viridis', subset=['AUPRC', 'AUC', 'AUC_White', 'AUC_Black', 'AUC_Asian', 'Max Delta AUC', 'Max EqOdds', 'KL div'])

# Display the styled DataFrame
styled_df

Unnamed: 0,Disease,AUPRC,AUC,AUC_White,AUC_Black,AUC_Asian,Max Delta AUC,Max EqOdds,KL div
0,Cardiomegaly,38.528,80.064,35.771,52.497,42.118,16.726,2.634,12.349
1,Lung Opacity,64.061,69.314,63.8,64.568,65.254,1.454,3.25,1.664
2,Edema,49.779,78.387,50.222,50.933,46.729,4.204,0.673,7.165
3,Pleural Effusion,70.154,81.013,70.281,62.617,72.461,9.844,2.348,1.887


In [53]:
# Initialize an empty list to store the data
data_age = []

# Iterate over the diseases in the metrics dictionary
for disease, values in metrics.items():
    # Extract AUC and rates from dictionaries
    
    auprc_overall = values['AUPRC'] *100

    auc_overall = values['AUC'] *100
    auc_old = metrics_old[disease]['AUPRC'] *100
    auc_young = metrics_young[disease]['AUPRC'] *100
    tp_rate_old = metrics_old[disease]['TP Rate'] *100
    tp_rate_young = metrics_young[disease]['TP Rate'] *100
    fp_rate_old = metrics_old[disease]['FP Rate'] *100
    fp_rate_young = metrics_young[disease]['FP Rate'] *100


    kl1 = metrics_old[disease]['KL Div'] *100
    kl2 = metrics_young[disease]['KL Div'] *100

    
    delta_KL_age = abs(kl1 - kl2)

    
    # Calculate delta AUC and equality of odds
    delta_auc_age = abs(auc_old - auc_young)
    eq_odds_age = 0.5 * (abs(tp_rate_old - tp_rate_young) + abs(fp_rate_old - fp_rate_young))
    
    # Append to the data list
    data_age.append([disease, auprc_overall, auc_overall, auc_old, auc_young, delta_auc_age, eq_odds_age, delta_KL_age])

# Create a DataFrame
df_age = pd.DataFrame(data_age, columns=['Disease', 'AUPRC', 'AUC', 'AUC_old', 'AUC_young', 'Delta AUC', 'EqOdds', 'KL div'])


# Styling the DataFrame
styled_df = df_age.style.format({
    'AUC': "{:.3f}", 
    'AUPRC': "{:.3f}",
    'AUC_old': "{:.3f}", 
    'AUC_young': "{:.3f}",
    'Delta AUC': "{:.3f}",
    'EqOdds': "{:.3f}",
    'KL div': "{:.3f}"
}).background_gradient(cmap='viridis', subset=['AUC', 'AUPRC', 'AUC_old', 'AUC_young', 'Delta AUC', 'EqOdds', 'KL div'])

# Display the styled DataFrame
styled_df


Unnamed: 0,Disease,AUPRC,AUC,AUC_old,AUC_young,Delta AUC,EqOdds,KL div
0,Cardiomegaly,38.528,80.064,38.24,38.728,0.488,1.866,10.743
1,Lung Opacity,64.061,69.314,64.662,63.59,1.072,8.996,3.441
2,Edema,49.779,78.387,53.387,46.992,6.394,2.419,7.957
3,Pleural Effusion,70.154,81.013,72.034,68.894,3.14,4.615,2.959


In [54]:
# Initialize an empty list to store the data
data_health = []

# Iterate over the diseases in the metrics dictionary
for disease, values in metrics.items():
    auprc_overall = values['AUPRC'] *100
    auc_overall = values['AUC'] *100
    auc_groups = [
        metrics_medicaid[disease]['AUPRC'] *100,
        metrics_medicare[disease]['AUPRC'] *100,
        metrics_private[disease]['AUPRC'] *100
    ]
    tp_rates = [
        metrics_medicaid[disease]['TP Rate'] *100,
        metrics_medicare[disease]['TP Rate'] *100,
        metrics_private[disease]['TP Rate'] *100
    ]
    fp_rates = [
        metrics_medicaid[disease]['FP Rate'] *100,
        metrics_medicare[disease]['FP Rate'] *100,
        metrics_private[disease]['FP Rate'] *100
    ]

    kl_rates = [
        metrics_medicaid[disease]['KL Div'] *100,
        metrics_medicare[disease]['KL Div'] *100,
        metrics_private[disease]['KL Div'] *100
    ]

    delta_kl_health = max(abs(kl_rates[i] - kl_rates[j]) for i in range(len(kl_rates)) for j in range(i + 1, len(kl_rates)))

    # Calculate the maximum delta AUC
    delta_auc_health = max(abs(auc_groups[i] - auc_groups[j]) for i in range(len(auc_groups)) for j in range(i + 1, len(auc_groups)))

    # Calculate the maximum equality of odds
    eq_odds_health = max(
        0.5 * (abs(tp_rates[i] - tp_rates[j]) + abs(fp_rates[i] - fp_rates[j]))
        for i in range(len(tp_rates)) for j in range(i + 1, len(tp_rates))
    )

    # Append to the data list
    data_health.append([disease, auprc_overall, auc_overall] + auc_groups + [delta_auc_health, eq_odds_health, delta_kl_health])

# Create a DataFrame
columns = ['Disease', 'AUPRC', 'AUC', 'AUC_Medicaid', 'AUC_Medicare', 'AUC_Private', 'Max Delta AUC', 'Max EqOdds', 'KL div']
df_health = pd.DataFrame(data_health, columns=columns)

# Display the DataFrame with styling
# Styling the DataFrame
styled_df = df_health.style.format({
    'AUC': "{:.3f}", 
    'AUPRC': "{:.3f}", 
    'AUC_Medicaid': "{:.3f}", 
    'AUC_Medicare': "{:.3f}",
    'AUC_Private': "{:.3f}",
    'Max Delta AUC': "{:.3f}",
    'Max EqOdds': "{:.3f}",
    'KL div': "{:.3f}"
}).background_gradient(cmap='viridis', subset=['AUPRC', 'AUC',  'AUC_Medicaid', 'AUC_Medicare', 'AUC_Private', 'Max Delta AUC', 'Max EqOdds', 'KL div'])

# Display the styled DataFrame
styled_df

Unnamed: 0,Disease,AUPRC,AUC,AUC_Medicaid,AUC_Medicare,AUC_Private,Max Delta AUC,Max EqOdds,KL div
0,Cardiomegaly,38.528,80.064,39.976,39.716,33.71,6.267,1.732,8.687
1,Lung Opacity,64.061,69.314,66.946,64.301,62.38,4.566,7.503,3.893
2,Edema,49.779,78.387,43.973,51.884,45.207,7.911,3.138,8.867
3,Pleural Effusion,70.154,81.013,68.179,70.89,68.68,2.711,5.242,4.326


In [55]:
# Initialize an empty list to store the data
data = []
i = 0

# Assuming 'metrics', 'df_sex', 'df_race', 'df_age', and 'df_health' are predefined and correctly structured
# Iterate over the diseases in the metrics dictionary
for disease, values in metrics.items():
    auprc_overall = values['AUPRC'] * 100
    auc_overall = values['AUC'] * 100

    # Append to the data list
    data.append([disease, auprc_overall, auc_overall] +
                [df_sex['Delta AUC'][i], df_sex['EqOdds'][i], df_sex['KL div'][i]] +
                [df_race['Max Delta AUC'][i], df_race['Max EqOdds'][i], df_race['KL div'][i]] +
                [df_age['Delta AUC'][i], df_age['EqOdds'][i], df_age['KL div'][i]] +
                [df_health['Max Delta AUC'][i], df_health['Max EqOdds'][i], df_health['KL div'][i]])
    i += 1

# Create a DataFrame
columns = ['Disease', 'AUPRC', 'AUC', 'Delta AUPRC sex', 'EqOdds sex', 'KL div sex',
           'Delta AUPRC race', 'EqOdds race', 'KL div race', 'Delta AUPRC age', 'EqOdds age', 'KL div age',
           'Delta AUPRC health', 'EqOdds health', 'KL div health']
df = pd.DataFrame(data, columns=columns)

# Styling the DataFrame
styled_df = df.style.format({
    'AUPRC': "{:.1f}",
    'AUC': "{:.1f}",
    'Delta AUPRC sex': "{:.1f}",
    'EqOdds sex': "{:.1f}",
    'KL div sex': "{:.1f}",
    'Delta AUPRC race': "{:.1f}",
    'EqOdds race': "{:.1f}",
    'KL div race': "{:.1f}",
    'Delta AUPRC age': "{:.1f}",
    'EqOdds age': "{:.1f}",
    'KL div age': "{:.1f}",
    'Delta AUPRC health': "{:.1f}",
    'EqOdds health': "{:.1f}",
    'KL div health': "{:.1f}"
}).background_gradient(cmap='OrRd', subset=[
    'AUPRC', 'AUC', 'Delta AUPRC sex', 'EqOdds sex', 'KL div sex', 'Delta AUPRC race', 'EqOdds race', 'KL div race',
    'Delta AUPRC age', 'EqOdds age', 'KL div age', 'Delta AUPRC health', 'EqOdds health', 'KL div health'
])

# Display the styled DataFrame
styled_df


Unnamed: 0,Disease,AUPRC,AUC,Delta AUPRC sex,EqOdds sex,KL div sex,Delta AUPRC race,EqOdds race,KL div race,Delta AUPRC age,EqOdds age,KL div age,Delta AUPRC health,EqOdds health,KL div health
0,Cardiomegaly,38.5,80.1,3.2,1.0,2.0,16.7,2.6,12.3,0.5,1.9,10.7,6.3,1.7,8.7
1,Lung Opacity,64.1,69.3,0.0,1.1,0.0,1.5,3.2,1.7,1.1,9.0,3.4,4.6,7.5,3.9
2,Edema,49.8,78.4,1.5,2.2,0.8,4.2,0.7,7.2,6.4,2.4,8.0,7.9,3.1,8.9
3,Pleural Effusion,70.2,81.0,0.3,0.8,0.4,9.8,2.3,1.9,3.1,4.6,3.0,2.7,5.2,4.3


In [56]:
df.to_csv("adversary_attacks_nn.csv") 