In [1]:
import pandas as pd
import numpy as np
import ast
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix, roc_auc_score, precision_recall_curve, auc
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.optim as optim
import wandb
from torch.optim.lr_scheduler import StepLR



In [2]:
# test = pd.read_csv("../model_dev/densenet_data/densenet_all_test_embeddings.csv", quotechar='"', on_bad_lines='skip')
# train = pd.read_csv("../model_dev/densenet_data/densenet_all_train_embeddings.csv", quotechar='"', on_bad_lines='skip')
# valid = pd.read_csv("../model_dev/densenet_data/densenet_all_valid_embeddings.csv", quotechar='"', on_bad_lines='skip')

# Read the three DenseNet-embedding CSVs with identical parser settings.
# NOTE: on_bad_lines='skip' silently discards rows the parser cannot read.
_csv_read_options = {"quotechar": '"', "on_bad_lines": 'skip'}

test_initial = pd.read_csv("/home/research/svea/ml_project/densenet_test_embeddings.csv", **_csv_read_options)
train_initial = pd.read_csv("/home/research/svea/ml_project/densenet_train_embeddings.csv", **_csv_read_options)
valid_initial = pd.read_csv("/home/research/svea/ml_project/densenet_valid_embeddings.csv", **_csv_read_options)

# Show the available columns, then preview the first rows of the test split.
print(test_initial.columns)
test_initial.head()

Index(['path_to_image', 'path_to_dcm', 'age', 'sex', 'race', 'insurance_type',
       'No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
       'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia',
       'Atelectasis', 'Pneumothorax', 'Pleural Effusion', 'Pleural Other',
       'Fracture', 'Support Devices', 'embeddings'],
      dtype='object')


Unnamed: 0,path_to_image,path_to_dcm,age,sex,race,insurance_type,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,...,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,embeddings
0,train/patient47347/study3/view1_frontal.jpg,train/patient47347/study3/view1_frontal.dcm,78.0,1,0,1,0,0,1,0,...,1,0,0,1,0,1,0,1,1,"[0.0029132624622434378, 0.1020001769065857, 0...."
1,train/patient37527/study12/view1_frontal.jpg,train/patient37527/study12/view1_frontal.dcm,63.0,0,1,2,0,0,0,0,...,0,0,0,0,0,1,0,0,0,"[0.0014348188415169716, 0.0543656125664711, 0...."
2,train/patient41208/study9/view1_frontal.jpg,train/patient41208/study9/view1_frontal.dcm,70.0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,"[0.001982336398214102, 0.040021587163209915, 0..."
3,train/patient39357/study1/view1_frontal.jpg,train/patient39357/study1/view1_frontal.dcm,79.0,1,1,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,"[0.001741771469824016, 0.0560498870909214, 0.1..."
4,train/patient31982/study4/view1_frontal.jpg,train/patient31982/study4/view1_frontal.dcm,67.0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,"[9.678312198957428e-05, 0.12247737497091293, 0..."


In [3]:
# Turn the stringified embedding lists from the CSV into real Python lists.
# Values that are no longer strings (the cell was already run once) are passed
# through unchanged, so re-running this cell does not raise.
test_initial['embeddings'] = test_initial['embeddings'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [4]:
# Turn the stringified embedding lists from the CSV into real Python lists.
# Values that are no longer strings (the cell was already run once) are passed
# through unchanged, so re-running this cell does not raise.
valid_initial['embeddings'] = valid_initial['embeddings'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [5]:
# Turn the stringified embedding lists from the CSV into real Python lists.
# Values that are no longer strings (the cell was already run once) are passed
# through unchanged, so re-running this cell does not raise.
train_initial['embeddings'] = train_initial['embeddings'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [6]:
# The two file-path columns are not used below; work on copies without them.
path_columns = ['path_to_image', 'path_to_dcm']

test = test_initial.drop(columns=path_columns)
train = train_initial.drop(columns=path_columns)
valid = valid_initial.drop(columns=path_columns)


In [10]:
def _keep_full_embeddings(frame, split_name, min_length=10):
    """Drop rows whose embedding has `min_length` or fewer entries.

    Prints how many rows were removed for `split_name` and returns the
    filtered frame as an independent copy, so that later column assignments
    do not write into a slice of the original frame.
    """
    kept = frame[frame['embeddings'].str.len() > min_length].copy()
    print(f'Number of {split_name} removed rows = {len(frame) - len(kept)}')
    return kept


# Each split is measured against itself. The previous version compared the
# sizes of `test` while filtering `valid`, so it always reported 0 removed.
test = _keep_full_embeddings(test, 'test')
train = _keep_full_embeddings(train, 'train')
valid = _keep_full_embeddings(valid, 'valid')

Number of test removed rows = 51
Number of train removed rows = 67
Number of valid removed rows = 0


In [11]:
# Age cut-off: rows with age >= a are encoded as 1, all others as 0.
a = 70

# Derive the flag from the untouched `*_initial` frames rather than from the
# column being overwritten. Assignment aligns on the row index, so only the
# rows still present in each filtered split are filled. This keeps the cell
# idempotent: re-running it no longer compares the 0/1 flags with 70.
test['age'] = (test_initial['age'] >= a).astype(int)
valid['age'] = (valid_initial['age'] >= a).astype(int)
train['age'] = (train_initial['age'] >= a).astype(int)

In [12]:
import torch

# Stack each split's per-row embedding lists into one float32 tensor
# (pandas Series -> list of lists -> numpy array -> tensor).
train_embeddings, valid_embeddings, test_embeddings = (
    torch.tensor(np.array(frame['embeddings'].tolist()), dtype=torch.float32)
    for frame in (train, valid, test)
)

# Wrap the tensors so they can be served in mini-batches of 128.
# Training batches are reshuffled every epoch; validation keeps its order.
train_dataset = TensorDataset(train_embeddings)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

valid_dataset = TensorDataset(valid_embeddings)
valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)


In [13]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder: input_dim -> 512 -> 256 -> latent_dim and back.

    The encoder compresses an input vector into `latent_dim` values; the
    decoder mirrors it and ends in a sigmoid, so every reconstructed value
    lies in [0, 1].
    """

    def __init__(self, input_dim=1024, latent_dim=128):
        super().__init__()
        wide, narrow = 512, 256

        # Encoder: progressively shrink the representation down to the latent code.
        encoder_layers = [
            nn.Linear(input_dim, wide),
            nn.ReLU(),
            nn.Linear(wide, narrow),
            nn.ReLU(),
            nn.Linear(narrow, latent_dim),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        # Decoder: mirror image of the encoder, rebuilding the input from the code.
        decoder_layers = [
            nn.Linear(latent_dim, narrow),
            nn.ReLU(),
            nn.Linear(narrow, wide),
            nn.ReLU(),
            nn.Linear(wide, input_dim),
            nn.Sigmoid(),  # squashes the reconstruction into [0, 1]
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the pair (latent code, reconstruction) for the batch `x`."""
        code = self.encoder(x)
        return code, self.decoder(code)


# Model initialisation: use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
autoencoder = Autoencoder(input_dim=1024, latent_dim=128).to(device)


In [14]:
# Hyperparameters
learning_rate = 0.0005
epochs = 50

# Reconstruction objective (mean squared error) and Adam optimiser.
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=learning_rate)

for epoch in range(epochs):
    # ---- training pass: one optimiser step per mini-batch ----
    autoencoder.train()
    train_loss = 0.0
    for (data,) in train_loader:
        data = data.to(device)  # move the batch to the model's device
        optimizer.zero_grad()   # discard gradients from the previous step

        _, reconstructed = autoencoder(data)
        loss = criterion(reconstructed, data)

        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # ---- validation pass: no gradients, no parameter updates ----
    autoencoder.eval()
    valid_loss = 0.0
    with torch.no_grad():
        for (data,) in valid_loader:
            data = data.to(device)
            _, reconstructed = autoencoder(data)
            loss = criterion(reconstructed, data)
            valid_loss += loss.item()

    # Report the per-batch mean of both losses for this epoch.
    mean_train_loss = train_loss / len(train_loader)
    mean_valid_loss = valid_loss / len(valid_loader)
    print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {mean_train_loss:.4f}, Valid Loss: {mean_valid_loss:.4f}")


Epoch [1/50], Train Loss: 0.0118, Valid Loss: 0.0026
Epoch [2/50], Train Loss: 0.0020, Valid Loss: 0.0016
Epoch [3/50], Train Loss: 0.0014, Valid Loss: 0.0013
Epoch [4/50], Train Loss: 0.0013, Valid Loss: 0.0011
Epoch [5/50], Train Loss: 0.0011, Valid Loss: 0.0010
Epoch [6/50], Train Loss: 0.0010, Valid Loss: 0.0009
Epoch [7/50], Train Loss: 0.0009, Valid Loss: 0.0009
Epoch [8/50], Train Loss: 0.0008, Valid Loss: 0.0008
Epoch [9/50], Train Loss: 0.0008, Valid Loss: 0.0008
Epoch [10/50], Train Loss: 0.0008, Valid Loss: 0.0007
Epoch [11/50], Train Loss: 0.0007, Valid Loss: 0.0007
Epoch [12/50], Train Loss: 0.0007, Valid Loss: 0.0007
Epoch [13/50], Train Loss: 0.0007, Valid Loss: 0.0006
Epoch [14/50], Train Loss: 0.0007, Valid Loss: 0.0006
Epoch [15/50], Train Loss: 0.0006, Valid Loss: 0.0006
Epoch [16/50], Train Loss: 0.0006, Valid Loss: 0.0006
Epoch [17/50], Train Loss: 0.0006, Valid Loss: 0.0006
Epoch [18/50], Train Loss: 0.0006, Valid Loss: 0.0006
Epoch [19/50], Train Loss: 0.0006, Va

In [15]:
# Push every split through the trained encoder only (the decoder is not
# needed here) and bring the latent codes back to the CPU as numpy arrays.
autoencoder.eval()
with torch.no_grad():
    train_latent, valid_latent, test_latent = (
        autoencoder.encoder(split_embeddings.to(device)).cpu().numpy()
        for split_embeddings in (train_embeddings, valid_embeddings, test_embeddings)
    )

# Report the shape of the reduced embeddings for each split.
print(f"Train Latent Shape: {train_latent.shape}")
print(f"Valid Latent Shape: {valid_latent.shape}")
print(f"Test Latent Shape: {test_latent.shape}")


Train Latent Shape: (67196, 128)
Valid Latent Shape: (4481, 128)
Test Latent Shape: (40307, 128)


In [16]:
# Persist the latent codes of each split next to the notebook.
for latent_path, latent_matrix in (
    ("train_latent.npy", train_latent),
    ("valid_latent.npy", valid_latent),
    ("test_latent.npy", test_latent),
):
    np.save(latent_path, latent_matrix)

In [17]:
# Read the stored latent codes back from disk.
train_latent, valid_latent, test_latent = (
    np.load(latent_path)
    for latent_path in ("train_latent.npy", "valid_latent.npy", "test_latent.npy")
)

In [18]:
import pandas as pd
import numpy as np
from sklearn.multioutput import MultiOutputClassifier
from xgboost import XGBClassifier
import shap
import matplotlib.pyplot as plt

# Load latent embeddings
train_latent = np.load("train_latent.npy")
valid_latent = np.load("valid_latent.npy")
test_latent = np.load("test_latent.npy")

# The latents come from a file cache and are joined to the frames purely by
# row position below. A stale cache with a different row count would be padded
# with NaN silently, so fail loudly instead.
assert len(train_latent) == len(train), f"train: {len(train_latent)} latent rows vs {len(train)} frame rows"
assert len(valid_latent) == len(valid), f"valid: {len(valid_latent)} latent rows vs {len(valid)} frame rows"
assert len(test_latent) == len(test), f"test: {len(test_latent)} latent rows vs {len(test)} frame rows"

# Load labels
diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion']
y_train = train[diseases]
y_valid = valid[diseases]
y_test = test[diseases]


# Combine the original columns with the latent embeddings, side by side.
# reset_index(drop=True) gives the filtered frames a 0..n-1 index so that they
# line up row by row with the freshly built latent DataFrame.
x_train = pd.concat([train.reset_index(drop=True), pd.DataFrame(train_latent)], axis=1)
x_test = pd.concat([test.reset_index(drop=True), pd.DataFrame(test_latent)], axis=1)

  from .autonotebook import tqdm as notebook_tqdm


In [65]:
# Multi-label targets for each split, restricted to the diseases of interest.
y_train, y_test, y_valid = (split[diseases] for split in (train, test, valid))

# Test-set columns used later to slice the evaluation into subgroups.
y_no_finding = test["No Finding"]
y_sex, y_race = test['sex'], test['race']
y_insurance, y_age = test['insurance_type'], test['age']


In [36]:
# Inspect which columns the combined training frame currently holds.
print(x_train.columns)

Index([                       'age',                        'sex',
                             'race',             'insurance_type',
                       'No Finding', 'Enlarged Cardiomediastinum',
                      'Lung Lesion',              'Consolidation',
                        'Pneumonia',              'Pleural Other',
       ...
                                118,                          119,
                                120,                          121,
                                122,                          123,
                                124,                          125,
                                126,                          127],
      dtype='object', length=140)


In [51]:
# Inspect which columns the combined test frame currently holds.
x_test.columns

Index([                       'age',                        'sex',
                             'race',             'insurance_type',
                       'No Finding', 'Enlarged Cardiomediastinum',
                     'Cardiomegaly',               'Lung Opacity',
                      'Lung Lesion',                      'Edema',
       ...
                                118,                          119,
                                120,                          121,
                                122,                          123,
                                124,                          125,
                                126,                          127],
      dtype='object', length=147)

In [55]:
# Everything that must not reach the classifier: demographic attributes, all
# finding labels (the six prediction targets included) and the raw embedding
# list. What remains are the integer-named latent feature columns.
non_feature_columns = [
    "sex", "age", "race", "insurance_type",
    "No Finding", "Lung Lesion", 'Enlarged Cardiomediastinum', 'Consolidation',
    'Pneumonia', 'Pleural Other', "Fracture", "Support Devices",
    "Cardiomegaly", "Lung Opacity", "Edema", "Atelectasis", "Pneumothorax",
    "Pleural Effusion", "embeddings",
]

# The train list used to omit the six targets and "embeddings". On a fresh run
# x_train still contains them, which would leak the labels into the features
# and make the later integer cast of the column labels fail.
columns_to_drop_train = list(non_feature_columns)
columns_to_drop_test = list(non_feature_columns)

# errors="ignore": tolerate frames from which some of these columns were
# already removed.
x_train_subset = x_train.drop(columns=columns_to_drop_train, errors="ignore")
x_test_subset = x_test.drop(columns=columns_to_drop_test, errors="ignore")

print(x_test_subset)

            0         1         2         3         4         5         6    \
0      0.425305 -0.140266  0.132230 -0.043113 -0.245426  0.360462  0.510475   
1      0.270447 -0.296409  0.061932  0.265630 -0.599676  0.372155  0.512882   
2     -0.088401 -0.081625  0.550444 -0.449444  0.110903 -0.378580  0.117296   
3      0.226510  0.147124  0.219870  0.075524 -0.288051  0.074556 -0.183503   
4      0.450600  0.156684 -0.293627 -0.324263  0.237824  0.321672  0.269231   
...         ...       ...       ...       ...       ...       ...       ...   
40302 -0.386299 -0.435047  0.601436 -0.040361 -0.519877  0.301437 -0.421643   
40303  0.773695  0.082207  0.004860 -0.287582  0.227193  0.465701  0.280559   
40304  0.354382  0.111568 -0.008665 -0.272823  0.005449  0.077656  0.346835   
40305 -0.295024 -0.352866  0.582980 -0.200716 -0.113246 -0.568129 -0.139537   
40306  0.386309  0.045407  0.098773  0.280105 -0.136829  0.236010  0.347412   

            7         8         9    ...       118 

In [56]:
# Give the remaining column labels a uniform integer dtype.
train_column_labels = x_train_subset.columns.astype(int)
x_train_subset.columns = train_column_labels



In [57]:
# Give the remaining column labels a uniform integer dtype.
test_column_labels = x_test_subset.columns.astype(int)
x_test_subset.columns = test_column_labels

In [39]:
# Inspect the column labels of the training feature matrix after the integer cast.
print(x_train_subset.columns)

Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
       ...
       118, 119, 120, 121, 122, 123, 124, 125, 126, 127],
      dtype='int64', length=128)


In [70]:

# Define the train_model function
def train_model(x_train, y_train, x_test, y_test, model):
    """Fit one copy of `model` per label and return test-set probabilities.

    Parameters
    ----------
    x_train, y_train : training features and multi-label targets.
    x_test : features to score after fitting.
    y_test : accepted for call compatibility; not used here.
    model : estimator wrapped in a MultiOutputClassifier.

    Returns
    -------
    pandas.DataFrame with one column per label holding the predicted
    probability of the positive class (column 1 of predict_proba), or None
    when `model` has no `predict_proba` method.
    """
    multi_output_model = MultiOutputClassifier(model)

    # Fit and score on the data that was passed in. The previous version
    # ignored its arguments and read the notebook globals instead.
    multi_output_model.fit(x_train, y_train)

    if not hasattr(model, "predict_proba"):
        return None

    # Name the outputs after the target columns; fall back to the notebook-level
    # `diseases` list when y_train carries no column names.
    label_names = list(y_train.columns) if hasattr(y_train, "columns") else list(diseases)

    per_label_probs = multi_output_model.predict_proba(x_test)
    y_test_preds_proba = pd.DataFrame(
        {label: probs[:, 1] for label, probs in zip(label_names, per_label_probs)}
    )
    return y_test_preds_proba

# Initialize the XGBoost model.
# `use_label_encoder` is no longer passed: the installed XGBoost reports it as
# an unused parameter once per fitted label.
xgb_model = XGBClassifier(
    eval_metric='logloss',
    learning_rate=0.1,
    random_state=42
)

# Train one classifier per disease and collect the predicted positive-class
# probabilities for the test features (no SHAP values are computed here).
y_pred = train_model(
    x_train=x_train_subset,
    y_train=y_train,
    x_test=x_test_subset,
    y_test=y_test,
    model=xgb_model,
)

# # Visualize SHAP values for each disease
# for disease_name in diseases:
#     print(f"SHAP Summary Plot for {disease_name}")
#     shap.summary_plot(shap_values[disease_name], feature_names=feat_names)


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



In [71]:
# Plain numpy views of the predicted probabilities and the true labels
# (one column per disease), used for positional subgroup slicing below.
predictions, targets = y_pred.to_numpy(), y_test.to_numpy()

In [72]:
# Disease labels that are evaluated below; the metric code iterates over this list.
diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion']

In [73]:
def compute_metrics(predictions, targets):
    """Compute per-disease evaluation metrics.

    `predictions` and `targets` are indexed by disease name and hold the
    predicted scores and the true labels. Scores above 0.5 count as a positive
    prediction. Returns a dict mapping each entry of the module-level
    `diseases` list to a dict with accuracy, ROC AUC, PR AUC, F1 and the four
    confusion-matrix rates. A rate with a zero denominator is reported as 0.
    """
    def _safe_ratio(numerator, denominator):
        return numerator / denominator if denominator > 0 else 0

    metrics = {}
    for disease in diseases:
        scores = predictions[disease]
        truth = targets[disease]
        hard_labels = (scores > 0.5).astype(int)  # fixed decision threshold

        auc_roc = roc_auc_score(truth, scores)
        f1 = f1_score(truth, hard_labels)
        accuracy = accuracy_score(truth, hard_labels)
        tn, fp, fn, tp = confusion_matrix(truth, hard_labels).ravel()

        # Area under the precision-recall curve
        precision, recall, _ = precision_recall_curve(truth, scores)
        auprc = auc(recall, precision)

        metrics[disease] = {
            'Accuracy': accuracy,
            'AUC': auc_roc,
            'AUPRC': auprc,
            'F1 Score': f1,
            'TP Rate': _safe_ratio(tp, tp + fn),
            'FN Rate': _safe_ratio(fn, fn + tp),
            'TN Rate': _safe_ratio(tn, tn + fp),
            'FP Rate': _safe_ratio(fp, tn + fp),
        }
    return metrics

# Overall (all test rows) metrics per disease; the subgroup tables below read from `metrics`.
metrics = compute_metrics(pd.DataFrame(y_pred), pd.DataFrame(y_test))
print(metrics)

{'Cardiomegaly': {'Accuracy': 0.8817823206887141, 'AUC': np.float64(0.8005728032081931), 'AUPRC': np.float64(0.39185048208257195), 'F1 Score': np.float64(0.2070227991346314), 'TP Rate': np.float64(0.1254032258064516), 'FN Rate': np.float64(0.8745967741935484), 'TN Rate': np.float64(0.9879197668826208), 'FP Rate': np.float64(0.012080233117379127)}, 'Lung Opacity': {'Accuracy': 0.6479023494678344, 'AUC': np.float64(0.694616515885432), 'AUPRC': np.float64(0.6424789710439098), 'F1 Score': np.float64(0.6731761238025056), 'TP Rate': np.float64(0.7365450514009272), 'FN Rate': np.float64(0.26345494859907276), 'TN Rate': np.float64(0.5619410643600645), 'FP Rate': np.float64(0.4380589356399355)}, 'Edema': {'Accuracy': 0.7754236236881931, 'AUC': np.float64(0.7848103814335488), 'AUPRC': np.float64(0.49927388982811727), 'F1 Score': np.float64(0.3817784455675454), 'TP Rate': np.float64(0.28859060402684567), 'FN Rate': np.float64(0.7114093959731543), 'TN Rate': np.float64(0.9293971654366142), 'FP Rat

In [67]:
# Display the current value of `predictions` (inspection only).
predictions


<function dict.values>

In [74]:

# Calculate metrics for each disease and for each demographic subgroup.

def _subgroup_metrics(predictions, targets, mask, threshold=0.5):
    """Compute per-disease metrics on the rows selected by `mask`.

    Parameters
    ----------
    predictions, targets : 2-D numpy arrays with one column per entry of the
        module-level `diseases` list (predicted scores / true labels).
    mask : boolean selector with one entry per row; applied by position.
    threshold : scores above this value count as a positive prediction.

    Returns
    -------
    dict mapping each disease to a dict with accuracy, ROC AUC, PR AUC, F1 and
    the four confusion-matrix rates. A rate with a zero denominator is
    reported as 0, the same convention `compute_metrics` uses.
    """
    row_mask = np.asarray(mask, dtype=bool)

    def _safe_ratio(numerator, denominator):
        return numerator / denominator if denominator > 0 else 0

    group_metrics = {}
    for idx, disease in enumerate(diseases):
        disease_pred = predictions[row_mask, idx]
        disease_true = targets[row_mask, idx]
        disease_label = (disease_pred > threshold).astype(int)

        auc_roc = roc_auc_score(disease_true, disease_pred)
        f1 = f1_score(disease_true, disease_label)
        accuracy = accuracy_score(disease_true, disease_label)
        tn, fp, fn, tp = confusion_matrix(disease_true, disease_label).ravel()

        precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
        auprc = auc(recall, precision)

        group_metrics[disease] = {
            'Accuracy': accuracy,
            'AUC': auc_roc,
            'AUPRC': auprc,
            'F1 Score': f1,
            'TP Rate': _safe_ratio(tp, tp + fn),
            'FN Rate': _safe_ratio(fn, fn + tp),
            'TN Rate': _safe_ratio(tn, tn + fp),
            'FP Rate': _safe_ratio(fp, tn + fp),
        }
    return group_metrics


# Sex (1 is reported as female, 0 as male).
metrics_female = _subgroup_metrics(predictions, targets, y_sex == 1)
metrics_male = _subgroup_metrics(predictions, targets, y_sex == 0)

# Race (0 is reported as white, 2 as black, 1 as asian).
metrics_white = _subgroup_metrics(predictions, targets, y_race == 0)
metrics_black = _subgroup_metrics(predictions, targets, y_race == 2)
metrics_asian = _subgroup_metrics(predictions, targets, y_race == 1)

# Insurance type (0 is reported as medicaid, 1 as medicare, 2 as private).
metrics_medicaid = _subgroup_metrics(predictions, targets, y_insurance == 0)
metrics_medicare = _subgroup_metrics(predictions, targets, y_insurance == 1)
metrics_private = _subgroup_metrics(predictions, targets, y_insurance == 2)

# Age flag (0 is reported as young, 1 as old).
metrics_young = _subgroup_metrics(predictions, targets, y_age == 0)
metrics_old = _subgroup_metrics(predictions, targets, y_age == 1)

In [75]:
# Build one table row per disease: overall AUPRC/AUC plus the male/female
# comparison. All values are expressed in percent.
data_sex = []

for disease, values in metrics.items():
    male = metrics_male[disease]
    female = metrics_female[disease]

    auc_male = male['AUC'] * 100
    auc_female = female['AUC'] * 100

    # Equalised-odds gap: mean of the absolute TPR and FPR differences.
    tp_rate_gap = abs(male['TP Rate'] * 100 - female['TP Rate'] * 100)
    fp_rate_gap = abs(male['FP Rate'] * 100 - female['FP Rate'] * 100)

    data_sex.append([
        disease,
        values['AUPRC'] * 100,
        values['AUC'] * 100,
        auc_male,
        auc_female,
        abs(auc_male - auc_female),
        0.5 * (tp_rate_gap + fp_rate_gap),
    ])

# Assemble the DataFrame
sex_table_columns = ['Disease', 'AUPRC', 'AUC', 'AUC_Male', 'AUC_Female', 'Delta AUC', 'EqOdds']
df_sex = pd.DataFrame(data_sex, columns=sex_table_columns)

# Three decimals and a colour gradient on every numeric column.
sex_numeric_columns = sex_table_columns[1:]
styled_df = (
    df_sex.style
    .format({column: "{:.3f}" for column in sex_numeric_columns})
    .background_gradient(cmap='viridis', subset=sex_numeric_columns)
)

# Display the styled DataFrame
styled_df


Unnamed: 0,Disease,AUPRC,AUC,AUC_Male,AUC_Female,Delta AUC,EqOdds
0,Cardiomegaly,39.185,80.057,80.243,79.717,0.526,0.774
1,Lung Opacity,64.248,69.462,69.567,69.317,0.25,1.001
2,Edema,49.927,78.481,78.627,78.313,0.314,2.902
3,Atelectasis,21.801,62.705,63.541,61.385,2.157,0.051
4,Pneumothorax,27.137,74.344,73.615,75.327,1.713,0.29
5,Pleural Effusion,70.406,81.058,80.846,81.357,0.511,0.967


In [76]:
# Build one table row per disease: overall AUPRC/AUC, the AUC of each race
# group and the largest pairwise gaps. All values are expressed in percent.
data_race = []

# Group order used throughout this cell: white, black, asian.
race_group_metrics = (metrics_white, metrics_black, metrics_asian)
race_pairs = [
    (i, j)
    for i in range(len(race_group_metrics))
    for j in range(i + 1, len(race_group_metrics))
]

for disease, values in metrics.items():
    auc_groups = [group[disease]['AUC'] * 100 for group in race_group_metrics]
    tp_rates = [group[disease]['TP Rate'] * 100 for group in race_group_metrics]
    fp_rates = [group[disease]['FP Rate'] * 100 for group in race_group_metrics]

    # Largest AUC difference between any two groups.
    delta_auc_race = max(abs(auc_groups[i] - auc_groups[j]) for i, j in race_pairs)

    # Largest equalised-odds gap (mean of |dTPR| and |dFPR|) between any two groups.
    eq_odds_race = max(
        0.5 * (abs(tp_rates[i] - tp_rates[j]) + abs(fp_rates[i] - fp_rates[j]))
        for i, j in race_pairs
    )

    row = [disease, values['AUPRC'] * 100, values['AUC'] * 100]
    row += auc_groups
    row += [delta_auc_race, eq_odds_race]
    data_race.append(row)

# Assemble the DataFrame
columns = ['Disease', 'AUPRC', 'AUC', 'AUC_White', 'AUC_Black', 'AUC_Asian', 'Max Delta AUC', 'Max EqOdds']
df_race = pd.DataFrame(data_race, columns=columns)

# Three decimals and a colour gradient on every numeric column.
race_numeric_columns = columns[1:]
styled_df = (
    df_race.style
    .format({column: "{:.3f}" for column in race_numeric_columns})
    .background_gradient(cmap='viridis', subset=race_numeric_columns)
)

# Display the styled DataFrame
styled_df

Unnamed: 0,Disease,AUPRC,AUC,AUC_White,AUC_Black,AUC_Asian,Max Delta AUC,Max EqOdds
0,Cardiomegaly,39.185,80.057,79.601,80.752,80.824,1.223,2.453
1,Lung Opacity,64.248,69.462,69.096,71.583,70.233,2.487,3.026
2,Edema,49.927,78.481,78.5,76.769,79.291,2.522,0.627
3,Atelectasis,21.801,62.705,62.932,59.052,62.427,3.88,0.065
4,Pneumothorax,27.137,74.344,73.843,77.237,75.315,3.394,0.817
5,Pleural Effusion,70.406,81.058,80.858,80.517,82.194,1.677,2.543


In [77]:
# Rows of the age fairness table, one per disease.
data_age = []

for disease, values in metrics.items():
    # Per-group metric dictionaries for this disease.
    old_stats = metrics_old[disease]
    young_stats = metrics_young[disease]

    # Per-group AUC as percentages.
    auc_old = old_stats['AUC'] * 100
    auc_young = young_stats['AUC'] * 100

    # Absolute gaps in TP rate and FP rate (both in percent) between the two groups.
    tp_rate_gap = abs(old_stats['TP Rate'] * 100 - young_stats['TP Rate'] * 100)
    fp_rate_gap = abs(old_stats['FP Rate'] * 100 - young_stats['FP Rate'] * 100)

    data_age.append([
        disease,
        values['AUPRC'] * 100,           # overall AUPRC in percent
        values['AUC'] * 100,             # overall AUC in percent
        auc_old,
        auc_young,
        abs(auc_old - auc_young),        # Delta AUC
        0.5 * (tp_rate_gap + fp_rate_gap),  # EqOdds: mean of the two rate gaps
    ])

# Build the table.
df_age = pd.DataFrame(
    data_age,
    columns=['Disease', 'AUPRC', 'AUC', 'AUC_old', 'AUC_young', 'Delta AUC', 'EqOdds'],
)

# Numeric columns: shown with three decimals and shaded with a viridis gradient.
age_numeric_columns = ['AUC', 'AUPRC', 'AUC_old', 'AUC_young', 'Delta AUC', 'EqOdds']
styled_df = (
    df_age.style
    .format({column: "{:.3f}" for column in age_numeric_columns})
    .background_gradient(cmap='viridis', subset=age_numeric_columns)
)

# Leave the styled table as the last expression so the notebook renders it.
styled_df


Unnamed: 0,Disease,AUPRC,AUC,AUC_old,AUC_young,Delta AUC,EqOdds
0,Cardiomegaly,39.185,80.057,76.871,81.397,4.526,2.006
1,Lung Opacity,64.248,69.462,65.685,71.231,5.546,8.989
2,Edema,49.927,78.481,76.886,79.126,2.24,3.078
3,Atelectasis,21.801,62.705,60.284,64.088,3.804,0.033
4,Pneumothorax,27.137,74.344,72.664,74.885,2.221,0.641
5,Pleural Effusion,70.406,81.058,80.526,81.289,0.763,4.779


In [78]:
# Per-insurance metric dictionaries, listed in the same order as the AUC_* columns below.
insurance_metrics = (metrics_medicaid, metrics_medicare, metrics_private)

# Rows of the insurance fairness table, one per disease.
data_health = []

for disease, values in metrics.items():
    # Per-group scores, scaled to percentages.
    auc_groups = [group[disease]['AUC'] * 100 for group in insurance_metrics]
    tp_rates = [group[disease]['TP Rate'] * 100 for group in insurance_metrics]
    fp_rates = [group[disease]['FP Rate'] * 100 for group in insurance_metrics]

    # Every unordered pair of group positions, e.g. (0, 1), (0, 2), (1, 2).
    group_pairs = [
        (first, second)
        for first in range(len(auc_groups))
        for second in range(first + 1, len(auc_groups))
    ]

    # Largest absolute AUC gap between any two groups.
    delta_auc_health = max(abs(auc_groups[first] - auc_groups[second]) for first, second in group_pairs)

    # Largest pairwise equality-of-odds gap: the mean of the absolute
    # TP-rate difference and the absolute FP-rate difference.
    eq_odds_health = max(
        0.5 * (abs(tp_rates[first] - tp_rates[second]) + abs(fp_rates[first] - fp_rates[second]))
        for first, second in group_pairs
    )

    data_health.append([
        disease,
        values['AUPRC'] * 100,
        values['AUC'] * 100,
        *auc_groups,
        delta_auc_health,
        eq_odds_health,
    ])

# Build the table.
columns = ['Disease', 'AUPRC', 'AUC', 'AUC_Medicaid', 'AUC_Medicare', 'AUC_Private', 'Max Delta AUC', 'Max EqOdds']
df_health = pd.DataFrame(data_health, columns=columns)

# All columns except 'Disease' are numeric: three decimals plus a viridis gradient.
health_numeric_columns = columns[1:]
styled_df = (
    df_health.style
    .format({column: "{:.3f}" for column in health_numeric_columns})
    .background_gradient(cmap='viridis', subset=health_numeric_columns)
)

# Leave the styled table as the last expression so the notebook renders it.
styled_df

Unnamed: 0,Disease,AUPRC,AUC,AUC_Medicaid,AUC_Medicare,AUC_Private,Max Delta AUC,Max EqOdds
0,Cardiomegaly,39.185,80.057,82.089,79.018,80.667,3.072,1.388
1,Lung Opacity,64.248,69.462,72.49,67.335,72.627,5.291,7.728
2,Edema,49.927,78.481,77.13,77.532,80.495,3.365,3.314
3,Atelectasis,21.801,62.705,62.299,61.264,65.995,4.731,0.112
4,Pneumothorax,27.137,74.344,73.442,73.797,74.863,1.422,0.549
5,Pleural Effusion,70.406,81.058,82.31,80.304,82.071,2.005,4.596


In [79]:
# Combined summary: overall AUPRC/AUC per disease next to the AUC gap and the
# equality-of-odds gap taken from each per-attribute table
# (df_sex, df_race, df_age, df_health).
#
# Each source table is indexed by its 'Disease' column and queried by disease
# name, so a row is matched on the disease itself rather than on its position.
# (table, name of its delta-AUC column, name of its equality-of-odds column),
# in the order the columns appear in the summary.
gap_sources = [
    (df_sex.set_index('Disease'), 'Delta AUC', 'EqOdds'),
    (df_race.set_index('Disease'), 'Max Delta AUC', 'Max EqOdds'),
    (df_age.set_index('Disease'), 'Delta AUC', 'EqOdds'),
    (df_health.set_index('Disease'), 'Max Delta AUC', 'Max EqOdds'),
]

# One row per disease in the overall metrics dictionary.
data = []
for disease, values in metrics.items():
    # Overall scores, scaled to percentages.
    row = [disease, values['AUPRC'] * 100, values['AUC'] * 100]
    # Append (delta AUC, equality of odds) for sex, race, age, insurance in turn.
    for table, delta_column, eq_odds_column in gap_sources:
        row.extend([table.loc[disease, delta_column], table.loc[disease, eq_odds_column]])
    data.append(row)

# Build the table.
columns = ['Disease', 'AUPRC', 'AUC', 'Delta AUC sex', 'EqOdds sex', 'Delta AUC race', 'EqOdds race', 'Delta AUC age', 'EqOdds age', 'Delta AUC health', 'EqOdds health']
df = pd.DataFrame(data, columns=columns)

# Numeric columns: shown with three decimals and shaded with a viridis gradient.
summary_numeric_columns = ['AUC', 'AUPRC', 'Delta AUC sex', 'EqOdds sex', 'Delta AUC race', 'EqOdds race', 'Delta AUC age', 'EqOdds age', 'Delta AUC health', 'EqOdds health']
styled_df = (
    df.style
    .format({column: "{:.3f}" for column in summary_numeric_columns})
    .background_gradient(cmap='viridis', subset=summary_numeric_columns)
)

# Leave the styled table as the last expression so the notebook renders it.
styled_df

Unnamed: 0,Disease,AUPRC,AUC,Delta AUC sex,EqOdds sex,Delta AUC race,EqOdds race,Delta AUC age,EqOdds age,Delta AUC health,EqOdds health
0,Cardiomegaly,39.185,80.057,0.526,0.774,1.223,2.453,4.526,2.006,3.072,1.388
1,Lung Opacity,64.248,69.462,0.25,1.001,2.487,3.026,5.546,8.989,5.291,7.728
2,Edema,49.927,78.481,0.314,2.902,2.522,0.627,2.24,3.078,3.365,3.314
3,Atelectasis,21.801,62.705,2.157,0.051,3.88,0.065,3.804,0.033,4.731,0.112
4,Pneumothorax,27.137,74.344,1.713,0.29,3.394,0.817,2.221,0.641,1.422,0.549
5,Pleural Effusion,70.406,81.058,0.511,0.967,1.677,2.543,0.763,4.779,2.005,4.596
