In [1]:
import pandas as pd
import numpy as np
import ast
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from collections import defaultdict
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix, roc_auc_score, precision_recall_curve, auc
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import log_loss
from scipy.stats import entropy

### Read the CSV

In [2]:
test = pd.read_csv("../model_dev/densenet_data/densenet_test_embeddings.csv", quotechar='"', on_bad_lines='skip')
train = pd.read_csv("../model_dev/densenet_data/densenet_train_embeddings.csv", quotechar='"', on_bad_lines='skip')
# valid = pd.read_csv("../model_dev/densenet_data/densenet_valid_embeddings.csv", quotechar='"', on_bad_lines='skip')

print(test.columns)
test.head()

Index(['path_to_image', 'path_to_dcm', 'age', 'sex', 'race', 'insurance_type',
       'No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
       'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia',
       'Atelectasis', 'Pneumothorax', 'Pleural Effusion', 'Pleural Other',
       'Fracture', 'Support Devices', 'embeddings'],
      dtype='object')


Unnamed: 0,path_to_image,path_to_dcm,age,sex,race,insurance_type,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,...,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,embeddings
0,train/patient47347/study3/view1_frontal.jpg,train/patient47347/study3/view1_frontal.dcm,78.0,1,0,1,0,0,1,0,...,1,0,0,1,0,1,0,1,1,"[0.0029132624622434378, 0.1020001769065857, 0...."
1,train/patient37527/study12/view1_frontal.jpg,train/patient37527/study12/view1_frontal.dcm,63.0,0,1,2,0,0,0,0,...,0,0,0,0,0,1,0,0,0,"[0.0014348188415169716, 0.0543656125664711, 0...."
2,train/patient41208/study9/view1_frontal.jpg,train/patient41208/study9/view1_frontal.dcm,70.0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,"[0.001982336398214102, 0.040021587163209915, 0..."
3,train/patient39357/study1/view1_frontal.jpg,train/patient39357/study1/view1_frontal.dcm,79.0,1,1,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,"[0.001741771469824016, 0.0560498870909214, 0.1..."
4,train/patient31982/study4/view1_frontal.jpg,train/patient31982/study4/view1_frontal.dcm,67.0,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,"[9.678312198957428e-05, 0.12247737497091293, 0..."


### Convert embeddings from str to list (a bit long for large data sets)

In [3]:
test['embeddings'] = test['embeddings'].apply(ast.literal_eval)

In [4]:
train['embeddings'] = train['embeddings'].apply(ast.literal_eval)

### Remove columns

In [5]:
test = test.drop(columns=['path_to_image', 'path_to_dcm'])
train = train.drop(columns=['path_to_image', 'path_to_dcm'])


### Remove rows that were not processed (embeddings = 0)

In [6]:
initial_size = test.shape[0] 

# The previous logic with transforming the list to string and filtering on the length of said string is not necessarily stable and misleading.
# Let's implement a more explicit test for what we actually care about: 

test = test[test['embeddings'].apply(type) == list]

final_size = test.shape[0] 

print(f'Number of test removed rows = {initial_size - final_size}')

initial_size = train.shape[0] 

train = train[train['embeddings'].apply(type) == list]

final_size = train.shape[0] 

print(f'Number of train removed rows = {initial_size - final_size}')

Number of test removed rows = 51
Number of train removed rows = 67


### Convert age to binary to study bias

In [7]:
a = 70
test['age'] = (test['age'] >= a).astype(int)
train['age'] = (train['age'] >= a).astype(int)

### Create artificial training distribution

In [8]:
print("Initial sex Distribution:")
print(train['sex'].value_counts())

print("\nInitial Race Distribution:")
print(train['race'].value_counts())

print("\nInitial Age Distribution:")
print(train['age'].value_counts())

print("\nInitial Health Distribution:")
print(train['insurance_type'].value_counts())

Initial sex Distribution:
sex
0    38998
1    28198
Name: count, dtype: int64

Initial Race Distribution:
race
0    52553
1     9844
2     4799
Name: count, dtype: int64

Initial Age Distribution:
age
0    42267
1    24929
Name: count, dtype: int64

Initial Health Distribution:
insurance_type
1    43076
2    18340
0     5780
Name: count, dtype: int64


In [9]:
# nb_of_rows_to_remove = 5000
# cath_to_remove = 'sex' # sex or race
# which_cath = 0 # 0 or 1 or 2

# filtered_df = train[train[cath_to_remove] == which_cath]

# rows_to_drop = filtered_df.sample(n=nb_of_rows_to_remove, random_state=42) 

# train = train.drop(rows_to_drop.index)

# nb_of_rows_to_add = 5000  
# cath_to_use = 'sex' 
# which_cath = 1 

# filtered_df = train[train[cath_to_use] == which_cath]

# rows_to_add = filtered_df.sample(n=nb_of_rows_to_add, replace=True, random_state=42)

# train = pd.concat([train, rows_to_add], ignore_index=True)

# nb_of_rows_to_remove = 8500
# cath_to_remove = 'age' # sex or race
# which_cath = 0 # 0 or 1 or 2

# filtered_df = train[train[cath_to_remove] == which_cath]

# rows_to_drop = filtered_df.sample(n=nb_of_rows_to_remove, random_state=42) 

# train = train.drop(rows_to_drop.index)

# nb_of_rows_to_add = 8500  
# cath_to_use = 'age' 
# which_cath = 1 

# filtered_df = train[train[cath_to_use] == which_cath]

# rows_to_add = filtered_df.sample(n=nb_of_rows_to_add, replace=True, random_state=42)

# train = pd.concat([train, rows_to_add], ignore_index=True)



In [10]:
nb_of_rows_to_add = 45000  
cath_to_use = 'race' 
which_cath = 2

filtered_df = train[train[cath_to_use] == which_cath]

rows_to_add = filtered_df.sample(n=nb_of_rows_to_add, replace=True, random_state=42)

train = pd.concat([train, rows_to_add], ignore_index=True)

nb_of_rows_to_add = 40000  
cath_to_use = 'race' 
which_cath = 1

filtered_df = train[train[cath_to_use] == which_cath]

rows_to_add = filtered_df.sample(n=nb_of_rows_to_add, replace=True, random_state=42)

train = pd.concat([train, rows_to_add], ignore_index=True)

nb_of_rows_to_add = 50000  
cath_to_use = 'age' 
which_cath = 1

filtered_df = train[train[cath_to_use] == which_cath]

rows_to_add = filtered_df.sample(n=nb_of_rows_to_add, replace=True, random_state=42)

train = pd.concat([train, rows_to_add], ignore_index=True)

nb_of_rows_to_add = 15000  
cath_to_use = 'sex' 
which_cath = 1

filtered_df = train[train[cath_to_use] == which_cath]

rows_to_add = filtered_df.sample(n=nb_of_rows_to_add, replace=True, random_state=42)

train = pd.concat([train, rows_to_add], ignore_index=True)

### Train test

In [11]:
train_embeddings = pd.DataFrame(train['embeddings'].tolist())
test_embeddings = pd.DataFrame(test['embeddings'].tolist())
# valid_embeddings = pd.DataFrame(valid['embeddings'].tolist())


# Diseases to predict
# diseases = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia',
#             'Atelectasis', 'Pneumothorax', 'Pleural Effusion', 'Pleural Other', 'Fracture']
diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Pleural Effusion']

# Labels for train and test
y_train = train[diseases]
y_test = test[diseases]

# Create x_train and x_test
x_train = pd.concat([train.reset_index(), train_embeddings], axis=1)
x_test =  pd.concat([test.reset_index(), test_embeddings], axis=1)

x_train.drop(columns=["embeddings"] + diseases, inplace=True)
x_test.drop(columns=["embeddings"] + diseases, inplace=True)




# Labels for train and test
y_train = train[diseases]
y_test = test[diseases]
# y_valid = valid[diseases]
y_no_finding = test["No Finding"]
y_sex = test['sex']
y_race = test['race']
y_insurance = test['insurance_type']
y_age = test['age']

sex_train = train['sex']
age_train = train['age']
race_train = train['race']
health_train = train['insurance_type']


### PCA to reduce embeddings

In [12]:
# Step 1: Standardize the embeddings_list to have mean 0 and variance 1
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(np.stack(train_embeddings.values))
X_test_scaled = scaler.transform(np.stack(test_embeddings.values))

# Step 2: Set target variance threshold (e.g., 95%)
variance_threshold = 0.95

# Step 3: Fit PCA to determine the optimal number of components based on variance threshold
pca_full = PCA()
pca_full.fit(X_train_scaled)
cumulative_variance = np.cumsum(pca_full.explained_variance_ratio_)

# Step 4: Find the number of components that meets the variance threshold
optimal_components = np.argmax(cumulative_variance >= variance_threshold) + 1
print(f"Optimal number of components to retain {variance_threshold*100}% variance: {optimal_components}")

#95% variance means that the selected principal components (reduced dimensions) retain 95% of the total variability present in the original high-dimensional data.

Optimal number of components to retain 95.0% variance: 303


In [13]:
# Apply PCA if wanted
pca = PCA(n_components=optimal_components)
x_train_subset = pca.fit_transform(X_train_scaled)
x_test_subset = pca.transform(X_test_scaled)


In [14]:
# diseases = ['Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity', 'Lung Lesion', 'Edema', 
#             'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax', 
#             'Pleural Effusion', 'Pleural Other', 'Fracture']
diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Pleural Effusion']

### Adversarial attacks

In [15]:
class AdversarialModel(BaseEstimator, ClassifierMixin):
    def __init__(self, main_model, adversary_model, alpha=0.1):
        """
        main_model: The primary model predicting diseases.
        adversary_model: The model trying to predict the sensitive attribute.
        alpha: Weight of the adversarial loss.
        """
        self.main_model = main_model
        self.adversary_model = adversary_model
        self.alpha = alpha

    def fit(self, X, Y, S1, S2):
        """
        X: Features
        Y: Target labels (diseases)
        S: Sensitive attribute (e.g., sex)
        """
        for _ in range(3):  # Number of training iterations
            # Update main model
            self.main_model.fit(X, Y)

            # Predict diseases to use as features for the adversary
            
            Y_pred = self.main_model.predict_proba(X)
            
            # Train adversary on predicting sensitive attributes from disease predictions
            self.adversary_model.fit(Y_pred, S1)

            # Adversary predictions
            S_pred = self.adversary_model.predict_proba(Y_pred)

            # Calculate adversarial loss
            adv_loss = log_loss(S1, S_pred)

            # Update main model to decrease adversary success
            Y_grad = -self.alpha * (S_pred - S_pred - S1.values.reshape(-1, 1))
            self.main_model.fit(X, Y, sample_weight=Y_grad[:, 1])



            # Update main model
            self.main_model.fit(X, Y)

            # Predict diseases to use as features for the adversary
            Y_pred = self.main_model.predict_proba(X)
            

            # Train adversary on predicting sensitive attributes from disease predictions
            self.adversary_model.fit(Y_pred, S2)

            # Adversary predictions
            S_pred = self.adversary_model.predict_proba(Y_pred)

            # Calculate adversarial loss
            adv_loss = log_loss(S2, S_pred)

            # Update main model to decrease adversary success
            Y_grad = -self.alpha * (S_pred - S_pred - S2.values.reshape(-1, 1))
            self.main_model.fit(X, Y, sample_weight=Y_grad[:, 1])

    def predict(self, X):
        return self.main_model.predict_proba(X)


In [16]:

# Main model
xgb_model = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    learning_rate=0.1,
    random_state=42
)

# Adversary model
adversary_model = LogisticRegression()

adversary_model = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    learning_rate=0.1,
    random_state=42
)

# Adversarial wrapper
adv_model = AdversarialModel(main_model=xgb_model, adversary_model=adversary_model, alpha=0.1)

# Train with features (X_train), disease labels (y_train), and sensitive attribute (sex)
adv_model.fit(x_train_subset, y_train, age_train, sex_train)

adv_model.fit(x_train_subset, y_train, race_train, health_train)

# Predict with adversarially trained model
y_pred = adv_model.predict(x_test_subset)


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

In [17]:
predictions = y_pred
targets = y_test.values

In [18]:
diseases = ['Cardiomegaly', 'Lung Opacity', 'Edema', 'Pleural Effusion']

In [19]:
def compute_metrics(predictions, targets):
    # Calculate metrics for each disease
    metrics = {}
    for idx, disease in enumerate(diseases):
        disease_pred = predictions[disease]
        disease_true = targets[disease]
        # disease_pred = predictions[:, idx]
        # disease_true = targets[:, idx]
        auc_roc = roc_auc_score(disease_true, disease_pred)
        f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
        accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
        tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
        tp_rate = tp / (tp + fn) if (tp + fn) > 0 else 0
        tn_rate = tn / (tn + fp) if (tn + fp) > 0 else 0
        fn_rate = fn / (fn + tp) if (fn + tp) > 0 else 0
        fp_rate = fp / (tn + fp) if (tn + fp) > 0 else 0
        
        # Calculate Precision-Recall AUC
        precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
        auprc = auc(recall, precision)

        metrics[disease] = {
            'Accuracy': accuracy,
            'AUC': auc_roc,
            'AUPRC': auprc,
            'F1 Score': f1,
            'TP Rate': tp_rate,
            'FN Rate': fn_rate,
            'TN Rate': tn_rate,
            'FP Rate': fp_rate
        }
    return metrics

metrics = compute_metrics(pd.DataFrame(y_pred, columns=diseases), pd.DataFrame(y_test.values, columns=diseases))
print(metrics)

{'Cardiomegaly': {'Accuracy': 0.8803929838489593, 'AUC': 0.7921453245336328, 'AUPRC': 0.37163312157491224, 'F1 Score': 0.17940425531914894, 'TP Rate': 0.10625, 'FN Rate': 0.89375, 'TN Rate': 0.9890231137013042, 'FP Rate': 0.010976886298695787}, 'Lung Opacity': {'Accuracy': 0.6401617585034858, 'AUC': 0.6833973504797125, 'AUPRC': 0.6304524688362466, 'F1 Score': 0.6621004566210046, 'TP Rate': 0.7160854666397903, 'FN Rate': 0.2839145333602096, 'TN Rate': 0.5665347212041245, 'FP Rate': 0.4334652787958755}, 'Edema': {'Accuracy': 0.7727689979408043, 'AUC': 0.7778992951069524, 'AUPRC': 0.48856639547251707, 'F1 Score': 0.3436761017556431, 'TP Rate': 0.2475993804852865, 'FN Rate': 0.7524006195147135, 'TN Rate': 0.9388674808960877, 'FP Rate': 0.06113251910391222}, 'Pleural Effusion': {'Accuracy': 0.7341156622919096, 'AUC': 0.8050897639188975, 'AUPRC': 0.6959685358634421, 'F1 Score': 0.6645801383368283, 'TP Rate': 0.6681981244886399, 'FN Rate': 0.33180187551136003, 'TN Rate': 0.7770087640265378, '

In [20]:
def create_distributions(y_true, y_pred):
            P = np.array([1 - y_true, y_true]).T  # Probabiility distribution of True Labels
            Q = np.array([1 - y_pred, y_pred]).T  # Probabiility distribution of predicted diseases
            return P, Q

In [21]:

# Calculate metrics for each disease and for each class

metrics_female = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_sex == 1, idx]
    disease_true = targets[y_sex == 1, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)
    
    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)

    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_female[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_male = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_sex == 0, idx]
    disease_true = targets[y_sex == 0, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_male[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    

metrics_white = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_race == 0, idx]
    disease_true = targets[y_race == 0, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_white[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_black = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_race == 2, idx]
    disease_true = targets[y_race == 2, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_black[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_asian = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_race == 1, idx]
    disease_true = targets[y_race == 1, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_asian[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    

metrics_medicaid = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_insurance == 0, idx]
    disease_true = targets[y_insurance == 0, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_medicaid[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_medicare = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_insurance == 1, idx]
    disease_true = targets[y_insurance == 1, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_medicare[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_private = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_insurance == 2, idx]
    disease_true = targets[y_insurance == 2, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_private[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    

metrics_young = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_age == 0, idx]
    disease_true = targets[y_age == 0, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)
    
    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_young[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }
    
metrics_old = {}
for idx, disease in enumerate(diseases):
    # disease_pred = predictions[:, idx]
    disease_pred = predictions[y_age == 1, idx]
    disease_true = targets[y_age == 1, idx]
    auc_roc = roc_auc_score(disease_true, disease_pred)
    f1 = f1_score(disease_true, (disease_pred > 0.5).astype(int))
    accuracy = accuracy_score(disease_true, (disease_pred > 0.5).astype(int))
    tn, fp, fn, tp = confusion_matrix(disease_true, (disease_pred > 0.5).astype(int)).ravel()
    tp_rate = tp / (tp + fn)
    tn_rate = tn / (tn + fp)
    fn_rate = fn / (fn + tp)
    fp_rate = fp / (tn + fp)

    precision, recall, _ = precision_recall_curve(disease_true, disease_pred)
    auprc = auc(recall, precision)


    # KL-Divergence
    P, Q = create_distributions(disease_true, disease_pred)
    kl_div = np.mean([entropy(P_row, Q_row) for P_row, Q_row in zip(P, Q)])

    metrics_old[disease] = {
        'Accuracy': accuracy,
        'AUC': auc_roc,
        'AUPRC': auprc,
        'F1 Score': f1,
        'TP Rate': tp_rate,
        'FN Rate': fn_rate,
        'TN Rate': tn_rate,
        'FP Rate': fp_rate,
        'KL Div': kl_div
        }

In [22]:
metrics_female

{'Cardiomegaly': {'Accuracy': 0.8879758808228896,
  'AUC': 0.7868315034766061,
  'AUPRC': 0.34859832982562183,
  'F1 Score': 0.16409351565946184,
  'TP Rate': 0.09577754891864057,
  'FN Rate': 0.9042224510813595,
  'TN Rate': 0.990717243221584,
  'FP Rate': 0.009282756778415922,
  'KL Div': 0.29883524474489265},
 'Lung Opacity': {'Accuracy': 0.6399266966185859,
  'AUC': 0.6830619266824584,
  'AUPRC': 0.6321279471730845,
  'F1 Score': 0.6605739760378936,
  'TP Rate': 0.7076170009551098,
  'FN Rate': 0.2923829990448902,
  'TN Rate': 0.573536299765808,
  'FP Rate': 0.42646370023419206,
  'KL Div': 0.6367646401185277},
 'Edema': {'Accuracy': 0.7711633956017971,
  'AUC': 0.7749665830472989,
  'AUPRC': 0.49490932823660516,
  'F1 Score': 0.32218525652250046,
  'TP Rate': 0.22265246853823814,
  'FN Rate': 0.7773475314617618,
  'TN Rate': 0.9484511889862328,
  'FP Rate': 0.05154881101376721,
  'KL Div': 0.46525629015140313},
 'Pleural Effusion': {'Accuracy': 0.73622605816978,
  'AUC': 0.8081845

In [23]:
# Initialize an empty list to store the data
data_sex = []

# Iterate over the diseases in the metrics dictionary
for disease, values in metrics.items():
    # Extract AUC and rates from dictionaries
    
    auprc_overall = values['AUPRC'] *100

    auc_overall = values['AUC'] *100
    auc_male = metrics_male[disease]['AUPRC'] *100
    auc_female = metrics_female[disease]['AUPRC'] *100
    tp_rate_male = metrics_male[disease]['TP Rate'] *100
    tp_rate_female = metrics_female[disease]['TP Rate'] *100
    fp_rate_male = metrics_male[disease]['FP Rate'] *100
    fp_rate_female = metrics_female[disease]['FP Rate'] *100

    kl1 = metrics_male[disease]['KL Div'] *100
    kl2 = metrics_female[disease]['KL Div'] *100

    
    delta_KL_sex = abs(kl1 - kl2)


    # Calculate delta AUC and equality of odds
    delta_auc_sex = abs(auc_male - auc_female) 
    eq_odds_sex = 0.5 * (abs(tp_rate_male - tp_rate_female) + abs(fp_rate_male - fp_rate_female))
    
    # Append to the data list
    data_sex.append([disease, auprc_overall, auc_overall, auc_male, auc_female, delta_auc_sex, eq_odds_sex, delta_KL_sex])

# Create a DataFrame
df_sex = pd.DataFrame(data_sex, columns=['Disease', 'AUPRC', 'AUC', 'AUC_Male', 'AUC_Female', 'Delta AUC', 'EqOdds', 'KL div'])


# Styling the DataFrame
styled_df = df_sex.style.format({
    'AUC': "{:.3f}", 
    'AUPRC': "{:.3f}",
    'AUC_Male': "{:.3f}", 
    'AUC_Female': "{:.3f}",
    'Delta AUC': "{:.3f}",
    'EqOdds': "{:.3f}",
    'KL div': "{:.3f}"
}).background_gradient(cmap='viridis', subset=['AUPRC', 'AUC', 'AUC_Male', 'AUC_Female', 'Delta AUC', 'EqOdds', 'KL div'])

# Display the styled DataFrame
styled_df


Unnamed: 0,Disease,AUPRC,AUC,AUC_Male,AUC_Female,Delta AUC,EqOdds,KL div
0,Cardiomegaly,37.163,79.215,38.638,34.86,3.778,1.008,1.844
1,Lung Opacity,63.045,68.34,62.938,63.213,0.274,1.334,0.048
2,Edema,48.857,77.79,48.548,49.491,0.943,2.998,1.217
3,Pleural Effusion,69.597,80.509,69.343,69.939,0.596,0.534,0.695


In [24]:
# Initialize an empty list to store the data
data_race = []

# Iterate over the diseases in the metrics dictionary
for disease, values in metrics.items():
    auprc_overall = values['AUPRC'] *100
    auc_overall = values['AUC'] *100
    auc_groups = [
        metrics_white[disease]['AUPRC'] *100,
        metrics_black[disease]['AUPRC'] *100,
        metrics_asian[disease]['AUPRC'] *100
    ]
    tp_rates = [
        metrics_white[disease]['TP Rate'] *100,
        metrics_black[disease]['TP Rate'] *100,
        metrics_asian[disease]['TP Rate'] *100
    ]
    fp_rates = [
        metrics_white[disease]['FP Rate'] *100,
        metrics_black[disease]['FP Rate'] *100,
        metrics_asian[disease]['FP Rate'] *100
    ]

    kl_rates = [
        metrics_white[disease]['KL Div'] *100,
        metrics_black[disease]['KL Div'] *100,
        metrics_asian[disease]['KL Div'] *100
    ]

    delta_kl_race = max(abs(kl_rates[i] - kl_rates[j]) for i in range(len(kl_rates)) for j in range(i + 1, len(kl_rates)))

    # Calculate the maximum delta AUC
    delta_auc_race = max(abs(auc_groups[i] - auc_groups[j]) for i in range(len(auc_groups)) for j in range(i + 1, len(auc_groups)))

    # Calculate the maximum equality of odds
    eq_odds_race = max(
        0.5 * (abs(tp_rates[i] - tp_rates[j]) + abs(fp_rates[i] - fp_rates[j]))
        for i in range(len(tp_rates)) for j in range(i + 1, len(tp_rates))
    )

    # Append to the data list
    data_race.append([disease, auprc_overall, auc_overall] + auc_groups + [delta_auc_race, eq_odds_race, delta_kl_race])

# Create a DataFrame
columns = ['Disease', 'AUPRC', 'AUC', 'AUC_White', 'AUC_Black', 'AUC_Asian', 'Max Delta AUC', 'Max EqOdds', 'KL div']
df_race = pd.DataFrame(data_race, columns=columns)

# Display the DataFrame with styling
# Styling the DataFrame
styled_df = df_race.style.format({
    'AUC': "{:.3f}", 
    'AUPRC': "{:.3f}", 
    'AUC_White': "{:.3f}", 
    'AUC_Black': "{:.3f}",
    'AUC_Asian': "{:.3f}",
    'Max Delta AUC': "{:.3f}",
    'Max EqOdds': "{:.3f}",
    'KL div': "{:.3f}"
}).background_gradient(cmap='viridis', subset=['AUPRC', 'AUC', 'AUC_White', 'AUC_Black', 'AUC_Asian', 'Max Delta AUC', 'Max EqOdds', 'KL div'])

# Display the styled DataFrame
styled_df

Unnamed: 0,Disease,AUPRC,AUC,AUC_White,AUC_Black,AUC_Asian,Max Delta AUC,Max EqOdds,KL div
0,Cardiomegaly,37.163,79.215,34.261,51.127,42.5,16.867,1.368,13.186
1,Lung Opacity,63.045,68.34,62.86,63.53,63.818,0.958,3.305,1.835
2,Edema,48.857,77.79,49.294,51.569,44.621,6.948,1.537,6.787
3,Pleural Effusion,69.597,80.509,70.011,59.681,71.195,11.514,3.35,1.253


In [25]:
# Initialize an empty list to store the data
data_age = []

# Iterate over the diseases in the metrics dictionary
for disease, values in metrics.items():
    # Extract AUC and rates from dictionaries
    
    auprc_overall = values['AUPRC'] *100

    auc_overall = values['AUC'] *100
    auc_old = metrics_old[disease]['AUPRC'] *100
    auc_young = metrics_young[disease]['AUPRC'] *100
    tp_rate_old = metrics_old[disease]['TP Rate'] *100
    tp_rate_young = metrics_young[disease]['TP Rate'] *100
    fp_rate_old = metrics_old[disease]['FP Rate'] *100
    fp_rate_young = metrics_young[disease]['FP Rate'] *100


    kl1 = metrics_old[disease]['KL Div'] *100
    kl2 = metrics_young[disease]['KL Div'] *100

    
    delta_KL_age = abs(kl1 - kl2)

    
    # Calculate delta AUC and equality of odds
    delta_auc_age = abs(auc_old - auc_young)
    eq_odds_age = 0.5 * (abs(tp_rate_old - tp_rate_young) + abs(fp_rate_old - fp_rate_young))
    
    # Append to the data list
    data_age.append([disease, auprc_overall, auc_overall, auc_old, auc_young, delta_auc_age, eq_odds_age, delta_KL_age])

# Create a DataFrame
df_age = pd.DataFrame(data_age, columns=['Disease', 'AUPRC', 'AUC', 'AUC_old', 'AUC_young', 'Delta AUC', 'EqOdds', 'KL div'])


# Styling the DataFrame
styled_df = df_age.style.format({
    'AUC': "{:.3f}", 
    'AUPRC': "{:.3f}",
    'AUC_old': "{:.3f}", 
    'AUC_young': "{:.3f}",
    'Delta AUC': "{:.3f}",
    'EqOdds': "{:.3f}",
    'KL div': "{:.3f}"
}).background_gradient(cmap='viridis', subset=['AUC', 'AUPRC', 'AUC_old', 'AUC_young', 'Delta AUC', 'EqOdds', 'KL div'])

# Display the styled DataFrame
styled_df


Unnamed: 0,Disease,AUPRC,AUC,AUC_old,AUC_young,Delta AUC,EqOdds,KL div
0,Cardiomegaly,37.163,79.215,37.295,37.104,0.191,1.232,10.683
1,Lung Opacity,63.045,68.34,63.37,62.775,0.595,9.003,3.572
2,Edema,48.857,77.79,52.479,46.061,6.418,2.165,8.113
3,Pleural Effusion,69.597,80.509,71.96,67.941,4.019,6.001,2.45


In [26]:
# Initialize an empty list to store the data
data_health = []

# Iterate over the diseases in the metrics dictionary
for disease, values in metrics.items():
    auprc_overall = values['AUPRC'] *100
    auc_overall = values['AUC'] *100
    auc_groups = [
        metrics_medicaid[disease]['AUPRC'] *100,
        metrics_medicare[disease]['AUPRC'] *100,
        metrics_private[disease]['AUPRC'] *100
    ]
    tp_rates = [
        metrics_medicaid[disease]['TP Rate'] *100,
        metrics_medicare[disease]['TP Rate'] *100,
        metrics_private[disease]['TP Rate'] *100
    ]
    fp_rates = [
        metrics_medicaid[disease]['FP Rate'] *100,
        metrics_medicare[disease]['FP Rate'] *100,
        metrics_private[disease]['FP Rate'] *100
    ]

    kl_rates = [
        metrics_medicaid[disease]['KL Div'] *100,
        metrics_medicare[disease]['KL Div'] *100,
        metrics_private[disease]['KL Div'] *100
    ]

    delta_kl_health = max(abs(kl_rates[i] - kl_rates[j]) for i in range(len(kl_rates)) for j in range(i + 1, len(kl_rates)))

    # Calculate the maximum delta AUC
    delta_auc_health = max(abs(auc_groups[i] - auc_groups[j]) for i in range(len(auc_groups)) for j in range(i + 1, len(auc_groups)))

    # Calculate the maximum equality of odds
    eq_odds_health = max(
        0.5 * (abs(tp_rates[i] - tp_rates[j]) + abs(fp_rates[i] - fp_rates[j]))
        for i in range(len(tp_rates)) for j in range(i + 1, len(tp_rates))
    )

    # Append to the data list
    data_health.append([disease, auprc_overall, auc_overall] + auc_groups + [delta_auc_health, eq_odds_health, delta_kl_health])

# Create a DataFrame
columns = ['Disease', 'AUPRC', 'AUC', 'AUC_Medicaid', 'AUC_Medicare', 'AUC_Private', 'Max Delta AUC', 'Max EqOdds', 'KL div']
df_health = pd.DataFrame(data_health, columns=columns)

# Display the DataFrame with styling
# Styling the DataFrame
styled_df = df_health.style.format({
    'AUC': "{:.3f}", 
    'AUPRC': "{:.3f}", 
    'AUC_Medicaid': "{:.3f}", 
    'AUC_Medicare': "{:.3f}",
    'AUC_Private': "{:.3f}",
    'Max Delta AUC': "{:.3f}",
    'Max EqOdds': "{:.3f}",
    'KL div': "{:.3f}"
}).background_gradient(cmap='viridis', subset=['AUPRC', 'AUC',  'AUC_Medicaid', 'AUC_Medicare', 'AUC_Private', 'Max Delta AUC', 'Max EqOdds', 'KL div'])

# Display the styled DataFrame
styled_df

Unnamed: 0,Disease,AUPRC,AUC,AUC_Medicaid,AUC_Medicare,AUC_Private,Max Delta AUC,Max EqOdds,KL div
0,Cardiomegaly,37.163,79.215,38.48,38.144,33.311,5.169,1.026,9.019
1,Lung Opacity,63.045,68.34,65.784,63.222,61.651,4.132,8.56,4.057
2,Edema,48.857,77.79,42.902,50.95,44.561,8.048,2.24,9.089
3,Pleural Effusion,69.597,80.509,66.052,70.765,67.381,4.713,5.28,4.207


In [27]:
# Initialize an empty list to store the data
data = []
i = 0

# Assuming 'metrics', 'df_sex', 'df_race', 'df_age', and 'df_health' are predefined and correctly structured
# Iterate over the diseases in the metrics dictionary
for disease, values in metrics.items():
    auprc_overall = values['AUPRC'] * 100
    auc_overall = values['AUC'] * 100

    # Append to the data list
    data.append([disease, auprc_overall, auc_overall] +
                [df_sex['Delta AUC'][i], df_sex['EqOdds'][i], df_sex['KL div'][i]] +
                [df_race['Max Delta AUC'][i], df_race['Max EqOdds'][i], df_race['KL div'][i]] +
                [df_age['Delta AUC'][i], df_age['EqOdds'][i], df_age['KL div'][i]] +
                [df_health['Max Delta AUC'][i], df_health['Max EqOdds'][i], df_health['KL div'][i]])
    i += 1

# Create a DataFrame
columns = ['Disease', 'AUPRC', 'AUC', 'Delta AUPRC sex', 'EqOdds sex', 'KL div sex',
           'Delta AUPRC race', 'EqOdds race', 'KL div race', 'Delta AUPRC age', 'EqOdds age', 'KL div age',
           'Delta AUPRC health', 'EqOdds health', 'KL div health']
df = pd.DataFrame(data, columns=columns)

# Styling the DataFrame
styled_df = df.style.format({
    'AUPRC': "{:.1f}",
    'AUC': "{:.1f}",
    'Delta AUPRC sex': "{:.1f}",
    'EqOdds sex': "{:.1f}",
    'KL div sex': "{:.1f}",
    'Delta AUPRC race': "{:.1f}",
    'EqOdds race': "{:.1f}",
    'KL div race': "{:.1f}",
    'Delta AUPRC age': "{:.1f}",
    'EqOdds age': "{:.1f}",
    'KL div age': "{:.1f}",
    'Delta AUPRC health': "{:.1f}",
    'EqOdds health': "{:.1f}",
    'KL div health': "{:.1f}"
}).background_gradient(cmap='OrRd', subset=[
    'AUPRC', 'AUC', 'Delta AUPRC sex', 'EqOdds sex', 'KL div sex', 'Delta AUPRC race', 'EqOdds race', 'KL div race',
    'Delta AUPRC age', 'EqOdds age', 'KL div age', 'Delta AUPRC health', 'EqOdds health', 'KL div health'
])

# Display the styled DataFrame
styled_df


Unnamed: 0,Disease,AUPRC,AUC,Delta AUPRC sex,EqOdds sex,KL div sex,Delta AUPRC race,EqOdds race,KL div race,Delta AUPRC age,EqOdds age,KL div age,Delta AUPRC health,EqOdds health,KL div health
0,Cardiomegaly,37.2,79.2,3.8,1.0,1.8,16.9,1.4,13.2,0.2,1.2,10.7,5.2,1.0,9.0
1,Lung Opacity,63.0,68.3,0.3,1.3,0.0,1.0,3.3,1.8,0.6,9.0,3.6,4.1,8.6,4.1
2,Edema,48.9,77.8,0.9,3.0,1.2,6.9,1.5,6.8,6.4,2.2,8.1,8.0,2.2,9.1
3,Pleural Effusion,69.6,80.5,0.6,0.5,0.7,11.5,3.4,1.3,4.0,6.0,2.4,4.7,5.3,4.2


In [28]:
df.to_csv("all_bias_methods_xgb.csv") 