In [1]:
# Analyzing results on NIH datasets

# Import necessary packages
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
from balancersV2 import BinaryBalancer
import updated_tools
import seaborn as sns
from sklearn.metrics import roc_auc_score, confusion_matrix

In [2]:
# Set relevant variables
# Root folder of the results; the analysis reads <folder_name>/<model>/<condition>/...
folder_name = 'reorganized_nih_results'

# Two labels ('f' and 'm') for each of the five split labels 0/100 ... 100/0.
columns = ['{}/{} {}'.format(left, 100 - left, suffix)
           for left in range(0, 101, 25)
           for suffix in ('f', 'm')]

# Model sub-folder names, ordered from 100% down to 0% female training images.
models = ['train{}%_female_images'.format(pct) for pct in range(100, -1, -25)]

# The 14 condition sub-folders analysed for every model.
condition_names = [
    'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass',
    'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema',
    'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia',
]

# Number of folds per condition and the matching per-fold file names (Fold_0 ... Fold_19).
num_folds = 20
fold_list = ['Fold_{}'.format(k) for k in range(num_folds)]

# Tolerance applied to the absolute TPR / FPR differences in the fairness check.
eps = 0.05

In [3]:
# Calculate number of fair models and direction of fairness
# For every (model, condition) pair, each fold's scores are binarised at that fold's
# optimal threshold and the between-group TPR and FPR differences (group 1 minus
# group 0) are measured. A fold is flagged when either absolute difference reaches
# `eps`; flagged folds are then split by the sign of (TPR difference - FPR difference).
# data_list receives two rows per model, appended as favor_m then favor_f, each row
# holding one fold count per condition.
# NOTE(review): presumably group 1 is male, going by the favor_m naming - confirm
# against BinaryBalancer's group ordering.
data_list = []
for model in models:
    print(model)
    favor_m, favor_f = [], []
    for condition in condition_names:
        folder_path = os.path.join(folder_name, model, condition)
        thresholds_path = os.path.join(folder_path, 'optimal_thresholds.csv')
        thresholds = pd.read_csv(thresholds_path)['Optimal Threshold'].values

        tpr_diffs, fpr_diffs = [], []
        for k, fold_file in enumerate(fold_list):
            fold_df = pd.read_csv(os.path.join(folder_path, fold_file), index_col=[0])
            labels = fold_df['y'].values
            # Hard 0/1 predictions at this fold's own threshold.
            preds = (fold_df['y_prob'].values >= thresholds[k]) * 1
            groups = fold_df['a'].values
            balancer = BinaryBalancer(labels, preds, groups, groups)
            group_0 = balancer.a_gr_list[0]
            group_1 = balancer.a_gr_list[1]
            tpr_diffs.append(group_1.tpr - group_0.tpr)
            fpr_diffs.append(group_1.fpr - group_0.fpr)
        bias_tpr = np.array(tpr_diffs)
        bias_fpr = np.array(fpr_diffs)

        # 1.0 where the fold breaches the tolerance on TPR or FPR, 0.0 otherwise.
        exceeds_eps = (np.abs(bias_tpr) >= eps) | (np.abs(bias_fpr) >= eps)
        flagged = exceeds_eps.astype('float')
        # Folds within tolerance are zeroed out so they land in neither count.
        gap = flagged * (bias_tpr - bias_fpr)
        favor_m.append((gap > 0).sum())
        favor_f.append((gap < 0).sum())
    data_list += [favor_m, favor_f]

train100%_female_images
train75%_female_images
train50%_female_images
train25%_female_images
train0%_female_images


In [5]:
# Generate tables
# Build two summary tables from `data_list` and write them to CSV under tables/:
#   * final_bias_favor_data.csv   - one row per condition, two columns per model
#   * final_overall_bias_data.csv - the two columns of each model added together
# Column labels are assigned purely by position, so they must follow the order in
# which data_list was filled.
# NOTE(review): data_list is filled as (favor_m, favor_f) pairs in `models` order,
# while the labels below read '... bias f', '... bias m' - confirm the intended pairing.
os.makedirs('tables', exist_ok=True)  # to_csv does not create a missing directory

columns = ['0/100 bias f', '0/100 bias m', '25/75 bias f', '25/75 bias m', '50/50 bias f', '50/50 bias m', '75/25 bias f', '75/25 bias m', '100/0 bias f', '100/0 bias m']
final_data = pd.DataFrame(np.array(data_list).T, columns=columns, index=condition_names)
# to_csv's `index` is a boolean flag (unlike read_csv's index_col); the list [0] that
# was passed before only worked because it is truthy.
final_data.to_csv('tables/final_bias_favor_data.csv', index=True)

columns = final_data.columns
overall_labels = ['0/100', '25/75', '50/50', '75/25', '100/0']
# Each model owns two adjacent columns; their sum is that model's overall count.
overall_bias = [final_data[columns[2 * i]] + final_data[columns[2 * i + 1]]
                for i in range(len(overall_labels))]
overall_df = pd.DataFrame(np.column_stack(overall_bias), columns=overall_labels, index=final_data.index)
overall_df.to_csv('tables/final_overall_bias_data.csv')