In [1]:
import pandas as pd

## Loading predictions and targets

In [37]:
# Replace with the path to checkpoints

#results_path = [
#'cast20imagenet20230821-104820_fold0-epoch=19-val_auroc=0_mean.csv',
#'cast20imagenet20230822-160601_fold1-epoch=19-val_auroc=0_mean.csv',
#'cast20imagenet20230822-172537_fold2-epoch=19-val_auroc=0_mean.csv',
#'cast20imagenet20230822-184142_fold3-epoch=19-val_auroc=0_mean.csv',
#'cast20imagenet20230822-195857_fold4-epoch=19-val_auroc=0_mean.csv',
#]

# Prediction CSVs that make up the ensemble, one file per cross-validation fold
# (fold0 .. fold4). The loading cell iterates over `results_path`, so the list
# must be bound to that exact name; otherwise a fresh kernel raises NameError
# and a stale kernel silently reuses whatever `results_path` was left behind.
results_path = [
    'imagenet20230820-141826_fold0-epoch=19-val_auroc=0_mean.csv',
    'imagenet20230823-122216_fold1-epoch=19-val_auroc=0_mean.csv',
    'imagenet20230823-133342_fold2-epoch=19-val_auroc=0_mean.csv',
    'imagenet20230823-144846_fold3-epoch=19-val_auroc=0_mean.csv',
    'imagenet20230823-160327_fold4-epoch=19-val_auroc=0_mean.csv',
]

# Backward-compatible alias for the name this list was originally bound to.
result_path = results_path

In [38]:
# One column per ensemble member: column `fold_idx` holds the "0" column of
# the fold_idx-th CSV listed in `results_path`.
res_df = pd.DataFrame()
for fold_idx, csv_path in enumerate(results_path):
    fold_df = pd.read_csv(csv_path)
    res_df[fold_idx] = fold_df["0"]

# Ensemble score: row-wise average over the per-fold columns loaded above.
ensemble_mean = res_df.mean(axis=1)
res_df["mean_pred"] = ensemble_mean

# Ground truth: second column (index 1) of the header-less labels CSV.
# NOTE(review): rows are matched to the predictions purely by position/index;
# confirm the prediction CSVs follow the same row order as this file.
labels_df = pd.read_csv("input/mura-v11/MURA-v1.1/valid_labeled_studies.csv", header=None)
res_df["target"] = labels_df[1]

## Bootstrapping for AUROC and AUPRC

In [39]:
# scikit-learn bootstrap
from sklearn.utils import resample
from sklearn.metrics import precision_recall_curve, roc_curve, auc, accuracy_score, classification_report, f1_score
from tqdm import trange

In [40]:
# Bootstrap distributions of the ensemble metrics.
#
# Each replicate draws len(res_df) rows of `res_df` with replacement and
# re-computes AUROC, AUPRC, accuracy and F1 on that draw. The four lists
# collect one value per replicate.
N_BOOTSTRAP = 1000        # number of bootstrap replicates
BOOTSTRAP_SEED = 0        # base seed; replicate i is drawn with BOOTSTRAP_SEED + i
DECISION_THRESHOLD = 0.5  # cut-off applied to mean_pred for accuracy / F1

auc_list = []
auprc_list = []
acc_list = []
f1_list = []

for i in trange(N_BOOTSTRAP):
    # An explicit per-replicate seed makes the draws (and therefore the
    # reported intervals) identical on every Restart & Run All.
    boot = resample(
        res_df,
        replace=True,
        n_samples=len(res_df),
        random_state=BOOTSTRAP_SEED + i,
    )

    # Threshold-free metrics: area under the ROC and precision-recall curves.
    precision, recall, _ = precision_recall_curve(boot.target, boot.mean_pred)
    fpr, tpr, thresholds = roc_curve(boot.target, boot.mean_pred)
    auc_list.append(auc(fpr, tpr))
    auprc_list.append(auc(recall, precision))

    # Thresholded metrics share one hard prediction vector.
    hard_pred = boot.mean_pred > DECISION_THRESHOLD
    acc_list.append(accuracy_score(boot.target, hard_pred))
    f1_list.append(f1_score(boot.target, hard_pred))
    



def ic95(score_list):
    """Return the 95% percentile interval and the median of a list of scores.

    The scores are sorted and the elements at positions ``int(0.025 * n)``,
    ``int(0.5 * n)`` and ``int(0.975 * n)`` are picked, where ``n`` is the
    number of scores.

    Parameters
    ----------
    score_list : sequence of numbers
        Scores to summarise (plain Python numbers or NumPy scalars).
        The sequence is left untouched; a sorted copy is used internally.

    Returns
    -------
    tuple
        ``(confidence_lower, median, confidence_upper)``. NumPy scalars are
        converted to plain Python numbers through ``.item()``.

    Raises
    ------
    ValueError
        If ``score_list`` is empty.
    """
    # sorted() builds a new list, so the caller's list keeps its order
    # (an in-place .sort() on an alias would reorder it as a side effect).
    sorted_scores = sorted(score_list)
    n_scores = len(sorted_scores)
    if n_scores == 0:
        raise ValueError("ic95() requires at least one score")

    def _as_scalar(value):
        # Plain Python floats/ints have no .item(); only unwrap NumPy scalars.
        return value.item() if hasattr(value, "item") else value

    median = sorted_scores[int(0.5 * n_scores)]
    confidence_lower = sorted_scores[int(0.025 * n_scores)]
    confidence_upper = sorted_scores[int(0.975 * n_scores)]
    return _as_scalar(confidence_lower), _as_scalar(median), _as_scalar(confidence_upper)

def print_ic95(score_list):
    """Print the median and 95% interval of ``score_list`` on one line.

    The three values come from ``ic95(score_list)`` and are each rendered
    with the ``.3`` format spec, as
    ``median: <m> - IC95: [<lower>, <upper>]``.
    """
    lower, mid, upper = ic95(score_list)
    summary = 'median: {:.3} - IC95: [{:.3}, {:.3}]'.format(mid, lower, upper)
    print(summary)


100%|██████████████████████████████████████████████████████████| 1000/1000 [00:04<00:00, 217.38it/s]


In [41]:
# Report every bootstrapped metric as "<label> | median ... - IC95: [...]".
# The label is printed without a newline so the summary from print_ic95
# continues on the same line.
for metric_label, metric_scores in (
    ("AUROC | ", auc_list),
    ("AUPRC | ", auprc_list),
    ("Accuracy | ", acc_list),
    ("F1 Score | ", f1_list),
):
    print(metric_label, end="")
    print_ic95(metric_scores)

AUROC | median: 0.895 - IC95: [0.875, 0.913]
AUPRC | median: 0.885 - IC95: [0.857, 0.908]
Accuracy | median: 0.839 - IC95: [0.819, 0.857]
F1 Score | median: 0.806 - IC95: [0.778, 0.829]
