In [1]:
from functools import partial

import numpy as np
import pandas as pd
import tensorflow as tf

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from modules.dataset import Dataset, LABELS

2022-12-09 08:21:01.177350: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Apply seaborn's default theme so every figure drawn later in the notebook shares one look.
sns.set_theme()

## Functions

In [3]:
def get_test_dataset_5_fold():
    """Return the test splits of folds 1 through 5 as a 5-tuple.

    One ``Dataset`` instance is created and ``get_kfold`` is called once per
    fold (in ascending fold order) with ``sample=False``. Only the second
    element of each returned pair is kept; it is used as the test split.
    """
    dataset = Dataset()
    test_splits = []
    for fold_number in range(1, 6):
        _, test_split = dataset.get_kfold(fold_number=fold_number, sample=False)
        test_splits.append(test_split)
    return tuple(test_splits)

In [4]:
def get_y_true(data):
    """Gather every label yielded by ``data`` into a single ``tf.Variable``.

    ``data`` is iterated as ``(X, y)`` pairs; each ``y`` is iterated in turn and
    its elements are collected in encounter order. Presumably ``data`` is a
    batched dataset and ``y`` one batch of label vectors -- TODO confirm
    against ``Dataset.get_kfold``.
    """
    labels = [label for _, y in data for label in y]
    return tf.Variable(labels)

In [5]:
def preprocessing(x, label: int):
    """Return the element of ``x`` stored at index ``label``."""
    selected = x[label]
    return selected

In [6]:
def drop_duplicates(tpr, fpr):
    """Drop repeated (TPR, FPR) points, keeping the first occurrence of each.

    The two inputs are paired row by row; a row is removed only when both its
    TPR and its FPR equal those of an earlier row. The surviving values are
    returned as two arrays ``(tpr, fpr)`` in their original order.
    """
    unique_points = pd.DataFrame({"TPR": tpr, "FPR": fpr}).drop_duplicates()
    return unique_points["TPR"].values, unique_points["FPR"].values

In [7]:
def get_tpr_fpr(true_positive, true_negative, false_positive, false_negative, label_index):
    """Compute the de-duplicated ROC points (TPR, FPR) for one label.

    Each of the four inputs is iterated row by row and element ``label_index``
    of every row is extracted, giving one count per row. From those counts:

        TPR = TP / (TP + FN)
        FPR = FP / (FP + TN)

    Repeated (TPR, FPR) pairs are then removed with ``drop_duplicates``.
    Zero denominators are not guarded against here.

    Returns
    -------
    tuple
        ``(tpr, fpr)`` arrays.
    """
    def counts_for_label(rows):
        # Pick this label's entry out of every row.
        return np.array([preprocessing(row, label=label_index) for row in rows])

    tp = counts_for_label(true_positive)
    tn = counts_for_label(true_negative)
    fp = counts_for_label(false_positive)
    fn = counts_for_label(false_negative)

    return drop_duplicates(tp / (tp + fn), fp / (fp + tn))

In [8]:
def calculate_auc(tpr, fpr):
    """Area under an ROC curve by the trapezoidal rule.

    Parameters
    ----------
    tpr, fpr : array-like of float
        True-positive and false-positive rates of consecutive ROC points.
        Both must have the same length. Plain lists and tuples are accepted.

    Returns
    -------
    numpy.float64
        Sum over consecutive point pairs of (mean TPR) * |delta FPR|.
        ``0.0`` when fewer than two points are given.
    """
    # Convert first: on plain Python lists `+` would concatenate rather than
    # add element-wise, and the later division would raise TypeError.
    tpr = np.asarray(tpr, dtype=float)
    fpr = np.asarray(fpr, dtype=float)

    mean_height = (tpr[1:] + tpr[:-1]) / 2
    # Absolute width, so points ordered by ascending or descending FPR give
    # the same (non-negative) area.
    width = np.abs(np.diff(fpr))
    return np.sum(mean_height * width)

In [9]:
def plot_auc(tpr, fpr):
    """Draw an ROC curve with the rectangles whose areas sum to its AUC.

    The figure (12 x 6 inches) contains:
      * the ROC line and its individual points,
      * one thin purple outlined rectangle per pair of consecutive points,
        spanning the two FPR values and as tall as the mean of the two TPR
        values,
      * a red dashed diagonal from (0, 0) to (1, 1) for reference.

    The figure is shown immediately; nothing is returned.
    """
    plt.figure(figsize=(12, 6))

    # ROC curve.
    sns.lineplot(x=fpr, y=tpr, linewidth=2)

    # Rectangles under the curve, one per consecutive pair of points.
    axes = plt.gca()
    for idx, (left, right) in enumerate(zip(fpr[:-1], fpr[1:])):
        outline = mpatches.Rectangle(
            xy=(left, 0),
            width=right - left,
            height=(tpr[idx] + tpr[idx + 1]) / 2,
            fill=False,
            color="purple",
            linewidth=0.2,
        )
        axes.add_patch(outline)

    # Individual ROC points and the chance-level diagonal.
    sns.scatterplot(x=fpr, y=tpr, s=50, marker="o")
    sns.lineplot(x=[0, 1], y=[0, 1], linestyle="--", color="red")

    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.show()

## Main

In [31]:
def get_best_model(
    model_path="/home/jovyan/ChestXray-14/results/models/EfficientNetB0_None_fold_1.h5",
):
    """Load the saved Keras model that is evaluated in this notebook.

    Parameters
    ----------
    model_path : str, optional
        Path of the saved model file. Defaults to the checkpoint this
        notebook was originally run with, so existing zero-argument calls
        behave as before; pass another path to run on a different machine
        or evaluate a different checkpoint.

    Returns
    -------
    The object returned by ``tf.keras.models.load_model``.
    """
    # NOTE(review): the default file name says "None" while the cached
    # prediction CSV read by get_best_model_prediction_df says "imagenet" --
    # confirm this is the intended checkpoint.
    return tf.keras.models.load_model(model_path)

In [32]:
def model_predict(model, test_dataset):
    """Call ``model.predict`` on ``test_dataset`` and return its result unchanged."""
    predictions = model.predict(test_dataset)
    return predictions

In [10]:
def get_best_model_prediction_df(
    csv_path="~/ChestXray-14/results/prediction/EfficientNetB0_imagenet_fold_1.csv",
):
    """Load cached predictions from CSV and turn every cell back into a Python object.

    Parameters
    ----------
    csv_path : str, optional
        CSV file to read. Defaults to the file this notebook originally
        used, so existing zero-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Same columns as the CSV; every cell of every column is replaced by
        the result of evaluating its text as a Python expression.
    """
    best_model_prediction = pd.read_csv(csv_path)

    # SECURITY NOTE(review): eval() executes whatever expression the CSV
    # contains, so only point this at files you produced yourself. If the
    # cells are plain list/number literals, ast.literal_eval would be a safer
    # replacement -- TODO confirm the cell format before switching.
    for column in list(best_model_prediction.columns):
        best_model_prediction[column] = (
            best_model_prediction[column].apply(eval).tolist()
        )
    return best_model_prediction

In [50]:
def dict_of_auc():
    """Compute per-class ROC AUC of the best model on the test split of every fold.

    For each fold's test split the model's predictions are fed, together with
    the true labels, into a multi-label ``tf.keras.metrics.AUC``. The metric's
    accumulated confusion counts are then turned into per-class ROC points and
    integrated with ``calculate_auc``. For every fold the mean of the
    per-class values and the metric's own result are printed side by side as
    a consistency check.

    Returns
    -------
    dict
        Maps ``"Fold k"`` (k starting at 1) to the list of per-class AUC
        values, ordered by class index.
    """
    auc_dict = dict()

    dataset = get_test_dataset_5_fold()
    model = get_best_model()

    for i, test_dataset in enumerate(dataset):
        # NOTE(review): labels and predictions come from two separate passes
        # over test_dataset; they only line up if its iteration order is
        # deterministic -- confirm the dataset is not shuffled.
        y_true = get_y_true(test_dataset)
        y_preds = model_predict(model, test_dataset)

        m = tf.keras.metrics.AUC(multi_label=True)
        m.update_state(y_true, y_preds)

        # Read the confusion counts by attribute name. Unpacking m.variables
        # positionally silently swaps counts if a Keras version creates the
        # variables in a different order.
        TP = m.true_positives.numpy()
        TN = m.true_negatives.numpy()
        FP = m.false_positives.numpy()
        FN = m.false_negatives.numpy()

        # In multi-label mode the counters have one column per label, so the
        # number of classes is taken from the metric state, not hard-coded.
        num_labels = TP.shape[-1]

        auc_list = []
        for label_index in range(num_labels):
            tpr, fpr = get_tpr_fpr(TP, TN, FP, FN, label_index)
            auc_list.append(calculate_auc(tpr, fpr))

        auc_dict[f"Fold {i+1}"] = auc_list

        print(f"Average AUC: {np.mean(auc_list)}")
        print("From AUC function:", m.result().numpy())
    return auc_dict

In [53]:
# Per-class AUC table: one column per fold, one row per entry of LABELS
# (rows are assumed to follow the model's class order -- TODO confirm).
df = pd.DataFrame(dict_of_auc()).set_axis(LABELS, axis="index")
df

Average AUC: 0.7434553248747219
From AUC function: 0.7434553
Average AUC: 0.7646464820692084
From AUC function: 0.7646465
Average AUC: 0.7752695554776492
From AUC function: 0.7752695
Average AUC: 0.7774242436985044
From AUC function: 0.7774242
Average AUC: 0.7823884904952931
From AUC function: 0.7823885


Unnamed: 0,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5
No Finding,0.752263,0.7747,0.771036,0.78452,0.787776
Atelectasis,0.762817,0.76827,0.781255,0.78802,0.794741
Consolidation,0.769436,0.790443,0.796674,0.808158,0.797804
Infiltration,0.670295,0.695342,0.694883,0.693143,0.699185
Pneumothorax,0.792337,0.83086,0.818551,0.830902,0.826809
Edema,0.858786,0.866641,0.89167,0.882505,0.880159
Emphysema,0.751866,0.76657,0.784147,0.781317,0.79274
Fibrosis,0.71566,0.726134,0.743639,0.748449,0.721444
Effusion,0.848905,0.864699,0.86525,0.874754,0.883533
Pneumonia,0.700593,0.73021,0.729097,0.739062,0.741673


In [57]:
# Save the AUC table as CSV; index=True writes the row labels as the first column.
df.to_csv("/home/jovyan/ChestXray-14/results/best_model_AUC_on_test_5_folds.csv", index=True)