In [1]:
from functools import partial

import numpy as np
import pandas as pd
import tensorflow as tf

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from modules.dataset import Dataset, LABELS

2022-12-25 06:34:40.480722: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Apply seaborn's default theme before any figure is drawn in this notebook.
sns.set_theme()

## Functions

In [3]:
def get_test_dataset_5_fold():
    """Collect the test split of each of the five cross-validation folds.

    One ``Dataset`` instance is created and ``get_kfold`` is called on it once
    per fold number (1 through 5) with ``sample=False``. Only the second
    element of every returned pair is kept.

    Returns:
        tuple: Five test datasets, ordered from fold 1 to fold 5.
    """
    source = Dataset()
    test_splits = []
    for fold_number in range(1, 6):
        # The first element of the pair is not needed here.
        _, test_split = source.get_kfold(fold_number=fold_number, sample=False)
        test_splits.append(test_split)
    return tuple(test_splits)

In [4]:
def get_y_true(data):
    """Gather every ground-truth label yielded by a batched dataset.

    Args:
        data: Iterable yielding two-element ``(features, labels)`` pairs, where
            ``labels`` is itself iterable.

    Returns:
        tf.Variable: All labels, in iteration order, wrapped in one variable.
    """
    # Features are ignored; labels are flattened across all batches.
    labels = [label for _, batch_labels in data for label in batch_labels]
    return tf.Variable(labels)

In [5]:
def preprocessing(x, label: int):
    """Return the entry of ``x`` found at position ``label``.

    Args:
        x: Any object supporting ``x[label]``.
        label: Index (or key) to look up in ``x``.

    Returns:
        The value ``x[label]``.
    """
    selected = x[label]
    return selected

In [6]:
def drop_duplicates(tpr, fpr):
    """Remove repeated (TPR, FPR) pairs, keeping the first occurrence of each.

    Args:
        tpr: Sequence of true-positive rates.
        fpr: Sequence of false-positive rates, same length as ``tpr``.

    Returns:
        tuple: ``(tpr_values, fpr_values)`` as NumPy arrays holding the rows
        that remain after duplicate pairs are dropped, in their original order.
    """
    pairs = pd.DataFrame({"TPR": tpr, "FPR": fpr})
    unique_pairs = pairs.drop_duplicates()
    tpr_unique = unique_pairs["TPR"].values
    fpr_unique = unique_pairs["FPR"].values
    return tpr_unique, fpr_unique

In [7]:
def get_tpr_fpr(true_positive, true_negative, false_positive, false_negative, label_index):
    """Build the de-duplicated TPR/FPR series for one label.

    Each of the four count arguments is iterated row by row and the entry at
    ``label_index`` is taken from every row, giving one value per row.

    Args:
        true_positive: Iterable of rows of true-positive counts.
        true_negative: Iterable of rows of true-negative counts.
        false_positive: Iterable of rows of false-positive counts.
        false_negative: Iterable of rows of false-negative counts.
        label_index: Position of the label to extract from every row.

    Returns:
        tuple: ``(tpr, fpr)`` arrays with duplicate (TPR, FPR) pairs removed.

    Note:
        The denominators are not guarded; a zero denominator follows NumPy's
        usual division semantics.
    """
    def column(rows):
        # One value per row: the entry belonging to ``label_index``.
        return np.array([preprocessing(row, label=label_index) for row in rows])

    tp_col = column(true_positive)
    tn_col = column(true_negative)
    fp_col = column(false_positive)
    fn_col = column(false_negative)

    sensitivity = tp_col / (tp_col + fn_col)
    fall_out = fp_col / (fp_col + tn_col)

    return drop_duplicates(sensitivity, fall_out)

In [9]:
def calculate_auc(tpr, fpr):
    """Approximate the area under a ROC curve with the trapezoidal rule.

    For every pair of consecutive points the mean of the two TPR values is
    multiplied by the absolute FPR distance between them, and the products
    are summed.

    Args:
        tpr: 1-D array-like of true-positive rates (ndarray, list, Series, ...).
        fpr: 1-D array-like of false-positive rates, same length and order
            as ``tpr``.

    Returns:
        The summed trapezoid areas; ``0`` when fewer than two points are given.
    """
    # Work on positional arrays. Without this, lists would be concatenated by
    # ``+`` and pandas Series would be aligned on index labels rather than on
    # position, which silently produces NaN.
    tpr = np.asarray(tpr)
    fpr = np.asarray(fpr)

    mean_height = (tpr[1:] + tpr[:-1]) / 2
    # abs() makes the result independent of whether FPR ascends or descends.
    width = abs(fpr[1:] - fpr[:-1])

    # The built-in sum is kept on purpose so that accumulation order and the
    # resulting scalar type are unchanged for NumPy inputs.
    return sum(mean_height * width)

In [10]:
def plot_auc(tpr, fpr):
    """Draw a ROC curve together with the rectangles used to integrate it.

    The figure shows the TPR-vs-FPR line, one outlined rectangle per pair of
    neighbouring points (width = FPR step, height = mean of the two TPR
    values), the individual points, and a dashed red diagonal from (0, 0) to
    (1, 1).

    Args:
        tpr: Sequence of true-positive rates.
        fpr: Sequence of false-positive rates, same length and order as ``tpr``.
    """
    plt.figure(figsize=(12, 6))

    # The ROC curve itself.
    sns.lineplot(x=fpr, y=tpr, linewidth=2)

    # One outlined rectangle per pair of neighbouring points.
    axes = plt.gca()
    for left, right, tpr_left, tpr_right in zip(fpr[:-1], fpr[1:], tpr[:-1], tpr[1:]):
        outline = mpatches.Rectangle(
            xy=(left, 0),
            width=right - left,
            height=(tpr_left + tpr_right) / 2,
            fill=False,
            color="purple",
            linewidth=0.2,
        )
        axes.add_patch(outline)

    # Individual points, then the diagonal reference line.
    sns.scatterplot(x=fpr, y=tpr, s=50, marker="o")
    sns.lineplot(x=[0, 1], y=[0, 1], linestyle="--", color="red")

    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.show()

## Main

In [14]:
def get_best_model(model_path="/home/jovyan/ChestXray-14/results/models/EfficientNetB0_None_fold_3.h5"):
    """Load a saved Keras model from disk.

    Args:
        model_path: Location of the saved model file. The default is the
            checkpoint this notebook was originally run with, so calling the
            function without arguments behaves as before.

    Returns:
        The model object returned by ``tf.keras.models.load_model``.
    """
    return tf.keras.models.load_model(model_path)

In [15]:
def model_predict(model, test_dataset):
    """Run ``model.predict`` on ``test_dataset`` and hand back its result.

    Args:
        model: Object exposing a ``predict`` method.
        test_dataset: Input passed unchanged to ``model.predict``.

    Returns:
        Whatever ``model.predict(test_dataset)`` returns.
    """
    predictions = model.predict(test_dataset)
    return predictions

In [16]:
# def get_best_model_prediction_df():
#     best_model_prediction = pd.read_csv("~/ChestXray-14/results/prediction/EfficientNetB0_imagenet_fold_1.csv")
#     for key,values in best_model_prediction.items():
#         best_model_prediction[key] = values.apply(eval).tolist()
#     return best_model_prediction

In [17]:
def dict_of_auc():
    """Compute per-label AUC of the best model on the test split of every fold.

    For each fold's test dataset the model's predictions are fed, together
    with the true labels, into a multi-label ``tf.keras.metrics.AUC``. The
    metric's accumulated confusion counts are then turned into a TPR/FPR
    series per label and integrated with ``calculate_auc``. The mean of the
    per-label values is printed next to the metric's own result so the two
    can be compared.

    Returns:
        dict: Maps ``"Fold k"`` (k starting at 1) to the list of per-label
        AUC values, ordered by label index.
    """
    auc_dict = dict()

    dataset = get_test_dataset_5_fold()
    model = get_best_model()

    for fold_number, test_dataset in enumerate(dataset, start=1):
        y_true = get_y_true(test_dataset)
        y_preds = model_predict(model, test_dataset)

        m = tf.keras.metrics.AUC(multi_label=True)
        m.update_state(y_true, y_preds)

        # Read the confusion counts by name instead of unpacking m.variables
        # positionally, so the code does not depend on the order in which the
        # metric happens to create its variables.
        TP = m.true_positives.numpy()
        TN = m.true_negatives.numpy()
        FP = m.false_positives.numpy()
        FN = m.false_negative.numpy() if False else m.false_negatives.numpy()

        # Each row of the count matrices is indexed by label downstream, so
        # the last axis gives the number of labels (previously hard-coded).
        num_labels = TP.shape[-1]

        auc_list = []
        for label_index in range(num_labels):
            tpr, fpr = get_tpr_fpr(TP, TN, FP, FN, label_index)
            auc_list.append(calculate_auc(tpr, fpr))
            # plot_auc(tpr, fpr) can be called here to inspect a single label.

        auc_dict[f"Fold {fold_number}"] = auc_list

        print(f"Average AUC: {np.mean(auc_list)}")
        print("From AUC function:", m.result().numpy())
    return auc_dict

In [18]:
# Build the results table: one column per fold (the keys returned by
# dict_of_auc), one row per label.
df = pd.DataFrame(dict_of_auc())
# Assumes LABELS has exactly as many entries as there are AUC values per fold
# and is in the same order as the label indices - TODO confirm.
df.index = LABELS
df

2022-12-25 06:36:48.812933: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-25 06:36:50.577575: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31853 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:b1:00.0, compute capability: 8.0
2022-12-25 06:37:14.183998: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2022-12-25 06:37:15.435539: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-12-25 06:37:15.436882: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file o

      7/Unknown - 9s 28ms/step

2022-12-25 06:37:19.743189: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Average AUC: 0.7684605109989027
From AUC function: 0.7684606
Average AUC: 0.7708837067566593
From AUC function: 0.77088374
Average AUC: 0.7506393305193811
From AUC function: 0.75063926
Average AUC: 0.7789387663007499
From AUC function: 0.77893883
Average AUC: 0.7859009787624086
From AUC function: 0.78590095


Unnamed: 0,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5
No Finding,0.774115,0.779931,0.748267,0.784305,0.791973
Atelectasis,0.779706,0.771103,0.761866,0.785773,0.797538
Consolidation,0.779879,0.788661,0.778305,0.804929,0.795448
Infiltration,0.684235,0.69588,0.681055,0.691947,0.69758
Pneumothorax,0.820415,0.833158,0.794361,0.827699,0.832036
Edema,0.877054,0.86923,0.876619,0.887434,0.882986
Emphysema,0.786494,0.779796,0.76978,0.793139,0.806052
Fibrosis,0.74082,0.736357,0.711822,0.730632,0.735287
Effusion,0.863438,0.867108,0.846437,0.876592,0.881235
Pneumonia,0.712934,0.728076,0.68978,0.754504,0.746591


In [19]:
# Persist the per-fold AUC table, keeping the label names as the first column.
# NOTE(review): absolute, machine-specific path - consider a configurable
# results directory.
df.to_csv("/home/jovyan/ChestXray-14/results/paper/table3/best_model_AUC_on_test_5_folds.csv", index=True)