In [5]:
from IPython.display import clear_output
!pip install tensorflow_addons
clear_output()

In [6]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
from modules.dataset import LABELS, Dataset

## Functions

In [30]:
def preprocessing_data(x):
    if "e+" in x:
        values = eval(x.replace(" ", ", "))
        return np.array(values)
    else:
        values = eval(x.replace("\n", "").replace(". ", ".,"))
        return np.array(values)

In [17]:
def get_test_dataset_5_fold():
    dataset = Dataset()
    _, test_dataset_fold_1 = dataset.get_kfold(fold_number=1, sample=False)
    _, test_dataset_fold_2 = dataset.get_kfold(fold_number=2, sample=False)
    _, test_dataset_fold_3 = dataset.get_kfold(fold_number=3, sample=False)
    _, test_dataset_fold_4 = dataset.get_kfold(fold_number=4, sample=False)
    _, test_dataset_fold_5 = dataset.get_kfold(fold_number=5, sample=False)
    return (
        test_dataset_fold_1,
        test_dataset_fold_2,
        test_dataset_fold_3,
        test_dataset_fold_4,
        test_dataset_fold_5
    )

In [10]:
def get_y_true(data):
    y_true=[]
    for X,y in data:
        for label in y:
            y_true.append(label)
    y_true = tf.Variable(y_true)
    return y_true

In [11]:
def get_model(model_path):
    return tf.keras.models.load_model(model_path)

In [12]:
def get_filename(model_path):
    return model_path.split(".")[0].split("/")[-1]

In [24]:
def get_confusion(path):
    """
    Parameters
    ----------
    path: str
        path to confusion metrics csv file of needed model
    """
    df = pd.read_csv(path)
    thresholds = df["thresholds"].values
    TP = df["TP"].apply(preprocessing_data).values
    TN = df["TN"].apply(preprocessing_data).values
    FP = df["FP"].apply(preprocessing_data).values
    FN = df["FN"].apply(preprocessing_data).values
    return thresholds, TP, TN, FP, FN

In [26]:
def get_f1_dict(thresholds, TP, TN, FP, FN):
    f1_thresholds_dict = dict()
    for i_thresh, (tps, tns, fps, fns) in enumerate(zip(TP, TN, FP, FN)):
        for label, tp, tn, fp, fn in zip(LABELS, tps, tns, fps, fns):
            f1_score = 2*tp / (2*tp + fp + fn)
            try: 
                f1_thresholds_dict[label].append((thresholds[i_thresh], f1_score))
            except:
                f1_thresholds_dict[label] = [(thresholds[i_thresh], f1_score)]
    return f1_thresholds_dict

In [28]:
def get_best_multiple_threshold(dictionary, name="F1-score vs. Thresholds", x_label='Thresholds', y_label='F1-scores', plot_compare=False):
    if plot_compare:
        plt.figure(figsize=(20,12))
    best_multiple_threshold_list = []
    for i, (label, value) in enumerate(dictionary.items()):
        x, y = zip(*value)
        if plot_compare:
            plt.plot(x, y, marker='.', label=label)
        
        y = np.array(y)
        best = y.argmax()
        if plot_compare:
            sns.scatterplot(x=[x[best]], y=[y[best]], marker="X", s=300)
        
        best_multiple_threshold_list.append(x[y.argmax()])

    if plot_compare:
        plt.title(name)
        plt.legend()
        plt.xlabel(x_label, fontweight='bold')
        plt.ylabel(y_label, fontweight='bold')
        plt.ylim(-0.05, 1.05)
        plt.xlim(-0.05, 1.05)
        plt.show()
    return best_multiple_threshold_list

In [33]:
def get_thresholds(path):
    confusion = get_confusion(path)
    f1_dicts = get_f1_dict(*confusion)
    best_thresholds = get_best_multiple_threshold(f1_dicts, plot_compare=False)
    return best_thresholds

In [19]:
def model_predict(model, test_dataset):
    return model.predict(test_dataset)

In [37]:
def evaluate(model, test_dataset, file_name):
    best_thresholds = get_thresholds(f"/home/jovyan/ChestXray-14/results/confusion/{file_name}.csv")
    y_true = get_y_true(test_dataset)
    y_preds = model_predict(model, test_dataset)
    m = tf.keras.metrics.AUC(multi_label=True)
    m.update_state(y_true, y_preds)
    return m.result().numpy()

In [49]:
def evaluate_5_folds(path):
    """
    Parameter
    ---------
    path: str
        path to best model
    """
    model = get_model(path)
    file_name = get_filename(path)
    
    auc_5_folds = []
    print(f"===== {file_name} =====")
    for i in range(5): # 5 Folds
        auc = evaluate(model=model, test_dataset=test_dataset[i-1], file_name=file_name)
        auc_5_folds.append(auc)
        print(f">> Fold {i+1} = {auc}")
        # break
    
    print(f"Average AUC of {file_name} = {np.mean(auc_5_folds)}")

## Main

In [43]:
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy("mixed_float16")

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA A100-SXM4-40GB, compute capability 8.0


In [15]:
model_path = {
    "DenseNet121_none": "/home/jovyan/ChestXray-14/results/models/DenseNet121_None_fold_3.h5",
    "DenseNet121_imagenet": "/home/jovyan/ChestXray-14/results/models/DenseNet121_imagenet_fold_1.h5",
    "EfficeientNetB0_none": "/home/jovyan/ChestXray-14/results/models/EfficientNetB0_None_fold_1.h5",
    "EfficeientNetB0_imagenet": "/home/jovyan/ChestXray-14/results/models/EfficientNetB0_imagenet_fold_1.h5",
    "Resnet50_none": "/home/jovyan/ChestXray-14/results/models/Resnet50_fold_1.h5",
    "Resnet50_imagenet": "/home/jovyan/ChestXray-14/results/models/Resnet50_imagenet_fold_3.h5"
}

In [18]:
test_dataset = get_test_dataset_5_fold()

2022-12-08 07:42:10.031441: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-08 07:42:11.728870: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31887 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:17:00.0, compute capability: 8.0


In [40]:
evaluate_5_folds(model_path["DenseNet121_none"])

===== DenseNet121_None_fold_3 =====
>> Fold 1 = 0.7810665965080261
>> Fold 2 = 0.7555677890777588
>> Fold 3 = 0.7544834017753601
>> Fold 4 = 0.7426716089248657
>> Fold 5 = 0.7605154514312744
Average AUC of DenseNet121_None_fold_3 = 0.7588610053062439


In [50]:
evaluate_5_folds(model_path["DenseNet121_imagenet"])

===== DenseNet121_imagenet_fold_1 =====
>> Fold 1 = 0.7647889256477356
>> Fold 2 = 0.7416101098060608
>> Fold 3 = 0.756729781627655
>> Fold 4 = 0.7636765241622925
>> Fold 5 = 0.7629464268684387
Average AUC of DenseNet121_imagenet_fold_1 = 0.7579503655433655


In [51]:
evaluate_5_folds(model_path["EfficeientNetB0_none"])

===== EfficientNetB0_None_fold_1 =====
>> Fold 1 = 0.7823885083198547
>> Fold 2 = 0.7434552907943726
>> Fold 3 = 0.7646464705467224
>> Fold 4 = 0.7752695083618164
>> Fold 5 = 0.7774242162704468
Average AUC of EfficientNetB0_None_fold_1 = 0.7686368227005005


In [52]:
evaluate_5_folds(model_path["EfficeientNetB0_imagenet"])

===== EfficientNetB0_imagenet_fold_1 =====
>> Fold 1 = 0.7020613551139832
>> Fold 2 = 0.6801937222480774
>> Fold 3 = 0.6946982145309448
>> Fold 4 = 0.6994134783744812
>> Fold 5 = 0.6946086883544922
Average AUC of EfficientNetB0_imagenet_fold_1 = 0.6941951513290405


In [53]:
evaluate_5_folds(model_path["Resnet50_none"])

===== Resnet50_fold_1 =====
>> Fold 1 = 0.7519645690917969
>> Fold 2 = 0.7188865542411804
>> Fold 3 = 0.7348979711532593
>> Fold 4 = 0.739715039730072
>> Fold 5 = 0.7384971380233765
Average AUC of Resnet50_fold_1 = 0.736792266368866


In [54]:
evaluate_5_folds(model_path["Resnet50_imagenet"])

===== Resnet50_imagenet_fold_3 =====
>> Fold 1 = 0.7640064358711243
>> Fold 2 = 0.7527133226394653
>> Fold 3 = 0.7507029175758362
>> Fold 4 = 0.7430503964424133
>> Fold 5 = 0.7600948810577393
Average AUC of Resnet50_imagenet_fold_3 = 0.7541135549545288
