In [1]:
!pip install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
from modules.dataset import LABELS, Dataset

2022-12-09 12:29:47.629839: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


## Evaluation Functions

In [3]:
def preprocessing_data(x):
    """Parse a stringified 1-D numeric array (one CSV cell) into a NumPy array.

    Parameters
    ----------
    x: str
        Text such as ``"[1.2e+03 3.4e+02]"`` or ``"[ 12.  34.  56.]"``: numbers
        separated by whitespace (line breaks included) and/or commas,
        optionally wrapped in square brackets.

    Returns
    -------
    numpy.ndarray
        1-D float array with the parsed values (empty for ``"[]"``).

    Raises
    ------
    ValueError
        If a token cannot be converted with ``float``.
    """
    # NOTE(review): the previous implementation rewrote the text into a Python
    # list literal and passed it to ``eval``. That executes arbitrary code if
    # the CSV is tampered with, and it failed on values such as "[0.5 1.5]" or
    # on repeated spaces in scientific notation. Tokens are now converted with
    # ``float`` directly, which accepts "12.", "1.e+03", "0.5", etc.
    tokens = x.replace("[", " ").replace("]", " ").replace(",", " ").split()
    return np.array([float(token) for token in tokens], dtype=float)

In [4]:
def get_confusion(path):
    """
    Load the per-threshold confusion counts stored for one model.

    Parameters
    ----------
    path: str
        path to the confusion-metrics csv file of the model of interest

    Returns
    -------
    tuple
        ``(thresholds, TP, TN, FP, FN)``: the raw ``thresholds`` column
        followed by the ``TP``, ``TN``, ``FP`` and ``FN`` columns, each cell
        of which has been parsed with ``preprocessing_data``.
    """
    table = pd.read_csv(path)
    thresholds = table["thresholds"].values
    # Same parsing for each of the four count columns, in the returned order.
    parsed_counts = [
        table[column].apply(preprocessing_data).values
        for column in ("TP", "TN", "FP", "FN")
    ]
    return (thresholds, *parsed_counts)

In [5]:
def get_f1_dict(thresholds, TP, TN, FP, FN):
    """Compute the F1-score of every label at every threshold.

    Parameters
    ----------
    thresholds: sequence
        Threshold values; ``thresholds[i]`` belongs to the i-th entry of
        ``TP``/``TN``/``FP``/``FN``.
    TP, TN, FP, FN: sequence of sequences
        For each threshold, the per-label counts, paired positionally with
        ``LABELS``. ``TN`` is part of the signature but does not enter the
        F1 formula.

    Returns
    -------
    dict
        Maps each label to a list of ``(threshold, f1_score)`` tuples in
        threshold order.
    """
    f1_thresholds_dict = dict()
    for i_thresh, (tps, tns, fps, fns) in enumerate(zip(TP, TN, FP, FN)):
        threshold = thresholds[i_thresh]
        for label, tp, _tn, fp, fn in zip(LABELS, tps, tns, fps, fns):
            denominator = 2*tp + fp + fn
            # With no true positives, false positives or false negatives the
            # ratio is 0/0. Report 0.0 instead of NaN / ZeroDivisionError so a
            # later search for the maximum is not derailed.
            f1_score = 2*tp / denominator if denominator != 0 else 0.0
            # setdefault replaces the former try / bare-except initialisation,
            # which would also have hidden unrelated errors.
            f1_thresholds_dict.setdefault(label, []).append((threshold, f1_score))
    return f1_thresholds_dict

In [6]:
def get_best_multiple_threshold(dictionary, name="F1-score vs. Thresholds", x_label='Thresholds', y_label='F1-scores', plot_compare=False):
    """Select, for every label, the threshold with the highest score.

    Parameters
    ----------
    dictionary: dict
        Maps each label to a non-empty sequence of ``(threshold, score)`` pairs.
    name: str
        Figure title (used only when ``plot_compare`` is true).
    x_label, y_label: str
        Axis labels (used only when ``plot_compare`` is true).
    plot_compare: bool
        When true, draw every label's curve and mark its best point.

    Returns
    -------
    list
        The best threshold of each label, in the iteration order of
        ``dictionary``. Ties resolve to the first maximum.
    """
    if plot_compare:
        plt.figure(figsize=(20,12))
    best_multiple_threshold_list = []
    for label, value in dictionary.items():
        x, y = zip(*value)
        if plot_compare:
            plt.plot(x, y, marker='.', label=label)

        y = np.asarray(y, dtype=float)
        # argmax on raw scores returns the index of the first NaN, which would
        # select a threshold whose score is undefined. Rank NaN below every
        # real score instead (an all-NaN curve still yields index 0).
        best = int(np.argmax(np.where(np.isnan(y), -np.inf, y)))
        if plot_compare:
            sns.scatterplot(x=[x[best]], y=[y[best]], marker="X", s=300)

        best_multiple_threshold_list.append(x[best])

    if plot_compare:
        plt.title(name)
        plt.legend()
        plt.xlabel(x_label, fontweight='bold')
        plt.ylabel(y_label, fontweight='bold')
        plt.ylim(-0.05, 1.05)
        plt.xlim(-0.05, 1.05)
        plt.show()
    return best_multiple_threshold_list

In [7]:
def get_thresholds(path):
    """Return the best per-label thresholds derived from the confusion CSV at ``path``.

    Chains ``get_confusion`` -> ``get_f1_dict`` -> ``get_best_multiple_threshold``
    with plotting disabled.
    """
    return get_best_multiple_threshold(
        get_f1_dict(*get_confusion(path)),
        plot_compare=False,
    )

In [8]:
def get_confusion_metrics(trues, prediction, thresh_value, num_classes=15):
    """Binarize predictions and compute a multi-label confusion matrix.

    Parameters
    ----------
    trues: array-like
        Ground-truth labels passed unchanged to the metric.
        # assumes multi-hot rows with ``num_classes`` columns — TODO confirm
    prediction: array-like
        Predicted scores; an entry becomes 1 when it is ``>= thresh_value``
        and 0 otherwise.
    thresh_value: float or array-like
        Cut-off(s) compared with ``np.greater_equal``, so a per-class sequence
        is broadcast against ``prediction``.
    num_classes: int, optional
        Number of classes handed to ``MultiLabelConfusionMatrix``. Defaults to
        15, the value that used to be hard-coded.

    Returns
    -------
    The value of ``MultiLabelConfusionMatrix.result()``. According to the
    TensorFlow Addons documentation this is one 2x2 matrix per class laid out
    as ``[[TN, FP], [FN, TP]]`` — confirm against the installed version.
    """
    metric = tfa.metrics.MultiLabelConfusionMatrix(num_classes=num_classes)
    binary_prediction = np.greater_equal(prediction, thresh_value).astype('int8')
    metric.update_state(trues, binary_prediction)
    return metric.result()

## Main Functions

In [9]:
def get_model(model_path):
    """Load the saved Keras model stored at ``model_path`` and return it."""
    loaded_model = tf.keras.models.load_model(model_path)
    return loaded_model

In [10]:
def get_filename(model_path):
    """Return the file name of ``model_path`` without directory or final extension.

    Example: ``"/a/b/DenseNet121_None_fold_3.h5"`` -> ``"DenseNet121_None_fold_3"``.

    The previous implementation cut the string at the first ``"."`` found
    anywhere in the path, so ``"./models/x.h5"`` produced ``""`` and a dotted
    directory such as ``"/data/v1.2/x.h5"`` produced the wrong name.
    """
    import os  # function-scope import: only this helper needs it

    return os.path.splitext(os.path.basename(model_path))[0]

In [11]:
def get_y_true(data):
    """Gather every label yielded by ``data`` into a single ``tf.Variable``.

    ``data`` is iterated as ``(features, labels)`` pairs; the elements of each
    ``labels`` batch are collected one by one, in iteration order.
    """
    collected_labels = [
        label
        for _, batch_labels in data
        for label in batch_labels
    ]
    return tf.Variable(collected_labels)

In [12]:
def get_test_dataset_5_fold():
    """Return a 5-tuple with the test split of folds 1 to 5.

    Each split is the second element returned by
    ``Dataset().get_kfold(fold_number=k, sample=False)``.
    """
    dataset = Dataset()
    test_splits = []
    for fold_number in range(1, 6):
        # The first element of the pair is not needed here.
        _, test_split = dataset.get_kfold(fold_number=fold_number, sample=False)
        test_splits.append(test_split)
    return tuple(test_splits)

In [13]:
def model_predict(model, test_dataset):
    """Run ``model.predict`` on ``test_dataset`` and return its output unchanged."""
    predictions = model.predict(test_dataset)
    return predictions

In [14]:
def calculate_f1_scores(confusions):
    """Compute one F1-score per 2x2 confusion matrix.

    Parameters
    ----------
    confusions: iterable
        Each item is indexed as ``[1, 1]`` (TP), ``[0, 1]`` (FP) and ``[1, 0]``
        (FN), and every indexed cell must expose ``.numpy()``.

    Returns
    -------
    numpy.ndarray
        ``2*TP / (2*TP + FP + FN)`` for every item. Items whose denominator is
        zero get 0.0 instead of NaN, so averaging the scores stays finite.
    """
    # Only TP, FP and FN enter the formula; the TN cell ([0, 0]) is not read.
    counts = [
        (confusion[1, 1].numpy(), confusion[0, 1].numpy(), confusion[1, 0].numpy())
        for confusion in confusions
    ]
    if not counts:
        return np.array([])

    tp, fp, fn = (np.array(column) for column in zip(*counts))
    numerator = 2 * tp
    denominator = numerator + fp + fn
    # Silence the 0/0 warning; those entries are overwritten right below.
    with np.errstate(divide="ignore", invalid="ignore"):
        f1_scores = numerator / denominator
    f1_scores[denominator == 0] = 0.0
    return f1_scores

In [15]:
def evaluate(model, test_dataset, file_name, confusion_dir="/home/jovyan/ChestXray-14/results/confusion"):
    """Return the mean per-class F1-score of ``model`` on ``test_dataset``.

    Parameters
    ----------
    model:
        Object passed to ``model_predict`` (its ``predict`` method is called).
    test_dataset:
        Iterable of ``(features, labels)`` batches; read once for the labels
        and once for the predictions.
    file_name: str
        Base name of the confusion CSV (``{file_name}.csv``) from which the
        per-class thresholds are derived.
    confusion_dir: str, optional
        Directory holding the confusion CSV files. Defaults to the location
        that used to be hard-coded.

    Returns
    -------
    Mean of the per-class F1-scores.
    """
    best_thresholds = get_thresholds(f"{confusion_dir}/{file_name}.csv")
    # NOTE(review): labels and predictions come from two separate passes over
    # test_dataset; this assumes both passes yield samples in the same order
    # (i.e. no reshuffling between iterations) — confirm in Dataset.get_kfold.
    y_true = get_y_true(test_dataset)
    y_preds = model_predict(model, test_dataset)

    confusion_metrics = get_confusion_metrics(y_true, y_preds, best_thresholds)
    return calculate_f1_scores(confusion_metrics).mean()

## Main

In [16]:
# Build the test split of each of the five folds once; `evaluate_5_folds`
# reads this tuple and indexes it by fold.
test_dataset = get_test_dataset_5_fold()

2022-12-09 12:29:49.776933: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-09 12:29:51.565765: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31885 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:31:00.0, compute capability: 8.0


In [17]:
def evaluate_5_folds(path, test_datasets=None):
    """
    Evaluate one saved model on every test fold and print the F1-scores.

    Parameters
    ----------
    path: str
        path to best model
    test_datasets: sequence, optional
        Test datasets, one per fold. When omitted, the notebook-level
        ``test_dataset`` tuple is used, as before.

    Prints one line per fold followed by the average over all folds;
    nothing is returned.
    """
    if test_datasets is None:
        # Keep the old behaviour: fall back to the notebook-level fold tuple.
        test_datasets = test_dataset

    model = get_model(path)
    file_name = get_filename(path)

    f1_score_5_folds = []
    print(f"===== {file_name} =====")
    # Iterate over the folds actually provided instead of a hard-coded range(5).
    for fold_number, fold_dataset in enumerate(test_datasets, start=1):
        f1_score = evaluate(model=model, test_dataset=fold_dataset, file_name=file_name)
        f1_score_5_folds.append(f1_score)
        print(f">> Fold {fold_number} = {f1_score}")

    print(f"Average F1-score of {file_name} = {np.mean(f1_score_5_folds)}")

In [18]:
# Score the DenseNet121_None_fold_3 checkpoint on all five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/DenseNet121_None_fold_3.h5")

===== DenseNet121_None_fold_3 =====


2022-12-09 12:30:18.579254: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2022-12-09 12:30:19.769211: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-12-09 12:30:19.770511: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-12-09 12:30:19.770567: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2022-12-09 12:30:19.771663: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-12-09 12:30:19.771871: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


      5/Unknown - 12s 42ms/step

2022-12-09 12:30:26.741980: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


>> Fold 1 = 0.24542779222949104
>> Fold 2 = 0.2472453915780557
>> Fold 3 = 0.24230897992083864
>> Fold 4 = 0.2494182410545275
>> Fold 5 = 0.2699579870962715
Average F1-score of DenseNet121_None_fold_3 = 0.2508716783758369


In [19]:
# Score the DenseNet121_imagenet_fold_1 checkpoint on all five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/DenseNet121_imagenet_fold_1.h5")

===== DenseNet121_imagenet_fold_1 =====
>> Fold 1 = 0.2390995627211101
>> Fold 2 = 0.24270190775576364
>> Fold 3 = 0.24822893202238797
>> Fold 4 = 0.24719865065763028
>> Fold 5 = 0.2542634533234685
Average F1-score of DenseNet121_imagenet_fold_1 = 0.24629850129607206


In [20]:
# Score the EfficientNetB0_None_fold_1 checkpoint on all five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/EfficientNetB0_None_fold_1.h5")

===== EfficientNetB0_None_fold_1 =====
>> Fold 1 = 0.2486133764385094
>> Fold 2 = 0.2578698300627198
>> Fold 3 = 0.26692477070520326
>> Fold 4 = 0.27011070152143113
>> Fold 5 = 0.2746828636088089
Average F1-score of EfficientNetB0_None_fold_1 = 0.26364030846733455


In [21]:
# Score the EfficientNetB0_imagenet_fold_1 checkpoint on all five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/EfficientNetB0_imagenet_fold_1.h5")

===== EfficientNetB0_imagenet_fold_1 =====
>> Fold 1 = 0.20952754158328657
>> Fold 2 = 0.2108646439356035
>> Fold 3 = 0.21360816689806497
>> Fold 4 = 0.21284767310995595
>> Fold 5 = 0.22176285074384658
Average F1-score of EfficientNetB0_imagenet_fold_1 = 0.21372217525415152


In [22]:
# Score the Resnet50_fold_1 checkpoint on all five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/Resnet50_fold_1.h5")

===== Resnet50_fold_1 =====
>> Fold 1 = 0.22270152446136035
>> Fold 2 = 0.22560158523003443
>> Fold 3 = 0.23483717230138135
>> Fold 4 = 0.2367149213644358
>> Fold 5 = 0.24405829605597665
Average F1-score of Resnet50_fold_1 = 0.23278269988263772


In [23]:
# Score the Resnet50_imagenet_fold_3 checkpoint on all five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/Resnet50_imagenet_fold_3.h5")

===== Resnet50_imagenet_fold_3 =====
>> Fold 1 = 0.24032120004588559
>> Fold 2 = 0.23778357570114642
>> Fold 3 = 0.23843739589530077
>> Fold 4 = 0.24766565159299098
>> Fold 5 = 0.25178957446003947
Average F1-score of Resnet50_imagenet_fold_3 = 0.24319947953907262
