In [91]:
!pip install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting tensorflow_addons
  Downloading tensorflow_addons-0.18.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting typeguard>=2.7
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow_addons
Successfully installed tensorflow_addons-0.18.0 typeguard-2.13.3


In [92]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
from modules.dataset import LABELS, Dataset

## Evaluate Function

In [49]:
def preprocessing_data(x):
    """
    Parse one stringified numeric array from the confusion CSV into a NumPy array.

    Parameters
    ----------
    x: str
        text form of a 1-D array, e.g. "[1. 2. 3.]" or "[1.0e+03 2.0e+03]";
        values may be separated by spaces, commas and/or newlines

    Returns
    -------
    np.ndarray
        1-D float array holding the parsed values (empty for "[]")

    Raises
    ------
    ValueError
        if a token cannot be converted to float
    """
    # Tokenise the text instead of rewriting it into Python source and calling
    # eval(): eval executes whatever the CSV cell contains, and the previous
    # string substitutions failed on inputs such as "[1.5 2.5]" or values
    # separated by more than one space.
    tokens = x.replace("[", " ").replace("]", " ").replace(",", " ").split()
    return np.array([float(token) for token in tokens])

In [59]:
def get_confusion(path):
    """
    Load the per-threshold confusion counts stored for one model.

    Parameters
    ----------
    path: str
        path to the confusion metrics csv file of the needed model

    Returns
    -------
    tuple
        (thresholds, TP, TN, FP, FN); thresholds is the raw "thresholds"
        column, and each count entry is an array with one parsed vector
        per csv row
    """
    metrics_df = pd.read_csv(path)
    threshold_values = metrics_df["thresholds"].values
    # Each count column holds stringified arrays; parse them row by row.
    parsed_counts = tuple(
        metrics_df[column].apply(preprocessing_data).values
        for column in ("TP", "TN", "FP", "FN")
    )
    return (threshold_values, *parsed_counts)

In [60]:
def get_f1_dict(thresholds, TP, TN, FP, FN, labels=None):
    """
    Compute the F1-score of every label at every threshold.

    Parameters
    ----------
    thresholds: sequence
        threshold value used for each row of the count arrays
    TP, TN, FP, FN: sequence of sequences
        per-threshold vectors of per-label counts; TN is accepted for
        signature symmetry but does not enter the F1 formula
    labels: sequence of str, optional
        label names, one per position in the count vectors;
        defaults to the imported ``LABELS``

    Returns
    -------
    dict
        maps each label to a list of (threshold, f1_score) tuples in
        threshold order
    """
    if labels is None:
        labels = LABELS

    f1_thresholds_dict = {}
    for threshold, tps, tns, fps, fns in zip(thresholds, TP, TN, FP, FN):
        for label, tp, _tn, fp, fn in zip(labels, tps, tns, fps, fns):
            denominator = 2 * tp + fp + fn
            # With no true positives, false positives or false negatives the
            # ratio is 0/0; report 0.0 instead of NaN so that a later argmax
            # over the scores is not hijacked by the NaN entry.
            f1_score = 2 * tp / denominator if denominator else 0.0
            # setdefault replaces the previous bare try/except, which would
            # have hidden any unrelated error raised while appending.
            f1_thresholds_dict.setdefault(label, []).append((threshold, f1_score))
    return f1_thresholds_dict

In [85]:
def get_best_multiple_threshold(dictionary, name="F1-score vs. Thresholds", x_label='Thresholds', y_label='F1-scores', plot_compare=False):
    """
    Pick, for every label, the threshold with the highest score.

    Parameters
    ----------
    dictionary: dict
        maps each label to a non-empty list of (threshold, score) tuples
    name: str
        plot title (used only when plot_compare is True)
    x_label, y_label: str
        axis labels (used only when plot_compare is True)
    plot_compare: bool
        when True, draw every label's score curve and mark its best point

    Returns
    -------
    list
        best threshold of each label, in the dictionary's iteration order
    """
    ax = None
    if plot_compare:
        _, ax = plt.subplots(figsize=(20, 12))

    best_multiple_threshold_list = []
    for label, value in dictionary.items():
        x, y = zip(*value)
        y = np.array(y, dtype=float)

        # np.argmax returns the position of the first NaN when one is present,
        # so an undefined score would be selected as "best". Rank NaN lowest.
        best = int(np.where(np.isnan(y), -np.inf, y).argmax())

        if plot_compare:
            ax.plot(x, y, marker='.', label=label)
            sns.scatterplot(x=[x[best]], y=[y[best]], marker="X", s=300, ax=ax)

        best_multiple_threshold_list.append(x[best])

    if plot_compare:
        ax.set_title(name)
        ax.legend()
        ax.set_xlabel(x_label, fontweight='bold')
        ax.set_ylabel(y_label, fontweight='bold')
        ax.set_ylim(-0.05, 1.05)
        ax.set_xlim(-0.05, 1.05)
        plt.show()
    return best_multiple_threshold_list

In [96]:
def get_thresholds(path):
    """
    Derive the best per-label thresholds from a confusion metrics csv file.

    Parameters
    ----------
    path: str
        path to the confusion metrics csv file of the needed model

    Returns
    -------
    list
        one threshold per label, the one with the highest F1-score
    """
    thresholds, tp, tn, fp, fn = get_confusion(path)
    f1_per_label = get_f1_dict(thresholds, tp, tn, fp, fn)
    return get_best_multiple_threshold(f1_per_label, plot_compare=False)

In [137]:
def get_confusion_metrics(trues, prediction, thresh_value, num_classes=15):
    """
    Binarize predictions and accumulate a multi-label confusion matrix.

    Parameters
    ----------
    trues:
        ground-truth labels, passed unchanged to the metric
    prediction: array-like
        model scores to binarize
    thresh_value: float or array-like
        cut-off(s); compared with ``prediction`` via np.greater_equal, so a
        per-class sequence is broadcast against the prediction array
    num_classes: int
        number of classes tracked by the metric (default 15, the value that
        was previously hard-coded)

    Returns
    -------
    the result of tfa.metrics.MultiLabelConfusionMatrix for this single update
    (per the TFA documentation, one 2x2 matrix per class)
    """
    metric = tfa.metrics.MultiLabelConfusionMatrix(num_classes=num_classes)
    # A score equal to the threshold counts as a positive prediction.
    binarized = np.greater_equal(prediction, thresh_value).astype('int8')
    metric.update_state(trues, binarized)
    return metric.result()

## Main Function

In [19]:
def get_model(model_path):
    """
    Load a saved Keras model.

    Parameters
    ----------
    model_path: str
        path handed to tf.keras.models.load_model
    """
    loaded_model = tf.keras.models.load_model(model_path)
    return loaded_model

In [124]:
def get_filename(model_path):
    """
    Return the file name of ``model_path`` without directories or extension.

    Parameters
    ----------
    model_path: str
        "/"-separated path to a model file

    Returns
    -------
    str
        the last path component, cut at its first "."
    """
    # Take the last path component BEFORE cutting at the dot. Cutting first
    # truncated the whole path at the first "." anywhere in it, so inputs such
    # as "./models/net.h5" or "/data/v1.2/net.h5" gave "" or a directory name.
    return model_path.split("/")[-1].split(".")[0]

In [20]:
def get_y_true(data):
    """
    Collect every label yielded by ``data`` into a single tf.Variable.

    Parameters
    ----------
    data: iterable
        yields (X, y) pairs; each y is itself iterated label by label
        (presumably one batch of labels per pair; confirm against Dataset)

    Returns
    -------
    tf.Variable
        all labels, in iteration order
    """
    # Flatten the batches: inputs are ignored, only the labels are kept.
    all_labels = [label for _, batch_labels in data for label in batch_labels]
    return tf.Variable(all_labels)

In [28]:
def get_test_dataset_5_fold():
    """
    Build the test split of folds 1 to 5.

    Returns
    -------
    tuple
        five entries, the second element returned by
        Dataset.get_kfold(fold_number=k, sample=False) for k = 1..5;
        the first element of each pair is discarded
    """
    dataset = Dataset()
    fold_test_sets = []
    for fold in range(1, 6):
        _, fold_test = dataset.get_kfold(fold_number=fold, sample=False)
        fold_test_sets.append(fold_test)
    return tuple(fold_test_sets)

In [44]:
def model_predict(model, test_dataset):
    """
    Run ``model`` on ``test_dataset``.

    Returns whatever ``model.predict(test_dataset)`` returns.
    """
    predictions = model.predict(test_dataset)
    return predictions

In [138]:
def evaluate(model, test_dataset, file_name,
             confusion_dir="/home/jovyan/ChestXray-14/results/confusion"):
    """
    Score ``model`` on one test dataset using its stored best thresholds.

    Parameters
    ----------
    model:
        object whose predictions are obtained through model_predict
    test_dataset:
        iterable of (X, y) pairs; iterated once for the labels and once more
        for the predictions
    file_name: str
        base name (without ".csv") of the model's confusion metrics file
    confusion_dir: str
        directory holding the confusion metrics csv files; the default is the
        location that was previously hard-coded

    Returns
    -------
    mean over the values returned by calculate_f1_scores
    """
    best_thresholds = get_thresholds(f"{confusion_dir}/{file_name}.csv")
    # NOTE(review): labels and predictions come from two separate passes over
    # test_dataset; this assumes the dataset yields the same order each time.
    y_true = get_y_true(test_dataset)
    y_preds = model_predict(model, test_dataset)
    confusion_metrics = get_confusion_metrics(y_true, y_preds, best_thresholds)
    # NOTE(review): calculate_f1_scores is not defined in the visible part of
    # this notebook; it must be defined or imported before this runs on a
    # fresh kernel.
    return calculate_f1_scores(confusion_metrics).mean()

## Main

In [139]:
# Build the per-fold test datasets once; evaluate_5_folds() reads this
# notebook-level tuple for every model it scores.
test_dataset = get_test_dataset_5_fold()

In [145]:
def evaluate_5_folds(path, test_datasets=None):
    """
    Evaluate one saved model on every test fold and print the F1-scores.

    Parameters
    ----------
    path: str
        path to best model
    test_datasets: sequence, optional
        test datasets to score, one per fold; defaults to the notebook-level
        ``test_dataset`` tuple so existing calls keep working

    Prints the score of each fold and their average; returns None.
    """
    if test_datasets is None:
        # Fall back to the notebook-level tuple this function used before.
        test_datasets = test_dataset

    model = get_model(path)
    file_name = get_filename(path)

    f1_score_5_folds = []
    print(f"===== {file_name} =====")
    # Iterate over the folds actually supplied instead of a fixed range(1, 6).
    for i, fold_dataset in enumerate(test_datasets, start=1):
        f1_score = evaluate(model=model, test_dataset=fold_dataset, file_name=file_name)
        f1_score_5_folds.append(f1_score)
        print(f">> Fold {i} = {f1_score}")

    print(f"Average F1-score of {file_name} = {np.mean(f1_score_5_folds)}")

In [None]:
# Score the DenseNet121_None_fold_3 checkpoint on each of the five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/DenseNet121_None_fold_3.h5")

===== DenseNet121_None_fold_3 =====
>> Fold 1 = 0.24538062512874603
>> Fold 2 = 0.2464958280324936
>> Fold 3 = 0.24230287969112396
>> Fold 4 = 0.24926969408988953
>> Fold 5 = 0.27010810375213623
Average F1-score of DenseNet121_None_fold_3 = 0.2507114112377167


In [148]:
# Score the DenseNet121_imagenet_fold_1 checkpoint on each of the five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/DenseNet121_imagenet_fold_1.h5")

===== DenseNet121_imagenet_fold_1 =====
>> Fold 1 = 0.23900732398033142
>> Fold 2 = 0.24249492585659027
>> Fold 3 = 0.24817658960819244
>> Fold 4 = 0.24731428921222687
>> Fold 5 = 0.2540903389453888
Average F1-score of DenseNet121_imagenet_fold_1 = 0.24621668457984924


In [149]:
# Score the EfficientNetB0_None_fold_1 checkpoint on each of the five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/EfficientNetB0_None_fold_1.h5")

===== EfficientNetB0_None_fold_1 =====
>> Fold 1 = 0.24855409562587738
>> Fold 2 = 0.25773802399635315
>> Fold 3 = 0.26683875918388367
>> Fold 4 = 0.2701347768306732
>> Fold 5 = 0.2746805250644684
Average F1-score of EfficientNetB0_None_fold_1 = 0.2635892331600189


In [150]:
# Score the EfficientNetB0_imagenet_fold_1 checkpoint on each of the five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/EfficientNetB0_imagenet_fold_1.h5")

===== EfficientNetB0_imagenet_fold_1 =====
>> Fold 1 = 0.20957711338996887
>> Fold 2 = 0.21095892786979675
>> Fold 3 = 0.2136314958333969
>> Fold 4 = 0.2128966599702835
>> Fold 5 = 0.22186703979969025
Average F1-score of EfficientNetB0_imagenet_fold_1 = 0.21378624439239502


In [151]:
# Score the Resnet50_fold_1 checkpoint on each of the five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/Resnet50_fold_1.h5")

===== Resnet50_fold_1 =====
>> Fold 1 = 0.22268527746200562
>> Fold 2 = 0.22564974427223206
>> Fold 3 = 0.23484879732131958
>> Fold 4 = 0.23671677708625793
>> Fold 5 = 0.2440558820962906
Average F1-score of Resnet50_fold_1 = 0.23279127478599548


In [152]:
# Score the Resnet50_imagenet_fold_3 checkpoint on each of the five test folds.
evaluate_5_folds("/home/jovyan/ChestXray-14/results/models/Resnet50_imagenet_fold_3.h5")

===== Resnet50_imagenet_fold_3 =====
>> Fold 1 = 0.24034175276756287
>> Fold 2 = 0.23771868646144867
>> Fold 3 = 0.23844484984874725
>> Fold 4 = 0.2475757896900177
>> Fold 5 = 0.2518383264541626
Average F1-score of Resnet50_imagenet_fold_3 = 0.24318388104438782
