In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from modules.dataset import Dataset, LABELS

2022-11-01 16:04:14.870543: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


### Our Utils

In [2]:
def get_y_true(data):
    y_true=[]
    for X,y in data:
        for label in y:
            y_true.append(label)
    y_true = tf.Variable(y_true)
    return y_true

In [3]:
def get_f1_score_model(NAME, fold_num, dataset):
    # Dataset
    train_dataset, test_dataset = dataset.get_kfold(fold_num, sample=False)

    # Modeling
    model = tf.keras.models.load_model("results/models/{}_fold_{}.h5".format(NAME, fold_num))

    # Label and Predict
    y_true = get_y_true(test_dataset)
    y_preds = model.predict(test_dataset)

    # m object
    m = tf.keras.metrics.AUC(multi_label=True, num_thresholds=200)
    m.update_state(y_true, y_preds)
    thresholds = m._thresholds

    # average threshold
    precision_average_list = []
    recall_average_list = []
    f1_thresholds_average_list = []
    true_positives, true_negatives, false_positives, false_negatives = m.variables
    for i_thresh, (tps, tns, fps, fns) in enumerate(zip(true_positives, true_negatives, false_positives, false_negatives)):
        f1_score_average = []
        precision_average = []
        recall_average = []
        for label, tp, tn, fp, fn in zip(LABELS, tps, tns, fps, fns):
            tp, tn, fp, fn = tp.numpy(), tn.numpy(), fp.numpy(), fn.numpy()
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            f1_score = 2 * tp / (2 * tp + fn + fp)
            precision_average.append(precision)
            recall_average.append(recall)
            f1_score_average.append(f1_score)

        precision_score_average = np.mean(precision_average)
        precision_average_list.append(precision_score_average)
        
        recall_score_average = np.mean(recall_average)
        recall_average_list.append(recall_score_average)
        
        f1_score_average = np.mean(f1_score_average)
        f1_thresholds_average_list.append((thresholds[i_thresh], f1_score_average))

    # Cast to numpy for vectorlize
    f1_thresholds_average_list = np.array(f1_thresholds_average_list)
    best_f1 = f1_thresholds_average_list.max(axis=0)[1]
    best_f1_indx = f1_thresholds_average_list.argmax(axis=0)[1]
    best_threshold = f1_thresholds_average_list.max(axis=0)[0]
    
    return precision_average_list[best_f1_indx], recall_average_list[best_f1_indx], best_f1

### Exp 1 : DenseNet121_None

In [31]:
# Settings
NAME = "DenseNet121_noDropout_None"
dataset = Dataset()

precision_score_list = []
recall_score_list = []
f1_score_list = []
for fold_num in range(1, 5+1):
    precision_score, recall_score, f1_score = get_f1_score_model(NAME, fold_num, dataset)
    precision_score_list.append(precision_score)
    recall_score_list.append(recall_score)
    f1_score_list.append(f1_score)



In [32]:
print("Best={:.4f}, Average={:.4f}, Worst={:.4f}".format(max(f1_score_list), 
                                                         np.mean(f1_score_list),
                                                         min(f1_score_list)))

Best=0.2186, Average=0.2084, Worst=0.2018


In [33]:
precision_score_list, recall_score_list, f1_score_list

([nan, nan, nan, nan, nan],
 [0.37569088, 0.3897538, 0.38936552, 0.39162198, 0.5103506],
 [0.2018217321431188,
  0.20492600782162843,
  0.2186384313589098,
  0.21154133729281815,
  0.20532232468684597])

### Exp 2: DenseNet121_imagenet

In [49]:
# Settings
NAME = "DenseNet121_imagenet"
dataset = Dataset()

f1_score_list = []
for fold_num in range(1, 5+1):
    f1_score = get_f1_score_model(NAME, fold_num, dataset)
    f1_score_list.append(f1_score)



In [50]:
print("Best={:.4f}, Average={:.4f}, Worst={:.4f}".format(max(f1_score_list), 
                                                         np.mean(f1_score_list),
                                                         min(f1_score_list)))

Best=0.2215, Average=0.2151, Worst=0.2074


In [None]:
f1_score_list

### Exp 3: EfficientNetB0_None

In [40]:
# Settings
NAME = "EfficientNetB0_None"
dataset = Dataset()

f1_score_list = []
for fold_num in range(1, 5+1):
    f1_score = get_f1_score_model(NAME, fold_num, dataset)
    f1_score_list.append(f1_score)



In [41]:
print("Best={:.4f}, Average={:.4f}, Worst={:.4f}".format(max(f1_score_list), 
                                                         np.mean(f1_score_list),
                                                         min(f1_score_list)))

Best=0.2263, Average=0.2204, Worst=0.2159


In [None]:
f1_score_list

### Exp 4: EfficientNetB0_imagenet

In [43]:
# Settings
NAME = "EfficientNetB0_imagenet"
dataset = Dataset()

f1_score_list = []
for fold_num in range(1, 5+1):
    f1_score = get_f1_score_model(NAME, fold_num, dataset)
    f1_score_list.append(f1_score)



In [44]:
print("Best={:.4f}, Average={:.4f}, Worst={:.4f}".format(max(f1_score_list), 
                                                         np.mean(f1_score_list),
                                                         min(f1_score_list)))

Best=0.1765, Average=0.1496, Worst=0.1354


In [None]:
f1_score_list

### Exp 5: Resnet50_None

In [45]:
# Settings
NAME = "Resnet50"
dataset = Dataset()

f1_score_list = []
for fold_num in range(1, 5+1):
    f1_score = get_f1_score_model(NAME, fold_num, dataset)
    f1_score_list.append(f1_score)



In [46]:
print("Best={:.4f}, Average={:.4f}, Worst={:.4f}".format(max(f1_score_list), 
                                                         np.mean(f1_score_list),
                                                         min(f1_score_list)))

Best=0.2008, Average=0.1963, Worst=0.1895


In [None]:
f1_score_list

### Exp 6: Resnet50_imagenet

In [6]:
# Settings
NAME = "Resnet50_imagenet"
dataset = Dataset()

f1_score_list = []
for fold_num in range(1, 5+1):
    precision, recall, f1_score = get_f1_score_model(NAME, fold_num, dataset)
    f1_score_list.append(f1_score)



In [7]:
print("Best={:.4f}, Average={:.4f}, Worst={:.4f}".format(max(f1_score_list), 
                                                         np.mean(f1_score_list),
                                                         min(f1_score_list)))

Best=0.2087, Average=0.2065, Worst=0.2028


In [8]:
f1_score_list

[0.20717022636086754,
 0.20821138673046882,
 0.20568500260137842,
 0.2028188009153436,
 0.2086717115430576]