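## Description
Per-class F1-scores of the best model, evaluated on each test fold. Each reported score is the mean F1 over the thresholds of the Keras `AUC` metric, not the F1 at a single decision threshold.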

In [1]:
from functools import partial

import numpy as np
import pandas as pd
import tensorflow as tf

import seaborn as sns
import matplotlib.pyplot as plt

from modules.dataset import Dataset, LABELS

2022-12-13 06:27:32.040056: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Apply seaborn's default theme to the figures drawn later in the notebook.
sns.set_theme()

In [53]:
class Evaluate:
    """Load a saved Keras model and report per-class F1 scores for a test set.

    Instances work as context managers: leaving the ``with`` block resets the
    cached labels (``y_true``) and predictions (``y_preds``) to ``None``.
    """

    def __init__(self, model_path):
        """Load the model stored at ``model_path`` and start with empty caches.

        Args:
            model_path: Path passed to ``tf.keras.models.load_model``.
        """
        self.y_true = None
        self.y_preds = None
        self.model_path = model_path
        self.model = self.get_model(model_path)

    def get_model(self, path):
        """Return the Keras model loaded from ``path``."""
        return tf.keras.models.load_model(path)

    def get_y_true(self, data):
        """Gather every label yielded by ``data`` into a single ``tf.Variable``.

        Args:
            data: Iterable of ``(features, labels)`` batches; the features are
                ignored.

        Returns:
            A ``tf.Variable`` with all label rows in iteration order. It is
            also stored on ``self.y_true``.
        """
        # Concatenating whole batches yields the same rows as appending them
        # one at a time, without one eager slice op per sample.
        label_batches = [batch_labels for _, batch_labels in data]
        y_true = tf.Variable(
            tf.concat(label_batches, axis=0) if label_batches else []
        )
        self.y_true = y_true
        return y_true

    def get_confusion_metrics(self, y_true, y_preds):
        """Count TP/TN/FP/FN per threshold and per label.

        A multi-label ``tf.keras.metrics.AUC`` is used purely as a confusion
        matrix accumulator over its default thresholds.

        Args:
            y_true: Ground-truth labels.
            y_preds: Predicted scores, same layout as ``y_true``.

        Returns:
            Tuple ``(thresholds, TP, TN, FP, FN)``; each count is indexed as
            ``[threshold, label]``.
        """
        metric = tf.keras.metrics.AUC(multi_label=True)
        metric.update_state(y_true, y_preds)

        # Read the counters by name: the position of each counter inside
        # ``metric.variables`` is an implementation detail of Keras.
        return (
            metric.thresholds,
            metric.true_positives,
            metric.true_negatives,
            metric.false_positives,
            metric.false_negatives,
        )

    def model_predict(self, test_dataset):
        """Return ``self.model.predict(test_dataset)``."""
        return self.model.predict(test_dataset)

    def get_f1_scores(self, test_dataset, labels=None):
        """Compute and print the per-class F1 score for ``test_dataset``.

        For every threshold returned by ``get_confusion_metrics`` the F1 score
        ``2*TP / (2*TP + FP + FN)`` is computed per label; the value printed
        for a label is the mean of its F1 scores over all thresholds.

        Args:
            test_dataset: Passed to both ``get_y_true`` and ``model_predict``.
            labels: Optional sequence of class names, one per model output.
                Defaults to the module-level ``LABELS``.

        Returns:
            Dict mapping each class name to the list of its F1 scores (plain
            floats), one entry per threshold.

        Raises:
            ValueError: If fewer label names than model outputs are available.
        """
        # NOTE(review): the dataset is consumed twice (labels, then
        # predictions). If it reshuffles on every iteration the two passes are
        # misaligned -- confirm that the test dataset is not shuffled.
        self.y_true = self.get_y_true(test_dataset)
        self.y_preds = self.model_predict(test_dataset)

        _, TP, _, FP, FN = self.get_confusion_metrics(self.y_true, self.y_preds)
        tp, fp, fn = (np.asarray(counts) for counts in (TP, FP, FN))

        # Vectorised over the whole [threshold, label] grid; the grid size is
        # taken from the data instead of being hard-coded.
        denominator = 2 * tp + fp + fn
        with np.errstate(divide="ignore", invalid="ignore"):
            f1 = (2 * tp) / denominator
        # F1 is undefined when TP = FP = FN = 0; report 0.0 there so a single
        # such threshold does not turn the class mean into NaN.
        f1 = np.where(denominator > 0, f1, 0.0)

        label_names = LABELS if labels is None else labels
        n_classes = f1.shape[1]
        if len(label_names) < n_classes:
            raise ValueError(
                f"got {len(label_names)} label names for {n_classes} model outputs"
            )

        f1_class_dict = {
            label_names[index]: f1[:, index].tolist() for index in range(n_classes)
        }
        for label_name, scores in f1_class_dict.items():
            print(f"{label_name}: {np.mean(scores)}")

        return f1_class_dict

    def __enter__(self):
        """Return the evaluator itself for use inside the ``with`` block."""
        return self

    def __exit__(self, *arg):
        """Clear the cached labels and predictions; exceptions propagate."""
        self.y_true = None
        self.y_preds = None
    

In [54]:
# NOTE(review): absolute, machine-specific path. The file name suggests a
# model trained on fold 1 -- confirm it is the intended "best model".
model_path = "/home/jovyan/ChestXray-14/results/models/EfficientNetB0_None_fold_1.h5"
best_model = Evaluate(model_path)

# NOTE(review): get_test_dataset_5_fold is neither defined nor imported in the
# cells visible here; presumably it yields one test dataset per fold -- verify
# that it is defined before this cell on a fresh kernel.
datasets = get_test_dataset_5_fold()

# Print the per-class F1 scores of the same model for each test dataset.
for fold, test_dataset in enumerate(datasets):
    print(f"===== Fold {fold + 1} =====")
    # Leaving the `with` block resets best_model.y_true / y_preds to None;
    # the dict returned by get_f1_scores is discarded here.
    with best_model:
        best_model.get_f1_scores(test_dataset)

===== Fold 1 =====
No Finding: 0.559679388999939
Atelectasis: 0.1065041571855545
Consolidation: 0.02727767452597618
Infiltration: 0.13860812783241272
Pneumothorax: 0.0673852413892746
Edema: 0.03653204068541527
Emphysema: 0.012301688082516193
Fibrosis: 0.00221731374040246
Effusion: 0.2505761981010437
Pneumonia: 0.0037725353613495827
Pleural_Thickening: 0.019496580585837364
Cardiomegaly: 0.1343567818403244
Nodule: 0.016203593462705612
Mass: 0.04224595054984093
Hernia: 0.00035582558484748006
===== Fold 2 =====
No Finding: 0.5655021071434021
Atelectasis: 0.10824349522590637
Consolidation: 0.030079174786806107
Infiltration: 0.14367888867855072
Pneumothorax: 0.07823633402585983
Edema: 0.03589392080903053
Emphysema: 0.013467898592352867
Fibrosis: 0.002366876695305109
Effusion: 0.2686588168144226
Pneumonia: 0.003493357915431261
Pleural_Thickening: 0.020632591098546982
Cardiomegaly: 0.16571420431137085
Nodule: 0.01750326156616211
Mass: 0.04406566545367241
Hernia: 0.0002356281183892861
===== Fol