In [1]:
!pip3 install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [2]:
import sys

# Make the project's `modules` package importable from this notebook.
# Guarded so that re-running the cell does not keep appending duplicates.
if '/home/jovyan/ChestXray-14' not in sys.path:
    sys.path.append('/home/jovyan/ChestXray-14')

In [14]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa

from modules.utils import get_dataset
from modules.dataset import LABELS

In [4]:
import os

# Absolute path of the process's current working directory; the result and
# model paths used further down are built relative to it.
CURRENT_PATH = os.path.abspath(os.curdir)
CURRENT_PATH

'/home/jovyan/ChestXray-14/experiments/BinaryModelWithUnderSampling'

In [5]:
# Folder that receives the evaluation CSV files written later in the notebook.
RESULT_EVALUATE_PATH = os.path.join(
    os.path.join(CURRENT_PATH, "results", "evaluate"),
    "cross_entropy",
    "EfficientNetB0_None",
)
RESULT_EVALUATE_PATH

'/home/jovyan/ChestXray-14/experiments/BinaryModelWithUnderSampling/results/evaluate/cross_entropy/EfficientNetB0_None'

In [6]:
from pathlib import Path

# Create the results folder together with any missing parent folders;
# nothing happens (and nothing fails) when it already exists.
Path(RESULT_EVALUATE_PATH).mkdir(exist_ok=True, parents=True)

In [7]:
class Dataset:
    """Builds the train / test datasets from the under-sampled `.tfrec` files."""

    # Root folder of the dataset on disk.
    INPUT_PATH = "/home/jovyan/ChestXray-14/dataset/ChestXray NIH"

    def get_train(self):
        """Return `get_dataset(...)` over every `.tfrec` file of the train split."""
        train_shards = tf.io.gfile.glob(f'{self.INPUT_PATH}/data/under_sampling/train/*.tfrec')
        return get_dataset(train_shards)

    def get_test(self):
        """Return `get_dataset(...)` over every `.tfrec` file of the test split."""
        test_shards = tf.io.gfile.glob(f'{self.INPUT_PATH}/data/under_sampling/test/*.tfrec')
        return get_dataset(test_shards)

In [8]:
# Build the evaluation dataset from the under-sampled test split.
test_dataset = Dataset().get_test()

2023-03-07 07:30:31.254910: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-07 07:30:33.022812: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31693 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:17:00.0, compute capability: 8.0


## Experiment

In [17]:
ROOT_PATH = "/home/jovyan/ChestXray-14"
num_class = 1 # TODO: change to 15 for multi-labels


class Evaluate:
    """Threshold search and per-label metrics for a saved Keras model.

    Typical flow: call `get_best_threshold` once to pick, for each label, the
    decision threshold with the highest F1 score, then call `get_f1_scores`,
    `get_precision_scores` and `get_recall_scores` to report metrics at those
    thresholds.

    The class reads the notebook-level globals `num_class` (number of labels
    evaluated) and `LABELS` (label names, indexed by class position).

    NOTE(review): labels and predictions are collected in two separate passes
    over the dataset, so this assumes the dataset yields its elements in the
    same order on every iteration (no reshuffling) - TODO confirm against
    `modules.utils.get_dataset`.

    The instance can be used as a context manager; leaving the `with` block
    drops the cached labels and predictions.
    """

    def __init__(self, model_path):
        """Load the model stored at `model_path` and reset all cached state."""
        self.y_true = None
        self.y_preds = None
        self.model_path = model_path
        self.model = self.get_model(model_path)
        self.best_thresholds = None
        self.thresholds_200 = None

    def get_model(self, path):
        """Return the Keras model loaded from `path`."""
        return tf.keras.models.load_model(path)

    def get_y_true(self, data):
        """Collect every label of `data` into one `tf.Variable`.

        `data` must yield `(inputs, labels)` batches. The result is also
        cached on `self.y_true`.
        """
        # Concatenate whole batches instead of appending one label at a time
        # in Python; the resulting values, shape and dtype are the same.
        label_batches = [labels for _, labels in data]
        if label_batches:
            y_true = tf.Variable(tf.concat(label_batches, axis=0))
        else:
            # Same result the element-wise version gave for an empty dataset.
            y_true = tf.Variable([])
        self.y_true = y_true
        return y_true

    def get_confusion_metrics(self, y_true, y_preds):
        """Return `(thresholds, TP, TN, FP, FN)` from a multi-label AUC metric.

        Each count has one row per threshold and one column per label.
        """
        m = tf.keras.metrics.AUC(multi_label=True)
        m.update_state(y_true, y_preds)

        # Read the accumulators through their named attributes rather than by
        # position in `m.variables`, whose ordering is an implementation
        # detail of the metric.
        return (m.thresholds,
                m.true_positives,
                m.true_negatives,
                m.false_positives,
                m.false_negatives)

    def model_predict(self, test_dataset):
        """Return the model's predictions for `test_dataset`."""
        return self.model.predict(test_dataset)

    @staticmethod
    def _safe_ratio(numerator, denominator):
        """Divide element-wise, mapping an undefined 0/0 result to 0."""
        ratio = numerator / denominator
        return tf.where(tf.math.is_nan(ratio), tf.zeros_like(ratio), ratio)

    def get_f1_scores_200_thresholds(self, test_dataset):
        """Return `{label: [F1 at each AUC threshold]}` for the evaluated labels.

        Labels and predictions are recomputed and cached on the instance, and
        the threshold grid is stored in `self.thresholds_200` (the AUC metric
        is built with its default number of thresholds).
        """
        self.y_true = self.get_y_true(test_dataset)
        self.y_preds = self.model_predict(test_dataset)

        thresholds, TP, _, FP, FN = self.get_confusion_metrics(self.y_true, self.y_preds)
        self.thresholds_200 = thresholds

        # One F1 value per (threshold, label). A NaN here would be selected by
        # np.argmax as the "best" threshold, so 0/0 is mapped to 0.
        f1_matrix = self._safe_ratio(2 * TP, 2 * TP + FP + FN)
        f1_class_dict = {
            LABELS[label_index]: tf.unstack(f1_matrix[:, label_index])
            for label_index in range(num_class)
        }
        print(LABELS)
        return f1_class_dict

    def _scores_at_best_thresholds(self, test_dataset, new_calculate, score_fn):
        """Apply `score_fn(TP, FP, FN)` to each label's confusion matrix.

        Predictions are binarised with `self.best_thresholds` before the
        confusion matrices are computed. Returns `{label: [score]}`.
        """
        if self.best_thresholds is None:
            raise ValueError(
                "best_thresholds is not set; call get_best_threshold() first."
            )
        if new_calculate:
            self.y_true = self.get_y_true(test_dataset)
            self.y_preds = self.model_predict(test_dataset)
        elif self.y_true is None or self.y_preds is None:
            raise ValueError(
                "No cached labels/predictions; call with new_calculate=True."
            )

        metric = tfa.metrics.MultiLabelConfusionMatrix(num_classes=num_class)
        metric.update_state(self.y_true,
                            np.greater_equal(self.y_preds, self.best_thresholds).astype('int8'))

        scores = dict()
        for idx, confusion in enumerate(metric.result()):
            # Per-label matrix layout: [[TN, FP], [FN, TP]].
            tp, fp, fn = confusion[1, 1], confusion[0, 1], confusion[1, 0]
            scores[LABELS[idx]] = [score_fn(tp, fp, fn).numpy()]
        return scores

    def get_f1_scores(self, test_dataset, new_calculate=True):
        """Return `{label: [F1]}` at the best thresholds.

        With `new_calculate=False` the cached labels and predictions are
        reused instead of being recomputed from `test_dataset`.
        """
        return self._scores_at_best_thresholds(
            test_dataset, new_calculate,
            lambda tp, fp, fn: self._safe_ratio(2 * tp, 2 * tp + fp + fn))

    def get_precision_scores(self, test_dataset, new_calculate=True):
        """Return `{label: [precision]}` at the best thresholds.

        With `new_calculate=False` the cached labels and predictions are
        reused instead of being recomputed from `test_dataset`.
        """
        return self._scores_at_best_thresholds(
            test_dataset, new_calculate,
            lambda tp, fp, fn: self._safe_ratio(tp, tp + fp))

    def get_recall_scores(self, test_dataset, new_calculate=True):
        """Return `{label: [recall]}` at the best thresholds.

        With `new_calculate=False` the cached labels and predictions are
        reused instead of being recomputed from `test_dataset`.
        """
        return self._scores_at_best_thresholds(
            test_dataset, new_calculate,
            lambda tp, fp, fn: self._safe_ratio(tp, tp + fn))

    def get_best_threshold(self,
                           test_dataset=None,
                           save_best_thresholds=f"{ROOT_PATH}/results/paper/table3_1/best_thresholds.csv",
                           save_200_thresholds=f"{ROOT_PATH}/results/paper/table3_1/f1_per_thresholds.csv"):
        """Pick, per label, the threshold with the highest F1 and save the results.

        Writes two CSV files: the best threshold (and its F1) per label to
        `save_best_thresholds`, and the full F1-per-threshold table to
        `save_200_thresholds`. The chosen thresholds are stored in
        `self.best_thresholds`.

        When `test_dataset` is None, the dataset is taken from a global
        `datasets` sequence, indexed by the fold digit that ends the model
        file name (e.g. `..._3.h5` selects `datasets[2]`).

        Raises:
            ValueError: if `test_dataset` is None and no global `datasets`
                is defined.
        """
        if test_dataset is None:
            # Strip only the extension, so a "." inside a directory name does
            # not break the fold-number lookup.
            fold_num = int(self.model_path.rsplit(".", 1)[0][-1])
            try:
                test_dataset = datasets[fold_num-1]
            except NameError as err:
                raise ValueError(
                    "test_dataset was not given and no global `datasets` "
                    "sequence is defined to fall back on."
                ) from err

        f1_scores_dict = self.get_f1_scores_200_thresholds(test_dataset)
        best_thresholds_dict = {"thresholds": [], "f1_most": [], "label": []}
        for key, value in f1_scores_dict.items():
            f1_arg_max = np.argmax(value)
            best_thresholds_dict["f1_most"].append(value[f1_arg_max].numpy())
            best_thresholds_dict["label"].append(key)
            best_thresholds_dict["thresholds"].append(self.thresholds_200[f1_arg_max])

        df = pd.DataFrame(best_thresholds_dict)
        df = df.set_index("label")
        df.to_csv(save_best_thresholds, index=True)
        print(f"{save_best_thresholds} was success!")

        df_200_thresholds = pd.DataFrame(f1_scores_dict)
        df_200_thresholds.to_csv(save_200_thresholds, index=True)
        print(f"{save_200_thresholds} was success!")
        self.best_thresholds = df.copy()["thresholds"].values

    def __enter__(self):
        """Announce the start of an evaluation block and return the instance."""
        print("Doing ...!")
        return self

    def __exit__(self, *arg):
        """Drop the cached labels and predictions when the block ends."""
        self.y_true = None
        self.y_preds = None
        print("Done!")

In [10]:
# TODO: Delete when test done
# Smoke test: load the saved model and predict on the test dataset.
# NOTE(review): this builds a throw-away Evaluate instance, so the model is
# loaded from disk just for this cell.
MODEL_PATH = f'{CURRENT_PATH}/results/models/cross_entropy/EfficientNetB0_None.h5'
y_preds = Evaluate(MODEL_PATH).model_predict(test_dataset)
y_preds

2023-03-07 07:30:42.869387: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2023-03-07 07:30:44.004223: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-03-07 07:30:44.005245: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-03-07 07:30:44.005286: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2023-03-07 07:30:44.006172: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-03-07 07:30:44.006255: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


      5/Unknown - 9s 36ms/step

2023-03-07 07:30:48.459363: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.




array([[0.04218077],
       [0.05207986],
       [0.37312868],
       ...,
       [0.07624525],
       [0.14275756],
       [0.18984248]], dtype=float32)

In [11]:
# Smoke test: collect the ground-truth labels of the test dataset.
# NOTE(review): constructing Evaluate loads the model again, although
# get_y_true only iterates over the dataset.
y_true = Evaluate(MODEL_PATH).get_y_true(test_dataset)
y_true

<tf.Variable 'Variable:0' shape=(12239, 1) dtype=int64, numpy=
array([[0],
       [0],
       [1],
       ...,
       [0],
       [1],
       [1]])>

In [12]:
# Display the dataset's element spec (input and label shapes / dtypes).
test_dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1), dtype=tf.int64, name=None))>

## Usage

In [18]:
# Evaluate the saved model: choose one decision threshold per label, then
# export F1 / precision / recall measured at those thresholds as CSV files.
MODEL_PATH = f'{CURRENT_PATH}/results/models/cross_entropy/EfficientNetB0_None.h5'
best_model = Evaluate(MODEL_PATH)

# NOTE(review): the thresholds are selected on the same dataset that is then
# used to report the metrics below - confirm this is intended.
best_model.get_best_threshold(
    save_200_thresholds=f"{RESULT_EVALUATE_PATH}/f1_per_thresholds.csv",
    save_best_thresholds=f"{RESULT_EVALUATE_PATH}/best_thresholds.csv",
    test_dataset=test_dataset,
)

with best_model:
    # F1 recomputes labels and predictions; the two calls after it reuse them.
    f1_each_class = best_model.get_f1_scores(test_dataset)
    print(f1_each_class)
    df = pd.DataFrame(f1_each_class)
    df.to_csv(f"{RESULT_EVALUATE_PATH}/f1_scores.csv", index=False)

    precision_each_class = best_model.get_precision_scores(test_dataset, new_calculate=False)
    pd.DataFrame(precision_each_class).to_csv(
        f"{RESULT_EVALUATE_PATH}/precision.csv", index=False
    )

    recall_each_class = best_model.get_recall_scores(test_dataset, new_calculate=False)
    pd.DataFrame(recall_each_class).to_csv(
        f"{RESULT_EVALUATE_PATH}/recall.csv", index=False
    )

    print(df)

['No Finding', 'Atelectasis', 'Consolidation', 'Infiltration', 'Pneumothorax', 'Edema', 'Emphysema', 'Fibrosis', 'Effusion', 'Pneumonia', 'Pleural_Thickening', 'Cardiomegaly', 'Nodule', 'Mass', 'Hernia']
/home/jovyan/ChestXray-14/experiments/BinaryModelWithUnderSampling/results/evaluate/cross_entropy/EfficientNetB0_None/best_thresholds.csv was success!
/home/jovyan/ChestXray-14/experiments/BinaryModelWithUnderSampling/results/evaluate/cross_entropy/EfficientNetB0_None/f1_per_thresholds.csv was success!
Doing ...!
{'No Finding': [0.39568081]}
   No Finding
0    0.395681
Done!
