In [1]:
import sys
# Put the project root on the import path; the cells below import from its `modules` package.
# NOTE(review): absolute, machine-specific path — the same root is repeated as ROOT_PATH in a later cell.
sys.path.append('/home/jovyan/ChestXray-14')

In [2]:
import tensorflow as tf
from modules.models import Model
from modules.utils import get_dataset
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler

2023-03-12 02:29:15.648859: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [3]:
from modules.dataset import LABELS

In [4]:
# Project root on the Jupyter host, and the ChestXray NIH dataset folder beneath it.
ROOT_PATH = "/home/jovyan/ChestXray-14"
INPUT_PATH = "/".join((ROOT_PATH, "dataset", "ChestXray NIH"))

In [5]:
class Dataset:
    """Loads the train/test TFRecord splits of the fold-3 under-sampling dataset.

    The locations are plain class attributes, so a different root or dataset
    folder can be selected by overriding them on a subclass or an instance.
    """

    # Root folder of the ChestXray NIH data (taken from the module-level INPUT_PATH).
    INPUT_PATH = INPUT_PATH
    # Sub-folder of `<INPUT_PATH>/data` that holds the `train/` and `test/` shard folders.
    DATASET_DIR = "Under_Sampling_on_fold_3_dataset"

    def _load_split(self, split):
        """Glob the `.tfrec` shards of one split and hand them to `get_dataset`.

        Args:
            split: name of the split sub-folder (`"train"` or `"test"`).

        Returns:
            Whatever `get_dataset` returns for the matched shard paths.

        Raises:
            FileNotFoundError: if no `.tfrec` file matches the split's pattern,
                so a wrong path fails here instead of producing an empty dataset.
        """
        pattern = f'{self.INPUT_PATH}/data/{self.DATASET_DIR}/{split}/*.tfrec'
        filenames = tf.io.gfile.glob(pattern)
        if not filenames:
            raise FileNotFoundError(f"No .tfrec files match {pattern!r}")
        return get_dataset(filenames)

    def get_train(self):
        """Return the dataset built from the shards in the `train/` folder."""
        return self._load_split("train")

    def get_test(self):
        """Return the dataset built from the shards in the `test/` folder."""
        return self._load_split("test")

## Model Training

In [6]:
import os
# Absolute path of the kernel's working directory; the bare name on the last
# line echoes it as the cell output.
CURRENT_PATH = os.path.abspath(os.curdir)
CURRENT_PATH

'/home/jovyan/ChestXray-14/experiments/Under Sampling for compare with best model'

In [7]:
# Learning-rate logging
def lr_schedule(epoch, learning_rate):
    """Pass-through schedule that records the current learning rate.

    Written to be wrapped in a `LearningRateScheduler` callback: it emits a
    `'learning rate'` scalar summary for the epoch and hands the rate back
    unchanged, so it never alters the optimizer's learning rate itself.

    Args:
        epoch: epoch index, used as the summary step.
        learning_rate: the current learning rate.

    Returns:
        `learning_rate`, unmodified.
    """
    # NOTE(review): no summary writer is configured in the visible cells —
    # confirm one is active, otherwise this summary may not be written anywhere.
    tf.summary.scalar(name='learning rate', data=learning_rate, step=epoch)
    return learning_rate

In [8]:
def get_callbacks(NAME, weight_option, fold_num=None):
    """Create the Keras callbacks used for a training run.

    All monitoring callbacks watch `val_loss` and treat lower as better.

    Args:
        NAME: model name; part of the checkpoint file name.
        weight_option: weight setting; part of the checkpoint file name.
        fold_num: accepted for the caller's convenience but not used here.

    Returns:
        A 4-tuple `(model_checkpoint, early_stop, reduce_lr, lr_logging)`.

    Note:
        The checkpoint path also reads the module-level `EXP_NAME`, which must
        be defined before this function is called. The path is relative to the
        current working directory.
    """
    checkpoint_path = f'results/models/{EXP_NAME}/{NAME}_{weight_option}.h5'
    # Shared monitoring settings for every callback that tracks a metric.
    watch_val_loss = dict(monitor='val_loss', mode='min')

    return (
        # Keep only the best (lowest val_loss) model on disk.
        ModelCheckpoint(checkpoint_path, save_best_only=True, **watch_val_loss),
        # Stop after 20 epochs without improvement.
        EarlyStopping(patience=20, verbose=1, **watch_val_loss),
        # Halve the learning rate after 3 epochs without improvement.
        ReduceLROnPlateau(factor=0.5, patience=3, verbose=1, **watch_val_loss),
        # Pass-through scheduler that only logs the learning rate.
        LearningRateScheduler(lr_schedule),
    )

In [9]:
# Experiment configuration
EPOCHS = 100                # maximum number of epochs passed to model.fit
NAME = "EfficientNetB0"     # model name used in checkpoint and history paths
EXP_NAME = "cross_entropy"  # or "focal_loss"; also used in checkpoint and history paths

In [10]:
# Initial weights for the backbone: passed as `weights=` when the EfficientNet
# backbone is built, and embedded in the checkpoint/history file names.
weight_option = None # use `imagenet` or `None` only

In [11]:
import pprint
import numpy as np

def _stack_labels(dataset):
    """Concatenate the label part of every `(inputs, labels)` item of `dataset`.

    The items are stacked row-wise with `np.vstack`; the inputs are ignored.
    """
    return np.vstack([batch_labels for _, batch_labels in dataset])


def check_label_on_dataset(train_dataset, test_dataset, labels=None):
    """Print per-label sums and row counts for the train and test datasets.

    Each dataset is iterated once in full, so this can be slow on large data.

    Args:
        train_dataset: iterable of `(inputs, labels)` pairs for training.
        test_dataset: iterable of `(inputs, labels)` pairs for testing.
        labels: label names paired column-by-column with the label sums.
            Defaults to the module-level `LABELS`.

    Returns:
        None. The report is written to stdout only.

    Raises:
        ValueError: from `np.vstack` if either dataset yields no items.
    """
    if labels is None:
        # Resolved lazily so the default tracks the imported LABELS constant.
        labels = LABELS

    train_labels = _stack_labels(train_dataset)
    test_labels = _stack_labels(test_dataset)

    # Print
    pprint.pprint(list(zip(labels, train_labels.sum(axis=0))))
    print("Count:", len(train_labels))
    pprint.pprint(list(zip(labels, test_labels.sum(axis=0))))
    print("Count: ", len(test_labels))

    print("All:", len(train_labels)+len(test_labels))

In [12]:
# Load both splits, then print their label distribution as a sanity check.
train_dataset = Dataset().get_train()
test_dataset = Dataset().get_test()

check_label_on_dataset(train_dataset, test_dataset)

2023-03-12 02:29:18.208776: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-12 02:29:19.972641: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31693 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:b1:00.0, compute capability: 8.0


[('No Finding', 7543),
 ('Atelectasis', 9226),
 ('Consolidation', 3739),
 ('Infiltration', 15900),
 ('Pneumothorax', 4224),
 ('Edema', 1836),
 ('Emphysema', 2017),
 ('Fibrosis', 1336),
 ('Effusion', 10655),
 ('Pneumonia', 1154),
 ('Pleural_Thickening', 2711),
 ('Cardiomegaly', 2201),
 ('Nodule', 5082),
 ('Mass', 4641),
 ('Hernia', 175)]
Count: 48995
[('No Finding', 12117),
 ('Atelectasis', 2333),
 ('Consolidation', 928),
 ('Infiltration', 3994),
 ('Pneumothorax', 1078),
 ('Edema', 467),
 ('Emphysema', 499),
 ('Fibrosis', 350),
 ('Effusion', 2662),
 ('Pneumonia', 277),
 ('Pleural_Thickening', 674),
 ('Cardiomegaly', 575),
 ('Nodule', 1249),
 ('Mass', 1141),
 ('Hernia', 52)]
Count:  22424
All: 71419


In [13]:
# Callbacks (checkpointing, early stopping, LR reduction, LR logging)
(
    model_checkpoint_callback,
    early_stop_callback,
    reduce_lr_callback,
    lr_logging_callback,
) = get_callbacks(NAME, weight_option)

# Folder for the training history of this experiment/model combination
path = os.path.join(CURRENT_PATH, "results", "history", f"training_with_{EXP_NAME}", f"{NAME}_{weight_option}")
os.makedirs(path, exist_ok=True)

# CSV Logger
csv_logger = CSVLogger(os.path.join(path, "history.csv"))

# Backbone: EfficientNetB0 built with include_top=False and no pooling
transfer_model = tf.keras.applications.efficientnet.EfficientNetB0(
    include_top=False,
    weights=weight_option,
    input_shape=(224, 224, 3),
    pooling=None
)

# Wrap the backbone with the project's `Model` helper and build the final network
model = Model(transfer_model).get_model(flatten=True)
# Derive the name from the config constants so it cannot drift from them;
# with the current values this is "EfficientNetB0_None_Fold_3_Under_Sampling".
model._name = f"{NAME}_{weight_option}_Fold_3_Under_Sampling"
model.summary()

Model: "EfficientNetB0_None_Fold_3_Under_Sampling"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 efficientnetb0 (Functional)  (None, 7, 7, 1280)       4049571   
                                                                 
 flatten (Flatten)           (None, 62720)             0         
                                                                 
 dense (Dense)               (None, 128)               8028288   
                                                                 
 dense_1 (Dense)             (None, 128)               16512     
                                                                 
 dense_2 (Dense)             (None, 64)                8256      
                                                                 
 dense_3 (Dense)             (None, 64)                4160      
                                                                 
 dense_4 (Dense)         

In [14]:
# Report the trainable flag of the whole model, then of each top-level layer.
print(f"Model Trainable: {model.trainable}")
for layer in model.layers:
    layer_name, is_trainable = layer.name, layer.trainable
    print(f"{layer_name} Trainable: {is_trainable}")

Model Trainable: True
efficientnetb0 Trainable: True
flatten Trainable: True
dense Trainable: True
dense_1 Trainable: True
dense_2 Trainable: True
dense_3 Trainable: True
dense_4 Trainable: True


In [15]:
# Train the model
# Callback order is kept as: checkpoint, CSV log, early stop, LR reduction, LR logging.
training_callbacks = [
    model_checkpoint_callback,
    csv_logger,
    early_stop_callback,
    reduce_lr_callback,
    lr_logging_callback,
]
# NOTE(review): the test split doubles as validation data, so checkpoint
# selection and early stopping are driven by test performance — confirm this is intended.
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=EPOCHS,
    callbacks=training_callbacks,
    verbose=1,  # Show progress bar while training
)

Epoch 1/100


2023-03-12 02:29:55.660492: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2023-03-12 02:29:56.532637: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-03-12 02:29:56.533396: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-03-12 02:29:56.533416: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2023-03-12 02:29:56.534162: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-03-12 02:29:56.534211: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.
2023-03-12 02:30:00.395033: I tensorflow/stream_executor/cuda/c

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 8: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 17: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 22: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 25: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 28: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 31: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 34: ReduceLROnPlateau reducing lear

In [16]:
# Completion marker: shows that every cell above has finished running.
print("Done")

Done
