In [1]:
import openpyxl
import os
import numpy as np
import scipy
import matplotlib.pyplot as plt
import pandas as pd 
from sklearn import metrics
import random
import gc 

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Sequential

In [3]:
# Export the XLA flag through the process environment so TensorFlow can pick it up.
# NOTE(review): presumably this must be set before TensorFlow initialises its devices — confirm.
os.environ.update(TF_XLA_FLAGS='--tf_xla_enable_xla_devices')

In [7]:
# ---- Run configuration ----
# label : prediction target; also used in the output directory names
# date  : run stamp; also used in the output directory names
label, date = "bone", 20230302
# EfficientNet variant indices to train (5 -> EfficientNetB5)
Efficients = [5]
# Maximum number of epochs passed to model.fit
epochs = 1000

In [8]:
################# loading lists of png files

In [None]:
# Dataset of training PNG file paths; list_files' own shuffling is switched off here.
train_ds = tf.data.Dataset.list_files(
    file_pattern='/home/hk/Research/CXRage/pngbone/train/*.png',
    shuffle=False,
)

In [10]:
# Number of training files; used as the shuffle buffer so the whole list is mixed.
image_count1 = len(train_ds)
# Shuffle the file list once; the resulting order is reused on every iteration.
train_ds = train_ds.shuffle(
    buffer_size=image_count1,
    reshuffle_each_iteration=False,
)

In [11]:
# Dataset of validation PNG file paths; list_files' own shuffling is switched off here.
val_ds = tf.data.Dataset.list_files(
    file_pattern='/home/hk/Research/CXRage/pngbone/val/*.png',
    shuffle=False,
)

In [12]:
# Number of validation files; used as the shuffle buffer so the whole list is mixed.
image_count2 = len(val_ds)
# Shuffle the file list once; the resulting order is reused on every iteration.
val_ds = val_ds.shuffle(
    buffer_size=image_count2,
    reshuffle_each_iteration=False,
)

In [13]:
# Dataset of test PNG file paths; list_files' own shuffling is switched off here.
test_ds = tf.data.Dataset.list_files(
    file_pattern='/home/hk/Research/CXRage/pngbone/test/*.png',
    shuffle=False,
)

In [14]:
# Number of test files; used as the shuffle buffer so the whole list is mixed.
image_count3 = len(test_ds)
# Shuffle the file list once; the resulting order is reused on every iteration.
test_ds = test_ds.shuffle(
    buffer_size=image_count3,
    reshuffle_each_iteration=False,
)

In [15]:
# Report how many files each split contains.
for _template, _n_cases in (
    ('Number of training set cases: %d', image_count1),
    ('Number of validation set cases: %d', image_count2),
    ('Number of test set cases: %d', image_count3),
):
    print(_template % (_n_cases))

Number of training set cases: 38166
Number of validation set cases: 4710
Number of test set cases: 4794


In [16]:
# Create (if needed) and enter the results directory for this run.
result_dir = "/home/hk/Research/CXRage/result/%s_%d" % (label, date)
# exist_ok=True keeps the cell re-runnable: without it a second execution
# raises FileExistsError because the directory was created by the first run.
os.makedirs(result_dir, exist_ok=True)
os.chdir(result_dir)

In [22]:
################# tf dataset load

In [23]:
def load_image(filename, imagesize, augment=False):
    """Read a PNG file and return it resized to ``imagesize`` x ``imagesize``.

    Args:
        filename: path of the PNG file to read.
        imagesize: side length (pixels) of the square output image.
        augment: when True, apply random brightness and contrast jitter and
            clip the result back to the 0-255 range.

    Returns:
        The resized (and optionally jittered) 3-channel image tensor.
    """
    png_bytes = tf.io.read_file(filename)
    decoded = tf.image.decode_png(png_bytes, channels=3)

    # Declare the static shape of the decoded image.
    # NOTE(review): assumes every source PNG is 512x512 — confirm against the data.
    decoded.set_shape([512, 512, 3])
    resized = tf.image.resize(decoded, [imagesize, imagesize])

    if not augment:
        return resized

    # Photometric jitter, then clip so values stay inside 0-255.
    jittered = tf.image.random_brightness(resized, max_delta=25)
    jittered = tf.image.random_contrast(jittered, 0.7, 1.3)
    return tf.clip_by_value(jittered, 0, 255)

In [24]:
def get_label(filename):
    """Parse the numeric label encoded in a file path.

    The label is the second-to-last dot-separated field of ``filename``
    (presumably paths look like ``<name>.<label>.png`` — verify against the data).

    Returns:
        The label converted to a float32 number.
    """
    dot_fields = tf.strings.split(filename, sep=".")
    return tf.strings.to_number(dot_fields[-2], tf.float32)

In [26]:
# Geometric augmentation pipeline: random zoom, horizontal flip, translation
# and rotation. build_model inserts it directly after the input layer.
_prep = tf.keras.layers.experimental.preprocessing

img_augmentation = Sequential(
    [
        _prep.RandomZoom(
            height_factor=(-0.1, 0.1),
            fill_mode="constant",
        ),
        _prep.RandomFlip(mode="horizontal"),
        _prep.RandomTranslation(
            height_factor=(-0.05, 0.05),
            width_factor=(-0.05, 0.05),
            fill_mode="constant",
        ),
        _prep.RandomRotation(
            factor=(-0.05, 0.05),
            fill_mode="constant",
        ),
    ],
    name="img_augmentation",
)

In [29]:
# Number of samples per batch for every split.
batch_size = 40


def configure_for_performance(ds):
    """Shuffle, batch and prefetch a dataset of (image, label) pairs.

    Reads the module-level ``chosen`` (model variant string) and ``batch_size``.
    The dataset is cached only when ``chosen`` is "B2". Incomplete final
    batches are dropped.

    Returns:
        The configured dataset.
    """
    pipeline = ds.cache() if chosen == "B2" else ds
    return (
        pipeline
        .shuffle(buffer_size=1000)
        .batch(batch_size, drop_remainder=True)
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )

In [30]:
from tensorflow.keras.applications import EfficientNetB5

In [31]:
def build_model(targetmodel, imagesize, learning_rate=None):
    """Build and compile a binary classifier on a frozen ImageNet backbone.

    The network is: input -> ``img_augmentation`` -> ``targetmodel`` (frozen,
    without its top) -> global average pooling -> batch norm -> three
    dropout/dense(ReLU) stages (256, 128, 64 units) -> dropout -> a single
    sigmoid unit named "pred".

    Args:
        targetmodel: Keras application constructor (e.g. ``EfficientNetB5``)
            accepting ``include_top``, ``input_tensor`` and ``weights``.
        imagesize: side length of the square RGB input.
        learning_rate: learning rate or schedule for the SGD optimizer. When
            omitted, the module-level ``cos_decay_ann`` schedule is used, which
            is what this function always did before the parameter existed.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    if learning_rate is None:
        # Backward-compatible default: the schedule defined by the training cell.
        learning_rate = cos_decay_ann

    inputs = layers.Input(shape=(imagesize, imagesize, 3))
    augmented = img_augmentation(inputs)
    backbone = targetmodel(include_top=False, input_tensor=augmented, weights="imagenet")

    # Freeze the pretrained weights
    backbone.trainable = False

    # Rebuild top
    x = layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    x = layers.BatchNormalization()(x)

    droprate = 0.3

    x = layers.Dropout(droprate, name="dropout1")(x)
    x = layers.Dense(256, kernel_initializer='he_normal', activation="relu")(x)

    x = layers.Dropout(droprate, name="dropout2")(x)
    x = layers.Dense(128, kernel_initializer='he_normal', activation="relu")(x)

    x = layers.Dropout(droprate, name="dropout3")(x)
    x = layers.Dense(64, kernel_initializer='he_normal', activation="relu")(x)

    x = layers.Dropout(droprate, name="dropout4")(x)
    outputs = layers.Dense(1, kernel_initializer='zeros', bias_initializer='zeros', activation="sigmoid", name="pred")(x)

    # Compile
    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    model.compile(
        optimizer=keras.optimizers.SGD(learning_rate=learning_rate),
        # The "pred" layer already applies a sigmoid, so the loss receives
        # probabilities, not logits: from_logits must be False. With
        # from_logits=True the sigmoid would effectively be applied twice.
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
        # Name the metric explicitly so the logged keys are always
        # 'auc' / 'val_auc', which is what the checkpoint callback monitors.
        metrics=[tf.keras.metrics.AUC(name="auc")],
    )
    return model

In [32]:
def unfreeze_model(model):
    """Mark every layer of ``model`` trainable, except BatchNormalization layers.

    BatchNormalization layers are skipped, so their ``trainable`` flag is left
    exactly as it was. The model is modified in place; nothing is returned.
    """
    non_bn_layers = (
        lyr for lyr in model.layers
        if not isinstance(lyr, layers.BatchNormalization)
    )
    for lyr in non_bn_layers:
        lyr.trainable = True

In [None]:
# Input image size (pixels per side) used for each EfficientNet variant,
# keyed by the variant index found in `Efficients`.
EFFICIENTNET_INPUT_SIZES = {0: 224, 1: 240, 2: 260, 3: 300, 4: 380, 5: 456, 6: 528, 7: 600}

# For every requested EfficientNet variant:
#   1. "transfer": train the new classification head on the frozen backbone;
#   2. "FFT": unfreeze the non-BatchNorm layers and fine-tune at a lower rate.
# Each stage writes its best checkpoint, final model, CSV log and history.
for i in Efficients:

    # Fail early and clearly on an unknown variant instead of hitting a
    # NameError later because no image size was assigned.
    if i not in EFFICIENTNET_INPUT_SIZES:
        raise ValueError("Unsupported EfficientNet variant: B%s" % (i,))

    path = "/home/hk/Research/CXRage/result/%s_%d_EfficientNetB%d" % (label, date, i)

    paths = [path,
             "%s/transfer/best" % (path),
             "%s/transfer/final" % (path),
             "%s/FFT/best" % (path),
             "%s/FFT/final" % (path)]

    # exist_ok=True: create what is missing, keep what is already there.
    for savepath in paths:
        os.makedirs(savepath, exist_ok=True)

    os.chdir("%s/transfer" % (path))

    # `chosen` is read as a module-level name by configure_for_performance.
    chosen = "B%d" % (i)
    imagesize = EFFICIENTNET_INPUT_SIZES[i]

    def process_path_train(file_path):
        """Map a training file path to an (augmented image, label) pair."""
        label = get_label(file_path)
        img = load_image(file_path, imagesize, augment=True)
        return img, label

    def process_path(file_path):
        """Map a file path to an (image, label) pair without augmentation."""
        label = get_label(file_path)
        img = load_image(file_path, imagesize)
        return img, label

    print("In training: EfficientNet%s" % (chosen))
    print("Input image size is: %d" % (imagesize))

    train_ds1 = train_ds.map(process_path_train, num_parallel_calls=tf.data.AUTOTUNE)
    val_ds1 = val_ds.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)
    test_ds1 = test_ds.map(process_path, num_parallel_calls=tf.data.AUTOTUNE)

    train_ds1 = configure_for_performance(train_ds1)
    val_ds1 = configure_for_performance(val_ds1)
    test_ds1 = configure_for_performance(test_ds1)  # prepared but not consumed in this cell

    # Resolve the constructor by attribute lookup instead of eval(): no string
    # is executed as code, and variants other than the explicitly imported
    # EfficientNetB5 resolve as well.
    targetmodel = getattr(tf.keras.applications, "EfficientNetB%d" % (i))

    # ---------------- Stage 1: transfer learning (frozen backbone) ----------------
    # `cos_decay_ann` must stay a module-level name: build_model reads it.
    # NOTE(review): first_decay_steps counts optimizer steps (batches), not
    # epochs — confirm that 30 is the intended first restart period.
    cos_decay_ann = tf.keras.experimental.CosineDecayRestarts(initial_learning_rate=0.01, first_decay_steps=30, t_mul=2, m_mul=0.95, alpha=0.01)
    early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=40)
    csv_logger = keras.callbacks.CSVLogger('./Model transfer log.csv', append=False, separator=';')
    checkpointer = keras.callbacks.ModelCheckpoint(filepath="%s/transfer/best" % (path),
                                                   verbose=0,
                                                   save_best_only=True, monitor='val_auc', mode='max')

    mirrored_strategy = tf.distribute.MirroredStrategy()

    # Model variables have to be created inside the distribution strategy scope.
    with mirrored_strategy.scope():
        model = build_model(targetmodel, imagesize)
    hist = model.fit(train_ds1, epochs=epochs, validation_data=val_ds1,
                     callbacks=[early_stopping, csv_logger, checkpointer],
                     verbose=2)

    model.save("%s/transfer/final" % (path))
    model.save("%s/transfer/CXRage_transfer_final.h5" % (path))

    hist1 = pd.DataFrame(hist.history)
    hist1['epoch'] = hist.epoch
    hist1.to_csv('history_transfer.csv', index=False, header=True)

    del hist
    del hist1

    tf.keras.backend.clear_session()
    gc.collect()
    gc.collect()
    gc.collect()

    print("Transfer learning done")

    # ---------------- Stage 2: full fine-tuning (FFT) ----------------
    os.chdir("%s/FFT" % (path))

    cos_decay_ann = tf.keras.experimental.CosineDecayRestarts(initial_learning_rate=0.001, first_decay_steps=30, t_mul=2, m_mul=0.95, alpha=0.01)
    early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=40)
    csv_logger = keras.callbacks.CSVLogger('./Model FFT log.csv', append=False, separator=';')
    checkpointer = keras.callbacks.ModelCheckpoint(filepath="%s/FFT/best" % (path),
                                                   verbose=0,
                                                   save_best_only=True, monitor='val_auc', mode='max')

    with mirrored_strategy.scope():
        unfreeze_model(model)
        # Recompile so the changed `trainable` flags take effect.
        model.compile(
            optimizer=keras.optimizers.SGD(learning_rate=cos_decay_ann),
            # The model ends in a sigmoid, so it outputs probabilities:
            # from_logits must be False (True would treat them as logits).
            loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
            # Explicit name keeps the logged key 'val_auc' stable for the
            # checkpoint callback regardless of how many AUC metrics were
            # created earlier in the session.
            metrics=[tf.keras.metrics.AUC(name="auc")],
        )

    hist = model.fit(train_ds1, epochs=epochs, validation_data=val_ds1,
                     callbacks=[early_stopping, csv_logger, checkpointer],
                     verbose=2)

    model.save("%s/FFT/final" % (path))
    model.save("%s/FFT/CXRage_FFT_final.h5" % (path))

    hist1 = pd.DataFrame(hist.history)
    hist1['epoch'] = hist.epoch
    hist1.to_csv('history_FFT.csv', index=False, header=True)

    # Drop per-variant objects before the next iteration to release memory.
    del model
    del hist
    del hist1
    del train_ds1
    del val_ds1
    del test_ds1
    del imagesize
    del targetmodel
    del chosen

    tf.keras.backend.clear_session()
    gc.collect()
    gc.collect()
    gc.collect()

    print("Full fine-tuning done")