In [None]:
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.backend as K
from matplotlib import pyplot as plt
import math, os, re, warnings, random
from sklearn.utils import class_weight
#from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras import optimizers, applications, Sequential, losses, metrics
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint,LearningRateScheduler
import tensorflow_addons as tfa

In [None]:
import keras_tuner as kt

In [None]:
#from google.colab import drive
#drive.mount('/content/drive',force_remount=False)

In [None]:
def seed_everything(seed=0):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and
            TensorFlow, and exported through the environment.
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # The variable CPython reads is PYTHONHASHSEED; the previous spelling
    # ('PYTHONHASHED') was a typo and had no effect at all. Exporting it here
    # only influences child interpreters: the running kernel fixed its own
    # hash seed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Ask TensorFlow to prefer deterministic op implementations.
    os.environ['TF_DETERMINISTIC_OPS'] = "1"


seed = 0
seed_everything(seed)
warnings.filterwarnings('ignore')

In [None]:
# Distribution strategy currently in scope (TensorFlow's default one when no
# strategy scope has been entered).
strategy = tf.distribute.get_strategy()
# Number of replicas that train in sync under `strategy`; used to scale the
# batch size and learning rate below.
REPLICAS = strategy.num_replicas_in_sync
# Sentinel passed to tf.data calls so parallelism / prefetch depth is tuned at runtime.
AUTO = tf.data.experimental.AUTOTUNE
print(f'# REPLICAS: {REPLICAS}')

In [None]:
# --- Training configuration -------------------------------------------------
# Per-replica values scaled by the number of replicas in sync.
BATCH_SIZE = REPLICAS * 32
LEARNING_RATE = REPLICAS * 3e-5

# Maximum number of epochs and the early-stopping patience (in epochs).
EPOCHS = 32
ES_PATIENCE = 10

# Image geometry fed to the network.
HEIGHT = 299
WIDTH = 299
CHANNELS = 3
IMAGE_SIZE = [HEIGHT, WIDTH]

# Augmentation batch size mirrors the training batch size.
AUG_BATCH = BATCH_SIZE

In [None]:
# Root folder that contains the Training/Validation/Test splits, each with
# Negative/ and Positive/ sub-folders of PNG files. Set the CTD_DATA_DIR
# environment variable to point somewhere else without editing the notebook;
# the fallback is the original local path.
GCS_PATH = os.environ.get("CTD_DATA_DIR", "D:/Super_PC_files/CTD_dataset/mode3_mel_v11_4_1")
#GCS_PATH = '/content/drive/MyDrive/ILD-Project/Batch preparation - ILD/Training_Validation_Test_mode_5'

# glob returns files in arbitrary, filesystem-dependent order. Sorting gives
# the seeded shuffles further down the same starting point on every machine.
train_dataset_negative = sorted(glob.glob(GCS_PATH + '/Training/Negative/*.png'))
train_dataset_positive = sorted(glob.glob(GCS_PATH + '/Training/Positive/*.png'))
val_dataset_negative = sorted(glob.glob(GCS_PATH + '/Validation/Negative/*.png'))
val_dataset_positive = sorted(glob.glob(GCS_PATH + '/Validation/Positive/*.png'))
test_dataset_negative = sorted(glob.glob(GCS_PATH + '/Test/Negative/*.png'))
test_dataset_positive = sorted(glob.glob(GCS_PATH + '/Test/Positive/*.png'))

# Run identifier reused for the tuner project name and the saved-model file name.
category='CTD_inception_tuner_v11'
#train_dataset_negative = glob.glob(GCS_PATH + '/Negative/*.png')
#train_dataset_positive = glob.glob(GCS_PATH + '/Positive/*.png')

In [None]:
# Start from copies of the globbed Test file lists.
TTA = list(test_dataset_negative)
TTH = list(test_dataset_positive)

print(len(TTA))
print(len(TTH))

# Labels: files from the Negative folder get 1, files from the Positive folder get 0.
Label_TTA = [1 for _ in TTA]
Label_TTH = [0 for _ in TTH]

# Append the positives in place, so TTA / Label_TTA end up holding the whole
# test split, then build one shuffled DataFrame with a fresh index.
TTA += TTH
Label_TTA += Label_TTH
tTdf = (
    pd.DataFrame({'path': TTA, 'label': Label_TTA})
    .sample(frac=1)
    .reset_index(drop=True)
)
TTFILENAMES, TTLABELS = tTdf['path'], tTdf['label']


print('Test Merged Data:-')
tTdf

In [None]:
# NOTE: despite the 'T' prefix, TA / TH / tdf / TFILENAMES / TLABELS hold the
# *validation* split (the test split uses the 'TT' prefix).
TA=[]
TH=[]

TA.extend(val_dataset_negative)
TH.extend(val_dataset_positive)

print(len(TA))
print(len(TH))

# Create labels: files from the Negative folder get 1, Positive folder get 0.
Label_TA = [1]*len(TA)
Label_TH = [0]*len(TH)

# Merge both classes into one shuffled DataFrame. After the extend() calls
# TA / Label_TA contain the whole validation split, not just the negatives.
TA.extend(TH)
Label_TA.extend(Label_TH)
tdf = pd.DataFrame({'path':TA, 'label':Label_TA})
tdf = tdf.sample(frac=1).reset_index(drop=True)
TFILENAMES = tdf['path']
TLABELS = tdf['label']


# The banner used to read 'Test Merged Data:-' (copy-paste from the test
# cell); this DataFrame is the validation split.
print('Validation Merged Data:-')
tdf

In [None]:
# Build the training table: one row per PNG with its path and class label.
A = list(train_dataset_negative)
H = list(train_dataset_positive)

print(len(A))
print(len(H))

# Labels: Negative folder -> 1, Positive folder -> 0.
Label_A = [1 for _ in A]
Label_H = [0 for _ in H]

# Append the positives in place; from here on A / Label_A hold the complete
# training split (A is no longer "negatives only").
A += H
Label_A += Label_H
df = (
    pd.DataFrame({'path': A, 'label': Label_A})
    .sample(frac=1)
    .reset_index(drop=True)
)

FILENAMES, LABELS = df['path'], df['label']

print('Final Merged Data:-')
df

In [None]:
# 'balanced' per-class weights computed from the training labels; they are
# handed to Keras so the loss compensates for class imbalance.
cw = class_weight.compute_class_weight(
    y=LABELS,
    classes=np.unique(LABELS),
    class_weight='balanced',
)
# Re-key the weight array as the {class_index: weight} dict Keras expects.
cw = {class_index: cw[class_index] for class_index in (0, 1)}
cw

In [None]:
# Define Augmentation function:-
def data_augment(image, label):
    """Randomly augment one training image; the label is passed through as is.

    The image is expected on the 0-255 float scale that `parse_data` yields
    (PNG decode followed by resize, with no normalisation). The pixel-level
    steps below are written for that scale: the earlier version used
    constants meant for [0, 1] images, so "salt" pixels were written as 1.0
    (almost black), gamma correction collapsed the dynamic range and the
    brightness jitter was effectively a no-op.

    Args:
        image: Float tensor of shape (HEIGHT, WIDTH, CHANNELS), values 0-255.
        label: Label tensor, returned untouched.

    Returns:
        Tuple ``(image, label)`` with ``image`` resized back to
        (HEIGHT, WIDTH, CHANNELS).
    """
    pixel_max = 255.0

    # One independent uniform draw per augmentation family.
    p_spatial = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_rotate = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_pixel = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_shear = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_crop = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    p_noise = tf.random.uniform([], 0, 1.0, dtype=tf.float32)

    image = tf.image.random_flip_up_down(image)
    image = tf.image.random_flip_left_right(image)

    # Salt-and-pepper noise: 10% of values forced to white, 10% to black.
    if p_noise > 0.5:
        prob_salt = 0.1
        prob_pepper = 0.1
        random_values = tf.random.uniform(shape=(HEIGHT, WIDTH, CHANNELS))
        image = tf.where(random_values < prob_salt, pixel_max, image)
        image = tf.where(1 - random_values < prob_pepper, 0., image)

    # Shear with a random factor in [0.1, 0.5). The forward matrix is inverted
    # before being flattened for tfa.image.transform.
    if p_shear >= 0.3:
        shear_lambda = tf.random.uniform([], 0.1, 0.5, dtype=tf.float32)
        forward_transform = [[1.0, 0, 0], [shear_lambda, 1.0, 0], [0, 0, 1.0]]
        t = tfa.image.transform_ops.matrices_to_flat_transforms(tf.linalg.inv(forward_transform))
        image = tfa.image.transform(image, t, interpolation="BILINEAR", name=None)

    if p_spatial > .75:
        image = tf.image.transpose(image)

    # Rotation by a random angle, converted from degrees to radians.
    if p_rotate >= 0.3:
        degree = tf.random.uniform([], 0, 360.0, dtype=tf.float32)
        degree = (degree * (3.141592653589793) / 180)
        image = tfa.image.rotate(image, degree, interpolation='BILINEAR')

    # Exactly one photometric change is applied, selected by the band p_pixel falls in.
    if p_pixel >= .2:
        if p_pixel >= .8:
            image = tf.image.random_saturation(image, lower=.7, upper=1.3)
        elif p_pixel >= .6:
            image = tf.image.random_contrast(image, lower=.8, upper=1.2)
        elif p_pixel >= .4:
            # +/-10% of the full range (0.1 alone is invisible on 0-255 data).
            image = tf.image.random_brightness(image, max_delta=.1 * pixel_max)
        else:
            # Gamma is a power law, so apply it on [0, 1] and scale back.
            image = tf.image.adjust_gamma(image / pixel_max, gamma=.6) * pixel_max

    # Either a central crop (60/70/80%) or a random square crop.
    if p_crop > .7:
        if p_crop > .9:
            image = tf.image.central_crop(image, central_fraction=.6)
        elif p_crop > .8:
            image = tf.image.central_crop(image, central_fraction=.7)
        else:
            image = tf.image.central_crop(image, central_fraction=.8)
    elif p_crop > .4:
        crop_size = tf.random.uniform([], int(HEIGHT*.6), HEIGHT, dtype=tf.int32)
        image = tf.image.random_crop(image, size=[crop_size, crop_size, CHANNELS])

    # Crops change the spatial size; restore the fixed network input shape.
    image = tf.image.resize(image, size=[HEIGHT, WIDTH])
    image = tf.reshape(image, [HEIGHT, WIDTH, CHANNELS])

    return image, label

In [None]:
def parse_data(filename,label):
    """Load one PNG from disk and pair it with a one-hot label.

    Args:
        filename: Scalar string tensor holding the path of a PNG file.
        label: Integer class index (0 or 1).

    Returns:
        Tuple ``(image, one_hot_label)``: ``image`` is a float32 tensor of
        size IMAGE_SIZE with CHANNELS channels whose values stay on the
        0-255 scale (no normalisation is applied here); ``one_hot_label``
        has depth 2.
    """
    image = tf.io.read_file(filename)
    # Force a fixed channel count: without `channels`, grayscale or RGBA PNGs
    # decode to 1 or 4 channels and break the fixed 3-channel reshape/model
    # input downstream.
    image = tf.image.decode_png(image, channels=CHANNELS)
    image = tf.image.resize(image, IMAGE_SIZE)
    return image, tf.one_hot(label,2)

def load_dataset(filenames, labels ,ordered=False):
    """Create an un-batched tf.data pipeline of decoded (image, label) pairs.

    Args:
        filenames: Sequence of PNG paths.
        labels: Sequence of integer class indices aligned with ``filenames``.
        ordered: When False, tf.data may yield elements out of order in
            exchange for throughput; when True the default (deterministic)
            ordering is kept.

    Returns:
        A ``tf.data.Dataset`` whose elements are produced by ``parse_data``.
    """
    options = tf.data.Options()
    if not ordered:
        # `deterministic` is the current name of the deprecated
        # `experimental_deterministic` option.
        options.deterministic = False

    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    dataset = dataset.with_options(options)
    dataset = dataset.map(parse_data, num_parallel_calls=AUTO)
    return dataset

def get_dataset(FILENAMES,LABELS, ordered=False, repeated=False, augment=False):
    """Build the batched, prefetched input pipeline for one data split.

    Args:
        FILENAMES: Sequence of PNG paths for the split.
        LABELS: Sequence of integer class indices aligned with ``FILENAMES``.
        ordered: Keep file order (no shuffling) when True.
        repeated: Repeat the dataset indefinitely when True.
        augment: Apply ``data_augment`` to every element when True.

    Returns:
        A ``tf.data.Dataset`` of ``(images, one_hot_labels)`` batches of size
        BATCH_SIZE.
    """
    dataset = load_dataset(FILENAMES, LABELS, ordered=ordered)
    if augment:
        dataset = dataset.map(data_augment, num_parallel_calls=AUTO)
    if not ordered:
        # Size the buffer from the split actually passed in. The previous
        # len(A)+len(H) read unrelated notebook globals (and counted the
        # positives twice, because A had already been extended with H).
        dataset = dataset.shuffle(max(len(FILENAMES), 1))
    if repeated:
        # Repeat AFTER shuffling so every epoch is a full pass over the data
        # instead of a blend of neighbouring epochs.
        dataset = dataset.repeat()
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(AUTO)
    return dataset

In [None]:
import math
# Keep NumPy reprs compact when arrays are printed in cell output.
np.set_printoptions(linewidth=80, threshold=15)
# Class indices, looked up when building plot titles.
CLASSES = [0, 1]

def batch_to_numpy_images_and_labels(data):
    """Split an ``(images, labels)`` batch into NumPy images and string labels.

    Both items must expose ``.numpy()``. Each label entry is converted with
    ``str`` so it can be used directly as a plot title.
    """
    image_batch, label_batch = data
    numpy_images = image_batch.numpy()
    label_strings = list(map(str, label_batch.numpy()))
    return numpy_images, label_strings

def title_from_label_and_target(label, correct_label):
    """Return ``(title, correct)`` for a predicted class index.

    With no ground truth (``correct_label is None``) the title is just the
    predicted class and ``correct`` is True. Otherwise the title carries an
    OK/NO marker and, for a wrong prediction, an arrow to the expected class.
    """
    predicted = CLASSES[label]
    if correct_label is None:
        return predicted, True

    correct = (label == correct_label)
    if correct:
        marker, arrow, expected = 'OK', '', ''
    else:
        marker, arrow, expected = 'NO', u"\u2192", CLASSES[correct_label]
    return "{} [{}{}{}]".format(predicted, marker, arrow, expected), correct

def display_one_flower(image, title, subplot, red=False, titlesize=16):
    """Draw one image into the given subplot slot and return the next slot.

    Args:
        image: NumPy image; cast to uint8 for display.
        title: Title text; skipped when empty.
        subplot: ``(rows, cols, index)`` triple for ``plt.subplot``.
        red: Draw the title in red with a slightly smaller font.
        titlesize: Base font size of the title.

    Returns:
        ``(rows, cols, index + 1)``.
    """
    plt.subplot(*subplot)
    plt.axis('off')
    plt.imshow(image.astype('uint8'))
    if len(title) > 0:
        if red:
            font_size, colour = int(titlesize/1.2), 'red'
        else:
            font_size, colour = int(titlesize), 'black'
        plt.title(title, fontsize=font_size, color=colour,
                  fontdict={'verticalalignment':'center'}, pad=int(titlesize/1.5))
    n_rows, n_cols, position = subplot[0], subplot[1], subplot[2]
    return (n_rows, n_cols, position + 1)
    
def display_batch_of_images(databatch, predictions=None):
    """Plot a batch of images in a square-ish grid, titled with their labels.

    Supported calls:
        display_batch_of_images((images, labels))
        display_batch_of_images((images, labels), predictions)

    A bare ``images`` batch is NOT supported: the batch is always unpacked
    into two items by ``batch_to_numpy_images_and_labels``.

    Args:
        databatch: ``(images, labels)`` pair of tensors exposing ``.numpy()``.
        predictions: Optional sequence of predicted class indices, one per
            image; when given, titles come from ``title_from_label_and_target``.
            NOTE(review): labels reach this function as strings, so the
            comparison against integer predictions looks unreliable - confirm
            before using this path.

    Images that do not fit the ``rows x cols`` grid are dropped. An empty
    batch draws nothing (it used to raise ZeroDivisionError).
    """
    # data
    images, labels = batch_to_numpy_images_and_labels(databatch)
    if labels is None:
        labels = [None] * len(images)

    if len(images) == 0:
        return

    # auto-squaring: this will drop data that does not fit into square or square-ish rectangle
    rows = int(math.sqrt(len(images)))
    cols = len(images)//rows

    # size and spacing
    FIGSIZE = 13.0
    SPACING = 0.1
    subplot=(rows,cols,1)
    if rows < cols:
        plt.figure(figsize=(FIGSIZE,FIGSIZE/cols*rows))
    else:
        plt.figure(figsize=(FIGSIZE/rows*cols,FIGSIZE))

    # magic formula tested to work from 1x1 to 10x10 images (loop-invariant)
    dynamic_titlesize = FIGSIZE*SPACING/max(rows,cols)*40+3

    # display
    last_label = None
    for i, (image, label) in enumerate(zip(images[:rows*cols], labels[:rows*cols])):
        last_label = label
        # An absent label means "no title" rather than a crash inside len().
        title = '' if label is None else label
        correct = True
        if predictions is not None:
            title, correct = title_from_label_and_target(predictions[i], label)
        subplot = display_one_flower(image, title, subplot, not correct, titlesize=dynamic_titlesize)

    # layout: no gaps when there are no titles, otherwise leave room for them
    plt.tight_layout()
    if last_label is None and predictions is None:
        plt.subplots_adjust(wspace=0, hspace=0)
    else:
        plt.subplots_adjust(wspace=SPACING, hspace=SPACING)
    
    
# Model evaluation
def plot_metrics(history):
    """Plot every training metric with its validation curve, one row each.

    Args:
        history: Mapping ``metric name -> per-epoch values`` (for example
            ``History.history``). The ``'lr'`` entry is ignored. A metric
            ``name`` is paired with ``'val_' + name`` when that key exists,
            so the result no longer depends on key order or on every metric
            having a validation twin.

    A dashed vertical line marks the best epoch of each curve (minimum for
    names containing 'loss', maximum otherwise). Nothing is drawn when the
    history holds no training metric.
    """
    # `!=` instead of `is not`: identity comparison against a string literal
    # is unreliable and raises a SyntaxWarning on recent Python versions.
    metric_list = [m for m in history.keys() if m != 'lr' and not m.startswith('val_')]
    size = len(metric_list)
    if size == 0:
        return

    fig, axes = plt.subplots(size, 1, sharex='col', figsize=(20, size * 4))
    if size > 1:
        axes = axes.flatten()
    else:
        axes = [axes]

    for index, metric_name in enumerate(metric_list):
        val_metric_name = 'val_' + metric_name
        has_validation = val_metric_name in history
        best_epoch = np.argmin if 'loss' in metric_name else np.argmax

        axes[index].plot(history[metric_name], label='Train %s' % metric_name)
        if has_validation:
            axes[index].plot(history[val_metric_name], label='Validation %s' % metric_name)
        axes[index].legend(loc='best', fontsize=16)
        axes[index].set_title(metric_name)
        axes[index].axvline(best_epoch(history[metric_name]), linestyle='dashed')
        if has_validation:
            axes[index].axvline(best_epoch(history[val_metric_name]), linestyle='dashed', color='orange')

    plt.xlabel('Epochs', fontsize=16)
    sns.despine()
    plt.show()

In [None]:
# Smoke-test the input pipeline: first nine training files, original order,
# augmentation switched on.
train_dataset = get_dataset(
    FILENAMES[:9],
    LABELS[:9],
    augment=True,
    ordered=True,
)
# Re-batch into groups of up to 20 so a single next() returns all samples.
train_iter = iter(train_dataset.unbatch().batch(20))

# Uncomment to preview the augmented images:
#display_batch_of_images(next(train_iter))

In [None]:
# Cosine Annealing:-
# Schedule shape: linear ramp-up from LR_START to LR_MAX, an optional plateau,
# then a cosine decay that is floored at LR_MIN.
LR_MAX = LEARNING_RATE
LR_MIN = 1e-8
LR_START = 1e-8
LR_RAMPUP_EPOCHS = 3
LR_SUSTAIN_EPOCHS = 0
# Number of cosine cycles over the decay phase (.5 = a single half-wave).
N_CYCLES = .5

# Keras preprocessing-layer augmentation: random horizontal/vertical flips
# followed by a random rotation with factor 0.2.
data_augmentation = tf.keras.Sequential(
    [
        tf.keras.layers.RandomFlip('horizontal_and_vertical'),
        tf.keras.layers.RandomRotation(0.2),
    ]
)

def lrfn(epoch):
    """Learning rate for ``epoch``: linear warm-up, optional plateau, cosine decay.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The learning rate as a plain Python float. The earlier version
        returned a TensorFlow tensor during the decay phase and a float
        otherwise; ``LearningRateScheduler`` in recent Keras releases only
        accepts floats, and the mixed types also made ``max()`` / formatting
        of the schedule fragile.
    """
    decay_start = LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS
    if epoch < LR_RAMPUP_EPOCHS:
        lr = (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
    elif epoch < decay_start:
        lr = LR_MAX
    else:
        # Guard the division when the warm-up/plateau already use up EPOCHS.
        decay_epochs = max(EPOCHS - decay_start, 1)
        progress = (epoch - decay_start) / decay_epochs
        lr = LR_MAX * (0.5 * (1.0 + math.cos(math.pi * N_CYCLES * 2.0 * progress)))
        if LR_MIN is not None:
            lr = max(LR_MIN, lr)

    return float(lr)

# Evaluate the schedule once per epoch and plot it as a sanity check.
rng = list(range(EPOCHS))
y = list(map(lrfn, rng))

sns.set(style='whitegrid')
fig, ax = plt.subplots(figsize=(20, 6))
plt.plot(rng, y)

# First, peak and final learning rate of the schedule.
print(f'Learning rate schedule: {y[0]:.3g} to {max(y):.3g} to {y[-1]:.3g}')

In [None]:
# Model Architecture :-
#class myhypermodel(kt.HyperModel):
def model_fn(hp):
    """Build and compile the InceptionV3 transfer-learning model for one trial.

    Args:
        hp: ``keras_tuner.HyperParameters``. Two values are sampled from it:
            ``'dropout'`` (0-0.5 in steps of 0.05) and ``'learning_rate'``
            (1e-5 to 1e-2, log scale).

    Returns:
        A compiled ``tf.keras.Sequential``: ImageNet-initialised InceptionV3
        (global average pooling, no top) -> Dropout -> 2-unit sigmoid output,
        trained with binary cross-entropy and tracked with accuracy and
        weighted F1.
    """
    K.clear_session()
    n_classes = 2
    input_image = L.Input(shape=(None, None, CHANNELS), name='input_image')
    base_model = tf.keras.applications.InceptionV3(input_tensor=input_image,
                                                   include_top=False,
                                                   weights='imagenet',
                                                   pooling='avg')

    # Fine-tune everything except batch normalisation. The type check is
    # required: InceptionV3 names its BN layers 'batch_normalization_N', which
    # the previous substring test ('bn' in layer.name) never matched, so no
    # layer was actually frozen. The name test is kept for backbones that do
    # use a '*_bn' naming scheme.
    for layer in base_model.layers:
        is_batch_norm = isinstance(layer, L.BatchNormalization) or 'bn' in layer.name
        layer.trainable = not is_batch_norm

    model = tf.keras.Sequential([
        base_model,
        L.Dropout(hp.Float('dropout', 0, 0.5, step=0.05, default=0.5)),
        L.Dense(n_classes, activation='sigmoid', name='output')
    ])

    # Local name on purpose, so the notebook-level LEARNING_RATE constant is
    # not shadowed. `learning_rate=` replaces the deprecated `lr=` keyword,
    # which newer Keras optimizers ignore or reject.
    learning_rate = hp.Float('learning_rate', 1e-5, 1e-2, sampling='log')
    optimizer = optimizers.Adam(learning_rate=learning_rate)

    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy', tfa.metrics.F1Score(num_classes=n_classes, average='weighted')])

    return model

In [None]:
# Preview of the KerasTuner project name that the search cell derives from `category`.
f"Transfer_tuner_{category}"

In [None]:
# Hyperparameter search: one Bayesian-optimisation study per candidate batch size.
# (No cross-validation happens here; the fold splitter and accumulators below
# are only kept because other cells may still refer to them.)
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
oof_pred = []; oof_labels = []; history_list = []
cv1 = cv2 = 0


batch_sizes=[10,16,18,19,20,21,22,23,24]
result_list=[]   # one summary dict per batch size
best_models=[]   # best trained model per batch size
p_name="Transfer_tuner_"+category

#model_path = '/content/drive/MyDrive/ILD-Project/model_'+category+'.h5'
model_path = './'+category+'.h5'
es = EarlyStopping(monitor='val_loss', mode='min',
                patience=ES_PATIENCE, restore_best_weights=True, verbose=1)
training_data=get_dataset(FILENAMES,LABELS, ordered=False, repeated=True, augment=True)
validation_data=get_dataset(TFILENAMES,TLABELS , ordered=True, repeated=False, augment=False)

Tuner_cach_path= "./"
for batch_size in batch_sizes:
    hp =  kt.HyperParameters()

    # get_dataset() batches with the global BATCH_SIZE, so the loop variable
    # used to change only steps_per_epoch. Re-batch the stream so each study
    # really trains with `batch_size`.
    train_batches = training_data.unbatch().batch(batch_size).prefetch(AUTO)

    tuner = kt.BayesianOptimization(
        hypermodel= model_fn,
        objective='val_loss',
        max_trials=5,
        overwrite=True,
        directory=Tuner_cach_path,
        # One project per batch size; with a shared name and overwrite=True
        # every iteration wiped the results of the previous one.
        project_name=f"{p_name}_bs{batch_size}")
    #tuner.search_space_summary()

    # No LearningRateScheduler(lrfn) here: it resets the optimizer's learning
    # rate at the start of every epoch and would therefore cancel the
    # 'learning_rate' hyperparameter that model_fn asks the tuner to explore.
    tuner.search(train_batches,
                epochs=EPOCHS,
                validation_data=validation_data,
                callbacks=[es],
                steps_per_epoch=max(1, len(FILENAMES) // batch_size),
                class_weight=cw)

    best_hp = tuner.get_best_hyperparameters()[0]
    best_trial = tuner.oracle.get_best_trials(num_trials=1)[0]
    # get_best_models() reloads the weights of the best trial;
    # hypermodel.build(best_hp) only produced a fresh, untrained model.
    best_models.append(tuner.get_best_models(num_models=1)[0])
    # results_summary() prints and returns None, so keep the numbers explicitly.
    result_list.append({'batch_size': batch_size,
                        'best_val_loss': best_trial.score,
                        'best_hyperparameters': dict(best_hp.values)})
    tuner.results_summary()

In [None]:
# Print the three best trials of the most recent tuner study.
tuner.results_summary(num_trials=3)