In [1]:
!pip install livelossplot

In [2]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from glob import glob
import seaborn as sns
import os
from PIL import Image

import cv2
import tensorflow as tf
from tensorflow import keras as ks
from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau

In [3]:
import itertools

In [4]:
from tensorboard.plugins.hparams import api as hp

In [5]:
import skopt
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_convergence
from skopt.plots import plot_objective, plot_evaluations
from skopt.plots import plot_histogram, plot_objective_2D
from skopt.utils import use_named_args

In [6]:
tf.__version__

In [7]:
# Turn on memory growth for every GPU TensorFlow can see.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpu_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [8]:
DIR_PATH_1 = '/kaggle/input/skin-cancer-mnist-ham10000/ham10000_images_part_1'
DIR_PATH_2 = '/kaggle/input/skin-cancer-mnist-ham10000/ham10000_images_part_2'

DIR_PATH = '/kaggle/input/skin-cancer-mnist-ham10000/'
META_PATH = './HAM10000_metadata.csv'

In [9]:
skin_df = pd.read_csv(os.path.join(DIR_PATH, META_PATH))
skin_df.sample(5)

In [10]:
skin_df.describe().T

In [11]:
skin_df.isna().sum()

In [12]:
avg_age = skin_df['age'].mean()
avg_age

In [13]:
skin_df['age'] = skin_df['age'].fillna(avg_age)

In [14]:
skin_df.isna().sum()

In [15]:
skin_df.sort_values(by=['image_id'],inplace=True)

In [16]:
skin_df.head()

In [17]:
arr_part1 = sorted(glob(os.path.join(DIR_PATH_1, '*.jpg')))
arr_part2 = sorted(glob(os.path.join(DIR_PATH_2, '*.jpg')))

In [18]:
print(arr_part1[:1])
print(arr_part2[:1])

In [19]:
images_part = arr_part1 + arr_part2

In [20]:
# Attach each image's file path by matching the file stem to `image_id`
# instead of relying on the metadata row order and the glob order happening
# to coincide (assumes file names are "<image_id>.jpg" -- the positional
# assignment this replaces depended on the same correspondence).
path_by_image_id = {
    os.path.splitext(os.path.basename(image_path))[0]: image_path
    for image_path in images_part
}
skin_df['path'] = skin_df['image_id'].map(path_by_image_id)
# Fail loudly if any metadata row has no image on disk.
assert skin_df['path'].notna().all(), 'some image_id values have no matching .jpg file'

In [21]:
skin_df.sample(5)

In [22]:
dx_classes = skin_df['dx'].unique()
dx_classes

In [23]:
print('Inbalanced Dataset')
skin_df['dx'].value_counts()

In [24]:
from sklearn.preprocessing import LabelEncoder

In [25]:
le = LabelEncoder()
le.fit(skin_df['dx'])
print(le.classes_)

In [26]:
skin_df['label'] = le.transform(skin_df['dx'])

In [27]:
skin_df.sample(5)

In [28]:
from sklearn.model_selection import train_test_split

In [29]:
# Hold-out fractions, both expressed relative to the full dataset.
test_size = 0.25
valid_size = 0.25
# The validation split is taken from what is left after the test split, so
# re-express its fraction relative to that remainder.
valid_size /= (1 - test_size)

In [30]:
# Separate features and target WITHOUT mutating `skin_df`. The previous
# `skin_df.pop('label')` removed the column in place, so re-running this
# cell failed with a KeyError on the `drop` above it.
X = skin_df.drop(columns=['label'])
y = skin_df['label'].copy()

In [31]:
# Stratify both splits on the label: the classes are heavily imbalanced, and
# an unstratified split can leave the rare classes under-represented (or
# over-represented) in validation/test.
# NOTE(review): if the metadata can contain several images of the same
# lesion, a group-aware split would also be needed to avoid leakage -- confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=420, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=valid_size, random_state=420, stratify=y_train)

In [32]:
# Re-attach the label column to each feature split (rows align on the index).
df_train = X_train.join(y_train)
df_valid = X_val.join(y_val)
df_test = X_test.join(y_test)

In [33]:
# Per-split class distribution followed by the number of rows.
for split_df in (df_train, df_valid, df_test):
    dx_column = split_df['dx']
    print(dx_column.value_counts(), end='\n\n')
    print(dx_column.shape, end='\n\n')

In [34]:
from sklearn.utils import resample

In [35]:
dx_classes

In [36]:
RANDOM_STATE = 120
n_samples = 1600
# Resample (with replacement) the training rows of every encoded class to
# exactly `n_samples` rows, producing one balanced frame per class.
arr_df = [
    resample(
        df_train[df_train['label'] == class_id],
        replace=True,
        n_samples=n_samples,
        random_state=RANDOM_STATE,
    )
    for class_id in range(len(dx_classes))
]

In [37]:
df_train_bal = pd.concat(arr_df)

In [38]:
df_train_bal.sample(5)

In [39]:
df_train_bal.info()

In [40]:
df_train_bal['dx'].value_counts()

In [41]:
# Names under which scalar metrics are written for the HParams dashboard.
METRIC_ACCURACY = 'accuracy'
METRIC_EPOCH_ACC = 'epoch_accuracy'
METRIC_EPOCH_VAL_ACC = 'epoch_val_accuracy'
METRIC_EPOCH_LOSS = "epoch_loss"
METRIC_EPOCH_VAL_LOSS = "epoch_val_loss"

# Augmentation search space: each hyperparameter currently has one candidate.
HP_PREPRO = hp.HParam('preprocessing_function', hp.Discrete(['gridmask']))
HP_ROTATE = hp.HParam('rotation_range', hp.Discrete([90]))
HP_WIDTH_SHIFT = hp.HParam('width_shift_range', hp.Discrete([0.15]))
HP_HEIGHT_SHIFT = hp.HParam('height_shift_range', hp.Discrete([0.15]))
HP_SHEAR = hp.HParam('shear_range', hp.Discrete([0.5]))
HP_ZOOM = hp.HParam('zoom_range', hp.Discrete([0.25]))
HP_HORIZONTAL = hp.HParam('horizontal_flip', hp.Discrete([True]))
HP_VERTICAL = hp.HParam('vertical_flip', hp.Discrete([True]))

# Ordered list of all hyperparameters; the order is reused when enumerating
# combinations.
h_desc = [
    HP_PREPRO, HP_ROTATE,
    HP_WIDTH_SHIFT, HP_HEIGHT_SHIFT,
    HP_SHEAR, HP_ZOOM,
    HP_HORIZONTAL, HP_VERTICAL,
]

# Register the hyperparameters and metrics once, in the sweep's root log dir.
hparam_metrics = [hp.Metric(METRIC_ACCURACY, display_name='Accuracy')]
hparam_metrics += [
    hp.Metric(metric_name)
    for metric_name in (
        METRIC_EPOCH_ACC,
        METRIC_EPOCH_VAL_ACC,
        METRIC_EPOCH_LOSS,
        METRIC_EPOCH_VAL_LOSS,
    )
]

hparam_writer = tf.summary.create_file_writer('logs/hparam_tuning')
with hparam_writer.as_default():
    hp.hparams_config(hparams=h_desc, metrics=hparam_metrics)

In [42]:
def rescale(image):
    """Return `image` with every value divided by 255."""
    scaled = image / 255.
    return scaled

def transform(image, inv_mat, image_shape):
      """Warp `image` with an inverse 3x3 transform using nearest-pixel lookup.

      For every output pixel, `inv_mat` is applied to its centre-relative
      homogeneous coordinate (x, y, 1) to find the source pixel; source
      positions that fall outside the image are left at 0 in the output.

      Args:
          image: tensor indexed as [row, col, channel].
          inv_mat: 3x3 float matrix mapping output coordinates to input ones.
          image_shape: (h, w, c) of `image`.

      Returns:
          Tensor of shape [h, w, c].

      NOTE(review): `tf.range(-cx, cx)` / `tf.range(-cy, cy)` yield 2*(w//2)
      and 2*(h//2) values, so the coordinate vectors only match `h*w` when
      both h and w are even -- confirm callers guarantee this.
      """

      h, w, c = image_shape
      cx, cy = w//2, h//2

      # Centre-relative output coordinates, enumerated x-major: each x value
      # is repeated h times while the y range is tiled w times.
      new_xs = tf.repeat( tf.range(-cx, cx, 1), h)
      new_ys = tf.tile( tf.range(-cy, cy, 1), [w])
      new_zs = tf.ones([h*w], dtype=tf.int32)

      # Map output coordinates back to source coordinates, shift the origin
      # back to the top-left corner and round to the nearest pixel.
      old_coords = tf.matmul(inv_mat, tf.cast(tf.stack([new_xs, new_ys, new_zs]), tf.float32))
      old_coords_x, old_coords_y = tf.round(old_coords[0, :] + w//2), tf.round(old_coords[1, :] + h//2)

      # True where the source position lies outside the image bounds.
      clip_mask_x = tf.logical_or(old_coords_x<0, old_coords_x>w-1)
      clip_mask_y = tf.logical_or(old_coords_y<0, old_coords_y>h-1)
      clip_mask = tf.logical_or(clip_mask_x, clip_mask_y)

      # Keep only the (source, destination) pairs whose source is in bounds.
      old_coords_x = tf.boolean_mask(old_coords_x, tf.logical_not(clip_mask))
      old_coords_y = tf.boolean_mask(old_coords_y, tf.logical_not(clip_mask))
      new_coords_x = tf.boolean_mask(new_xs+cx, tf.logical_not(clip_mask))
      new_coords_y = tf.boolean_mask(new_ys+cy, tf.logical_not(clip_mask))

      # Index pairs are stacked as (y, x), i.e. (row, col).
      old_coords = tf.cast(tf.stack([old_coords_y, old_coords_x]), tf.int32)
      new_coords = tf.cast(tf.stack([new_coords_y, new_coords_x]), tf.int64)
      rotated_image_values = tf.gather_nd(image, tf.transpose(old_coords))
      rotated_image_channel = list()
      # Scatter each channel into an [h, w] plane; unfilled pixels default to 0.
      # The indices are generated x-major rather than row-major, which is
      # presumably why index validation is switched off.
      for i in range(c):
          vals = rotated_image_values[:,i]
          sparse_channel = tf.SparseTensor(tf.transpose(new_coords), vals, [h, w])
          rotated_image_channel.append(tf.sparse.to_dense(sparse_channel, default_value=0, validate_indices=False))

      # Channels were stacked first: reorder [c, h, w] -> [h, w, c].
      return tf.transpose(tf.stack(rotated_image_channel), [1,2,0])

def random_rotate(image, angle, image_shape):
    """Rotate `image` about its centre by a randomly drawn angle.

    The rotation in degrees is `angle` multiplied by one sample from a
    standard normal distribution. The matching inverse rotation matrix is
    built in homogeneous form and handed to `transform`.

    Args:
        image: tensor to rotate, as expected by `transform`.
        angle: scale (in degrees) applied to the random sample.
        image_shape: (h, w, c) of `image`.

    Returns:
        The output of `transform` for the sampled rotation.
    """
    # Sample the rotation in degrees, then convert it to radians.
    sampled_degrees = float(angle) * tf.random.normal([1], dtype='float32')
    theta = math.pi * sampled_degrees / 180

    cos_theta = tf.math.cos(theta)
    sin_theta = tf.math.sin(theta)
    one = tf.constant([1], tf.float32)
    zero = tf.constant([0], tf.float32)

    # Row-major entries of the 3x3 inverse rotation matrix.
    matrix_entries = [
        cos_theta, sin_theta, zero,
        -sin_theta, cos_theta, zero,
        zero, zero, one,
    ]
    inverse_rotation = tf.reshape(tf.concat(matrix_entries, axis=0), [3, 3])

    return transform(image, inverse_rotation, image_shape)

def GridMask(image_height, image_width, d1, d2, rotate_angle=1, ratio=0.5):
    """Build a randomly placed, randomly rotated grid mask.

    A square canvas large enough to cover the image diagonal is filled with
    ones; bands of rows and bands of columns are marked with zeros at a
    random period `d`, and only the cells where a zeroed row band crosses a
    zeroed column band stay 0 after the two are combined. The canvas is then
    rotated with `random_rotate` and centre-cropped to the image size.

    Args:
        image_height: height of the mask to return.
        image_width: width of the mask to return.
        d1: lower bound (`minval`) for the random grid period `d`.
        d2: upper bound (`maxval`) for the random grid period `d`.
        rotate_angle: forwarded to `random_rotate` as its `angle`.
        ratio: fraction of each period that is zeroed; the band width is
            `int(d * ratio + 0.5)`.

    Returns:
        Tensor of shape [image_height, image_width, 1] holding 0 where pixels
        are to be dropped and 1 elsewhere. Positions the rotation could not
        fill are also 0, because `transform` defaults unfilled pixels to 0.

    NOTE(review): `range(0, hh//d+1)` uses the tensor `d` as a Python loop
    bound, which presumably requires eager execution -- confirm.
    """

    h, w = image_height, image_width
    # Side of the working canvas: the image diagonal, bumped to an even number.
    hh = int(np.ceil(np.sqrt(h*h+w*w)))
    hh = hh+1 if hh%2==1 else hh
    # Random grid period and the (rounded) width of each zeroed band.
    d = tf.random.uniform(shape=[], minval=d1, maxval=d2, dtype=tf.int32)
    l = tf.cast(tf.cast(d,tf.float32)*ratio+0.5, tf.int32)

    # Random offsets of the grid along each axis.
    st_h = tf.random.uniform(shape=[], minval=0, maxval=d, dtype=tf.int32)
    st_w = tf.random.uniform(shape=[], minval=0, maxval=d, dtype=tf.int32)

    # First band starts one period before the offset so the canvas edge can
    # be covered; out-of-range indices are removed further down.
    y_ranges = tf.range(-1 * d + st_h, -1 * d + st_h + l)
    x_ranges = tf.range(-1 * d + st_w, -1 * d + st_w + l)

    # Append one band of `l` consecutive indices per period.
    for i in range(0, hh//d+1):
        s1 = i * d + st_h
        s2 = i * d + st_w
        y_ranges = tf.concat([y_ranges, tf.range(s1,s1+l)], axis=0)
        x_ranges = tf.concat([x_ranges, tf.range(s2,s2+l)], axis=0)

    # An entry is dropped from BOTH lists when either index is off-canvas,
    # which keeps the two lists the same length.
    x_clip_mask = tf.logical_or(x_ranges <0 , x_ranges > hh-1)
    y_clip_mask = tf.logical_or(y_ranges <0 , y_ranges > hh-1)
    clip_mask = tf.logical_or(x_clip_mask, y_clip_mask)

    x_ranges = tf.boolean_mask(x_ranges, tf.logical_not(clip_mask))
    y_ranges = tf.boolean_mask(y_ranges, tf.logical_not(clip_mask))

    # Pair every kept band index with every position 0..hh-1 along the other
    # axis, so whole rows / whole columns are addressed.
    hh_ranges = tf.tile(tf.range(0,hh), [tf.cast(tf.reduce_sum(tf.ones_like(x_ranges)), tf.int32)])
    x_ranges = tf.repeat(x_ranges, hh)
    y_ranges = tf.repeat(y_ranges, hh)

    y_hh_indices = tf.transpose(tf.stack([y_ranges, hh_ranges]))
    x_hh_indices = tf.transpose(tf.stack([hh_ranges, x_ranges]))

    # Dense [hh, hh] planes: 0 on the selected rows (y_mask) or columns
    # (x_mask), 1 everywhere else.
    y_mask_sparse = tf.SparseTensor(tf.cast(y_hh_indices, tf.int64),  tf.zeros_like(y_ranges), [hh, hh])
    y_mask = tf.sparse.to_dense(y_mask_sparse, 1, False)

    x_mask_sparse = tf.SparseTensor(tf.cast(x_hh_indices, tf.int64), tf.zeros_like(x_ranges), [hh, hh])
    x_mask = tf.sparse.to_dense(x_mask_sparse, 1, False)

    # The sum is 0 only where both planes are 0; clipping maps 1 and 2 to 1.
    mask = tf.expand_dims( tf.clip_by_value(x_mask + y_mask, 0, 1), axis=-1)

    # Rotate the canvas, then cut the image-sized window out of its centre.
    mask = random_rotate(mask, rotate_angle, [hh, hh, 1])
    mask = tf.image.crop_to_bounding_box(mask, (hh-h)//2, (hh-w)//2, image_height, image_width)

    return mask

def apply_grid_mask(image, image_shape, d1, d2, rotate_angle, ratio):
    """Multiply `image` by a freshly generated grid mask.

    Args:
        image: image to mask.
        image_shape: shape of `image`; the first two entries are passed to
            `GridMask` as height and width, the last is the channel count.
        d1, d2, rotate_angle, ratio: forwarded unchanged to `GridMask`.

    Returns:
        `image` multiplied element-wise by the mask cast to float32.
    """
    height, width = image_shape[0], image_shape[1]
    grid = GridMask(height, width, d1, d2, rotate_angle, ratio)
    # The mask has a single channel; replicate it for 3-channel images.
    if image_shape[-1] == 3:
        grid = tf.concat([grid] * 3, axis=-1)
    return image * tf.cast(grid, tf.float32)

def apply_grid(image):
    """Apply a grid mask with fixed settings, then divide the result by 255.

    The mask is generated with d1=50, d2=70, rotate_angle=60 and ratio=0.3.
    """
    masked = apply_grid_mask(image, image.shape, 50, 70, 60, 0.3)
    return masked / 255.

In [43]:
def processing_data(input_shape, batch_size, aug_params):
    """Build the training and validation image generators.

    Args:
        input_shape: (height, width) used as `target_size` for both generators.
        batch_size: batch size of the training generator. The validation
            generator always yields one image per batch.
        aug_params: keyword arguments for the training `ImageDataGenerator`.
            Its 'preprocessing_function' entry is a NAME: "gridmask" selects
            `apply_grid`, anything else selects `rescale`. The dict is not
            modified.

    Returns:
        (traingen, validgen): iterators over `df_train_bal` (augmented) and
        `df_valid` (only rescaled), both reading file paths from the 'path'
        column and labels from the 'dx' column.
    """
    # Work on a copy: writing the resolved function back into the caller's
    # dict would make a second call with the same dict see a function object
    # instead of the name and silently fall back to `rescale`.
    generator_kwargs = dict(aug_params)
    function_name = generator_kwargs['preprocessing_function']

    function = rescale
    if function_name == "gridmask":
        function = apply_grid

    generator_kwargs['preprocessing_function'] = function

    print(function_name, function)

    generator = ks.preprocessing.image.ImageDataGenerator(
        **generator_kwargs
    )

    traingen = generator.flow_from_dataframe(
        df_train_bal,
        x_col='path',
        y_col='dx',
        target_size=input_shape,
        batch_size=batch_size,
        save_format='jpg',
        seed=RANDOM_STATE
    )
    validation_datagen = ks.preprocessing.image.ImageDataGenerator(preprocessing_function=rescale)

    validgen = validation_datagen.flow_from_dataframe(
        df_valid,
        x_col='path',
        y_col='dx',
        target_size=input_shape,
        batch_size=1,
        # Pin the class -> index mapping to the training generator's so the
        # one-hot labels agree even if a class is missing from `df_valid`.
        classes=list(traingen.class_indices),
    )

    return traingen, validgen

In [44]:
best_accuracy = 0.0

In [45]:
"""
    "rotation_range": hparams[HP_ROTATE],
    "width_shift_range":hparams[HP_WIDTH_SHIFT], 
    "height_shift_range":hparams[HP_HEIGHT_SHIFT],
    "shear_range":hparams[HP_SHEAR],
    "zoom_range":hparams[HP_ZOOM],
    "horizontal_flip":hparams[HP_HORIZONTAL],
    "vertical_flip":hparams[HP_VERTICAL]
"""

In [46]:
def build_model(input_shape, learning_rate=0.0001, dropout=0.5,
                frozen_layers=15, num_nodes=1024, num_classes=None):
    """Build and compile a VGG16-based classifier with a new dense head.

    Args:
        input_shape: (height, width) of the input images; a 3-channel axis
            is appended.
        learning_rate: Adam learning rate.
        dropout: dropout rate applied after each hidden dense layer.
        frozen_layers: number of leading VGG16 layers kept non-trainable;
            the remaining layers are fine-tuned.
        num_nodes: width of the two hidden dense layers.
        num_classes: size of the softmax output. Defaults to
            `len(dx_classes)`.

    Returns:
        A compiled `ks.Model` using categorical cross-entropy loss and the
        'accuracy' metric.
    """
    if num_classes is None:
        num_classes = len(dx_classes)

    vgg16_base = ks.applications.VGG16(
        include_top=False,
        weights='imagenet',
        input_shape=(tuple(input_shape) + (3,)),
    )

    # Freeze the first `frozen_layers` layers and fine-tune the rest, logging
    # each layer's trainable flag.
    for index, layer in enumerate(vgg16_base.layers):
        layer.trainable = index >= frozen_layers
        print(layer.name, layer.trainable)

    top_model = vgg16_base.output
    top_model = ks.layers.GlobalAveragePooling2D()(top_model)
    top_model = ks.layers.Dense(num_nodes, activation='relu')(top_model)
    top_model = ks.layers.Dropout(dropout)(top_model)
    top_model = ks.layers.Dense(num_nodes, activation='relu')(top_model)
    top_model = ks.layers.Dropout(dropout)(top_model)
    output_layer = ks.layers.Dense(num_classes, activation='softmax')(top_model)

    # Use `learning_rate` (the `lr` alias is deprecated) and drop
    # `epsilon=None` / `decay=0.0`: both only restated the defaults and are
    # rejected by newer Keras releases.
    optimizer = ks.optimizers.Adam(
        learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model = ks.Model(inputs=vgg16_base.input, outputs=output_layer)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [47]:
class CustomCallback(tf.keras.callbacks.Callback):
    """Writes per-epoch loss and accuracy scalars with `tf.summary.scalar`."""

    def on_epoch_end(self, epoch, logs=None):
        """Log train/validation loss and accuracy using a 1-based epoch step."""
        # Pull every value out of `logs` first so a missing key fails before
        # anything is printed or written.
        scalars = (
            (METRIC_EPOCH_VAL_ACC, logs['val_accuracy']),
            (METRIC_EPOCH_VAL_LOSS, logs['val_loss']),
            (METRIC_EPOCH_LOSS, logs['loss']),
            (METRIC_EPOCH_ACC, logs['accuracy']),
        )
        step = epoch + 1
        print("Epoch:", step)
        for tag, value in scalars:
            tf.summary.scalar(tag, value, step=step)
        

In [48]:
def run(run_name, run_dir, hparams):
    """Train and evaluate one model for a single hyperparameter combination.

    Everything is executed inside a summary writer for `run_dir`, so the
    hyperparameters, the per-epoch scalars written by `CustomCallback` and
    the final accuracy all land in that run's log directory.

    Args:
        run_name: identifier used in the checkpoint file name.
        run_dir: log directory for this run (summary writer and TensorBoard).
        hparams: mapping from the `HP_*` hyperparameters to the values to use.

    Returns:
        The validation accuracy recorded for the last epoch that was trained.

    Side effects:
        Creates './models/callback' if needed and writes the best weights
        there; updates the module-level `best_accuracy`.
    """
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)
        INPUT_SHAPE = (192,192)
        BATCH_SIZE = 32
        TRAIN_GEN, VALID_GEN = processing_data(
            INPUT_SHAPE,
            BATCH_SIZE,
            {
                "preprocessing_function": hparams[HP_PREPRO],
                "rotation_range": hparams[HP_ROTATE],
                "width_shift_range":hparams[HP_WIDTH_SHIFT],
                "height_shift_range":hparams[HP_HEIGHT_SHIFT],
                "shear_range":hparams[HP_SHEAR],
                "zoom_range":hparams[HP_ZOOM],
                "horizontal_flip":hparams[HP_HORIZONTAL],
                "vertical_flip":hparams[HP_VERTICAL]
            }
        )
        N_STEPS = TRAIN_GEN.samples // BATCH_SIZE
        # The validation generator uses batch_size=1: one step per image.
        N_VAL_STEPS = VALID_GEN.samples
        model = build_model(INPUT_SHAPE)
        from livelossplot.inputs.keras import PlotLossesCallback

        plot_loss_1 = PlotLossesCallback()

        early_val_loss_stop = ks.callbacks.EarlyStopping(monitor='val_loss',
                               patience=10,
                               restore_best_weights=True,
                               mode='min')

        # Make sure the checkpoint directory exists; otherwise saving the
        # weights fails unless a separate mkdir cell was run beforehand.
        checkpoint_path = './models/callback/bestsession{0}.hdf5'.format(run_name)
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

        tl_checkpoint_1 = ks.callbacks.ModelCheckpoint(
            filepath=checkpoint_path,
            save_best_only=True,
            save_weights_only=True,
            monitor='val_accuracy',
            verbose=1
        )

        tensor_log = TensorBoard(
            log_dir=run_dir,
            write_graph=True,
            write_grads=True,
            write_images=True
        )

        # The model is compiled with metrics=['accuracy'], so the logged key
        # is 'val_accuracy' (as used by the checkpoint and CustomCallback).
        # Monitoring 'val_acc' meant the metric was never found and the
        # learning rate was never reduced.
        learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience=3, verbose=1, factor=0.5,
                                                min_lr=0.000001, cooldown=3)
        # NOTE(review): `fit_generator` is deprecated in newer TensorFlow
        # releases in favour of `fit`; kept for the TF version this notebook
        # targets.
        history = model.fit_generator(
            TRAIN_GEN,
            epochs=30,
            steps_per_epoch=N_STEPS,
            validation_steps=N_VAL_STEPS,
            validation_data=VALID_GEN,
            callbacks=[
                plot_loss_1,
                learning_rate_reduction,
                tl_checkpoint_1,
                # Previously constructed but never passed to training.
                early_val_loss_stop,
                tensor_log,  # log metrics
                hp.KerasCallback(run_dir, hparams), # log hparams
                CustomCallback()
            ])

        accuracy = history.history['val_accuracy'][-1]
        tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)
        print()
        print("Accuracy: {0:.2%}".format(accuracy))
        print()
        global best_accuracy
        if accuracy > best_accuracy:
            #model.save("./model/final/model{0}.h5".format(run_name))
            best_accuracy = accuracy
        # Release the model and the Keras graph state before the next run.
        del model
        K.clear_session()

        return accuracy

In [59]:
# Create the output directories idempotently: `!mkdir` reports an error when
# the directory already exists (e.g. on re-running the cell) and depends on
# the shell; `os.makedirs(..., exist_ok=True)` also creates `models` itself.
for model_dir in ('models/callback', 'models/final'):
    os.makedirs(model_dir, exist_ok=True)

In [50]:
import uuid

In [51]:


# Candidate values of every hyperparameter, in the same order as `h_desc`.
arr = [list(hparam.domain.values) for hparam in h_desc]

# Cartesian product: one tuple per hyperparameter combination to try.
all_possibles = list(itertools.product(*arr))


In [53]:
all_possibles

In [61]:
!zip -r report.zip ./*