In [None]:
# Mounting Google Drive at /content/drive (the dataset, model and log paths used below live under it)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import datetime
import gc
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.callbacks import *
from keras.models import Sequential, Model, load_model
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
from tensorflow.math import confusion_matrix

from tensorboard.plugins.hparams import api as hp
%load_ext tensorboard

In [None]:
# Dataset locations on Google Drive.
# Make sure that the dataset folder is there in Drive,
# and that a separate tmpModelBackup folder has been created for each model that runs at a given time.
# NOTE(review): the paths below mix the '/content/drive/MyDrive' and '/content/drive/My Drive'
# spellings of the Drive root -- confirm both resolve to the same folder in the Colab runtime in use.
datasetName = 'SEM100'
modelName = 'TENSORBOARD_Resnet_finetunedAfterTraining_rgb_notCropped_augemnted_learningRateReduction10x_seed108_restDefault'
trainingDataPath = '/content/drive/MyDrive/SEMProject/' + datasetName + '/train'
testingDataPath = '/content/drive/MyDrive/SEMProject/' + datasetName + '/test'
historySaveLocation = '/content/drive/My Drive/SEMProject/' + datasetName + '/models/history/' + modelName + 'data.pickle'
# Fixed: this path was missing the 'SEMProject/' segment that every other location uses.
fineTuneHistorySaveLocation = '/content/drive/My Drive/SEMProject/' + datasetName + '/models/history/' + modelName + '_fineTune_data.pickle'
# Fixed: follow datasetName instead of a hard-coded 'SEM100' so switching datasets moves the models too.
modelSaveLocation = '/content/drive/MyDrive/SEMProject/' + datasetName + '/models/finalModels/' + modelName + '.h5'
fineTuneModelSaveLocation = '/content/drive/MyDrive/SEMProject/' + datasetName + '/models/finalModels/' + modelName + '_fineTune.h5'

# TensorBoard log locations; the per-fit directory is timestamped when this cell is executed.
rootLogsDirectory = '/content/drive/MyDrive/SEMProject/' + datasetName + "/logs/"
logsDirectory = rootLogsDirectory + "fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")


In [None]:
# --- Dataset loading ---
batchSize = 32
imageHeight = imageWidth = 180
randomisingSeed = 108
colorMode = "rgb"
validationSplitFraction = 0.2
imageInterpolationMethod = "bilinear"
cropToPreserveAspectRatioWhileResizing = False

# --- Dataset pre-processing switches ---
augmentDataset = True
cropBottomPanel = False

# --- Early stopping ---
earlyStoppingMetric = "val_accuracy"
earlyStoppingMinimumImprovementCriterion = 0.005
earlyStoppingPatience = 17

# Passed to the 'MostAccurateEpoch' ModelCheckpoint as its initial_value_threshold
baselineAccuracy = 0.5

# --- Learning-rate reduction on plateau ---
learningRateReductionMetric = 'val_loss'
patienceForLearningRateReduction = 10
learningRateReductionFactor = 0.1
learningRateReductionMinimumImprovementCriterion = 0.0001
learningRateReductionCooldown = 20
minimumLearningRate = 0

# --- Logging ---
useTensorboard = True

# --- Architecture selection (index 0: 'customCNN', index 1: 'transfer') ---
useArchitecture = ('customCNN', 'transfer')[1]
doSeperateFineTuning = True
makeTransferModelTrainable = False
addConvolutionLayersOnBaseModel = True

# --- Pooling / convolution defaults ---
maxPoolingSize = (2, 2)
maxPoolingPadding = 'valid'

convolutionKernelSize = 3
convolutionKernelRegulariser = None
convolutionActivityRegulariser = None

In [None]:
# TF-Hub feature-vector model URLs; only defined when the transfer-learning architecture is selected.
# NOTE(review): none of the cells visible here read featureExtractorModel -- getTransferLearningModel
# builds a keras.applications.ResNet152V2 backbone instead. Confirm whether this selector is still needed.
if 'transfer' == useArchitecture:
  mobilenet_v2 = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4"
  inception_v3 = "https://tfhub.dev/google/tf2-preview/inception_v3/feature_vector/4"
  resnet_v2_50 = "https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/5"

  # The trailing `#@param` annotation is a Colab form field; it refers to the variable names above.
  featureExtractorModel = mobilenet_v2 #@param ["mobilenet_v2", "inception_v3", "resnet_v2_50"] {type:"raw"}

In [None]:
# Search space shown in the TensorBoard HParams dashboard.
HP_NUM_UNITS_CONV = hp.HParam('num_units_conv', domain=hp.Discrete([16, 256]))
HP_NUM_UNITS_DENSE = hp.HParam('num_units_dense', domain=hp.Discrete([16, 256]))
HP_DROPOUT = hp.HParam('dropout', domain=hp.RealInterval(min_value=0.1, max_value=0.5))
HP_OPTIMIZER = hp.HParam('optimizer', domain=hp.Discrete(['adam', 'sgd', 'adadelta', 'nadam']))
HP_LR_REDUCTION = hp.HParam('lr_reduction', domain=hp.RealInterval(min_value=0.05, max_value=0.5))
HP_BATCH_SIZE = hp.HParam('batch_size', domain=hp.Discrete([20, 50]))

# Name of the scalar summary each trial reports.
METRIC_ACCURACY = 'accuracy'

# Register the hyperparameters and the metric once, under <rootLogsDirectory>hparam_tuning.
with tf.summary.create_file_writer(rootLogsDirectory + 'hparam_tuning').as_default():
  hp.hparams_config(
    hparams=[
      HP_NUM_UNITS_CONV,
      HP_NUM_UNITS_DENSE,
      HP_DROPOUT,
      HP_OPTIMIZER,
      HP_LR_REDUCTION,
      HP_BATCH_SIZE,
    ],
    metrics=[
      hp.Metric(METRIC_ACCURACY, display_name='Accuracy'),
    ],
  )

In [None]:
def getDataset(batchSize, imageHeight, imageWidth, randomisingSeed, colorMode, validationSplitFraction, imageInterpolationMethod, cropToPreserveAspectRatioWhileResizing, augmentDataset, cropBottomPanel):
  """Load the training, validation and testing image datasets and build their input pipelines.

  Images are read from the notebook-level ``trainingDataPath`` and ``testingDataPath``
  directories with one-hot ("categorical") labels inferred from the sub-folder names.

  Args:
    batchSize: Number of images per batch.
    imageHeight, imageWidth: Size every image is resized to while loading.
    randomisingSeed: Seed for the train/validation split, the shuffling and the augmentation layers.
    colorMode: ``color_mode`` forwarded to ``image_dataset_from_directory``.
    validationSplitFraction: Fraction of the training folder held out for validation.
    imageInterpolationMethod: Interpolation used for resizing and for the augmentation layers.
    cropToPreserveAspectRatioWhileResizing: ``crop_to_aspect_ratio`` forwarded to the loader.
    augmentDataset: When true, random augmentation is applied to the training pipeline.
    cropBottomPanel: Accepted for interface compatibility; the crop step is currently disabled
      (see the note in the body), so this flag has no effect.

  Returns:
    ``(trainingDataset, augmentedTrainingDataset, validationDataset, testingDataset, classNames, imageHeight)``.
    ``augmentedTrainingDataset`` is the pipeline meant for ``model.fit``; without augmentation it is
    the same cached/shuffled/prefetched pipeline as ``trainingDataset``.
  """
  # Options shared by both loader calls.
  sharedLoaderOptions = dict(
      labels = "inferred",
      label_mode = "categorical",
      color_mode = colorMode,
      batch_size = batchSize,
      image_size = (imageHeight, imageWidth),
      seed = randomisingSeed,
      shuffle = True,
      interpolation = imageInterpolationMethod,
      crop_to_aspect_ratio = cropToPreserveAspectRatioWhileResizing,
  )

  trainingDataset, validationDataset = tf.keras.utils.image_dataset_from_directory(
      directory = trainingDataPath,
      validation_split = validationSplitFraction,
      subset = "both",
      **sharedLoaderOptions
  )

  testingDataset = tf.keras.utils.image_dataset_from_directory(
      directory = testingDataPath,
      **sharedLoaderOptions
  )

  # class_names is only available on the dataset returned by the loader, so read it before any transformation.
  classNames = np.array(trainingDataset.class_names)
  print(classNames)

  # NOTE(review): bottom-panel cropping is not applied. The earlier draft defined a nested helper that
  # was also called `cropBottomPanel` (shadowing the boolean parameter) and its call sites were commented
  # out. If cropping is re-enabled, imageHeight must be updated accordingly before it is returned.

  AUTOTUNE = tf.data.AUTOTUNE

  if augmentDataset:
    dataAugmentation = keras.Sequential(
        [
            layers.RandomRotation(factor = (-0.5, 0.5), fill_mode = "nearest", interpolation = imageInterpolationMethod, seed = randomisingSeed),
            layers.RandomFlip(mode = "horizontal_and_vertical", seed = randomisingSeed),
            layers.RandomTranslation(height_factor = (-0.2, 0.2), width_factor = (-0.2, 0.2), fill_mode = "nearest", interpolation = imageInterpolationMethod, seed = randomisingSeed),
            layers.RandomZoom(height_factor = (-0.3, 0.3), width_factor = (-0.2, 0.2), fill_mode = "nearest", interpolation = imageInterpolationMethod, seed = randomisingSeed),
            layers.RandomContrast(factor = (0.4, 0.3), seed = randomisingSeed)
        ]
    )

    # Cache the decoded images BEFORE augmenting: caching after the map would freeze the
    # first epoch's augmented batches and replay them for every later epoch.
    # training=True makes the random layers active when called outside model.fit.
    augmentedTrainingDataset = (
        trainingDataset
        .cache()
        .shuffle(1000)
        .map(lambda images, labels: (dataAugmentation(images, training = True), labels))
        .prefetch(buffer_size = AUTOTUNE)
    )
  else:
    trainingDataset = trainingDataset.cache().shuffle(1000).prefetch(buffer_size = AUTOTUNE)
    # Hand the cached pipeline to the caller under both names so model.fit uses it as well.
    augmentedTrainingDataset = trainingDataset

  validationDataset = validationDataset.cache().prefetch(buffer_size = AUTOTUNE)

  return trainingDataset, augmentedTrainingDataset, validationDataset, testingDataset, classNames, imageHeight

In [None]:
def visualiseDataset(dataset, noOfRows, noOfColumns, classNames=None):
  """Show images from the first batch of `dataset` in a noOfRows x noOfColumns grid.

  Args:
    dataset: Iterable of ``(images, labels)`` batches supporting ``.take(1)``; labels are expected
      to be one-hot rows, since the class index is taken with ``np.argmax``.
    noOfRows: Number of grid rows.
    noOfColumns: Number of grid columns.
    classNames: Optional sequence mapping class index to a title. When omitted, a notebook-level
      ``classNames`` variable is used if one exists; otherwise the numeric class index is shown.
  """
  if classNames is None:
    # Backward compatibility: earlier calls relied on a notebook-level `classNames`.
    classNames = globals().get('classNames')

  # figsize is (width, height): width scales with the columns, height with the rows.
  plt.figure(figsize=(noOfColumns*2.5, noOfRows*2.5))

  for images, labels in dataset.take(1):
    # Never index past the end of the batch when the grid is larger than the batch.
    numberOfTiles = min(noOfRows * noOfColumns, int(images.shape[0]))

    for i in range(numberOfTiles):
      plt.subplot(noOfRows, noOfColumns, i + 1)

      plt.imshow(images[i].numpy().astype("uint8"))

      classIndex = int(np.argmax(labels[i]))
      plt.title(classNames[classIndex] if classNames is not None else str(classIndex))

      plt.axis("off")

In [None]:
def learningRateSchedule(currentEpochNumber, learningRatefromPreviousEpoch):
    """Identity schedule: return the previous epoch's learning rate unchanged.

    `currentEpochNumber` is accepted but not used.
    """
    return learningRatefromPreviousEpoch

In [None]:
def getCallbacks(datasetName, modelName, earlyStoppingMetric, earlyStoppingMinimumImprovementCriterion, earlyStoppingPatience,
                 baselineAccuracy,
                 learningRateReductionMetric, learningRateReductionFactor, patienceForLearningRateReduction,
                 learningRateReductionMinimumImprovementCriterion, learningRateReductionCooldown):
  """Build the list of Keras callbacks used for one training run.

  Args:
    datasetName: Dataset folder name under the SEMProject directory on Drive.
    modelName: Prefix used for the backup directory and the checkpoint file names.
    earlyStoppingMetric: Metric monitored by EarlyStopping.
    earlyStoppingMinimumImprovementCriterion: EarlyStopping ``min_delta``.
    earlyStoppingPatience: EarlyStopping ``patience``.
    baselineAccuracy: ``initial_value_threshold`` of the best-accuracy checkpoint.
    learningRateReductionMetric: Metric monitored by ReduceLROnPlateau.
    learningRateReductionFactor: ReduceLROnPlateau ``factor``.
    patienceForLearningRateReduction: ReduceLROnPlateau ``patience``.
    learningRateReductionMinimumImprovementCriterion: ReduceLROnPlateau ``min_delta``.
    learningRateReductionCooldown: ReduceLROnPlateau ``cooldown``.

  Also reads the notebook-level ``minimumLearningRate``, ``useTensorboard`` and ``logsDirectory``.

  Returns:
    A list of callback instances to pass to ``model.fit``.
  """
  # Fixed: the checkpoint paths used to concatenate 'SEMProject' and datasetName without a
  # separator, so models were written under '.../SEMProjectSEM100/...'.
  modelsDirectory = '/content/drive/My Drive/SEMProject/' + datasetName + '/models/'
  finalModelPathPrefix = modelsDirectory + 'finalModels/' + modelName

  callbackList = []

  # Fixed: backup_dir is a directory, not a per-epoch file template; use one folder per model.
  callbackList.append(BackupAndRestore(backup_dir = modelsDirectory + 'tmpModelBackup/' + modelName, save_freq = 'epoch',
                                       delete_checkpoint = True, save_before_preemption = False))

  callbackList.append(EarlyStopping(monitor = earlyStoppingMetric, min_delta = earlyStoppingMinimumImprovementCriterion, patience = earlyStoppingPatience,
                                    verbose = 1, mode = 'auto', baseline = None, restore_best_weights = True, start_from_epoch = 0))

  # Best validation accuracy so far; nothing is saved until baselineAccuracy is exceeded.
  callbackList.append(ModelCheckpoint(filepath = finalModelPathPrefix + 'MostAccurateEpoch_{epoch:04d}.h5',
                                      monitor = 'val_accuracy', verbose = 1, save_best_only = True, save_weights_only = False, mode = 'auto', save_freq = 'epoch',
                                      options = None, initial_value_threshold = baselineAccuracy))
  # Lowest validation loss so far.
  callbackList.append(ModelCheckpoint(filepath = finalModelPathPrefix + 'LeastLossEpoch_{epoch:04d}.h5',
                                      monitor = 'val_loss', verbose = 1, save_best_only = True, save_weights_only = False, mode = 'auto', save_freq = 'epoch',
                                      options = None, initial_value_threshold = None))

  callbackList.append(ReduceLROnPlateau(monitor = learningRateReductionMetric, factor = learningRateReductionFactor, patience = patienceForLearningRateReduction,
                                        verbose = 1, mode='auto', min_delta = learningRateReductionMinimumImprovementCriterion, cooldown = learningRateReductionCooldown,
                                        min_lr = minimumLearningRate))

  callbackList.append(TerminateOnNaN())

  if useTensorboard:
    # NOTE(review): logsDirectory is computed once in the configuration cell, so every run in a
    # sweep writes its fit logs to the same directory -- confirm that this is intended.
    callbackList.append(tf.keras.callbacks.TensorBoard(log_dir=logsDirectory, histogram_freq=1, write_graph=True, write_images=True,
                                                       update_freq = 'epoch', profile_batch = [40, 45]))

  return callbackList

In [None]:
def saveHistoryAndFinalModel(model, modelSaveLoaction, history, historySaveLoaction):
  """Pickle the training history and save the trained model.

  Args:
    model: Object exposing ``save(path)``.
    modelSaveLoaction: Destination path for the saved model (parameter spelling kept for compatibility).
    history: Object whose ``history`` attribute is pickled.
    historySaveLoaction: Destination path for the pickled history (parameter spelling kept for compatibility).
  """
  # Fixed: the body used to ignore both path parameters and write to the notebook-level
  # historySaveLocation / modelSaveLocation instead.
  for targetPath in (historySaveLoaction, modelSaveLoaction):
    parentDirectory = os.path.dirname(targetPath)
    if parentDirectory:
      # Create the destination folder up front so a long training run is not lost to a missing directory.
      os.makedirs(parentDirectory, exist_ok=True)

  with open(historySaveLoaction, 'wb') as historyFile:
    pickle.dump(history.history, historyFile)

  model.save(modelSaveLoaction)

In [None]:
def getBaselineModel(classNames, convolutionKernelRegulariser, convolutionActivityRegulariser):
  """Build and compile the custom CNN baseline classifier.

  Reads the notebook-level ``imageHeight``, ``imageWidth`` and ``colorMode`` for the input shape.

  Args:
    classNames: Sequence of class labels; its length sets the size of the output layer.
    convolutionKernelRegulariser: ``kernel_regularizer`` applied to every Conv2D layer.
    convolutionActivityRegulariser: ``activity_regularizer`` applied to every Conv2D layer.

  Returns:
    A compiled ``Sequential`` model that outputs logits (no softmax).
  """
  numberOfClasses = len(classNames)

  # Fixed: the channel count was hard-coded to 1 although the images are loaded with colorMode.
  channelsPerColorMode = {'grayscale': 1, 'rgb': 3, 'rgba': 4}
  numberOfChannels = channelsPerColorMode[colorMode]

  def convolution(numberOfFilters):
    # All convolution layers share kernel size, padding, activation and regularisers.
    return layers.Conv2D(numberOfFilters, 3, padding='same', activation='relu',
                         kernel_regularizer = convolutionKernelRegulariser,
                         activity_regularizer = convolutionActivityRegulariser)

  model = Sequential([
    # Map pixel values from [0, 255] to [-1, 1]. Fixed: a comma was missing before input_shape.
    layers.Rescaling(scale=1 / 127.5, offset=-1, input_shape=(imageHeight, imageWidth, numberOfChannels)),
    convolution(16),
    layers.MaxPooling2D(),
    convolution(32),
    layers.MaxPooling2D(),
    layers.Dropout(0.1),
    convolution(64),
    layers.MaxPooling2D(),
    convolution(128),
    layers.MaxPooling2D(),
    layers.Dropout(0.4),
    convolution(256),
    layers.Dropout(0.1),
    convolution(256),
    convolution(128),
    layers.MaxPooling2D(),
    layers.Dropout(0.25),
    convolution(64),
    layers.MaxPooling2D(),
    convolution(64),
    layers.MaxPooling2D(),
    layers.Dropout(0.2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.05),
    layers.Dense(128, activation='relu'),
    layers.Dense(numberOfClasses)
  ])

  # The last Dense layer has no activation, hence from_logits=True.
  model.compile(optimizer='adam',
                loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy',  'crossentropy'])

  return model

In [None]:
def getTransferLearningModel(classNames, imageHeight, imageWidth, doSeperateFineTuning, makeTransferModelTrainable, addConvolutionLayersOnBaseMode,
                             numberOfUnitsConvolution, numberOfUnitsDense, dropoutRate, optimizer):
  """Build and compile a classifier on top of an ImageNet-pretrained ResNet152V2 backbone.

  Reads the notebook-level ``convolutionKernelRegulariser`` and ``convolutionActivityRegulariser``.

  Args:
    classNames: Sequence of class labels; its length sets the size of the output layer.
    imageHeight, imageWidth: Spatial size of the 3-channel input images.
    doSeperateFineTuning: Accepted for interface compatibility; not used in this function.
    makeTransferModelTrainable: Value assigned to the backbone's ``trainable`` attribute.
    addConvolutionLayersOnBaseMode: When true, two Conv2D blocks are added after the backbone.
    numberOfUnitsConvolution: Number of filters in each added Conv2D layer.
    numberOfUnitsDense: Number of units in the hidden Dense layer.
    dropoutRate: Rate used by every Dropout layer in the head.
    optimizer: Optimizer (name or instance) passed to ``model.compile``.

  Returns:
    ``(model, baseModel)``: the compiled full model, which outputs logits, and the backbone.
  """
  numberOfClasses = len(classNames)

  # pooling=None keeps the spatial feature map as the backbone output. This is the same tensor the
  # earlier code obtained by requesting pooling='max' and then cutting the model at layers[-2].
  baseModel = keras.applications.ResNet152V2(include_top=False, weights="imagenet", input_shape=(imageHeight, imageWidth, 3), pooling=None)
  baseModel.trainable = makeTransferModelTrainable

  inputs = keras.Input(shape=(imageHeight, imageWidth, 3))
  # Fixed: a ResNetV2 backbone needs the resnet_v2 preprocessing, not the ResNet v1 preprocessing
  # that was applied before.
  x = tf.keras.applications.resnet_v2.preprocess_input(inputs)

  # training=False keeps the backbone in inference mode inside the full model.
  x = baseModel(x, training = False)
  x = layers.MaxPooling2D()(x)

  if addConvolutionLayersOnBaseMode:
    x = layers.Conv2D(numberOfUnitsConvolution, 3, padding='same', activation='relu', kernel_regularizer = convolutionKernelRegulariser, activity_regularizer = convolutionActivityRegulariser)(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Dropout(dropoutRate)(x)
    x = layers.Conv2D(numberOfUnitsConvolution, 3, padding='same', activation='relu', kernel_regularizer = convolutionKernelRegulariser, activity_regularizer = convolutionActivityRegulariser)(x)
    x = layers.Dropout(dropoutRate)(x)

  x = layers.Flatten()(x)
  x = layers.Dense(numberOfUnitsDense, activation='relu')(x)
  x = layers.Dropout(dropoutRate)(x)
  outputs = layers.Dense(numberOfClasses)(x)

  model = keras.Model(inputs, outputs)

  # The output layer has no activation, hence from_logits=True.
  model.compile(optimizer=optimizer,
                loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy',  'crossentropy'])

  return model, baseModel

In [None]:
def trainAndTestModel(hparams, imageHeight, imageWidth, randomisingSeed, validationSplitFraction, imageInterpolationMethod, cropToPreserveAspectRatioWhileResizing, cropBottomPanel, epochs = 40):
  """Train one model with the given hyperparameters and return its accuracy on the test set.

  Reads the notebook-level configuration (``colorMode``, ``augmentDataset``, ``useArchitecture``,
  the early-stopping / learning-rate settings and the save locations).

  Args:
    hparams: Mapping from the ``HP_*`` hyperparameter objects to the values for this trial.
    imageHeight, imageWidth: Size the images are resized to.
    randomisingSeed: Seed forwarded to the dataset loader.
    validationSplitFraction: Fraction of the training data held out for validation.
    imageInterpolationMethod: Interpolation used when resizing.
    cropToPreserveAspectRatioWhileResizing: Forwarded to the dataset loader.
    cropBottomPanel: Forwarded to ``getDataset``.
    epochs: Maximum number of training epochs (default 40, the previously hard-coded value).

  Returns:
    The second value returned by ``model.evaluate`` on the testing dataset (the 'accuracy' metric).

  Raises:
    ValueError: If ``useArchitecture`` is neither 'customCNN' nor 'transfer'.
  """
  trainingDataset, augmentedTrainingDataset, validationDataset, testingDataset, classNames, imageHeight = getDataset(
      batchSize = hparams[HP_BATCH_SIZE],
      imageHeight = imageHeight, imageWidth = imageWidth,
      randomisingSeed = randomisingSeed,
      colorMode = colorMode,
      validationSplitFraction = validationSplitFraction,
      imageInterpolationMethod = imageInterpolationMethod,
      cropToPreserveAspectRatioWhileResizing = cropToPreserveAspectRatioWhileResizing,
      augmentDataset = augmentDataset, cropBottomPanel = cropBottomPanel)

  if 'customCNN' == useArchitecture:
    model = getBaselineModel(classNames, convolutionKernelRegulariser, convolutionActivityRegulariser)
  elif 'transfer' == useArchitecture:
    model, baseModel = getTransferLearningModel(classNames, imageHeight, imageWidth, doSeperateFineTuning, makeTransferModelTrainable, addConvolutionLayersOnBaseModel,
                                                numberOfUnitsConvolution = hparams[HP_NUM_UNITS_CONV], numberOfUnitsDense = hparams[HP_NUM_UNITS_DENSE],
                                                dropoutRate = hparams[HP_DROPOUT], optimizer = hparams[HP_OPTIMIZER])
  else:
    # Fail with a clear message instead of a NameError on `model` further down.
    raise ValueError("Unknown useArchitecture %r; expected 'customCNN' or 'transfer'" % (useArchitecture,))

  callbacks = getCallbacks(datasetName, modelName,
                           earlyStoppingMetric, earlyStoppingMinimumImprovementCriterion, earlyStoppingPatience,
                           baselineAccuracy,
                           learningRateReductionMetric, learningRateReductionFactor = hparams[HP_LR_REDUCTION],
                           patienceForLearningRateReduction = patienceForLearningRateReduction,
                           learningRateReductionMinimumImprovementCriterion = learningRateReductionMinimumImprovementCriterion,
                           learningRateReductionCooldown = learningRateReductionCooldown)

  history = model.fit(
    augmentedTrainingDataset,
    validation_data = validationDataset,
    epochs = epochs,
    verbose = 1,
    callbacks = callbacks
  )

  # NOTE(review): every trial saves to the same modelSaveLocation / historySaveLocation, so each
  # trial overwrites the previous trial's files -- confirm that this is intended.
  saveHistoryAndFinalModel(model, modelSaveLocation, history, historySaveLocation)

  # evaluate returns the loss followed by the compiled metrics ('accuracy', 'crossentropy').
  _, accuracy, *_ = model.evaluate(testingDataset)
  return accuracy

In [None]:
def run(run_dir, hparams):
  """Run one hyperparameter trial and log it under `run_dir` for the HParams dashboard.

  Records the trial's hyperparameter values, trains and tests a model with them, then writes
  the resulting accuracy as the METRIC_ACCURACY scalar at step 1.
  """
  trialWriter = tf.summary.create_file_writer(run_dir)
  with trialWriter.as_default():
    # Record the values used in this trial
    hp.hparams(hparams)
    testAccuracy = trainAndTestModel(
        hparams,
        imageHeight,
        imageWidth,
        randomisingSeed,
        validationSplitFraction,
        imageInterpolationMethod,
        cropToPreserveAspectRatioWhileResizing,
        cropBottomPanel,
    )
    tf.summary.scalar(METRIC_ACCURACY, testAccuracy, step=1)

In [None]:
# Grid search over the hyperparameter space: one `run` call per combination.
sessionNumber = 0

# np.linspace yields float64 grid points; tf.linspace produced float32 values that were logged
# as e.g. 0.10000000149011612.
dropoutRateGrid = np.linspace(HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value, 6).tolist()
lrReductionGrid = np.linspace(HP_LR_REDUCTION.domain.min_value, HP_LR_REDUCTION.domain.max_value, 6).tolist()

# Loop variables carry a `trial` prefix so they do not overwrite the configuration values
# `batchSize` and `learningRateReductionFactor` defined earlier in the notebook.
for trialUnitsConvolution in HP_NUM_UNITS_CONV.domain.values:
  for trialUnitsDense in HP_NUM_UNITS_DENSE.domain.values:
    for trialDropoutRate in dropoutRateGrid:
      for trialOptimizer in HP_OPTIMIZER.domain.values:
        for trialLrReductionFactor in lrReductionGrid:
          for trialBatchSize in HP_BATCH_SIZE.domain.values:
            hparams = {
                HP_NUM_UNITS_CONV: trialUnitsConvolution,
                HP_NUM_UNITS_DENSE: trialUnitsDense,
                HP_DROPOUT: trialDropoutRate,
                HP_OPTIMIZER: trialOptimizer,
                HP_LR_REDUCTION: trialLrReductionFactor,
                HP_BATCH_SIZE: trialBatchSize,
            }
            runName = "run-%d" % sessionNumber
            print('--- Starting trial: %s' % runName)
            print({h.name: hparams[h] for h in hparams})
            # Fixed: a '/' was missing, so runs were written to '<logs>/hparam_tuningrun-N'
            # instead of '<logs>/hparam_tuning/run-N'.
            run(rootLogsDirectory + 'hparam_tuning/' + runName, hparams)
            sessionNumber += 1


--- Starting trial: run-0
{'num_units_conv': 16, 'num_units_dense': 16, 'dropout': 0.10000000149011612, 'optimizer': 'adadelta', 'lr_reduction': 0.05000000074505806, 'batch_size': 20}
Found 1223 files belonging to 10 classes.
Using 979 files for training.
Using 244 files for validation.
Found 319 files belonging to 10 classes.
['Biological' 'Fibres' 'Films_Coated_Surface'
 'MEMS_devices_and_electrodes' 'Nanowires' 'Particles' 'Patterned_surface'
 'Porous_Sponge' 'Powder' 'Tips']
Epoch 1/40
Epoch 1: val_accuracy did not improve from 0.50000

Epoch 1: val_loss improved from inf to 84.34154, saving model to /content/drive/My Drive/SEMProjectSEM100/models/finalModels/TENSORBOARD_Resnet_finetunedAfterTraining_rgb_notCropped_augemnted_learningRateReduction10x_seed108_restDefaultLeastLossEpoch_0001.h5
Epoch 2/40
Epoch 2: val_accuracy did not improve from 0.50000

Epoch 2: val_loss improved from 84.34154 to 53.19442, saving model to /content/drive/My Drive/SEMProjectSEM100/models/finalModels/T

In [None]:
%tensorboard --logdir logs/hparam_tuning