In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import cv2
import tensorflow as tf
import sys
from datetime import datetime

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import VGG19
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam, Nadam, SGD, RMSprop
from sklearn.metrics import confusion_matrix, f1_score, classification_report, precision_score, recall_score

from tensorflow.keras import backend as K
from typeguard import typechecked
from tensorflow_addons.utils.types import AcceptableDTypes, FloatTensorLike
from typing import Optional
import tensorflow_addons as tfa

# Mount Google Drive

In [None]:
# Colab-only helper used to attach Google Drive to the runtime.
from google.colab import drive
#drive.flush_and_unmount() #you can unmount your drive with this code
# Mount Drive under /gdrive; force_remount=True asks for a fresh mount on every run of this cell.
drive.mount('/gdrive', force_remount=True)

# Project Folder path

In [None]:
# Root folder of the project on the mounted Drive; the data and label paths below are built from it.
project_dir = "/gdrive/My Drive/Final_Project_CrystalsFirst/Model/"
# Make it the working directory so relative paths such as saved_model/ resolve inside the project.
%cd {project_dir} 

# Image Settings

In [None]:
## image settings
# NOTE(review): presumably the native resolution of the source images; the visible cells
# never read these two values (images are resized to IMG_SIZE instead) — confirm they are needed.
img_height = 960
img_width = 1280

## labels to be adapted based on desired classification 
# The prediction cell indexes this list with the argmax of the model output, so its order
# has to line up with the class indices used in training (see train_generator.class_indices).
classes = ['crystal', 'no_crystal']
# classes = ['amorphous_precipitate', 'clear', 'crystal', 'impurity', 'homogenous_precipitate', 'inhomogenous_precipitate', 'phase_seperation']

# Train / Val   +  Data augmentation

In [None]:
IMG_SIZE = 224    # images are resized to IMG_SIZE x IMG_SIZE when loaded
BATCH_SIZE = 16

# Both flows read the same labelled folder; each generator is configured with
# validation_split=0.3 and selects its part via `subset`.
data_dir = project_dir + "labels/binary_original"

# Options shared by the training and the validation flow.
flow_options = dict(
    class_mode='categorical',   # one-hot labels (alternative: class_mode='binary')
    shuffle=True,
    seed=42,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
)

#############  TRAIN  ############################################################
# Training ImageDataGenerator: rescaling plus augmentation transforms.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.3,
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    # shear_range=0.005,
    # zoom_range=[0.9, 1.4],
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=(.8,1.2),
    fill_mode='nearest',
)

# Flow over the 'training' subset of the directory.
train_generator = train_datagen.flow_from_directory(
    data_dir,
    subset='training',
    **flow_options,
)

#############  VALIDATION ########################################################
# Validation ImageDataGenerator: rescaling only, no augmentation.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.3,
)

# Flow over the 'validation' subset of the same directory.
val_generator = val_datagen.flow_from_directory(
    data_dir,
    subset='validation',
    **flow_options,
)




In [None]:
# ## Plot sample images
# x,y = train_generator.next()
# print(x.shape, y.shape)
# for i in range(0,5):
#     image = x[i]
#     plt.title(y[i])
#     plt.imshow(image)
#     plt.show()

# Get classes from folder names

In [None]:
# class_names = train_generator.labels
# print(class_names)

In [None]:
# len(class_names)

In [None]:
# Display the mapping from class (sub-folder) name to the integer index used for the labels.
train_generator.class_indices

# Shape of training input

In [None]:
# Pull a single batch from the training flow and report the array shapes
# of the images and of their labels.
image_batch, labels_batch = next(iter(train_generator))
print(image_batch.shape)
print(labels_batch.shape)

## Configure the dataset for performance

# TO BE DONE:  
`Dataset.cache()` 
or
`Dataset.prefetch()` 
(https://www.tensorflow.org/guide/data_performance#prefetching).

In [None]:
# AUTOTUNE = tf.data.experimental.AUTOTUNE

# train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
# val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

## Defining the step size

In [None]:
# Number of batches needed to see every sample once per epoch.
# Rounding up (instead of round()) keeps the final, partially filled batch and
# never yields 0 steps for a small dataset.
steps_train = int(np.ceil(train_generator.n / BATCH_SIZE))
steps_val = int(np.ceil(val_generator.n / BATCH_SIZE))

# Balancing Classes

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Integer class index of every training image, as assigned by the directory flow.
train_labels = train_generator.classes

# 'balanced' weights, one entry per distinct class index found in the training labels.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels,
)
class_weights

In [None]:
# fit() expects class weights as a {class_index: weight} dictionary,
# so pair every weight with its position in the array.
class_weight_dict = dict(enumerate(class_weights))
class_weight_dict

In [None]:
# # getting number of classes
# num_classes = len(class_weights)
# num_classes

In [None]:
# ## Plot sample images
# x,y = train_generator.next()
# for i in range(0,5):
#     image = x[i]
#     plt.imshow(image)
#     plt.show()

# Setting up VGG19

In [None]:
# Input tensor of the network. Its spatial size must equal the target_size the data
# generators resize to, so it is derived from IMG_SIZE instead of repeating 224.
input_t = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

In [None]:
# Load a new instance of the VGG19 model with ImageNet weights, built on top of
# `input_t` and without the ImageNet classifier head (include_top=False).
VGG19_model = VGG19(
    input_tensor=input_t,
    include_top=False,
    weights='imagenet',
)

In [None]:
# Print the layer-by-layer overview of the loaded VGG19 base.
VGG19_model.summary()

# Transfer Learning

In [None]:
# VGG19_model.trainable = False
# for layer in VGG19_model.layers:
#    if 'block5_' in layer.name:
#      layer.trainable = True

Fine Tuning / Opening up more layers

In [None]:
# # Fine-tune from this layer onwards
# VGG19_model.trainable = True
# fine_tune_at = 16 # 16 = block_4_pool and all block_5 open

# # Freeze all the layers before the `fine_tune_at` layer
# for layer in VGG19_model.layers[:fine_tune_at]:
#   layer.trainable =  False


""" model behaves the best when all VGG layers are closed"""
# Freeze every layer of the loaded VGG19 base so that only the newly added
# layers are updated during training.
for vgg_layer in VGG19_model.layers:
    vgg_layer.trainable = False

In [None]:
# List each VGG19 layer together with its trainable flag.
for layer in VGG19_model.layers:
    print(f"{layer.name} -> {layer.trainable}")

In [None]:
# Handle on the 'block5_pool' layer of the VGG19 base; its output is used as the
# output of the convolutional sub-model built in the next section.
last_conv_layer = VGG19_model.get_layer('block5_pool')

# Setting up New_Model

In [None]:
## Convolutional base: VGG19 from its input up to the output of `last_conv_layer`
conv_model = Model(inputs=VGG19_model.input, outputs=last_conv_layer.output)

## Full classifier: convolutional base followed by a small dense head
new_model = Sequential([
    conv_model,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax'),   # one output unit per class
])

In [None]:
# Print the layer overview of the assembled model (VGG19 base + dense head).
new_model.summary()

# Optimizer
Adam is the best optimizer found so far, but others could be tested.   
The learning rate still needs to be tuned.

In [None]:
# `learning_rate` is the supported argument/attribute name; the short `lr` alias is
# deprecated and rejected by newer Keras releases.
optimizer = Adam(learning_rate=1e-3)
optimizer.learning_rate.numpy()

## Compile the model

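Cross-entropy loss function for classification. The active cell below uses categorical cross-entropy with one-hot labels; the binary variant is kept commented out for reference:
https://www.tensorflow.org/api_docs/python/tf/keras/losses/BinaryCrossentropy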

In [None]:
# ## Binary Crossentropy 
# new_model.compile(optimizer= optimizer,
#               loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
#               metrics=[tf.keras.metrics.BinaryAccuracy()])

In [None]:
# #  Sparse Categorical
# new_model.compile(optimizer= optimizer,
#               loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
#               metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [None]:
# Categorical cross-entropy on one-hot labels. The last layer already applies
# softmax, hence from_logits=False.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
accuracy_metric = tf.keras.metrics.CategoricalAccuracy()

new_model.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=[accuracy_metric],
)

## Model summary

View all the layers of the network using the model's `summary` method:

In [None]:
## Transfer learning model summary: check whether its layers are trainable (open) or frozen
# conv_model.summary()

In [None]:
## Open or close (unfreeze / freeze) the transfer learning model layers
# conv_model.trainable = False

In [None]:
## Trainable flag of each top-level layer of the complete model
for model_layer in new_model.layers:
    print(f"{model_layer.name} -> {model_layer.trainable}")

In [None]:
## complete model summary
# Printed again after compiling, following the trainable-flag listing in the previous cell.
new_model.summary()

## Train the model

In [None]:
%%time
epochs = 50
history = new_model.fit(
  train_generator,
  steps_per_epoch=steps_train,
  validation_data=val_generator,
  validation_steps = steps_val,
  # callbacks = [tboard_callback],
  class_weight = class_weight_dict,
  epochs=epochs
)


## Visualize training results  
Create plots of loss and accuracy on the training and validation sets.

In [None]:
# Per-epoch metrics recorded by fit().
acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the recorded history rather than from the global `epochs`,
# so the plot stays valid if training ran for a different number of epochs or
# `epochs` was reassigned by another cell.
epochs_range = range(len(acc))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 6))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')

plt.show()

# Save Model

In [None]:
# Make sure the target folder exists (no error if it is already there), then
# write the trained model into it. The path is relative to the working directory.
os.makedirs('saved_model', exist_ok=True)
new_model.save('saved_model/VGG19_batch_16_epoch_50')

In [None]:
# # model directory
# !ls saved_model

# # Contains an assets folder, saved_model.pb, and variables folder.
# !ls saved_model/VGG19_2/

# Test / Predict

In [None]:
# Reload the model written by the "Save Model" cell; this rebinds `new_model`,
# so the prediction cell below runs on the saved copy.
new_model = tf.keras.models.load_model('saved_model/VGG19_batch_16_epoch_50')

In [None]:
folder_path = "/gdrive/My Drive/Final_Project_CrystalsFirst/Model/labels/test/"

#####  TEST ON LARGE SET #####
# This assignment overrides the small test folder above; comment it out to switch back.
folder_path = "/gdrive/My Drive/Final_Project_CrystalsFirst/Model/labels/test_on_wellimages_17/"
##############################

# filename -> (predicted class name, formatted confidence string)
predictions = {}

for filename in os.listdir(folder_path):
    # Only .jpg / .JPG files are scored; every other directory entry is skipped.
    if not filename.endswith((".jpg", ".JPG")):
        continue

    # Same preprocessing as in training: resize to IMG_SIZE and scale pixels to [0, 1].
    img = keras.preprocessing.image.load_img(
        folder_path+filename,
        target_size=(IMG_SIZE, IMG_SIZE),
    )
    img_array = keras.preprocessing.image.img_to_array(img)/255
    img_array = tf.expand_dims(img_array, 0)   # add the batch dimension

    pred = new_model.predict(img_array)
    predicted_class = classes[np.argmax(pred)]
    confidence_text = "confidance of {:.2f}%".format(100 * np.max(pred))
    predictions[filename] = (predicted_class, confidence_text)

predictions


# Evaluation

## Confusion Matrix

In [None]:
import pandas as pd

# Ground-truth labels stored with the project; the single column is renamed so
# it can sit next to the predictions later on.
labels_path = project_dir+"source/image_labels.json"
df_json = pd.read_json(labels_path, orient="columns")
# df_json.columns=["image","original_labels"]
df_json.columns = ["original_labels"]

In [None]:
# df_json

In [None]:
# One row per image file (index = filename). Column 0 holds the predicted class
# name and column 1 the confidence string; only the class name is kept.
df_pred = pd.DataFrame.from_dict(predictions, orient="index")
df_pred = df_pred.drop(columns=1)
# df_pred = df_pred.reset_index()
# df_pred.columns = ["image", "predictions"]
df_pred.columns = ["predictions"]

In [None]:
# Put predictions and ground-truth labels side by side, aligned on the index;
# join='inner' keeps only index entries that occur in both frames.
cm_df = pd.concat([df_pred, df_json], axis=1, join='inner')

In [None]:
# Display the combined predictions / ground-truth table.
cm_df

In [None]:
# Confusion matrix with true labels as rows and predictions as columns.
# `labels` pins the row/column order (and keeps the matrix 2x2 even if one class
# is missing from the data), so it always agrees with the tick labels used when
# the matrix is plotted.
cm = confusion_matrix(cm_df.loc[:,"original_labels"],
                      cm_df.loc[:,"predictions"],
                      labels=['crystal', 'no_crystal'])
cm

In [None]:
import seaborn as sn

# Draw the confusion matrix once, as an annotated heatmap. The tick labels are
# passed to heatmap() directly so they land on the cell centres, and fmt='d'
# prints the counts as plain integers instead of scientific notation.
labels = ['crystal', 'no_crystal']
fig, ax = plt.subplots(figsize=(8, 8))
sn.heatmap(cm, annot=True, fmt='d', xticklabels=labels, yticklabels=labels, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

## Classification Report

In [None]:
# Text report with precision, recall and F1 per class for the two labels.
cr = classification_report(
    cm_df["original_labels"],
    cm_df["predictions"],
    labels=['crystal', 'no_crystal'],
    digits=2,
    zero_division='warn',
)

In [None]:
# The report is a preformatted multi-line string, so print() is used to render its layout.
print(cr)

## ROC curve

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score


In [None]:
# Encode the class names as integers for the ROC computation:
# 'no_crystal' becomes 1 and 'crystal' becomes 0, in every column of cm_df.
cm_df = cm_df.replace({"no_crystal": 1, "crystal": 0})

In [None]:
def plot_roc(y_test, proba_preds, model_label='VGG19'):
    """Print the ROC AUC of a binary classifier and plot its ROC curve.

    Args:
        y_test: true binary labels; passed unchanged to scikit-learn's
            ``roc_auc_score`` and ``roc_curve``.
        proba_preds: predicted scores for the positive class, one per entry of
            ``y_test``.
        model_label: name shown in the printed summary and in the plot legend.

    Returns:
        None. The AUC is printed and the figure is shown with ``plt.show()``.
    """
    auc = roc_auc_score(y_test, proba_preds)

    # summarize scores
    print('%s: ROC AUC=%.3f' % (model_label, auc))

    # ROC curve of the model
    fpr, tpr, _ = roc_curve(y_test, proba_preds)

    # The diagonal is the curve of a constant score (no discrimination); it is
    # the same line the former all-zero baseline produced.
    plt.plot([0, 1], [0, 1], linestyle='--', label='Base')
    plt.plot(fpr, tpr, marker='.', label=model_label)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    plt.show()

In [None]:
# NOTE(review): the "predictions" column holds the encoded argmax class (0/1), not a
# probability, so the resulting curve is built from hard decisions only.
plot_roc(
    y_test=cm_df["original_labels"],
    proba_preds=cm_df["predictions"],
)

In [None]:
# False/true positive rates and the matching thresholds for the encoded test
# predictions. These names were never assigned before, so they are computed here
# before being displayed.
fpr, tpr, thresholds = roc_curve(cm_df.loc[:, "original_labels"],
                                 cm_df.loc[:, "predictions"])
fpr, tpr, thresholds

# Hyperparameters tuning

In [None]:
# # Load the TensorBoard notebook extension
# %load_ext tensorboard

In [None]:
# # Clear any logs from previous runs
# ######################################################## only deleting the corresponding folder and not all other folder ###############
# !rm -rf ./logs/hparam_tuning_VGG

In [None]:
# from tensorboard.plugins.hparams import api as hp

In [None]:
# HP_DROPOUT = hp.HParam('dropout', hp.Discrete([0.3]))
# HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['Adam', 'RMSprop']))
# HP_ACTIVATION = hp.HParam('activation', hp.Discrete(['softmax'])) 
# HP_LEARNINGRATE = hp.HParam('learningrate', hp.Discrete([0.001, 0.0001, 0.00001]))
# HP_EPOCHS = hp.HParam('epochs', hp.Discrete([10, 20]))
# HP_BATCHS = hp.HParam('epochs', hp.Discrete([16, 32]))

In [None]:
# METRIC_ACCURACY = 'categorical_accuracy'

# with tf.summary.create_file_writer('./logs/hparam_tuning_VGG').as_default():
#   hp.hparams_config(
#     hparams=[HP_DROPOUT, HP_OPTIMIZER, HP_ACTIVATION, HP_EPOCHS],
#     metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
#   )

In [None]:
# def train_test_model(hparams, logs):
#   input_t = tf.keras.Input(shape = (224,224,3))
#   VGG19_model = tf.keras.applications.VGG19(input_tensor = input_t,
#                                               include_top=False, # do not include ImageNet classifier at the top
#                                               weights='imagenet',
#                                               )
  
#   last_conv_layer = VGG19_model.get_layer('block5_pool')
  
#   conv_model = Model(inputs=VGG19_model.input,
#                    outputs=last_conv_layer.output)
  
#   for layer in conv_model.layers:
#     layer.trainable = False

#   new_model = tf.keras.Sequential()
#   new_model.add(conv_model)
#   new_model.add(tf.keras.layers.Flatten())
#   new_model.add(tf.keras.layers.Dense(512, activation='relu')) 
#   new_model.add(tf.keras.layers.Dense(2, activation= hparams[HP_ACTIVATION])) 
  
  
#   new_model.compile(optimizer=hparams[HP_OPTIMIZER],
#                 loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
#               metrics=[tf.keras.metrics.CategoricalAccuracy()])
  
#   new_model.fit(
#   train_generator,
#   steps_per_epoch=steps_train,
#   validation_data=val_generator,
#   validation_steps = steps_val,
#   callbacks = [tf.keras.callbacks.TensorBoard(log_dir = logs,
#                                               write_graph = True,
#                                               histogram_freq = 1,
#                                               profile_batch = '500,520')],
#                                               class_weight = class_weight_dict,
#                                               epochs = hparams[HP_EPOCHS]
#                                               )

#   _, accuracy = new_model.evaluate(val_generator)
#   return accuracy
  


In [None]:
# def run(run_dir, hparams):
#   with tf.summary.create_file_writer(run_dir).as_default():
#     hp.hparams(hparams)  # record the values used in this trial
#     accuracy = train_test_model(hparams, run_dir)
#     tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)

In [None]:
# session_num = 0
# for dropout_rate in HP_DROPOUT.domain.values:
#   for activation in HP_ACTIVATION.domain.values:
#     for optimizer in HP_OPTIMIZER.domain.values:
#       for epochs in HP_EPOCHS.domain.values:
          
#           hparams = {
#               HP_DROPOUT: dropout_rate,
#               HP_ACTIVATION: activation,
#               HP_OPTIMIZER: optimizer,
#               HP_EPOCHS: epochs,
              
#           }
#           ############ change the folder name here in run ####################
#           run_name = "run-%d" % session_num
#           print('--- Starting trial: %s' % run_name)
#           print({h.name: hparams[h] for h in hparams})
#           run('./logs/hparam_tuning_VGG/' + run_name, hparams)
#           session_num += 1

In [None]:
# !pip install -U tensorboard_plugin_profile

In [None]:
# %tensorboard --logdir ./logs/hparam_tuning_VGG/