In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import cv2
import tensorflow as tf
import sys
from datetime import datetime

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import confusion_matrix, f1_score, classification_report, precision_score, recall_score



# Mount Google Drive

In [None]:
# Attach Google Drive to the Colab runtime at /gdrive.
# force_remount=True is passed so the call re-mounts rather than reusing an existing mount.
from google.colab import drive
#drive.flush_and_unmount() # uncomment to unmount the drive again
drive.mount('/gdrive', force_remount=True)


# Project Folder path

In [None]:
# Root folder of the project on the mounted drive; later cells build the
# label and source-data paths by appending to this string.
project_dir = "/gdrive/My Drive/Final_Project_CrystalsFirst/Model/"
# Switch the working directory so the relative paths used later
# (saved_model/, logs/) resolve inside the project folder.
%cd {project_dir} 

# Image Settings

In [None]:
## Image settings
# NOTE(review): img_height / img_width are not referenced by any other cell
# shown in this notebook; the generators resize to IMG_SIZE instead.
img_height = 960
img_width = 1280

##################################################################################################################################
##### CODE TO BE REMOVED
# Label names used by the prediction cell to translate an argmax index into a
# class name. Adapt to the desired classification task.
# NOTE(review): the order must match train_generator.class_indices - confirm.
classes = ['crystal', 'no_crystal']
# classes = ['amorphous_precipitate', 'clear', 'crystal', 'impurity', 'homogenous_precipitate', 'inhomogenous_precipitate', 'phase_seperation']

# Train / Val Split  +  Data augmentation

In [None]:
IMG_SIZE = 224
BATCH_SIZE = 1

# Settings common to the training and validation flows. Both read the same
# folder and use the same seed; only the `subset` argument differs.
# (Multi-class alternative folder: project_dir + "labels/multi_class")
shared_flow_args = dict(
    directory=project_dir + "labels/binary",
    class_mode='categorical',
    shuffle=True,
    seed=42,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
)

# NOTE(review): the Keras EfficientNet documentation states that these models
# include their own input rescaling and expect pixel values in [0, 255].
# Confirm that rescale=1./255 below (and the matching /255 in the prediction
# cell) is intended before changing either side.

#############  TRAIN  ############################################################
# Training ImageDataGenerator: rescaling plus augmentation, with 30% of the
# images reserved for validation.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.3,
    rotation_range=45,
    # Further augmentations that are currently switched off:
    # width_shift_range=0.2,
    # height_shift_range=0.2,
    # shear_range=0.005,
    # zoom_range=[0.9, 1.4],
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=(.8, 1.2),
    fill_mode='nearest',
)

# Flow over the 'training' subset of the label folder.
train_generator = train_datagen.flow_from_directory(
    subset='training',
    **shared_flow_args
)


#############  VALIDATION ########################################################
# Validation ImageDataGenerator: rescaling only, same 30% split.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.3,
)

# Flow over the 'validation' subset of the same folder.
val_generator = val_datagen.flow_from_directory(
    subset='validation',
    **shared_flow_args
)


# Get classes from folder names

In [None]:
# `train_generator.labels` holds one integer label per image, not the class
# names. The names are the keys of `class_indices` (name -> label index), so
# list them ordered by their index.
class_names = sorted(train_generator.class_indices,
                     key=train_generator.class_indices.get)
print(class_names)

In [None]:
# Display the mapping from class (folder) name to label index.
train_generator.class_indices

## Visualize sample data 

In [None]:
# plt.figure(figsize=(10, 10))
# for images, labels in train_ds.take(1):
#   for i in range(4):
#     ax = plt.subplot(2, 2, i + 1)
#     plt.imshow(images[i].numpy().astype("uint8"))
#     plt.title(class_names[labels[i]])
#     plt.axis("off")

# Shape of training input

In [None]:
# Pull a single batch from the training flow and report its array shapes.
image_batch, labels_batch = next(iter(train_generator))
print(image_batch.shape)
print(labels_batch.shape)

## Configure the dataset for performance

# TO BE DONE:  
`Dataset.cache()` 
or
`Dataset.prefetch()` 
(https://www.tensorflow.org/guide/data_performance#prefetching).

In [None]:
# AUTOTUNE = tf.data.experimental.AUTOTUNE

# train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
# val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

## Defining the step size

In [None]:
# Batches needed to visit every sample once per epoch. Rounding up (rather
# than round()) keeps a final, smaller batch from being skipped when the
# sample count is not a multiple of BATCH_SIZE.
steps_train = int(np.ceil(train_generator.n / BATCH_SIZE))
steps_val = int(np.ceil(val_generator.n / BATCH_SIZE))


# Balancing Classes

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' weights computed from the integer labels of the training subset,
# one weight per distinct label.
train_labels = train_generator.classes
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels,
)
class_weights

In [None]:
# Keras expects class weights as a {label index: weight} dictionary, so pair
# each weight with its position in the array.
class_weight_dict = dict(enumerate(class_weights))
class_weight_dict

In [None]:
# Number of classes = number of distinct labels found in the training subset
# (one class weight was computed per label above).
num_classes = len(class_weights)
num_classes

In [None]:
# ## Plot sample images
# x,y = train_generator.next()
# for i in range(0,5):
#     image = x[i]
#     plt.imshow(image)
#     plt.show()

# Setting up Efficient Net
https://keras.io/api/applications/efficientnet/#efficientnetb7-function

https://ai.googleblog.com/2019/05/efficientnet-improving-accuracy-and.html

In [None]:
# Input tensor for the backbone. It is built from IMG_SIZE so that it always
# matches the target_size the generators resize images to (RGB, 3 channels).
input_t = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

In [None]:
# Fresh EfficientNetB7 backbone on top of `input_t`, initialised with the
# ImageNet weights.
effnet_model = tf.keras.applications.EfficientNetB7(
    weights='imagenet',
    include_top=False,  # leave out the ImageNet classifier head
    input_tensor=input_t,
)

In [None]:
# Print the layer-by-layer summary of the backbone; the layer names shown here
# are what the unfreezing cell below matches against.
effnet_model.summary()

# Transfer Learning
TODO: try different strategies for unfreezing ("opening") layers of the pretrained network.

# Efficient Net

In [None]:
# Freeze the whole backbone, then re-open every layer whose name contains
# 'block7d' or 'top_', except layers with 'bn' in their name, which stay frozen.
effnet_model.trainable = False
for layer in effnet_model.layers:
    in_open_region = ('block7d' in layer.name) or ('top_' in layer.name)
    has_bn_in_name = 'bn' in layer.name
    if in_open_region and not has_bn_in_name:
        layer.trainable = True

# Alternative strategy (disabled): open everything except the 'bn' layers.
# effnet_model.trainable = True
# for layer in effnet_model.layers:
#    if ('bn' in layer.name):
#      layer.trainable = False


In [None]:
# List each backbone layer with its trainable flag to verify the freezing above.
for backbone_layer in effnet_model.layers:
    print(f"{backbone_layer.name} -> {backbone_layer.trainable}")

In [None]:
# Layer whose output becomes the output of the feature extractor built below.
last_conv_layer = effnet_model.get_layer('top_activation')

# Setting up New_Model

In [None]:

# Feature extractor: the EfficientNet graph from its input to `top_activation`.
conv_model = Model(inputs=effnet_model.input,
                   outputs=last_conv_layer.output)

# Full classifier = feature extractor + small classification head.
new_model = Sequential()
new_model.add(conv_model)
# Collapse each feature map to a single value.
new_model.add(tf.keras.layers.GlobalAveragePooling2D())
new_model.add(tf.keras.layers.Dropout(0.4))
# One softmax unit per class. The width follows `num_classes` (derived from
# the training labels) instead of a hard-coded 2, so the same cell also works
# for the multi-class label folder.
new_model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

# Optimizer
Adam is currently the preferred optimizer, but others could be tested.   
The learning rate still needs to be tuned.

In [None]:
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
optimizer = Adam(learning_rate=1e-4)
# Show the configured learning rate as a plain number.
optimizer.learning_rate.numpy()

## Compile the model

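Cross-entropy loss. The active cell below uses categorical cross-entropy on the two-unit softmax output; the binary cross-entropy variant (documented at the link below) is kept commented out.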
https://www.tensorflow.org/api_docs/python/tf/keras/losses/BinaryCrossentropy

In [None]:
# ## Binary Crossentropy 
# new_model.compile(optimizer= optimizer,
#               loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
#               metrics=[tf.keras.metrics.BinaryAccuracy()])

In [None]:
# Categorical set-up: the model outputs softmax probabilities (hence
# from_logits=False) and the generators yield one-hot labels.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
accuracy_metric = tf.keras.metrics.CategoricalAccuracy()
new_model.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=[accuracy_metric],
)

## Model summary

View all the layers of the network using the model's `summary` method:

In [None]:
## Transfer-learning model summary: use it to check which layers are open (trainable).
conv_model.summary()

In [None]:
## open orclose transfer leanong model layers
# conv_model.trainable = False

In [None]:
## Layers of the complete model with their trainable flags
for model_layer in new_model.layers:
    print(f"{model_layer.name} -> {model_layer.trainable}")

In [None]:
## Summary of the complete model (feature extractor + classification head)

new_model.summary()

## Train the model

In [None]:
# Initial training run.
# NOTE(review): unlike the continued-training cell below, no class_weight is
# passed here - confirm whether that is intentional.
epochs = 100
history = new_model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=steps_train,
    validation_data=val_generator,
    validation_steps=steps_val,
)

In [None]:
new_epochs = 20
epochs += new_epochs

# Continue training from where the previous run stopped: start at the epoch
# after the last recorded one and run until the new total is reached.
# Note: `history` is replaced, so afterwards it only holds this run's epochs.
resume_from = history.epoch[-1] + 1
history = new_model.fit(
    train_generator,
    steps_per_epoch=steps_train,
    epochs=epochs,
    initial_epoch=resume_from,
    validation_data=val_generator,
    validation_steps=steps_val,
    class_weight=class_weight_dict,
)



## Visualize training results 

In [None]:
acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the epoch indices recorded in `history` for the x-axis. After the
# continued-training cell `history` only holds that run's epochs, so
# range(epochs) would be longer than the metric lists and plotting would fail.
epochs_range = history.epoch

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 6))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')
plt.show()

# Save Model
https://www.tensorflow.org/tutorials/keras/save_and_load

In [None]:
# Create the output folder (-p: no error if it already exists), then write the
# complete model to saved_model/EffNet_4, relative to the working directory.
!mkdir -p saved_model
new_model.save('saved_model/EffNet_4') 

In [None]:
# model directory
# !ls saved_model

# # Contains an assets folder, saved_model.pb, and variables folder.
# !ls saved_model/EffNet_3/

# Predict on new data

In [None]:
folder_path = "/gdrive/My Drive/Final_Project_CrystalsFirst/Model/labels/test_small/"

# file name -> (predicted class name, formatted confidence string)
predictions = {}

# Sorted so that results come out in a reproducible order.
for filename in sorted(os.listdir(folder_path)):
    # Only JPEG files are scored; the extension check ignores case.
    if not filename.lower().endswith(".jpg"):
        continue

    img = keras.preprocessing.image.load_img(
        os.path.join(folder_path, filename),
        target_size=(IMG_SIZE, IMG_SIZE))
    # Same 1/255 scaling as the training and validation generators.
    img_array = keras.preprocessing.image.img_to_array(img) / 255
    # Add a leading batch axis: the model expects a batch of images.
    img_array = tf.expand_dims(img_array, 0)
    pred = new_model.predict(img_array)

    # NOTE(review): `classes` is a hand-written list; its order must match
    # train_generator.class_indices for the names to be right - confirm.
    predictions[filename] = (classes[np.argmax(pred)],
                             "confidance of {:.2f}%".format(100 * np.max(pred)))

predictions

In [None]:
# First element of every prediction tuple is the predicted class name.
collection = [entry[0] for entry in predictions.values()]

# Tally how often each of the two labels was predicted.
num_crystal = collection.count('crystal')
num_no_crystal = collection.count('no_crystal')

print("Number of crystals:", num_crystal, "\n",
      "Number of NO crystals", num_no_crystal)

# Confusion Matrix

In [None]:
import pandas as pd

# Ground-truth labels. Create a different frame depending on whether this is
# the binary or the multi-class problem.
labels_json_path = project_dir + "source/image_labels.json"
df_json = pd.read_json(labels_json_path, orient="columns")
#df_json = df_json.set_index("index")
df_json.head(2)

In [None]:
# One row per predicted image: move the file name out of the index, name the
# three columns, then index the table by image file name.
pred_table = pd.DataFrame.from_dict(predictions, orient="index").reset_index()
pred_table.columns = ["image", "predictions", "confidence"]
df_pred = pred_table.set_index("image")
df_pred.head(2)

In [None]:
# Put predictions and ground truth side by side, aligned on the index and
# keeping only index values present in both frames (join='inner').
# NOTE(review): df_pred is indexed by image file name; presumably df_json's
# index holds the same file names - confirm, otherwise the result is empty.
cm_df = pd.concat([df_pred, df_json], axis=1, join='inner')

In [None]:
# Display the combined prediction / ground-truth table.
cm_df

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are true labels and columns are predicted labels, both in the order
# given by `labels`.
cm = confusion_matrix(
    cm_df.loc[:, "y_true"],
    cm_df.loc[:, "predictions"],
    labels=['crystal', 'no_crystal'],
)
cm

In [None]:
import seaborn as sn

# Same label order as passed to confusion_matrix above.
labels = ['crystal', 'no_crystal']

fig, ax = plt.subplots(figsize=(10, 10))
# Draw the matrix once, on an explicit axes, and let seaborn place the tick
# labels. Previously a matshow image and a heatmap were drawn on top of each
# other and the tick labels were set without matching tick positions.
# fmt='d' prints the integer counts in full instead of scientific notation.
sn.heatmap(cm, annot=True, fmt='d',
           xticklabels=labels, yticklabels=labels, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

# Classification Report

In [None]:
# Text report of per-class metrics for the two labels, rounded to two digits.
report_labels = ['crystal', 'no_crystal']
cr = classification_report(
    cm_df.loc[:, "y_true"],
    cm_df.loc[:, "predictions"],
    labels=report_labels,
    digits=2,
    zero_division='warn',
)

In [None]:
# print() is used on purpose: the report is a pre-formatted multi-line string.
print(cr)

# Hyperparameters tuning

In [None]:
# Load the TensorBoard notebook extension (makes the %tensorboard magic available)
%load_ext tensorboard

In [None]:
# Clear logs from previous runs of this tuning experiment.
# Only logs/hparam_tuning_EffNet is deleted; other folders under logs/ are left untouched.
!rm -rf logs/hparam_tuning_EffNet

In [None]:
# HParams dashboard API. In the cells shown it is only referenced by the
# commented-out tuning code below.
from tensorboard.plugins.hparams import api as hp

In [None]:
# HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([200, 250, 300]))
# HP_DROPOUT = hp.HParam('dropout', hp.Discrete([0.1, 0.2, 0.4]))
# HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'nadam', 'sgd']))
# HP_ACTIVATION = hp.HParam('activation', hp.Discrete(['softmax', 'sigmoid']))
# HP_LEARNINGRATE = hp.HParam('learningrate', hp.Discrete([0.001, 0.0001, 0.00001]))
# HP_EPOCHS = hp.HParam('epochs', hp.Discrete([20, 50, 70]))
# HP_BATCHS = hp.HParam('batches', hp.Discrete([5, 8, 10]))

In [None]:
# METRIC_ACCURACY = 'categorical_accuracy'

# with tf.summary.create_file_writer('logs/hparam_tuning_EffNet').as_default():
#   hp.hparams_config(
#     hparams=[HP_DROPOUT, HP_OPTIMIZER, HP_ACTIVATION, HP_LEARNINGRATE, HP_EPOCHS, HP_BATCHS],
#     metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
#   )

In [None]:
def train_test_model(hparams, logs):
  """Train and evaluate one hyperparameter trial (currently disabled).

  The implementation is kept below as commented-out code. On its own, a
  function whose body consists only of comments is a syntax error, so this
  stub raises explicitly until the implementation is restored.

  Args:
    hparams: mapping indexed by the HP_* hyperparameter objects (as used by
      the disabled implementation below).
    logs: log directory handed to the TensorBoard callback in the disabled
      implementation.

  Raises:
    NotImplementedError: always, because the training code is commented out.
  """
  raise NotImplementedError(
      "train_test_model is disabled: its implementation is commented out.")
## Disabled implementation (built on effnet_model) ####################################
# NOTE(review): the compile call below uses from_logits=True although the final
# Dense layer applies an activation - review before re-enabling.
#   conv_model = Model(inputs=effnet_model.input,
#                    outputs=last_conv_layer.output)

#   new_model = tf.keras.Sequential()
#   new_model.add(conv_model)
#   new_model.add(tf.keras.layers.GlobalAveragePooling2D())
# # new_model.add(tf.keras.layers.Flatten()) # flatten has similar effect as GAP
#   new_model.add(tf.keras.layers.Dropout(hparams[HP_DROPOUT])) #dropout after dense layer is usually recommended
# # new_model.add(tf.keras.layers.Dense(2, activation="relu")) 
#   new_model.add(tf.keras.layers.Dense(2, activation= hparams[HP_ACTIVATION])) 
  
  
#   new_model.compile(optimizer=hparams[HP_OPTIMIZER],
#                 loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
#                 metrics=[tf.keras.metrics.CategoricalAccuracy()])
  

#   new_model.fit(
#   train_generator,
#   steps_per_epoch=steps_train,
#   validation_data=val_generator,
#   validation_steps = steps_val,
#   callbacks = [tf.keras.callbacks.TensorBoard(log_dir = logs,
#                                                  write_graph = True,
#                                                  histogram_freq = 1,
#                                                  profile_batch = '500,520')],
#   class_weight = class_weight_dict,
#   epochs = hparams[HP_EPOCHS]

# )

#   _, accuracy = new_model.evaluate(val_generator)
#   return accuracy

In [None]:
# def run(run_dir, hparams):
#   with tf.summary.create_file_writer(run_dir).as_default():
#     hp.hparams(hparams)  # record the values used in this trial
#     accuracy = train_test_model(hparams, run_dir)
#     tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)

In [None]:
# session_num = 0
# for dropout_rate in HP_DROPOUT.domain.values:
#   for activation in HP_ACTIVATION.domain.values:
#     for optimizer in HP_OPTIMIZER.domain.values:
#       for epochs in HP_EPOCHS.domain.values:
          
#           hparams = {
#               HP_DROPOUT: dropout_rate,
#               HP_ACTIVATION: activation,
#               HP_OPTIMIZER: optimizer,
#               HP_EPOCHS: epochs,
              
#           }

#           ############ change the folder name here in run ####################
#           run_name = "run-%d" % session_num
#           print('--- Starting trial: %s' % run_name)
#           print({h.name: hparams[h] for h in hparams})
#           run('logs/hparam_tuning_EffNet/' + run_name, hparams)
#           session_num += 1

In [None]:
# !pip install -U tensorboard_plugin_profile

In [None]:
# !kill 1267 

In [None]:
# Open TensorBoard inline, pointed at the tuning log folder.
%tensorboard --logdir logs/hparam_tuning_EffNet/