In [None]:
import numpy as np
import os
import tensorflow as tf
import pandas as pd
import PIL
import matplotlib.pyplot as plt
from datetime import datetime
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay, roc_curve, roc_auc_score
from sklearn.utils.class_weight import compute_class_weight

# Mount gdrive

In [None]:
### Colab-only helper module used to access Google Drive from the notebook
from google.colab import drive
# drive.flush_and_unmount() #you can unmount your drive with this code
### Mount Drive under /gdrive; every project path below starts with this mount point
drive.mount('/gdrive', force_remount=True)

## Project Folder path

In [None]:
### Root folder of the project on the mounted Drive; later cells build data paths from it
project_dir = "/gdrive/My Drive/Final_Project_CrystalsFirst/Model/"
### Change the working directory so relative paths (e.g. saved_model/) resolve inside the project
%cd {project_dir} 

# Train / Validation splitting  and  Data augmentation


In [None]:
### Class names (their index order is used later to decode predictions),
### image edge length fed to the network, and batch size
classes = ['crystal', 'no_crystal']
IMG_SIZE = 224
BATCH_SIZE = 16

### Both subsets are read from the same folder with the same iterator options
data_dir = project_dir + "labels/training_data"
flow_options = dict(
    class_mode='categorical',
    shuffle=True,
    seed=42,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
)

### Training set: pixel values rescaled by 1/255 plus augmentation
### (rotation, flips, brightness); 30 % of the images are held out for validation
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.3,
    rotation_range=45,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=(.8, 1.2),
    fill_mode='nearest',
)
train_generator = train_datagen.flow_from_directory(data_dir,
                                                    subset='training',
                                                    **flow_options)

### Validation set: same split fraction, rescaling only (no augmentation)
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.3,
)
val_generator = val_datagen.flow_from_directory(data_dir,
                                                subset='validation',
                                                **flow_options)


## Get classes from folder names

In [None]:
### mapping of class name (sub-folder name) -> integer label index assigned by the generator
train_generator.class_indices

## Visualize example data

In [None]:
### Draw one batch of (images, one-hot labels). The built-in next() is used because
### the iterator's own .next() method does not exist on newer Keras versions.
x, y = next(train_generator)
### Show at most five samples; never index past the end of a short batch
for i in range(min(5, len(x))):
    plt.title(y[i])   # one-hot label of the sample
    plt.imshow(x[i])  # pixel values are already rescaled to [0, 1]
    plt.show()

## Shape of training input

In [None]:
### Pull a single batch and report the array shapes of the images and of the labels
image_batch, labels_batch = next(iter(train_generator))
print(image_batch.shape)
print(labels_batch.shape)

## Defining the step size

In [None]:
### Number of batches needed to see every image once per epoch.
### Rounding up keeps the last, partially filled batch; round() could silently drop it
### (and rounds exact halves to the nearest even number).
steps_train = int(np.ceil(train_generator.n / BATCH_SIZE))
steps_val = int(np.ceil(val_generator.n / BATCH_SIZE))

## Balancing Classes

In [None]:
### 'balanced' class weights computed from the integer labels of the training subset
train_labels = train_generator.classes
class_weights = compute_class_weight(class_weight='balanced',
                                     classes=np.unique(train_labels),
                                     y=train_labels)
# class_weights

In [None]:
### Keras expects class_weight as a dictionary {class index: weight}
class_weight_dict = dict(enumerate(class_weights))
# class_weight_dict

In [None]:
### getting number of classes
### (class_weights holds one entry per class, so its length is the class count)
count_classes = len(class_weights)
# count_classes

# Setting up Inception V3

In [None]:
### define input tensor for model = shape of image and channels;
### IMG_SIZE is reused so the network input always matches the generators' target_size
input_t = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

### load a new instance of the model with ImageNet weights
InceptionV3_model = tf.keras.applications.InceptionV3(
    input_tensor=input_t,
    include_top=False,  # do not include ImageNet classifier at the top
    weights='imagenet',
)

In [None]:
### get an overview of the model architecture
# InceptionV3_model.summary()

## Transfer Learning of InceptionV3


In [None]:
### Making the model trainable in general except
### for the batch normalization layers which should never be trained
InceptionV3_model.trainable = True
for layer in InceptionV3_model.layers:
    ### Match on the layer type, not on its name: auto-generated names depend on the
    ### session, and the first one is plain 'batch_normalization' (no trailing
    ### underscore), which a 'batch_normalization_' substring test would miss.
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

In [None]:
### Only layers from index `fine_tune_at` onward stay open for fine-tuning;
### every layer before that index is frozen, so most of the network is not trained
fine_tune_at = 287
frozen_part = InceptionV3_model.layers[:fine_tune_at]
for layer in frozen_part:
    layer.trainable = False

In [None]:
### visualize layers
# for layer in InceptionV3_model.layers:
#   print(layer.name, '->', layer.trainable)

In [None]:
### Inception output layer
### ('mixed10' is looked up by name; its output becomes the output of the feature extractor)
last_conv_layer = InceptionV3_model.get_layer('mixed10')

# Setting up New Model

In [None]:
### One output unit per class
num_classes = len(classes)
### Feature extractor: from the InceptionV3 input up to the selected output layer
conv_model = Model(inputs=InceptionV3_model.input, outputs=last_conv_layer.output)
### New Keras Sequential model = feature extractor + classification head:
###   global average pooling -> dropout (to further prevent overfitting)
###   -> final dense (fully-connected) softmax layer with one node per class
new_model = Sequential([
    conv_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

## Optimizer

In [None]:
### defining an optimizer for compilation
### (`learning_rate` is the supported argument name; the old `lr` alias is
### deprecated and rejected by newer Keras versions)
optimizer = Adam(learning_rate=1e-4)
# optimizer.learning_rate.numpy()

## Compile the model

In [None]:
### The last layer is a softmax, so the loss receives probabilities (from_logits=False)
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
accuracy_metric = tf.keras.metrics.CategoricalAccuracy()
new_model.compile(optimizer=optimizer,
                  loss=loss_fn,
                  metrics=[accuracy_metric])

## Model summary

In [None]:
### get an overview of the New Model architecture
# new_model.summary()

In [None]:
### visualize New Model layers
# for layer in new_model.layers:
#     print(layer.name, '->', layer.trainable)

## Train the model

In [None]:
### Train for a fixed number of epochs; the class weights are passed to fit()
epochs = 50
fit_options = dict(
    steps_per_epoch=steps_train,
    validation_data=val_generator,
    validation_steps=steps_val,
    # callbacks=[tboard_callback],
    class_weight=class_weight_dict,
    epochs=epochs,
)
history = new_model.fit(train_generator, **fit_options)

In [None]:
### This is only needed if further training based on the previous history is required. 
# new_epochs = 20
# epochs += new_epochs

# # training including previous training
# history_2 = new_model.fit(
#   train_generator,
#   initial_epoch=history.epoch[-1]+1,
#   epochs = epochs,
#   steps_per_epoch=steps_train,
#   validation_data=val_generator,
#   validation_steps = steps_val,
#   class_weight = class_weight_dict
# )

## Save Model

In [None]:
### Get current Date and Time (used to give every saved model a unique name)
now = datetime.now().strftime("%Y_%m_%d__%Hh%Mmin")

### Save pre-trained model with date and time stamp.
### The target folder is created in pure Python (no shell call needed); it is
### relative to the current working directory and may already exist.
os.makedirs('saved_model', exist_ok=True)
new_model.save('saved_model/Inception_' + now)

In [None]:
### model directory
# !ls saved_model

### Contains an assets folder, saved_model.pb, and variables folder.
# !ls saved_model/Inception_9/

## Visualize training results  

In [None]:
### Per-epoch metrics recorded by fit()
acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

### Use the number of *recorded* epochs for the x axis instead of the requested
### `epochs`: the plot then still works when training was interrupted or when
### `epochs` was changed after the run.
epochs_range = range(len(acc))

plt.figure(figsize=(12, 6))

### left panel: accuracy
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

### right panel: loss
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

# Predict on Test data

In [None]:
IMG_SIZE = 224
folder_path = "/gdrive/My Drive/Final_Project_CrystalsFirst/Model/labels/test/"

### file name -> (predicted class name, confidence text)
predictions = {}

### sorted() makes the processing order reproducible (os.listdir order is arbitrary)
for filename in sorted(os.listdir(folder_path)):
    ### only JPEG files are scored; the extension is accepted in any letter case
    if not filename.lower().endswith(".jpg"):
        continue
    img = keras.preprocessing.image.load_img(os.path.join(folder_path, filename),
                                             target_size=(IMG_SIZE, IMG_SIZE))
    ### same 1/255 rescaling as applied by the training / validation generators
    img_array = keras.preprocessing.image.img_to_array(img) / 255
    ### add the leading batch dimension expected by predict()
    img_array = tf.expand_dims(img_array, 0)
    pred = new_model.predict(img_array)
    ### highest-scoring class and its score as a percentage
    predictions[filename] = (classes[np.argmax(pred)],
                             "confidance of {:.2f}%".format(100 * np.max(pred)))

predictions

# Test evaluation

## Confusion Matrix 

In [None]:
### create data frame with true labels
### (read from source/image_labels.json inside the project folder; the "y_true"
### column used by the evaluation cells presumably comes from this file and the
### index presumably holds the image file names — TODO confirm)
df_json = pd.read_json(project_dir+"source/image_labels.json", orient="columns")
# df_json = df_json.set_index("index")
# df_json.head(2)

In [None]:
### Turn the predictions dict into a table indexed by image file name:
### one row per image, columns "predictions" (class name) and "confidence" (text)
pred_table = pd.DataFrame.from_dict(predictions, orient="index").reset_index()
df_pred = (
    pred_table
    .set_axis(["image", "predictions", "confidence"], axis=1)
    .set_index("image")
)
# df_pred.head(2)

In [None]:
### concatenate the two data frames
### column-wise (axis=1); join='inner' keeps only the index labels present in both frames
cm_df = pd.concat([df_pred, df_json], axis=1, join='inner')
# cm_df

In [None]:
### Pin the label order explicitly so rows/columns of the matrix always match the
### display labels, even when one class is missing from y_true or from the predictions
label_order = ['crystal', 'no_crystal']
conf_mat = confusion_matrix(cm_df.loc[:, "y_true"],
                            cm_df.loc[:, "predictions"],
                            labels=label_order)
cm_display = ConfusionMatrixDisplay(conf_mat, display_labels=label_order).plot()

## Classification Report

In [None]:
### Text report of the per-class metrics for true vs. predicted labels
report_labels = ['crystal', 'no_crystal']
y_true_col = cm_df.loc[:, "y_true"]
y_pred_col = cm_df.loc[:, "predictions"]
cr = classification_report(y_true_col, y_pred_col,
                           labels=report_labels,
                           digits=2,
                           zero_division='warn')
print(cr)

## ROC curve

In [None]:
### Encode the class names as integers in every column: no_crystal -> 1, crystal -> 0
cm_df = (
    cm_df
    .replace(to_replace="no_crystal", value=1)
    .replace(to_replace="crystal", value=0)
)

In [None]:
def plot_roc(y_test, proba_preds):
    """Print the ROC AUC of the model and plot its ROC curve next to a baseline.

    Parameters
    ----------
    y_test : array-like
        True binary labels.
    proba_preds : array-like
        Model scores for the positive class, one per entry of ``y_test``.
        Continuous scores (probabilities) give a full curve; hard 0/1 labels
        yield only a single operating point.

    Returns
    -------
    None
        The AUC is printed and the figure is shown.
    """
    ### baseline: the same constant score for every sample (plots as the diagonal)
    base_probs = [0 for _ in range(len(y_test))]
    model_auc = roc_auc_score(y_test, proba_preds)
    ### summarize scores (label matches the legend entry of the model curve)
    print('InceptionV3: ROC AUC=%.3f' % (model_auc))
    ### calculate roc curves
    ns_fpr, ns_tpr, _ = roc_curve(y_test, base_probs)
    model_fpr, model_tpr, _ = roc_curve(y_test, proba_preds)
    ### plot the roc curve for the model
    plt.plot(ns_fpr, ns_tpr, linestyle='--', label='Base')
    plt.plot(model_fpr, model_tpr, marker='.', label='InceptionV3')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    plt.show()

In [None]:
### A ROC curve needs a continuous score, not the hard 0/1 predictions.
### The "confidence" text holds the probability of the *predicted* class (in %,
### two decimals), so P(no_crystal) is that value when class 1 was predicted and
### its complement otherwise (two-class softmax).
top_prob = cm_df["confidence"].str.extract(r"([\d.]+)%")[0].astype(float) / 100
no_crystal_prob = top_prob.where(cm_df["predictions"] == 1, 1 - top_prob)

plot_roc(cm_df.loc[:, "y_true"], no_crystal_prob)

# Predicting on a pre-trained model
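In case you only want to predict on new data, jump to this part of the notebook after running the import, Drive-mount and project-folder cells at the top (the model path below is relative to the project folder).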

In [None]:
### make sure to load the correct model in case you reset the runtime
### (the path is relative to the current working directory)
new_model = tf.keras.models.load_model('saved_model/Inception_Pre-trained_Model')

In [None]:
### Re-declared here so this section also works right after a runtime reset, without
### running the training cells; the order must stay the same as at training time
classes = ['crystal', 'no_crystal']
IMG_SIZE = 224
folder_path = "/gdrive/My Drive/Final_Project_CrystalsFirst/Model/labels/test/"

### file name -> (predicted class name, confidence text)
predictions = {}

### sorted() makes the processing order reproducible (os.listdir order is arbitrary)
for filename in sorted(os.listdir(folder_path)):
    ### only JPEG files are scored; the extension is accepted in any letter case
    if not filename.lower().endswith(".jpg"):
        continue
    img = keras.preprocessing.image.load_img(os.path.join(folder_path, filename),
                                             target_size=(IMG_SIZE, IMG_SIZE))
    ### scale pixel values by 1/255 and add the leading batch dimension
    img_array = keras.preprocessing.image.img_to_array(img) / 255
    img_array = tf.expand_dims(img_array, 0)
    pred = new_model.predict(img_array)
    ### highest-scoring class and its score as a percentage
    predictions[filename] = (classes[np.argmax(pred)],
                             "confidance of {:.2f}%".format(100 * np.max(pred)))

predictions