In [None]:
!pip install segmentation_models --quiet
!pip install boto3 --quiet
!pip install mlflow --quiet

In [None]:
# Mount Google Drive at /content/drive; every data, module, checkpoint and
# output path used below is located under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import sys
from datetime import datetime

import boto3
import matplotlib.pyplot as plt
import mlflow
import pandas as pd
import segmentation_models as sm
import tensorflow as tf
from tensorflow.keras.callbacks import (BackupAndRestore, EarlyStopping,
                                        ModelCheckpoint)
from tensorflow.keras.optimizers import Adagrad, Adam

# Project modules live on Drive; extend the import path before importing them.
sys.path.append('/content/drive/MyDrive/unet/')
from attresunet import Attention_ResUNet
from train import trainGenerator

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
# Display the TensorFlow version this runtime is using.
tf.__version__

In [None]:
# List the training image and mask directories in sorted filename order
# and report how many training images are available.
train_img_dir = '/content/drive/MyDrive/unet/data_train/train/images/train/'
train_msk_dir = '/content/drive/MyDrive/unet/data_train/train/masks/train/'

img_list = sorted(os.listdir(train_img_dir))
msk_list = sorted(os.listdir(train_msk_dir))

num_images = len(img_list)
print("Total number of training images are: ", num_images)

In [None]:
# Training hyperparameters shared by the cells below.
seed = 69
batch_size = 4
n_classes = 4
epochs = 50
LR = 0.001

# Staircase exponential learning-rate schedule: multiply LR by 0.96 every
# 1000 steps. It is NOT wired into the optimizer below; it is kept available
# in case decay is wanted (of questionable use with Adam).
decay = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=LR,
    decay_steps=1000,
    decay_rate=0.96,
    staircase=True,
)

# Adam optimizer with the fixed learning rate LR.
optimizer = tf.keras.optimizers.Adam(learning_rate=LR)

In [None]:
# Build the training batch generator from the image and mask root folders.
# The class count comes from `n_classes` so the generator cannot drift out of
# sync with the metric configuration when the hyperparameter changes.
train_img_path = '/content/drive/MyDrive/unet/data_train/train/images/'
train_msk_path = '/content/drive/MyDrive/unet/data_train/train/masks/'
train_img_gen = trainGenerator(
    train_img_path,
    train_msk_path,
    num_class=n_classes,
    batch_size=batch_size,
    seed=seed,
)

# Pull one batch as a sanity check; `x` and `y` are overwritten by the
# validation cell that follows.
x, y = next(train_img_gen)

In [None]:
# Build the validation batch generator from the image and mask root folders.
# The class count comes from `n_classes` rather than a duplicated literal.
# NOTE(review): this reuses `trainGenerator`; if that helper applies
# augmentation, validation batches are augmented too - confirm this is intended.
val_img_path = '/content/drive/MyDrive/unet/data_train/val/images/'
val_msk_path = '/content/drive/MyDrive/unet/data_train/val/masks/'
val_img_gen = trainGenerator(
    val_img_path,
    val_msk_path,
    num_class=n_classes,
    batch_size=batch_size,
    seed=seed,
)

# Pull one batch; the shape of `x` is used later to derive the model input shape.
x, y = next(val_img_gen)

In [None]:
# Derive the per-epoch step counts from the dataset sizes, and the model input
# shape from the most recently drawn batch `x`.
num_train_images = len(os.listdir('/content/drive/MyDrive/unet/data_train/train/images/train'))
num_val_images = len(os.listdir('/content/drive/MyDrive/unet/data_train/val/images/val'))

# Floor division: a trailing partial batch is not counted as a step.
steps_per_epoch = num_train_images // batch_size
val_steps_per_epoch = num_val_images // batch_size

# Axes 1..3 of the batch are taken as height, width and channels.
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = x.shape[1:4]
input_shape = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

print(
    f'Input shape: {input_shape}\n'
    f'Training images: {num_train_images}\n'
    f'Validation images: {num_val_images}\n'
    f'Steps per epoch: {steps_per_epoch}\n'
    f'Validation steps per epoch: {val_steps_per_epoch}'
)

In [None]:
# Loss and metric definitions.
# segmentation_models losses can be combined with '+' and scaled by an
# integer or float factor.
# TensorFlow is imported as `tf` in this notebook; the bare name `tensorflow`
# is not defined and would raise NameError.
# NOTE(review): MeanIoU is documented for integer class ids; if the generator
# yields one-hot masks a one-hot-aware IoU metric may be needed - confirm.
mean_iou = tf.keras.metrics.MeanIoU(num_classes=n_classes)
dice_loss = sm.losses.DiceLoss()
focal_loss = sm.losses.CategoricalFocalLoss()
total_loss = dice_loss + (1 * focal_loss)
metrics = [sm.metrics.IOUScore(threshold=0.5), mean_iou, sm.metrics.FScore(threshold=0.5)]

In [None]:
# Naming convention for saved artefacts:
#   <model type>_<loss>_epochs<N>_batchsize<N>_learningrate<LR>
loss_name = 'diceplusfocal'   # loss tag used in file names
model_type = 'AttResUnet'     # architecture tag used in file names
model_name = '_'.join([
    model_type,
    loss_name,
    f'epochs{epochs}',
    f'batchsize{batch_size}',
    f'learningrate{LR}',
])

In [None]:
# Build the Attention Residual U-Net for the detected input shape and compile it
# with the combined dice + focal loss.
att_res_unet_model = Attention_ResUNet(input_shape)

# `metrics` is already a list, so it is unpacked into one flat list next to
# 'accuracy'. Nesting it (['accuracy', [..]]) is the per-output form Keras
# uses for multi-output models and is wrong for this single-output model.
att_res_unet_model.compile(
    optimizer=optimizer,
    loss=total_loss,
    metrics=['accuracy', *metrics],
)
# att_res_unet_model.summary()  # uncomment to inspect the architecture

In [None]:
# CALLBACKS
# Early stopping on the validation IoU score (mode 'max': higher is better).
#   patience  - number of epochs with no improvement that are tolerated; 0 would
#               stop as soon as the measure gets worse from one epoch to the next.
#   min_delta - minimum absolute change in the monitored quantity that counts
#               as an improvement; smaller changes count as no improvement.
#   baseline  - baseline value handed to EarlyStopping for the monitored quantity.
early_stopping = EarlyStopping(
    monitor='val_iou_score',
    mode='max',
    min_delta=0.0001,
    patience=10,
    baseline=0.5,
    verbose=1,
)

# Write the full model (not just weights) once per epoch, but only when the
# validation IoU score improves; 0.5 is the initial value threshold.
model_checkpoint = ModelCheckpoint(
    filepath=f'/content/drive/MyDrive/unet/checkpoints/{model_name}checkpoint.hdf5',
    monitor='val_iou_score',
    mode='max',
    initial_value_threshold=0.5,
    save_best_only=True,
    save_weights_only=False,
    save_freq='epoch',
    options=None,
    verbose=1,
)

# Training-state backup directory on Drive, used by BackupAndRestore.
model_backup = BackupAndRestore(
    backup_dir='/content/drive/MyDrive/unet/backup',
)

In [None]:
# ML_FLOW
# Read the tracking server URI from a file on Drive. The context manager
# closes the file handle, which a bare open(...).read() leaves open.
with open('/content/drive/MyDrive/unet/mlflow_uri.txt') as uri_file:
    TRACKING_URI = uri_file.read().strip()
mlflow.set_tracking_uri(TRACKING_URI)

# Set an experiment name, which must be unique and case-sensitive.
experiment = mlflow.set_experiment('magic rocks!')

# Close any run left active by an earlier execution of this cell; otherwise
# start_run() raises because a run is already active.
if mlflow.active_run() is not None:
    mlflow.end_run()

# Set run name and start autolog.
mlflow.start_run(run_name=model_name)
mlflow.tensorflow.autolog()

# Get Experiment Details
print(f"Experiment_id: {experiment.experiment_id}")
print(f"Artifact Location: {experiment.artifact_location}")
print(f"Name: {experiment.name}")
print(f"Lifecycle_stage: {experiment.lifecycle_stage}")

In [None]:
# Train the model and time the run. The MLflow run is always closed, even if
# training raises or is interrupted, so no run is left dangling as active.
# NOTE(review): `early_stopping` is defined with the other callbacks but is not
# passed here - confirm whether that is intentional.
start = datetime.now()
run_status = 'FAILED'
try:
    att_res_unet_history = att_res_unet_model.fit(
        train_img_gen,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        verbose=1,
        validation_data=val_img_gen,
        validation_steps=val_steps_per_epoch,
        callbacks=[model_checkpoint, model_backup],
    )
    run_status = 'FINISHED'
finally:
    stop = datetime.now()
    # End mlflow run, recording whether training completed.
    mlflow.end_run(status=run_status)

In [None]:
# Wall-clock duration of training, from the timestamps taken around the fit call.
execution_time_AttResUnet = stop - start
print(
    "Attention ResUnet execution time is: ",
    execution_time_AttResUnet,
)

In [None]:
# Save the trained model to an .hdf5 file on Drive, named after the run configuration.
final_model_path = f'/content/drive/MyDrive/unet/{model_name}.hdf5'
att_res_unet_model.save(final_model_path)

In [None]:
# Turn the per-epoch history dict into a DataFrame and write it to CSV on
# Drive so the curves can be plotted later without retraining.
att_res_unet_history_df = pd.DataFrame(att_res_unet_history.history)

history_csv_path = '/content/drive/MyDrive/unet/custom_code_att_res_unet_history_df.csv'
with open(history_csv_path, mode='w') as csv_file:
    att_res_unet_history_df.to_csv(csv_file)

In [None]:
# Check history plots, one model at a time.
history = att_res_unet_history

# Plot the training loss and the validation IoU score at each epoch.
loss = history.history['loss']
val_iou_score = history.history['val_iou_score']

# Use a dedicated name for the x-axis: reassigning `epochs` here would clobber
# the integer hyperparameter that the naming and training cells read.
epoch_axis = range(1, len(loss) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_axis, loss, 'y', label='Training loss')
ax.plot(epoch_axis, val_iou_score, 'r', label='Validation IOU Score')
# Title and y-label name both plotted series; the second one is an IoU score,
# not a loss.
ax.set_title('Training loss and validation IOU score')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss / IOU score')
ax.legend()
plt.show()