In [None]:
# load all necessary packages
import os
import sys

import boto3
import mlflow
import segmentation_models as sm
import tensorflow
from matplotlib import pyplot as plt
from tensorflow.keras.callbacks import (BackupAndRestore, EarlyStopping,
                                        ModelCheckpoint)
from tensorflow.keras.optimizers.schedules import ExponentialDecay

sys.path.append('../modeling')
from train import build_unet, preprocess_data, trainGenerator

# Tell segmentation_models to use the 'tf.keras' framework
# (presumably so it matches the tensorflow.keras objects used below — confirm against the sm docs).
sm.set_framework('tf.keras')
# Last expression of the cell, so its return value is shown as the cell output.
sm.framework()


In [None]:
# Training hyperparameters
seed = 69          # forwarded to the train/val generators
batch_size = 6
n_classes = 4      # number of classes passed to the model and the MeanIoU metric
epochs = 5
LR = 0.01          # initial learning rate of the schedule below

# Learning-rate schedule: staircase exponential decay (factor 0.96, 1000 decay steps).
# Of questionable usefulness with the Adam optimizer, but here if we need it.
decay = ExponentialDecay(
    initial_learning_rate=LR,
    decay_steps=1000,
    decay_rate=0.96,
    staircase=True,
)

# Optimizer driven by the schedule above
optimizer = tensorflow.keras.optimizers.Adam(learning_rate=decay)

In [None]:
# Training generator built from the image and mask folders.
train_img_path = '../data/data_train/train/images/'
train_msk_path = '../data/data_train/train/masks/'
train_img_gen = trainGenerator(
    train_img_path,
    train_msk_path,
    num_class=n_classes,  # reuse the config value instead of repeating the literal 4
    batch_size=batch_size,
    seed=seed,
)

# Draw one batch as a quick check that the generator works.
# NOTE(review): x/y are overwritten by the validation cell further down.
x, y = next(train_img_gen)

In [None]:
# Validation generator, built with the same helper as the training generator.
# NOTE(review): if trainGenerator applies augmentation, it is applied to validation data too — confirm.
val_img_path = '../data/data_train/val/images/'
val_msk_path = '../data/data_train/val/masks/'
val_img_gen = trainGenerator(
    val_img_path,
    val_msk_path,
    num_class=n_classes,  # reuse the config value instead of repeating the literal 4
    batch_size=batch_size,
    seed=seed,
)

# Draw one batch; `x` is read later to derive the model input shape.
x, y = next(val_img_gen)

In [None]:
# Dataset sizes: number of directory entries in each image folder
train_entries = os.listdir('../data/data_train/train/images/train')
val_entries = os.listdir('../data/data_train/val/images/val/')
num_train_images = len(train_entries)
num_val_images = len(val_entries)

# Only whole batches are counted (floor division), so a trailing partial batch is dropped
steps_per_epoch, val_steps_per_epoch = (
    num_train_images // batch_size,
    num_val_images // batch_size,
)

# Model input shape taken from the most recently drawn batch `x`
# (axis 0 is skipped; axes 1-3 are used as height, width, channels)
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = x.shape[1:4]
input_shape = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

print(
    f'Input shape: {input_shape}\n'
    f'Training images: {num_train_images}\n'
    f'Validation images: {num_val_images}\n'
    f'Steps per epoch: {steps_per_epoch}\n'
    f'Validation steps per epoch: {val_steps_per_epoch}'
)

In [None]:
# Loss: segmentation_models losses can be added with '+' and scaled by an int or float factor
dice_loss = sm.losses.DiceLoss()
focal_loss = sm.losses.CategoricalFocalLoss()
total_loss = dice_loss + (1 * focal_loss)  # focal term currently weighted by 1

# Metrics reported next to the loss
# NOTE(review): keras MeanIoU may expect integer class labels rather than one-hot masks — confirm
# against the generator output before trusting this number.
mean_iou = tensorflow.keras.metrics.MeanIoU(num_classes=n_classes)
metrics = [
    sm.metrics.IOUScore(threshold=0.5),
    mean_iou,
    sm.metrics.FScore(threshold=0.5),
]

In [None]:
# Tags that make up the file name used for checkpoints, the saved model and the mlflow run
model_type = 'StdUnet'        # model tag
loss_name = 'diceplusfocal'   # loss tag

# Naming convention: <model>_<loss>_epochs<E>_batchsize<B>_learningrate<LR>
model_name = f'{model_type}_{loss_name}_epochs{epochs}_batchsize{batch_size}_learningrate{LR}'

In [None]:
# Build the model and attach optimizer, loss and metrics
model = build_unet(input_shape, n_classes)
model.compile(
    optimizer=optimizer,
    loss=total_loss,
    # One flat list of metrics: a nested list is the Keras convention for
    # per-output metrics on multi-output models, which is not intended here.
    metrics=['accuracy', *metrics],
)
model.summary()

In [None]:
# CALLBACKS
# Early stopping and checkpointing both watch the same validation metric.
monitored_metric = 'val_iou_score'

# Early stopping: stop once the monitored value has not improved for `patience` epochs.
# NOTE(review): patience (10) is larger than the configured epochs (5), so with the current
# settings this callback cannot trigger.
early_stopping = EarlyStopping(
    monitor=monitored_metric,
    mode='max',
    min_delta=0.0001,  # an absolute change smaller than this counts as no improvement
    patience=10,       # number of epochs with no improvement that are tolerated
    baseline=0.5,
    verbose=1,
)

# Model checkpoint: keep only the best full model (not just the weights), checked every epoch
model_checkpoint = ModelCheckpoint(
    filepath=f'../models/checkpoints/{model_name}_checkpoint.hdf5',
    monitor=monitored_metric,
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    save_freq='epoch',
    initial_value_threshold=0.5,
    options=None,
    verbose=1,
)

# Backups in case training is interrupted
model_backup = BackupAndRestore(backup_dir='../models/backup/')

callback_list = [
    early_stopping,
    model_checkpoint,
    model_backup,
]

In [None]:
# ML_FLOW
# Read the tracking URI from a local file; the context manager closes the handle
# right away instead of leaving it open for the lifetime of the kernel.
with open('../.mlflow_uri') as uri_file:
    TRACKING_URI = uri_file.read().strip()
mlflow.set_tracking_uri(TRACKING_URI)

# Set an experiment name, which must be unique and case-sensitive.
experiment = mlflow.set_experiment('magic rocks!')

# Set run name and start run (the run is ended in the training cell)
mlflow.start_run(run_name=f'{model_name}')
mlflow.tensorflow.autolog()

# Get Experiment Details
print("Experiment_id: {}".format(experiment.experiment_id))
print("Artifact Location: {}".format(experiment.artifact_location))
print("Name: {}".format(experiment.name))
print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))

In [None]:
# Train the model. The mlflow run opened in the previous cell is always closed here,
# so a crash or manual interrupt does not leave a stale active run behind.
try:
    history = model.fit(
        train_img_gen,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        verbose=1,
        validation_data=val_img_gen,
        validation_steps=val_steps_per_epoch,
        callbacks=callback_list,  # already a list; wrapping it again would nest it
    )
except BaseException:
    # Mark the run as failed, then let the original error surface unchanged
    mlflow.end_run(status='FAILED')
    raise
else:
    # End mlflow run
    mlflow.end_run()

In [None]:
# Persist the final model under the run's naming convention
final_model_path = f'../models/{model_name}.hdf5'
model.save(final_model_path)

In [None]:
# Plot the training and validation loss and accuracy at each epoch
loss = history.history['loss']
val_loss = history.history['val_loss']
# Own name for the x-axis so the `epochs` hyperparameter from the config cell is not
# overwritten (re-running the training or naming cells would otherwise pick up a range).
epoch_axis = range(1, len(loss) + 1)

fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epoch_axis, loss, 'y', label='Training loss')
ax_loss.plot(epoch_axis, val_loss, 'r', label='Validation loss')
ax_loss.set_title('Training and validation loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()
plt.show()

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epoch_axis, acc, 'y', label='Training Accuracy')
ax_acc.plot(epoch_axis, val_acc, 'r', label='Validation Accuracy')
ax_acc.set_title('Training and validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()
# Save before showing: once the figure has been shown, a later pyplot savefig call
# typically writes a new, empty figure instead of this plot.
fig_acc.savefig(f'../models/{model_name}.jpg', dpi=150)
plt.show()