## Preparing the setup

In [None]:
#@title Mount drive content
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')
# Make the project folder on the mounted drive the working directory.
# `%cd` is an IPython magic, so this cell only runs inside a notebook.
# NOTE(review): presumably needed so the local modules imported in the next
# cell (generators, custom_metrics, UNetModel) can be found - confirm.
%cd /content/drive/My\ Drive/Work/TSM

In [None]:
import pathlib as pt
import os
import yaml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
import tensorflow as tf
from datetime import datetime
from keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping
from keras import optimizers

import generators as gen
import custom_metrics as cm
from UNetModel import UNetModel

# Print the versions of the deep-learning libraries used by this run.
for _label, _module in (("Keras", keras), ("Tensorflow", tf)):
  print(_label, _module.__version__)

### Set useful paths


In [None]:
# Minute-resolution timestamp of when this cell was executed; it names the
# per-run output folder and the history CSV further down.
RUN_TIME = format(datetime.now(), "%Y_%m_%d-%H_%M")

In [None]:
# paths for the data
ROOT_PATH    = pt.Path("/content/drive/My Drive/Work/TSM")
DATASET_PATH = ROOT_PATH/"dataset"
IMGS_DIR     = "images"   # folder name, relative to DATASET_PATH
MASKS_DIR    = "masks"    # folder name, relative to DATASET_PATH
CONFIG_PATH  = ROOT_PATH/"config_train.yaml"
IMGS_CSV     = DATASET_PATH/"images.csv"
MASKS_CSV    = DATASET_PATH/"masks.csv"

# paths for callbacks (one folder per run, keyed by the run timestamp)
TRAIN_PATH       = ROOT_PATH/"trainings"/RUN_TIME
CHECKPOINTS_PATH = TRAIN_PATH/"checkpoints"

# CHECKPOINTS_PATH lives inside TRAIN_PATH, so creating it with parents=True
# creates both folders; exist_ok=True makes re-running the cell harmless and
# avoids the check-then-create race of a separate exists() test.
CHECKPOINTS_PATH.mkdir(parents=True, exist_ok=True)

## Training U-Net

In [None]:
# Load the training configuration. The encoding is given explicitly so the
# file is read the same way regardless of the platform's default locale.
with open(CONFIG_PATH, encoding="utf-8") as fp:
  config = yaml.safe_load(fp)
# Echo the parsed configuration as the cell output.
config

### Load data

In [None]:
# Read the image and mask index tables (images first, then masks).
imgs_df, masks_df = (pd.read_csv(csv_path) for csv_path in (IMGS_CSV, MASKS_CSV))

#### Create 2 csvs with file paths for images and masks

In [None]:
# img_paths = sorted(list((DATASET_PATH/IMGS_DIR).glob("*")), key=lambda p: p.stem)
# mask_paths = sorted(list((DATASET_PATH/MASKS_DIR).glob("*")), key=lambda p: p.stem)
# print("[INFO] Found {} images and {} masks".format(len(img_paths), len(mask_paths)))

# imgs_df = pd.DataFrame(data={'file_path': img_paths})
# masks_df = pd.DataFrame(data={'file_path': mask_paths})

# imgs_df["tumor_type"]  = ["MASS" if "MASS" in str(p) else "CALC" for p in imgs_df["file_path"]]
# masks_df["tumor_type"] = ["MASS" if "MASS" in str(p) else "CALC" for p in masks_df["file_path"]]

#### Split dataset

In [None]:
# seed = 42
# np.random.seed(seed)
# total = len(imgs_df)
# indices = np.arange(0, total)
# np.random.shuffle(indices)

# imgs_df['subset'] = ''
# masks_df['subset'] = ''

# start_idx = 0
# for _subset in config['dataset']:
#   print(_subset, config['dataset'][_subset])
#   next_idx = int(config['dataset'][_subset] * total)
#   print(next_idx)
#   selected = indices[start_idx:next_idx]
#   imgs_df.loc[imgs_df.index.isin(selected), 'subset'] = _subset
#   masks_df.loc[masks_df.index.isin(selected), 'subset'] = _subset
#   start_idx = next_idx

In [None]:
# imgs_df.head()

In [None]:
# imgs_df.to_csv(IMGS_CSV, index=False)
# masks_df.to_csv(MASKS_CSV, index=False)

### Train

#### Build the U-Net model


In [None]:
# Collect the build arguments from the config, then construct the network.
# NOTE(review): target_size[0] is passed as `width` here but is used as the
# first spatial axis of the batch shapes further down - confirm the intended
# (width, height) ordering against UNetModel.build.
_unet_build_args = dict(
  width=config['target_size'][0],
  height=config['target_size'][1],
  n_channels=config['n_channels'],
  with_bn=True,
)
unet = UNetModel()
model = unet.build(**_unet_build_args)
# print(model.summary())

In [None]:
def choose_optimizer(opt_name, opt_param):
    """Instantiate the Keras optimizer selected by name.

    Args:
        opt_name: One of ``'rmsprop'``, ``'adam'`` or ``'sgd'``.
        opt_param: Dict of keyword arguments forwarded to the optimizer
            constructor. ``None`` is treated as "no arguments".

    Returns:
        An instance of the matching class from ``optimizers``.

    Raises:
        ValueError: If ``opt_name`` is not one of the supported names
            (previously this case silently returned ``None``).
    """
    # Map config names to attribute names on the `optimizers` module; the
    # lookup is validated before the module is touched.
    class_names = {'rmsprop': 'RMSprop', 'adam': 'Adam', 'sgd': 'SGD'}
    if opt_name not in class_names:
        raise ValueError(
            "Unknown optimizer {!r}; expected one of {}".format(
                opt_name, sorted(class_names)))
    optimizer_cls = getattr(optimizers, class_names[opt_name])
    return optimizer_cls(**(opt_param or {}))

In [None]:
# Build the optimizer named in the config with its configured parameters.
opt = choose_optimizer(opt_name=config['opt_name'],
                       opt_param=config['opt_param'])
# Echo the optimizer object as the cell output.
opt

In [None]:
 # Compile the model with the optimizer chosen above, using the loss and
 # metric functions provided by the custom_metrics module.
 # NOTE(review): this cell starts with a stray leading space. A notebook
 # appears to tolerate it, but the same line in a plain .py script would
 # raise IndentationError - consider removing the indent.
 model.compile(optimizer=opt, loss=cm.dice_coef_loss, metrics=[cm.dice_coef])

#### Define callbacks

In [None]:
callbacks = []

# Save a checkpoint each time val_loss reaches a new minimum. The path is
# converted to str because ModelCheckpoint fills in the {epoch}/{val_loss}
# placeholders with str.format, which older Keras releases call directly on
# the object they were given (a pathlib.Path has no .format).
file_path = str(CHECKPOINTS_PATH/"unet_weights-{epoch:002d}-{val_loss:.5f}.hdf5")
checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks.append(checkpoint)

# early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=config["patience"])
# callbacks.append(early_stopping)

# Append per-epoch metrics to a CSV inside the run folder (plain str path for
# the same compatibility reason as above).
csv_filename = str(TRAIN_PATH/"history_{}.csv".format(RUN_TIME))
csv_logger = CSVLogger(csv_filename, separator=',', append=True)
callbacks.append(csv_logger)
print("[INFO] Added {} callbacks".format(len(callbacks)))

#### Start training

In [None]:
# Static batch shapes declared to tf.data: images carry n_channels channels,
# masks carry a single channel.
_dim0, _dim1 = config['target_size'][0], config['target_size'][1]
image_shape = [config['batch_size'], _dim0, _dim1, config['n_channels']]
mask_shape  = [config['batch_size'], _dim0, _dim1, 1]
seed = 43

# Arguments shared by the train and validation generators.
_shared_gen_args = dict(batch_size=config['batch_size'],
                        target_size=config['target_size'],
                        seed=seed)

# Augmentation is enabled for the training subset only.
train_generator = gen.image_mask_generator_imgaug(imgs_df,
                                                  masks_df,
                                                  subset="train",
                                                  data_aug=True,
                                                  **_shared_gen_args)

val_generator = gen.image_mask_generator_imgaug(imgs_df,
                                                masks_df,
                                                subset="val",
                                                data_aug=False,
                                                **_shared_gen_args)


def _wrap_as_dataset(batch_generator):
  """Expose a batch generator as a float32 (image, mask) tf.data.Dataset.

  Each yielded item is converted to a tuple before being handed to TensorFlow.
  """
  return tf.data.Dataset.from_generator(lambda: map(tuple, batch_generator),
                                        output_types=(tf.float32, tf.float32),
                                        output_shapes=(image_shape, mask_shape))


train_dataset = _wrap_as_dataset(train_generator)
print("[INFO] Train dataset: ", train_dataset)

val_dataset = _wrap_as_dataset(val_generator)
print("[INFO] Validation dataset: ", val_dataset)


#### Visualize a few training samples

In [None]:
# Materialise the first four batches as NumPy arrays for plotting. The
# dataset wraps the same train_generator object, so this advances it.
_preview_batches = train_dataset.take(4)
samples = list(_preview_batches.as_numpy_iterator())

In [None]:
# Plot one image (left column) and its mask (right column) per sampled batch.
# squeeze=False keeps `ax` two-dimensional even when there is a single row,
# so the ax[row][col] indexing below always works.
fig, ax = plt.subplots(len(samples), 2, figsize=(10, 7), squeeze=False)
idx_img = 0 # image & mask index from each batch
for idx, (batch_img, batch_mask) in enumerate(samples):
  print(batch_img.shape, batch_mask.shape)
  ax[idx][0].imshow(batch_img[idx_img])
  ax[idx][0].axis('off')

  # Copy the first mask channel into three channels so it is drawn as a
  # grayscale RGB image. This no longer rebinds the module-level `mask_shape`,
  # which holds the batch shape declared for the datasets.
  conv_mask = np.repeat(batch_mask[idx_img][:, :, :1], 3, axis=2).astype(np.float32)

  ax[idx][1].imshow(conv_mask)
  ax[idx][1].axis('off')

plt.show()

In [None]:
# Count the rows assigned to each subset.
train_nr_samples = int((imgs_df['subset'] == "train").sum())
val_nr_samples   = int((imgs_df['subset'] == "val").sum())

# Step counts must be plain integers: np.ceil alone returns a NumPy float,
# which Keras may reject or mis-handle as a step count.
steps_per_epoch = int(np.ceil(train_nr_samples / config["batch_size"]))
# NOTE(review): validation rounds down (drops a partial last batch) while
# training rounds up - confirm this asymmetry is intended.
validation_steps = val_nr_samples // config["batch_size"]

print("[INFO] Train size {} Val size {}".format(train_nr_samples, val_nr_samples))

In [None]:
print("[INFO] Starting training for {} epochs with batch size {}".format(config['epochs'], config['batch_size']))

# Everything except the training data source, gathered in one place.
_fit_options = dict(
  epochs=config["epochs"],
  validation_data=val_generator,
  steps_per_epoch=steps_per_epoch,
  validation_steps=validation_steps,
  verbose=1,
  callbacks=callbacks,
)
# NOTE(review): training consumes the raw Python generators; the tf.data
# datasets built earlier are only used for the preview plot - confirm that
# this is intended.
model.fit(train_generator, **_fit_options)