In [8]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from callbacks import ValPlotCallback
from model import make_model
from keras.layers import Input
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# from tqdm.keras import TqdmCallback
from tqdm import tqdm_notebook as tqdm

import numpy as np
import glob
import cv2
import os
from pathlib import Path

from helpers import image_splitter, print_array_properties, image_checker, image_tester, create_batches, batch_stacker


%matplotlib inline

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Input/output locations, relative to the notebook's working directory.
# The trailing slashes matter: a later cell builds file paths by plain string
# concatenation (`test_image_dir + img_name`).
train_image_dir = 'images/training/build/0/'  # training images (globbed for *.tif)
train_mask_dir = 'images/training/mask/0/'  # masks, looked up by the training image's file name
# NOTE(review): the test images live under `images/training/test/` - confirm this path is intended.
test_image_dir = 'images/training/test/0/'
data_dir = 'data/'  # folder the model file (<model_name>.hdf5) is written to / loaded from


In [16]:
# Single seed shared by the train/validation split and the augmentation generators.
SEED = 77

# ---------------------- image splitting / resizing ---------------------- #
split_rows = 200
split_cols = 200
resize = True
image_resize_width = 240
image_resize_height = 240

# Number of source images stacked per training batch; lower it if memory runs out.
images_per_batch = 64
# Fraction of every stacked batch held out for validation.
train_val_split_size = 0.1

# ------------------------------- model --------------------------------- #
epochs = 20
model_batch_size = 64
model_name = 'datagenmodel'
model_path = os.path.join(data_dir, f'{model_name}.hdf5')
# When True, training starts from the file at `pretrained_model_path`
# instead of building a new model.
pretrained_model = True
pretrained_model_path = model_path
print_model_summary_on_compile = False
plot_epoch_val_images = True

# --------------------------- data augmentation -------------------------- #
data_augmentation = True
# Keyword arguments for ImageDataGenerator; the training cell builds the image
# generator and the mask generator from this same dict.
datagen_args = {
    # normalisation / whitening: all switched off
    'featurewise_center': False,
    'samplewise_center': False,
    'featurewise_std_normalization': False,
    'samplewise_std_normalization': False,
    'zca_whitening': False,
    'zca_epsilon': 1e-06,
    # geometric transforms
    'rotation_range': 60,  # degrees
    'width_shift_range': 0.2,  # fraction of total width
    'height_shift_range': 0.2,  # fraction of total height
    'shear_range': 0.0,
    'zoom_range': 0.0,
    'channel_shift_range': 0.0,
    # how points outside the input boundaries are filled
    'fill_mode': 'nearest',
    'cval': 0.0,  # only used when fill_mode == "constant"
    'horizontal_flip': True,
    'vertical_flip': True,
    # no rescaling and no custom per-image function
    'rescale': None,
    'preprocessing_function': None,
    'data_format': 'channels_last',
    # the train/validation split is done with train_test_split, not by the generator
    'validation_split': 0.0,
}

# ------------------------------ callbacks ------------------------------- #
early_stop = EarlyStopping(patience=5, verbose=1)
# Writes to the same file as `model_path`, keeping only the best model seen.
check_point = ModelCheckpoint(model_path, save_best_only=True, verbose=1)
tensor_board = TensorBoard(
    log_dir='../logs/tensorboard/',
    histogram_freq=1,
    write_graph=True,
    write_grads=False,
    write_images=True,
    embeddings_freq=1,
    update_freq='epoch',
)


In [17]:
# Train the segmentation model over the training images, one stacked batch of
# `images_per_batch` source images at a time.
train_path = Path(train_image_dir)
mask_path = Path(train_mask_dir)

# Only the image folder is globbed; each mask is looked up by the same file name.
filenames = sorted(f.name for f in train_path.glob('*.tif'))
if not filenames:
    # Without this the loop below silently does nothing and later cells fail on `model`.
    raise FileNotFoundError(f'No .tif images found in {train_image_dir}')

train_files = [str(train_path / name) for name in filenames]
mask_files = [str(mask_path / name) for name in filenames]

train_img_batches = create_batches(train_files, batch_size=images_per_batch)
train_mask_batches = create_batches(mask_files, batch_size=images_per_batch)

# Fix: the original passed the width twice, so `image_resize_height` was never used.
# NOTE(review): assumes batch_stacker expects (width, height), as cv2.resize does - confirm.
target_size = (image_resize_width, image_resize_height)
train_stack = batch_stacker(train_img_batches, resize=target_size)
mask_stack = batch_stacker(train_mask_batches, resize=target_size, gray=True)


for batch_number, (train_batch, mask_batch) in enumerate(zip(train_stack, mask_stack), start=1):
    print(f'BATCH NUMBER {batch_number}')

    x_train, x_val, y_train, y_val = train_test_split(
        train_batch, mask_batch, random_state=SEED, test_size=train_val_split_size
    )
    if batch_number == 1:
        print_array_properties('x_train', x_train)
        print_array_properties('y_train', y_train)
        print_array_properties('x_val', x_val)
        print_array_properties('y_val', y_val)

    if batch_number > 1:
        # Continue from the model saved at the end of the previous batch. The original
        # reloaded `pretrained_model_path` here, which is only correct while that path
        # happens to equal `model_path`.
        print('Loading Trained Model')
        model = load_model(model_path)
    elif pretrained_model:
        print('Loading Trained Model')
        model = load_model(pretrained_model_path)
    else:
        print('Creating New Model')
        inputs = Input(shape=x_train.shape[1:])
        model = make_model(
            inputs=inputs,
            model_name=model_name,
            print_summary=print_model_summary_on_compile,
        )

    # =================== MODEL PARAMS =================== #
    callbacks = [early_stop, check_point, tensor_board]
    if plot_epoch_val_images:
        # Honour the config flag; the original always attached the plot callback.
        callbacks.append(ValPlotCallback(model, model_batch_size, x_val, y_val))

    # `validation_steps` is intentionally omitted: the validation data are in-memory
    # arrays, so Keras evaluates all of them each epoch. The original expression
    # produced a float step count.
    model_fit_params = dict(
        batch_size=model_batch_size,
        epochs=epochs,
        validation_data=(x_val, y_val),
        verbose=1,
        steps_per_epoch=max(x_train.shape[0] // model_batch_size, 1),
        callbacks=callbacks,
    )
    if not data_augmentation:
        print('Not using data augmentation.')
        model.fit(x_train, y_train, **model_fit_params)
    else:
        print('Augmenting Data')

        # Same keyword arguments and same seed for both generators, so every image
        # and its mask receive the identical random transform.
        image_datagen = ImageDataGenerator(**datagen_args)
        mask_datagen = ImageDataGenerator(**datagen_args)

        image_datagen.fit(x_train, augment=True, seed=SEED)
        mask_datagen.fit(y_train, augment=True, seed=SEED)

        # Fix: flow() defaults to batch_size=32, which did not match the
        # `steps_per_epoch` computed from `model_batch_size`.
        image_generator = image_datagen.flow(x_train, batch_size=model_batch_size, seed=SEED)
        mask_generator = mask_datagen.flow(y_train, batch_size=model_batch_size, seed=SEED)

        train_generator = zip(image_generator, mask_generator)

        model.fit(train_generator, **model_fit_params)

    # Persist after every batch, in both branches, so the next batch's load_model()
    # picks up this batch's training. The original saved only after batch 1 and only
    # when augmenting, so later batches survived only if the checkpoint fired.
    # NOTE(review): this overwrites the checkpoint's "best" file with the last-epoch
    # weights, as the original already did for batch 1.
    model.save(model_path)


BATCH NUMBER 1
X_TRAIN
Length: 57
Shape: (57, 240, 240, 3)
Size: 0.037 GB

Y_TRAIN
Length: 57
Shape: (57, 240, 240, 1)
Size: 0.012 GB

X_VAL
Length: 7
Shape: (7, 240, 240, 3)
Size: 0.005 GB

Y_VAL
Length: 7
Shape: (7, 240, 240, 1)
Size: 0.002 GB

Loading Trained Model
Augmenting Data
Epoch 1/20
Epoch 1: val_loss improved from inf to 0.50869, saving model to data/datagenmodel.hdf5
Epoch 2/20
Epoch 2: val_loss improved from 0.50869 to 0.48748, saving model to data/datagenmodel.hdf5
Epoch 3/20
Epoch 3: val_loss did not improve from 0.48748
Epoch 4/20


In [None]:
# ================================================================ #
# ========================== VALIDATION ========================== #

# `model`, `x_val` and `y_val` are whatever the training loop assigned last,
# i.e. they come from the final training batch.
x_val_pred = model.predict(x_val, batch_size=model_batch_size, verbose=1)

model.evaluate(x_val, y_val, batch_size=model_batch_size)

# Binarise the predictions at 0.5 to obtain a 0/1 uint8 mask.
x_val_pred_mask = np.greater(x_val_pred, 0.5).astype(np.uint8)

In [None]:
# Plot validation inputs next to the raw prediction, the thresholded mask and the ground truth.
# NOTE(review): `plot_predictions` is not imported in any visible cell - confirm which
# module provides it and add it to the import cell, otherwise this fails on a fresh kernel.
plot_predictions(original=x_val, predicted=x_val_pred,
                 predicted_mask=x_val_pred_mask, ground_truth=y_val)

In [None]:
# model = load_model('../data/testing_model.h5')

In [None]:
# ================================================================ #
# ========================== PREDICTION ========================== #


# Predict masks for (at most) the first 20 test images.
#
# `test_filenames` was not defined in any visible cell, so it is built here.
# NOTE(review): the '*.tif' pattern mirrors the training cell - adjust it if the
# test images use a different extension.
test_path = Path(test_image_dir)
test_filenames = sorted(f.name for f in test_path.glob('*.tif'))
if not test_filenames:
    raise FileNotFoundError(f'No .tif images found in {test_image_dir}')

test_tiles = []
for img_name in tqdm(test_filenames[:20]):
    image = cv2.imread(str(test_path / img_name))
    if image is None:
        # cv2.imread returns None rather than raising when a file cannot be read.
        raise FileNotFoundError(f'Could not read test image {test_path / img_name}')
    test_tiles.append(
        np.array(
            image_splitter(
                image.astype(np.uint8),
                num_col_splits=split_cols,
                num_row_splits=split_rows,
                resize=resize,
                resize_height=image_resize_height,
                resize_width=image_resize_width,
            )
        )
    )

# Stack the tiles of all images along the first axis and scale from 0-255 to 0-1.
x_test = (np.vstack(test_tiles) / 255).astype(np.float32)

# Fix: `shape_and_mem` is not defined anywhere visible; use the imported helper.
print_array_properties('x_test', x_test)

test_datagen = ImageDataGenerator()
# Fix: flow() shuffles by default, which scrambles the prediction order so that
# y_pred[i] no longer corresponds to x_test[i] when plotted. The original also
# passed the undefined name `seed`; no seed is needed once shuffling is off.
test_generator = test_datagen.flow(x_test, batch_size=model_batch_size, shuffle=False)

y_pred = model.predict(test_generator, verbose=1)

print_array_properties('y_pred', y_pred)

# Binarise the predictions at 0.5 to obtain a 0/1 uint8 mask.
y_pred_mask = (y_pred > 0.5).astype(np.uint8)

In [None]:
# Plot test tiles next to the raw prediction and the thresholded mask (no ground truth is passed).
# NOTE(review): `plot_predictions` is not imported in any visible cell - confirm which
# module provides it and add it to the import cell, otherwise this fails on a fresh kernel.
plot_predictions(original=x_test, predicted=y_pred,
                 predicted_mask=y_pred_mask)


# https://www.jeremyjordan.me/evaluating-image-segmentation-models/

# result = cv2.bitwise_and(test_split[0], test_split[0], mask=prediction[0])

# result

In [None]:
# https://towardsdatascience.com/metrics-to-evaluate-your-semantic-segmentation-model-6bcb99639aa2

# https://www.jeremyjordan.me/evaluating-image-segmentation-models/

