In [None]:
"""

In this problem we need to find COTS starfish in the Great Barrier Reef,
but we don't need to implement an object detection model like YOLO,
where the user sees each object in a bounding box; instead, we only
need to show the scientists whether there are COTS starfish in a given
place. For this reason, I believe it is better to show a canvas where
the starfish appear.

"""

In [None]:
# REQUIRED

from IPython.display import display, clear_output
from PIL import Image, ImageDraw, ImageEnhance
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np
import ast
import os

In [None]:
# CONSTANTS

# Root folder of the competition data.
INPUT_PATH = '../input/tensorflow-great-barrier-reef'
# Frame resolution in pixels, ordered as (width, height).
IMAGE_SIZE = (1280, 720)

In [None]:
# GET THE DATA AND DISPLAY ITS STRUCTURE

train_dataset = pd.read_csv(INPUT_PATH + '/train.csv')
print(train_dataset)
print("\n")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
train_dataset.info()

In [None]:
# DISPLAY THE DATA

# Per-video count of frames with and without COTS annotations.
# NOTE: at this point `annotations` still holds the raw CSV text, so a frame
# without boxes is the literal string "[]".
video_ids = train_dataset['video_id'].unique()
for position, video_id in enumerate(video_ids):
    video_annotations = train_dataset.loc[train_dataset['video_id'] == video_id, 'annotations']
    empty_frames = int((video_annotations == "[]").sum())
    # The value printed here is a video id, not an image id.
    print(f'Video ID: {video_id}')
    print(f'Number of images without COTS:  {empty_frames}')
    print(f'Number of images with COTS:  {len(video_annotations) - empty_frames}')
    # Separator between videos, keyed on the position rather than on the
    # hard-coded id 2 so it stays correct for any set of video ids.
    if position < len(video_ids) - 1:
        print(f'----------------------------------')

In [None]:
# CONVERT DATA ANNOTATIONS STR to LIST

# Only parse values that are still strings, so re-running this cell after the
# column has already been converted does not raise inside ast.literal_eval.
train_dataset['annotations'] = train_dataset['annotations'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [None]:
# ADD A COLUMN WITH THE IMAGE PATH

# Path layout: <INPUT_PATH>/train_images/video_<video_id>/<video_frame>.jpg
train_dataset['image_path'] = (
    INPUT_PATH + '/train_images/video_'
    + train_dataset['video_id'].astype(str)
    + '/'
    + train_dataset['video_frame'].astype(str)
    + ".jpg"
)

In [None]:
# ADD A COLUMN WITH THE NUMBER OF BOXES PER IMAGE

# One entry per annotated box, so the list length is the box count.
train_dataset['num_bboxes'] = train_dataset['annotations'].apply(len)

In [None]:
# DISPLAY THE NEW STRUCTURE

# First 18 rows of the frame, shown through the notebook's rich display.
train_dataset.iloc[:18]

In [None]:
def draw_grid(draw, strides=8):
    """Overlay a black grid with `strides`-pixel cells on a drawing context.

    Vertical lines are drawn every `strides` pixels across IMAGE_SIZE[0] and
    horizontal lines every `strides` pixels across IMAGE_SIZE[1], both starting
    at 0.

    Args:
        draw: Drawing context exposing `line(xy, fill=..., width=...)`
            (an `ImageDraw.Draw` instance in this notebook).
        strides: Distance in pixels between consecutive grid lines.

    Raises:
        ValueError: If `strides` is not a positive integer.
    """
    strides = int(strides)
    if strides <= 0:
        raise ValueError(f'strides must be a positive integer, got {strides}')

    image_width, image_height = IMAGE_SIZE

    # Step by `strides` directly. Deriving the spacing from a rounded-down line
    # count (the previous approach) drifts away from `strides` whenever it does
    # not divide the image size evenly.
    for x in range(0, image_width, strides):
        draw.line(((x, 0), (x, image_height)), fill="black", width=1)

    for y in range(0, image_height, strides):
        draw.line(((0, y), (image_width, y)), fill="black", width=1)
        
def draw_bbox(draw, bbox):
    """Draw one annotated bounding box and its 'COTS' caption.

    Args:
        draw: Drawing context exposing `rectangle` and `text`
            (an `ImageDraw.Draw` instance in this notebook).
        bbox: Mapping with the keys 'x', 'y', 'width' and 'height'; 'x'/'y' are
            used as the top-left corner of the rectangle.
    """
    x, y, width, height = bbox['x'], bbox['y'], bbox['width'], bbox['height']
    draw.rectangle([x, y, x + width, y + height], width=2, outline='salmon')
    # The caption sits 10 px above the box, clamped to 0 so boxes touching the
    # top edge still get a visible caption. The former `width=7` argument is
    # dropped: ImageDraw.text() documents no `width` parameter.
    draw.text([x, max(y - 10, 0)], 'COTS', fill='salmon')
    
def draw_bboxes(image_path, bboxes, grid=False):
    """Load an image, resize it to IMAGE_SIZE and draw its annotations on it.

    Args:
        image_path: Path of the image file to open.
        bboxes: Iterable of box mappings accepted by `draw_bbox`.
        grid: When True, `draw_grid` is applied before the boxes are drawn.

    Returns:
        The resized PIL image with the overlays drawn on it.
    """
    # Image.open() is lazy and holds on to the file; the context manager
    # releases it deterministically. resize() returns a new, fully loaded image
    # that stays valid after the source file is closed.
    # NOTE(review): box coordinates are not rescaled, so this assumes the source
    # image already has IMAGE_SIZE resolution - confirm.
    with Image.open(image_path) as source:
        image = source.resize(IMAGE_SIZE)

    draw = ImageDraw.Draw(image)

    if grid:
        draw_grid(draw)

    for bbox in bboxes:
        draw_bbox(draw, bbox)

    return image

In [None]:
# DISPLAY AN IMAGE

# Row label of the sample frame used for the visual checks.
IMAGE_TEST_ID = 19668
display(
    draw_bboxes(
        train_dataset.loc[IMAGE_TEST_ID, 'image_path'],
        train_dataset.loc[IMAGE_TEST_ID, 'annotations'],
    )
)

In [None]:
# Same sample frame, this time with the grid overlay enabled.
display(
    draw_bboxes(
        train_dataset.loc[IMAGE_TEST_ID, 'image_path'],
        train_dataset.loc[IMAGE_TEST_ID, 'annotations'],
        grid=True,
    )
)

In [None]:
def load_image(image_path):
    """Read a JPEG file and return it as a float array scaled to [-1, 1].

    Args:
        image_path: Path of the JPEG file to read.

    Returns:
        NumPy float32 array laid out as (height, width, 3).
    """
    raw = tf.io.read_file(image_path)
    # channels=3 makes the decoder always emit RGB. With the default
    # (channels=0) a grayscale JPEG decodes to a single channel, which the
    # former `[..., :3]` slice could not widen to the three channels that the
    # model input expects.
    pixels = tf.image.decode_jpeg(raw, channels=3)
    pixels = tf.cast(pixels, tf.float32)
    return (pixels.numpy() / 127.5) - 1  # [-1, 1] (height, width, channels)

def label_image(labels, stride=8, image_size=None):
    """Rasterise bounding boxes into a coarse binary occupancy map.

    Every grid cell of `stride` x `stride` pixels that a box touches is set to
    1.0; all other cells stay 0.0. Box edges are treated as inclusive, matching
    the rectangle `[x, y, x + width, y + height]` drawn by `draw_bbox`.

    Args:
        labels: Iterable of mappings with the keys 'x', 'y', 'width', 'height'
            (pixel units).
        stride: Side length of a grid cell in pixels.
        image_size: `(width, height)` of the image in pixels; defaults to the
            notebook-level IMAGE_SIZE.

    Returns:
        NumPy float64 array of shape (height // stride, width // stride, 1)
        with values in {0.0, 1.0}.
    """
    if image_size is None:
        image_size = IMAGE_SIZE

    grid_width, grid_height = image_size[0] // stride, image_size[1] // stride
    label_map = np.zeros((grid_height, grid_width, 1))

    for label in labels:
        left = int(label['x'] // stride)
        top = int(label['y'] // stride)
        # The far edge must be derived from the absolute coordinate:
        # `x // stride + width // stride` rounds twice and can miss the last
        # cell that the box overlaps.
        right = int((label['x'] + label['width']) // stride)
        bottom = int((label['y'] + label['height']) // stride)

        # Slice assignment replaces the full-grid scan per box. Upper bounds
        # are clipped by NumPy; lower bounds and stops are clamped at 0 so that
        # negative values are not interpreted as "count from the end".
        rows = slice(max(top, 0), max(bottom + 1, 0))
        cols = slice(max(left, 0), max(right + 1, 0))
        label_map[rows, cols, 0] = 1.0

    return label_map # [0, 1] (height, width, channels)

In [None]:
# Label map of the sample frame, rendered on an explicit axes object.
fig, ax = plt.subplots(figsize=(32, 18))
ax.imshow(label_image(train_dataset['annotations'][IMAGE_TEST_ID]))
plt.show()

In [None]:
# DATASET MAKER

"""

    In this case, since there is only one possible class (COTS), the tiles
    in the grid where there is a COTS will be activated with a value of 1.
    
    Example of a 5x5 matrix with a COTS on the right side:
    0 0 0 1 1
    0 0 0 1 1
    0 0 1 1 1
    0 0 0 1 1
    0 0 0 0 1

"""

# Build [image, label map] pairs for the first 18 rows as a quick visual check.
data = [
    [load_image(path), label_image(train_dataset['annotations'][row])]
    for row, path in enumerate(train_dataset['image_path'][:18])
]

print(tf.shape(data[0][0]))
print(data[0][1].shape)

# Label map of the last pair.
fig, ax = plt.subplots(figsize=(32, 18))
ax.imshow(data[17][1])
plt.show()

# Matching image, mapped back from [-1, 1] to [0, 1] for display.
fig, ax = plt.subplots(figsize=(32, 18))
ax.imshow(data[17][0] * 0.5 + 0.5)
plt.show()

display(draw_bboxes(train_dataset['image_path'][17], train_dataset['annotations'][17]))

In [None]:
class DatasetLoader():
    """Row-indexed access to (image, label map, box count) training samples.

    `data` is expected to provide the columns 'image_path', 'annotations' and
    'num_bboxes' (e.g. the `train_dataset` frame built in this notebook).
    """

    def __init__(self, data):
        # Keep a private, positionally indexed copy. Iterating a DataFrame
        # yields its column labels, so the previous element-by-element append
        # stored column names instead of samples.
        self.data = pd.DataFrame(data).reset_index(drop=True)

    def shuffle(self):
        """Randomly reorder the samples."""
        # np.random.shuffle() works in place and returns None; assigning its
        # result used to replace the data with None.
        order = np.random.permutation(len(self.data))
        self.data = self.data.iloc[order].reset_index(drop=True)

    def __len__(self):
        """Number of samples (rows)."""
        return len(self.data)

    def __getitem__(self, i):
        """Return `(x, y, b)` for the sample at position `i`.

        x: float32 tensor shaped (1, height, width, 3) from `load_image`.
        y: float32 tensor shaped (1, height // 8, width // 8, 1) from
           `label_image`.
        b: the row's 'num_bboxes' value.
        """
        image_path = self.data['image_path'].iloc[i]
        annotations = self.data['annotations'].iloc[i]
        x = tf.cast(tf.reshape(load_image(image_path), (1, IMAGE_SIZE[1], IMAGE_SIZE[0], 3)), tf.float32)
        y = tf.cast(tf.reshape(label_image(annotations), (1, IMAGE_SIZE[1] // 8, IMAGE_SIZE[0] // 8, 1)), tf.float32)
        b = self.data['num_bboxes'].iloc[i]
        return x, y, b

In [None]:
class CNNBlock(tf.keras.layers.Layer):
    """Conv2D -> BatchNormalization -> LeakyReLU(0.1) building block.

    Args:
        features: Number of convolution filters.
        **kwargs: Forwarded to `tf.keras.layers.Conv2D` (e.g. `kernel_size`,
            `strides`, or an explicit `kernel_initializer`).
    """
    def __init__(self, features, **kwargs):
        super(CNNBlock, self).__init__()
        # All-ones kernels make every filter of the layer identical, and
        # identical filters receive identical gradients, so they can never
        # differentiate. Default to a random He initialisation instead, while
        # still letting callers pass their own initializer.
        kwargs.setdefault('kernel_initializer', 'he_normal')
        # use_bias=False: the batch normalisation that follows adds its own
        # learned offset.
        self.conv = tf.keras.layers.Conv2D(features, padding='same', use_bias=False, **kwargs)
        self.bn = tf.keras.layers.BatchNormalization()
        self.leaky = tf.keras.layers.LeakyReLU(0.1)

    def call(self, x):
        """Apply convolution, batch normalisation and leaky ReLU in sequence."""
        return self.leaky(self.bn(self.conv(x)))

In [None]:
class ResidualBlock(tf.keras.layers.Layer):
    """Stack of `num_repeats` residual units.

    Each unit is a 1x1 CNNBlock with `features // 2` filters followed by a 3x3
    CNNBlock with `features` filters, wrapped in its own skip connection. The
    input must therefore already have `features` channels.

    Args:
        features: Channel count of the block's input and output.
        num_repeats: Number of residual units applied in sequence.
    """
    def __init__(self, features, num_repeats=1):
        super(ResidualBlock, self).__init__()
        # Kept as a list of [bottleneck, expansion] pairs under the attribute
        # name `layers`, i.e. the same nested structure as before, so existing
        # object-based checkpoints still map onto these sub-layers.
        self.layers = [
            [
                CNNBlock(features // 2, kernel_size=1),
                CNNBlock(features, kernel_size=3),
            ]
            for _ in range(num_repeats)
        ]

    def call(self, x):
        """Apply every unit as `x = x + unit(x)`."""
        for bottleneck, expansion in self.layers:
            # One skip connection per unit. Previously a single skip wrapped
            # the whole stack, so with num_repeats > 1 the inner units had no
            # residual path at all. Plain tensor addition also avoids creating
            # a new Add layer on every call.
            # NOTE(review): for num_repeats > 1 this changes the function the
            # block computes; weights restored from older checkpoints load
            # fine but will behave differently - retrain or confirm.
            x = x + expansion(bottleneck(x))

        return x

In [None]:
def feture_extractor(**kwargs):
    """Build the fully convolutional model that maps a frame to a COTS map.

    A 64-filter stem is followed by three stride-2 stages (128, 256 and 512
    filters). In each stage the down-sampled tensor is concatenated with the
    output of its residual block, doubling the channel count. A final
    1024-channel residual block feeds a single-filter sigmoid convolution, so
    the output has 1/8 of the input resolution and one channel.

    Args:
        **kwargs: Forwarded to `tf.keras.models.Model` (e.g. `name`).

    Returns:
        The assembled `tf.keras.models.Model`.
    """
    inputs = tf.keras.layers.Input((IMAGE_SIZE[1], IMAGE_SIZE[0], 3)) # (height, width, channels)

    x = CNNBlock(features=64, kernel_size=3)(inputs)

    # (filters, residual repeats) of the three down-sampling stages.
    for features, num_repeats in ((128, 2), (256, 4), (512, 8)):
        x = CNNBlock(features=features, kernel_size=3, strides=2)(x)
        skip = x
        x = ResidualBlock(features=features, num_repeats=num_repeats)(x)
        x = tf.keras.layers.Concatenate()([skip, x])

    x = ResidualBlock(features=1024, num_repeats=1)(x)

    # NOTE(review): an all-ones kernel sums every input activation into the
    # sigmoid, which presumably saturates it at the start of training - confirm.
    outputs = tf.keras.layers.Conv2D(
        1,
        kernel_size=2,
        padding='same',
        activation='sigmoid',
        kernel_initializer=tf.keras.initializers.Ones(),
    )(x)

    return tf.keras.models.Model(inputs=inputs, outputs=outputs, **kwargs)

In [None]:
# Instantiate the network and print a 120-column-wide layer summary.
model = feture_extractor(name='feature_extractor_model')
model.summary(line_length=120)

In [None]:
# Render the layer graph, annotated with tensor shapes, at a reduced resolution.
tf.keras.utils.plot_model(
    model,
    show_shapes=True,
    dpi=64,
)

In [None]:
# Optimiser and the two Keras losses combined in train_step.
opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
bce = tf.keras.losses.BinaryCrossentropy()
mse = tf.keras.losses.MeanSquaredError()

In [None]:
@tf.function
def train_step(x, y):
    """Run one optimisation step on a single batch and return its loss.

    The loss is `1e-4 * pos + bce + mse`, where `pos` is the sum over all
    elements of `pred - y * log(pred)` with 1e-8 added to both `pred` and `y`.

    Args:
        x: Input batch passed to `model`.
        y: Target map compared against the model output.

    Returns:
        The scalar loss tensor of this step.
    """
    epsilon = 1e-8

    with tf.GradientTape() as tape:
        pred = model(x, training=True)
        pred_eps = pred + epsilon
        _pos = tf.math.reduce_sum(pred_eps - (y + epsilon) * tf.math.log(pred_eps))
        loss = (1e-4 * _pos) + bce(y, pred) + mse(y, pred)

    # The forward pass is recorded above; differentiate and update afterwards.
    weights = model.trainable_weights
    opt.apply_gradients(zip(tape.gradient(loss, weights), weights))

    return loss

In [None]:
CHECKPOINT = './'

checkpoint_prefix = os.path.join(CHECKPOINT, "ckpt")
# Objects whose state is written to and restored from a checkpoint.
checkpoint = tf.train.Checkpoint(
    LAST_EPOCH=tf.Variable(0),
    model=model,
    opt=opt,
)

# Restore the newest checkpoint found in CHECKPOINT, if there is one.
manager = tf.train.CheckpointManager(checkpoint=checkpoint, directory=CHECKPOINT, max_to_keep=5)
if not manager.latest_checkpoint:
    print("Inicializando desde cero")
else:
    checkpoint.restore(manager.latest_checkpoint)
    print(f"Restaurado de {manager.latest_checkpoint}")

def update_checkpoint():
    """Save a new checkpoint and delete the one it supersedes.

    After the call only the newly written checkpoint is left of the two; the
    previously newest checkpoint in CHECKPOINT is removed from disk.
    """
    print("Updating checkpoint...")
    # Look up the currently newest checkpoint before writing the next one.
    # This replaces the throwaway CheckpointManager that was built on every
    # call just to read that path.
    previous = tf.train.latest_checkpoint(CHECKPOINT)
    saved = checkpoint.save(file_prefix=checkpoint_prefix)
    if previous and previous != saved:
        # Remove every file of the superseded checkpoint. Globbing copes with
        # sharded data files and with files that are already gone, where the
        # former hard-coded os.remove() calls raised FileNotFoundError.
        for stale_file in tf.io.gfile.glob(previous + '.*'):
            tf.io.gfile.remove(stale_file)
    print("===============================================\nCheckpoint updated")

In [None]:
# Epoch counter as stored in (or restored into) the checkpoint object.
print('Last epoch:', checkpoint.LAST_EPOCH.numpy())

In [None]:
def decayed_learning_rate(lr, step, ds_len, decay_rate=0.875):
    """Exponentially decay a learning rate over the course of the dataset.

    Computes `lr * decay_rate ** (step / ds_len)`, i.e. the rate is multiplied
    by `decay_rate` once per full pass of `ds_len` steps.

    Args:
        lr: Learning rate to decay.
        step: Current step index.
        ds_len: Number of steps in one pass over the dataset (must be non-zero).
        decay_rate: Multiplicative factor applied per pass; 0.875 by default.

    Returns:
        The decayed learning rate.
    """
    progress = step / ds_len
    return lr * decay_rate ** progress

In [None]:
EPOCHS = int(10)
LOSSES = list([])

dataset_length = int(len(train_dataset['image_path']))
for epoch in range(EPOCHS):
    # Iterate the three columns in lockstep instead of re-indexing the frame
    # by the loop counter on every step.
    samples = zip(train_dataset['image_path'], train_dataset['annotations'], train_dataset['num_bboxes'])
    for i, (image_path, annotations, b) in enumerate(samples):
        # Single-sample batch: (1, height, width, 3) image and the matching
        # (1, height // 8, width // 8, 1) label map.
        x = tf.cast(tf.reshape(load_image(image_path), (1, IMAGE_SIZE[1], IMAGE_SIZE[0], 3)), tf.float32)
        y = tf.cast(tf.reshape(label_image(annotations), (1, IMAGE_SIZE[1] // 8, IMAGE_SIZE[0] // 8, 1)), tf.float32)

        loss = train_step(x, y)

        clear_output(wait=True)
        print(f'Epoch: [{(epoch + 1)}/{EPOCHS}] - Step: [{(i + 1)}/{dataset_length}] - Nº bboxes: {b} - Loss: {loss.numpy()}')
#         opt.lr = decayed_learning_rate(opt.lr.numpy(), i, dataset_length)
#         print(f'LR: {opt.lr.numpy()}')
#         LOSSES.append([i, loss.numpy()])

    # LAST_EPOCH is part of the checkpoint but was never updated, so every
    # saved checkpoint reported epoch 0. Count the finished epoch before saving.
    checkpoint.LAST_EPOCH.assign_add(1)
    update_checkpoint()
#     break

In [None]:
# plt.subplots(figsize=(36, 6))
# plt.plot([i for i, l in LOSSES], [l for i, l in LOSSES])
# plt.show()

In [None]:
# Forward pass on row 35, plus the annotated frame and target map for comparison.
outputs = model(
    tf.reshape(
        load_image(train_dataset['image_path'][35]),
        (1, IMAGE_SIZE[1], IMAGE_SIZE[0], 3),
    )
)
inputs = draw_bboxes(train_dataset['image_path'][35], train_dataset['annotations'][35])
target = label_image(train_dataset['annotations'][35])

In [None]:
print('Input')
display(inputs)

# The predicted map and the target map are rendered the same way, one after the other.
for title, grid in (('Predicted', outputs[0]), ('Target', target)):
    print(title)
    fig, ax = plt.subplots(figsize=(32, 18))
    ax.imshow(grid)
    plt.show()