In [None]:
#from google.colab import drive
#mount_path = "/content/drive"
#drive.mount(mount_path)

In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf

from pathlib import Path
import os
import glob

import json

import matplotlib.pyplot as plt
plt.rcParams["figure.facecolor"] = "w"

In [None]:
# Make the parent directory (the one holding the `src` package) importable
import os
import sys
sys.path.insert(1, os.path.abspath('..'))

In [None]:
from src.util.lr_scheduler import LRFinder, OneCycleScheduler

In [None]:
# Root folder of the VOC2012 dataset, relative to the notebook's working directory
root_data = Path("../data/VOC2012")

#root_drive = Path(mount_path) / "My Drive"
#root_data = root_drive / "object-detection/data/VOC2012"

In [None]:
# Train / Val
# Folder with the split definition files (`<split>.txt`, `<label>_<split>.txt`)
main_path = root_data / "ImageSets/Main"

def get_dataset_ids(split):
  """Read the image identifiers listed for one dataset split.

  Args:
    split: name of the split; the file `<split>.txt` inside `main_path` is read.

  Returns:
    A DataFrame with a single `imageName` column holding one entry per line
    of the file, with trailing whitespace removed.
  """
  with open(main_path / f"{split}.txt") as split_file:
    image_ids = [raw_line.rstrip() for raw_line in split_file]
  return pd.DataFrame(np.array(image_ids), columns=["imageName"])

# Load the image ids of both splits, then report how many images each one holds
df_train, df_val = get_dataset_ids("train"), get_dataset_ids("val")
print(f"len train: {len(df_train)}")
print(f"len val: {len(df_val)}")

In [None]:
# Inspect labels
labels_train, labels_val = [], []
# Only these two splits are collected; anything else (e.g. trainval) is skipped
labels_by_split = {"train": labels_train, "val": labels_val}

for txt_path in main_path.glob("*.txt"):
  stem_parts = txt_path.stem.split("_")
  # files without exactly one underscore (train.txt, val.txt, ...) carry no label
  if len(stem_parts) != 2:
    continue

  class_name, split_name = stem_parts
  if split_name in labels_by_split:
    labels_by_split[split_name].append(class_name)

# both splits must expose exactly the same set of labels
assert sorted(labels_train) == sorted(labels_val)

# build label map: label name -> index (alphabetical order) and its inverse
class_names = sorted(labels_train)
label_map = dict(zip(class_names, range(len(class_names))))
label_map_inverse = dict(enumerate(class_names))
n_outputs = len(label_map)
label_map

In [None]:
# Build y_train & y_val
# When True, entries flagged "0" in the label files are counted as positives too
include_difficult = True

def build_y_dict(df, split):
    """Map every image name of a split to its multi-hot label vector.

    Args:
        df: DataFrame whose `imageName` column lists the images of the split.
        split: split name; the files `<label>_<split>.txt` in `main_path` are read
            for every label of `label_map`.

    Returns:
        A dict `image name -> np.ndarray` of length `len(label_map)`. Position `idx`
        is set to 1 when the label file marks the image with "1", or with "0"
        while the module-level `include_difficult` flag is enabled.

    TODO add mask for difficult cases
    """
    n_labels = len(label_map)
    # every image starts with an all-zero vector
    y_dict = {row.imageName: np.zeros(n_labels) for row in df.itertuples()}

    # one annotation file per label
    for label, idx in label_map.items():
        with open(main_path / f"{label}_{split}.txt") as file:
            annotations = [line.rstrip().split(" ") for line in file]
        for fields in annotations:
            # first field is the image name, last field the annotation flag
            image_name, ground_truth = fields[0], fields[-1]
            is_positive = ground_truth == "1"
            is_difficult = ground_truth == "0"
            if is_positive or (is_difficult and include_difficult):
                y_dict[image_name][idx] = 1

    return y_dict

# Label dictionaries of both splits and the number of images in each
y_train, y_val = build_y_dict(df_train, "train"), build_y_dict(df_val, "val")

n_train = len(y_train)
n_val = len(y_val)

In [None]:
# distribution of labels
def plot_distribution(y_dict):
    """Show a bar chart of the per-label sum of all label vectors in `y_dict`.

    Args:
        y_dict: dict whose values are label vectors of length `len(label_map)`.
    """
    class_totals = np.zeros(len(label_map))
    for label_vector in y_dict.values():
        class_totals += label_vector

    positions = list(label_map.values())
    names = list(label_map.keys())

    fig, ax = plt.subplots()
    ax.bar(positions, class_totals)
    ax.set_xticks(positions)
    ax.set_xticklabels(names, rotation=60)
    plt.show()

# One chart per split: training first, validation second
for split_labels in (y_train, y_val):
    plot_distribution(split_labels)

In [None]:
# Build tf datasets
jpeg_path = root_data / "JPEGImages"

def buid_tf_datasets(y_dict):
    """Create a dataset of (absolute JPEG path, label vector) pairs.

    Args:
        y_dict: dict mapping image names to label vectors.

    Returns:
        A `tf.data.Dataset` with one element per entry of `y_dict`, in the
        dict's iteration order.
    """
    jpeg_list = []
    labels = []
    for image_name, label_vector in y_dict.items():
        jpeg_list.append(str((jpeg_path / f"{image_name}.jpg").resolve()))
        labels.append(label_vector)
    return tf.data.Dataset.from_tensor_slices((jpeg_list, labels))

# Path/label datasets for both splits (images are decoded later in the pipeline)
ds_train, ds_val = buid_tf_datasets(y_train), buid_tf_datasets(y_val)

In [None]:
# Load and preprocess images
# This has to be done in the tf.Data pipeline otherwise you can't batch data with variable input shapes
# Target (height, width) every image is resized to; also used as the models' input shape
INPUT_SIZE = (224, 224)

def rescale(image):
    """Divide every value of `image` by 255."""
    scaled = tf.divide(image, 255)
    return scaled

def resize(image):
    """Resize `image` to `INPUT_SIZE` using antialiased bilinear interpolation."""
    # tf.image.resize raises an error about images having no shape when `from_generator` is used
    #return tf.image.resize_with_pad(image, INPUT_SIZE[0], INPUT_SIZE[1], method=tf.image.ResizeMethod.BILINEAR, antialias=True)
    target_height, target_width = INPUT_SIZE[0], INPUT_SIZE[1]
    return tf.image.resize(
        image,
        size=(target_height, target_width),
        method=tf.image.ResizeMethod.BILINEAR,
        antialias=True,
    )

def clip_values(image):
    """Clamp every value of `image` to the range [0, 1]."""
    return tf.clip_by_value(image, clip_value_min=0, clip_value_max=1)

def preprocess_image(image):
    """Run the preprocessing chain on a decoded image: rescale, resize, then clip."""
    scaled = rescale(image)
    resized = resize(scaled)
    return clip_values(resized)

def load_and_preprocess_image(image_path, labels):
    """Read a JPEG file from disk and return the preprocessed image with its labels.

    Args:
        image_path: scalar string tensor with the path of the JPEG file.
        labels: label vector belonging to the image; passed through unchanged.

    Returns:
        A tuple `(image, labels)` where `image` is the output of `preprocess_image`.
    """
    img_content = tf.io.read_file(image_path)
    # Force RGB output: with the default `channels=0` the decoder keeps the
    # file's own channel count, so a grayscale JPEG would produce a 1-channel
    # tensor that cannot be batched with RGB images or fed to the 3-channel models.
    image = tf.io.decode_jpeg(img_content, channels=3)
    return (preprocess_image(image), labels)

# Decode and preprocess the images inside the tf.data pipeline, in parallel
ds_train = ds_train.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

ds_val = ds_val.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

In [None]:
# Sanity check: display the first preprocessed training image
for sample in ds_train.take(1).as_numpy_iterator():
  fig, ax = plt.subplots()
  ax.imshow(sample[0])

In [None]:
BATCH_SIZE = 256

# Training: cache the preprocessed images, shuffle with a buffer as large as
# the training set, then batch and prefetch
ds_train = ds_train.cache().shuffle(n_train).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Validation: caching is done after batching because batches can be the same between epochs
ds_val = ds_val.batch(1).cache().prefetch(tf.data.AUTOTUNE)

In [None]:
# A simple sequential CNN model
def simple_model():
    """Build a small sequential CNN that outputs one logit per label.

    The network stacks four convolutions with growing kernel sizes (3, 5, 9, 17),
    a max-pooling layer after the second one, global max pooling and a final
    Dense layer without activation.

    Returns:
        A `tf.keras.Model` named "object-classification" taking images of shape
        `(INPUT_SIZE[0], INPUT_SIZE[1], 3)` and returning `n_outputs` logits.
    """
    inputs = tf.keras.Input(shape=(INPUT_SIZE[0], INPUT_SIZE[1], 3))
    x = tf.keras.layers.Conv2D(16, 3, padding="same", activation="relu")(inputs)
    x = tf.keras.layers.Conv2D(32, 5, padding="same", activation="relu")(x)
    x = tf.keras.layers.MaxPooling2D(3, 1)(x)
    # Chain on `x`, not on `inputs`: feeding `inputs` here disconnected the
    # three layers above from the output graph, so they were never used.
    x = tf.keras.layers.Conv2D(64, 9, padding="same", activation="relu")(x)
    x = tf.keras.layers.Conv2D(128, 17, padding="same", activation="relu")(x)
    x = tf.keras.layers.GlobalMaxPooling2D()(x)
    # No activation: the loss is configured with from_logits=True
    outputs = tf.keras.layers.Dense(n_outputs)(x)

    return tf.keras.Model(inputs=inputs, outputs=outputs, name="object-classification")

In [None]:
# A shorter version of Inception (~30% accuracy)
def short_inception_model():
  """Build a reduced Inception-style network that outputs one logit per label.

  The model chains: an input stem, two `inception_block_0`, a pooling block,
  two `inception_block_1`, a second pooling block and a global-average-pooling
  + Dense head. The commented-out calls kept next to the active ones appear to
  be the wider filter counts this variant was reduced from.

  Returns:
    A `tf.keras.Model` named "object-classification" taking images of shape
    `(INPUT_SIZE[0], INPUT_SIZE[1], 3)` and returning `n_outputs` values
    (no activation on the last layer).
  """

  def conv2d_bn(x, filters, width, height, padding="same", strides=(1, 1)):
    """Apply Conv2D (no bias) -> BatchNormalization (no scale) -> ReLU.

    NOTE(review): `(width, height)` is passed as `kernel_size`, which Keras
    documents as (height, width) — confirm the argument names are not swapped.
    """
    x = tf.keras.layers.Conv2D(filters, (width, height), strides=strides, padding=padding, use_bias=False)(x)
    x = tf.keras.layers.BatchNormalization(scale=False)(x)
    x = tf.keras.layers.Activation('relu')(x)

    return x

  def input_block(inputs):
    """Stem: three 3x3 convolutions, max-pool, one more 3x3 convolution, max-pool."""
    x = conv2d_bn(inputs, 32, 3, 3, strides=(2, 2), padding='valid')
    x = conv2d_bn(x, 32, 3, 3, padding='valid')
    x = conv2d_bn(x, 64, 3, 3, padding="same")
    x = tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    #x = conv2d_bn(x, 80, 1, 1, padding='valid')
    # --
    #x = conv2d_bn(x, 192, 3, 3, padding='valid')
    x = conv2d_bn(x, 96, 3, 3, padding='valid')
    x = tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    return x

  def inception_block_0(x):
    """Four parallel branches (1x1, 5x5, double 3x3, avg-pool + 1x1) concatenated.

    Output channels: 48 + 48 + 64 + 32 = 192; all branches use "same" padding
    and stride 1, so the spatial size is unchanged.
    """
    #branch1x1 = conv2d_bn(x, 64, 1, 1, padding="same")
    branch1x1 = conv2d_bn(x, 48, 1, 1, padding="same")

    #branch5x5 = conv2d_bn(x, 48, 1, 1, padding="same")
    branch5x5 = conv2d_bn(x, 32, 1, 1, padding="same")
    #branch5x5 = conv2d_bn(branch5x5, 64, 5, 5, padding="same")
    branch5x5 = conv2d_bn(branch5x5, 48, 5, 5, padding="same")

    #branch3x3dbl = conv2d_bn(x, 64, 1, 1, padding="same")
    branch3x3dbl = conv2d_bn(x, 32, 1, 1, padding="same")
    #branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, padding="same")
    branch3x3dbl = conv2d_bn(branch3x3dbl, 48, 3, 3, padding="same")
    #branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, padding="same")
    branch3x3dbl = conv2d_bn(branch3x3dbl, 64, 3, 3, padding="same")

    branch_pool = tf.keras.layers.AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 32, 1, 1, padding="same")

    x = tf.keras.layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool])

    return x

  def inception_block_1(x):
    """Four parallel branches built from factorized 1x7 / 7x1 convolutions.

    Output channels: 128 + 128 + 96 + 128 = 480; all branches use "same"
    padding and stride 1, so the spatial size is unchanged.
    """
    #branch1x1 = conv2d_bn(x, 192, 1, 1, padding="same")
    branch1x1 = conv2d_bn(x, 128, 1, 1, padding="same")

    #branch7x7 = conv2d_bn(x, 128, 1, 1, padding="same")
    branch7x7 = conv2d_bn(x, 96, 1, 1, padding="same")
    #branch7x7 = conv2d_bn(branch7x7, 128, 1, 7, padding="same")
    branch7x7 = conv2d_bn(branch7x7, 96, 1, 7, padding="same")
    #branch7x7 = conv2d_bn(branch7x7, 192, 7, 1, padding="same")
    branch7x7 = conv2d_bn(branch7x7, 128, 7, 1, padding="same")

    #branch7x7dbl = conv2d_bn(x, 128, 1, 1, padding="same")
    branch7x7dbl = conv2d_bn(x, 96, 1, 1, padding="same")
    #branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1, padding="same")
    branch7x7dbl = conv2d_bn(branch7x7dbl, 96, 7, 1, padding="same")
    #branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7, padding="same")
    branch7x7dbl = conv2d_bn(branch7x7dbl, 96, 1, 7, padding="same")
    #branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1, padding="same")
    branch7x7dbl = conv2d_bn(branch7x7dbl, 96, 7, 1, padding="same")
    #branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7, padding="same")
    branch7x7dbl = conv2d_bn(branch7x7dbl, 96, 1, 7, padding="same")

    branch_pool = tf.keras.layers.AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
    #branch_pool = conv2d_bn(branch_pool, 192, 1, 1, padding='same')
    branch_pool = conv2d_bn(branch_pool, 128, 1, 1, padding='same')

    x = tf.keras.layers.concatenate([branch1x1, branch7x7, branch7x7dbl, branch_pool])

    return x

  def pooling_block(x):
    """Downsampling block: two stride-2 conv branches plus a stride-2 max-pool.

    Output channels: 256 + 64 + (channels of `x`), since the pooling branch
    keeps the input channel count.
    """
    #branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid')
    branch3x3 = conv2d_bn(x, 256, 3, 3, strides=(2, 2), padding='valid')

    #branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(x, 48, 1, 1)
    #branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 64, 3, 3)
    #branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid')
    branch3x3dbl = conv2d_bn(branch3x3dbl, 64, 3, 3, strides=(2, 2), padding='valid')

    branch_pool = tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    x = tf.keras.layers.concatenate([branch3x3, branch3x3dbl, branch_pool])

    return x

  def output_block(x, n_outputs):
    """Head: global average pooling followed by a Dense layer without activation."""
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(n_outputs)(x)

    return x

  # Shape comments below are derived from the active filter counts above
  # for the (224, 224) INPUT_SIZE.
  inputs = tf.keras.Input(shape=(INPUT_SIZE[0], INPUT_SIZE[1], 3))  # (224, 224, 3)
  x = input_block(inputs)  # (25, 25, 96)
  for i in range(2):
    x = inception_block_0(x)  # (25, 25, 192)
  x = pooling_block(x)  # (12, 12, 512)
  for i in range(2):
    x = inception_block_1(x)  # (12, 12, 480)
  x = pooling_block(x)  # (5, 5, 800)
  x = output_block(x, n_outputs)  # (n_outputs,)


  return tf.keras.Model(inputs=inputs, outputs=x, name="object-classification")

In [None]:
# MobileNet network (~70% accuracy when transfer learning)
def mobile_net(transfer_learning=True):
    """Build a MobileNetV2-based classifier that outputs one logit per label.

    The model expects images scaled to [0, 1] (as produced by `preprocess_image`)
    and rescales them internally to the [-1, 1] range MobileNetV2 works with.

    Args:
        transfer_learning: when True, the backbone is initialised with ImageNet
            weights and frozen; when False, it starts from random weights and
            is trained together with the head.

    Returns:
        A `tf.keras.Model` named "object-classification" taking images of shape
        `(INPUT_SIZE[0], INPUT_SIZE[1], 3)` and returning `n_outputs` logits.
    """
    weights = "imagenet" if transfer_learning else None

    base_model = tf.keras.applications.mobilenet_v2.MobileNetV2(
        input_shape=(INPUT_SIZE[0], INPUT_SIZE[1], 3),
        include_top=False,
        weights=weights,
        pooling='avg'
    )

    if transfer_learning:
        base_model.trainable = False

    inputs = tf.keras.Input(shape=(INPUT_SIZE[0], INPUT_SIZE[1], 3))
    # The ImageNet weights were trained on pixels in [-1, 1]; map the [0, 1]
    # inputs of the data pipeline onto that range (x * 2 - 1).
    x = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0)(inputs)
    if transfer_learning:
        # Keep the frozen backbone (BatchNorm included) in inference mode
        x = base_model(x, training=False)
    else:
        # Training from scratch: let BatchNorm update its statistics
        x = base_model(x)
    x = tf.keras.layers.Dense(512, activation='relu')(x)
    x = tf.keras.layers.Dense(256, activation='relu')(x)
    # No activation: the loss is configured with from_logits=True
    outputs = tf.keras.layers.Dense(n_outputs)(x)

    return tf.keras.Model(inputs=inputs, outputs=outputs, name="object-classification")

In [None]:
# Pick the architecture to train; the alternatives are kept for quick switching
#model = simple_model()
#model = short_inception_model()
model = mobile_net(transfer_learning=True)

#model.summary()

In [None]:
# Learning-rate search: fit the model with the LRFinder callback attached and
# plot what it recorded (the callback comes from src.util.lr_scheduler).
lr_finder = LRFinder()
optimizer = tf.keras.optimizers.Adam()
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),  # recommended to set True, but remove activation on output layer
    optimizer=optimizer,
    metrics=["accuracy"]
)
# The search trains the very same `model` object that is fitted for real
# afterwards; snapshot the weights so the actual training does not start from
# whatever state the search leaves behind.
initial_weights = model.get_weights()
_ = model.fit(ds_train, epochs=10, callbacks=[lr_finder])
model.set_weights(initial_weights)

lr_finder.plot()

In [None]:
# Training hyper-parameters and the one-cycle learning-rate schedule
n_epochs = 15
lr = 5e-3
# `ds_train` is already batched, so len(ds_train) is the number of batches
# (optimizer steps) per epoch; dividing by BATCH_SIZE again would shrink the
# schedule to roughly one step per epoch.
steps = len(ds_train) * n_epochs
lr_schedule = OneCycleScheduler(lr, steps)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras versions reject.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

In [None]:
# (Re)compile with the optimizer configured for the one-cycle run.
# from_logits=True: recommended, but requires no activation on the output layer
model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [None]:
# Train with the one-cycle schedule, validating on `ds_val` after every epoch
model.fit(ds_train, validation_data=ds_val, epochs=n_epochs, callbacks=[lr_schedule])

In [None]:
# Plot what the one-cycle scheduler callback recorded during training
lr_schedule.plot()

In [None]:
# Show the first 10 validation images with their ground-truth and predicted labels.
# `unbatch()` makes the loop independent of the validation batch size (ds_val is
# batched with size 1, so indexing 10 images inside a single batch fails).
for img, true_vector in ds_val.unbatch().take(10).as_numpy_iterator():
  # ground truth
  ground_truth = "-".join(
    label_map_inverse[idx] for idx in np.where(true_vector > 0.5)[0]
  )

  # predictions: the model outputs logits (the loss uses from_logits=True),
  # so convert them to probabilities before applying the 0.5 threshold
  logits = model.predict(img[np.newaxis, ...])
  probabilities = tf.math.sigmoid(logits).numpy()[0]
  predictions = "-".join(
    label_map_inverse[idx] for idx in np.where(probabilities > 0.5)[0]
  )

  fig, ax = plt.subplots()
  ax.set_title(f"Ground truth: {ground_truth}\nPredictions: {predictions}")

  ax.imshow(img)