# Parameters and imports

In [1]:
# Seed value passed to Python's "random", NumPy and TensorFlow by
# reset_random_seeds().
random_seed = 20211216

# Use the "mini" training set with 9 images.
# fcd_ts_dir = '/content/drive/My Drive/PredatorEye/mini_training_set/'
# Use the "real" training set. (This comment originally said 2008 images, but
# the run recorded further down in this notebook read 5000 files -- TODO
# confirm the current size of the set.)
fcd_ts_dir = '/content/drive/My Drive/PredatorEye/fcd_training_set/'

# Number of image files to read: an int for a random subset, or 'all'.
# max_input_images = 100
max_input_images = 'all'

# For each image read from dataset "amplify" the set by up to 7 additional
# variations of the image via rotations and mirroring. The value is the total
# number of copies kept per input image (the original plus amplification - 1
# variations), so it must be between 1 and 8.
# When I tried using 8 I would run out of memory when the training began.
# amplification = 1
amplification = 6
# amplification = 7

# Side length, in pixels, of the (square) full-size training images.
# Maybe read from image file?
# Maybe assert all images are square and this size?
fcd_image_size = 1024

# Disk diameter, relative to full sized megapixel image.
fcd_disk_size = 201

# For scaling down the input image size.
# input_scale = 1
input_scale = 0.125
# input_scale = 0.25
# input_scale = 0.5
# When scaling is in effect, both sizes above are converted from full-size
# pixels to scaled pixels, so after this point they describe the images
# actually fed to the model. int() truncates (e.g. 201 * 0.125 -> 25).
if (input_scale != 1):
    fcd_image_size = int(fcd_image_size * input_scale)
    # does this really want to be an int?
    fcd_disk_size = int(fcd_disk_size * input_scale)

# Training hyperparameters passed to model.fit() in run_model().
fcd_epochs = 100
# fcd_epochs = 40
fcd_batch_size = 32

In [2]:
%tensorflow_version 2.x
import tensorflow as tf
print('TensorFlow version:', tf.__version__)

import gc
import PIL
import time
import random
import numpy as np
from PIL import Image
from os import listdir
from os.path import join
from tqdm.auto import tqdm
from matplotlib import image
import matplotlib.pyplot as plt

# from DLAVA, includes unused symbols, maybe tighten later
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.constraints import MaxNorm
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.losses import Loss

## maybe just write these inline in the code below?
from numpy import asarray 
from tensorflow.keras import backend as keras_backend
keras_backend.set_image_data_format('channels_last')

TensorFlow version: 2.7.0


In [3]:
# Check for Colab Pro resources
def check_colab_resources():
    # Prints a short report about the GPU and the RAM of the current runtime.
    # Takes no arguments and returns nothing; all results go to stdout.
    #
    # IPython "!" shell capture: runs nvidia-smi and collects its output lines,
    # which are then joined back into one printable string.
    gpu_info = !nvidia-smi
    gpu_info = '\n'.join(gpu_info)
    # The word "failed" in the nvidia-smi output is taken to mean that no GPU
    # is attached; otherwise the full nvidia-smi table is printed.
    if gpu_info.find('failed') >= 0:
        print('Not connected to a GPU')
    else:
        print(gpu_info)
    # Imported locally: psutil is only needed by this one diagnostic.
    from psutil import virtual_memory
    # Total (not currently free) memory, in decimal gigabytes (1e9 bytes).
    ram_gb = virtual_memory().total / 1e9
    # Runtimes with less than 20 GB are reported as not "high-RAM".
    if ram_gb < 20:
        print('Not using a high-RAM runtime.')
    else:
        print('Using a high-RAM runtime.')
    # NOTE: the message says "available", but the number printed is the total
    # memory read from virtual_memory().total above.
    print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# Run the check once when this cell is executed.
check_colab_resources()

Sat Dec 18 01:07:43 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   48C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Utilities

In [4]:
# Prints "expression = <value>"
def debug_print(expression):
    """Evaluate the source string "expression" and print "expression = value".

    The string is run through eval(), so it must only ever be a trusted,
    hand-written expression (it is a debugging aid, not an input parser).
    """
    value = eval(expression)
    print(expression, '=', value)

# Reset random sequence seeds in Python's "random", Numpy, and TensorFlow.
def reset_random_seeds():
    """Re-seed every random number source used by this notebook.

    Applies the global "random_seed" to Python's "random" module, to NumPy
    and to TensorFlow, in that order.
    """
    seeders = (random.seed, np.random.seed, tf.random.set_seed)
    for apply_seed in seeders:
        apply_seed(random_seed)

# Parse FCD filename to a list of two ints: (x, y) pixel coordinates.
def fcd_filename_to_xy_ints(filename):
    """Extract the [x, y] pixel coordinates encoded in an FCD file name.

    Everything from the first "." onward is discarded, the remainder is split
    on "_", and the second and third fields are converted to ints.
    For example "foobar_123_456.png" gives [123, 456].
    """
    stem = filename.partition('.')[0]
    fields = stem.split('_')
    return [int(field) for field in fields[1:3]]

# Get image label from image file names ([x, y] as floats on [0,1])
def fcd_normalized_xy(filename, pixels):
    """Return the label for an image: disk center [x, y] as floats on [0, 1].

    The pixel coordinates parsed from "filename" refer to the full-size image,
    while "pixels" is the already scaled-down pixel array. The scaled size is
    therefore divided by the global "input_scale" to recover the full size.
    Only pixels.shape[1] is used, for both x and y, so the image is assumed
    to be square.
    """
    xy_in_pixels = fcd_filename_to_xy_ints(filename)
    scaled_side = np.array(pixels.shape)[1:2]
    full_side = scaled_side / input_scale
    return xy_in_pixels / full_side

# Draw a training image on the log. First arg is either a 24 bit RGB pixel
# representation as read from file, or the rescaled 3xfloat used internally.
# Optionally draw crosshairs to show center of disk.
def draw_image(rgb_pixel_tensor, center=(0,0)):
    """Show one training image in the notebook, optionally with crosshairs.

    Args:
        rgb_pixel_tensor: image pixels, either integer 24 bit RGB as read
            from file, or floating point values rescaled to [0, 1] as used
            internally. Any floating point dtype (not only float64) is
            treated as the rescaled form.
        center: relative (x, y) position of the disk center. Accepts a tuple,
            a list or a NumPy array. The default (0, 0) means "draw no
            crosshairs".
    """
    pixels = np.asarray(rgb_pixel_tensor)
    if np.issubdtype(pixels.dtype, np.floating):
        # Map [0, 1] floats back to 8 bit channel values for display.
        unscaled_pixels = np.interp(pixels, [0, 1], [0, 255])
        i24bit = Image.fromarray(unscaled_pixels.astype('uint8'), mode='RGB')
    else:
        i24bit = Image.fromarray(pixels)
    plt.imshow(i24bit)
    # Compare as a tuple: a list never equals a tuple, and comparing a NumPy
    # array with "!=" yields an array whose truth value is ambiguous.
    if (tuple(center) != (0, 0)):
        draw_crosshairs(center)
    plt.show()

# Draw crosshairs to indicate disk position (label or estimate).
def draw_crosshairs(center):
    """Overlay crosshairs on the current plot at relative position "center".

    A horizontal black line and a vertical white line are drawn through the
    point, each interrupted by a gap slightly larger (1.2x) than the disk so
    that the disk itself stays visible. Sizes come from the globals
    "fcd_image_size" and "fcd_disk_size".
    """
    max_coord = fcd_image_size - 1        # largest valid pixel coordinate
    gap_radius = fcd_disk_size * 1.2 / 2  # half-width of the gap around center
    x_pixel = center[0] * max_coord
    y_pixel = center[1] * max_coord

    # Where each line stops before the gap and resumes after it, clamped to
    # the image bounds.
    left_stop = max(0, x_pixel - gap_radius)
    right_start = min(max_coord, x_pixel + gap_radius)
    top_stop = max(0, y_pixel - gap_radius)
    bottom_start = min(max_coord, y_pixel + gap_radius)

    plt.hlines(y_pixel, 0, left_stop, color="black")
    plt.hlines(y_pixel, right_start, max_coord, color="black")
    plt.vlines(x_pixel, 0, top_stop, color="white")
    plt.vlines(x_pixel, bottom_start, max_coord, color="white")

# Draw line in plot between arbitrary points in plot.
# eg: draw_line((100, 100), (924, 924), color="yellow")
def draw_line(p1, p2, color="white"):
    """Draw a straight segment from point p1 to point p2 on the current plot.

    Points are (x, y) pairs in plot coordinates. "color" is handed to
    plt.plot() as its positional format argument.
    """
    x_values = [p1[0], p2[0]]
    y_values = [p1[1], p2[1]]
    plt.plot(x_values, y_values, color)

# debug_print('fcd_filename_to_xy_ints("foobar_123_456")')
# debug_print('fcd_normalized_xy("foobar_123_456", np.zeros((1024,1024,3)))')
# debug_print('[123/(1024/input_scale), 456/(1024/input_scale)]')

# Training data reader

In [5]:
# Loads FCD training data image files from "directory_pathname". Returns an
# array of images and an array of labels (each an XY pair, the relative position
# of the disk center). Optional "image_count" can limit the number of images
# read, by taking a random sample of available image files, defaults to "all".

def read_fcd_data_from_directory(directory_pathname, image_count = 'all'):
    """Load FCD training images and their labels from a directory.

    Every selected file is resized to (fcd_image_size, fcd_image_size),
    rescaled to floats on [0, 1], and stored "amplification" times: once
    unchanged plus amplification - 1 randomly chosen rotated/mirrored
    variations. The label of each copy is the relative (x, y) disk center,
    parsed from the file name and transformed along with the pixels.

    Args:
        directory_pathname: directory containing the image files.
        image_count: number of files to use, taken as a random sample of the
            directory, or 'all' (the default) to use every file.

    Returns:
        (images, labels): NumPy arrays of shape
        [count * amplification, fcd_image_size, fcd_image_size, 3] and
        [count * amplification, 2]. Both are allocated with np.zeros(), so
        they have NumPy's default float64 dtype.

    Raises:
        AssertionError: if "amplification" is not between 1 and 8, or if
            "image_count" exceeds the number of files in the directory.
    """
    variations = 8  # 4 from rotations times two from mirroring
    assert ((amplification > 0) and (amplification <= variations))

    # listdir() returns names in arbitrary order; sorting first makes the
    # seeded random.sample() below pick the same files in the same order on
    # every run.
    directory_contents = sorted(listdir(directory_pathname))
    requested_count = image_count  # kept for the log message ('all' or int)
    if (image_count == 'all'): image_count = len(directory_contents)
    assert image_count <= len(directory_contents), "Too few images in directory"
    # Sampling every file is equivalent to shuffling the whole list.
    directory_contents = random.sample(directory_contents, image_count)
    image_count *= amplification # for rot/mir
    print('Reading', requested_count, 'images from', directory_pathname)
    print('With an amplification factor of', amplification,
          'for a total of', image_count, 'images in dataset.')

    # Label transforms matching the pixel transforms used below.
    # 0.5 - (v - 0.5) mirrors a relative coordinate v about the image center.
    def center_rot90(cp): return (cp[1], 0.5 - (cp[0] - 0.5))
    def center_flip(cp): return (0.5 - (cp[0] - 0.5), cp[1])

    # Pre-allocate a tensor for all image data and one for all labels.
    local_images = np.zeros([image_count, fcd_image_size, fcd_image_size, 3])
    local_labels = np.zeros([image_count, 2])
    new_size = (fcd_image_size, fcd_image_size)
    image_index = 0
    for filename in tqdm(directory_contents):
        image_pathname = join(directory_pathname, filename)
        # Numpy pixel array of the resized image. The "with" block closes the
        # underlying file as soon as the pixels have been copied out.
        with Image.open(image_pathname) as image:
            pixels = asarray(image.resize(new_size, PIL.Image.LANCZOS))
        # Convert input image data to floating-point.
        float_pixels = keras_backend.cast_to_floatx(pixels)
        # Scale input image data to range [0, 1] (in DLAVA was [-1, 1])
        scaled_pixels = np.interp(float_pixels, [0, 255], [0, 1])
        # Read disk center position from file name.
        center_position = fcd_normalized_xy(filename, pixels)

        # Variation 0 (the unmodified image) is always kept; the rest are a
        # random choice among variations 1..7.
        keepers = set(random.sample(range(1, variations), amplification - 1))
        keepers.add(0)
        for i in range(variations):
            if (i in keepers):
                # Copy pixel data into slice "image_index" of "local_images"
                local_images[image_index, :, :, :] = scaled_pixels
                # Copy disk center XY position into slice of "local_labels".
                local_labels[image_index, :] = center_position
                image_index += 1
            # Step to the next variation: three quarter turns, then one
            # mirroring, then three more quarter turns of the mirrored image.
            if (i < variations - 1):
                if (i == 3):
                    scaled_pixels = np.flip(scaled_pixels, axis=1)
                    center_position = center_flip(center_position)
                else:
                    scaled_pixels = np.rot90(scaled_pixels, k=1, axes=(0, 1))
                    center_position = center_rot90(center_position)
    return local_images, local_labels

# Distance-based loss

In [6]:
# class FCDPositionLoss(Loss):
#     # def __init__(self, regularization_factor=0.1, name="custom_mse"):
#     def __init__(self,
#                  # diameter = fcd_image_size / fcd_disk_size,
#                  diameter = float(fcd_disk_size) / float(fcd_image_size),
#                  name = "custom_mse"):
#         super().__init__(name="fcd_position_loss")
#         self.diameter = diameter

#     def call(self, y_true, y_pred):
#         # mse = tf.math.reduce_mean(tf.square(y_true - y_pred))
#         # reg = tf.math.reduce_mean(tf.square(0.5 - y_pred))
#         # return mse + reg * self.regularization_factor
#         # return fcd_position_loss_helper(y_true, y_pred)
#         return corresponding_distances(y_true, y_pred)

# class FCDDiskShapedLoss(Loss):
#     # def __init__(self, regularization_factor=0.1, name="custom_mse"):
#     def __init__(self,
#                  # diameter = fcd_image_size / fcd_disk_size,
#                 #  diameter = float(fcd_disk_size) / float(fcd_image_size),
#                  radius = (float(fcd_disk_size) / float(fcd_image_size)) / 2,
#                  name = "custom_mse"):
#         super().__init__(name="fcd_disk_shaped_loss")
#         self.radius = radius

#     def call(self, y_true, y_pred):
#         # d = corresponding_distances(y_true, y_pred)
#         # print("d", d)
#         # scaled = (d / self.radius)
#         # print("scaled", scaled)
#         # exponentiated = scaled ** 4
#         # print("exponentiated", exponentiated)
#         # return exponentiated
#         # return fcd_disk_shaped_loss_helper(self.radius, y_true, y_pred)
#         return fcd_disk_shaped_loss_helper(y_true, y_pred, self.radius)

# Calculates RELATIVE disk radius on the fly -- rewrite later.
def fcd_disk_radius():
    """Return the disk radius as a fraction of the image side length.

    Recomputed on every call from the globals "fcd_disk_size" (a diameter)
    and "fcd_image_size".
    """
    relative_diameter = float(fcd_disk_size) / float(fcd_image_size)
    return relative_diameter / 2

def fcd_disk_shaped_loss_helper(y_true, y_pred):
    """Per-example loss: (distance / disk radius) raised to the 4th power.

    The loss is 1 when the prediction lies exactly on the disk's edge, well
    below 1 inside the disk, and grows steeply outside it.
    Returns one value per pair of corresponding points in y_true and y_pred.
    """
    radius = fcd_disk_radius()
    distance_in_radii = corresponding_distances(y_true, y_pred) / radius
    return distance_in_radii ** 4

# Given two tensors of 2d point coordinates, return a tensor of the Cartesian
# distance between corresponding points in the input tensors.
def corresponding_distances(y_true, y_pred):
    """Euclidean distance between each pair of corresponding 2d points.

    Both inputs are split along axis 1 into an x column and a y column, so
    the result has one column holding the distance for each row.
    """
    x_true, y_true_col = tf.split(y_true, num_or_size_splits=2, axis=1)
    x_pred, y_pred_col = tf.split(y_pred, num_or_size_splits=2, axis=1)
    squared_distance = (tf.square(x_true - x_pred) +
                        tf.square(y_true_col - y_pred_col))
    return tf.sqrt(squared_distance)

In [7]:
# Prototype metric to measure the fraction of predictions that are inside disks.
# For each pair of 2d points of input, output tensor is 1 for IN and 0 for OUT.
# def fcd_prediction_inside_disk(y_true, y_pred):

# (make name shorter so it is easier to read fit() log.)
def in_disk(y_true, y_pred):
    """Metric: 1 where a prediction falls inside its disk, otherwise 0.

    A prediction counts as inside when its distance to the true center is
    strictly less than the relative disk radius. The name is kept short so it
    is easy to read in the fit() log.
    Boolean-to-int idiom from https://stackoverflow.com/a/42450565/1991373
    """
    is_inside = tf.less(corresponding_distances(y_true, y_pred),
                        fcd_disk_radius())
    return tf.cast(is_inside, tf.int32)


# Small hand-made example for trying out the metric and the loss: four true
# points and four predictions offset by 0.1, 0.2, 0 and 0.3 respectively.
example_true_positions = tf.convert_to_tensor([[1.0, 2.0],
                                               [3.0, 4.0],
                                               [5.0, 6.0],
                                               [7.0, 8.0]])
example_pred_positions = tf.convert_to_tensor([[1.1, 2.0],
                                               [3.0, 4.2],
                                            #    [5.0, 6.1],
                                               [5.0, 6.0],
                                               [7.3, 8.0]])

# Only the value of the cell's last expression is displayed by the notebook,
# so the in_disk() result is computed but not shown; the recorded output
# below this cell is the loss tensor.
in_disk(example_true_positions, example_pred_positions)
fcd_disk_shaped_loss_helper(example_true_positions, example_pred_positions)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 1.0995128],
       [17.592115 ],
       [ 0.       ],
       [89.06067  ]], dtype=float32)>

# Keras model utilities

In [8]:
# Construct a Keras model with CNN layers at the front, striding down in
# resolution, then dense layers funneling down to just two output neurons
# representing the predicted image position center of the conspicuous disk.
# (First version cribbed from DLAVA chapter B3, Listing B3-41)

# def make_striding_cnn_model():
def make_fcd_cnn_model():
    """Build and compile the Keras model that predicts the disk center.

    Architecture (first version cribbed from DLAVA chapter B3, Listing B3-41):
      * two identical convolution groups, each made of a 5x5 conv, a 3x3 conv
        and a 3x3 conv striding down by half, all with 32 filters, 'same'
        padding, a MaxNorm(3) kernel constraint, and dropout after each conv;
      * flatten, then a 512 unit dense layer with heavy dropout
        (0.5, ala Hinton 2012);
      * dense layers funneling down 128 -> 32 -> 8 -> 2, the last one linear,
        giving the predicted (x, y) of the disk center.

    Experiment log carried over from earlier revisions:
      20211215 maybe use 5x5 for first of each layer? more dropout?
               doubling the largest dense size and a dense dropout of 0.3
               were also tried.
      20211217 changed 3x3/3x3 to 5x5/3x3, tried 7x7/5x5, went back to
               5x5/3x3; temporarily turned dropout off, then added it all
               back; tried dropout on the 128 and 32 dense layers (removed);
               tried 64 filters instead of 32 (removed); added a second 3x3
               conv/dropout between the other two convs of each group;
               temporarily switched back to MSE loss.

    The model is currently compiled with 'mse' loss (not with
    fcd_disk_shaped_loss_helper) and tracks "accuracy" and in_disk.
    """
    conv_activation = 'relu'
    dense_activation = 'relu'
    output_activation = 'linear'
    cnn_dropout = 0.2
    dense_dropout = 0.5  # ala Hinton (2012)
    conv_filters = 32

    model = Sequential()

    def add_conv_with_dropout(kernel_size, **conv_options):
        # One convolution layer followed by its dropout layer.
        model.add(Conv2D(conv_filters, kernel_size,
                         activation=conv_activation, padding='same',
                         kernel_constraint=MaxNorm(3), **conv_options))
        model.add(Dropout(cnn_dropout))

    # Two convolution groups; only the very first layer declares input_shape.
    for group_index in range(2):
        if group_index == 0:
            first_layer_options = {
                'input_shape': (fcd_image_size, fcd_image_size, 3)}
        else:
            first_layer_options = {}
        add_conv_with_dropout((5, 5), **first_layer_options)
        add_conv_with_dropout((3, 3))
        add_conv_with_dropout((3, 3), strides=(2, 2))

    # Then flatten and use a large-ish dense layer with heavy dropout.
    model.add(Flatten())
    model.add(Dense(512, activation=dense_activation))
    model.add(Dropout(dense_dropout))

    # Then funnel down to two output neurons for (x, y) of predicted center.
    for units in (128, 32, 8):
        model.add(Dense(units, activation=dense_activation))
    model.add(Dense(2, activation=output_activation))

    model.compile(loss='mse',
                  optimizer='adam', metrics=["accuracy", in_disk])
    return model

In [9]:
# Utility to fit and plot a run, again cribbed from DLAVA chapter B3.
def run_model(model_maker, plot_title):
    """Build a model, fit it on the global training data, and plot the run.

    Args:
        model_maker: zero-argument callable returning a compiled Keras model.
        plot_title: prefix for the titles of the result plots.

    Returns:
        (model, history): the trained model and the History from fit().

    Reads the globals X_train, y_train, fcd_epochs and fcd_batch_size; the
    last 20% of the training data is held out for validation.
    (Again cribbed from DLAVA chapter B3.)
    """
    model = model_maker()
    print("In run_model():")
    for expression in ('X_train.shape', 'y_train.shape'):
        debug_print(expression)
    fit_options = dict(validation_split=0.2,
                       epochs=fcd_epochs,
                       batch_size=fcd_batch_size)
    history = model.fit(X_train, y_train, **fit_options)
    print()
    plot_accuracy_and_loss(history, plot_title)
    return model, history

# A little utility to draw plots of accuracy and loss.
def plot_accuracy_and_loss(history, plot_title):
    """Plot training and validation curves from a Keras History object.

    Draws three side-by-side panels (accuracy, fraction inside disk, loss),
    each showing the 'train' and 'validation' series over epochs.
    "plot_title" is used as the prefix of every panel title.
    """
    curves = history.history
    xs = range(len(curves['accuracy']))

    # (history key, y axis label, title suffix, legend location) per panel.
    panels = (
        ('accuracy', 'accuracy', ': Accuracy', 'lower left'),
        ('in_disk', 'fraction inside disk', ': fraction inside disk',
         'lower left'),
        ('loss', 'loss', ': Loss', 'upper left'),
    )

    plt.figure(figsize=(15,3))
    for panel_number, panel in enumerate(panels, start=1):
        key, y_label, title_suffix, legend_location = panel
        plt.subplot(1, 3, panel_number)
        plt.plot(xs, curves[key], label='train')
        plt.plot(xs, curves['val_' + key], label='validation')
        plt.legend(loc=legend_location)
        plt.xlabel('epochs')
        plt.ylabel(y_label)
        plt.title(plot_title + title_suffix)

    plt.show()

# Read training data

In [10]:
# Read FCD training data from a given directory.
# Re-seed first: the reader uses random.sample() both to pick/shuffle files
# and to choose which rotated/mirrored variations to keep.
reset_random_seeds()
start_time = time.time()
# Drop any previously loaded arrays and collect garbage BEFORE loading again,
# so that the old and new datasets are never held in memory at the same time.
(X_train, y_train) = ([], [])  # To release memory when rerunning in notebook.
gc.collect()
# X_train holds the images, y_train the relative (x, y) disk centers.
(X_train, y_train) = read_fcd_data_from_directory(fcd_ts_dir, max_input_images)
print('Total of', X_train.shape[0], 'labeled images.')
print('Elapsed time:', int(time.time() - start_time), 'seconds.')

Reading all images from /content/drive/My Drive/PredatorEye/fcd_training_set/
With an amplification factor of 6 for a total of 30000 images in dataset.


  0%|          | 0/5000 [00:00<?, ?it/s]

Total of 30000 labeled images.
Elapsed time: 149 seconds.


# Build and train model

In [None]:
# Run a model.
# Re-seed so that training starts from the same random state on every run.
reset_random_seeds()
start_time = time.time()
# Drop any model and history left over from a previous run of this cell and
# collect garbage before building the new model.
(model, history) = ([], [])  # To release memory when rerunning in notebook.
gc.collect()
# (model, history) = run_model(make_striding_cnn_model, 'FCD')
# Builds, fits and plots; "model" and "history" are module globals (the
# analysis cell below reads "model").
(model, history) = run_model(make_fcd_cnn_model, 'FCD')
print('Elapsed time:', int(time.time() - start_time), 'seconds.')

In run_model():
X_train.shape = (30000, 128, 128, 3)
y_train.shape = (30000, 2)
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100

# Analyze results

In [None]:
# Draw some results to understand performance
# TODO needs to discriminate between training and validation sets.
#      feature best/worse results of both cases?
# 20211210 refactor to not run predict on whole training set
def draw_results(count = 20):
    """Show "count" randomly chosen training images with predicted centers.

    For each sampled image the global "model" predicts the disk center (one
    image per predict() call rather than the whole training set), the
    prediction is printed, and the image is drawn with crosshairs at the
    predicted position.
    """
    sampled_indices = random.sample(range(X_train.shape[0]), count)
    for index in sampled_indices:
        pixels = X_train[index, :, :, :]
        # predict() expects a batch, so wrap the image in a batch of one.
        batch_of_one = tf.convert_to_tensor([pixels])
        prediction = model.predict(batch_of_one)[0]
        x, y = prediction[0], prediction[1]
        print(index, ": (", x, ",", y, ")")
        draw_image(pixels, [x, y])

# Re-seed so the same sample of images is shown on every run.
reset_random_seeds()
draw_results()