# Image Classification and Object Localization


## Imports

In [None]:
import os, re, time, json
import PIL.Image, PIL.ImageFont, PIL.ImageDraw
import numpy as np
try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass
import tensorflow as tf
from matplotlib import pyplot as plt
# import tensorflow_datasets as tfds

# Report which TensorFlow build this notebook is running against.
print(f"Tensorflow version {tf.__version__}")
# NOTE(review): presumably a workaround for the duplicate OpenMP runtime
# abort seen on some installs - confirm it is still needed.
os.environ["KMP_DUPLICATION_LIB_OK"] = "TRUE"

# Visualization Utilities

These functions are used to draw bounding boxes around the labelled GCP targets.

In [None]:
# Show the GPU devices TensorFlow can see (prints an empty list when none are found).
print(tf.config.list_physical_devices('GPU'))

These utilities are used to visualize the data and predictions.

In [None]:
BATCH_SIZE = 1 # Global batch size.

## Get data
Read images into an array of (N, H, W, C)

In [None]:
import glob
from PIL import Image
# Read training and testing dataset
def get_images_labels_bboxes(fdir):
    """Load every ``*.tif`` image in *fdir* together with its JSON annotations.

    Each image ``name.tif`` must have a sibling ``name.json`` file whose
    ``"shapes"`` list holds one entry per annotated box. Every entry needs a
    ``"label"`` (``"GCP_RED"`` or ``"GCP"``) and ``"points"``, where
    ``points[0]`` is read as the upper-left corner and ``points[1]`` as the
    lower-right corner of the box.

    Args:
        fdir: Directory containing the ``.tif`` images and ``.json`` labels.

    Returns:
        A tuple ``(images, labels, bboxes)`` of NumPy arrays:

        * ``images`` - one RGB array per image file, combined with
          ``np.array`` (N x H x W x 3 when all images share one size).
        * ``labels`` - one class id per annotated shape
          (1 = ``GCP_RED``, 2 = ``GCP``).
        * ``bboxes`` - shape ``(num_shapes, 4)`` with columns
          ``(xmin, ymin, xmax, ymax)``.

        ``labels`` and ``bboxes`` have one row per *shape* while ``images``
        has one entry per *file*; they only line up one-to-one when every
        JSON file contains exactly one shape.

    Raises:
        ValueError: If a shape carries a label other than ``GCP_RED``/``GCP``.
    """
    class_ids = {"GCP_RED": 1, "GCP": 2}

    # Sort so the sample order is reproducible; glob order is unspecified.
    img_fps = sorted(glob.glob(os.path.join(fdir, "*.tif")))

    img_list = []
    labels_coords = []  # one [xmin, ymin, xmax, ymax] per shape
    labels_class = []   # class ids, in the same order as labels_coords
    for img_fp in img_fps:
        # Swap only the extension; str.replace would also rewrite a ".tif"
        # that happens to occur earlier in the path.
        label_fp = os.path.splitext(img_fp)[0] + ".json"

        img_list.append(np.array(Image.open(img_fp).convert("RGB")))

        with open(label_fp, encoding="utf-8") as f:
            img_label = json.load(f)

        for shape in img_label["shapes"]:
            name = shape["label"]
            if name not in class_ids:
                # Previously an unknown label silently reused the class of
                # the preceding shape (or failed with UnboundLocalError).
                raise ValueError(
                    f"Unknown label {name!r} in {label_fp}; "
                    f"expected one of {sorted(class_ids)}"
                )
            (xmin, ymin), (xmax, ymax) = shape["points"][0][:2], shape["points"][1][:2]
            labels_coords.append([xmin, ymin, xmax, ymax])
            labels_class.append(class_ids[name])

    img_stack = np.array(img_list)
    # reshape keeps the (num_shapes, 4) layout even when nothing was found.
    bboxes = np.array(labels_coords).reshape(-1, 4)

    return (img_stack, np.array(labels_class), bboxes)



In [None]:
# Load the training split from the local ./train folder.
training_images, training_labels, training_bboxes = get_images_labels_bboxes("./train")

In [None]:
# Load the held-out split from the local ./test folder.
test_images, test_labels, test_bboxes = get_images_labels_bboxes("./test")

### Get data

In [None]:
import glob
from PIL import Image
# Read training and testing dataset
def get_images_labels_bboxes(fdir):
    """Load every ``*.tif`` image in *fdir* together with its JSON annotations.

    Each image ``name.tif`` must have a sibling ``name.json`` file whose
    ``"shapes"`` list holds one entry per annotated box. Every entry needs a
    ``"label"`` (``"GCP_RED"`` or ``"GCP"``) and ``"points"``, where
    ``points[0]`` is read as the upper-left corner and ``points[1]`` as the
    lower-right corner of the box.

    Args:
        fdir: Directory containing the ``.tif`` images and ``.json`` labels.

    Returns:
        A tuple ``(images, labels, bboxes)`` of NumPy arrays:

        * ``images`` - one RGB array per image file, combined with
          ``np.array`` (N x H x W x 3 when all images share one size).
        * ``labels`` - one class id per annotated shape
          (1 = ``GCP_RED``, 2 = ``GCP``).
        * ``bboxes`` - shape ``(num_shapes, 4)`` with columns
          ``(xmin, ymin, xmax, ymax)``.

        ``labels`` and ``bboxes`` have one row per *shape* while ``images``
        has one entry per *file*; they only line up one-to-one when every
        JSON file contains exactly one shape.

    Raises:
        ValueError: If a shape carries a label other than ``GCP_RED``/``GCP``.
    """
    class_ids = {"GCP_RED": 1, "GCP": 2}

    # Sort so the sample order is reproducible; glob order is unspecified.
    img_fps = sorted(glob.glob(os.path.join(fdir, "*.tif")))

    img_list = []
    labels_coords = []  # one [xmin, ymin, xmax, ymax] per shape
    labels_class = []   # class ids, in the same order as labels_coords
    for img_fp in img_fps:
        # Swap only the extension; str.replace would also rewrite a ".tif"
        # that happens to occur earlier in the path.
        label_fp = os.path.splitext(img_fp)[0] + ".json"

        img_list.append(np.array(Image.open(img_fp).convert("RGB")))

        with open(label_fp, encoding="utf-8") as f:
            img_label = json.load(f)

        for shape in img_label["shapes"]:
            name = shape["label"]
            if name not in class_ids:
                # Previously an unknown label silently reused the class of
                # the preceding shape (or failed with UnboundLocalError).
                raise ValueError(
                    f"Unknown label {name!r} in {label_fp}; "
                    f"expected one of {sorted(class_ids)}"
                )
            (xmin, ymin), (xmax, ymax) = shape["points"][0][:2], shape["points"][1][:2]
            labels_coords.append([xmin, ymin, xmax, ymax])
            labels_class.append(class_ids[name])

    img_stack = np.array(img_list)
    # reshape keeps the (num_shapes, 4) layout even when nothing was found.
    bboxes = np.array(labels_coords).reshape(-1, 4)

    return (img_stack, np.array(labels_class), bboxes)



In [None]:
# Load the training split from the processed ground-truth folder (absolute path).
training_images, training_labels, training_bboxes = get_images_labels_bboxes(
    "/mnt/e/projects/burner-airfield/data/processed/gt/train"
)

In [None]:
# Load the held-out split from the processed ground-truth folder (absolute path).
test_images, test_labels, test_bboxes = get_images_labels_bboxes(
    "/mnt/e/projects/burner-airfield/data/processed/gt/test"
)

In [None]:
# Display the shape of the stacked training images as a sanity check.
training_images.shape

## Define the Network

Here, you'll define your custom CNN.
- `feature_extractor`: these convolutional layers extract the features of the image.
- `classifier`:  This defines the output layer that predicts among 2 categories (GCP_RED and GCP (or 1 and 2))
- `bounding_box_regression`: This defines the output layer that predicts 4 numeric values, which define the coordinates of the bounding box (xmin, ymin, xmax, ymax)
- `final_model`: This combines the layers for feature extraction, classification and bounding box prediction.  
  - This is a branching model, because the model splits to produce two kinds of output (a category and set of numbers).  
- `define_and_compile_model`: choose the optimizer and metrics, then compile the model.

In [None]:
def feature_extractor(inputs):
    """Run *inputs* through three Conv2D + AveragePooling2D stages.

    The stages use 16, 32 and 64 filters respectively, each with a 3x3
    kernel and ReLU activation, followed by 2x2 average pooling.

    No ``input_shape`` is given to the first convolution: the layers are
    applied functionally to *inputs*, so the shape comes from that tensor
    and a hard-coded value would only be redundant.

    Args:
        inputs: Keras tensor holding the image batch.

    Returns:
        The feature-map tensor produced by the last pooling layer.
    """
    x = inputs
    for filters in (16, 32, 64):
        x = tf.keras.layers.Conv2D(filters, kernel_size=3, activation='relu')(x)
        x = tf.keras.layers.AveragePooling2D((2, 2))(x)
    return x

def dense_layers(inputs):
    """Flatten the feature maps and pass them through a 128-unit ReLU layer.

    Meant to be applied to the output of the feature-extraction layers.
    """
    flattened = tf.keras.layers.Flatten()(inputs)
    return tf.keras.layers.Dense(128, activation='relu')(flattened)


def classifier(inputs, num_classes=3):
    """Build the softmax classification head named ``classification``.

    The training labels in this notebook are the class ids 1 (GCP_RED) and
    2 (GCP), and the model is compiled with
    ``sparse_categorical_crossentropy``, which treats each label as an index
    into the softmax output. The head therefore needs ``max_label + 1 = 3``
    units (index 0 is simply never used); with only 2 units the label 2 is
    out of range.

    Args:
        inputs: Keras tensor coming from the shared dense layer.
        num_classes: Number of softmax units; must be greater than the
            largest class id present in the labels.

    Returns:
        The classification output tensor.
    """
    classification_output = tf.keras.layers.Dense(
        num_classes, activation='softmax', name='classification'
    )(inputs)
    return classification_output


def bounding_box_regression(inputs):
    """Build the linear regression head named ``bounding_box``.

    The head has four outputs, corresponding to (xmin, ymin, xmax, ymax).

    Args:
        inputs: Keras tensor coming from the shared dense layer.

    Returns:
        The bounding-box output tensor.
    """
    # `units` must be an integer; the previous string '4' only worked where
    # Keras happened to coerce it.
    bounding_box_regression_output = tf.keras.layers.Dense(units=4, name='bounding_box')(inputs)
    return bounding_box_regression_output


def final_model(inputs):
    """Assemble the two-headed model on top of *inputs*.

    The image goes through the convolutional feature extractor and the
    shared dense layer; that shared output then feeds two branches, the
    classification head and the bounding-box head.

    Returns:
        A ``tf.keras.Model`` whose outputs are
        ``[classification_output, bounding_box_output]``.
    """
    shared = dense_layers(feature_extractor(inputs))

    # The model branches here: both heads read the same dense output.
    class_head = classifier(shared)
    bbox_head = bounding_box_regression(shared)

    return tf.keras.Model(inputs=inputs, outputs=[class_head, bbox_head])


def define_and_compile_model(inputs):
    """Build the branching model and compile it with Adam.

    Each output is configured by its layer name: the ``classification``
    head uses sparse categorical cross-entropy and reports accuracy, the
    ``bounding_box`` head uses mean squared error for both loss and metric.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    per_output_loss = {
        'classification': 'sparse_categorical_crossentropy',
        'bounding_box': 'mse',
    }
    per_output_metrics = {
        'classification': 'accuracy',
        'bounding_box': 'mse',
    }

    compiled = final_model(inputs)
    compiled.compile(optimizer='adam', loss=per_output_loss, metrics=per_output_metrics)
    return compiled



# Symbolic input for 424 x 424 RGB images; the batch dimension is implicit.
inputs = tf.keras.layers.Input(shape=(424, 424, 3))
model = define_and_compile_model(inputs)

# Print the layer-by-layer summary of the compiled model.
model.summary()

### Train and validate the model

In [None]:
# Keras matches each target to a model output by layer name, so the class
# labels and bounding boxes are bundled into one dict per split.
trainTargets = dict(classification=training_labels, bounding_box=training_bboxes)

testTargets = dict(classification=test_labels, bounding_box=test_bboxes)

In [None]:
EPOCHS = 5 # 45

# Number of training batches per epoch. This only matches the data when
# model.fit() is told to use the same BATCH_SIZE (see batch_size below);
# otherwise Keras batches the arrays by its default of 32 and runs out of
# data before the requested number of steps.
steps_per_epoch = trainTargets["bounding_box"].shape[0] // BATCH_SIZE

# Ceil-divide so that validation covers the whole test set rather than a
# single batch.
validation_steps = max(1, -(-testTargets["bounding_box"].shape[0] // BATCH_SIZE))

history = model.fit(training_images, trainTargets,
                    batch_size=BATCH_SIZE,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=(test_images, testTargets),
                    validation_steps=validation_steps, epochs=EPOCHS)


In [None]:
# Evaluate on the full test set. `steps` is left unset so Keras iterates
# over every sample; steps=1 would score only the first batch of the arrays.
loss, classification_loss, bounding_box_loss, classification_accuracy, bounding_box_mse = model.evaluate(test_images, testTargets)
print("Validation accuracy: ", classification_accuracy)