# Image Classification and Object Localization


## Imports

In [1]:
import os, re, time, json
import PIL.Image, PIL.ImageFont, PIL.ImageDraw
import numpy as np
try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass
import tensorflow as tf
from matplotlib import pyplot as plt
# import tensorflow_datasets as tfds

# Report the TensorFlow version that was imported above.
print("Tensorflow version " + tf.__version__)
# NOTE(review): the Intel OpenMP duplicate-library override is normally spelled
# KMP_DUPLICATE_LIB_OK, and it is set here only after TensorFlow has been
# imported — confirm whether this line has any effect as written.
os.environ['KMP_DUPLICATION_LIB_OK']="TRUE"

2023-11-05 18:37:41.971595: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-05 18:37:41.971651: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-05 18:37:41.971676: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-05 18:37:41.978688: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Tensorflow version 2.14.0


# Visualization Utilities

These functions are used to draw bounding boxes around the labeled objects.

In [2]:
# Show the physical GPU devices that TensorFlow can see.
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2023-11-05 18:37:45.797903: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-05 18:37:45.813538: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-05 18:37:45.813581: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


These utilities are used to visualize the data and predictions.

In [3]:
BATCH_SIZE = 1 # Global batch size.

## Get data
Read images into an array of (N, H, W, C)

In [4]:
import glob
from PIL import Image
# Read training and testing dataset
def get_images_labels_bboxes(fdir):
    """
    Load every ``*.tif`` image in ``fdir`` together with its bounding-box labels.

    Each image ``<name>.tif`` must have a sibling ``<name>.json`` file holding a
    ``"shapes"`` list. Every shape provides a ``"label"`` string and two corner
    ``"points"`` (``[x, y]`` pairs) of an axis-aligned rectangle.

    Args:
        fdir: Directory containing the ``.tif`` images and ``.json`` label files.

    Returns:
        Tuple ``(images, labels, bboxes)``:
          * images: array built from the RGB images, in sorted filename order.
          * labels: class id per shape (1 for "GCP_RED", 2 for "GCP").
          * bboxes: ``[xmin, ymin, xmax, ymax]`` per shape, same order as labels.

    Raises:
        ValueError: if a shape carries a label other than "GCP_RED" or "GCP".

    Note:
        Images are collected per file, labels and boxes per shape, so the three
        arrays only line up one-to-one when every file holds exactly one shape.
    """
    class_ids = {"GCP_RED": 1, "GCP": 2}

    # Sort so the sample order is reproducible (glob order is arbitrary).
    img_fps = sorted(glob.glob(os.path.join(fdir, "*.tif")))

    img_list = []
    labels_coords = []  # [xmin, ymin, xmax, ymax] per shape
    labels_class = []   # class id per shape, same order as labels_coords
    for img_fp in img_fps:
        # Swap only the extension; str.replace would rewrite any ".tif" in the path.
        label_fp = os.path.splitext(img_fp)[0] + ".json"

        # Convert inside the context manager so the file handle is released.
        with Image.open(img_fp) as img:
            img_list.append(np.array(img.convert("RGB")))

        with open(label_fp) as f:
            img_label = json.load(f)

        for label in img_label['shapes']:
            label_name = label['label']
            if label_name not in class_ids:
                # Previously an unknown label silently reused the last class id
                # (or crashed with an unbound variable on the first shape).
                raise ValueError(
                    "Unknown label %r in %s; expected one of %s"
                    % (label_name, label_fp, sorted(class_ids))
                )
            first, second = label['points'][0], label['points'][1]
            # Normalise the corners so the box is valid whichever corner was drawn first.
            xmin, xmax = min(first[0], second[0]), max(first[0], second[0])
            ymin, ymax = min(first[1], second[1]), max(first[1], second[1])
            labels_coords.append([xmin, ymin, xmax, ymax])
            labels_class.append(class_ids[label_name])

    img_stack = np.array(img_list)

    return (img_stack, np.array(labels_class), np.array(labels_coords))



In [5]:
(training_images, training_labels, training_bboxes) = get_images_labels_bboxes("./train")

In [6]:
(test_images, test_labels, test_bboxes) = get_images_labels_bboxes("./test")

In [7]:
print(test_images.shape)

(6, 424, 424, 3)


## Define the Network

Here, you'll define your custom CNN.
- `feature_extractor`: these convolutional layers extract the features of the image.
- `classifier`: This defines the output layer that predicts one of 2 categories: GCP_RED (label 1) or GCP (label 2).
- `bounding_box_regression`: This defines the output layer that predicts 4 numeric values, which define the coordinates of the bounding box (xmin, ymin, xmax, ymax)
- `final_model`: This combines the layers for feature extraction, classification and bounding box prediction.  
  - This is a branching model, because the model splits to produce two kinds of output (a category and set of numbers).  
- `define_and_compile_model`: choose the optimizer and metrics, then compile the model.

In [8]:
def feature_extractor(inputs):
    """
    Convolutional feature extractor: three Conv2D + AveragePooling2D stages.

    Args:
        inputs: Keras tensor holding the image batch.

    Returns:
        Keras tensor with the feature maps produced by the last pooling layer.
    """
    x = inputs
    # The filter count doubles at every stage. No `input_shape` is passed to the
    # first layer: the shape comes from the tensor the layer is called on.
    for filters in (16, 32, 64):
        x = tf.keras.layers.Conv2D(filters, kernel_size=3, activation='relu')(x)
        x = tf.keras.layers.AveragePooling2D((2, 2))(x)

    return x

def dense_layers(inputs):
    """
    Flatten the incoming feature maps and pass them through a 128-unit ReLU layer.

    Intended to follow the feature extraction layers.
    """
    flattened = tf.keras.layers.Flatten()(inputs)
    return tf.keras.layers.Dense(128, activation='relu')(flattened)


def classifier(inputs):
    """
    Classification head: a 2-unit softmax layer named 'classification'.

    Note: a single-unit softmax would always output 1.0, so one unit per class
    is required. With a sparse categorical loss the integer targets must be the
    0-based class indices 0 and 1.
    """
    softmax_head = tf.keras.layers.Dense(2, activation='softmax', name = 'classification')
    return softmax_head(inputs)


def bounding_box_regression(inputs):
    """
    Regression head for bounding box prediction.

    Produces four linear outputs corresponding to (xmin, ymin, xmax, ymax) in a
    layer named 'bounding_box'.
    """
    # `units` must be an integer; the string '4' only works where Keras
    # happens to coerce it.
    bounding_box_regression_output = tf.keras.layers.Dense(units = 4, name = 'bounding_box')(inputs)
    return bounding_box_regression_output


def final_model(inputs):
    """
    Assemble the branching model: feature extractor -> dense layer -> two heads.

    Returns a tf.keras.Model whose outputs are
    [classification_output, bounding_box_output].
    """
    shared = dense_layers(feature_extractor(inputs))

    # The model branches here: the shared dense output feeds both the
    # classification head and the bounding box head.
    heads = [classifier(shared), bounding_box_regression(shared)]

    return tf.keras.Model(inputs = inputs, outputs = heads)


def define_and_compile_model(inputs):
    """
    Build the two-headed model and compile it with the Adam optimizer.

    Losses and metrics are keyed by output layer name: sparse categorical
    cross-entropy and accuracy for 'classification', MSE for 'bounding_box'.

    Returns the full, compiled model.
    """
    losses = {
        'classification' : 'sparse_categorical_crossentropy',
        'bounding_box' : 'mse',
    }
    tracked_metrics = {
        'classification' : 'accuracy',
        'bounding_box' : 'mse',
    }

    compiled = final_model(inputs)
    compiled.compile(optimizer='adam', loss=losses, metrics=tracked_metrics)
    return compiled



# Symbolic input for 424x424 RGB images. `shape` excludes the batch dimension;
# Keras adds it itself (shown as `None` in the summary below).
inputs = tf.keras.layers.Input(shape=(424, 424, 3))
# Build and compile the two-headed model on top of that input.
model = define_and_compile_model(inputs)

# Print the model layers.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 424, 424, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 422, 422, 16)         448       ['input_1[0][0]']             
                                                                                                  
 average_pooling2d (Average  (None, 211, 211, 16)         0         ['conv2d[0][0]']              
 Pooling2D)                                                                                       
                                                                                                  
 conv2d_1 (Conv2D)           (None, 209, 209, 32)         4640      ['average_pooling2d[0][0]'

2023-11-05 18:38:16.716963: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-05 18:38:16.717056: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-05 18:38:16.717091: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-05 18:38:17.583107: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-05 18:38:17.583163: I tensorflow/compile

### Train and validate the model

In [9]:
# Combine the class labels and bounding boxes into dictionaries keyed by the
# name of the model output each target belongs to.
#
# 'sparse_categorical_crossentropy' needs class indices in [0, num_classes).
# The loader emits ids 1 (GCP_RED) and 2 (GCP), so id 2 would be out of range
# for the 2-unit softmax head. Shift to 0-based indices here; add 1 to a
# predicted index to get back the original id.
trainTargets = {
    "classification": training_labels - 1,
    "bounding_box": training_bboxes
}

testTargets = {
    "classification": test_labels - 1,
    "bounding_box": test_bboxes
}

In [10]:
EPOCHS = 5 # 45

# Pass BATCH_SIZE explicitly: without it Keras batches in-memory arrays with its
# default of 32, which does not match a step count derived from BATCH_SIZE.
# With array inputs Keras infers the number of training and validation steps
# itself, so every sample is used once per epoch and the whole validation set
# is scored.
history = model.fit(training_images, trainTargets,
                    batch_size=BATCH_SIZE,
                    validation_data=(test_images, testTargets),
                    epochs=EPOCHS)


Epoch 1/5


2023-11-05 18:38:32.404212: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8700
2023-11-05 18:38:51.942315: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-11-05 18:38:53.149187: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-11-05 18:38:53.474278: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f1328e36e80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-11-05 18:38:53.474331: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 1060 6GB, Compute Capability 6.1
2023-11-05 18:38:53.478764: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-11-05 18:38:53.585642: I ./tensorflow/compiler/jit/device_

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Evaluate on the complete held-out set. No `steps` argument is given, so
# samples beyond the first batch are not silently skipped.
# Unpacking order: total loss, per-output losses, then per-output metrics.
loss, classification_loss, bounding_box_loss, classification_accuracy, bounding_box_mse = model.evaluate(test_images, testTargets)
print("Validation accuracy: ", classification_accuracy)