In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import keras.backend as K
from tensorflow.python.keras.regularizers import l2
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

2023-12-15 23:17:44.494918: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Num GPUs Available:  1


In [5]:
class YOLOv1_LastLayer_Reshape(tf.keras.layers.Layer):
    """
    Defines a costume layer for reshaping the last layer to YOLOv1 compatible layer.
    Note, No build function is needed.
    """
    def __init__(self, targetShape):
        """
        Initializes the layer.
        """
        super().__init__()
        self.targetShape = tuple(targetShape)
    
    def get_config(self):
        """
        Helps in serializing the layer data
        """
        config = super().get_config()
        config.update({"target_shape": self.targetShape})
        return config
    
    def call(self, layerInput):
        """
        Forward computations. We take the first Sx * Sy * C indexes of each the input 
        vector to resemble the class probabilities of each grid cell. The rest of the 
        Sx * Sy * B indexes resemble the confidence scores of each grid cell And the 
        rest resemble the bounding box parameters <boxCenterX, boxCenterY, width, height>  

        Args:
            layerInput: tensor: The output from a dense (fully connected) layer.
        """
        
        Sx, Sy = self.targetShape[0], self.targetShape[1] # Number of parts that each axis is divided to
        C = 1 # Number of classes
        B = 2 # Number of predicted bounding boxes per grid cell

        # Get the batch size
        batchSize = tf.keras.backend.shape(layerInput)[0]

        # Class probabilities
        classProbs = tf.keras.backend.reshape(layerInput[:,:Sx*Sy*C], (batchSize,) + (Sx,Sy,C))
        classProbs = tf.keras.backend.softmax(classProbs) # Run a softmax to choose the right class with highest prob

        # Confidence scores
        confScores = tf.keras.backend.reshape(layerInput[:,Sx*Sy*C:Sx*Sy*(C+B)], (batchSize,) + (Sx,Sy,B))
        confScores = tf.keras.backend.sigmoid(confScores) # Confidence scores should be between 0 and 1

        # Bounding boxes
        bBox = tf.keras.backend.reshape(layerInput[:,Sx*Sy*(C+B):], (batchSize,) + (Sx,Sy,B))
        bBox = tf.keras.backend.sigmoid(bBox) # All of the bounding box parameters are relative (Between 0 and 1)

        return tf.keras.backend.concatenate([classProbs, confScores, bBox])



In [6]:
# YOLOv1 Loss

def calcIOU(output, groundTruth):
    """
    Calculates intersection over union for two bounding boxes.

    Args:
        output: tuple: Coordinates of the outputted bounding box (x_topLeft,y_topLeft,x_bottomRight,y_bottomRight)
        output: tuple: Coordinates of the ground truth bounding box (x_topLeft,y_topLeft,x_bottomRight,y_bottomRight)
    
    Returns:
        Intersection over union of two boxes
    """

    unionX_topLeft = tf.maximum(output[0], groundTruth[0])
    unionY_topLeft = tf.maximum(output[1], groundTruth[1])
    unionX_bottomRight = tf.minimum(output[2], groundTruth[2])
    unionY_bottomRight = tf.minimum(output[3], groundTruth[3])

    unionArea = tf.multiply(tf.abs(unionX_topLeft-unionX_bottomRight),tf.abs(unionY_topLeft-unionY_bottomRight))
    outputArea = tf.multiply(tf.abs(output[0]-output[2]),tf.abs(output[1]-output[3]))
    groundTruthArea = tf.multiply(tf.abs(groundTruth[0]-groundTruth[2]),tf.abs(groundTruth[1]-groundTruth[3]))

    return tf.divide(unionArea,outputArea + groundTruthArea - unionArea)

In [7]:
# YOLOv1 structure
YOLOv1_inputShape = (448,448,3) # Shape of the input image 
classNo = 1 # Number of classes we are trying to detect
input = tf.keras.layers.Input(shape=YOLOv1_inputShape)
leakyReLu = tf.keras.layers.LeakyReLU(alpha = .1)


# The backbone, Acts ads a feature extractor
# L1
x = tf.keras.layers.Conv2D(filters = 64, kernel_size=7, strides = 2, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(input)
x = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding = "same")(x)

# L2
x = tf.keras.layers.Conv2D(filters = 192, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding = "same")(x)

# L3
x = tf.keras.layers.Conv2D(filters = 128, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 256, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 256, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 512, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding = "same")(x)

# L4
for _ in range(4):
    x = tf.keras.layers.Conv2D(filters = 256, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
    x = tf.keras.layers.Conv2D(filters = 512, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 512, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding = "same")(x)

# L5
x = tf.keras.layers.Conv2D(filters = 512, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 512, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 2, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)

# L6
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)

# Neck
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(4096)(x)
x = tf.keras.layers.Dense(7*7*(5*2+classNo))(x)
x = tf.keras.layers.Dropout(.5)(x) # Dropout layer for avoiding overfitting
# x = tf.keras.layers.Reshape((7,7,5*2+classNo))(x)
x = YOLOv1_LastLayer_Reshape((7,7,5*2+classNo))(x)
model = tf.keras.Model(inputs = input, outputs = x, name = "YOLOv1")

for layer in model.layers:
    print(layer.output_shape)

[(None, 448, 448, 3)]
(None, 224, 224, 64)
(None, 112, 112, 64)
(None, 112, 112, 192)
(None, 56, 56, 192)
(None, 56, 56, 128)
(None, 56, 56, 256)
(None, 56, 56, 256)
(None, 56, 56, 512)
(None, 28, 28, 512)
(None, 28, 28, 256)
(None, 28, 28, 512)
(None, 28, 28, 256)
(None, 28, 28, 512)
(None, 28, 28, 256)
(None, 28, 28, 512)
(None, 28, 28, 256)
(None, 28, 28, 512)
(None, 28, 28, 512)
(None, 28, 28, 1024)
(None, 14, 14, 1024)
(None, 14, 14, 512)
(None, 14, 14, 1024)
(None, 14, 14, 512)
(None, 14, 14, 1024)
(None, 14, 14, 1024)
(None, 7, 7, 1024)
(None, 7, 7, 1024)
(None, 7, 7, 1024)
(None, 50176)
(None, 4096)
(None, 539)
(None, 539)
(None, 7, 7, 5)
