In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import keras.backend as K
import sys, os
from tensorflow.python.keras.regularizers import l2
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


here = os.path.dirname(".")
sys.path.append(os.path.join(here, '..'))

from dataHandler import *

2024-07-14 09:08:22.750743: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Num GPUs Available:  1


2024-07-14 09:08:24.882282: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-07-14 09:08:24.903091: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-07-14 09:08:24.903273: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [2]:
class YOLOv1_LastLayer_Reshape(tf.keras.layers.Layer):
    """
    Defines a costume layer for reshaping the last layer to YOLOv1 compatible layer.
    Note, No build function is needed.
    """
    def __init__(self, targetShape):
        """
        Initializes the layer.
        """
        super().__init__()
        self.targetShape = tuple(targetShape)
    
    def get_config(self):
        """
        Helps in serializing the layer data
        """
        config = super().get_config()
        config.update({"target_shape": self.targetShape})
        return config

    def call(self, layerInput):
        """
        Forward computations. We take the first Sx * Sy * C indexes of each the input 
        vector to resemble the class probabilities of each grid cell. The rest of the 
        Sx * Sy * B indexes resemble the confidence scores of each grid cell And the 
        rest resemble the bounding box parameters <boxCenterX, boxCenterY, width, height>  

        Args:
            layerInput: tensor: The output from a dense (fully connected) layer.
        """
        
        Sx, Sy = self.targetShape[0], self.targetShape[1] # Number of parts that each axis is divided to
        C = 1 # Number of classes
        B = 2 # Number of predicted bounding boxes per grid cell


        # Get the batch size
        batchSize = tf.keras.backend.shape(layerInput)[0]

        # Class probabilities
        classProbs = tf.keras.backend.reshape(layerInput[:,:Sx*Sy*C], (batchSize,) + (Sx,Sy,C))
        classProbs = tf.keras.backend.softmax(classProbs) # Run a softmax to choose the right class with highest prob

        # Confidence scores
        confScores = tf.keras.backend.reshape(layerInput[:,Sx*Sy*C:Sx*Sy*(C+B)], (batchSize,) + (Sx,Sy,B))
        confScores = tf.keras.backend.sigmoid(confScores) # Confidence scores should be between 0 and 1

        # Bounding boxes
        bBox = tf.keras.backend.reshape(layerInput[:,Sx*Sy*(C+B):], (batchSize,) + (Sx,Sy,B*4))
        tf.print(layerInput[:,Sx*Sy*(C+B):].shape)
        bBox = tf.keras.backend.sigmoid(bBox) # All of the bounding box parameters are relative (Between 0 and 1)


        return tf.keras.backend.concatenate([classProbs, confScores, bBox])

# # # Define a simple model using the custom reshaper layer to test it
# # input = tf.keras.layers.Input(shape=(539,))
# # x = YOLOv1_LastLayer_Reshape((7,7,11))(input)
# # model = tf.keras.Model(inputs = input, outputs = x, name = "dummy")
# # model.compile(optimizer='adam',  loss='mse', metrics=['accuracy'])

# # xTest = np.random.randint(10, size = (1,539))
# # pred = model.predict(xTest)
# # print(pred.shape)

In [3]:
# Get test  ground truth vector
df = generateGroundTruth_YOLOv1("../data/labels/train", "txt", (7,7,1,1))
yTruth = df.at["018329bef1f1186f", "vector"]

In [4]:
def calcIOU(output, groundTruth):
    """
    Calculates intersection over union for two bounding boxes.

    Args:
        output: tuple: Coordinates of the outputted bounding box (x_topLeft,y_topLeft,x_bottomRight,y_bottomRight)
        output: tuple: Coordinates of the ground truth bounding box (x_topLeft,y_topLeft,x_bottomRight,y_bottomRight)
    
    Returns:
        Intersection over union of two boxes
    """

    unionX_topLeft = tf.maximum(output[0], groundTruth[0])
    unionY_topLeft = tf.maximum(output[1], groundTruth[1])
    unionX_bottomRight = tf.minimum(output[2], groundTruth[2])
    unionY_bottomRight = tf.minimum(output[3], groundTruth[3])

    unionArea = tf.multiply(tf.abs(unionX_topLeft-unionX_bottomRight),tf.abs(unionY_topLeft-unionY_bottomRight))
    outputArea = tf.multiply(tf.abs(output[0]-output[2]),tf.abs(output[1]-output[3]))
    groundTruthArea = tf.multiply(tf.abs(groundTruth[0]-groundTruth[2]),tf.abs(groundTruth[1]-groundTruth[3]))

    return tf.divide(unionArea,outputArea + groundTruthArea - unionArea)


# YOLOv1 Loss
class YOLOv1_Loss(tf.keras.losses.Loss):
    """
    Defines the custom loss function that is used for YOLOv1 network.
    The loss is calculated in 3 parts:
    1. Localization Loss
    2. Confidence Loss
    3. Classification Loss

    Note: In this method's documentation, "ground truth" and "target" are used interchangeably 
    """
    
    def __init__(self, Sx = 7, Sy = 7, B_target = 1, B_pred = 2, C = 1):
        """
        Initializes the loss function. 

        Args:
            Sx: int: Number of grid cells on x axis
            Sy: int: Number of grid cells on y axis 
            B_target: int: Number of bounding boxes in the ground truth data
            B_pred: int: Number of bounding boxes in prediction
            C: int: Number of the classes
        """
        super().__init__()

        # Define YOLOv1 parameters
        self.Sx = Sx
        self.Sy = Sy
        self.B_target = B_target # Ground truth grid cells only have one bounding box 
        self.B_pred = B_pred
        self.C = C

    def call(self, yTrue, yPred):
        # Split the predictions and ground truth vectors to coordinates, confidence and class matrices
        # 1. Ground truth 
        idx1, idx2 = self.C, self.C + self.B_target
        targetConf = yTrue[:,:,0:idx1]
        targetClass = yTrue[:,:,idx1:idx2]
        targetCoords = yTrue[:,:,idx2:]

        idx1, idx2 = self.C, self.C + self.B_pred
        predConf = yTrue[:,:,0:idx1]
        predClass = yTrue[:,:,idx1:idx2]
        predCoords = yTrue[:,:,idx2:]

        # TODO
        return 1
        

# Define a simple model using the custom reshaper layer to test it
input = tf.keras.layers.Input(shape=(539,))
x = YOLOv1_LastLayer_Reshape((7,7,11))(input)
model = tf.keras.Model(inputs = input, outputs = x, name = "dummy")
model.compile(optimizer='adam',  loss=YOLOv1_Loss, metrics=['accuracy'])

xTest = np.random.randint(10, size = (1,539))
pred = model.predict(xTest)
print(pred.shape)

TensorShape([None, 392])
(1, 7, 7, 11)


2024-07-14 09:08:25.642955: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-07-14 09:08:25.643186: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-07-14 09:08:25.643314: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [5]:
# YOLOv1 structure
YOLOv1_inputShape = (448,448,3) # Shape of the input image 
classNo = 1 # Number of classes we are trying to detect
input = tf.keras.layers.Input(shape=YOLOv1_inputShape)
leakyReLu = tf.keras.layers.LeakyReLU(alpha = .1)


# The backbone, Acts ads a feature extractor
# L1
x = tf.keras.layers.Conv2D(filters = 64, kernel_size=7, strides = 2, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(input)
x = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding = "same")(x)

# L2
x = tf.keras.layers.Conv2D(filters = 192, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding = "same")(x)

# L3
x = tf.keras.layers.Conv2D(filters = 128, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 256, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 256, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 512, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding = "same")(x)

# L4
for _ in range(4):
    x = tf.keras.layers.Conv2D(filters = 256, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
    x = tf.keras.layers.Conv2D(filters = 512, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 512, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.MaxPool2D(pool_size=2, strides=2, padding = "same")(x)

# L5
x = tf.keras.layers.Conv2D(filters = 512, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 512, kernel_size=1, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 2, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)

# L6
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)
x = tf.keras.layers.Conv2D(filters = 1024, kernel_size=3, strides = 1, padding = "same", activation= leakyReLu, kernel_regularizer=l2(1e-5))(x)

# Neck
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(4096)(x)
x = tf.keras.layers.Dense(7*7*(5*2+classNo), activation="sigmoid")(x)
x = tf.keras.layers.Dropout(.5)(x) # Dropout layer for avoiding overfitting
x = YOLOv1_LastLayer_Reshape((7,7,5*2+classNo))(x)
model = tf.keras.Model(inputs = input, outputs = x, name = "YOLOv1")

# # Print the architecture
# for layer in model.layers:
#     print(layer.output_shape)

In [6]:
model.summary()

Model: "YOLOv1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 448, 448, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 224, 224, 64)      9472      
                                                                 
 max_pooling2d (MaxPooling2  (None, 112, 112, 64)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 112, 112, 192)     110784    
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 56, 56, 192)       0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 56, 56, 128)       24704