<a href="https://www.kaggle.com/code/guilhermedemarchi/yolo-v1-implementation?scriptVersionId=122374670" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# Source tutorial: https://www.youtube.com/watch?v=n9_XyCGr-MI&list=PLhhyoLH6Ijfw0TpCTVTNk42NN08H6UvNq&index=5
# Tutorial's repo: https://github.com/aladdinpersson/Machine-Learning-Collection/tree/master/ML/Pytorch/object_detection/YOLO
# My github account: https://github.com/Guilherme-De-Marchi

In [2]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np

In [3]:
class VOCDataset(tf.data.Dataset):
    """Reader for VOC-style XML annotations (unfinished).

    The constructor loads a list of entries from
    ``<base_folder_path>/ImageSets/<label_type>``; indexing parses the
    bounding boxes of one XML file under ``<base_folder_path>/Annotations``.

    NOTE(review): ``os`` and ``ET`` are used below but are not imported in
    the visible import cell (only tensorflow, keras and numpy are) --
    ``ET`` is presumably ``xml.etree.ElementTree``; confirm and import both.
    NOTE(review): ``self.S``, ``self.B`` and ``self.C`` are read in
    ``__getitem__`` but never assigned in ``__init__``.
    NOTE(review): subclassing ``tf.data.Dataset`` directly may not be
    instantiable without implementing its abstract members -- TODO confirm.
    """

    def __init__(self, label_type, base_folder_path):
        """Store the paths and read the entry list.

        Args:
            label_type: file name (relative to ``ImageSets``) of the list to read.
            base_folder_path: dataset root containing ``ImageSets`` and
                ``Annotations``.
        """
        self.base_folder_path = base_folder_path
        self.label_type = label_type

        # One entry per line, with trailing whitespace/newline stripped.
        with open(os.path.join(self.base_folder_path, "ImageSets", self.label_type)) as file:
            self.filenames = [line.rstrip() for line in file]

    def __getitem__(self, index):
        """Parse the annotation file belonging to entry ``index``.

        Collects, for every ``<object>`` element, its ``name`` text and the
        integer ``xmin``/``ymin``/``xmax``/``ymax`` of its ``<bndbox>``.

        NOTE(review): the method ends after allocating ``label_matrix``; the
        matrix is never filled and nothing is returned (implicit ``None``).
        """
        # Uses the LAST four characters of the entry as the file stem.
        # NOTE(review): possibly ``[:-4]`` (drop an extension) or the whole
        # entry was intended -- confirm against the ImageSets file format.
        annon_filename = self.filenames[index][-4:] + ".xml"
        annon_path = os.path.join(self.base_folder_path, "Annotations", annon_filename)

        boxes_info = []
        root_node = ET.parse(annon_path).getroot()
        for obj in root_node.iter('object'):
            label_name = obj.find('name').text
            xmlbox = obj.find('bndbox')
            # Box corners converted from element text to int.
            bndbox = {
                "xmin": int(xmlbox.find('xmin').text), 
                "ymin": int(xmlbox.find('ymin').text),
                "xmax": int(xmlbox.find('xmax').text), 
                "ymax": int(xmlbox.find('ymax').text),
            }

            boxes_info.append({
                "label": label_name, 
                "bndbox": bndbox,
            })

        # Zero-initialised array of shape (S, S, C + 5 * B); boxes_info is
        # not yet written into it.
        label_matrix = np.zeros((self.S, self.S, self.C + 5 * self.B))

SyntaxError: invalid syntax (671455428.py, line 3)

In [None]:
# Layer recipe consumed in order when the network is assembled.
#   - a tuple is one convolution: (kernel_size, filters, strides)
#   - the string 'M' is a max-pooling step with a 2x2 kernel and 2x2 stride
architecture_config = [
    (7, 64, 2), 'M',
    (3, 192, 1), 'M',
    (1, 128, 1), (3, 256, 1), (1, 256, 1), (3, 512, 1), 'M',
    # Four repetitions of the 1x1 reduce / 3x3 expand pair.
    *([(1, 256, 1), (3, 512, 1)] * 4),
    (1, 512, 1), (3, 1024, 1), 'M',
    # Two repetitions of the 1x1 reduce / 3x3 expand pair.
    *([(1, 512, 1), (3, 1024, 1)] * 2),
    (3, 1024, 1), (3, 1024, 2),
    (3, 1024, 1), (3, 1024, 1),
]

class ConvBlock(keras.layers.Conv2D):
    """
    Conv2D layer followed by an activation (leaky relu by default).

    The convolution is built with ``activation=None``; the activation is
    applied afterwards, exactly once, by a separate ``Activation`` layer
    named ``<name>_act``.

    Leaky relu explanation: https://paperswithcode.com/method/leaky-relu

    Args:
        filters: number of convolution filters.
        kernel_size: convolution kernel size.
        strides: convolution strides.
        padding: convolution padding mode.
        activation: activation applied to the convolution output; passed
            to ``keras.layers.Activation``. The default ``LeakyReLU``
            instance is created once, when the class is defined, and is
            shared by every block that relies on the default.
        kernel_regularizer: regularizer forwarded to ``Conv2D``.
        name: layer name; also the prefix of the activation layer's name.
        **kwargs: forwarded unchanged to ``Conv2D``.
    """

    def __init__(self, filters=64, kernel_size=3, strides=1, padding='same',
                 activation=keras.layers.LeakyReLU(alpha=0.1), kernel_regularizer=None,
                 name='conv', **kwargs):

        super().__init__(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            activation=None,
            kernel_regularizer=kernel_regularizer,
            name=name,
            **kwargs,
        )

        # Keep the wrapper under its own attribute. Conv2D.call() applies
        # whatever is stored in ``self.activation``, so overwriting that
        # attribute made the activation run twice (once inside
        # super().call() and once more in call() below).
        self.post_activation = keras.layers.Activation(activation, name=name+'_act')

    def call(self, inputs):
        """Run the convolution, then the activation, and return the result."""
        x = super().call(inputs)
        return self.post_activation(x)
    
class YoloV1(keras.models.Model):
    """
    YOLO v1 style network built from an architecture list.

    The list is turned into a ``keras.Sequential`` of ``ConvBlock`` and
    ``MaxPooling2D`` layers, followed by ``Flatten``, a 4096-unit dense
    layer with leaky relu, and a final dense layer. The output is a flat
    vector of ``S[0] * S[1] * (C + B * 5)`` values per sample.

    Args:
        input_shape: shape of one input sample, without the batch axis.
        grid_shape: pair stored as ``S``; its two entries size the output.
        num_boxes: stored as ``B``; each box contributes 5 output values.
        num_classes: stored as ``C``.
        architecture: sequence of ``(kernel_size, filters, strides)`` tuples
            and ``'M'`` strings (2x2 max pooling with stride 2).
        model_name: prefix used for every layer name.

    Raises:
        ValueError: if ``architecture`` contains an entry that is neither a
            tuple nor the string ``'M'``.
    """

    def __init__(self, input_shape, grid_shape=(7,7), num_boxes=2,
                 num_classes=20, architecture=architecture_config, model_name='YoloV1'):

        super().__init__()
        self.S = grid_shape
        self.B = num_boxes
        self.C = num_classes
        self.architecture = architecture
        self.model_name = model_name
        self.sequential = self._create_sequential()

        # Build immediately with an unspecified batch dimension.
        self.build(input_shape=(None, *input_shape))

    def call(self, inputs, training=False):
        """Forward pass; ``training`` is forwarded to the inner Sequential."""
        # Previously the flag was accepted but dropped, so layers that
        # behave differently in training never saw it.
        return self.sequential(inputs, training=training)

    def _create_sequential(self):
        """Translate ``self.architecture`` into a ``keras.Sequential``."""
        layers = []
        for i, spec in enumerate(self.architecture):
            if isinstance(spec, tuple):
                layers.append(ConvBlock(
                    kernel_size=spec[0],
                    filters=spec[1],
                    strides=spec[2],
                    padding="same",
                    name=self.model_name+'_conv_'+str(i)
                ))

            elif isinstance(spec, str) and spec == 'M':
                layers.append(keras.layers.MaxPooling2D(
                    pool_size=2,
                    strides=2,
                    name=self.model_name+'_max_polling_2d_'+str(i)
                ))

            else:
                # Silently skipping an unknown entry would build a different
                # network than the one described, so fail loudly instead.
                raise ValueError(
                    f"Unsupported architecture entry at index {i}: {spec!r}"
                )

        layers += [
            keras.layers.Flatten(name=self.model_name+'_flatten'),
            keras.layers.Dense(4096, activation=None, name=self.model_name+'_dense_1'),
            keras.layers.LeakyReLU(0.1, name=self.model_name+'_dense_act'),
            keras.layers.Dense(self.S[0] * self.S[1] * (self.C + self.B * 5), name=self.model_name+'_dense_2'),
        ]

        return keras.Sequential(layers)
    
def test_YoloV1():
    """Smoke test: build the default YoloV1, show its summary, fit one random sample."""
    yolo = YoloV1((448, 448, 3))
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    yolo.summary()
    yolo.compile(loss="binary_crossentropy")

    images = np.random.rand(1, 448, 448, 3) * 255
    # A compiled loss needs targets; fit() without them raises. Random
    # values in [0, 1) sized like the model output are enough here.
    output_size = yolo.S[0] * yolo.S[1] * (yolo.C + yolo.B * 5)
    targets = np.random.rand(1, output_size)
    yolo.fit(images, targets)

In [None]:
test_YoloV1()