# Yolo_v2_implementation

## darknet-19

In [None]:
# import tensorflow as tf
# from tensorflow.keras import layers
import torch
from torch import nn
from torchinfo import summary

In [None]:
def conv_block(x, filters, kernel_size):
    """Apply Conv2D -> BatchNormalization -> LeakyReLU(0.1) to a Keras tensor.

    Args:
        x: input tensor of the Keras functional API.
        filters: number of output filters of the convolution.
        kernel_size: kernel size passed to ``Conv2D``.

    Returns:
        The tensor produced by the activation layer.
    """
    # 'same' padding with a linear conv; the non-linearity comes from LeakyReLU below.
    convolved = layers.Conv2D(
        filters,
        kernel_size,
        padding='same',
        activation='linear',
    )(x)
    normalized = layers.BatchNormalization()(convolved)
    return layers.LeakyReLU(alpha=0.1)(normalized)

class basic_conv(nn.Module):
    """Convolution unit: Conv2d, optionally followed by BatchNorm2d and LeakyReLU(0.1).

    Args:
        in_channels: number of channels of the input feature map.
        output_channels: number of channels produced by the convolution.
        kernel_size: integer kernel size; the padding is ``(kernel_size - 1) // 2``.
        stride: stride of the convolution.
        output: when True, only the bare convolution is built (no batch
            normalization and no activation).
    """

    def __init__(self, in_channels, output_channels, kernel_size, stride=1, output=False):
        # `super` has to be called to get the proxy object; the previous
        # `super.__init__()` raised TypeError before any layer was created.
        super().__init__()

        padding = (kernel_size - 1) // 2
        stages = [
            nn.Conv2d(in_channels, output_channels, kernel_size, stride, padding=padding),
        ]
        if not output:
            stages.append(nn.BatchNorm2d(output_channels))
            stages.append(nn.LeakyReLU(0.1, inplace=True))

        # Sub-module indices (conv.0, conv.1, conv.2) match the original layout.
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the convolution unit and return the result."""
        return self.conv(x)
    
class basic_yolo_block(nn.Module):
    """Stack of five ``basic_conv`` units alternating 3x3 and 1x1 kernels.

    The 3x3 units output ``output_channels`` channels and the 1x1 units
    squeeze to ``output_channels // 2``.

    Args:
        in_channels: number of channels of the input feature map.
        output_channels: number of channels produced by the block.
        double: accepted for interface compatibility but not used.
            NOTE(review): presumably meant to choose between a 3-layer and a
            5-layer stack - confirm the intended behavior before relying on it.
    """

    def __init__(self, in_channels, output_channels, double):
        # `super` has to be called; `super.__init__()` raised TypeError.
        super().__init__()

        squeezed = output_channels // 2
        self.block = nn.Sequential(
            basic_conv(in_channels, output_channels, 3),
            basic_conv(output_channels, squeezed, 1),
            basic_conv(squeezed, output_channels, 3),
            basic_conv(output_channels, squeezed, 1),
            basic_conv(squeezed, output_channels, 3),
        )

    def forward(self, x):
        """Run ``x`` through the five stacked convolution units."""
        # Without this method nn.Module.__call__ raises NotImplementedError.
        return self.block(x)

class Yolo_v2(nn.Module):
    """YOLOv2 network in PyTorch (work in progress).

    Only the first three convolution stages are defined so far.
    TODO: add the remaining stages, the detection head and ``forward``.

    Args:
        anchors: sequence of 2-tuples stored as ``self.anchors``; when omitted,
            a fresh copy of the five default pairs is used.
            NOTE(review): presumably (width, height) anchor priors - confirm.
        num_classes: number of classes, stored as ``self.num_class``.
    """

    # Immutable defaults: a list literal in the signature would be shared
    # between every instance created without an explicit ``anchors``.
    _DEFAULT_ANCHORS = (
        (1.3221, 1.73145),
        (3.19275, 4.00944),
        (5.05587, 8.09892),
        (9.47112, 4.84053),
        (11.2364, 10.0071),
    )

    def __init__(self, anchors=None, num_classes=20):
        # `super` has to be called; `super.__init__()` raised TypeError.
        super().__init__()

        self.num_class = num_classes
        self.anchors = list(self._DEFAULT_ANCHORS) if anchors is None else anchors

        self.first_conv_block = nn.Sequential(
            basic_conv(3, 32, 3),
            nn.MaxPool2d(2),
            basic_conv(32, 64, 3),
            nn.MaxPool2d(2),
        )

        self.second_conv_block = nn.Sequential(
            basic_conv(64, 128, 3),
            basic_conv(128, 64, 1),
            basic_conv(64, 128, 3),
            nn.MaxPool2d(2),
        )

        # Previously `nn.Sequential(basic_conv)` passed the class itself, which
        # nn.Sequential rejects. The layers below mirror "Block 4" of the Keras
        # reference implementation in this notebook (256 / 128 / 256 + pool).
        self.third_conv_block = nn.Sequential(
            basic_conv(128, 256, 3),
            basic_conv(256, 128, 1),
            basic_conv(128, 256, 3),
            nn.MaxPool2d(2),
        )
def darknet19(input_shape=(224, 224, 3), num_classes=1000):
    """Build the Darknet-19 classifier as a Keras functional model.

    Args:
        input_shape: shape of one input sample, passed to ``tf.keras.Input``.
        num_classes: number of units of the final softmax ``Dense`` layer.
            The 1x1 convolution before it always has 1000 filters.

    Returns:
        A ``tf.keras.Model`` mapping the input to the softmax output.
    """
    # Each entry is (filters, kernel_size) for one conv_block;
    # None marks a 2x2 max-pooling layer that closes a block.
    layout = (
        (32, 3), None,                                           # Block 1
        (64, 3), None,                                           # Block 2
        (128, 3), (64, 1), (128, 3), None,                       # Block 3
        (256, 3), (128, 1), (256, 3), None,                      # Block 4
        (512, 3), (256, 1), (512, 3), (256, 1), (512, 3), None,  # Block 5
        (1024, 3), (512, 1), (1024, 3), (512, 1), (1024, 3),     # Block 6
        (1000, 1),                                               # Additional 1x1 conv layer
    )

    inputs = tf.keras.Input(shape=input_shape)

    features = inputs
    for spec in layout:
        if spec is None:
            features = layers.MaxPooling2D(pool_size=2)(features)
        else:
            n_filters, k_size = spec
            features = conv_block(features, n_filters, k_size)

    # Classification head
    pooled = layers.GlobalAveragePooling2D()(features)
    outputs = layers.Dense(num_classes, activation='softmax')(pooled)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Build Darknet-19 with its default arguments and print the layer summary.
darknet_19 = darknet19()
darknet_19.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 224, 224, 32)      896       
                                                                 
 batch_normalization (BatchN  (None, 224, 224, 32)     128       
 ormalization)                                                   
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 224, 224, 32)      0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 112, 112, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 112, 112, 64)      18496 

In [None]:
# Rebuild Darknet-19 for a 448x448x3 input and print the layer summary.
darknet_19 = darknet19((448, 448, 3))
darknet_19.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 448, 448, 3)]     0         
                                                                 
 conv2d_38 (Conv2D)          (None, 448, 448, 32)      896       
                                                                 
 batch_normalization_38 (Bat  (None, 448, 448, 32)     128       
 chNormalization)                                                
                                                                 
 leaky_re_lu_38 (LeakyReLU)  (None, 448, 448, 32)      0         
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 224, 224, 32)     0         
 g2D)                                                            
                                                                 
 conv2d_39 (Conv2D)          (None, 224, 224, 64)      1849

## YOLO V2

In [None]:
def conv_block(x, filters, kernel_size):
    """Run ``x`` through Conv2D, BatchNormalization and LeakyReLU(0.1) in turn.

    Args:
        x: input tensor of the Keras functional API.
        filters: number of output filters of the convolution.
        kernel_size: kernel size passed to ``Conv2D``.

    Returns:
        The tensor produced by the last layer of the pipeline.
    """
    pipeline = (
        layers.Conv2D(filters, kernel_size, padding='same', activation='linear'),
        layers.BatchNormalization(),
        layers.LeakyReLU(alpha=0.1),
    )
    out = x
    for layer in pipeline:
        out = layer(out)
    return out

def yolo_v2(input_shape=(416, 416, 3), num_class=20, bbox_per_cell=5):
    """Build the YOLOv2 detector as a Keras functional model.

    The backbone is five convolution blocks, with a 2x2 max pool after each
    of the first four. The Block 5 output is additionally rearranged
    (space-to-depth) into a quarter-resolution "fine grained" map. That map
    is concatenated with the Block 6 output before the detection head.

    Args:
        input_shape: shape of one input sample, passed to ``tf.keras.Input``.
        num_class: number of object classes.
        bbox_per_cell: number of boxes predicted per grid cell.

    Returns:
        A ``tf.keras.Model`` whose output has
        ``bbox_per_cell * (num_class + 5)`` channels.
    """
    inputs = tf.keras.Input(shape=input_shape)

    # Block 1
    x = conv_block(inputs, 32, 3)
    x = layers.MaxPooling2D(pool_size=2)(x)

    # Block 2
    x = conv_block(x, 64, 3)
    x = layers.MaxPooling2D(pool_size=2)(x)

    # Block 3
    x = conv_block(x, 128, 3)
    x = conv_block(x, 64, 1)
    x = conv_block(x, 128, 3)
    x = layers.MaxPooling2D(pool_size=2)(x)

    # Block 4
    x = conv_block(x, 256, 3)
    x = conv_block(x, 128, 1)
    x = conv_block(x, 256, 3)
    x = layers.MaxPooling2D(pool_size=2)(x)

    # Block 5
    x = conv_block(x, 512, 3)
    x = conv_block(x, 256, 1)
    x = conv_block(x, 512, 3)
    x = conv_block(x, 256, 1)
    x = conv_block(x, 512, 3)  # 26 x 26 x 512 for the default 416 x 416 input

    # Fine grained features: split the map into its four (even/odd row,
    # even/odd column) sub-grids and stack them on the channel axis.
    # The four slices only have equal shapes when height and width are even.
    x_1 = x[:, ::2, ::2, :]
    x_2 = x[:, ::2, 1::2, :]
    x_3 = x[:, 1::2, ::2, :]
    x_4 = x[:, 1::2, 1::2, :]
    fine_grained = tf.concat([x_1, x_2, x_3, x_4], axis=-1)  # batch x 13 x 13 x 2048

    x = layers.MaxPooling2D(pool_size=2)(x)

    # Block 6
    x = conv_block(x, 1024, 3)
    x = conv_block(x, 512, 1)
    x = conv_block(x, 1024, 3)
    x = conv_block(x, 512, 1)
    x = conv_block(x, 1024, 3)
    x = conv_block(x, 1024, 3)  # added for detection
    x = conv_block(x, 1024, 3)  # added for detection

    x = tf.concat([x, fine_grained], axis=-1)  # batch x 13 x 13 x 3072

    # Output
    x = conv_block(x, 1024, 3)
    # The detection head is a plain linear 1x1 convolution: the raw box,
    # objectness and class predictions must not pass through batch
    # normalization or LeakyReLU, so conv_block is not used here.
    outputs = layers.Conv2D(
        bbox_per_cell * (num_class + 5),
        1,
        padding='same',
        activation='linear',
    )(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

In [None]:
# Build the YOLOv2 model with its default arguments and print the layer summary.
# NOTE(review): this rebinds the name `yolo_v2` from the builder function to the
# returned model, so the builder is no longer reachable under that name afterwards.
yolo_v2 = yolo_v2()
yolo_v2.summary()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 416, 416, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_79 (Conv2D)             (None, 416, 416, 32  896         ['input_5[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization_79 (BatchN  (None, 416, 416, 32  128        ['conv2d_79[0][0]']              
 ormalization)                  )                                                           