# YOLOv3 Implementation




![YOLOv3 architecture](https://d33wubrfki0l68.cloudfront.net/c6fd049f28b66dbd35faed6965905ec6281f7d7d/c0399/assets/images/yolo/yolo-architecture.webp)


**model.py**

In [1]:
import torch 
import torch.nn as nn 
from torchsummary import summary

In [2]:
# CNNBlock

class CNNBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> LeakyReLU(0.1) building block.

    Args:
        input: Number of channels of the incoming feature map.
        output: Number of channels produced by the convolution.
        kernel: Side length of the square convolution kernel.
        stride: Convolution stride (defaults to 1).
    """

    def __init__(self, input, output, kernel, stride=1):
        super().__init__()
        # Padding of kernel // 2 keeps the spatial size unchanged for odd
        # kernels at stride 1.
        half_kernel = kernel // 2
        self.conv = nn.Conv2d(
            input, output, kernel, stride=stride, padding=half_kernel
        )
        self.bn = nn.BatchNorm2d(output)
        self.act = nn.LeakyReLU(0.1)

    def forward(self, x: torch.Tensor):
        """Apply convolution, batch normalisation and activation, in that order."""
        convolved = self.conv(x)
        normalised = self.bn(convolved)
        return self.act(normalised)


In [3]:
# Residual Block

class ResidualBlock(nn.Module):
    """Two-stage bottleneck with a skip connection.

    A 1x1 CNNBlock halves the channel count, a 3x3 CNNBlock restores it, and
    the result is added to the block's input.

    Args:
        input: Number of channels of the incoming (and outgoing) feature map.
    """

    def __init__(self, input):
        super().__init__()
        squeezed = input // 2
        self.cnnblock1 = CNNBlock(input, squeezed, 1)
        self.cnnblock2 = CNNBlock(squeezed, input, 3)

    def forward(self, x: torch.Tensor):
        """Return ``x`` plus the output of the two stacked CNNBlocks."""
        return x + self.cnnblock2(self.cnnblock1(x))


In [4]:
# The Backbone - Darknet53

class Darknet53(nn.Module):
    """Backbone: a stem convolution followed by five stride-2 stages.

    Each stage is a stride-2 CNNBlock that doubles the channel count,
    followed by a stack of ResidualBlocks (1, 2, 8, 8 and 4 blocks).
    """

    def __init__(self):
        super().__init__()
        # Stem: 3 -> 32 channels at full resolution.
        self.c1 = CNNBlock(3, 32, 3)
        # Stage 1: downsample to 64 channels.
        self.c2 = CNNBlock(32, 64, 3, stride=2)
        self.r1 = self._make_resblocks(64, 1)
        # Stage 2: downsample to 128 channels.
        self.c3 = CNNBlock(64, 128, 3, stride=2)
        self.r2 = self._make_resblocks(128, 2)
        # Stage 3: downsample to 256 channels (source of route1).
        self.c4 = CNNBlock(128, 256, 3, stride=2)
        self.r3 = self._make_resblocks(256, 8)
        # Stage 4: downsample to 512 channels (source of route2).
        self.c5 = CNNBlock(256, 512, 3, stride=2)
        self.r4 = self._make_resblocks(512, 8)
        # Stage 5: downsample to 1024 channels (source of route3).
        self.c6 = CNNBlock(512, 1024, 3, stride=2)
        self.r5 = self._make_resblocks(1024, 4)

    def _make_resblocks(self, input, num_blocks=1):
        """Chain ``num_blocks`` ResidualBlocks of width ``input`` into one Sequential."""
        return nn.Sequential(
            *(ResidualBlock(input=input) for _ in range(num_blocks))
        )

    def forward(self, x: torch.Tensor):
        """Run the backbone and return the outputs of stages 3, 4 and 5.

        Returns:
            Tuple ``(route1, route2, route3)`` with 256, 512 and 1024 channels,
            taken after three, four and five stride-2 convolutions respectively.
        """
        x = self.r1(self.c2(self.c1(x)))
        x = self.r2(self.c3(x))
        route1 = self.r3(self.c4(x))
        route2 = self.r4(self.c5(route1))
        route3 = self.r5(self.c6(route2))
        return route1, route2, route3


In [5]:
# YOLO v3

class YOLOv3(nn.Module):
    """YOLOv3 detection network: Darknet53 backbone plus three prediction heads.

    Args:
        num_classes: Number of object classes; every anchor predicts
            ``num_classes + 5`` values.
        anchors_per_scale: Number of anchors predicted per grid cell at each
            of the three scales.
    """

    def __init__(self, num_classes:int=20, anchors_per_scale:int=3):
        super(YOLOv3, self).__init__()
        self.num_classes = num_classes
        self.anchors_per_scale = anchors_per_scale
        # Each head emits one (num_classes + 5) vector per anchor. Deriving the
        # channel count from anchors_per_scale (instead of a literal 3) keeps
        # the heads consistent with the reshape in forward() for any anchor
        # count; the default of 3 yields exactly the same layers as before.
        pred_channels = self.anchors_per_scale * (self.num_classes + 5)

        self.darknet = Darknet53()

        # Head 1: operates on route3 (1024 channels, coarsest grid).
        self.layer1 =  nn.Sequential(
            CNNBlock(1024, 512, 1),
            CNNBlock(512, 1024, 3),
            CNNBlock(1024, 512, 1),
            CNNBlock(512, 1024, 3),
            CNNBlock(1024, 512, 1)
        )
        self.pred1 = nn.Sequential(
            CNNBlock(512, 1024, 3),
            nn.Conv2d(1024, pred_channels, 1)
        )

        # Head 2: upsampled head-1 features (256) concatenated with route2 (512).
        self.presampling1 = CNNBlock(512, 256, 1)
        self.upsample1 = nn.Upsample(scale_factor=2,  mode='nearest')
        self.layer2 = nn.Sequential(
            CNNBlock(768, 256, 1),
            CNNBlock(256, 512, 3)
        )
        self.pred2 = nn.Conv2d(512, pred_channels, 1)

        # Head 3: upsampled head-2 features (128) concatenated with route1 (256).
        self.presampling2 = CNNBlock(512, 128, 1)
        self.upsample2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.layer3 = nn.Sequential(
            CNNBlock(384, 128, 1),
            CNNBlock(128, 256, 3)
        )
        self.pred3 = nn.Conv2d(256, pred_channels, 1)

    def _split_anchors(self, pred: torch.Tensor) -> torch.Tensor:
        """Reshape a raw head output to [batch, anchors, grid_h, grid_w, num_classes + 5]."""
        batch, _, grid_h, grid_w = pred.shape
        return pred.reshape(
            batch, self.anchors_per_scale, self.num_classes + 5, grid_h, grid_w
        ).permute(0, 1, 3, 4, 2)

    def forward(self, x: torch.Tensor):
        """Compute predictions at three scales.

        Returns:
            Tuple ``(output1, output2, output3)`` built from the backbone's
            route3, route2 and route1 features respectively, each shaped
            ``[batch, anchors_per_scale, grid_h, grid_w, num_classes + 5]``.
        """
        route1, route2, route3 = self.darknet(x)

        x = self.layer1(route3)
        output1 = self._split_anchors(self.pred1(x))

        # Upsample by 2 and fuse with the next-finer backbone feature map.
        x = self.upsample1(self.presampling1(x))
        x = torch.cat([x, route2], 1)
        route2 = self.layer2(x)
        output2 = self._split_anchors(self.pred2(route2))

        x = self.upsample2(self.presampling2(route2))
        route1 = torch.cat([x, route1], 1)
        route1 = self.layer3(route1)
        output3 = self._split_anchors(self.pred3(route1))

        return output1, output2, output3


In [6]:
sample = torch.rand(3, 416, 416)
# The model is built on the CPU and never moved, so pin the summary to the CPU:
# torchsummary defaults to device="cuda" and would otherwise feed a CUDA input
# to CPU weights (and crash) on any machine where CUDA is available.
summary(YOLOv3(), tuple(sample.shape), batch_size=32, device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [32, 32, 416, 416]             896
       BatchNorm2d-2         [32, 32, 416, 416]              64
         LeakyReLU-3         [32, 32, 416, 416]               0
          CNNBlock-4         [32, 32, 416, 416]               0
            Conv2d-5         [32, 64, 208, 208]          18,496
       BatchNorm2d-6         [32, 64, 208, 208]             128
         LeakyReLU-7         [32, 64, 208, 208]               0
          CNNBlock-8         [32, 64, 208, 208]               0
            Conv2d-9         [32, 32, 208, 208]           2,080
      BatchNorm2d-10         [32, 32, 208, 208]              64
        LeakyReLU-11         [32, 32, 208, 208]               0
         CNNBlock-12         [32, 32, 208, 208]               0
           Conv2d-13         [32, 64, 208, 208]          18,496
      BatchNorm2d-14         [32, 64, 2

**model.py ends here**

In [7]:
def test():
    """Check that YOLOv3 emits the expected output shape at each of its three scales."""
    num_classes = 20
    img_size = 416
    model = YOLOv3(num_classes=num_classes)
    out = model(torch.randn((2, 3, img_size, img_size)))
    # Outputs are ordered coarsest to finest: strides 32, 16 and 8.
    for prediction, stride in zip(out, (32, 16, 8)):
        grid = img_size // stride
        assert prediction.shape == (2, 3, grid, grid, 5 + num_classes)
    print('Success!')


test()

Success!
