In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

# `display` and `HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Inject CSS so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
class ConvBnLeakyReLU(nn.Module):
    """
    Building block: Conv2d -> BatchNorm2d -> LeakyReLU(0.1).

    The convolution runs at stride 1 without a bias term and is padded with
    ``(kernel - 1) // 2`` pixels, so an odd kernel keeps the spatial size.

    Args:
        inCh: number of input channels.
        outCh: number of output channels.
        kernel: (square) kernel size of the convolution.
    """
    def __init__(self, inCh, outCh, kernel):
        super(ConvBnLeakyReLU, self).__init__()
        self.inCh, self.outCh, self.kernel = inCh, outCh, kernel

        # Build the three stages separately, then chain them in order.
        convolution = nn.Conv2d(inCh, outCh, kernel, stride=1,
                                padding=(kernel - 1) // 2, bias=False)
        batch_norm = nn.BatchNorm2d(outCh)
        activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
        self.conv = nn.Sequential(convolution, batch_norm, activation)

    def forward(self, x):
        """Apply conv, batch-norm and LeakyReLU to ``x`` and return the result."""
        return self.conv(x)
    

class YOLOv3Params():
    """
    Parameters for the YOLOv3 detection head.

    Attributes:
        class_names: names of the object classes.
        n_classes: number of classes, derived from ``class_names``.
        final_channels: ``3 * (5 + n_classes)``, the same formula
            ``YOLOv3Layer`` uses for its per-scale output channels.
        anchors: nine two-element anchor entries; ``YOLOv3Layer`` splits
            them into groups of three (presumably [width, height] in pixels,
            one group per scale -- TODO confirm against the loss code).
        mode: run mode string, "infer" by default.
    """
    def __init__(self):
        # Udacity Self-driving car dataset
        self.class_names = ['car', 'truck', 'pedestrian', 'signal']
        # Derived from the list so the count cannot drift from the names.
        self.n_classes = len(self.class_names)
        self.final_channels = 3 * (5 + self.n_classes)
        self.anchors = [[10, 13], [16, 30], [33, 23], 
                        [30, 61], [62, 45], [59, 119], 
                        [116, 90], [156, 198], [373, 326]]
        self.mode = "infer"

In [3]:
# http://machinethink.net/blog/object-detection/
# Very helpful diagram: https://www.cyberailab.com/home/a-closer-look-at-yolov3
# https://github.com/marvis/pytorch-yolo3
# https://gitlab.com/EAVISE/lightnet/blob/master/lightnet/network/loss/_regionloss.py

### YOLO Layer

In [4]:
class YOLOv3Layer(nn.Module):
    """
    YOLOv3 detection head stacked on a three-scale backbone.

    ``base_network(baseParams)`` supplies the backbone. Its forward pass is
    unpacked into three feature maps (``x52, x26, x13``) and its
    ``out_channels`` attribute is indexed in the same order to size the head.
    Each scale has its own conv block. The route feature of the coarser scale
    is reduced by a 1x1 conv, upsampled 2x and concatenated with the next
    finer backbone feature before that scale's block runs. The three raw
    outputs are passed to ``YOLOLoss`` together with the labels.

    NOTE(review): ``base_network`` and ``YOLOLoss`` are not defined in this
    part of the notebook; they must be in scope before instantiation.

    Args:
        params: object exposing ``n_classes`` and ``anchors``
            (e.g. ``YOLOv3Params``); also forwarded to ``YOLOLoss``.
        baseParams: forwarded unchanged to ``base_network``.
    """
    def __init__(self, params, baseParams):
        # nn.Module must be initialised before any sub-module is assigned,
        # otherwise attribute assignment of a Module raises AttributeError.
        super(YOLOv3Layer, self).__init__()
        self.params = params
        self.base = base_network(baseParams)  # MobileNetV2
        self.base_out_channels = self.base.out_channels  # [256, 512, 1280]
        self.n_classes = self.params.n_classes 
        self.out_channels = 3 * (5 + self.n_classes)  # 3 x (B + C)
        self.anchors = np.array(params.anchors)
        self.n_layers = len(self.anchors) // 3
        self.loss = YOLOLoss(params)

        # [narrow, wide] channel pair of each scale's conv block.
        channels13 = [512, 1024]
        channels26 = [256, 512]
        channels52 = [128, 256]
        
        # Conv layer block for 13x13 feature maps from base network
        self.conv_block13 = self._make_conv_block(inCh=self.base_out_channels[-1],
                                                  channel_list=channels13,
                                                  outCh=self.out_channels)
        
        # 1x1 conv applied to the 13x13 route feature before upsampling.
        # Its 256 output channels are the "+ 256" in conv_block26's input.
        self.route_conv26 = ConvBnLeakyReLU(channels13[0], 256, kernel=1)
        
        # Conv layer block for 26x26 feature maps from base network
        self.conv_block26 = self._make_conv_block(inCh=self.base_out_channels[-2] + 256,
                                                  channel_list=channels26,
                                                  outCh=self.out_channels)
        
        # 1x1 conv applied to the 26x26 route feature before upsampling.
        # Its 128 output channels are the "+ 128" in conv_block52's input.
        self.route_conv52 = ConvBnLeakyReLU(channels26[0], 128, kernel=1)
        
        # Conv layer block for 52x52 feature maps from base network
        self.conv_block52 = self._make_conv_block(inCh=self.base_out_channels[-3] + 128,
                                                  channel_list=channels52,
                                                  outCh=self.out_channels)
        
    def _make_conv_block(self, inCh, channel_list, outCh):
        """
        Build one scale's conv block.

        Six ConvBnLeakyReLU layers alternate 1x1 (to ``channel_list[0]``)
        and 3x3 (to ``channel_list[1]``), followed by a plain 1x1 conv
        ("ConvOut", with bias, no BN or activation) producing ``outCh``.

        Args:
            inCh: channels of the feature map entering the block.
            channel_list: ``[narrow, wide]`` channel counts.
            outCh: channels of the final output.

        Returns:
            nn.ModuleList with the seven layers in application order.
        """
        modList = nn.ModuleList([
            ConvBnLeakyReLU(inCh, channel_list[0], kernel=1),
            ConvBnLeakyReLU(channel_list[0], channel_list[1], kernel=3),
            ConvBnLeakyReLU(channel_list[1], channel_list[0], kernel=1),
            ConvBnLeakyReLU(channel_list[0], channel_list[1], kernel=3),
            ConvBnLeakyReLU(channel_list[1], channel_list[0], kernel=1),
            ConvBnLeakyReLU(channel_list[0], channel_list[1], kernel=3),
        ])
        # The layer just above emits channel_list[1] channels, so the output
        # conv must accept channel_list[1] (not channel_list[0]) inputs.
        modList.add_module("ConvOut", nn.Conv2d(channel_list[1], outCh, 
                                                kernel_size=1, stride=1, 
                                                padding=0, bias=True))
        
        return modList
    
    def _route(self, in_feature, conv_block):
        """
        Run ``in_feature`` through every module of ``conv_block`` in order.

        Args:
            in_feature: input to the first module.
            conv_block: iterable of modules; it needs at least five entries,
                because the route feature is taken after the fifth.

        Returns:
            ``(output, route)``: the output of the last module and the
            output of the module at index 4.
        """
        for i, conv_module in enumerate(conv_block):
            in_feature = conv_module(in_feature)
            if i == 4:
                # Output of the fifth layer feeds the next (finer) scale.
                route = in_feature
        return in_feature, route
    
    def forward(self, img, label13, label26, label52):
        """
        Run backbone and head, then compute the loss.

        Grid sizes named in the comments (13/26/52) follow the original
        notes and presumably assume the input size used there -- TODO confirm.

        Args:
            img: input passed to the backbone.
            label13, label26, label52: labels for the three scales,
                forwarded to ``self.loss`` in that order.

        Returns:
            Whatever ``self.loss`` returns for the three outputs and labels.
        """
        x52, x26, x13 = self.base(img)
        
        # _route takes (feature, block) in that order.
        out13, out13_route = self._route(x13, self.conv_block13)  # size: 13x13
        
        # YOLO branch 1
        x26_in = self.route_conv26(out13_route)  # size: 13x13
        x26_in = F.interpolate(x26_in, scale_factor=2, mode='nearest')  # size: 13x13 -> 26x26
        x26_in = torch.cat([x26_in, x26], dim=1)
        out26, out26_route = self._route(x26_in, self.conv_block26)  # size: 26x26
        
        # YOLO branch 2
        x52_in = self.route_conv52(out26_route)  # size: 26x26
        x52_in = F.interpolate(x52_in, scale_factor=2, mode='nearest')  # size: 26x26 -> 52x52
        x52_in = torch.cat([x52_in, x52], dim=1)
        # The finest scale's route feature is not used.
        out52, _ = self._route(x52_in, self.conv_block52)  # size: 52x52
        
        # Compute loss
        loss = self.loss((out13, out26, out52), (label13, label26, label52))
        
        return loss

### YOLO Layer Scratch

Outputs from MobileNetV2

```
52x52 size feature map:  torch.Size([1, 256, 52, 52])
26x26 size feature map:  torch.Size([1, 512, 26, 26])
13x13 size feature map:  torch.Size([1, 1280, 13, 13])
```

In [5]:
# "Same" padding for an odd kernel at stride 1: floor((kernel - 1) / 2).
kernel = 3
p = divmod(kernel - 1, 2)[0]
print(p)

1


In [6]:
# Random stand-ins for the three backbone feature maps, using the
# (channels, grid size) pairs from the MobileNetV2 shape listing above.
y52, y26, y13 = (
    torch.randn([1, channels, grid, grid])
    for channels, grid in ((256, 52), (512, 26), (1280, 13))
)

In [7]:
# 13 x 13
final_channels = 3 * (5 + 4)
outCh_list = [256, 512, 1280]

inCh = outCh_list[-1]
channel_list = [512, 1024]

convBlock13 = nn.ModuleList([
    ConvBnLeakyReLU(inCh, channel_list[0], 1),
    ConvBnLeakyReLU(channel_list[0], channel_list[1], 3),
    ConvBnLeakyReLU(channel_list[1], channel_list[0], 1),
    ConvBnLeakyReLU(channel_list[0], channel_list[1], 3),
    ConvBnLeakyReLU(channel_list[1], channel_list[0], 1),
    ConvBnLeakyReLU(channel_list[0], channel_list[1], 3),
    # Detection output: plain 1x1 conv with bias, no BN / LeakyReLU, i.e. the
    # same layer type YOLOv3Layer._make_conv_block registers as "ConvOut".
    nn.Conv2d(channel_list[1], final_channels, kernel_size=1, stride=1,
              padding=0, bias=True),
])

print(convBlock13[0])

in13 = y13
for i, conv in enumerate(convBlock13):
    in13 = conv(in13)
    print('i: {} and in13 size: {}'.format(i, in13.size()))
    if i == 4:
        # Output of the fifth layer is the route feature for the 26x26 branch.
        out_route13 = in13
out13 = in13
print(out13.size())
print(out_route13.size())

ConvBnLeakyReLU(
  (conv): Sequential(
    (0): Conv2d(1280, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.1, inplace)
  )
)
i: 0 and in13 size: torch.Size([1, 512, 13, 13])
i: 1 and in13 size: torch.Size([1, 1024, 13, 13])
i: 2 and in13 size: torch.Size([1, 512, 13, 13])
i: 3 and in13 size: torch.Size([1, 1024, 13, 13])
i: 4 and in13 size: torch.Size([1, 512, 13, 13])
i: 5 and in13 size: torch.Size([1, 1024, 13, 13])
i: 6 and in13 size: torch.Size([1, 27, 13, 13])
torch.Size([1, 27, 13, 13])
torch.Size([1, 512, 13, 13])


`in26 = F.interpolate(in26, scale_factor=2, mode='bilinear', align_corners=False)` did not seem to work here, so the cells below upsample with `mode='nearest'` instead.

In [8]:
# 26 x 26
final_channels = 3 * (5 + 4)
outCh_list = [256, 512, 1280]

inCh = outCh_list[-2] + 256
channel_list = [256, 512]

convBlock26 = nn.ModuleList([
    ConvBnLeakyReLU(inCh, channel_list[0], 1),
    ConvBnLeakyReLU(channel_list[0], channel_list[1], 3),
    ConvBnLeakyReLU(channel_list[1], channel_list[0], 1),
    ConvBnLeakyReLU(channel_list[0], channel_list[1], 3),
    ConvBnLeakyReLU(channel_list[1], channel_list[0], 1),
    ConvBnLeakyReLU(channel_list[0], channel_list[1], 3),
    # Detection output: plain 1x1 conv with bias, no BN / LeakyReLU, i.e. the
    # same layer type YOLOv3Layer._make_conv_block registers as "ConvOut".
    nn.Conv2d(channel_list[1], final_channels, kernel_size=1, stride=1,
              padding=0, bias=True),
])

print(convBlock26[0])

# Route branch: a 1x1 conv reduces the 13x13 route feature from 512 to 256
# channels; the spatial size is unchanged at this point.
conv26 = ConvBnLeakyReLU(inCh=512, outCh=256, kernel=1)
in26 = conv26(out_route13)
print(in26.shape)

# 13x13 -[UPSAMPLE: 2x]-> 26x26
# F.interpolate is used rather than the nn.Upsample module, which was
# flagged as deprecated when this was written.
in26 = F.interpolate(in26, scale_factor=2, mode='nearest')
print('after upsample ', in26.shape)

in26 = torch.cat([in26, y26], dim=1)  # Concatenate
print(in26.shape)

for i, conv in enumerate(convBlock26):
    in26 = conv(in26)
    print('i: {} and in26 size: {}'.format(i, in26.size()))
    if i == 4:
        # Output of the fifth layer is the route feature for the 52x52 branch.
        out_route26 = in26
        
out26 = in26
print(out26.size())
print(out_route26.size())

ConvBnLeakyReLU(
  (conv): Sequential(
    (0): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.1, inplace)
  )
)
torch.Size([1, 256, 13, 13])
after upsample  torch.Size([1, 256, 26, 26])
torch.Size([1, 768, 26, 26])
i: 0 and in26 size: torch.Size([1, 256, 26, 26])
i: 1 and in26 size: torch.Size([1, 512, 26, 26])
i: 2 and in26 size: torch.Size([1, 256, 26, 26])
i: 3 and in26 size: torch.Size([1, 512, 26, 26])
i: 4 and in26 size: torch.Size([1, 256, 26, 26])
i: 5 and in26 size: torch.Size([1, 512, 26, 26])
i: 6 and in26 size: torch.Size([1, 27, 26, 26])
torch.Size([1, 27, 26, 26])
torch.Size([1, 256, 26, 26])


In [9]:
# 52 x 52
final_channels = 3 * (5 + 4)
outCh_list = [256, 512, 1280]

inCh = outCh_list[-3] + 128
channel_list = [128, 256]

convBlock52 = nn.ModuleList([
    ConvBnLeakyReLU(inCh, channel_list[0], 1),
    ConvBnLeakyReLU(channel_list[0], channel_list[1], 3),
    ConvBnLeakyReLU(channel_list[1], channel_list[0], 1),
    ConvBnLeakyReLU(channel_list[0], channel_list[1], 3),
    ConvBnLeakyReLU(channel_list[1], channel_list[0], 1),
    ConvBnLeakyReLU(channel_list[0], channel_list[1], 3),
    # Detection output: plain 1x1 conv with bias, no BN / LeakyReLU, i.e. the
    # same layer type YOLOv3Layer._make_conv_block registers as "ConvOut".
    nn.Conv2d(channel_list[1], final_channels, kernel_size=1, stride=1,
              padding=0, bias=True),
])

print(convBlock52[0])

# Route branch: a 1x1 conv reduces the 26x26 route feature from 256 to 128
# channels; the spatial size is unchanged at this point.
conv52 = ConvBnLeakyReLU(inCh=256, outCh=128, kernel=1)
in52 = conv52(out_route26)
print(in52.shape)

# 26x26 -[UPSAMPLE: 2x]-> 52x52
in52 = F.interpolate(in52, scale_factor=2, mode='nearest')
print(in52.shape)

in52 = torch.cat([in52, y52], dim=1)  # Concatenate
print(in52.shape)

for i, conv in enumerate(convBlock52):
    in52 = conv(in52)
    print('i: {} and in52 size: {}'.format(i, in52.size()))
    if i == 4:
        # Captured only for symmetry with the other scales; there is no
        # finer branch that consumes it.
        out_route52 = in52
        
out52 = in52
print(out52.size())
print('Ignored! ', out_route52.size())

ConvBnLeakyReLU(
  (conv): Sequential(
    (0): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.1, inplace)
  )
)
torch.Size([1, 128, 26, 26])
torch.Size([1, 128, 52, 52])
torch.Size([1, 384, 52, 52])
i: 0 and in52 size: torch.Size([1, 128, 52, 52])
i: 1 and in52 size: torch.Size([1, 256, 52, 52])
i: 2 and in52 size: torch.Size([1, 128, 52, 52])
i: 3 and in52 size: torch.Size([1, 256, 52, 52])
i: 4 and in52 size: torch.Size([1, 128, 52, 52])
i: 5 and in52 size: torch.Size([1, 256, 52, 52])
i: 6 and in52 size: torch.Size([1, 27, 52, 52])
torch.Size([1, 27, 52, 52])
Ignored!  torch.Size([1, 128, 52, 52])
