In [2]:
import numpy as np
import torch
import torch.nn as nn

from anchors import generate_anchors, SCALES, RATIOS
from dataset import SimpleDataset

# Seed PyTorch's default random number generator for this notebook.
torch.manual_seed(seed=9)

<torch._C.Generator at 0x7f438031cc10>

In [None]:
class Conv2d(nn.Module):
    """Convolution -> batch normalisation -> LeakyReLU building block.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: size of the convolution kernel.
        stride: stride of the convolution (default 1).
        padding: zero-padding added around the input (default 0).
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        # nn.Module must be initialised before any sub-module can be assigned.
        super().__init__()
        self.conv = nn.Sequential(
            # The convolution has no bias; the BatchNorm layer that follows
            # carries its own learnable shift.
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                bias=False,
            ),
            # BatchNorm2d's channel-count argument is named `num_features`.
            nn.BatchNorm2d(num_features=out_channels),
            nn.LeakyReLU(),
        )

    def forward(self, x):
        """Apply the conv / batch-norm / activation stack to ``x``."""
        return self.conv(x)
    
class BoxPredictor(nn.Module):
    """Convolutional head that predicts bounding-box offsets for every anchor.

    Args:
        in_channels: number of channels of the incoming feature map.
        num_anchors: number of anchor boxes predicted per spatial location.
    """

    def __init__(self, in_channels, num_anchors):
        # nn.Module must be initialised before any sub-module can be assigned.
        super().__init__()
        # Output channels = number of anchor boxes * 4 coordinates
        # (x1_offset, y1_offset, x2_offset, y2_offset).
        # A 3x3 kernel with stride 1 and padding 1 keeps the spatial size.
        self.box_predictor = nn.Conv2d(
            in_channels=in_channels,
            out_channels=num_anchors * 4,
            kernel_size=3,
            stride=1,
            padding=1,
        )

    def forward(self, x):
        """Return a tensor with ``num_anchors * 4`` channels and the spatial size of ``x``."""
        return self.box_predictor(x)
    
class ClassPredictor(nn.Module):
    """Convolutional head that predicts class scores for every anchor.

    Args:
        in_channels: number of channels of the incoming feature map.
        num_anchors: number of anchor boxes predicted per spatial location.
        num_classes: number of classes; one extra output per anchor is
            added on top of these (the ``+ 1`` below).
    """

    def __init__(self, in_channels, num_anchors, num_classes):
        # nn.Module must be initialised before any sub-module can be assigned.
        super().__init__()
        # Output channels = number of anchor boxes * (num_classes + 1).
        # A 3x3 kernel with stride 1 and padding 1 keeps the spatial size.
        # NOTE: the attribute keeps its original name `box_predictor` so that
        # parameter names (state_dict keys) are unchanged, even though this
        # layer produces class scores rather than boxes.
        self.box_predictor = nn.Conv2d(
            in_channels=in_channels,
            out_channels=num_anchors * (num_classes + 1),
            kernel_size=3,
            stride=1,
            padding=1,
        )

    def forward(self, x):
        """Return a tensor with ``num_anchors * (num_classes + 1)`` channels and the spatial size of ``x``."""
        return self.box_predictor(x)

class SimpleSSD(nn.Module):
    """Small single-shot detector with prediction heads at four feature-map scales.

    The backbone and each multiscale block produce a feature map; every
    feature map gets its own class-score head and box-offset head.

    The shape comments below are ``[channels, height, width]`` and assume a
    3 x 256 x 256 input image.

    Args:
        num_anchors: number of anchor boxes predicted per spatial location.
        num_classes: number of object classes passed on to the class heads.
    """

    def __init__(self, num_anchors, num_classes):
        # nn.Module must be initialised before any sub-module can be assigned.
        super().__init__()

        self.backbone = nn.Sequential(
            # [3, 256, 256]
            Conv2d(3, 16, 3, 1, 1),
            nn.MaxPool2d(2, 2),

            # [16, 128, 128]
            Conv2d(16, 32, 3, 1, 1),
            nn.MaxPool2d(2, 2),

            # [32, 64, 64]
            Conv2d(32, 64, 3, 1, 1),
            nn.MaxPool2d(2, 2)
        )
        # Backbone output: [64, 32, 32]
        self.backbone_class_predictor = ClassPredictor(64, num_anchors, num_classes)
        self.backbone_box_predictor = BoxPredictor(64, num_anchors)

        self.multiscale_block1 = nn.Sequential(
            # [64, 32, 32]
            Conv2d(64, 128, 3, 1, 1),
            nn.MaxPool2d(2, 2)
        )
        # Block 1 output: [128, 16, 16]
        self.ms1_class_predictor = ClassPredictor(128, num_anchors, num_classes)
        self.ms1_box_predictor = BoxPredictor(128, num_anchors)

        self.multiscale_block2 = nn.Sequential(
            # [128, 16, 16]
            Conv2d(128, 128, 3, 1, 1),
            nn.MaxPool2d(2, 2)
        )
        # Block 2 output: [128, 8, 8]
        self.ms2_class_predictor = ClassPredictor(128, num_anchors, num_classes)
        self.ms2_box_predictor = BoxPredictor(128, num_anchors)

        # [128, 8, 8] -> [128, 1, 1] via global max pooling
        self.multiscale_block3 = nn.AdaptiveMaxPool2d((1, 1))
        self.ms3_class_predictor = ClassPredictor(128, num_anchors, num_classes)
        self.ms3_box_predictor = BoxPredictor(128, num_anchors)

    def forward(self, x):
        """Run the detector and collect per-scale anchors and predictions.

        Args:
            x: batch of input images.

        Returns:
            A tuple ``(anchors, class_preds, box_preds)`` of three lists.
            Each list holds one entry per feature-map scale, ordered
            backbone, multiscale block 1, block 2, block 3.
        """
        anchors, class_preds, box_preds = [], [], []

        # Feature maps, from the finest to the coarsest scale.
        backbone_out = self.backbone(x)
        ms1_out = self.multiscale_block1(backbone_out)
        ms2_out = self.multiscale_block2(ms1_out)
        ms3_out = self.multiscale_block3(ms2_out)

        # Pair every feature map with the heads that were built for it.
        scales = (
            (backbone_out, self.backbone_class_predictor, self.backbone_box_predictor),
            (ms1_out, self.ms1_class_predictor, self.ms1_box_predictor),
            (ms2_out, self.ms2_class_predictor, self.ms2_box_predictor),
            (ms3_out, self.ms3_class_predictor, self.ms3_box_predictor),
        )
        for feature_map, class_predictor, box_predictor in scales:
            anchors.append(generate_anchors(feature_map, SCALES, RATIOS))
            class_preds.append(class_predictor(feature_map))
            box_preds.append(box_predictor(feature_map))

        # TODO: concat these arrays
        return anchors, class_preds, box_preds

In [None]:
# One anchor per scale plus one per ratio, with one shared between the two sets.
# NOTE(review): presumably the anchor count per feature-map location — confirm against generate_anchors.
num_anchors = sum(len(group) for group in (SCALES, RATIOS)) - 1