In [84]:
import cv2
import torch
from torch import nn
import torch.nn.functional as F
from torchvision.models import VGG
from torchvision.models.vgg import cfg, model_urls
import torch.utils.model_zoo as model_zoo

from ssdmultibox.datasets import TrainPascalDataset
from ssdmultibox.datasets import open_image

In [76]:
def make_layers(cfg, batch_norm=False):
    """Assemble a VGG-style feature extractor from a layer specification.

    Args:
        cfg: iterable of layer specs. The string 'M' inserts a 2x2, stride-2
            max-pool with ``ceil_mode=True``; any other entry is used as the
            output-channel count of a 3x3 convolution with padding 1.
        batch_norm (bool): if True, a ``BatchNorm2d`` is placed between each
            convolution and its ReLU.

    Returns:
        nn.Sequential holding the layers in specification order. The first
        convolution expects 3 input channels.
    """
    modules = []
    channels_in = 3
    for spec in cfg:
        if spec == 'M':
            # ceil_mode=True rounds odd spatial sizes up instead of down.
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
            continue

        modules.append(nn.Conv2d(channels_in, spec, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        channels_in = spec
    return nn.Sequential(*modules)

def vgg16(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D").

    The feature extractor is built by the local ``make_layers`` from
    ``cfg['D']``, so it uses the layer settings defined in that function.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
            (weights are fetched from ``model_urls['vgg16']``).
        **kwargs: forwarded unchanged to the ``VGG`` constructor, except that
            ``init_weights`` is forced to False when ``pretrained`` is True.

    Returns:
        The constructed ``VGG`` instance.
    """
    if pretrained:
        # Weights are about to be overwritten, so skip VGG's own initialisation.
        kwargs['init_weights'] = False

    features = make_layers(cfg['D'])
    net = VGG(features, **kwargs)
    if not pretrained:
        return net

    state = model_zoo.load_url(model_urls['vgg16'])
    net.load_state_dict(state)
    return net

In [77]:
# Build the VGG16 base network via the local vgg16() helper with pretrained weights loaded.
model = vgg16(pretrained=True)

In [78]:
# Show the module repr; the indices listed under `features` are the layer positions used when slicing the network.
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
    (17): Conv2d(25

# SSD auxiliary network

In [51]:
# Set up placeholders that will capture the SSD feature maps.
# Each key names one feature-map source; values start as None and are
# filled in as the corresponding activations are computed.

feat_layers = ['block4', 'block7', 'block8', 'block9', 'block10', 'block11']
blocks = dict.fromkeys(feat_layers)
blocks

{'block4': None,
 'block7': None,
 'block8': None,
 'block9': None,
 'block10': None,
 'block11': None}

In [14]:
# TODO: make `grid_size=4` a default
# grid_size is handed positionally to TrainPascalDataset; its exact meaning is
# defined in ssdmultibox.datasets, which is not shown in this notebook.
grid_size = 4
dataset = TrainPascalDataset(grid_size)

In [15]:
# Image paths exposed by the dataset; indexed by integer position in the next cell.
image_paths = dataset.images()

In [17]:
# Pick a single sample image to push through the network.
# NOTE(review): index 17 looks like an arbitrary example choice - confirm.
image_id = 17
image_path = image_paths[image_id]

In [18]:
# Load the image with the project's open_image helper.
# The recorded shape (364, 480, 3) shows a 3-channel array with channels last.
im = open_image(image_path)
im.shape

(364, 480, 3)

In [21]:
# Resize to a square SIZE x SIZE input. The aspect ratio is not preserved
# (the 364x480 image is stretched to 300x300).
# NOTE: `im` is overwritten, so the original-resolution image is no longer available afterwards.
SIZE = 300
im = cv2.resize(im, (SIZE, SIZE))
im.shape

(300, 300, 3)

In [57]:
# Convert the resized image array to a tensor; the layout is still channels-last at this point.
tim = torch.tensor(im)
tim.shape

torch.Size([300, 300, 3])

In [58]:
# Move the channel axis to the front: (300, 300, 3) -> (3, 300, 300).
# NOTE: `tim` is reassigned in place, so this cell is not idempotent -
# running it a second time permutes the axes again.
tim = tim.permute(2, 0, 1)
tim.shape

torch.Size([3, 300, 300])

In [59]:
# Add a leading batch dimension of size 1: (3, 300, 300) -> (1, 3, 300, 300).
# NOTE: not idempotent - each re-run of this cell adds another leading dimension.
tim = tim.unsqueeze(0)
tim.shape

torch.Size([1, 3, 300, 300])

In [62]:
# Sanity check: the input should be a single-image batch in channels-first layout.
tim.shape

torch.Size([1, 3, 300, 300])

In [52]:
# VGG16 base network output: run the whole `features` stack (all pooling layers included).
# NOTE(review): the recorded 9x9 result matches floor-mode pooling (300 -> 150 -> 75 -> 37 -> 18 -> 9).
# With ceil_mode=True, as set in make_layers, a 300x300 input should give 10x10.
# This output was probably recorded before that change - re-run on a fresh kernel to confirm.
outputs = model.features(tim)
outputs.shape

torch.Size([1, 512, 9, 9])

## block4

In [81]:
# Walk the VGG16 feature layers one at a time so that an intermediate
# activation can be captured. After the loop, `x` holds the base-network
# output that the conv6 cell consumes.
#
# NOTE(review): per the VGG16 "D" layout, index 22 should be the ReLU after the
# last conv of the fourth stage and index 29 the ReLU just before the final
# max-pool - confirm against the printed model.
BLOCK4_IDX = 22
LAST_BASE_IDX = 29

x = tim
for i, layer in enumerate(model.features):
    # Log the input shape seen by each layer.
    print(i, x.shape, layer)
    x = layer(x)
    if i == BLOCK4_IDX:
        blocks['block4'] = x

    # Don't call the final max-pool layer because we want an output shape of (512, 19, 19).
    if i == LAST_BASE_IDX:
        break

print('block4:', blocks['block4'].shape)
print('base output:', x.shape)

0 torch.Size([1, 3, 300, 300]) Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1 torch.Size([1, 64, 300, 300]) ReLU(inplace)
2 torch.Size([1, 64, 300, 300]) Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
3 torch.Size([1, 64, 300, 300]) ReLU(inplace)
4 torch.Size([1, 64, 300, 300]) MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
5 torch.Size([1, 64, 150, 150]) Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
6 torch.Size([1, 128, 150, 150]) ReLU(inplace)
7 torch.Size([1, 128, 150, 150]) Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
8 torch.Size([1, 128, 150, 150]) ReLU(inplace)
9 torch.Size([1, 128, 150, 150]) MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
10 torch.Size([1, 128, 75, 75]) Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
11 torch.Size([1, 256, 75, 75]) ReLU(inplace)
12 torch.Size([1, 256, 75, 75]) Conv2d(256, 256, kernel_size=

## block 6 + 7

In [85]:
# conv6: 3x3 convolution widening 512 -> 1024 channels; padding=1 keeps the 19x19 spatial size.
# The layer is constructed here, so it carries freshly initialised weights rather than pretrained ones.
# Depends on `x` left behind by the layer-by-layer loop in the block4 section.
conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
# NOTE(review): F.dropout2d defaults to training=True, so whole channels are randomly
# zeroed on every call and the values of out6 are not reproducible without a seed.
# Confirm dropout is intended here.
out6 = F.dropout2d(F.relu(conv6(x)))
out6.shape

torch.Size([1, 1024, 19, 19])

In [90]:
# conv7: 1x1 convolution keeping 1024 channels and the 19x19 spatial size.
conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
# NOTE(review): as with conv6, F.dropout2d is active by default (training=True) - confirm intent.
out7 = F.dropout2d(F.relu(conv7(out6)))
# The conv7 activation is stored as the second feature-map source.
blocks['block7'] = out7
out7.shape

torch.Size([1, 1024, 19, 19])

In [89]:
# List the feature-map slots; the keys stay fixed while the values are filled in cell by cell.
blocks.keys()

dict_keys(['block4', 'block7', 'block8', 'block9', 'block10', 'block11'])

## Block 8/9/10/11

block8

In [116]:
# conv8: 1x1 bottleneck reducing 1024 -> 256 channels.
# padding=1 on a 1x1 kernel grows the map from 19x19 to 21x21, so the unpadded
# 3x3 / stride-2 convolution that follows lands on 10x10.
# NOTE(review): the padded border leaves this layer as relu(bias) rather than zeros.
# Putting padding=1 on the 3x3 convolution instead gives the same final size
# without that artefact - confirm which is intended.
conv8 = nn.Conv2d(1024, 256, kernel_size=1, padding=1)
out8 = F.relu(conv8(out7))
out8.shape

torch.Size([1, 256, 21, 21])

In [124]:
# conv8_2: 3x3 / stride-2 convolution, 256 -> 512 channels, no padding: 21x21 -> 10x10.
conv8_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2)
out8_2 = F.relu(conv8_2(out8))
# Stored as the block8 feature map.
blocks['block8'] = out8_2
out8_2.shape

torch.Size([1, 512, 10, 10])

block9

In [118]:
# conv9: 1x1 bottleneck reducing 512 -> 128 channels; padding=1 grows 10x10 to 12x12.
# NOTE(review): same padding-on-1x1 pattern as conv8 (border values are relu(bias)) - confirm intent.
conv9 = nn.Conv2d(512, 128, kernel_size=1, padding=1)
out9 = F.relu(conv9(out8_2))
out9.shape

torch.Size([1, 128, 12, 12])

In [125]:
# conv9_2: 3x3 / stride-2 convolution, 128 -> 256 channels, no padding: 12x12 -> 5x5.
conv9_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2)
out9_2 = F.relu(conv9_2(out9))
# Stored as the block9 feature map.
blocks['block9'] = out9_2
out9_2.shape

torch.Size([1, 256, 5, 5])

block10

In [120]:
# conv10: 1x1 bottleneck reducing 256 -> 128 channels; padding=1 grows 5x5 to 7x7.
# NOTE(review): same padding-on-1x1 pattern as conv8 (border values are relu(bias)) - confirm intent.
conv10 = nn.Conv2d(256, 128, kernel_size=1, padding=1)
out10 = F.relu(conv10(out9_2))
out10.shape

torch.Size([1, 128, 7, 7])

In [126]:
# conv10_2: 3x3 / stride-2 convolution, 128 -> 256 channels, no padding: 7x7 -> 3x3.
conv10_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2)
out10_2 = F.relu(conv10_2(out10))
# Stored as the block10 feature map.
blocks['block10'] = out10_2
out10_2.shape

torch.Size([1, 256, 3, 3])

block11

In [122]:
# conv11: 1x1 bottleneck reducing 256 -> 128 channels. Unlike conv8-conv10 there is
# no padding here, so the 3x3 spatial size is unchanged.
conv11 = nn.Conv2d(256, 128, kernel_size=1)
out11 = F.relu(conv11(out10_2))
out11.shape

torch.Size([1, 128, 3, 3])

In [127]:
# conv11_2: unpadded 3x3 convolution with stride 1, 128 -> 256 channels,
# collapsing the 3x3 map to 1x1.
conv11_2 = nn.Conv2d(128, 256, kernel_size=3)
out11_2 = F.relu(conv11_2(out11))
# Stored as the block11 (final) feature map.
blocks['block11'] = out11_2
out11_2.shape

torch.Size([1, 256, 1, 1])

In [128]:
# Summarise the shape of every captured feature map.
# This raises AttributeError if any slot is still None, so it also checks that every block was filled.
for k,v in blocks.items():
    print(k, v.shape)

block4 torch.Size([1, 512, 38, 38])
block7 torch.Size([1, 1024, 19, 19])
block8 torch.Size([1, 512, 10, 10])
block9 torch.Size([1, 256, 5, 5])
block10 torch.Size([1, 256, 3, 3])
block11 torch.Size([1, 256, 1, 1])
