In [1]:
from backbone import EfficientDetBackbone
from efficientdet.model import Regressor, Classifier
from efficientdet.loss import FocalLoss, calc_iou
from efficientdet.utils import Anchors, BBoxTransform
import torch
import numpy as np
import itertools

In [2]:
# Build the detector with two classes, compound_coef=0 and a custom anchor set:
# five aspect-ratio pairs combined with three scales.
model = EfficientDetBackbone(
    num_classes=2,
    compound_coef=0,
    ratios=[
        (1.4, 0.7),
        (1.0, 1.0),
        (0.7, 1.4),
        (0.5, 1.5),
        (0.4, 1.6),
    ],
    scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)],
)

# Alternative kept for reference: the same model with only three aspect-ratio pairs.
# model = EfficientDetBackbone(num_classes=2, compound_coef=0,
#                              ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
#                              scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

In [5]:
# Smoke test: feed one random float32 batch of shape (1, 3, 512, 512) through the
# model and display the shapes of its 2nd, 3rd and 4th outputs.
# The recorded output shows (1, 81840, 4), (1, 81840, 2) and (1, 81840, 4).
img = torch.from_numpy(np.random.randn(1, 3, 512, 512)).float()
a, b, c, d = model(img)
tuple(output.shape for output in (b, c, d))

(torch.Size([1, 81840, 4]),
 torch.Size([1, 81840, 2]),
 torch.Size([1, 81840, 4]))

In [None]:
# NOTE(review): this cell reads `self.*`, `num_anchors` and `num_classes`, none of
# which is defined by an earlier cell visible in this notebook, and it carries no
# execution count. It looks like a snippet pasted from inside a class method
# (presumably for reference) and will raise NameError if run as-is -- confirm
# whether it should be kept.
#
# Builds a Regressor and a Classifier whose channel count, layer count and pyramid
# level count are each looked up by `self.compound_coef`.
regressor = Regressor(in_channels=self.fpn_num_filters[self.compound_coef], num_anchors=num_anchors,
                           num_layers=self.box_class_repeats[self.compound_coef],
                           pyramid_levels=self.pyramid_levels[self.compound_coef])
classifier = Classifier(in_channels=self.fpn_num_filters[self.compound_coef], num_anchors=num_anchors,
                             num_classes=num_classes,
                             num_layers=self.box_class_repeats[self.compound_coef],
                             pyramid_levels=self.pyramid_levels[self.compound_coef])

In [None]:
# NOTE(review): `self` and `kwargs` are not defined in any cell visible here and the
# cell has no execution count, so it cannot run on a fresh kernel. It appears to be
# pasted from a class `__init__` (presumably the backbone's) as a reference for the
# per-coefficient settings -- confirm whether it should stay in the notebook.
self.compound_coef = 0

# Lookup tables with nine entries each; the regressor/classifier cell indexes
# `fpn_num_filters`, `box_class_repeats` and `pyramid_levels` by `compound_coef`.
self.backbone_compound_coef = [0, 1, 2, 3, 4, 5, 6, 6, 7]
self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384, 384]
self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8, 8]
self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5, 5]
self.pyramid_levels = [5, 5, 5, 5, 5, 5, 5, 5, 6]
self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5., 4.]
# Anchor settings fall back to three ratio pairs and three scales when the caller
# does not pass 'ratios' / 'scales'; only the *number* of scales is stored.
self.aspect_ratios = kwargs.get('ratios', [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)])
self.num_scales = len(kwargs.get('scales', [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]))
# Keyed 0..8, three channel counts per key.
conv_channel_coef = {
    # the channels of P3/P4/P5.
    0: [40, 112, 320],
    1: [40, 112, 320],
    2: [48, 120, 352],
    3: [48, 136, 384],
    4: [56, 160, 448],
    5: [64, 176, 512],
    6: [72, 200, 576],
    7: [72, 200, 576],
    8: [80, 224, 640],
}

# One anchor per (aspect ratio, scale) combination.
num_anchors = len(self.aspect_ratios) * self.num_scales

In [7]:
# Settings consumed by the anchor-generation cells below.
# image_shape is indexed as [0] for the y extent and [1] for the x extent there.
image_shape = (512, 512)
anchor_scale = 4.

# One stride per pyramid level: 2 ** level.
pyramid_levels = [2, 3, 4, 5]
strides = [2 ** level for level in pyramid_levels]

# Each ratio pair is used as (x multiplier, y multiplier) on the base anchor size.
ratios = [
    (1.4, 0.7),
    (1.0, 1.0),
    (0.7, 1.4),
    (0.5, 1.5),
    (0.4, 1.6),
]
scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

In [8]:
def build_anchor_boxes(image_shape, strides, scales, ratios, anchor_scale):
    """Generate every anchor box for the given strides as one (M, 4) float array.

    For each stride a grid of box centres is laid out at
    ``stride / 2, stride / 2 + stride, ...`` along both axes, and one box per
    ``(scale, ratio)`` combination is placed on every centre.

    Args:
        image_shape: pair indexed as ``[0]`` = y extent, ``[1]`` = x extent.
        strides: iterable of grid strides, one per pyramid level.
        scales: iterable of multipliers applied to the base anchor size.
        ratios: iterable of ``(x multiplier, y multiplier)`` pairs.
        anchor_scale: factor multiplied with ``stride * scale`` to get the base size.

    Returns:
        ``np.ndarray`` of shape ``(M, 4)`` with rows ``y1, x1, y2, x2``. Rows are
        ordered level by level, then grid cell by grid cell, then by
        ``itertools.product(scales, ratios)`` order within a cell.

    Raises:
        ValueError: if ``image_shape[1]`` is not a multiple of a stride.
    """
    per_level = []
    for stride in strides:
        # The check and the centre grid depend only on the stride, so they are
        # computed once per level rather than once per (scale, ratio) pair.
        if image_shape[1] % stride != 0:
            raise ValueError('input size must be divided by the stride.')
        x = np.arange(stride / 2, image_shape[1], stride)
        y = np.arange(stride / 2, image_shape[0], stride)
        xv, yv = np.meshgrid(x, y)
        xv = xv.reshape(-1)
        yv = yv.reshape(-1)

        per_anchor = []
        for scale, ratio in itertools.product(scales, ratios):
            base_anchor_size = anchor_scale * stride * scale
            half_x = base_anchor_size * ratio[0] / 2.0
            half_y = base_anchor_size * ratio[1] / 2.0
            # y1,x1,y2,x2 -- one row per grid cell, shape (N, 4)
            per_anchor.append(np.stack((yv - half_y, xv - half_x,
                                        yv + half_y, xv + half_x), axis=1))

        # Stack to N x A x 4 so the anchors of one grid cell stay adjacent,
        # then flatten to (N * A, 4).
        per_level.append(np.stack(per_anchor, axis=1).reshape([-1, 4]))

    return np.vstack(per_level)


# Tensor of shape (1, M, 4) built from the settings cell above.
anchor_boxes = torch.from_numpy(
    build_anchor_boxes(image_shape, strides, scales, ratios, anchor_scale)
).unsqueeze(0)



In [10]:
# Display the shape of the full anchor tensor built in the previous cell.
anchor_boxes.shape

torch.Size([1, 326400, 4])

In [94]:
# Variant of the full anchor cell that keeps a single (scale, ratio) pair --
# scales[0] with ratios[3] -- so each grid cell contributes exactly one box.
# NOTE(review): the output recorded for the next cell (87296 rows) equals the cell
# count for five levels with strides 2, 4, 8, 16, 32 on a 512x512 image, whereas
# strides derived from pyramid_levels = [2, 3, 4, 5] give 21760 rows. The recorded
# run presumably used a different `pyramid_levels`; re-run top to bottom to confirm.
image_shape = (512, 512)
boxes_all = []
for stride in strides:
    scale, ratio = scales[0], ratios[3]
    if image_shape[1] % stride != 0:
        raise ValueError('input size must be divided by the stride.')

    # Half extents of the box along x and y.
    base_anchor_size = anchor_scale * stride * scale
    half_x = base_anchor_size * ratio[0] / 2.0
    half_y = base_anchor_size * ratio[1] / 2.0

    # Box centres: every `stride` pixels, starting at stride / 2, on both axes.
    centres_x, centres_y = np.meshgrid(
        np.arange(stride / 2, image_shape[1], stride),
        np.arange(stride / 2, image_shape[0], stride),
    )
    centres_x = centres_x.reshape(-1)
    centres_y = centres_y.reshape(-1)

    # y1,x1,y2,x2 -- already (N, 4) because there is one anchor per cell.
    boxes_all.append(np.stack((centres_y - half_y, centres_x - half_x,
                               centres_y + half_y, centres_x + half_x), axis=1))

anchor_boxes = torch.from_numpy(np.vstack(boxes_all)).unsqueeze(0)



In [95]:
# Display the shape of the single-anchor tensor. The former `.long()` cast built an
# int64 copy of the whole tensor only to read `.shape`, which a dtype cast does not
# change, so it is dropped.
anchor_boxes.shape

torch.Size([1, 87296, 4])