In [1]:
import time

import torch
from torch.backends import cudnn

from backbone import EfficientDetBackbone
import cv2
import numpy as np

from efficientdet.utils import BBoxTransform, ClipBoxes
from utils.utils import preprocess, invert_affine, postprocess

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
compound_coef = 0  # passed to EfficientDetBackbone; also indexes input_sizes below
force_input_size = 1024  # set None to use default size
img_path = './datasets/coco/train2017/A1300A102P0800605000101O_01.jpg'
# obj_list = ['TPFP0','TTPIG','TTFBG','TPPP5','TPDPD']
obj_list = ['DEFECT']  # len(obj_list) becomes num_classes of the model

# replace this part with your project's anchor config
anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

# NOTE(review): neither threshold is used in this cell; presumably they are
# meant for a later postprocess step -- confirm.
threshold = 0.2
iou_threshold = 0.2

use_cuda = False     # move input tensor and model to the GPU
use_float16 = False  # run input tensor and model in half precision
cudnn.fastest = False
cudnn.benchmark = False


# ---------------------------------------------------------------------------
# Input preparation
# ---------------------------------------------------------------------------
# tf bilinear interpolation is different from any other's, just make do
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

# Stack the per-image arrays into one batch tensor, then place it on the
# requested device.
x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
if use_cuda:
    x = x.cuda()

# Cast to the working dtype and reorder axes (0, 3, 1, 2).
# presumably NHWC -> NCHW; verify against preprocess' output layout.
x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                             ratios=anchor_ratios, scales=anchor_scales)
# NOTE: both checkpoint loads are disabled, so no weights are loaded in this cell.
# model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
# model.load_state_dict(torch.load(f'./logs/coco/efficientdet-d0_35_26988.pth'))

# model.requires_grad_(False)

# Keep the model on the same device / dtype as `x`; without this, enabling
# use_cuda or use_float16 leaves input and model mismatched.
if use_cuda:
    model = model.cuda()
if use_float16:
    model = model.half()

# Last expression of the cell: switches to eval mode and displays the module tree.
model.eval()

EfficientDetBackbone(
  (bifpn): Sequential(
    (0): BiFPN(
      (conv6_up): SeparableConvBlock(
        (depthwise_conv): Conv2dStaticSamePadding(
          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), groups=64, bias=False)
        )
        (pointwise_conv): Conv2dStaticSamePadding(
          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
        )
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      )
      (conv5_up): SeparableConvBlock(
        (depthwise_conv): Conv2dStaticSamePadding(
          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), groups=64, bias=False)
        )
        (pointwise_conv): Conv2dStaticSamePadding(
          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
        )
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      )
      (conv4_up): SeparableConvBlock(
        (depthwise_conv): Conv2dStaticSamePadding(
      

In [None]:
        # Scratch paste of block-spec strings (pattern: r*_k*_s*_e*_i*_o*_se*).
        # NOTE(review): presumably the backbone's per-stage block arguments
        # (repeats, kernel, stride, expand ratio, in/out filters, SE ratio) -- confirm.
        # The fragment has no enclosing list/assignment and this cell was never executed.
        'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25',
        'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25',
        'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25',
        'r1_k3_s11_e6_i192_o320_se0.25',

In [78]:
# Display the backbone sub-module of the detector to inspect its layer structure.
model.backbone_net

EfficientNet(
  (model): EfficientNet(
    (_conv_stem): Conv2dStaticSamePadding(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    )
    (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=[1], groups=32, bias=False)
        )
        (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduce): Conv2dStaticSamePadding(
          (conv): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
        )
        (_se_expand): Conv2dStaticSamePadding(
          (conv): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
        )
        (_project_conv): Conv2dStaticSamePadding(
          (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )
        (_bn2): BatchNorm2

In [None]:
# Scratch notes, one number pair per line (16 pairs).
# Apart from the first pair (32,32), the second number is always 6x the first
# (16*6=96, 24*6=144, 40*6=240, 80*6=480, 112*6=672, 192*6=1152).
# NOTE(review): presumably (input channels, expanded channels) for each
# backbone block -- confirm, or move into a markdown cell.
32,32
16,96
24,144
24,144
40,240
40,240
80,480
80,480
80,480
112,672
112,672
112,672
192,1152
192,1152
192,1152
192,1152


In [None]:
# Scratch notes: further number pairs; this cell was never executed.
# NOTE(review): looks like (in, out) channel counts, but the meaning is not
# stated anywhere in the notebook -- confirm or remove.
32,16
16,24
144,40,

In [5]:
# Learnable 2-element weight vector, initialised to ones.
# `nn` is not imported anywhere in this notebook, so refer to it through the
# already-imported `torch` package to keep the cell runnable on a fresh kernel.
p6_w1 = torch.nn.Parameter(torch.ones(2, dtype=torch.float32), requires_grad=True)

In [134]:
# Scratch arithmetic.
# NOTE(review): the purpose of this ratio is not recorded in the notebook -- explain or remove.
32/36

0.8888888888888888

In [84]:
from efficientnet.utils import get_same_padding_conv2d

In [113]:
# Two conv factories from the same helper, to be compared below:
#   Conv2d    -- requested with a fixed image size of 4
#   Conv2d_dy -- requested without an image size
Conv2d = get_same_padding_conv2d(image_size=4)
Conv2d_dy = get_same_padding_conv2d()

In [129]:
# Random integer-valued float tensor of shape (3, 5, 5) with values in [0, 10).
# A locally seeded generator makes the cell reproducible on re-run without
# touching PyTorch's global RNG state.
inputs = torch.randint(0, 10, (3, 5, 5),
                       generator=torch.Generator().manual_seed(0),
                       dtype=torch.float)

In [130]:
# Build one stem layer from each factory with identical hyper-parameters so
# their outputs can be compared.
# The second stem must come from Conv2d_dy (the factory created without an
# image size); building both from Conv2d would compare a factory with itself.
# Each layer gets its own randomly initialised weights.
_conv_stem = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=2, bias=False)
_conv_stem_dy = Conv2d_dy(in_channels=3, out_channels=6, kernel_size=3, stride=2, bias=False)

In [132]:
# Feed the same input, with a leading batch axis added, through both stems;
# c1 comes from _conv_stem and c2 from _conv_stem_dy. Then show c1's shape.
c1, c2 = (stem(inputs.unsqueeze(0)) for stem in (_conv_stem, _conv_stem_dy))
c1.shape

torch.Size([1, 6, 3, 3])

In [133]:
# Shape of the second stem's output, for comparison with c1.shape.
c2.shape

torch.Size([1, 6, 3, 3])

In [112]:
# NOTE(review): stale cell -- its execution count (112) predates the cells that
# define c1 above (130/132), and its recorded output differs from the In [132]
# result. Re-run after Restart & Run All, or delete.
c1.shape

torch.Size([1, 6, 1, 1])