In [1]:
from vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite, create_mobilenetv2_ssd_lite_predictor

In [2]:
import torch
from glob import glob
import cv2
from vision.ssd.data_preprocessing import PredictionTransform
import numpy as np

In [3]:
label_path = 'models/voc.txt'
class_names = [name.strip() for name in open(label_path).readlines()]

In [4]:
net = create_mobilenetv2_ssd_lite(len(class_names), quantized=True, is_test=True, onnx_compatible=True, device='cpu')
net.load('models/mb2-ssd-lite-mp-0_686.pth')
net.cpu().eval()

  self.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage))


SSD(
  (base_net): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2

In [5]:
transform = PredictionTransform(300, np.array([127, 127, 127]), 128.0)

orig_image = cv2.imread("imgs/photo_2024-11-20_15-40-16.jpg")
image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)

In [6]:
predictor = create_mobilenetv2_ssd_lite_predictor(net, candidate_size=200, device='cpu')

In [7]:
boxes, labels, probs = predictor.predict(image, 10, 0.4)

Inference time:  0.12408995628356934


In [8]:
boxes

tensor([[ 75.9228, 216.3671, 226.8937, 374.9238],
        [186.1299,   2.4702, 495.5327, 375.4444]])

In [9]:
predictor.trace(image, "checkpoints/traced_m_v2.pt", onnx=False)

Trace time:  2.393906593322754


In [10]:
ffuse_list = [
    ["base_net.0.0", "base_net.0.1", "base_net.0.2"],

    ["base_net.1.conv.0", "base_net.1.conv.1", "base_net.1.conv.2"],
    ["base_net.1.conv.3", "base_net.1.conv.4"],

    ["base_net.2.conv.0", "base_net.2.conv.1", "base_net.2.conv.2"],
    ["base_net.2.conv.3", "base_net.2.conv.4", "base_net.2.conv.5"],
    ["base_net.2.conv.6", "base_net.2.conv.7"],

    ["base_net.3.conv.0", "base_net.3.conv.1", "base_net.3.conv.2"],
    ["base_net.3.conv.3", "base_net.3.conv.4", "base_net.3.conv.5"],
    ["base_net.3.conv.6", "base_net.3.conv.7"],

    ["base_net.4.conv.0", "base_net.4.conv.1", "base_net.4.conv.2"],
    ["base_net.4.conv.3", "base_net.4.conv.4", "base_net.4.conv.5"],
    ["base_net.4.conv.6", "base_net.4.conv.7"],
    
    ["base_net.5.conv.0", "base_net.5.conv.1", "base_net.5.conv.2"],
    ["base_net.5.conv.3", "base_net.5.conv.4", "base_net.5.conv.5"],
    ["base_net.5.conv.6", "base_net.5.conv.7"],

    ["base_net.6.conv.0", "base_net.6.conv.1", "base_net.6.conv.2"],
    ["base_net.6.conv.3", "base_net.6.conv.4", "base_net.6.conv.5"],
    ["base_net.6.conv.6", "base_net.6.conv.7"],

    ["base_net.7.conv.0", "base_net.7.conv.1", "base_net.7.conv.2"],
    ["base_net.7.conv.3", "base_net.7.conv.4", "base_net.7.conv.5"],
    ["base_net.7.conv.6", "base_net.7.conv.7"],

    ["base_net.8.conv.0", "base_net.8.conv.1", "base_net.8.conv.2"],
    ["base_net.8.conv.3", "base_net.8.conv.4", "base_net.8.conv.5"],
    ["base_net.8.conv.6", "base_net.8.conv.7"],

    ["base_net.9.conv.0", "base_net.9.conv.1", "base_net.9.conv.2"],
    ["base_net.9.conv.3", "base_net.9.conv.4", "base_net.9.conv.5"],
    ["base_net.9.conv.6", "base_net.9.conv.7"],

    ["base_net.10.conv.0", "base_net.10.conv.1", "base_net.10.conv.2"],
    ["base_net.10.conv.3", "base_net.10.conv.4", "base_net.10.conv.5"],
    ["base_net.10.conv.6", "base_net.10.conv.7"],

    ["base_net.11.conv.0", "base_net.11.conv.1", "base_net.11.conv.2"],
    ["base_net.11.conv.3", "base_net.11.conv.4", "base_net.11.conv.5"],
    ["base_net.11.conv.6", "base_net.11.conv.7"],

    ["base_net.12.conv.0", "base_net.12.conv.1", "base_net.12.conv.2"],
    ["base_net.12.conv.3", "base_net.12.conv.4", "base_net.12.conv.5"],
    ["base_net.12.conv.6", "base_net.12.conv.7"],

    ["base_net.13.conv.0", "base_net.13.conv.1", "base_net.13.conv.2"],
    ["base_net.13.conv.3", "base_net.13.conv.4", "base_net.13.conv.5"],
    ["base_net.13.conv.6", "base_net.13.conv.7"],

    ["base_net.14.conv.0", "base_net.14.conv.1", "base_net.14.conv.2"],
    ["base_net.14.conv.3", "base_net.14.conv.4", "base_net.14.conv.5"],
    ["base_net.14.conv.6", "base_net.14.conv.7"],

    ["base_net.15.conv.0", "base_net.15.conv.1", "base_net.15.conv.2"],
    ["base_net.15.conv.3", "base_net.15.conv.4", "base_net.15.conv.5"],
    ["base_net.15.conv.6", "base_net.15.conv.7"],

    ["base_net.16.conv.0", "base_net.16.conv.1", "base_net.16.conv.2"],
    ["base_net.16.conv.3", "base_net.16.conv.4", "base_net.16.conv.5"],
    ["base_net.16.conv.6", "base_net.16.conv.7"],

    ["base_net.17.conv.0", "base_net.17.conv.1", "base_net.17.conv.2"],
    ["base_net.17.conv.3", "base_net.17.conv.4", "base_net.17.conv.5"],
    ["base_net.17.conv.6", "base_net.17.conv.7"],

    ["base_net.18.0", "base_net.18.1", "base_net.18.2"],

    ["extras.0.conv.0", "extras.0.conv.1", "extras.0.conv.2"],
    ["extras.0.conv.3", "extras.0.conv.4", "extras.0.conv.5"],
    ["extras.0.conv.6", "extras.0.conv.7"],
 
    ["extras.1.conv.0", "extras.1.conv.1", "extras.1.conv.2"],
    ["extras.1.conv.3", "extras.1.conv.4", "extras.1.conv.5"],
    ["extras.1.conv.6", "extras.1.conv.7"],

    ["extras.2.conv.0", "extras.2.conv.1", "extras.2.conv.2"],
    ["extras.2.conv.3", "extras.2.conv.4", "extras.2.conv.5"],
    ["extras.2.conv.6", "extras.2.conv.7"],

    ["extras.3.conv.0", "extras.3.conv.1","extras.3.conv.2"],
    ["extras.3.conv.3", "extras.3.conv.4", "extras.3.conv.5"],
    ["extras.3.conv.6", "extras.3.conv.7"],

    ["classification_headers.0.0", "classification_headers.0.1", "classification_headers.0.2"],
    ["classification_headers.1.0", "classification_headers.1.1", "classification_headers.1.2"],
    ["classification_headers.2.0", "classification_headers.2.1", "classification_headers.2.2"],
    ["classification_headers.3.0", "classification_headers.3.1", "classification_headers.3.2"],
    ["classification_headers.4.0", "classification_headers.4.1", "classification_headers.4.2"],

    ["regression_headers.0.0", "regression_headers.0.1", "regression_headers.0.2"],
    ["regression_headers.1.0", "regression_headers.1.1", "regression_headers.1.2"],
    ["regression_headers.2.0", "regression_headers.2.1", "regression_headers.2.2"],
    ["regression_headers.3.0", "regression_headers.3.1", "regression_headers.3.2"],
    ["regression_headers.4.0", "regression_headers.4.1", "regression_headers.4.2"],
]

torch.quantization.fuse_modules(net, ffuse_list, inplace=True)

SSD(
  (base_net): Sequential(
    (0): Sequential(
      (0): ConvReLU2d(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (1): ReLU(inplace=True)
      )
      (1): Identity()
      (2): Identity()
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvReLU2d(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
          (1): ReLU(inplace=True)
        )
        (1): Identity()
        (2): Identity()
        (3): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1))
        (4): Identity()
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvReLU2d(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1))
          (1): ReLU(inplace=True)
        )
        (1): Identity()
        (2): Identity()
        (3): ConvReLU2d(
          (0): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96)
          (1): ReLU(inplace=True)
        

In [11]:
predictor.trace(image, "checkpoints/traced_m_f_v2.pt", onnx=False)

Trace time:  2.7752151489257812


In [12]:
net.qconfig = torch.quantization.get_default_qconfig("qnnpack")

In [13]:
for name, value in net.__dict__.items():
    print(f"{name}: {type(value)}")

training: <class 'bool'>
_parameters: <class 'collections.OrderedDict'>
_buffers: <class 'collections.OrderedDict'>
_non_persistent_buffers_set: <class 'set'>
_backward_pre_hooks: <class 'collections.OrderedDict'>
_backward_hooks: <class 'collections.OrderedDict'>
_is_full_backward_hook: <class 'NoneType'>
_forward_hooks: <class 'collections.OrderedDict'>
_forward_hooks_with_kwargs: <class 'collections.OrderedDict'>
_forward_hooks_always_called: <class 'collections.OrderedDict'>
_forward_pre_hooks: <class 'collections.OrderedDict'>
_forward_pre_hooks_with_kwargs: <class 'collections.OrderedDict'>
_state_dict_hooks: <class 'collections.OrderedDict'>
_state_dict_pre_hooks: <class 'collections.OrderedDict'>
_load_state_dict_pre_hooks: <class 'collections.OrderedDict'>
_load_state_dict_post_hooks: <class 'collections.OrderedDict'>
_modules: <class 'collections.OrderedDict'>
num_classes: <class 'int'>
source_layer_indexes: <class 'list'>
is_test: <class 'bool'>
config: <class 'module'>
quan

In [14]:
model_fp32_prepared = torch.quantization.prepare(net, inplace=True)

In [15]:
model_fp32_prepared

SSD(
  (base_net): Sequential(
    (0): Sequential(
      (0): ConvReLU2d(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (1): ReLU(inplace=True)
        (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
      )
      (1): Identity()
      (2): Identity()
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvReLU2d(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
          (1): ReLU(inplace=True)
          (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
        )
        (1): Identity()
        (2): Identity()
        (3): Conv2d(
          32, 16, kernel_size=(1, 1), stride=(1, 1)
          (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
        )
        (4): Identity()
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvReLU2d(
          (0): Conv2d(16, 96, kernel_size=(1, 1), st

In [16]:
images = glob("imgs/*")

for image_c in images:
    image_c = cv2.imread(image_c)
    image_c = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
    image_c = transform(image_c)
    image_c = image_c.unsqueeze(0).cpu()
    with torch.no_grad():
        out = model_fp32_prepared(image_c)

In [17]:
model_fp32_prepared

SSD(
  (base_net): Sequential(
    (0): Sequential(
      (0): ConvReLU2d(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (1): ReLU(inplace=True)
        (activation_post_process): HistogramObserver(min_val=0.0, max_val=2.321777820587158)
      )
      (1): Identity()
      (2): Identity()
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvReLU2d(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
          (1): ReLU(inplace=True)
          (activation_post_process): HistogramObserver(min_val=0.0, max_val=5.651936054229736)
        )
        (1): Identity()
        (2): Identity()
        (3): Conv2d(
          32, 16, kernel_size=(1, 1), stride=(1, 1)
          (activation_post_process): HistogramObserver(min_val=-7.1643524169921875, max_val=6.572861671447754)
        )
        (4): Identity()
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvReLU2

In [18]:
model_quantized = torch.quantization.convert(model_fp32_prepared, inplace=True)

In [19]:
predictor = create_mobilenetv2_ssd_lite_predictor(model_quantized, candidate_size=200, device='cpu')
boxes, labels, probs = predictor.predict(image, 10, 0.3)

Inference time:  0.06508350372314453


In [20]:
predictor.trace(image, "checkpoints/traced_m_f_q_v2.pt", onnx=False)

Trace time:  3.96474552154541


In [21]:
boxes

tensor([[ 75.2000, 218.1068, 230.3542, 374.6018],
        [185.8052,   1.7361, 494.7440, 376.7361]])