In [None]:
# !pip install onnx onnxruntime

In [None]:
## tracing

In [1]:
import torch
import torch.onnx
import torch.nn as nn
import torch.nn.init as init
import torch.utils.model_zoo as model_zoo


In [None]:
class SuperResolutionNet(nn.Module):
    def __init__(self, upscale_factor, inplace=False):
        super(SuperResolutionNet, self).__init__()

        self.relu = nn.ReLU(inplace=inplace)
        self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2))
        self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1))
        self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1))
        self.conv4 = nn.Conv2d(32, upscale_factor ** 2, (3, 3), (1, 1), (1, 1))
        self.pixel_shuffle = nn.PixelShuffle(upscale_factor)

        self._initialize_weights()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        x = self.pixel_shuffle(self.conv4(x))
        return x

    def _initialize_weights(self):
        init.orthogonal_(self.conv1.weight, init.calculate_gain('relu'))
        init.orthogonal_(self.conv2.weight, init.calculate_gain('relu'))
        init.orthogonal_(self.conv3.weight, init.calculate_gain('relu'))
        init.orthogonal_(self.conv4.weight)

# 위에서 정의된 모델을 사용하여 초해상도 모델 생성
torch_model = SuperResolutionNet(upscale_factor=3)

In [None]:
# 미리 학습된 가중치를 읽어옵니다
model_url = 'https://s3.amazonaws.com/pytorch/test_data/export/superres_epoch100-44c6958e.pth'
batch_size = 1    # 임의의 수

# 모델을 미리 학습된 가중치로 초기화합니다
map_location = lambda storage, loc: storage
if torch.cuda.is_available():
    map_location = None
torch_model.load_state_dict(model_zoo.load_url(model_url, map_location=map_location))

# 모델을 추론 모드로 전환합니다
torch_model.eval()

In [None]:
x = torch.randn(batch_size, 1, 224, 224, requires_grad=True)
torch_out = torch_model(x)
torch_out

In [None]:
# 모델 변환
torch.onnx.export(torch_model,               # 실행될 모델
                  x,                         # 모델 입력값 (튜플 또는 여러 입력값들도 가능)
                  "./super_resolution_example.onnx",   # 모델 저장 경로 (파일 또는 파일과 유사한 객체 모두 가능)
                  export_params=True,        # 모델 파일 안에 학습된 모델 가중치를 저장할지의 여부
                  opset_version=10,          # 모델을 변환할 때 사용할 ONNX 버전
                  do_constant_folding=True,  # 최적화시 상수폴딩을 사용할지의 여부
                  input_names = ['input'],   # 모델의 입력값을 가리키는 이름
                  output_names = ['output'], # 모델의 출력값을 가리키는 이름
                  dynamic_axes={'input' : {0 : 'batch_size'},    # 가변적인 길이를 가진 차원
                                'output' : {0 : 'batch_size'}})

In [None]:
import onnx
from onnx import shape_inference
path = "../../OpenPCDet/deploy/super_resolution_example.onnx"
onnx.save(onnx.shape_inference.infer_shapes(onnx.load(path)), path)

In [None]:
import onnx

onnx_model = onnx.load("super_resolution_example.onnx")
onnx.checker.check_model(onnx_model)

In [None]:
import onnxruntime
import numpy as np
ort_session = onnxruntime.InferenceSession("super_resolution_example.onnx")

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# ONNX 런타임에서 계산된 결과값
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
ort_outs = ort_session.run(None, ort_inputs)

# ONNX 런타임과 PyTorch에서 연산된 결과값 비교
np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)

print("Exported model has been tested with ONNXRuntime, and the result looks good!")

In [None]:
# from PIL import Image
# import torchvision.transforms as transforms

# img = Image.open("./_static/img/cat.jpg")

# resize = transforms.Resize([224, 224])
# img = resize(img)

# img_ycbcr = img.convert('YCbCr')
# img_y, img_cb, img_cr = img_ycbcr.split()

# to_tensor = transforms.ToTensor()
# img_y = to_tensor(img_y)
# img_y.unsqueeze_(0)

# ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(img_y)}
# ort_outs = ort_session.run(None, ort_inputs)
# img_out_y = ort_outs[0]

In [2]:
class AllDSVTBlocksTRT(nn.Module):
    def __init__(self, dsvtblocks_list, layer_norms_list):
        super().__init__()
        self.layer_norms_list = layer_norms_list
        self.dsvtblocks_list = dsvtblocks_list
    def forward(
        self,
        pillar_features,
        set_voxel_inds_tensor_shift_0,
        set_voxel_inds_tensor_shift_1,
        set_voxel_masks_tensor_shift_0,
        set_voxel_masks_tensor_shift_1,
        pos_embed_tensor,
    ):
        outputs = pillar_features

        residual = outputs
        blc_id = 0
        set_id = 0
        set_voxel_inds = set_voxel_inds_tensor_shift_0[set_id:set_id+1].squeeze(0)
        set_voxel_masks = set_voxel_masks_tensor_shift_0[set_id:set_id+1].squeeze(0)
        pos_embed = pos_embed_tensor[blc_id:blc_id+1, set_id:set_id+1].squeeze(0).squeeze(0)
        # inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed, True)
        inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed)

        outputs = self.dsvtblocks_list[blc_id].encoder_list[set_id](*inputs)
        set_id = 1
        set_voxel_inds = set_voxel_inds_tensor_shift_0[set_id:set_id+1].squeeze(0)
        set_voxel_masks = set_voxel_masks_tensor_shift_0[set_id:set_id+1].squeeze(0)
        pos_embed = pos_embed_tensor[blc_id:blc_id+1, set_id:set_id+1].squeeze(0).squeeze(0)
        # inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed, True)
        inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed)

        outputs = self.dsvtblocks_list[blc_id].encoder_list[set_id](*inputs)

        outputs = self.layer_norms_list[blc_id](residual + outputs)

        residual = outputs
        blc_id = 1
        set_id = 0
        set_voxel_inds = set_voxel_inds_tensor_shift_1[set_id:set_id+1].squeeze(0)
        set_voxel_masks = set_voxel_masks_tensor_shift_1[set_id:set_id+1].squeeze(0)
        pos_embed = pos_embed_tensor[blc_id:blc_id+1, set_id:set_id+1].squeeze(0).squeeze(0)
        inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed)
        # inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed, True)

        outputs = self.dsvtblocks_list[blc_id].encoder_list[set_id](*inputs)
        set_id = 1
        set_voxel_inds = set_voxel_inds_tensor_shift_1[set_id:set_id+1].squeeze(0)
        set_voxel_masks = set_voxel_masks_tensor_shift_1[set_id:set_id+1].squeeze(0)
        pos_embed = pos_embed_tensor[blc_id:blc_id+1, set_id:set_id+1].squeeze(0).squeeze(0)
        inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed)
        # inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed, True)
        outputs = self.dsvtblocks_list[blc_id].encoder_list[set_id](*inputs)

        outputs = self.layer_norms_list[blc_id](residual + outputs)

        residual = outputs
        blc_id = 2
        set_id = 0
        set_voxel_inds = set_voxel_inds_tensor_shift_0[set_id:set_id+1].squeeze(0)
        set_voxel_masks = set_voxel_masks_tensor_shift_0[set_id:set_id+1].squeeze(0)
        pos_embed = pos_embed_tensor[blc_id:blc_id+1, set_id:set_id+1].squeeze(0).squeeze(0)
        # inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed, True)
        inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed)
        outputs = self.dsvtblocks_list[blc_id].encoder_list[set_id](*inputs)
        set_id = 1
        set_voxel_inds = set_voxel_inds_tensor_shift_0[set_id:set_id+1].squeeze(0)
        set_voxel_masks = set_voxel_masks_tensor_shift_0[set_id:set_id+1].squeeze(0)
        pos_embed = pos_embed_tensor[blc_id:blc_id+1, set_id:set_id+1].squeeze(0).squeeze(0)
        # inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed, True)
        inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed)
        outputs = self.dsvtblocks_list[blc_id].encoder_list[set_id](*inputs)

        outputs = self.layer_norms_list[blc_id](residual + outputs)

        residual = outputs
        blc_id = 3
        set_id = 0
        set_voxel_inds = set_voxel_inds_tensor_shift_1[set_id:set_id+1].squeeze(0)
        set_voxel_masks = set_voxel_masks_tensor_shift_1[set_id:set_id+1].squeeze(0)
        pos_embed = pos_embed_tensor[blc_id:blc_id+1, set_id:set_id+1].squeeze(0).squeeze(0)
        # inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed, True)
        inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed)
        outputs = self.dsvtblocks_list[blc_id].encoder_list[set_id](*inputs)
        set_id = 1
        set_voxel_inds = set_voxel_inds_tensor_shift_1[set_id:set_id+1].squeeze(0)
        set_voxel_masks = set_voxel_masks_tensor_shift_1[set_id:set_id+1].squeeze(0)
        pos_embed = pos_embed_tensor[blc_id:blc_id+1, set_id:set_id+1].squeeze(0).squeeze(0)
        inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed)
        # inputs = (outputs, set_voxel_inds, set_voxel_masks, pos_embed, True)
        outputs = self.dsvtblocks_list[blc_id].encoder_list[set_id](*inputs)

        outputs = self.layer_norms_list[blc_id](residual + outputs)

        return outputs

In [3]:
from pcdet.config import cfg, cfg_from_yaml_file
from pcdet.models import build_network
from pcdet.datasets import build_dataloader
from pcdet.utils import common_utils
import os
import numpy as np
import torch
# # import onnx
import onnxruntime as ort
import torch.nn as nn

# from typing import Sequence, NamedTuple



In [4]:
# cfg_file = "./onnx_config.yaml"
cfg_file = "../tools/cfgs/waymo_models/dsvt_pillar.yaml"
cfg_from_yaml_file(cfg_file, cfg)
if os.path.exists('./deploy_files')==False:
    os.mkdir('./deploy_files')
log_file = './deploy_files/log_trt.log'
logger = common_utils.create_logger(log_file, rank=0)
test_set, test_loader, sampler = build_dataloader(
    dataset_cfg=cfg.DATA_CONFIG,
    class_names=cfg.CLASS_NAMES,
    batch_size=1,
    dist=False, workers=8, logger=logger, training=False
)

2023-11-16 06:28:58,020   INFO  Loading Waymo dataset
2023-11-16 06:29:00,521   INFO  Total skipped info 0
2023-11-16 06:29:00,523   INFO  Total samples for Waymo dataset: 39987


In [5]:
model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=test_set)


In [6]:
ckpt = "/mnt/nas2/users/eslim/result_log/dsvt_pillar_waymo/ckpt/latest_model.pth"
model.load_params_from_file(filename=ckpt, logger=logger, to_cpu=False, pre_trained_path=None)
model.eval()
model.cuda()

2023-11-16 06:29:02,545   INFO  ==> Loading parameters from checkpoint /mnt/nas2/users/eslim/result_log/dsvt_pillar_waymo/ckpt/latest_model.pth to GPU
2023-11-16 06:29:02,669   INFO  ==> Checkpoint trained from version: pcdet+0.6.0++py88d9ce2
2023-11-16 06:29:02,692   INFO  ==> Done (loaded 391/391)


CenterPoint(
  (vfe): DynamicVoxelVFE(
    (pfn_layers): ModuleList(
      (0): PFNLayerV2(
        (linear): Linear(in_features=11, out_features=96, bias=False)
        (norm): BatchNorm1d(96, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (relu): ReLU()
      )
      (1): PFNLayerV2(
        (linear): Linear(in_features=192, out_features=192, bias=False)
        (norm): BatchNorm1d(192, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (relu): ReLU()
      )
    )
  )
  (backbone_3d): DSVT(
    (input_layer): DSVTInputLayer(
      (posembed_layers): ModuleList(
        (0): ModuleList(
          (0): ModuleList(
            (0): PositionEmbeddingLearned(
              (position_embedding_head): Sequential(
                (0): Linear(in_features=2, out_features=192, bias=True)
                (1): BatchNorm1d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (2): ReLU(inplace=True)
                (3)

In [25]:
# with torch.no_grad():
#     DSVT_Backbone = model.backbone_3d
#     dsvtblocks_list = DSVT_Backbone.stage_0
#     layer_norms_list = DSVT_Backbone.residual_norm_stage_0
#     inputs = model.vfe(inputs)
#     voxel_info = DSVT_Backbone.input_layer(inputs)
#     set_voxel_inds_list = [[voxel_info[f'set_voxel_inds_stage{s}_shift{i}'] for i in range(2)] for s in range(1)]
#     set_voxel_masks_list = [[voxel_info[f'set_voxel_mask_stage{s}_shift{i}'] for i in range(2)] for s in range(1)]
#     pos_embed_list = [[[voxel_info[f'pos_embed_stage{s}_block{b}_shift{i}'] for i in range(2)] for b in range(4)] for s in range(1)]
# inputs['voxel_features'].size()
# # print(len(set_voxel_masks_list))
# # pos_embed_list

1


In [33]:
# set_voxel_masks_list[0][1]

tensor([[[False,  True,  True,  ...,  True,  True,  True],
         [False,  True,  True,  ...,  True,  True,  True],
         [False,  True,  True,  ...,  True,  True,  True],
         ...,
         [False,  True, False,  ..., False,  True, False],
         [False,  True,  True,  ...,  True,  True,  True],
         [False,  True,  True,  ...,  True,  True,  True]],

        [[False,  True,  True,  ...,  True,  True,  True],
         [False,  True,  True,  ...,  True,  True,  True],
         [False,  True,  True,  ...,  True,  True,  True],
         ...,
         [False,  True, False,  ..., False,  True, False],
         [False,  True,  True,  ...,  True,  True,  True],
         [False,  True,  True,  ...,  True,  True,  True]]], device='cuda:0')

In [7]:
batch_dict = torch.load("./batch_dict.pth", map_location="cuda")
inputs = batch_dict

In [8]:
with torch.no_grad():
    DSVT_Backbone = model.backbone_3d
    dsvtblocks_list = DSVT_Backbone.stage_0
    layer_norms_list = DSVT_Backbone.residual_norm_stage_0
    inputs = model.vfe(inputs)
    voxel_info = DSVT_Backbone.input_layer(inputs)
    set_voxel_inds_list = [[voxel_info[f'set_voxel_inds_stage{s}_shift{i}'] for i in range(2)] for s in range(1)]
    set_voxel_masks_list = [[voxel_info[f'set_voxel_mask_stage{s}_shift{i}'] for i in range(2)] for s in range(1)]
    pos_embed_list = [[[voxel_info[f'pos_embed_stage{s}_block{b}_shift{i}'] for i in range(2)] for b in range(4)] for s in range(1)]

    pillar_features = inputs['voxel_features']
    alldsvtblockstrt_inputs = (
        pillar_features,
        set_voxel_inds_list[0][0],
        set_voxel_inds_list[0][1],
        set_voxel_masks_list[0][0],
        set_voxel_masks_list[0][1],
        torch.stack([torch.stack(v, dim=0) for v in pos_embed_list[0]], dim=0),
    )

In [None]:
# inputs

In [9]:

jit_mode = "trace"
input_names = [
    'src',
    'set_voxel_inds_tensor_shift_0',
    'set_voxel_inds_tensor_shift_1',
    'set_voxel_masks_tensor_shift_0',
    'set_voxel_masks_tensor_shift_1',
    'pos_embed_tensor'
]
output_names = ["output",]
input_shapes = {
    "src": {
        "min_shape": [24629, 192],
        "opt_shape": [24629, 192],
        "max_shape": [24629, 192],
    },
    "set_voxel_inds_tensor_shift_0": {
        "min_shape": [2, 1156, 36],
        "opt_shape": [2, 1156, 36],
        "max_shape": [2, 1156, 36],
    },
    "set_voxel_inds_tensor_shift_1": {
        "min_shape": [2, 834, 36],
        "opt_shape": [2, 834, 36],
        "max_shape": [2, 834, 36],
    },
    "set_voxel_masks_tensor_shift_0": {
        "min_shape": [2, 1156, 36],
        "opt_shape": [2, 1156, 36],
        "max_shape": [2, 1156, 36],
    },
    "set_voxel_masks_tensor_shift_1": {
        "min_shape": [2, 834, 36],
        "opt_shape": [2, 834, 36],
        "max_shape": [2, 834, 36],
    },
    "pos_embed_tensor": {
        "min_shape": [4, 2, 24629, 192],
        "opt_shape": [4, 2, 24629, 192],
        "max_shape": [4, 2, 24629, 192],
    },
}

dynamic_axes = {
    "src": {
        0: "voxel_number",
    },
    "set_voxel_inds_tensor_shift_0": {
        1: "set_number_shift_0",
    },
    "set_voxel_inds_tensor_shift_1": {
        1: "set_number_shift_1",
    },
    "set_voxel_masks_tensor_shift_0": {
        1: "set_number_shift_0",
    },
    "set_voxel_masks_tensor_shift_1": {
        1: "set_number_shift_1",
    },
    "pos_embed_tensor": {
        2: "voxel_number",
    },
    "output": {
        0: "voxel_number",
    }
}

In [10]:
base_name = "./deploy_files/dsvt"
ts_path = f"{base_name}.ts"
onnx_path = f"{base_name}.onnx"

allptransblocktrt = AllDSVTBlocksTRT(dsvtblocks_list, layer_norms_list).eval().cuda()

torch.onnx.export(
    allptransblocktrt,
    alldsvtblockstrt_inputs,
    onnx_path, input_names=input_names,
    output_names=output_names, dynamic_axes=dynamic_axes,
    opset_version=14,
)
# https://github.com/Haiyang-W/DSVT/issues/60

In [13]:
# test onnx
ort_session = ort.InferenceSession(onnx_path)
def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# compute ONNX Runtime output prediction
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(pillar_features),
              ort_session.get_inputs()[1].name: to_numpy(set_voxel_inds_list[0][0]),
              ort_session.get_inputs()[2].name: to_numpy(set_voxel_inds_list[0][1]),
              ort_session.get_inputs()[3].name: to_numpy(set_voxel_masks_list[0][0]),
              ort_session.get_inputs()[4].name: to_numpy(set_voxel_masks_list[0][1]),
              ort_session.get_inputs()[5].name: to_numpy(torch.stack([torch.stack(v, dim=0) for v in pos_embed_list[0]], dim=0)),}
ort_outs = ort_session.run(None, ort_inputs)
# ort_outs


In [34]:
##Trt##

In [38]:
# ! python3 -m pip install --upgrade tensorrt
# 

In [37]:
# !trtexec --onnx=./deploy_files/dsvt.onnx  --saveEngine=./deploy_files/dsvt.engine \
# --memPoolSize=workspace:4096 --verbose --buildOnly --device=1 --fp16 \
# --tacticSources=+CUDNN,+CUBLAS,-CUBLAS_LT,+EDGE_MASK_CONVOLUTIONS \
# --minShapes=src:3000x192,set_voxel_inds_tensor_shift_0:2x170x36,set_voxel_inds_tensor_shift_1:2x100x36,set_voxel_masks_tensor_shift_0:2x170x36,set_voxel_masks_tensor_shift_1:2x100x36,pos_embed_tensor:4x2x3000x192 \
# --optShapes=src:20000x192,set_voxel_inds_tensor_shift_0:2x1000x36,set_voxel_inds_tensor_shift_1:2x700x36,set_voxel_masks_tensor_shift_0:2x1000x36,set_voxel_masks_tensor_shift_1:2x700x36,pos_embed_tensor:4x2x20000x192 \
# --maxShapes=src:35000x192,set_voxel_inds_tensor_shift_0:2x1500x36,set_voxel_inds_tensor_shift_1:2x1200x36,set_voxel_masks_tensor_shift_0:2x1500x36,set_voxel_masks_tensor_shift_1:2x1200x36,pos_embed_tensor:4x2x35000x192 \
# > debug.log 2>&1