## Pytorch

In [29]:
import torch
from det3d.torchie import Config
from det3d.models import build_detector
from det3d.torchie.trainer import load_checkpoint

config = "/workspace/centerformer/configs/nusc/nuscenes_centerformer_poolformer.py"

cfg = Config.fromfile(config)
checkpoint_path = "/workspace/centerformer/work_dirs/nuscenes_poolformer/poolformer.pth"

model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
checkpoint = load_checkpoint(model, checkpoint_path, map_location="cpu")
model.cuda()
model.eval();

[12/19/2023-01:50:55] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
Use HM Bias:  -2.19


In [30]:
import pickle
pickle_dir = "/workspace/centerformer/work_dirs/partition/sample_data/"

with open(pickle_dir + "findcenter_input.pkl", 'rb') as handle:
    x = pickle.load(handle)

In [31]:
with torch.no_grad():
    ct_feat, center_pos_embedding, out_scores, out_labels, out_orders, out_masks = model.neck.find_centers(x)

## onnxruntime

In [32]:
import onnxruntime as ort
import numpy as np

onnx_path = "/data/centerformer/work_dirs/partition/onnx/findCenter_sanitized.onnx"

with open(pickle_dir + "findcenter_input.pkl", 'rb') as handle:
    x = pickle.load(handle)
input_ort = x.cpu().numpy()
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
ort_sess = ort.InferenceSession(onnx_path, providers=providers)
output_names = [output.name for output in ort_sess.get_outputs()]

In [33]:
ct_feat_ort, center_pos_embedding_ort, out_scores_ort, out_labels_ort, out_orders_ort, out_masks_ort = ort_sess.run(output_names, {'input_tensor': input_ort})

In [34]:
ct_feat_ort = torch.from_numpy(ct_feat_ort).cuda()
center_pos_embedding_ort = torch.from_numpy(center_pos_embedding_ort).cuda()
out_scores_ort = torch.from_numpy(out_scores_ort).cuda()
out_labels_ort = torch.from_numpy(out_labels_ort).cuda()
out_orders_ort = torch.from_numpy(out_orders_ort).cuda()
out_masks_ort = torch.from_numpy(out_masks_ort).cuda()

In [35]:
print(ct_feat[0][0][:10]) #torch
print(ct_feat_ort[0][0][:10]) #onnxruntime

tensor([0.0000, 0.0000, 0.4185, 0.2847, 0.0000, 0.0000, 0.0000, 0.0000, 0.2222,
        0.9349], device='cuda:0')
tensor([0.0000, 0.0000, 0.4185, 0.2847, 0.0000, 0.0000, 0.0000, 0.0000, 0.2222,
        0.9349], device='cuda:0')


In [36]:
torch.testing.assert_close(ct_feat, ct_feat_ort, rtol=1e-03, atol=1e-05)

AssertionError: Tensor-likes are not close!

Mismatched elements: 1098 / 768000 (0.1%)
Greatest absolute difference: 1.414588212966919 at index (0, 1860, 115) (up to 1e-05 allowed)
Greatest relative difference: inf at index (0, 1859, 0) (up to 0.001 allowed)

In [37]:
torch.testing.assert_close(center_pos_embedding, center_pos_embedding_ort, rtol=1e-03, atol=1e-05)

AssertionError: Tensor-likes are not close!

Mismatched elements: 1518 / 768000 (0.2%)
Greatest absolute difference: 0.5689306855201721 at index (0, 2795, 80) (up to 1e-05 allowed)
Greatest relative difference: 9840.962743437765 at index (0, 2795, 104) (up to 0.001 allowed)

In [38]:
torch.testing.assert_close(out_scores, out_scores_ort, rtol=1e-03, atol=1e-05)

In [39]:
torch.testing.assert_close(out_labels, out_labels_ort, rtol=1e-03, atol=1e-05)

AssertionError: Tensor-likes are not close!

Mismatched elements: 2 / 3000 (0.1%)
Greatest absolute difference: 1 at index (5, 0, 295) (up to 1e-05 allowed)
Greatest relative difference: inf at index (5, 0, 295) (up to 0.001 allowed)

In [40]:
torch.testing.assert_close(out_orders, out_orders_ort, rtol=1e-03, atol=1e-05)

AssertionError: Tensor-likes are not close!

Mismatched elements: 6 / 3000 (0.2%)
Greatest absolute difference: 50219 at index (5, 0, 295) (up to 1e-05 allowed)
Greatest relative difference: 2.1216118335723877 at index (3, 0, 360) (up to 0.001 allowed)

In [41]:
torch.testing.assert_close(out_masks, out_masks_ort, rtol=1e-03, atol=1e-05)