In [1]:
from mmdet3d.apis import inference_mono_3d_detector, init_model

# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from copy import deepcopy
from os import path as osp
from pathlib import Path
from typing import Optional, Sequence, Union

import mmengine
import numpy as np
import torch
import torch.nn as nn
from mmengine.config import Config
from mmengine.dataset import Compose, pseudo_collate
from mmengine.registry import init_default_scope
from mmengine.runner import load_checkpoint

from mmdet3d.registry import DATASETS, MODELS
from mmdet3d.structures import Box3DMode, Det3DDataSample, get_box_type
from mmdet3d.structures.det3d_data_sample import SampleList
import cv2

In [2]:
# --- Notebook configuration -------------------------------------------------
# Root of the local sensus-loci checkout and the experiment name are defined
# once so the three derived paths below cannot drift apart.
repo_root = '/home/javier/sensus-loci'
model_name = 'smoke_dla34_dlaneck_gn-all_4xb8-6x_dair-mono3d'

# Info (.pkl) file describing the sample image used in this notebook.
ann_file = f'{repo_root}/sensus/notebooks/002238.pkl'
# Key selecting the camera entry inside each info record's 'images' dict.
cam_type = 'CAM_BACK'
# NOTE(review): not referenced by any visible cell; presumably a camera pitch
# angle - confirm meaning/units or remove.
pitch = 0.2031

config_file = f'{repo_root}/sensus/configs/smoke/{model_name}.py'
checkpoint_file = f'{repo_root}/work_dirs/{model_name}/epoch_100.pth'
device = 'cuda:0'

In [3]:
# Build the detector from config_file / checkpoint_file on the chosen device.
model = init_model(config_file, checkpoint_file, device=device)
# Single image to run through the detector; a later cell checks that its
# basename matches the 'img_path' stored in the info file.
imgs = '/home/javier/sensus-loci/sensus/notebooks/002238.png'

Loads checkpoint by local backend from path: /home/javier/sensus-loci/work_dirs/smoke_dla34_dlaneck_gn-all_4xb8-6x_dair-mono3d/epoch_100.pth
11/02 17:07:48 - mmengine - [4m[97mINFO[0m - ModulatedDeformConvPack neck.dla_up.ida_0.projs.0.conv is upgraded to version 2.
11/02 17:07:48 - mmengine - [4m[97mINFO[0m - ModulatedDeformConvPack neck.dla_up.ida_0.nodes.0.conv is upgraded to version 2.
11/02 17:07:48 - mmengine - [4m[97mINFO[0m - ModulatedDeformConvPack neck.dla_up.ida_1.projs.0.conv is upgraded to version 2.
11/02 17:07:48 - mmengine - [4m[97mINFO[0m - ModulatedDeformConvPack neck.dla_up.ida_1.projs.1.conv is upgraded to version 2.
11/02 17:07:48 - mmengine - [4m[97mINFO[0m - ModulatedDeformConvPack neck.dla_up.ida_1.nodes.0.conv is upgraded to version 2.
11/02 17:07:48 - mmengine - [4m[97mINFO[0m - ModulatedDeformConvPack neck.dla_up.ida_1.nodes.1.conv is upgraded to version 2.
11/02 17:07:48 - mmengine - [4m[97mINFO[0m - ModulatedDeformConvPack neck.dla_up.i

In [4]:
# Remember whether a list/tuple of images was supplied, then always work on a
# list so the following cells can index into it.
is_batch = isinstance(imgs, (list, tuple))
if not is_batch:
    imgs = [imgs]

In [5]:

# The config attached to the model; the test pipeline and box type are read
# from it in the next cell.
cfg = model.cfg
print(cfg)

Config (path: /home/javier/sensus-loci/sensus/configs/smoke/smoke_dla34_dlaneck_gn-all_4xb8-6x_dair-mono3d.py): {'dataset_type': 'KittiDataset', 'data_root': '/home/javier/datasets/DAIR/single-infrastructure-side-mmdet/', 'class_names': ['Pedestrian', 'Cyclist', 'Car'], 'input_modality': {'use_lidar': False, 'use_camera': True}, 'metainfo': {'classes': ['Pedestrian', 'Cyclist', 'Car']}, 'backend_args': None, 'train_pipeline': [{'type': 'LoadImageFromFileMono3D', 'backend_args': None}, {'type': 'LoadAnnotations3D', 'with_bbox': True, 'with_label': True, 'with_attr_label': False, 'with_bbox_3d': True, 'with_label_3d': True, 'with_bbox_depth': True}, {'type': 'RandomFlip3D', 'flip_ratio_bev_horizontal': 0.5}, {'type': 'RandomShiftScale', 'shift_scale': (0.2, 0.4), 'aug_prob': 0.3}, {'type': 'AffineResize', 'img_scale': (1920, 1080), 'down_ratio': 4}, {'type': 'Pack3DDetInputs', 'keys': ['img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d', 'gt_labels_3d', 'centers_2d', 'depths']}], 'tes

In [6]:

# build the data pipeline (deep-copied so cfg itself is never modified)
test_pipeline = deepcopy(cfg.test_dataloader.dataset.pipeline)
test_pipeline = Compose(test_pipeline)
box_type_3d, box_mode_3d = \
    get_box_type(cfg.test_dataloader.dataset.box_type_3d)

# The info file used in this notebook is a bare list of records, whereas
# stock mmdet3d info files wrap that list under a 'data_list' key; accept
# both layouts instead of hard-coding one of them.
data_list = mmengine.load(ann_file)
if isinstance(data_list, dict) and 'data_list' in data_list:
    data_list = data_list['data_list']
assert len(imgs) == len(data_list), (
    f'got {len(imgs)} image(s) but {len(data_list)} info record(s)')

In [7]:
# List the transforms that make up the composed test pipeline.
print(test_pipeline)

Compose(
    LoadImageFromFileMono3D(ignore_empty=False, to_float32=False, color_type='color', imdecode_backend='cv2', backend_args=None)
    AffineResize(img_scale=(1920, 1080), down_ratio=4) 
    Pack3DDetInputs(keys=['img'])(meta_keys=('img_path', 'ori_shape', 'img_shape', 'lidar2img', 'depth2img', 'cam2img', 'pad_shape', 'scale_factor', 'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 'num_pts_feats', 'pcd_trans', 'sample_idx', 'pcd_scale_factor', 'pcd_rotation', 'pcd_rotation_angle', 'lidar_path', 'transformation_3d_flow', 'trans_mat', 'affine_aug', 'sweep_img_metas', 'ori_cam2img', 'cam2global', 'crop_offset', 'img_crop_offset', 'resize_img_shape', 'lidar2cam', 'ori_lidar2img', 'num_ref_frames', 'num_views', 'ego2global', 'axis_align_matrix'))
)


In [8]:

# Walk through what the inference loop does for the first (and only) image.
index = 0
img = imgs[index]
data = []

# get data info containing calib
data_info = data_list[index]
img_path = data_info['images'][cam_type]['img_path']

# The info record must describe the very image we are about to feed in.
info_name, img_name = osp.basename(img_path), osp.basename(img)
if info_name != img_name:
    raise ValueError(f'the info file of {img_path} is not provided.')


In [9]:
# Path recorded in the info file (before it is replaced with `img` below).
print(img_path)

002238.png


In [10]:

# Point the info record at the image we actually want to load.
# Note: this writes into data_info in place.
cam_info = data_info['images'][cam_type]
cam_info['img_path'] = img
print(cam_info['img_path'])


/home/javier/sensus-loci/sensus/notebooks/002238.png


In [11]:
# Keep only the selected camera, so that data_info['images'] holding several
# camera views does not leak extra keys into the sample.
selected_view = data_info['images'][cam_type]
mono_img_info = {cam_type: selected_view}

print(mono_img_info)


{'CAM_BACK': {'img_path': '/home/javier/sensus-loci/sensus/notebooks/002238.png', 'cam2img': [[2183.375019, 0.0, 940.590363], [0.0, 2329.297332, 567.568513], [0.0, 0.0, 1.0]]}}


In [12]:
# NOTE(review): leftover scratch expression - it only echoes a string literal
# and binds no name; candidate for deletion.
'hasdf'

'hasdf'

In [13]:

# Raw sample handed to the test pipeline: the single camera view plus the
# box type / mode resolved from the config.
data_ = {
    'images': mono_img_info,
    'box_type_3d': box_type_3d,
    'box_mode_3d': box_mode_3d,
}
print(data_)

{'images': {'CAM_BACK': {'img_path': '/home/javier/sensus-loci/sensus/notebooks/002238.png', 'cam2img': [[2183.375019, 0.0, 940.590363], [0.0, 2329.297332, 567.568513], [0.0, 0.0, 1.0]]}}, 'box_type_3d': <class 'mmdet3d.structures.bbox_3d.cam_box3d.CameraInstance3DBoxes'>, 'box_mode_3d': <Box3DMode.CAM: 1>}


In [14]:

# Run the composed test pipeline on the raw sample.
# NOTE(review): data_ is rebound to the pipeline output, so this cell is not
# idempotent - re-running it without first re-running the cell that builds
# data_ would feed the pipeline its own output.
data_ = test_pipeline(data_)


In [20]:
# Image tensor produced by the pipeline; compared against a raw video frame
# in a later cell.
datapip = data_['inputs']['img']
print(datapip)

tensor([[[ 36,  40,  43,  ...,  66,  66,  66],
         [ 42,  45,  48,  ...,  66,  66,  66],
         [ 35,  42,  49,  ...,  66,  66,  66],
         ...,
         [  0,   0,   0,  ..., 117, 117, 117],
         [  7,   5,  23,  ..., 117, 117, 117],
         [ 18,  18,  36,  ..., 117, 117, 117]],

        [[ 40,  44,  47,  ...,  75,  75,  75],
         [ 46,  49,  52,  ...,  75,  75,  75],
         [ 39,  46,  53,  ...,  75,  75,  75],
         ...,
         [ 14,  10,  11,  ..., 110, 110, 110],
         [ 17,  15,  30,  ..., 110, 110, 110],
         [ 28,  28,  43,  ..., 110, 110, 110]],

        [[ 35,  39,  42,  ...,  69,  69,  69],
         [ 41,  44,  47,  ...,  69,  69,  69],
         [ 34,  41,  48,  ...,  69,  69,  69],
         ...,
         [ 25,  21,  20,  ..., 102, 102, 102],
         [ 29,  27,  41,  ..., 102, 102, 102],
         [ 40,  40,  54,  ..., 102, 102, 102]]], dtype=torch.uint8)


In [17]:
import mmcv

In [18]:
# Grab one frame straight from the video file.
video = mmcv.VideoReader('/home/javier/sensus-loci/output.mp4')
frame = video[1050]
# Copy the numpy frame into a uint8 tensor and move its last axis to the
# front, giving the same axis order as the pipeline's image tensor.
torch_image = torch.tensor(frame, dtype=torch.uint8).permute(2, 0, 1)
print(torch_image)

tensor([[[ 36,  40,  43,  ...,  66,  66,  66],
         [ 42,  45,  48,  ...,  66,  66,  66],
         [ 35,  42,  49,  ...,  66,  66,  66],
         ...,
         [  0,   0,   0,  ..., 117, 117, 117],
         [  7,   5,  23,  ..., 117, 117, 117],
         [ 18,  18,  36,  ..., 117, 117, 117]],

        [[ 40,  44,  47,  ...,  75,  75,  75],
         [ 46,  49,  52,  ...,  75,  75,  75],
         [ 39,  46,  53,  ...,  75,  75,  75],
         ...,
         [ 14,  10,  11,  ..., 110, 110, 110],
         [ 17,  15,  30,  ..., 110, 110, 110],
         [ 28,  28,  43,  ..., 110, 110, 110]],

        [[ 35,  39,  42,  ...,  69,  69,  69],
         [ 41,  44,  47,  ...,  69,  69,  69],
         [ 34,  41,  48,  ...,  69,  69,  69],
         ...,
         [ 25,  21,  20,  ..., 102, 102, 102],
         [ 29,  27,  41,  ..., 102, 102, 102],
         [ 40,  40,  54,  ..., 102, 102, 102]]], dtype=torch.uint8)


In [19]:
# Check whether the hand-converted video frame and the pipeline's image
# tensor are identical (torch.equal compares both size and element values).
are_equal = torch.equal(torch_image, datapip)
print(are_equal)

True


In [26]:
# Inspect the packed sample without dumping the whole image tensor.
# A shallow copy carries the 'hola' placeholder; data_['inputs'] itself is
# left intact because the collate / test_step cells below still need the real
# tensor (overwriting it in place breaks a top-to-bottom run).
print({**data_, 'inputs': 'hola'})

{'data_samples': <Det3DDataSample(

    META INFORMATION
    trans_mat: array([[ 0.25, -0.  ,  0.  ],
               [ 0.  ,  0.25,  0.  ],
               [ 0.  ,  0.  ,  1.  ]], dtype=float32)
    img_shape: (1080, 1920, 3)
    ori_shape: (1080, 1920)
    box_mode_3d: <Box3DMode.CAM: 1>
    img_path: '/home/javier/sensus-loci/sensus/notebooks/002238.png'
    affine_aug: False
    pad_shape: (1080, 1920, 3)
    box_type_3d: <class 'mmdet3d.structures.bbox_3d.cam_box3d.CameraInstance3DBoxes'>
    cam2img: [[2183.375019, 0.0, 940.590363], [0.0, 2329.297332, 567.568513], [0.0, 0.0, 1.0]]

    DATA FIELDS
    eval_ann_info: None
    gt_pts_seg: <PointData(
        
            META INFORMATION
        
            DATA FIELDS
        ) at 0x7fa2f1a0ff10>
    gt_instances_3d: <InstanceData(
        
            META INFORMATION
        
            DATA FIELDS
        ) at 0x7fa34a11f220>
    gt_instances: <InstanceData(
        
            META INFORMATION
        
            DATA FIELDS

In [23]:
# A batch containing just the one processed sample.
data = [data_]

collate_data = pseudo_collate(data)

In [24]:

# forward the model
# Gradients are disabled because this is inference only.
with torch.no_grad():
    results = model.test_step(collate_data)

# One result entry per sample in the collated batch is printed here.
print(results)

[<Det3DDataSample(

    META INFORMATION
    cam2img: [[2183.375019, 0.0, 940.590363], [0.0, 2329.297332, 567.568513], [0.0, 0.0, 1.0]]
    img_path: '/home/javier/sensus-loci/sensus/notebooks/002238.png'
    pad_shape: (1088, 1920)
    img_shape: (1080, 1920, 3)
    trans_mat: array([[ 0.25, -0.  ,  0.  ],
               [ 0.  ,  0.25,  0.  ],
               [ 0.  ,  0.  ,  1.  ]], dtype=float32)
    affine_aug: False
    ori_shape: (1080, 1920)
    box_type_3d: <class 'mmdet3d.structures.bbox_3d.cam_box3d.CameraInstance3DBoxes'>
    batch_input_shape: (1088, 1920)
    box_mode_3d: <Box3DMode.CAM: 1>

    DATA FIELDS
    eval_ann_info: None
    gt_pts_seg: <PointData(
        
            META INFORMATION
        
            DATA FIELDS
        ) at 0x7f6a9601d6d0>
    pred_instances: <InstanceData(
        
            META INFORMATION
        
            DATA FIELDS
        ) at 0x7f6a945b4520>
    pred_instances_3d: <InstanceData(
        
            META INFORMATION
        
  

In [None]:
def inference_mono_3d_detector(model: nn.Module,
                               imgs: Union[str, Sequence[str]],
                               ann_file: Union[str, Sequence[str]],
                               cam_type: str = 'CAM_FRONT'):
    """Inference image with the monocular 3D detector.

    Note:
        The notebook also imports a function of this name from
        ``mmdet3d.apis``; running this cell rebinds the name to this local
        version.

    Args:
        model (nn.Module): The loaded detector. Its ``cfg`` attribute must
            provide ``test_dataloader.dataset.pipeline`` and
            ``test_dataloader.dataset.box_type_3d``.
        imgs (str, Sequence[str]): Path of one image file, or a list/tuple
            of image file paths.
        ann_file (str, Sequence[str]): Annotation (info) file. The loaded
            content may be either a bare list of info records or a dict
            holding that list under the ``'data_list'`` key.
        cam_type (str): Image of Camera chose to infer.
            For kitti dataset, it should be 'CAM_2',
            and for nuscenes dataset, it should be
            'CAM_FRONT'. Defaults to 'CAM_FRONT'.

    Returns:
        :obj:`Det3DDataSample` or list[:obj:`Det3DDataSample`]:
        If imgs is a list or tuple, the same length list type results
        will be returned, otherwise return the detection results directly.

    Raises:
        AssertionError: If the number of images differs from the number of
            info records.
        ValueError: If an image's basename does not match the ``img_path``
            of the info record at the same position.
    """
    is_batch = isinstance(imgs, (list, tuple))
    if not is_batch:
        imgs = [imgs]

    cfg = model.cfg

    # build the data pipeline (deep-copied so the model config is untouched)
    test_pipeline = Compose(deepcopy(cfg.test_dataloader.dataset.pipeline))
    box_type_3d, box_mode_3d = \
        get_box_type(cfg.test_dataloader.dataset.box_type_3d)

    # Accept both a bare list of records and the {'data_list': [...]} layout.
    data_list = mmengine.load(ann_file)
    if isinstance(data_list, dict) and 'data_list' in data_list:
        data_list = data_list['data_list']
    assert len(imgs) == len(data_list), (
        f'got {len(imgs)} image(s) but {len(data_list)} info record(s)')

    data = []
    for img, data_info in zip(imgs, data_list):
        # get data info containing calib
        cam_info = data_info['images'][cam_type]
        img_path = cam_info['img_path']
        if osp.basename(img_path) != osp.basename(img):
            raise ValueError(f'the info file of {img_path} is not provided.')

        # Keep only the requested camera view and point it at ``img``. A copy
        # is used so the loaded info record is not modified.
        mono_img_info = {cam_type: dict(cam_info, img_path=img)}
        data_ = dict(
            images=mono_img_info,
            box_type_3d=box_type_3d,
            box_mode_3d=box_mode_3d)

        data.append(test_pipeline(data_))

    collate_data = pseudo_collate(data)

    # forward the model
    with torch.no_grad():
        results = model.test_step(collate_data)

    return results if is_batch else results[0]
