In [1]:
import os
# Work from inside the mmdetection3d checkout so that the relative config,
# checkpoint and data paths used in later cells resolve.
# The guard keeps this cell safe to re-run: once the kernel is already inside
# the checkout, './mmdetection3d' no longer exists relative to the current
# directory and an unconditional chdir would raise FileNotFoundError.
if os.path.basename(os.getcwd()) != 'mmdetection3d':
    os.chdir('./mmdetection3d')
print(os.getcwd())

/home/015312115/mmdetection3d


In [2]:
from mmdet3d.datasets.kitti_dataset import KittiDataset
from mmdet3d.apis import init_model, inference_detector, show_result_meshlab
from mmdet3d.core.visualizer.show_result import show_multi_modality_result
import cv2
from pathlib import Path
import time
import numpy as np

  'Please follow `getting_started.md` to install MinkowskiEngine.`')


### Initialize Dataset Info

In [3]:
# Dataset location and annotation index (paths are relative to the current
# working directory).
data_root = 'data/kitti'
ann_file = f'{data_root}/kitti_infos_train.pkl'
split = 'training'
pts_prefix = 'velodyne'
classes = ['Pedestrian', 'Cyclist', 'Car']
modality = {'use_lidar': True, 'use_camera': False}

# Pipeline: load 4-dim points from disk, then run a single pass
# (flip=False, pts_scale_ratio=1) whose rotation/scale/translation ranges are
# all set to the identity values, filter points to the configured range and
# collect only the 'points' key.
pipeline = [
    {
        'type': 'LoadPointsFromFile',
        'coord_type': 'LIDAR',
        'load_dim': 4,
        'use_dim': 4,
        'file_client_args': {'backend': 'disk'},
    },
    {
        'type': 'MultiScaleFlipAug3D',
        'img_scale': (1333, 800),
        'pts_scale_ratio': 1,
        'flip': False,
        'transforms': [
            {
                'type': 'GlobalRotScaleTrans',
                'rot_range': [0, 0],
                'scale_ratio_range': [1.0, 1.0],
                'translation_std': [0, 0, 0],
            },
            {'type': 'RandomFlip3D'},
            {
                'type': 'PointsRangeFilter',
                'point_cloud_range': [0, -40, -3, 70.4, 40, 1],
            },
            {
                'type': 'DefaultFormatBundle3D',
                'class_names': classes,
                'with_label': False,
            },
            {'type': 'Collect3D', 'keys': ['points']},
        ],
    },
]

# Dataset handle reused by every visualisation loop below.
kitti_dataset = KittiDataset(
    data_root,
    ann_file,
    split,
    pts_prefix=pts_prefix,
    pipeline=pipeline,
    classes=classes,
    modality=modality,
)

## SECOND 3D Car

In [4]:
# Select the SECOND (car-only) config; `cfg` also names the output folders
# used by the cells below.
cfg = 'hv_second_secfpn_6x8_80e_kitti-3d-car'
config_file = f'./configs/second/{cfg}.py'
checkpoint_file = f'./checkpoints/{cfg}_20200620_230238-393f000c.pth'

# Build the detector on the first GPU and load the checkpoint weights.
model = init_model(config_file, checkpoint_file, device='cuda:0')

  'dir_offset and dir_limit_offset will be depressed and be '


load checkpoint from local path: ./checkpoints/hv_second_secfpn_6x8_80e_kitti-3d-car_20200620_230238-393f000c.pth


In [5]:
# Run inference on the first N frames and write the predicted and ground-truth
# 3D boxes projected onto each frame's camera image.
vis_result_dir = f'./visualize/{cfg}'  # output root for this model; same for every frame
for idx in range(50):
    # get information from dataset
    data_info = kitti_dataset.get_data_info(idx)
    pcd = data_info['pts_filename']

    # get inference result
    result, data = inference_detector(model, pcd)

    # get data for multi-modal result
    kitti_dataset.pre_pipeline(data_info)
    example = kitti_dataset.pipeline(data_info)

    img_path = data_info['img_info']['filename']
    img = cv2.imread(img_path)
    gt_bboxes = data_info['ann_info']['gt_bboxes_3d']
    pred_bboxes = result[0]['boxes_3d']
    img_metas = example['img_metas']
    proj_mat = data_info['lidar2img']

    # Frame id = image file name without its extension. Path.stem removes only
    # the suffix, whereas str.strip('.png') strips any leading/trailing
    # '.', 'p', 'n' or 'g' characters from the name.
    frame_num = Path(img_path).stem

    try:
        # cv2.imread returns None instead of raising when a file cannot be read
        if img is None:
            raise FileNotFoundError(f'could not read image: {img_path}')

        # write multi modality result image
        show_multi_modality_result(
            img, gt_bboxes, pred_bboxes, proj_mat,
            vis_result_dir, frame_num, box_mode='lidar', img_metas=img_metas,
            gt_bbox_color=(255, 0, 0), pred_bbox_color=(0, 255, 0))

        # combine pred and gt results into a single image
        frame_dir = f'{vis_result_dir}/{frame_num}'
        img_pred = cv2.imread(f'{frame_dir}/{frame_num}_pred.png')
        img_gt = cv2.imread(f'{frame_dir}/{frame_num}_gt.png')
        if img_pred is None or img_gt is None:
            raise FileNotFoundError(f'missing rendered images in {frame_dir}')
        img_combined = cv2.addWeighted(img_pred, 0.5, img_gt, 0.5, 0)
        cv2.imwrite(f'{frame_dir}/{frame_num}_comb.png', img_combined)

        show_result_meshlab(data, result, vis_result_dir)
    except Exception as exc:
        # Keep going on per-frame failures but report why. Catching Exception
        # (rather than a bare except) lets KeyboardInterrupt stop the loop.
        print(f'Frame not processed: {frame_num} ({type(exc).__name__}: {exc})')

Frame not processed: 000042
Frame not processed: 000044


In [None]:
# Benchmark inference speed for the currently selected config/checkpoint
# (the script logs a running "fps" figure); limited to 1000 samples here.
!python tools/analysis_tools/benchmark.py {config_file} {checkpoint_file} --samples 1000

### Doesn't work in Jupyter; output from the CLI:
...  
Done image [1000/ 1000], fps: 17.6 img / s  
Done image [1050/ 1000], fps: 17.6 img / s  
Done image [1100/ 1000], fps: 17.6 img / s  


In [None]:
# Evaluate the selected checkpoint with tools/test.py: predictions go to
# results/{cfg}.pkl (--out) and the mAP metrics are requested via --eval.
!python tools/test.py {config_file} {checkpoint_file} --out results/{cfg}.pkl --eval mAP

### Doesn't work in Jupyter; output from the CLI:
----------- AP11 Results ------------

Car AP11@0.70, 0.70, 0.70:
bbox AP11:89.6458, 87.2916, 83.8113
bev  AP11:88.5880, 82.4283, 77.8551
3d   AP11:79.8306, 65.1712, 61.5498
aos  AP11:83.19, 76.79, 73.14
Car AP11@0.70, 0.50, 0.50:
bbox AP11:89.6458, 87.2916, 83.8113
bev  AP11:90.3632, 89.2448, 88.0912
3d   AP11:90.1994, 88.7744, 87.0859
aos  AP11:83.19, 76.79, 73.14

----------- AP40 Results ------------

Car AP40@0.70, 0.70, 0.70:
bbox AP40:95.2842, 88.1286, 85.0026
bev  AP40:91.8911, 84.2700, 79.4809
3d   AP40:79.6917, 66.7353, 60.5476
aos  AP40:87.93, 76.83, 73.28
Car AP40@0.70, 0.50, 0.50:
bbox AP40:95.2842, 88.1286, 85.0026
bev  AP40:96.2568, 93.2672, 89.0795
3d   AP40:95.9810, 91.2978, 88.3686
aos  AP40:87.93, 76.83, 73.28

{'KITTI/Car_3D_AP11_easy_strict': 79.83062966779826, 'KITTI/Car_BEV_AP11_easy_strict': 88.58802421554634, 'KITTI/Car_2D_AP11_easy_strict': 89.64578766776405, 'KITTI/Car_3D_AP11_moderate_strict': 65.17121474728967, 'KITTI/Car_BEV_AP11_moderate_strict': 82.42832885411896, 'KITTI/Car_2D_AP11_moderate_strict': 87.29162882392338, 'KITTI/Car_3D_AP11_hard_strict': 61.549807208906934, 'KITTI/Car_BEV_AP11_hard_strict': 77.85507635089097, 'KITTI/Car_2D_AP11_hard_strict': 83.81125076905737, 'KITTI/Car_3D_AP11_easy_loose': 90.19941548722413, 'KITTI/Car_BEV_AP11_easy_loose': 90.36316578307999, 'KITTI/Car_2D_AP11_easy_loose': 89.64578766776405, 'KITTI/Car_3D_AP11_moderate_loose': 88.77440939634663, 'KITTI/Car_BEV_AP11_moderate_loose': 89.24479160773001, 'KITTI/Car_2D_AP11_moderate_loose': 87.29162882392338, 'KITTI/Car_3D_AP11_hard_loose': 87.08591071849486, 'KITTI/Car_BEV_AP11_hard_loose': 88.0911625655181, 'KITTI/Car_2D_AP11_hard_loose': 83.81125076905737, 'KITTI/Car_3D_AP40_easy_strict': 79.69171431231406, 'KITTI/Car_BEV_AP40_easy_strict': 91.89108370251878, 'KITTI/Car_2D_AP40_easy_strict': 95.284171819415, 'KITTI/Car_3D_AP40_moderate_strict': 66.73525737109627, 'KITTI/Car_BEV_AP40_moderate_strict': 84.2700050122073, 'KITTI/Car_2D_AP40_moderate_strict': 88.12860013375555, 'KITTI/Car_3D_AP40_hard_strict': 60.547591798506204, 'KITTI/Car_BEV_AP40_hard_strict': 79.48087607263709, 'KITTI/Car_2D_AP40_hard_strict': 85.00255713226694, 'KITTI/Car_3D_AP40_easy_loose': 95.98101966783057, 'KITTI/Car_BEV_AP40_easy_loose': 96.25677776446494, 'KITTI/Car_2D_AP40_easy_loose': 95.284171819415, 'KITTI/Car_3D_AP40_moderate_loose': 91.29781710541863, 'KITTI/Car_BEV_AP40_moderate_loose': 93.26717957725904, 'KITTI/Car_2D_AP40_moderate_loose': 88.12860013375555, 'KITTI/Car_3D_AP40_hard_loose': 88.36862254390748, 'KITTI/Car_BEV_AP40_hard_loose': 89.07952990740313, 'KITTI/Car_2D_AP40_hard_loose': 85.00255713226694}

## SECOND 3D 3-class

In [14]:
# Select the SECOND (3-class) config; `cfg` also names the output folders
# used by the cells below.
cfg = 'hv_second_secfpn_6x8_80e_kitti-3d-3class'
config_file = f'./configs/second/{cfg}.py'
checkpoint_file = f'./checkpoints/{cfg}_20210831_022017-ae782e87.pth'

# Build the detector on the first GPU and load the checkpoint weights.
model = init_model(config_file, checkpoint_file, device='cuda:0')

load checkpoint from local path: ./checkpoints/hv_second_secfpn_6x8_80e_kitti-3d-3class_20210831_022017-ae782e87.pth


In [15]:
# Run inference on the first N frames and write the predicted and ground-truth
# 3D boxes projected onto each frame's camera image.
vis_result_dir = f'./visualize/{cfg}'  # output root for this model; same for every frame
for idx in range(50):
    # get information from dataset
    data_info = kitti_dataset.get_data_info(idx)
    pcd = data_info['pts_filename']

    # get inference result
    result, data = inference_detector(model, pcd)

    # get data for multi-modal result
    kitti_dataset.pre_pipeline(data_info)
    example = kitti_dataset.pipeline(data_info)

    img_path = data_info['img_info']['filename']
    img = cv2.imread(img_path)
    gt_bboxes = data_info['ann_info']['gt_bboxes_3d']
    pred_bboxes = result[0]['boxes_3d']
    img_metas = example['img_metas']
    proj_mat = data_info['lidar2img']

    # Frame id = image file name without its extension. Path.stem removes only
    # the suffix, whereas str.strip('.png') strips any leading/trailing
    # '.', 'p', 'n' or 'g' characters from the name.
    frame_num = Path(img_path).stem

    try:
        # cv2.imread returns None instead of raising when a file cannot be read
        if img is None:
            raise FileNotFoundError(f'could not read image: {img_path}')

        # write multi modality result image
        show_multi_modality_result(
            img, gt_bboxes, pred_bboxes, proj_mat,
            vis_result_dir, frame_num, box_mode='lidar', img_metas=img_metas,
            gt_bbox_color=(255, 0, 0), pred_bbox_color=(0, 255, 0))

        # combine pred and gt results into a single image
        frame_dir = f'{vis_result_dir}/{frame_num}'
        img_pred = cv2.imread(f'{frame_dir}/{frame_num}_pred.png')
        img_gt = cv2.imread(f'{frame_dir}/{frame_num}_gt.png')
        if img_pred is None or img_gt is None:
            raise FileNotFoundError(f'missing rendered images in {frame_dir}')
        img_combined = cv2.addWeighted(img_pred, 0.5, img_gt, 0.5, 0)
        cv2.imwrite(f'{frame_dir}/{frame_num}_comb.png', img_combined)

        show_result_meshlab(data, result, vis_result_dir)
    except Exception as exc:
        # Keep going on per-frame failures but report why. Catching Exception
        # (rather than a bare except) lets KeyboardInterrupt stop the loop.
        print(f'Frame not processed: {frame_num} ({type(exc).__name__}: {exc})')

Frame not processed: 000001
Frame not processed: 000023
Frame not processed: 000044


In [None]:
# Benchmark inference speed for the currently selected config/checkpoint
# (the script logs a running "fps" figure).
!python tools/analysis_tools/benchmark.py {config_file} {checkpoint_file}

### Doesn't work in Jupyter; CLI output:
Done image [50 / 2000], fps: 16.1 img / s
Done image [100/ 2000], fps: 16.0 img / s
Done image [150/ 2000], fps: 16.0 img / s

In [None]:
# Evaluate the selected checkpoint with tools/test.py: predictions go to
# results/{cfg}.pkl (--out) and the mAP metrics are requested via --eval.
!python tools/test.py {config_file} {checkpoint_file} --out results/{cfg}.pkl --eval mAP

### Doesn't work in Jupyter; CLI output:
----------- AP11 Results ------------

Pedestrian AP11@0.50, 0.50, 0.50:
bbox AP11:73.7550, 68.4076, 64.6571
bev  AP11:69.2206, 63.5570, 60.4052
3d   AP11:63.3788, 60.5671, 54.0604
aos  AP11:67.36, 61.81, 58.34
Pedestrian AP11@0.50, 0.25, 0.25:
bbox AP11:73.7550, 68.4076, 64.6571
bev  AP11:80.7587, 76.1746, 72.4978
3d   AP11:80.7399, 76.0979, 72.3720
aos  AP11:67.36, 61.81, 58.34
Cyclist AP11@0.50, 0.50, 0.50:
bbox AP11:91.2581, 85.2285, 83.1152
bev  AP11:87.8381, 80.4979, 76.1620
3d   AP11:86.4992, 78.2987, 74.7236
aos  AP11:90.71, 84.50, 82.44
Cyclist AP11@0.50, 0.25, 0.25:
bbox AP11:91.2581, 85.2285, 83.1152
bev  AP11:91.0090, 82.7671, 79.9255
3d   AP11:91.0090, 82.7671, 79.9255
aos  AP11:90.71, 84.50, 82.44
Car AP11@0.70, 0.70, 0.70:
bbox AP11:90.8836, 89.9547, 88.9471
bev  AP11:90.6845, 88.7383, 85.9317
3d   AP11:89.2603, 78.5222, 76.0248
aos  AP11:90.67, 89.26, 87.95
Car AP11@0.70, 0.50, 0.50:
bbox AP11:90.8836, 89.9547, 88.9471
bev  AP11:90.8662, 90.1417, 89.4695
3d   AP11:90.8662, 90.0687, 89.2961
aos  AP11:90.67, 89.26, 87.95

Overall AP11@easy, moderate, hard:
bbox AP11:85.2989, 81.1969, 78.9065
bev  AP11:82.5811, 77.5977, 74.1663
3d   AP11:79.7128, 72.4627, 68.2696
aos  AP11:82.91, 78.52, 76.24

----------- AP40 Results ------------

Pedestrian AP40@0.50, 0.50, 0.50:
bbox AP40:74.3547, 69.4173, 64.2615
bev  AP40:70.2345, 64.4968, 59.4465
3d   AP40:64.9544, 60.2418, 54.0644
aos  AP40:67.39, 62.13, 57.31
Pedestrian AP40@0.50, 0.25, 0.25:
bbox AP40:74.3547, 69.4173, 64.2615
bev  AP40:82.7000, 78.3221, 72.9153
3d   AP40:82.6694, 78.1682, 72.7831
aos  AP40:67.39, 62.13, 57.31
Cyclist AP40@0.50, 0.50, 0.50:
bbox AP40:93.4897, 88.3711, 85.1957
bev  AP40:91.7921, 82.3268, 77.7973
3d   AP40:90.3307, 80.3776, 75.8640
aos  AP40:92.91, 87.55, 84.38
Cyclist AP40@0.50, 0.25, 0.25:
bbox AP40:93.4897, 88.3711, 85.1957
bev  AP40:93.3629, 85.7862, 81.4629
3d   AP40:93.3629, 85.7862, 81.4629
aos  AP40:92.91, 87.55, 84.38
Car AP40@0.70, 0.70, 0.70:
bbox AP40:96.9766, 93.0415, 90.1944
bev  AP40:96.5077, 91.5413, 86.6969
3d   AP40:92.3159, 82.0360, 76.5721
aos  AP40:96.69, 92.26, 89.15
Car AP40@0.70, 0.50, 0.50:
bbox AP40:96.9766, 93.0415, 90.1944
bev  AP40:97.0193, 95.3207, 92.5971
3d   AP40:96.9709, 95.2382, 92.4301
aos  AP40:96.69, 92.26, 89.15

Overall AP40@easy, moderate, hard:
bbox AP40:88.2736, 83.6100, 79.8839
bev  AP40:86.1781, 79.4550, 74.6469
3d   AP40:82.5337, 74.2185, 68.8335
aos  AP40:85.67, 80.65, 76.95

{'KITTI/Pedestrian_3D_AP11_easy_strict': 63.378808836448705, 'KITTI/Pedestrian_BEV_AP11_easy_strict': 69.22062898682243, 'KITTI/Pedestrian_2D_AP11_easy_strict': 73.75497020422756, 'KITTI/Pedestrian_3D_AP11_moderate_strict': 60.56713375934933, 'KITTI/Pedestrian_BEV_AP11_moderate_strict': 63.556973590641356, 'KITTI/Pedestrian_2D_AP11_moderate_strict': 68.4075671949771, 'KITTI/Pedestrian_3D_AP11_hard_strict': 54.06039663605235, 'KITTI/Pedestrian_BEV_AP11_hard_strict': 60.405241976651226, 'KITTI/Pedestrian_2D_AP11_hard_strict': 64.65705156532951, 'KITTI/Pedestrian_3D_AP11_easy_loose': 80.7399435673157, 'KITTI/Pedestrian_BEV_AP11_easy_loose': 80.7587248503988, 'KITTI/Pedestrian_2D_AP11_easy_loose': 73.75497020422756, 'KITTI/Pedestrian_3D_AP11_moderate_loose': 76.09791797431441, 'KITTI/Pedestrian_BEV_AP11_moderate_loose': 76.17458778864685, 'KITTI/Pedestrian_2D_AP11_moderate_loose': 68.4075671949771, 'KITTI/Pedestrian_3D_AP11_hard_loose': 72.37197635528075, 'KITTI/Pedestrian_BEV_AP11_hard_loose': 72.49775995794508, 'KITTI/Pedestrian_2D_AP11_hard_loose': 64.65705156532951, 'KITTI/Cyclist_3D_AP11_easy_strict': 86.49915650409918, 'KITTI/Cyclist_BEV_AP11_easy_strict': 87.83805995449615, 'KITTI/Cyclist_2D_AP11_easy_strict': 91.25808747851212, 'KITTI/Cyclist_3D_AP11_moderate_strict': 78.2987054397223, 'KITTI/Cyclist_BEV_AP11_moderate_strict': 80.49785507397532, 'KITTI/Cyclist_2D_AP11_moderate_strict': 85.22847270041788, 'KITTI/Cyclist_3D_AP11_hard_strict': 74.72358917697693, 'KITTI/Cyclist_BEV_AP11_hard_strict': 76.16198551229279, 'KITTI/Cyclist_2D_AP11_hard_strict': 83.1152134567463, 'KITTI/Cyclist_3D_AP11_easy_loose': 91.00902072313617, 'KITTI/Cyclist_BEV_AP11_easy_loose': 91.00902072313617, 'KITTI/Cyclist_2D_AP11_easy_loose': 91.25808747851212, 'KITTI/Cyclist_3D_AP11_moderate_loose': 82.76713104083579, 'KITTI/Cyclist_BEV_AP11_moderate_loose': 82.76713104083579, 'KITTI/Cyclist_2D_AP11_moderate_loose': 85.22847270041788, 'KITTI/Cyclist_3D_AP11_hard_loose': 79.9255363579469, 
'KITTI/Cyclist_BEV_AP11_hard_loose': 79.9255363579469, 'KITTI/Cyclist_2D_AP11_hard_loose': 83.1152134567463, 'KITTI/Car_3D_AP11_easy_strict': 89.26029076896285, 'KITTI/Car_BEV_AP11_easy_strict': 90.68452363403246, 'KITTI/Car_2D_AP11_easy_strict': 90.88364993215738, 'KITTI/Car_3D_AP11_moderate_strict': 78.52220845552267, 'KITTI/Car_BEV_AP11_moderate_strict': 88.73826840227028, 'KITTI/Car_2D_AP11_moderate_strict': 89.95469772849603, 'KITTI/Car_3D_AP11_hard_strict': 76.02475671188385, 'KITTI/Car_BEV_AP11_hard_strict': 85.93167484758585, 'KITTI/Car_2D_AP11_hard_strict': 88.9470978144287, 'KITTI/Car_3D_AP11_easy_loose': 90.8661842086398, 'KITTI/Car_BEV_AP11_easy_loose': 90.8661842086398, 'KITTI/Car_2D_AP11_easy_loose': 90.88364993215738, 'KITTI/Car_3D_AP11_moderate_loose': 90.06872718339079, 'KITTI/Car_BEV_AP11_moderate_loose': 90.14166345552775, 'KITTI/Car_2D_AP11_moderate_loose': 89.95469772849603, 'KITTI/Car_3D_AP11_hard_loose': 89.29611270994666, 'KITTI/Car_BEV_AP11_hard_loose': 89.46950152045356, 'KITTI/Car_2D_AP11_hard_loose': 88.9470978144287, 'KITTI/Overall_3D_AP11_easy': 79.71275203650357, 'KITTI/Overall_BEV_AP11_easy': 82.58107085845035, 'KITTI/Overall_2D_AP11_easy': 85.29890253829903, 'KITTI/Overall_3D_AP11_moderate': 72.46268255153143, 'KITTI/Overall_BEV_AP11_moderate': 77.59769902229566, 'KITTI/Overall_2D_AP11_moderate': 81.19691254129701, 'KITTI/Overall_3D_AP11_hard': 68.26958084163772, 'KITTI/Overall_BEV_AP11_hard': 74.16630077884328, 'KITTI/Overall_2D_AP11_hard': 78.90645427883484, 'KITTI/Pedestrian_3D_AP40_easy_strict': 64.95443581334301, 'KITTI/Pedestrian_BEV_AP40_easy_strict': 70.23446297110333, 'KITTI/Pedestrian_2D_AP40_easy_strict': 74.35471503152448, 'KITTI/Pedestrian_3D_AP40_moderate_strict': 60.24181698916367, 'KITTI/Pedestrian_BEV_AP40_moderate_strict': 64.49677030489973, 'KITTI/Pedestrian_2D_AP40_moderate_strict': 69.4173046038715, 'KITTI/Pedestrian_3D_AP40_hard_strict': 54.06439663637721, 'KITTI/Pedestrian_BEV_AP40_hard_strict': 
59.44648819763664, 'KITTI/Pedestrian_2D_AP40_hard_strict': 64.26148385763007, 'KITTI/Pedestrian_3D_AP40_easy_loose': 82.6693543031793, 'KITTI/Pedestrian_BEV_AP40_easy_loose': 82.6999659332612, 'KITTI/Pedestrian_2D_AP40_easy_loose': 74.35471503152448, 'KITTI/Pedestrian_3D_AP40_moderate_loose': 78.16815414539106, 'KITTI/Pedestrian_BEV_AP40_moderate_loose': 78.32207905969324, 'KITTI/Pedestrian_2D_AP40_moderate_loose': 69.4173046038715, 'KITTI/Pedestrian_3D_AP40_hard_loose': 72.78308923160193, 'KITTI/Pedestrian_BEV_AP40_hard_loose': 72.91527610736948, 'KITTI/Pedestrian_2D_AP40_hard_loose': 64.26148385763007, 'KITTI/Cyclist_3D_AP40_easy_strict': 90.33072942191971, 'KITTI/Cyclist_BEV_AP40_easy_strict': 91.79209534088216, 'KITTI/Cyclist_2D_AP40_easy_strict': 93.48966504062123, 'KITTI/Cyclist_3D_AP40_moderate_strict': 80.37761775322097, 'KITTI/Cyclist_BEV_AP40_moderate_strict': 82.32682616980748, 'KITTI/Cyclist_2D_AP40_moderate_strict': 88.37105993553249, 'KITTI/Cyclist_3D_AP40_hard_strict': 75.86402264879266, 'KITTI/Cyclist_BEV_AP40_hard_strict': 77.79728850465564, 'KITTI/Cyclist_2D_AP40_hard_strict': 85.1957074441176, 'KITTI/Cyclist_3D_AP40_easy_loose': 93.36294375696464, 'KITTI/Cyclist_BEV_AP40_easy_loose': 93.36294375696464, 'KITTI/Cyclist_2D_AP40_easy_loose': 93.48966504062123, 'KITTI/Cyclist_3D_AP40_moderate_loose': 85.78616184628982, 'KITTI/Cyclist_BEV_AP40_moderate_loose': 85.78616184628982, 'KITTI/Cyclist_2D_AP40_moderate_loose': 88.37105993553249, 'KITTI/Cyclist_3D_AP40_hard_loose': 81.46290145710988, 'KITTI/Cyclist_BEV_AP40_hard_loose': 81.46290145710988, 'KITTI/Cyclist_2D_AP40_hard_loose': 85.1957074441176, 'KITTI/Car_3D_AP40_easy_strict': 92.31585936578668, 'KITTI/Car_BEV_AP40_easy_strict': 96.5076799978313, 'KITTI/Car_2D_AP40_easy_strict': 96.97655747275691, 'KITTI/Car_3D_AP40_moderate_strict': 82.03598987337992, 'KITTI/Car_BEV_AP40_moderate_strict': 91.54127493702988, 'KITTI/Car_2D_AP40_moderate_strict': 93.04151587194201, 'KITTI/Car_3D_AP40_hard_strict': 
76.57210808197235, 'KITTI/Car_BEV_AP40_hard_strict': 86.69686358954954, 'KITTI/Car_2D_AP40_hard_strict': 90.19443616118072, 'KITTI/Car_3D_AP40_easy_loose': 96.97090666658067, 'KITTI/Car_BEV_AP40_easy_loose': 97.01934767289137, 'KITTI/Car_2D_AP40_easy_loose': 96.97655747275691, 'KITTI/Car_3D_AP40_moderate_loose': 95.23816453605511, 'KITTI/Car_BEV_AP40_moderate_loose': 95.3207301353992, 'KITTI/Car_2D_AP40_moderate_loose': 93.04151587194201, 'KITTI/Car_3D_AP40_hard_loose': 92.43012627686896, 'KITTI/Car_BEV_AP40_hard_loose': 92.59705109140491, 'KITTI/Car_2D_AP40_hard_loose': 90.19443616118072, 'KITTI/Overall_3D_AP40_easy': 82.53367486701647, 'KITTI/Overall_BEV_AP40_easy': 86.17807943660559, 'KITTI/Overall_2D_AP40_easy': 88.27364584830087, 'KITTI/Overall_3D_AP40_moderate': 74.21847487192152, 'KITTI/Overall_BEV_AP40_moderate': 79.45495713724569, 'KITTI/Overall_2D_AP40_moderate': 83.60996013711532, 'KITTI/Overall_3D_AP40_hard': 68.83350912238073, 'KITTI/Overall_BEV_AP40_hard': 74.6468800972806, 'KITTI/Overall_2D_AP40_hard': 79.88387582097613}

## PointPillars 3D Car

In [16]:
# Select the PointPillars (car-only) config; `cfg` also names the output
# folders used by the cells below.
cfg = 'hv_pointpillars_secfpn_6x8_160e_kitti-3d-car'
config_file = f'configs/pointpillars/{cfg}.py'
checkpoint_file = f'checkpoints/{cfg}_20220331_134606-d42d15ed.pth'

# Build the detector on the first GPU and load the checkpoint weights.
model = init_model(config_file, checkpoint_file, device='cuda:0')

load checkpoint from local path: checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20220331_134606-d42d15ed.pth


In [17]:
# Run inference on the first N frames and write the predicted and ground-truth
# 3D boxes projected onto each frame's camera image.
vis_result_dir = f'./visualize/{cfg}'  # output root for this model; same for every frame
for idx in range(50):
    # get information from dataset
    data_info = kitti_dataset.get_data_info(idx)
    pcd = data_info['pts_filename']

    # get inference result
    result, data = inference_detector(model, pcd)

    # get data for multi-modal result
    kitti_dataset.pre_pipeline(data_info)
    example = kitti_dataset.pipeline(data_info)

    img_path = data_info['img_info']['filename']
    img = cv2.imread(img_path)
    gt_bboxes = data_info['ann_info']['gt_bboxes_3d']
    pred_bboxes = result[0]['boxes_3d']
    img_metas = example['img_metas']
    proj_mat = data_info['lidar2img']

    # Frame id = image file name without its extension. Path.stem removes only
    # the suffix, whereas str.strip('.png') strips any leading/trailing
    # '.', 'p', 'n' or 'g' characters from the name.
    frame_num = Path(img_path).stem

    try:
        # cv2.imread returns None instead of raising when a file cannot be read
        if img is None:
            raise FileNotFoundError(f'could not read image: {img_path}')

        # write multi modality result image
        show_multi_modality_result(
            img, gt_bboxes, pred_bboxes, proj_mat,
            vis_result_dir, frame_num, box_mode='lidar', img_metas=img_metas,
            gt_bbox_color=(255, 0, 0), pred_bbox_color=(0, 255, 0))

        # combine pred and gt results into a single image
        frame_dir = f'{vis_result_dir}/{frame_num}'
        img_pred = cv2.imread(f'{frame_dir}/{frame_num}_pred.png')
        img_gt = cv2.imread(f'{frame_dir}/{frame_num}_gt.png')
        if img_pred is None or img_gt is None:
            raise FileNotFoundError(f'missing rendered images in {frame_dir}')
        img_combined = cv2.addWeighted(img_pred, 0.5, img_gt, 0.5, 0)
        cv2.imwrite(f'{frame_dir}/{frame_num}_comb.png', img_combined)

        show_result_meshlab(data, result, vis_result_dir)
    except Exception as exc:
        # Keep going on per-frame failures but report why. Catching Exception
        # (rather than a bare except) lets KeyboardInterrupt stop the loop.
        print(f'Frame not processed: {frame_num} ({type(exc).__name__}: {exc})')

Frame not processed: 000019
Frame not processed: 000044


In [14]:
# Benchmark inference speed for the currently selected config/checkpoint
# (the script logs a running "fps" figure).
!python tools/analysis_tools/benchmark.py {config_file} {checkpoint_file}

  'Please follow `getting_started.md` to install MinkowskiEngine.`')
  'dir_offset and dir_limit_offset will be depressed and be '
load checkpoint from local path: checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20220331_134606-d42d15ed.pth
Done image [50 / 2000], fps: 28.1 img / s
Done image [100/ 2000], fps: 28.2 img / s
Done image [150/ 2000], fps: 28.1 img / s
Done image [200/ 2000], fps: 28.1 img / s
Done image [250/ 2000], fps: 28.2 img / s
Done image [300/ 2000], fps: 28.2 img / s
Done image [350/ 2000], fps: 28.2 img / s
Done image [400/ 2000], fps: 28.2 img / s
Done image [450/ 2000], fps: 28.2 img / s
Done image [500/ 2000], fps: 28.1 img / s
Done image [550/ 2000], fps: 28.2 img / s
Done image [600/ 2000], fps: 28.2 img / s
Done image [650/ 2000], fps: 28.1 img / s
Done image [700/ 2000], fps: 28.2 img / s
Done image [750/ 2000], fps: 28.2 img / s
Done image [800/ 2000], fps: 28.2 img / s
Done image [850/ 2000], fps: 28.2 img / s
Done image [900/ 2000], fps: 28.2 im

In [15]:
# Evaluate the selected checkpoint with tools/test.py: predictions go to
# results/{cfg}.pkl (--out) and the mAP metrics are requested via --eval.
!python tools/test.py {config_file} {checkpoint_file} --out results/{cfg}.pkl --eval mAP

  'Please follow `getting_started.md` to install MinkowskiEngine.`')
  f'Setting OMP_NUM_THREADS environment variable for each process '
  f'Setting MKL_NUM_THREADS environment variable for each process '
  'dir_offset and dir_limit_offset will be depressed and be '
load checkpoint from local path: checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20220331_134606-d42d15ed.pth
[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1497/1497, 26.4 task/s, elapsed: 57s, ETA:     0s
writing results to results/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.pkl

Converting prediction to KITTI format
[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1497/1497, 465.4 task/s, elapsed: 3s, ETA:     0s
Result is saved to /tmp/tmp7s65psch/results.pkl.


----------- AP11 Results ------------

Car AP11@0.70, 0.70, 0.70:
bbox AP11:97.6346, 90.3955, 89.8723
bev  AP11:90.5147, 89.4159, 88.2075
3d   AP11:89.9229, 84.8491, 79.1779
aos  AP11:97.45, 90.10, 89.28
Car AP11@0.70, 0.50, 0.50:
bbox AP11:97.6346, 90.3955, 89.8723
bev  AP11:97.8195, 90

## PointPillars 3D 3-class

In [18]:
# Select the PointPillars (3-class) config; `cfg` also names the output
# folders used by the cells below.
cfg = 'hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class'
config_file = f'configs/pointpillars/{cfg}.py'
checkpoint_file = f'checkpoints/{cfg}_20220301_150306-37dc2420.pth'

# Build the detector on the first GPU and load the checkpoint weights.
model = init_model(config_file, checkpoint_file, device='cuda:0')

load checkpoint from local path: checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth


In [19]:
# Run inference on the first N frames and write the predicted and ground-truth
# 3D boxes projected onto each frame's camera image.
vis_result_dir = f'./visualize/{cfg}'  # output root for this model; same for every frame
for idx in range(50):
    # get information from dataset
    data_info = kitti_dataset.get_data_info(idx)
    pcd = data_info['pts_filename']

    # get inference result
    result, data = inference_detector(model, pcd)

    # get data for multi-modal result
    kitti_dataset.pre_pipeline(data_info)
    example = kitti_dataset.pipeline(data_info)

    img_path = data_info['img_info']['filename']
    img = cv2.imread(img_path)
    gt_bboxes = data_info['ann_info']['gt_bboxes_3d']
    pred_bboxes = result[0]['boxes_3d']
    img_metas = example['img_metas']
    proj_mat = data_info['lidar2img']

    # Frame id = image file name without its extension. Path.stem removes only
    # the suffix, whereas str.strip('.png') strips any leading/trailing
    # '.', 'p', 'n' or 'g' characters from the name.
    frame_num = Path(img_path).stem

    try:
        # cv2.imread returns None instead of raising when a file cannot be read
        if img is None:
            raise FileNotFoundError(f'could not read image: {img_path}')

        # write multi modality result image
        show_multi_modality_result(
            img, gt_bboxes, pred_bboxes, proj_mat,
            vis_result_dir, frame_num, box_mode='lidar', img_metas=img_metas,
            gt_bbox_color=(255, 0, 0), pred_bbox_color=(0, 255, 0))

        # combine pred and gt results into a single image
        frame_dir = f'{vis_result_dir}/{frame_num}'
        img_pred = cv2.imread(f'{frame_dir}/{frame_num}_pred.png')
        img_gt = cv2.imread(f'{frame_dir}/{frame_num}_gt.png')
        if img_pred is None or img_gt is None:
            raise FileNotFoundError(f'missing rendered images in {frame_dir}')
        img_combined = cv2.addWeighted(img_pred, 0.5, img_gt, 0.5, 0)
        cv2.imwrite(f'{frame_dir}/{frame_num}_comb.png', img_combined)

        show_result_meshlab(data, result, vis_result_dir)
    except Exception as exc:
        # Keep going on per-frame failures but report why. Catching Exception
        # (rather than a bare except) lets KeyboardInterrupt stop the loop.
        print(f'Frame not processed: {frame_num} ({type(exc).__name__}: {exc})')

Frame not processed: 000012


In [18]:
# Benchmark inference speed for the currently selected config/checkpoint
# (the script logs a running "fps" figure).
!python tools/analysis_tools/benchmark.py {config_file} {checkpoint_file}

  'Please follow `getting_started.md` to install MinkowskiEngine.`')
  'dir_offset and dir_limit_offset will be depressed and be '
load checkpoint from local path: checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth
Done image [50 / 2000], fps: 20.7 img / s
Done image [100/ 2000], fps: 20.4 img / s
Done image [150/ 2000], fps: 20.4 img / s
Done image [200/ 2000], fps: 20.4 img / s
Done image [250/ 2000], fps: 20.5 img / s
Done image [300/ 2000], fps: 20.4 img / s
Done image [350/ 2000], fps: 20.4 img / s
Done image [400/ 2000], fps: 20.4 img / s
Done image [450/ 2000], fps: 20.4 img / s
Done image [500/ 2000], fps: 20.4 img / s
Done image [550/ 2000], fps: 20.4 img / s
Done image [600/ 2000], fps: 20.4 img / s
Done image [650/ 2000], fps: 20.4 img / s
Done image [700/ 2000], fps: 20.4 img / s
Done image [750/ 2000], fps: 20.4 img / s
Done image [800/ 2000], fps: 20.4 img / s
Done image [850/ 2000], fps: 20.4 img / s
Done image [900/ 2000], fps: 20.4

In [19]:
# Evaluate the selected checkpoint with tools/test.py: predictions go to
# results/{cfg}.pkl (--out) and the mAP metrics are requested via --eval.
!python tools/test.py {config_file} {checkpoint_file} --out results/{cfg}.pkl --eval mAP

  'Please follow `getting_started.md` to install MinkowskiEngine.`')
  f'Setting OMP_NUM_THREADS environment variable for each process '
  f'Setting MKL_NUM_THREADS environment variable for each process '
  'dir_offset and dir_limit_offset will be depressed and be '
load checkpoint from local path: checkpoints/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class_20220301_150306-37dc2420.pth
[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1497/1497, 21.8 task/s, elapsed: 69s, ETA:     0s
writing results to results/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.pkl

Converting prediction to KITTI format
[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1497/1497, 409.0 task/s, elapsed: 4s, ETA:     0s
Result is saved to /tmp/tmpl99q4vxs/results.pkl.


----------- AP11 Results ------------

Pedestrian AP11@0.50, 0.50, 0.50:
bbox AP11:73.0963, 68.1344, 64.0474
bev  AP11:69.3156, 64.5951, 58.6195
3d   AP11:67.3901, 61.7238, 56.2310
aos  AP11:54.72, 52.52, 49.62
Pedestrian AP11@0.50, 0.25, 0.25:
bbox AP11:73.0963, 68.1344, 64.0474
b

## Dynamic Voxelization 3D Car

In [20]:
# Select the dynamic-voxelization PointPillars (car-only) config; the
# checkpoint is the latest one saved under this config's work_dirs folder.
cfg = 'dv_pointpillars_secfpn_6x8_160e_kitti-3d-car'
config_file = f'configs/dynamic_voxelization/{cfg}.py'
checkpoint_file = f'work_dirs/{cfg}/latest.pth'

# Build the detector on the first GPU and load the checkpoint weights.
model = init_model(config_file, checkpoint_file, device='cuda:0')

load checkpoint from local path: work_dirs/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car/latest.pth


In [21]:
# Run inference on the first N frames and write the predicted and ground-truth
# 3D boxes projected onto each frame's camera image.
vis_result_dir = f'./visualize/{cfg}'  # output root for this model; same for every frame
for idx in range(50):
    # get information from dataset
    data_info = kitti_dataset.get_data_info(idx)
    pcd = data_info['pts_filename']

    # get inference result
    result, data = inference_detector(model, pcd)

    # get data for multi-modal result
    kitti_dataset.pre_pipeline(data_info)
    example = kitti_dataset.pipeline(data_info)

    img_path = data_info['img_info']['filename']
    img = cv2.imread(img_path)
    gt_bboxes = data_info['ann_info']['gt_bboxes_3d']
    pred_bboxes = result[0]['boxes_3d']
    img_metas = example['img_metas']
    proj_mat = data_info['lidar2img']

    # Frame id = image file name without its extension. Path.stem removes only
    # the suffix, whereas str.strip('.png') strips any leading/trailing
    # '.', 'p', 'n' or 'g' characters from the name.
    frame_num = Path(img_path).stem

    try:
        # cv2.imread returns None instead of raising when a file cannot be read
        if img is None:
            raise FileNotFoundError(f'could not read image: {img_path}')

        # write multi modality result image
        show_multi_modality_result(
            img, gt_bboxes, pred_bboxes, proj_mat,
            vis_result_dir, frame_num, box_mode='lidar', img_metas=img_metas,
            gt_bbox_color=(255, 0, 0), pred_bbox_color=(0, 255, 0))

        # combine pred and gt results into a single image
        frame_dir = f'{vis_result_dir}/{frame_num}'
        img_pred = cv2.imread(f'{frame_dir}/{frame_num}_pred.png')
        img_gt = cv2.imread(f'{frame_dir}/{frame_num}_gt.png')
        if img_pred is None or img_gt is None:
            raise FileNotFoundError(f'missing rendered images in {frame_dir}')
        img_combined = cv2.addWeighted(img_pred, 0.5, img_gt, 0.5, 0)
        cv2.imwrite(f'{frame_dir}/{frame_num}_comb.png', img_combined)

        show_result_meshlab(data, result, vis_result_dir)
    except Exception as exc:
        # Keep going on per-frame failures but report why. Catching Exception
        # (rather than a bare except) lets KeyboardInterrupt stop the loop.
        print(f'Frame not processed: {frame_num} ({type(exc).__name__}: {exc})')

Frame not processed: 000019
Frame not processed: 000030
Frame not processed: 000064


In [22]:
# Benchmark inference speed for the currently selected config/checkpoint
# (the script logs a running "fps" figure).
!python tools/analysis_tools/benchmark.py {config_file} {checkpoint_file}

  'Please follow `getting_started.md` to install MinkowskiEngine.`')
  'dir_offset and dir_limit_offset will be depressed and be '
load checkpoint from local path: work_dirs/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car/latest.pth
Done image [50 / 2000], fps: 45.0 img / s
Done image [100/ 2000], fps: 41.7 img / s
Done image [150/ 2000], fps: 38.8 img / s
Done image [200/ 2000], fps: 37.4 img / s
Done image [250/ 2000], fps: 37.3 img / s
Done image [300/ 2000], fps: 36.7 img / s
Done image [350/ 2000], fps: 36.4 img / s
Done image [400/ 2000], fps: 36.6 img / s
Done image [450/ 2000], fps: 36.8 img / s
Done image [500/ 2000], fps: 37.0 img / s
Done image [550/ 2000], fps: 36.8 img / s
Done image [600/ 2000], fps: 36.8 img / s
Done image [650/ 2000], fps: 36.8 img / s
Done image [700/ 2000], fps: 36.8 img / s
Done image [750/ 2000], fps: 36.8 img / s
Done image [800/ 2000], fps: 36.9 img / s
Done image [850/ 2000], fps: 36.8 img / s
Done image [900/ 2000], fps: 36.8 img / s
Done image [95

In [23]:
# Evaluate the selected checkpoint with tools/test.py: predictions go to
# results/{cfg}.pkl (--out) and the mAP metrics are requested via --eval.
!python tools/test.py {config_file} {checkpoint_file} --out results/{cfg}.pkl --eval mAP

  'Please follow `getting_started.md` to install MinkowskiEngine.`')
  f'Setting OMP_NUM_THREADS environment variable for each process '
  f'Setting MKL_NUM_THREADS environment variable for each process '
  'dir_offset and dir_limit_offset will be depressed and be '
load checkpoint from local path: work_dirs/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car/latest.pth
[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1497/1497, 31.2 task/s, elapsed: 48s, ETA:     0s
writing results to results/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.pkl

Converting prediction to KITTI format
[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1497/1497, 454.9 task/s, elapsed: 3s, ETA:     0s
Result is saved to /tmp/tmp2_o63qjv/results.pkl.


----------- AP11 Results ------------

Car AP11@0.70, 0.70, 0.70:
bbox AP11:90.7258, 89.6986, 88.2731
bev  AP11:89.9788, 88.1361, 84.9006
3d   AP11:87.0824, 76.6987, 73.0169
aos  AP11:90.30, 87.65, 85.26
Car AP11@0.70, 0.50, 0.50:
bbox AP11:90.7258, 89.6986, 88.2731
bev  AP11:90.7916, 90.1393, 89.4374
3d   

## Dynamic Voxelization 3D 3-class

In [24]:
# Name of the experiment; it is also the stem of the config file, the name of
# the work directory holding the checkpoint, and the name of the output folders
# used by the cells below.
cfg = 'dv_pointpillars_secfpn_6x8_160e_kitti-3d-3class'

# Derive both paths from `cfg` so the three values cannot drift apart when the
# experiment is switched (the resulting strings are identical to the previous
# hand-written ones).
config_file = f'configs/dynamic_voxelization/{cfg}.py'
checkpoint_file = f'work_dirs/{cfg}/latest.pth'

# Build the detector from the config and load the checkpoint onto the first GPU.
model = init_model(config_file, checkpoint_file, device='cuda:0')

load checkpoint from local path: work_dirs/dv_pointpillars_secfpn_6x8_160e_kitti-3d-3class/latest.pth


In [25]:
# Run inference on the first N frames and generate 3D boxes, projected on 2D images.
# For every frame this writes, under ./visualize/{cfg}/<frame>/:
#   <frame>_pred.png / <frame>_gt.png  (written by show_multi_modality_result)
#   <frame>_comb.png                   (50/50 blend of the two images above)
# and then exports the point-cloud result via show_result_meshlab.
num_frames = 50                        # how many leading dataset frames to visualize
vis_result_dir = f'./visualize/{cfg}'  # loop-invariant output root for this model

for idx in range(num_frames):
    # get information from dataset
    data_info = kitti_dataset.get_data_info(idx)
    pcd = data_info['pts_filename']

    # get inference result
    result, data = inference_detector(model, pcd)

    # get data for multi-modal result
    kitti_dataset.pre_pipeline(data_info)
    example = kitti_dataset.pipeline(data_info)

    img_path = data_info['img_info']['filename']
    img = cv2.imread(img_path)
    gt_bboxes = data_info['ann_info']['gt_bboxes_3d']
    pred_bboxes = result[0]['boxes_3d']
    img_metas = example['img_metas']
    proj_mat = data_info['lidar2img']

    # get frame number: the image file name without its extension.
    # (`.stem` removes the suffix; `str.strip('.png')` would instead strip any
    # leading/trailing '.', 'p', 'n', 'g' characters.)
    frame_num = Path(img_path).stem

    try:
        # write multi modality result image
        show_multi_modality_result(img, gt_bboxes, pred_bboxes, proj_mat,
                vis_result_dir, frame_num, box_mode='lidar', img_metas=img_metas,
                gt_bbox_color=(255,0,0), pred_bbox_color=(0,255,0))

        # combine pred and gt results to a single image
        frame_dir = f'{vis_result_dir}/{frame_num}'
        img_pred = cv2.imread(f'{frame_dir}/{frame_num}_pred.png')
        img_gt = cv2.imread(f'{frame_dir}/{frame_num}_gt.png')
        img_combined = cv2.addWeighted(img_pred, 0.5, img_gt, 0.5, 0)
        cv2.imwrite(f'{frame_dir}/{frame_num}_comb.png', img_combined)
    except Exception as err:
        # Best effort: a frame whose 2D overlay cannot be produced is reported and
        # skipped. Only `Exception` is caught so Ctrl-C / kernel interrupt still
        # stops the loop, and the reason is printed instead of being discarded.
        print(f'Frame not processed: {frame_num}')
        print(f'  reason: {type(err).__name__}: {err}')

    # The point-cloud export runs exactly once per frame whether or not the 2D
    # overlay succeeded; keeping it outside the try block means its own errors
    # are not mislabelled as overlay failures or retried.
    show_result_meshlab(data, result, vis_result_dir)

Frame not processed: 000012
Frame not processed: 000023
Frame not processed: 000044


In [26]:
# Benchmark for frame times: run the benchmark script on the currently selected
# config/checkpoint (`{config_file}` / `{checkpoint_file}` are IPython
# interpolations of the notebook variables). The script prints a running
# fps figure, shown in the cell output.
!python tools/analysis_tools/benchmark.py {config_file} {checkpoint_file}

  'Please follow `getting_started.md` to install MinkowskiEngine.`')
  'dir_offset and dir_limit_offset will be depressed and be '
load checkpoint from local path: work_dirs/dv_pointpillars_secfpn_6x8_160e_kitti-3d-3class/latest.pth
Done image [50 / 2000], fps: 27.5 img / s
Done image [100/ 2000], fps: 27.2 img / s
Done image [150/ 2000], fps: 27.0 img / s
Done image [200/ 2000], fps: 27.0 img / s
Done image [250/ 2000], fps: 27.1 img / s
Done image [300/ 2000], fps: 27.1 img / s
Done image [350/ 2000], fps: 27.1 img / s
Done image [400/ 2000], fps: 27.2 img / s
Done image [450/ 2000], fps: 27.2 img / s
Done image [500/ 2000], fps: 27.1 img / s
Done image [550/ 2000], fps: 27.2 img / s
Done image [600/ 2000], fps: 27.1 img / s
Done image [650/ 2000], fps: 27.1 img / s
Done image [700/ 2000], fps: 27.1 img / s
Done image [750/ 2000], fps: 27.2 img / s
Done image [800/ 2000], fps: 27.3 img / s
Done image [850/ 2000], fps: 27.4 img / s
Done image [900/ 2000], fps: 27.4 img / s
Done image 

In [27]:
# Run the test script for evaluation metrics on the currently selected
# config/checkpoint. `{config_file}`, `{checkpoint_file}` and `{cfg}` are
# IPython interpolations of the notebook variables set in the model cell above.
# The raw predictions are written to results/{cfg}.pkl and the per-class AP
# tables are printed in the cell output.
!python tools/test.py {config_file} {checkpoint_file} --out results/{cfg}.pkl --eval mAP

  'Please follow `getting_started.md` to install MinkowskiEngine.`')
  f'Setting OMP_NUM_THREADS environment variable for each process '
  f'Setting MKL_NUM_THREADS environment variable for each process '
  'dir_offset and dir_limit_offset will be depressed and be '
load checkpoint from local path: work_dirs/dv_pointpillars_secfpn_6x8_160e_kitti-3d-3class/latest.pth
[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1497/1497, 28.7 task/s, elapsed: 52s, ETA:     0s
writing results to results/dv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.pkl

Converting prediction to KITTI format
[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1497/1497, 405.5 task/s, elapsed: 4s, ETA:     0s
Result is saved to /tmp/tmpovfotrti/results.pkl.


----------- AP11 Results ------------

Pedestrian AP11@0.50, 0.50, 0.50:
bbox AP11:64.8998, 59.3612, 55.6291
bev  AP11:62.5878, 56.8600, 52.3448
3d   AP11:57.4399, 52.1735, 47.9837
aos  AP11:32.37, 28.83, 27.03
Pedestrian AP11@0.50, 0.25, 0.25:
bbox AP11:64.8998, 59.3612, 55.6291
bev  AP11:72.7762, 67