## Load Required Libraries to Support Code Execution 

In [1]:
# Load Required Libraries
import os
import re
import time
from argparse import ArgumentParser
from os import path as osp

import torch
import cv2
import numpy as np
from mmdet3d.utils import get_root_logger
from mmdet3d.apis import inference_detector, inference_multi_modality_detector, init_model, show_result_meshlab
from mmdet3d.datasets import build_dataloader, build_dataset
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmdet3d.core import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
                          DepthInstance3DBoxes, LiDARInstance3DBoxes,
                          show_multi_modality_result, show_result,
                          show_seg_result, show_results_modified)
from mmdet3d.core import show_results_modified as show_results_modified
from mmdet3d.core.evaluation.kitti_utils import kitti_eval

## Import and Initialize SECOND 3D Object Detection 

In [2]:
# Build the detector from a config file and a checkpoint file, then create the
# dataset and dataloader used by the later cells.

# Config and checkpoint that select the model to load
config_filename = './configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py'
checkpoint_filename = './checkpoints/hv_second_secfpn_6x8_80e_kitti-3d-3class_20210831_022017-ae782e87.pth'

# Initialize the model on the first CUDA device
model_single = init_model(config_filename, checkpoint_filename, device='cuda:0')

# Reuse the config attached to the model, wrap the model in MMDataParallel for
# dataloader-driven inference, and build the dataset from the config's `test` split
# (this dataset object is also the source of the ground-truth annotations used later)
cfg = model_single.cfg
model = MMDataParallel(model_single)
dataset = build_dataset(cfg.data.test)

# Set up a dataloader that gives easy access to the data of each capture.
# Defaults are one sample per batch, unshuffled; any `test_dataloader` entry in the
# config overrides them because it is merged last.
test_dataloader_default_args = dict(samples_per_gpu=1, workers_per_gpu=2, dist=False, shuffle=False)
test_loader_cfg = {**test_dataloader_default_args, **cfg.data.get('test_dataloader', {})}
data_loader = build_dataloader(dataset, **test_loader_cfg)

# Switch the model to evaluation mode (as the last expression of the cell, its
# return value is what the notebook echoes as the cell output)
model.eval()



load checkpoint from local path: ./checkpoints/hv_second_secfpn_6x8_80e_kitti-3d-3class_20210831_022017-ae782e87.pth


MMDataParallel(
  (module): VoxelNet(
    (backbone): SECOND(
      (blocks): ModuleList(
        (0): Sequential(
          (0): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): BatchNorm2d(128, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
          (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (4): BatchNorm2d(128, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (5): ReLU(inplace=True)
          (6): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (7): BatchNorm2d(128, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (8): ReLU(inplace=True)
          (9): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (10): BatchNorm2d(128, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (11)

## Function to Convert Data to Plottable Format 

In [3]:
def convert_data_2_plot_frame(input_data, points, pred_boxes, gt_annos, box_mode=None):
    """Convert LiDAR points, predicted boxes and ground-truth boxes to the DEPTH frame for plotting.

    Args:
        input_data: Info record of the scan. Only ``input_data['calib']['R0_rect']``
            and ``input_data['calib']['Tr_velo_to_cam']`` are read, so it must
            belong to the same scan as ``gt_annos``.
        points: Point array expressed in ``box_mode`` coordinates.
        pred_boxes: Predicted box array expressed in ``box_mode`` coordinates.
        gt_annos: Annotation dict; the keys ``'score'``, ``'location'``,
            ``'dimensions'`` and ``'rotation_y'`` are read, one entry per box.
        box_mode: Coordinate mode of ``points`` and ``pred_boxes``. When omitted,
            the notebook-level ``box_mode`` variable is used, which is what
            existing four-argument callers rely on.

    Returns:
        Tuple ``(points, show_pred_bboxes, show_gt_bboxes)``, all converted to
        the DEPTH coordinate mode.

    Raises:
        NameError: If ``box_mode`` is not passed and no notebook-level
            ``box_mode`` has been defined.
    """
    if box_mode is None:
        # Backward-compatible fallback for callers that set `box_mode` as a
        # notebook variable instead of passing it in.
        if 'box_mode' not in globals():
            raise NameError("box_mode was not passed and no notebook-level 'box_mode' is defined")
        box_mode = globals()['box_mode']

    # Convert LiDAR points to a plottable frame
    points = Coord3DMode.convert(points, box_mode, Coord3DMode.DEPTH)

    # Convert predicted bounding boxes to a plottable frame.
    # Uses the `pred_boxes` argument; previously a notebook global named
    # `pred_bboxes` was read here and the argument was silently ignored.
    show_pred_bboxes = Box3DMode.convert(pred_boxes, box_mode, Box3DMode.DEPTH)

    # Assemble one 7-value row per ground-truth box:
    # location (3), dimensions (3), rotation_y (1)
    num_gt = len(gt_annos['score'])
    gt_bbox = np.zeros((num_gt, 7))
    for i in range(num_gt):
        gt_bbox[i] = np.concatenate((gt_annos['location'][i],
                                     gt_annos['dimensions'][i],
                                     [gt_annos['rotation_y'][i]]))

    # Convert ground truth 3D boxes to a plottable frame: camera -> LiDAR -> DEPTH.
    # The inverse of (R0_rect @ Tr_velo_to_cam) is handed to convert_to as the
    # camera-to-LiDAR transform (presumably the forward product maps LiDAR to the
    # rectified camera frame -- confirm against the calibration format).
    rect = input_data['calib']['R0_rect'].astype(np.float32)
    Trv2c = input_data['calib']['Tr_velo_to_cam'].astype(np.float32)
    gt_bboxes_3d = CameraInstance3DBoxes(gt_bbox).convert_to(Box3DMode.LIDAR, np.linalg.inv(rect @ Trv2c))
    show_gt_bboxes = Box3DMode.convert(gt_bboxes_3d.tensor.numpy(), Box3DMode.LIDAR, Box3DMode.DEPTH)

    return points, show_pred_bboxes, show_gt_bboxes

## Evaluate and Display Single Test LiDAR Scan 

In [4]:
# Run the detector on the first scan of the dataset and display the result
# interactively together with the ground truth.

# Info record of the first scan and the path of its point cloud file
input_data = dataset.data_infos[0]
pcd_file = './data/kitti/{}'.format(input_data['point_cloud']['velodyne_path'])

# Run the point cloud through the (unwrapped) model
result, data = inference_detector(model_single, pcd_file)

# Get bounding boxes and scores of model predictions.
# NOTE: `pred_scores` is not used further in this cell, so no score threshold
# is applied to the boxes shown here.
pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
pred_scores = result[0]['scores_3d'].numpy()

# Get ground truth annotations with the "DontCare" entries removed
gt_annos = dataset.remove_dontcare(input_data['annos'])

# Get box mode and points from the data that was fed to the model.
# `box_mode` is kept as a notebook-level name because convert_data_2_plot_frame
# is called without it below and looks it up from the notebook namespace.
box_mode = data['img_metas'][0][0]['box_mode_3d']
points   = data['points'][0][0].cpu().detach().numpy()

# Get points and bounding boxes in plottable frame
points, show_pred_bboxes, show_gt_bboxes = convert_data_2_plot_frame(input_data, points, pred_bboxes, gt_annos)

# Show ground truth boxes, predicted boxes, and LiDAR points (show=True requests
# on-screen display; no output directory or file name is passed here)
show_results_modified.show_results_modified(
    points,
    show_gt_bboxes,
    show_pred_bboxes,
    show=True)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


## Run Model over all LiDAR Scans 

In [5]:
# Run the model over every scan served by `data_loader`, save one result image
# per scan, and collect the raw results for the evaluation cell.
# NOTE(review): the dataset is built from cfg.data.test; whether that corresponds
# to the KITTI training or validation split depends on the config -- confirm.

# Set score threshold for prediction visualization
score_threshold = 0.5

# Initialize results structure
results = []
count = 0

# Start timer
t0 = time.time()

# Loop through each scan provided by the dataloader
for data in data_loader:

    # Run inference without tracking gradients
    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)

    # Get bounding boxes and scores of model predictions
    pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
    pred_scores = result[0]['scores_3d'].numpy()

    # Filter out low score bboxes for visualization only; the unfiltered
    # `result` is what gets stored for evaluation below
    inds = pred_scores > score_threshold
    pred_bboxes = pred_bboxes[inds]

    # Get box mode and points of the current scan (unwrapped from the
    # data containers produced by the dataloader)
    box_mode = data['img_metas'][0]._data[0][0]['box_mode_3d']
    points   = data['points'][0]._data[0][0].cpu().numpy()

    # Info record of the scan being processed. This relies on the loader yielding
    # exactly one scan per iteration in dataset order (the defaults
    # samples_per_gpu=1, shuffle=False); a `test_dataloader` override in the
    # config could break that assumption.
    scan_info = dataset.data_infos[count]

    # Remove classes in truth data that are "DontCare"
    gt_annos = dataset.remove_dontcare(scan_info['annos'])

    # Get points and bounding boxes in plottable frame.
    # The calibration must come from the current scan; previously the info record
    # of scan 0 left over from the single-scan cell was passed for every scan.
    points, show_pred_bboxes, show_gt_bboxes = convert_data_2_plot_frame(scan_info, points, pred_bboxes, gt_annos)

    # Generate images showing ground truth, predicted, and lidar points
    show_results_modified.show_results_modified(
        points,
        show_gt_bboxes,
        show_pred_bboxes,
        False,
        './data/results',
        'training_results_file{}'.format(count))

    # Place results in list
    results.extend(result)

    # Increment counter
    count = count + 1

    # Display status every 100 scans (read the clock once so both numbers agree)
    if count % 100 == 0:
        elapsed = time.time() - t0
        print('LiDAR Scan Number:', count, 'Elapsed time:', elapsed, 'Time per LiDAR Scan:', elapsed / count)

LiDAR Scan Number: 100 Elapsed time: 30.688058137893677 Time per LiDAR Scan: 0.30688058853149414
LiDAR Scan Number: 200 Elapsed time: 61.755903482437134 Time per LiDAR Scan: 0.30877952218055726
LiDAR Scan Number: 300 Elapsed time: 92.91029143333435 Time per LiDAR Scan: 0.30970097382863365
LiDAR Scan Number: 400 Elapsed time: 124.19964551925659 Time per LiDAR Scan: 0.3104991161823273
LiDAR Scan Number: 500 Elapsed time: 156.1624083518982 Time per LiDAR Scan: 0.312324818611145
LiDAR Scan Number: 600 Elapsed time: 187.48369026184082 Time per LiDAR Scan: 0.31247281829516094
LiDAR Scan Number: 700 Elapsed time: 219.2533118724823 Time per LiDAR Scan: 0.3132190193448748
LiDAR Scan Number: 800 Elapsed time: 250.6405680179596 Time per LiDAR Scan: 0.3133007109165192
LiDAR Scan Number: 900 Elapsed time: 283.3333854675293 Time per LiDAR Scan: 0.31481487353642784
LiDAR Scan Number: 1000 Elapsed time: 316.4081280231476 Time per LiDAR Scan: 0.3164081287384033
LiDAR Scan Number: 1100 Elapsed time: 349

## Evaluate Model Performance 

In [6]:
# Build the keyword arguments for dataset.evaluate from the config's
# `evaluation` section, dropping the entries that must not be forwarded to it
eval_kwargs = cfg.get('evaluation', {}).copy()
for dropped_key in ('interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 'rule'):
    if dropped_key in eval_kwargs:
        del eval_kwargs[dropped_key]

# Set up the 'mmdet3d' logger (no log file, log level 0)
get_root_logger(log_file=None, log_level=0, name='mmdet3d')

# Run the KITTI evaluation on the collected results
scoring_results = dataset.evaluate(results, **eval_kwargs)


Converting prediction to KITTI format
[>>>>>>>>>>>>>>>>>>>>>>>>>>>] 3769/3769, 605.7 task/s, elapsed: 6s, ETA:     0s
Result is saved to /tmp/tmpiritt4ni/results.pkl.


----------- AP11 Results ------------

Pedestrian AP11@0.50, 0.50, 0.50:
bbox AP11:72.4862, 67.5661, 63.9363
bev  AP11:68.7299, 62.6823, 56.9828
3d   AP11:62.5762, 57.9007, 52.4955
aos  AP11:66.32, 60.94, 57.08
Pedestrian AP11@0.50, 0.25, 0.25:
bbox AP11:72.4862, 67.5661, 63.9363
bev  AP11:80.1513, 77.4199, 71.8873
3d   AP11:79.8539, 77.3603, 71.7925
aos  AP11:66.32, 60.94, 57.08
Cyclist AP11@0.50, 0.50, 0.50:
bbox AP11:85.6506, 77.0225, 73.8574
bev  AP11:82.2988, 67.1939, 63.5440
3d   AP11:78.5558, 62.3563, 58.9345
aos  AP11:85.45, 75.88, 72.66
Cyclist AP11@0.50, 0.25, 0.25:
bbox AP11:85.6506, 77.0225, 73.8574
bev  AP11:85.8134, 74.9172, 71.5251
3d   AP11:85.8134, 74.9172, 71.5251
aos  AP11:85.45, 75.88, 72.66
Car AP11@0.70, 0.70, 0.70:
bbox AP11:90.8448, 89.6040, 88.4684
bev  AP11:90.4053, 87.7193, 85.5287
3d   AP11:

## Create Video from Subset of Training Cases 

In [7]:
# Define data path and video name
# NOTE(review): the full-dataset loop in this notebook passes './data/results' as
# its output directory, while this cell reads frames from './data/test/' --
# confirm which directory actually holds the PNGs.
imageDirectory = './data/test/'
videoFilename = './data/test/training_example_video.avi'

# Maximum number of frames written to the video
maxVideoFrames = 100


def _natural_sort_key(filename):
    """Sort key that orders embedded numbers numerically ('file2' before 'file10')."""
    # re.split with a capturing group alternates text, digits, text, ... so the
    # odd positions are always the digit runs.
    parts = re.split(r'(\d+)', filename)
    return [int(part) if index % 2 else part.lower() for index, part in enumerate(parts)]


# Get all PNG images from the directory in a deterministic, numeric-aware order
# (os.listdir returns entries in arbitrary order)
images = sorted(
    (image for image in os.listdir(imageDirectory) if image.endswith(".png")),
    key=_natural_sort_key)
if not images:
    raise FileNotFoundError('No .png images found in {}'.format(imageDirectory))

# Read a single image to get the frame size
firstFrame = cv2.imread(os.path.join(imageDirectory, images[0]))
if firstFrame is None:
    # cv2.imread returns None instead of raising when a file cannot be decoded
    raise IOError('Could not read image {}'.format(os.path.join(imageDirectory, images[0])))
height, width, _ = firstFrame.shape

# Start video (positional arguments: fourcc code 0, 1 frame per second, frame size)
video = cv2.VideoWriter(videoFilename, 0, 1, (width, height))

# Loop through the first `maxVideoFrames` images and place them in the video
count = 0
try:
    for image in images[:maxVideoFrames]:
        frame = cv2.imread(os.path.join(imageDirectory, image))
        if frame is None:
            print('Skipping unreadable image:', image)
            continue
        video.write(frame)
        count = count + 1
finally:
    # Close the video even if reading or writing a frame fails
    video.release()
    cv2.destroyAllWindows()