## This notebook generates the result file from mmdetection3d applied on nuscenes
It is a by-the-book approach to use models from the mmdetection3d model zoo.

#### Note: nuscenes-dev (either the original version from the nuScenes website or the modified version provided by us) and mmdetection3d must be correctly installed.

In [1]:
from nuscenes import NuScenes
from nuscenes.eval.prediction.splits import *
from nuscenes.eval.detection import *
from nuscenes.eval.detection import *
from nuscenes.eval.detection.configs import *

import nuscenes.eval.detection.config as cnfig
import nuscenes.eval.detection.evaluate as dcl
    
from nuscenes.prediction import *
from nuscenes.map_expansion.map_api import *

import numpy as np
import math
import json

In [2]:
# Root folder of the nuScenes dataset (absolute path on the cluster; adapt to your setup).
DATAROOT = '/cluster/work/andronn/MasterThesis/MASTER/mmdetection3d/data/nuscenes'
# Load the v1.0-trainval tables; this handle is reused by the evaluation cells below.
nuscenes = NuScenes(version='v1.0-trainval', dataroot=DATAROOT)

Loading NuScenes tables for version v1.0-trainval...
23 category,
8 attribute,
4 visibility,
64386 instance,
12 sensor,
10200 calibrated_sensor,
2631083 ego_pose,
68 log,
850 scene,
34149 sample,
2631083 sample_data,
1166187 sample_annotation,
4 map,
Done loading in 49.472 seconds.
Reverse indexing ...
Done reverse indexing in 17.6 seconds.


In [3]:
# Scratch snippets for exploring the loaded `nuscenes` object.
# All of them are disabled; this cell executes nothing.
#ego_list=nuscenes.ego_pose
#curr_scene = nuscenes.scene[0]
#nuscenes.list_scenes()
#dir(nuscenes)
#len(nuscenes.sample)
#s=nuscenes.sample[0]
#s['data']['RADAR_FRONT']
#curr_scene
#nuscenes.sample[0]
#sample_curr=next(item for item in nuscenes.sample if item["token"] == '30e55a3ec6184d8cb1944b39ba19d622')
#sample_curr.items()

#### Path of the detector and to store results
Path to configure detector and where detector results are stored. To be configured differently for each detector.

In [4]:
# Root of the mmdetection3d checkout; the python command is launched from this path.
# Every other path below is derived from it, so only this line changes when
# the checkout moves.
PWD='/cluster/work/andronn/MasterThesis/MASTER/mmdetection3d/'
# Folder where the evaluation writes its metrics and plots
# (same directory as PWD, without the trailing slash).
OUTPUT=PWD.rstrip('/')
# Folder holding the detector's result file; change the sub-path for each detector.
PATH=PWD+'pgd_results/img_bbox/'
# Name of the result file inside PATH.
FILE_JSON='results_nusc.json'
# Full path of the detection result file handed to the evaluator.
RESULT_PATH=PATH+FILE_JSON

This is simply the execution of algorithms from the mmdetection3d model zoo. The algorithms operate on the nuScenes data, so there is nothing surprising:

### Available detectors

#### 1 pointpillars with backbone FPN (Feature Pyramid Networks) -- LIDAR-only
Results in: './pointpillars_nuscenes_results/pts_bbox/results_nusc-fpn.json' 

python tools/test.py configs/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py checkpoints/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405-2fa62f3d.pth --format-only --options 'jsonfile_prefix=./pointpillars_nuscenes_results'

mAP: 40%

#### 2 pointpillars with backbone SECFPN -- LIDAR-only
Results in: './pointpillars_nuscenes_results/pts_bbox/results_nusc-secfpn.json' 

python tools/test.py configs/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py checkpoints/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230725-0817d270.pth --format-only --options 'jsonfile_prefix=./pointpillars_nuscenes_results'

mAP: 35.17%

#### 3 RegNET-X (RegNET WITH RegNetX-1.6gF-FPN)
Results in: './regnetX_nuscenes_results-secfpn/pts_bbox/results_nusc-secfpn.json'

python tools/test.py configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py checkpoints/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d_20200629_050311-dcd4e090.pth --format-only --options 'jsonfile_prefix=./regnetX_nuscenes_results-secfpn'

mAP: 48.24%

#### 4 RegNET-X (RegNET WITH RegNetX-400MF-FPN)
Results in: './regnetX_nuscenes_results-secfpn/pts_bbox/results_nusc-400MF.json'

python tools/test.py configs/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py checkpoints/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d_20200620_230239-c694dce7.pth --format-only --options 'jsonfile_prefix=./regnetX_nuscenes_results-secfpn'

mAP: 44.84%

#### 5 RegNET-X (RegNET WITH 400MF-SECFPN)
Results in: './regnetX_nuscenes_results-secfpn/pts_bbox/results_nusc-400mfsecfpn.json'

python tools/test.py configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py checkpoints/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230334-53044f32.pth --format-only --options 'jsonfile_prefix=./regnetX_nuscenes_results-secfpn'

mAP: 41.15%

#### 6 SSN with backbone SECFPN
Results in: './ssn_nuscenes_results/pts_bbox/results_nusc-secfpn.json'

python tools/test.py configs/ssn/hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d.py checkpoints/hv_ssn_secfpn_sbn-all_2x16_2x_nus-3d_20201023_193737-5fda3f00.pth --format-only --options 'jsonfile_prefix=./ssn_nuscenes_results'

mAP: 41.56%

#### 7 SSN with backbone REGNET 
Results in: './ssn_nuscenes_results/pts_bbox/results_nusc-regnet.json'

python tools/test.py configs/ssn/hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_nus-3d.py checkpoints/hv_ssn_regnet-400mf_secfpn_sbn-all_2x16_2x_nus-3d_20201024_232447-7af3d8c8.pth --format-only --options 'jsonfile_prefix=./ssn_nuscenes_results'

mAP: 46.95%

#### 8 FCOS3D with backbone resnet 101 (finetuned) -- camera only
Results in './fcos_results/img_bbox/results_nusc.json'

python tools/test.py configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune.py checkpoints/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d_finetune_20210427_091419-35aaaad0.pth --format-only --options 'jsonfile_prefix=./fcos_results'

mAP: 32.1%

#### 9 PGD with backbone resnet 101 (finetuned) -- camera only
Results in './pgd_results/img_bbox/results_nusc.json'

python tools/test.py configs/pgd/pgd_r101_caffe_fpn_gn-head_2x16_2x_nus-mono3d_finetune.py checkpoints/pgd_r101_caffe_fpn_gn-head_2x16_2x_nus-mono3d_finetune_20211114_162135-5ec7c1cd.pth --format-only --options 'jsonfile_prefix=./pgd_results'

mAP: 35.8%

ACRONYMS (as naming in the following differs with respect to the paper)

First column is the algorithm name (as close as possible to naming in mmdetection3d), second is the abbreviation we use in our paper

- FCOS3D-RESNET101             FCOS
- pointpillars-secfpn          SEC
- pointpillars-fpn             FPN
- regnet-regnetX_400MF-FPN     REG400
- ssn-SECFPN                   SSN
- regnet-regnetX_400MF-SECFPN  REGSEC (REG400SEC)
- ssn-REGNET                   SSNREG
- regnet-regnetX_FPN           REG1.6
- pgd                          PGD


### Test the detection

This is the official nuScenes detection evaluation code.
Results are written to the provided output_dir.
nuScenes uses the following detection metrics:
    - Mean Average Precision (mAP): Uses center-distance as matching criterion; averaged over distance thresholds.
    - True Positive (TP) metrics: Average of translation, velocity, scale, orientation and attribute errors.
    - nuScenes Detection Score (NDS): The weighted sum of the above.
Here is an overview of the functions in this method:
    - init: Loads GT annotations and predictions stored in JSON format and filters the boxes.
    - run: Performs evaluation and dumps the metric data to disk.
    - render: Renders various plots and dumps to disk.
We assume that:
    - Every sample_token is given in the results, although there may be no predictions for that sample.
    Please see https://www.nuscenes.org/object-detection for more details.

DetectionEval constructor parameters:

    :param nusc: A NuScenes object.
    :param config: A DetectionConfig object.
    :param result_path: Path of the nuScenes JSON result file.
    :param eval_set: The dataset split to evaluate on, e.g. train, val or test.
    :param output_dir: Folder to save plots and results to.
    :param verbose: Whether to print to stdout.

Signature of DetectionEval.main:

    def main(self,
             plot_examples: int = 0,
             render_curves: bool = True) -> Dict[str, Any]:



In [13]:
# Fetch the detection evaluation configuration named "detection_cvpr_2019"
# from the config factory of nuscenes.eval.detection.config (imported as cnfig).
confvalue=cnfig.config_factory("detection_cvpr_2019")

{
  "class_range": {
    "car": 50,
    "truck": 50,
    "bus": 50,
    "trailer": 50,
    "construction_vehicle": 50,
    "pedestrian": 40,
    "motorcycle": 40,
    "bicycle": 40,
    "traffic_cone": 30,
    "barrier": 30
  },
  "dist_fcn": "center_distance",
  "dist_ths": [0.5, 1.0, 2.0, 4.0],
  "dist_th_tp": 2.0,
  "min_recall": 0.1,
  "min_precision": 0.1,
  "max_boxes_per_sample": 500,
  "mean_ap_weight": 5
}


In [14]:
# Names of the scenes that compose the val set.
val = [
    'scene-0003', 'scene-0012', 'scene-0013', 'scene-0014', 'scene-0015', 'scene-0016', 'scene-0017', 'scene-0018',
    'scene-0035', 'scene-0036', 'scene-0038', 'scene-0039', 'scene-0092', 'scene-0093', 'scene-0094', 'scene-0095',
    'scene-0096', 'scene-0097', 'scene-0098', 'scene-0099', 'scene-0100', 'scene-0101', 'scene-0102', 'scene-0103',
    'scene-0104', 'scene-0105', 'scene-0106', 'scene-0107', 'scene-0108', 'scene-0109', 'scene-0110', 'scene-0221',
    'scene-0268', 'scene-0269', 'scene-0270', 'scene-0271', 'scene-0272', 'scene-0273', 'scene-0274', 'scene-0275',
    'scene-0276', 'scene-0277', 'scene-0278', 'scene-0329', 'scene-0330', 'scene-0331', 'scene-0332', 'scene-0344',
    'scene-0345', 'scene-0346', 'scene-0519', 'scene-0520', 'scene-0521', 'scene-0522', 'scene-0523', 'scene-0524',
    'scene-0552', 'scene-0553', 'scene-0554', 'scene-0555', 'scene-0556', 'scene-0557', 'scene-0558', 'scene-0559',
    'scene-0560', 'scene-0561', 'scene-0562', 'scene-0563', 'scene-0564', 'scene-0565', 'scene-0625', 'scene-0626',
    'scene-0627', 'scene-0629', 'scene-0630', 'scene-0632', 'scene-0633', 'scene-0634', 'scene-0635', 'scene-0636',
    'scene-0637', 'scene-0638', 'scene-0770', 'scene-0771', 'scene-0775', 'scene-0777', 'scene-0778', 'scene-0780',
    'scene-0781', 'scene-0782', 'scene-0783', 'scene-0784', 'scene-0794', 'scene-0795', 'scene-0796', 'scene-0797',
    'scene-0798', 'scene-0799', 'scene-0800', 'scene-0802', 'scene-0904', 'scene-0905', 'scene-0906', 'scene-0907',
    'scene-0908', 'scene-0909', 'scene-0910', 'scene-0911', 'scene-0912', 'scene-0913', 'scene-0914', 'scene-0915',
    'scene-0916', 'scene-0917', 'scene-0919', 'scene-0920', 'scene-0921', 'scene-0922', 'scene-0923', 'scene-0924',
    'scene-0925', 'scene-0926', 'scene-0927', 'scene-0928', 'scene-0929', 'scene-0930', 'scene-0931', 'scene-0962',
    'scene-0963', 'scene-0966', 'scene-0967', 'scene-0968', 'scene-0969', 'scene-0971', 'scene-0972', 'scene-1059',
    'scene-1060', 'scene-1061', 'scene-1062', 'scene-1063', 'scene-1064', 'scene-1065', 'scene-1066', 'scene-1067',
    'scene-1068', 'scene-1069', 'scene-1070', 'scene-1071', 'scene-1072', 'scene-1073',
]
# Second name bound to the very same list object.
# NOTE(review): this shadows the builtin `eval` for the rest of the notebook;
# prefer `val` in new code.
eval = val

In [15]:
# Sanity check: show how many scene names the val list contains.
len(val)

150

In [20]:
# Set up the detection evaluator for the val split: it reads the detector's
# result file and the ground-truth annotations. The metrics themselves are
# computed later by dt.main(...).
dt = dcl.DetectionEval(
    nusc=nuscenes,
    config=confvalue,
    result_path=RESULT_PATH,
    eval_set='val',
    output_dir=OUTPUT,
    verbose=True,
)

Initializing nuScenes detection evaluation
Loaded results from /cluster/work/andronn/MasterThesis/MASTER/mmdetection3d/pgd_results/img_bbox/results_nusc.json. Found detections for 6019 samples.


  0%|          | 0/6019 [00:00<?, ?it/s]

Loading annotations for val split from nuScenes version: v1.0-trainval


100%|██████████| 6019/6019 [00:13<00:00, 459.23it/s]


Loaded ground truth annotations for 6019 samples.
Filtering predictions
=> Original number of boxes: 530978
=> After distance based filtering: 530743
=> After LIDAR and RADAR points based filtering: 530743
=> After bike rack filtering: 530371
Filtering ground truth annotations
=> Original number of boxes: 187528
=> After distance based filtering: 134565
=> After LIDAR and RADAR points based filtering: 121871
=> After bike rack filtering: 121861


In [21]:
# type(dt)
# type(dt.gt_boxes) # nuscenes.eval.common.data_classes.EvalBoxes
# len(dt.gt_boxes.boxes) # is 6019
# type(dt.sample_tokens)
# len(dt.sample_tokens) # is 6019
# type(dt.sample_tokens[0]) # is str
# dt.gt_boxes.boxes.values() --> dict_values([[{'sample_token': 'fd8420396768425eabec9bdddf7e64b6', 'translation': [242.87, 926.036, 0.898], 'size': [1.726, 4.257, 1.489], 'rotation': [0.787419398050721, 0.0, 0.0, -0.616417627565468], 'velocity': array([ 0.16021727, -0.69494243]), 'ego_translation': (-7.026109314307774, 8.483742683721516, 0.898), 'num_pts': 173, 'detection_name': 'car', 'detection_score': -1.0, 'attribute_name': 'vehicle.moving'}, {'sample_token': 'fd8420396768425eabec9bdddf7e64b6', 'translation': [244.281, 934.941, 1.099], 'size': [1.71, 4.248, 1.527], 'rotation': [0.7424261677818073, 0.0, 0.0, 0.6699278956670037], 'velocity': array([0., 0.]), 'ego_translation': (-5.615109314307773, 17.388742683721603, 1.099), 'num_pts': 35, 'detection_name': 'car', 'detection_score': -1.0, 'attribute_name': 'vehicle.parked'}, 

In [24]:
# Run the evaluation: render 2 example samples plus the PR/TP curves.
# The returned metrics summary is shown as the cell output.
dt.main(plot_examples=2,render_curves=True)

Rendering sample token 5376e3a2874542d8b440faa899e52b97
Rendering sample token 14f665de1fa34d0a9d12838a5b77d687
Accumulating metric data...
Calculating metrics...
Rendering PR and TP curves
Saving metrics to: /cluster/work/andronn/MasterThesis/MASTER/mmdetection3d
mAP: 0.3584
mATE: 0.6674
mASE: 0.2643
mAOE: 0.4346
mAVE: 0.9600
mAAE: 0.1766
NDS: 0.4289
Eval time: 180.0s

Per-class results:
Object Class	AP	ATE	ASE	AOE	AVE	AAE
car	0.538	0.514	0.150	0.095	1.332	0.140
truck	0.278	0.755	0.212	0.146	1.120	0.210
bus	0.372	0.702	0.191	0.165	1.973	0.295
trailer	0.139	1.019	0.237	0.660	0.477	0.223
construction_vehicle	0.060	0.846	0.409	0.984	0.099	0.256
pedestrian	0.431	0.651	0.290	0.594	0.593	0.167
motorcycle	0.354	0.633	0.263	0.566	1.456	0.112
bicycle	0.335	0.618	0.295	0.579	0.630	0.010
traffic_cone	0.581	0.443	0.314	nan	nan	nan
barrier	0.495	0.492	0.282	0.121	nan	nan


{'label_aps': defaultdict(<function nuscenes.eval.detection.data_classes.DetectionMetrics.__init__.<locals>.<lambda>()>,
             {'car': defaultdict(float,
                          {0.5: 0.17238441152622827,
                           1.0: 0.4495187717391653,
                           2.0: 0.7026284591770717,
                           4.0: 0.8276765380551043}),
              'truck': defaultdict(float,
                          {0.5: 0.019446003955399506,
                           1.0: 0.14714843901282196,
                           2.0: 0.37997869043784227,
                           4.0: 0.5664050916588804}),
              'bus': defaultdict(float,
                          {0.5: 0.028375015895221865,
                           1.0: 0.23725596599480853,
                           2.0: 0.5249402174030049,
                           4.0: 0.6977897910464691}),
              'trailer': defaultdict(float,
                          {0.5: 0.0,
                           1.0: 0.0130