In [None]:
### This file is currently under development. ###

In [8]:
# %pip install nuscenes-devkit

In [1]:
import os
import json

import torch
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

import cv2
from PIL import Image

from ultralytics import YOLO

from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import RadarPointCloud
from nuscenes.eval.detection.evaluate import NuScenesEval
from nuscenes.eval.detection.data_classes import DetectionConfig
from nuscenes.eval.prediction.splits import get_prediction_challenge_split
from nuscenes.prediction import PredictHelper

## Load Model and Dataset

In [4]:
model = YOLO('weights/yolo11m.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt to 'weights/yolo11m.pt'...


100%|█████████████████████████████████| 38.8M/38.8M [00:03<00:00, 11.5MB/s]


In [8]:
DATAROOT = '../data/nuscenes_full'

In [9]:
nusc = NuScenes(version='v1.0-trainval', dataroot=DATAROOT, verbose=True)

Loading NuScenes tables for version v1.0-trainval...
23 category,
8 attribute,
4 visibility,
64386 instance,
12 sensor,
10200 calibrated_sensor,
2631083 ego_pose,
68 log,
850 scene,
34149 sample,
2631083 sample_data,
1166187 sample_annotation,
4 map,
Done loading in 22.328 seconds.
Reverse indexing ...
Done reverse indexing in 5.4 seconds.


In [10]:
helper = PredictHelper(nusc)

## Perform Predictions

In [11]:
IOU_THRESHOLD = 0.6
CONFIDENCE_THRESHOLD = 0.4

In [46]:
START_INDEX = 0
END_INDEX = 3

In [47]:
sensor = 'CAM_FRONT'

In [57]:
for i in range(START_INDEX, END_INDEX):
    test_scene = nusc.scene[i]
    first_sample_token = test_scene['first_sample_token']
    my_sample = nusc.get('sample', first_sample_token)
    cam_front_data = nusc.get('sample_data', my_sample['data'][sensor])

    radar_front_data = nusc.get('sample_data', my_sample['data']['RADAR_FRONT'])
    pc = RadarPointCloud.from_file(os.path.join(DATAROOT, radar_front_data["filename"]))
    data = pc.points.astype(dtype=np.float32).T

    image_path = os.path.join(nusc.dataroot, cam_front_data['filename'])
    results = model(image_path, conf=CONFIDENCE_THRESHOLD, iou=IOU_THRESHOLD)

    # dict_key = f"sample_token_{i+1}"
    dict_key = f"{first_sample_token}"
    arr = []
    
    for (object_index, box) in enumerate(results[0].boxes):
        x1, y1, x2, y2 = map(int, box.xyxy.tolist()[0])
        class_id = int(box.cls)
        class_name = results[0].names[class_id]
        confidence = box.conf.item()
        
        my_annotation_token = my_sample['anns'][object_index]
        my_annotation_metadata =  nusc.get('sample_annotation', my_annotation_token)

        attr_tokens = my_annotation_metadata['attribute_tokens']
        if(len(attr_tokens) == 0):
            name = ""
        else:
            attr = nusc.get('attribute', attr_tokens[0])['name']
            name = attr.split('.')[0]

        instance_token = my_annotation_metadata["instance_token"]
        instance = nusc.get('instance', instance_token)
        category = nusc.get('category', instance['category_token'])
        detection_name = category["name"].split('.')[1]

        if detection_name == "trafficcone":
            detection_name = "traffic_cone"
        
        prediction_object = {
            'sample_token': first_sample_token,
            'translation': [],  # Center of 3D box
            'size': [],  # Dimensions of 3D box
            'rotation': [],  # Rotation as quaternion
            'velocity': [],
            'detection_name': detection_name,  # e.g., 'car', 'pedestrian'
            'detection_score': confidence,  # Between 0 and 1
            'attribute_name': attr
        }

        arr.append(prediction_object)

    prediction_results["results"][dict_key] = arr

    print("===================================================")



image 1/1 /home/012392471@SJSUAD/master_project/nuscenes_tests/../data/nuscenes_full/samples/CAM_FRONT/n015-2018-07-18-11-07-57+0800__CAM_FRONT__1531883530412470.jpg: 384x640 1 person, 1 truck, 2 traffic lights, 124.1ms
Speed: 1.2ms preprocess, 124.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 /home/012392471@SJSUAD/master_project/nuscenes_tests/../data/nuscenes_full/samples/CAM_FRONT/n015-2018-07-18-11-07-57+0800__CAM_FRONT__1531883719412465.jpg: 384x640 4 cars, 1 truck, 2 traffic lights, 123.2ms
Speed: 1.2ms preprocess, 123.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 /home/012392471@SJSUAD/master_project/nuscenes_tests/../data/nuscenes_full/samples/CAM_FRONT/n015-2018-08-02-17-16-37+0800__CAM_FRONT__1533201470412460.jpg: 384x640 1 car, 124.4ms
Speed: 1.2ms preprocess, 124.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


In [67]:
prediction_results

{'meta': {'use_camera': True,
  'use_lidar': False,
  'use_radar': False,
  'use_map': False,
  'use_external': False},
 'results': {'fd8420396768425eabec9bdddf7e64b6': [{'sample_token': 'fd8420396768425eabec9bdddf7e64b6',
    'translation': [242.87, 926.036, 0.898],
    'size': [1.726, 4.257, 1.489],
    'rotation': [0.787419398050721, 0.0, 0.0, -0.616417627565468],
    'velocity': [0.25, 0.13633444905281067],
    'detection_name': 'car',
    'detection_score': 0.9393414855003357,
    'attribute_name': 'vehicle.moving'}],
  'e93e98b63d3b40209056d129dc53ceee': [{'sample_token': 'e93e98b63d3b40209056d129dc53ceee',
    'translation': [994.031, 612.51, 0.728],
    'size': [0.3, 0.291, 0.734],
    'rotation': [-0.04208490861058176, 0.0, 0.0, 0.9991140377690821],
    'velocity': [-3.5, -0.017412938177585602],
    'detection_name': 'traffic_cone',
    'detection_score': 0.8136498332023621,
    'attribute_name': 'vehicle.moving'},
   {'sample_token': 'e93e98b63d3b40209056d129dc53ceee',
    't

In [59]:
with open('results.json', 'w') as fp:
    json.dump(prediction_results, fp)

## Perform Evaluation

In [60]:
with open('detection_cvpr_2019.json', 'r') as fp:
    config_str = json.load(fp)

In [61]:
detection_config = DetectionConfig(
    class_range = config_str["class_range"],
    dist_fcn = config_str["dist_fcn"],
    dist_ths = config_str["dist_ths"],
    dist_th_tp = config_str["dist_th_tp"],
    min_recall = config_str["min_recall"],
    min_precision = config_str["min_precision"],
    max_boxes_per_sample = config_str["max_boxes_per_sample"],
    mean_ap_weight = config_str["mean_ap_weight"]
)

In [62]:
# dot notation didn't work because you can't use dot 
#    on dictionaries only objects

In [63]:
evaluator = NuScenesEval(nusc,
                         config=detection_config,
                         eval_set='val',
                         result_path="results.json",
                         output_dir='outputs/')

Initializing nuScenes detection evaluation
Loaded results from results.json. Found detections for 3 samples.
Loading annotations for val split from nuScenes version: v1.0-trainval


100%|█████████████████████████████████| 6019/6019 [00:07<00:00, 813.98it/s]

Loaded ground truth annotations for 6019 samples.





AssertionError: Samples in split doesn't match samples in predictions.