In [8]:
# %pip install nuscenes-devkit

In [69]:
import os
import json

import torch
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

import cv2
from PIL import Image

from ultralytics import YOLO

from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import RadarPointCloud
from nuscenes.eval.detection.evaluate import NuScenesEval
from nuscenes.eval.detection.data_classes import DetectionConfig
from nuscenes.eval.prediction.splits import get_prediction_challenge_split
from nuscenes.prediction import PredictHelper

## Load Model and Dataset

In [10]:
model = YOLO('weights/yolov9t.pt')

In [78]:
DATAROOT = '../data/nuscenes'

In [79]:
nusc = NuScenes(version='v1.0-mini', dataroot=DATAROOT, verbose=True)

Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.316 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.


In [82]:
# mini_pred = get_prediction_challenge_split("mini_train", dataroot=DATAROOT)

In [13]:
helper = PredictHelper(nusc)

## Perform Predictions

In [14]:
IOU_THRESHOLD = 0.6
CONFIDENCE_THRESHOLD = 0.4

In [15]:
START_INDEX = 0
END_INDEX = 3

In [16]:
sensor = 'CAM_FRONT'

In [56]:
prediction_results = {
    "meta": {
        "use_camera": True,
        "use_lidar": False,
        "use_radar": False,
        "use_map": False, 
        "use_external": False 
    },
    "results": {}
}
for i in range(START_INDEX, END_INDEX):
    test_scene = nusc.scene[i]
    first_sample_token = test_scene['first_sample_token']
    my_sample = nusc.get('sample', first_sample_token)
    cam_front_data = nusc.get('sample_data', my_sample['data'][sensor])

    radar_front_data = nusc.get('sample_data', my_sample['data']['RADAR_FRONT'])
    pc = RadarPointCloud.from_file(os.path.join(DATAROOT, radar_front_data["filename"]))
    data = pc.points.astype(dtype=np.float32).T
    
    image_path = os.path.join(nusc.dataroot, cam_front_data['filename'])
    results = model(image_path, conf=CONFIDENCE_THRESHOLD, iou=IOU_THRESHOLD)

    # dict_key = f"sample_token_{i+1}"
    dict_key = f"{first_sample_token}"
    arr = []

    for (object_index, box) in enumerate(results[0].boxes):
        x1, y1, x2, y2 = map(int, box.xyxy.tolist()[0])
        class_id = int(box.cls)
        class_name = results[0].names[class_id]
        confidence = box.conf.item()
        
        my_annotation_token = my_sample['anns'][object_index]
        my_annotation_metadata =  nusc.get('sample_annotation', my_annotation_token)
        
        attr = nusc.get('attribute', my_annotation_metadata['attribute_tokens'][0])['name']
        name = attr.split('.')[0]

        instance_token = my_annotation_metadata["instance_token"]
        instance = nusc.get('instance', instance_token)
        category = nusc.get('category', instance['category_token'])
        detection_name = category["name"].split('.')[1]
        
        velocity = data[object_index][7:9].tolist()
        
        prediction_object = {
            'sample_token': first_sample_token,
            'translation': my_annotation_metadata["translation"],  # Center of 3D box
            'size': my_annotation_metadata["size"],  # Dimensions of 3D box
            'rotation': my_annotation_metadata["rotation"],  # Rotation as quaternion
            'velocity': velocity,
            'detection_name': detection_name,  # e.g., 'car', 'pedestrian'
            'detection_score': confidence,  # Between 0 and 1
            'attribute_name': attr
        }

        arr.append(prediction_object)

    prediction_results["results"][dict_key] = arr

    print("===================================================")


image 1/1 /home/012392471@SJSUAD/master_project/nuscenes_tests/../data/nuscenes/samples/CAM_FRONT/n015-2018-07-24-11-22-45+0800__CAM_FRONT__1532402927612460.jpg: 384x640 3 cars, 1 truck, 40.0ms
Speed: 1.3ms preprocess, 40.0ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 /home/012392471@SJSUAD/master_project/nuscenes_tests/../data/nuscenes/samples/CAM_FRONT/n008-2018-08-01-15-16-36-0400__CAM_FRONT__1533151603512404.jpg: 384x640 3 persons, 5 cars, 1 traffic light, 40.1ms
Speed: 1.2ms preprocess, 40.1ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 /home/012392471@SJSUAD/master_project/nuscenes_tests/../data/nuscenes/samples/CAM_FRONT/n008-2018-08-28-16-43-51-0400__CAM_FRONT__1535489296012404.jpg: 384x640 1 person, 1 bicycle, 4 traffic lights, 39.6ms
Speed: 1.2ms preprocess, 39.6ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)


In [58]:
with open('results.json', 'w') as fp:
    json.dump(prediction_results, fp)

## Perform Evaluation

In [59]:
with open('detection_cvpr_2019.json', 'r') as fp:
    config_str = json.load(fp)

In [60]:
detection_config = DetectionConfig(
    class_range = config_str["class_range"],
    dist_fcn = config_str["dist_fcn"],
    dist_ths = config_str["dist_ths"],
    dist_th_tp = config_str["dist_th_tp"],
    min_recall = config_str["min_recall"],
    min_precision = config_str["min_precision"],
    max_boxes_per_sample = config_str["max_boxes_per_sample"],
    mean_ap_weight = config_str["mean_ap_weight"]
)

In [61]:
# dot notation didn't work because you can't use dot 
#    on dictionaries only objects

In [66]:
evaluator = NuScenesEval(nusc, 
                         config=detection_config,
                         eval_set='val', 
                         result_path="results.json", 
                         output_dir='outputs/')

Initializing nuScenes detection evaluation
Loaded results from results.json. Found detections for 3 samples.
Loading annotations for val split from nuScenes version: v1.0-mini


AssertionError: Error: Requested split val which is not compatible with NuScenes version v1.0-mini