In [1]:
#Start########################################################################################################################
##############################################################################################################################
################COCO dataset images into the camera views of nuscenes #######################################################
##############################################################################################################################
##############################################################################################################################
##############################################################################################################################

In [153]:
import os, io, json, zipfile, shutil, time, math, random
from pathlib import Path
from collections import defaultdict

import requests
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from pycocotools.coco import COCO

# ========= OOD categories (non-road) =========
CATEGORIES_FILTER = [
    # Animals not typical in city streets
    "zebra", "giraffe", "elephant", "bear", "sheep", "cow", "horse", "bird",
    # Sports / leisure
    "surfboard", "frisbee", "snowboard", "kite", "skateboard",
    # Household / indoor furniture & appliances
    "bed",
    "teddy bear"
]

# Added realistic on-road OODs (deduped + order-preserving)
MORE_ROAD_OOD = [
    # Animals (very plausible)
    "dog", "cat",


    # Sports / leisure (extras)
    "sports ball", "skis",

    # Furniture / clutter often seen curbside
    "chair", "bench", "potted plant",

    # Large non-road vehicles/objects
    "boat",
]

# Merge with deduplication, preserving the original order where possible
CATEGORIES_FILTER = list(dict.fromkeys(CATEGORIES_FILTER + MORE_ROAD_OOD))


# ========= Limits / filters =========
MAX_PER_CATEGORY        = 4     # hard cap per category (set 3–4 as you like)
MIN_MASK_AREA           = 1000  # pixels; skip tiny bits
FULL_BODY_MIN_H_RATIO   = 0.40  # bbox_h / image_h >= this → keep
FULL_BODY_MIN_W_RATIO   = 0.15  # bbox_w / image_w >= this → keep
SPLITS = ["val2017", "train2017"]

# ========= Behavior =========
USE_LOCAL_COCO = False
LOCAL_INST_JSON = {  # only used if USE_LOCAL_COCO=True
    "train2017": "/path/to/annotations/instances_train2017.json",
    "val2017":   "/path/to/annotations/instances_val2017.json",
}

IMG_CACHE_DIR   = Path("/tmp/coco_img_cache")  # image download cache
REQUEST_TIMEOUT = 15
RETRY_LIMIT     = 3
RANDOM_SEED     = 42
random.seed(RANDOM_SEED)

ANN_ZIP_URL = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"

IMG_CACHE_DIR.mkdir(parents=True, exist_ok=True)


Loading annotations for val2017…
loading annotations into memory...
Done (t=0.22s)
creating index...
index created!
val2017: extracting 22 categories.


val2017: categories: 100%|█████████████████████████████████████████████████████| 22/22 [07:54<00:00, 21.56s/it]


Loading annotations for train2017…
loading annotations into memory...
Done (t=9.06s)
creating index...
index created!
train2017: extracting 22 categories.


train2017: categories: 100%|████████████████████████████████████████████████| 22/22 [1:18:43<00:00, 214.68s/it]

Saved/updated manifest → /data/Asad/ASSETS/manifest.csv

Per-category totals (including pre-existing files):
bear                 4
bed                  4
bench                4
bird                 4
boat                 4
cat                  4
chair                4
cow                  4
dog                  4
elephant             4
frisbee              4
giraffe              4
horse                4
kite                 4
sheep                4
skateboard           4
skis                 1
snowboard            4
sports_ball          4
surfboard            4
teddy_bear           4
zebra                4





In [12]:
#End########################################################################################################################
##############################################################################################################################
################COCO dataset images into the 6 camera views of nuscenes #######################################################
##############################################################################################################################
##############################################################################################################################
##############################################################################################################################

In [154]:
# ============================================================
# nuScenes-mini → cloned novel set with OOD objects in all 6 cams
# Saves detection_novel.json (OOD) and detection_id.json (ID) in DST
# ============================================================
from pathlib import Path
import random, uuid, json, shutil, os
import numpy as np
import cv2, albumentations as A
from tqdm import tqdm
from PIL import Image

from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import view_points, BoxVisibility
from pyquaternion import Quaternion

# ╭──CONFIG ─────────────────────────────────────────────╮
SRC        = Path("/data/Asad/NuScenesMini")        # original dataset root
DST        = Path("/data/Asad/NuScenesMiniNovel")   # clone will be written here
ASSETS     = Path("/data/Asad/ASSETS")              # flat folder of RGBA PNGs

N_PASTES_PER_SCENE = 50
NOVEL_RATE = 0.5
MIN_CAMS_PER_FRAME = 3
MAX_CAMS_PER_FRAME = 6

SEED = 42
# ╰───────────────────────────────────────────────────────────╯

random.seed(SEED); np.random.seed(SEED)

assert SRC.exists(),                 f"SRC not found: {SRC}"
assert ASSETS.exists() and any(ASSETS.glob("*.png")), "ASSETS folder empty"
if DST.exists():
    raise FileExistsError(f"{DST} already exists — remove it or choose a new path")

# copy whole dataset -------------------------------------------------
print(" Copying nuScenes-mini tree …")
shutil.copytree(SRC, DST)
print("  Copied to", DST)

# assets + augment pipeline -----------------------------------------
asset_paths = sorted(ASSETS.glob("*.png"))
print(f" {len(asset_paths)} assets found in {ASSETS}")

augment = A.Compose(
    [
        A.RandomScale(scale_limit=(0.3, 0.6), p=1.0),
        A.Rotate(limit=8, border_mode=cv2.BORDER_CONSTANT, p=0.6),
        A.RandomBrightnessContrast(p=0.45),
        A.HorizontalFlip(p=0.25),
    ],
    additional_targets={"mask": "mask"},
)

def paste_object(img_bgr: np.ndarray, obj_rgba: np.ndarray):

    h, w = img_bgr.shape[:2]

    # ensure 4-channel RGBA
    if obj_rgba.ndim == 2:
        obj_rgba = np.dstack([
            obj_rgba, obj_rgba, obj_rgba,
            255 * np.ones_like(obj_rgba)
        ])
    if obj_rgba.shape[2] == 3:
        alpha = 255 * np.ones(obj_rgba.shape[:2], obj_rgba.dtype)
        obj_rgba = np.dstack([obj_rgba, alpha])
    if obj_rgba.shape[2] != 4:
        return img_bgr, None

    # Split color + alpha
    obj_rgb  = obj_rgba[:, :, :3]   # RGB from PIL
    alpha    = obj_rgba[:, :, 3]

    aug = augment(image=obj_rgb, mask=alpha)
    obj_rgb_aug, alpha_mask = aug["image"], aug["mask"]


    obj_bgr = cv2.cvtColor(obj_rgb_aug, cv2.COLOR_RGB2BGR)

    oh, ow = obj_bgr.shape[:2]

    # keep inside lower half, resize if needed
    max_w, max_h = w - 10, int(h * 0.5) - 10
    if ow >= max_w or oh >= max_h:
        scale = 0.9 * min(max_w / max(ow, 1), max_h / max(oh, 1))
        if scale <= 0:
            return img_bgr, None
        obj_bgr    = cv2.resize(obj_bgr,    None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
        alpha_mask = cv2.resize(alpha_mask, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
        oh, ow     = obj_bgr.shape[:2]

    # place somewhere in lower half
    x0 = random.randint(0, max(0, w - ow))
    y0 = random.randint(int(h * 0.5), max(int(h * 0.5), h - oh))

    roi     = img_bgr[y0:y0 + oh, x0:x0 + ow]
    alpha_f = (alpha_mask[:, :, None] / 255.0).astype(np.float32)

    img_bgr[y0:y0 + oh, x0:x0 + ow] = (1 - alpha_f) * roi + alpha_f * obj_bgr

    return img_bgr, (x0, y0, x0 + ow, y0 + oh)


#  build scene → frames → {channel: (path, sd_token)} -----------------
print("Indexing scenes across 6 cameras …")
ALL_CAM_CHANNELS = [
    "CAM_FRONT",
    "CAM_FRONT_LEFT",
    "CAM_FRONT_RIGHT",
    "CAM_BACK",
    "CAM_BACK_LEFT",
    "CAM_BACK_RIGHT",
]

nusc = NuScenes(version="v1.0-mini", dataroot=str(DST), verbose=False)

# scene_to_frames: {scene_token: {sample_token: {channel: (img_path, sd_token)}}}
scene_to_frames = {}
for sd in nusc.sample_data:
    ch = sd["channel"]
    if ch not in ALL_CAM_CHANNELS:
        continue
    sample_token = sd["sample_token"]
    scene_token  = nusc.get("sample", sample_token)["scene_token"]
    img_path     = DST / sd["filename"]
    scene_to_frames.setdefault(scene_token, {}).setdefault(sample_token, {})[ch] = (img_path, sd["token"])

#  multi-camera main loop ---------------------------------------------
print("Pasting novel objects across multiple cameras …")
new_boxes = []  # entries for detection_novel.json

scenes = list(scene_to_frames.items())
random.shuffle(scenes)

for scene_token, frames in tqdm(scenes, desc="Scenes"):
    sample_tokens = sorted(frames.keys())

    # --- 5a: guarantee some pastes across this scene ---
    guaranteed_remaining = N_PASTES_PER_SCENE

    for sample_token in sample_tokens:
        if guaranteed_remaining <= 0:
            break

        cams = list(frames[sample_token].keys())
        random.shuffle(cams)
        k = random.randint(MIN_CAMS_PER_FRAME, min(MAX_CAMS_PER_FRAME, len(cams)))
        k = min(k, guaranteed_remaining)

        for ch in cams[:k]:
            img_path, sd_token = frames[sample_token][ch]
            try:
                img_rgb = np.array(Image.open(img_path))
            except Exception:
                print("Can't open", img_path)
                continue

            img_bgr  = img_rgb[..., ::-1]
            obj_path = random.choice(asset_paths)
            try:
                obj_rgba = np.array(Image.open(obj_path).convert("RGBA"))
            except Exception:
                continue

            img_bgr, bbox = paste_object(img_bgr, obj_rgba)
            if bbox:
                x0, y0, x1, y1 = map(int, bbox)
                new_boxes.append(
                    {
                        "sample_data_token": sd_token,
                        "translation": [0, 0, 0],
                        "size": [0, 0, 0],
                        "rotation": [0, 0, 0, 1],
                        "velocity": None,
                        "detection_name": f"novel.{obj_path.stem}",
                        "detection_score": 1.0,
                        "attribute_name": "",
                        "bbox_2d": [x0, y0, x1, y1],
                        "token": str(uuid.uuid4()),
                    }
                )
                Image.fromarray(img_bgr[..., ::-1]).save(img_path, quality=95)
                guaranteed_remaining -= 1
                if guaranteed_remaining <= 0:
                    break

    for sample_token in sample_tokens:
        if random.random() >= NOVEL_RATE:
            continue
        cams = list(frames[sample_token].keys())
        random.shuffle(cams)
        k = random.randint(MIN_CAMS_PER_FRAME, min(MAX_CAMS_PER_FRAME, len(cams)))
        for ch in cams[:k]:
            img_path, sd_token = frames[sample_token][ch]
            try:
                img_rgb = np.array(Image.open(img_path))
            except Exception:
                continue
            img_bgr  = img_rgb[..., ::-1]
            obj_path = random.choice(asset_paths)
            try:
                obj_rgba = np.array(Image.open(obj_path).convert("RGBA"))
            except Exception:
                continue
            img_bgr, bbox = paste_object(img_bgr, obj_rgba)
            if bbox:
                x0, y0, x1, y1 = map(int, bbox)
                new_boxes.append(
                    {
                        "sample_data_token": sd_token,
                        "translation": [0, 0, 0],
                        "size": [0, 0, 0],
                        "rotation": [0, 0, 0, 1],
                        "velocity": None,
                        "detection_name": f"novel.{obj_path.stem}",
                        "detection_score": 1.0,
                        "attribute_name": "",
                        "bbox_2d": [x0, y0, x1, y1],
                        "token": str(uuid.uuid4()),
                    }
                )
                Image.fromarray(img_bgr[..., ::-1]).save(img_path, quality=95)

print(f"  ✔ Finished OOD injection: {len(new_boxes)} novel objects added.")

det_src = SRC / "v1.0-mini" / "detection.json"
if det_src.exists():
    det_data = json.loads(det_src.read_text())
else:
    det_data = {"results": {}, "meta": {"version": "v1.0-mini"}}

for rec in new_boxes:
    det_data["results"].setdefault(rec["sample_data_token"], []).append(rec)

out_novel = DST / "v1.0-mini" / "detection_novel.json"
out_novel.parent.mkdir(parents=True, exist_ok=True)
out_novel.write_text(json.dumps(det_data))
print("  ✔ Detection JSON →", out_novel)

print(" Projecting GT 3D boxes to 2D for all 6 cameras …")
nusc_src = NuScenes(version="v1.0-mini", dataroot=str(SRC), verbose=False)
id_results = {}

for sd in tqdm(nusc_src.sample_data, desc="ID boxes"):
    if sd["channel"] not in ALL_CAM_CHANNELS:
        continue

    sd_token = sd["token"]
    cs = nusc_src.get("calibrated_sensor", sd["calibrated_sensor_token"])
    cam_intrinsic = np.array(cs["camera_intrinsic"])
    _, boxes, _ = nusc_src.get_sample_data(sd_token, box_vis_level=BoxVisibility.ANY)

    entries = []
    for box in boxes:
        # 3D corners in camera frame → project to 2D
        corners_3d = box.corners()  # (3,8)
        pts_2d = view_points(corners_3d, cam_intrinsic, normalize=True)  # (3,8)
        xs, ys = pts_2d[0], pts_2d[1]
        x0, y0 = float(xs.min()), float(ys.min())
        x1, y1 = float(xs.max()), float(ys.max())

        entries.append({
            "sample_data_token": sd_token,
            "translation": list(box.center),                 # [x, y, z]
            "size":       list(box.wlh[::-1]),               # [l, w, h] (kept as before)
            "rotation":   list(box.orientation.elements),    # [x,y,z,w]
            "velocity":   [box.velocity[0], box.velocity[1], 0.0] if box.velocity is not None else None,
            "detection_name": box.name,                      # e.g., "car"
            "detection_score": 1.0,
            "attribute_name": "",
            "bbox_2d":    [x0, y0, x1, y1],
            "token":      str(uuid.uuid4()),
        })

    if entries:
        id_results[sd_token] = entries

out_id = DST / "v1.0-mini" / "detection_id.json"
out_id.write_text(json.dumps({"results": id_results, "meta": {"version": "v1.0-mini"}}, indent=2))
print("  ID Detection JSON →", out_id)

print("\n  Clone ready at:", DST)
print("    - OOD boxes: ", out_novel)
print("    - ID boxes : ", out_id)


 Copying nuScenes-mini tree …
  Copied to /data/Asad/NuScenesMiniNovel
 85 assets found in /data/Asad/ASSETS
Indexing scenes across 6 cameras …
Pasting novel objects across multiple cameras …


Scenes: 100%|██████████████████████████████████████████████████████████████████| 10/10 [00:38<00:00,  3.88s/it]


  ✔ Finished OOD injection: 1367 novel objects added.
  ✔ Detection JSON → /data/Asad/NuScenesMiniNovel/v1.0-mini/detection_novel.json
 Projecting GT 3D boxes to 2D for all 6 cameras …


ID boxes: 100%|█████████████████████████████████████████████████████████| 31206/31206 [01:44<00:00, 299.92it/s]


  ID Detection JSON → /data/Asad/NuScenesMiniNovel/v1.0-mini/detection_id.json

  Clone ready at: /data/Asad/NuScenesMiniNovel
    - OOD boxes:  /data/Asad/NuScenesMiniNovel/v1.0-mini/detection_novel.json
    - ID boxes :  /data/Asad/NuScenesMiniNovel/v1.0-mini/detection_id.json


In [3]:
from pathlib import Path
import json
from nuscenes.nuscenes import NuScenes

DST = Path("/data/Asad/NuScenesMiniNovel")
JSONDIR = DST / "v1.0-mini"
ALL_CAM_CHANNELS = ["CAM_FRONT","CAM_FRONT_LEFT","CAM_FRONT_RIGHT","CAM_BACK","CAM_BACK_LEFT","CAM_BACK_RIGHT"]

nusc = NuScenes(version="v1.0-mini", dataroot=str(DST), verbose=False)

bad = []
missing_files = []
mixed_scenes = []

for scene in nusc.scene:
    scene_tok = scene["token"]
    st = scene["first_sample_token"]
    while st:
        sample = nusc.get('sample', st)
        ch2sd = sample["data"]  # channel -> sample_data_token

        # Require all 6 cams present
        if not all(ch in ch2sd for ch in ALL_CAM_CHANNELS):
            st = sample["next"]
            continue

        # Check each sd belongs to same sample & scene
        scene_toks = set()
        sample_toks = set()
        files = []

        for ch in ALL_CAM_CHANNELS:
            sd_tok = ch2sd[ch]
            sd = nusc.get('sample_data', sd_tok)
            files.append((ch, sd["filename"]))
            sample_toks.add(sd["sample_token"])
            # back out the scene via sample token
            s_tmp = nusc.get('sample', sd["sample_token"])
            scene_toks.add(s_tmp["scene_token"])

            # file exists?
            fpath = DST / sd["filename"]
            if not fpath.exists():
                missing_files.append((sd_tok, f"{fpath}"))

        if len(sample_toks) != 1:
            bad.append(("mixed_sample_tokens", scene["name"], st, list(sample_toks), files))
        if len(scene_toks) != 1:
            mixed_scenes.append(("mixed_scene_tokens", scene["name"], st, list(scene_toks), files))

        st = sample["next"]

print(f"Missing files: {len(missing_files)}")
print(f"Mixed sample tokens in a single frame: {len(bad)}")
print(f"Mixed scene tokens in a single frame: {len(mixed_scenes)}")

if missing_files:
    print("\nExample missing file:", missing_files[0])
if bad:
    print("\nExample mixed-sample frame:", bad[0])
if mixed_scenes:
    print("\nExample mixed-scene frame:", mixed_scenes[0])


Missing files: 0
Mixed sample tokens in a single frame: 0
Mixed scene tokens in a single frame: 0


In [4]:
import cv2
from PIL import Image, ImageDraw
import numpy as np
from tqdm import tqdm
from nuscenes.nuscenes import NuScenes

DST = Path("/data/Asad/NuScenesMiniNovel")
JSONDIR = DST / "v1.0-mini"
OUTDIR = DST / "videos_sync"
OUTDIR.mkdir(parents=True, exist_ok=True)

FPS = 6
GRID = (3,2)
ALL_CAM_CHANNELS = ["CAM_FRONT","CAM_FRONT_LEFT","CAM_FRONT_RIGHT","CAM_BACK","CAM_BACK_LEFT","CAM_BACK_RIGHT"]

iden  = json.loads((JSONDIR / "detection_id.json").read_text())["results"]
novel = json.loads((JSONDIR / "detection_novel.json").read_text())["results"]

nusc = NuScenes(version="v1.0-mini", dataroot=str(DST), verbose=False)

def draw_overlay_with_label(img_rgb, boxes_id, boxes_ood, label_text):
    img = Image.fromarray(img_rgb.copy())
    drw = ImageDraw.Draw(img)
    bb = drw.textbbox((6,6), label_text)
    drw.rectangle([bb[0]+2, bb[1]+2, bb[2]+10, bb[3]+10], fill=(0,0,0))
    drw.text((8,8), label_text, fill=(255,255,255))
    # ID solid
    for b in boxes_id:
        x0,y0,x1,y1 = b["bbox_2d"]
        drw.rectangle([x0,y0,x1,y1], outline=(0,255,0), width=3)
    # OOD dashed
    dash = 12
    for b in boxes_ood:
        x0,y0,x1,y1 = b["bbox_2d"]
        for x in range(int(x0), int(x1), dash*2):
            drw.line([(x,y0),(min(x+dash,x1),y0)], fill=(255,0,0), width=3)
            drw.line([(x,y1),(min(x+dash,x1),y1)], fill=(255,0,0), width=3)
        for y in range(int(y0), int(y1), dash*2):
            drw.line([(x0,y),(x0,min(y+dash,y1))], fill=(255,0,0), width=3)
            drw.line([(x1,y),(x1,min(y+dash,y1))], fill=(255,0,0), width=3)
    return np.asarray(img)

def tile_grid(imgs, grid=(3,2), pad=6, bg=0):
    rows, cols = grid
    assert len(imgs) == rows*cols
    target_h = min(im.shape[0] for im in imgs)
    resized = [cv2.resize(im, (int(im.shape[1]*target_h/im.shape[0]), target_h), interpolation=cv2.INTER_AREA) for im in imgs]
    row_imgs = []
    for r in range(rows):
        row = resized[r*cols:(r+1)*cols]
        maxw = max(im.shape[1] for im in row)
        row = [np.pad(im, ((0,0),(0,maxw-im.shape[1]),(0,0)), constant_values=bg) for im in row]
        row_imgs.append(np.concatenate(row, axis=1))
    maxw = max(im.shape[1] for im in row_imgs)
    row_imgs = [np.pad(im, ((0,0),(0,maxw-im.shape[1]),(0,0)), constant_values=bg) for im in row_imgs]
    pad_arr = np.full((pad, maxw, 3), bg, dtype=np.uint8)
    out = row_imgs[0]
    for r in row_imgs[1:]:
        out = np.concatenate([out, pad_arr, r], axis=0)
    return out

for scene in tqdm(nusc.scene, desc="Scenes→Video"):
    name = scene["name"].replace("/", "_")
    out_path = str((OUTDIR / f"{name}.mp4").resolve())
    writer = None

    sample_tok = scene["first_sample_token"]
    while sample_tok:
        sample = nusc.get('sample', sample_tok)
        ch2sd = sample["data"]

        # must have all 6 cameras
        if not all(ch in ch2sd for ch in ALL_CAM_CHANNELS):
            sample_tok = sample["next"]
            continue

        # Safety assertions: same sample & scene
        sample_tokens = set()
        scene_tokens = set()
        frame_imgs = []

        for ch in ALL_CAM_CHANNELS:
            sd_tok = ch2sd[ch]
            sd = nusc.get('sample_data', sd_tok)
            sample_tokens.add(sd["sample_token"])
            scene_tokens.add(nusc.get('sample', sd["sample_token"])["scene_token"])

            f = DST / sd["filename"]
            if not f.exists():
                frame_imgs = []
                break

            img = np.array(Image.open(f).convert("RGB"))
            img = draw_overlay_with_label(img, iden.get(sd_tok, []), novel.get(sd_tok, []), ch)
            frame_imgs.append(img)

        if len(frame_imgs) != 6:
            sample_tok = sample["next"]
            continue

        assert len(sample_tokens) == 1, f"Mixed sample tokens in one frame: {sample_tokens}"
        assert len(scene_tokens) == 1, f"Mixed scene tokens in one frame: {scene_tokens}"

        grid_img = tile_grid(frame_imgs, grid=GRID, pad=6, bg=0)

        if writer is None:
            h,w = grid_img.shape[:2]
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(out_path, fourcc, FPS, (w,h))

        writer.write(cv2.cvtColor(grid_img, cv2.COLOR_RGB2BGR))
        sample_tok = sample["next"]

    if writer is not None:
        writer.release()
        print(f"  Wrote {out_path}")
    else:
        print(f"  (No frames for scene {name})")


Scenes→Video:  10%|██████                                                       | 1/10 [00:06<00:55,  6.16s/it]

  Wrote /data/Asad/NuScenesMiniNovel/videos_sync/scene-0061.mp4


Scenes→Video:  20%|████████████▏                                                | 2/10 [00:12<00:48,  6.10s/it]

  Wrote /data/Asad/NuScenesMiniNovel/videos_sync/scene-0103.mp4


Scenes→Video:  30%|██████████████████▎                                          | 3/10 [00:17<00:40,  5.80s/it]

  Wrote /data/Asad/NuScenesMiniNovel/videos_sync/scene-0553.mp4


Scenes→Video:  40%|████████████████████████▍                                    | 4/10 [00:23<00:35,  5.99s/it]

  Wrote /data/Asad/NuScenesMiniNovel/videos_sync/scene-0655.mp4


Scenes→Video:  50%|██████████████████████████████▌                              | 5/10 [00:29<00:29,  5.93s/it]

  Wrote /data/Asad/NuScenesMiniNovel/videos_sync/scene-0757.mp4


Scenes→Video:  60%|████████████████████████████████████▌                        | 6/10 [00:36<00:24,  6.09s/it]

  Wrote /data/Asad/NuScenesMiniNovel/videos_sync/scene-0796.mp4


Scenes→Video:  70%|██████████████████████████████████████████▋                  | 7/10 [00:42<00:18,  6.28s/it]

  Wrote /data/Asad/NuScenesMiniNovel/videos_sync/scene-0916.mp4


Scenes→Video:  80%|████████████████████████████████████████████████▊            | 8/10 [00:50<00:13,  6.57s/it]

  Wrote /data/Asad/NuScenesMiniNovel/videos_sync/scene-1077.mp4


Scenes→Video:  90%|██████████████████████████████████████████████████████▉      | 9/10 [00:57<00:06,  6.72s/it]

  Wrote /data/Asad/NuScenesMiniNovel/videos_sync/scene-1094.mp4


Scenes→Video: 100%|████████████████████████████████████████████████████████████| 10/10 [01:03<00:00,  6.39s/it]

  Wrote /data/Asad/NuScenesMiniNovel/videos_sync/scene-1100.mp4





In [5]:
##############################################################################################################################
##############################################################################################################################
################Detection approach  #########################################################################################
##############################################################################################################################
##############################################################################################################################
##############################################################################################################################

In [6]:
import torch, os, platform
print("Torch:", torch.__version__, "CUDA:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))
    torch.backends.cudnn.benchmark = True  # speed up conv backends
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"  # reduces OOM fragmentation



Torch: 1.13.0+cu117 CUDA: True
Device: NVIDIA RTX 6000 Ada Generation


In [7]:
!pip -q install timm==0.9.12 scikit-learn==1.3.2
# PyG wheels compatible with torch 1.13.* + cu117:
!pip -q install torch-geometric==2.3.1 \
  torch-scatter==2.1.1 torch-sparse==0.6.17 torch-cluster==1.6.1 torch-spline-conv==1.2.2 \
  -f https://data.pyg.org/whl/torch-1.13.0+cu117.html



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [8]:
import os, torch, json
from pathlib import Path

print("Torch:", torch.__version__, "CUDA:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))
    torch.backends.cudnn.benchmark = True
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

DATAROOT = Path("/data/Asad/NuScenesMiniNovel")
JSONDIR  = DATAROOT / "v1.0-mini"
ALL_CAMS = ["CAM_FRONT","CAM_FRONT_LEFT","CAM_FRONT_RIGHT","CAM_BACK","CAM_BACK_LEFT","CAM_BACK_RIGHT"]

# Load nuScenes tables used
sd_rows   = {d["token"]: d for d in json.loads((JSONDIR / "sample_data.json").read_text())}
samples   = {s["token"]: s for s in json.loads((JSONDIR / "sample.json").read_text())}
scenes    = json.loads((JSONDIR / "scene.json").read_text())
sensor_by = {s["token"]: s for s in json.loads((JSONDIR / "sensor.json").read_text())}
calib_by  = {c["token"]: c for c in json.loads((JSONDIR / "calibrated_sensor.json").read_text())}

# Your GT (ID = in-dist, OOD = novel)
gt_id    = json.loads((JSONDIR / "detection_id.json").read_text())["results"]
gt_ood   = json.loads((JSONDIR / "detection_novel.json").read_text())["results"]

# Utility: channel for a sample_data row
def channel_of_sd_row(sd_row):
    calib = calib_by[sd_row["calibrated_sensor_token"]]
    sensor = sensor_by[calib["sensor_token"]]
    return sensor["channel"]

sample_to_ch2sd = {}
for sd in sd_rows.values():
    ch = channel_of_sd_row(sd)
    if not ch.startswith("CAM_"):
        continue
    st = sd["sample_token"]
    sample_to_ch2sd.setdefault(st, {})[ch] = sd["token"]


Torch: 1.13.0+cu117 CUDA: True
Device: NVIDIA RTX 6000 Ada Generation


In [10]:
import hashlib, numpy as np
from PIL import Image
from torchvision import transforms
import timm

Image.MAX_IMAGE_PIXELS = None

# Cache
CACHE_DIR = DATAROOT / ".cache" / "emb_v1"
CACHE_DIR.mkdir(parents=True, exist_ok=True)
BATCH_SIZE = 64  # tune for your GPU

try:
    backbone = timm.create_model('dinov2_small', pretrained=True, num_classes=0).to(device).eval()
except Exception:
    backbone = timm.create_model('vit_base_patch16_224.dino', pretrained=True, num_classes=0).to(device).eval()

FEAT_DIM = backbone.num_features
preproc = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225))
])

def _sanitize_box(box, W, H, min_size=2):
    # clamp + fix ordering + remove NaNs; returns integer box or None
    x0, y0, x1, y1 = [float(v) for v in box]
    if x1 < x0: x0, x1 = x1, x0
    if y1 < y0: y0, y1 = y1, y0
    if not np.all(np.isfinite([x0,y0,x1,y1])): return None
    x0 = max(0.0, min(x0, W - 1)); y0 = max(0.0, min(y0, H - 1))
    x1 = max(x0 + 1.0, min(x1, W)); y1 = max(y0 + 1.0, min(y1, H))
    w, h = (x1-x0), (y1-y0)
    if w < min_size or h < min_size: return None
    # avoid near-full-image crops that can trigger PIL bomb checks
    if (w*h) > 0.98*(W*H):
        pad = 1.0
        x0 = max(0.0, x0 + pad); y0 = max(0.0, y0 + pad)
        x1 = min(W,   x1 - pad); y1 = min(H,   y1 - pad)
    return (int(round(x0)), int(round(y0)), int(round(x1)), int(round(y1)))

def _box_key(sd_token, box_tuple):
    s = f"{sd_token}|{box_tuple[0]},{box_tuple[1]},{box_tuple[2]},{box_tuple[3]}|{backbone.default_cfg.get('architecture','dino')}|224"
    return hashlib.md5(s.encode()).hexdigest()

def _cache_path(key): 
    return CACHE_DIR / f"{key}.npy"

@torch.no_grad()
def embed_crops_batched(pil_img: Image.Image, boxes, sd_token):
    """Return (N, FEAT_DIM) embeddings; invalid boxes -> zero vectors. Uses GPU batching + cache."""
    W, H = pil_img.size
    embs = [None] * len(boxes)
    to_run, run_meta = [], []  # (tensor list), list[(i, cache_key)]

    # prepare
    for i, box in enumerate(boxes):
        sb = _sanitize_box(box, W, H)
        if sb is None:
            embs[i] = np.zeros((FEAT_DIM,), dtype=np.float32)
            continue
        key = _box_key(sd_token, sb)
        cp = _cache_path(key)
        if cp.exists():
            embs[i] = np.load(cp)
        else:
            crop = pil_img.crop(sb)
            to_run.append(preproc(crop))
            run_meta.append((i, key))

    # run in batches
    for start in range(0, len(to_run), BATCH_SIZE):
        batch = torch.stack(to_run[start:start+BATCH_SIZE], dim=0).to(device, non_blocking=True)
        with torch.cuda.amp.autocast(enabled=(device.type=="cuda")):
            Z = backbone(batch)  # (B, D)
        Z = Z.float().cpu().numpy()
        for (i, key), z in zip(run_meta[start:start+BATCH_SIZE], Z):
            np.save(_cache_path(key), z)
            embs[i] = z

    # fill any remaining
    for i in range(len(embs)):
        if embs[i] is None:
            embs[i] = np.zeros((FEAT_DIM,), dtype=np.float32)

    return np.stack(embs, axis=0)


In [11]:
import numpy as np
from PIL import Image
import torch
from torch_geometric.data import Data as GeoData
from torch_geometric.nn import knn_graph

def build_graphs(split="train", knn_k=6, include_id_from_ood_frames=True):

    graphs = []
    for sc in scenes:
        s_tok = sc["first_sample_token"]
        while s_tok:
            sample = samples[s_tok]                 # <-- capture once
            ch2sd  = sample_to_ch2sd.get(s_tok, {}) # sync cams for this timestamp

            for ch in ALL_CAMS:
                sd_tok = ch2sd.get(ch)
                if not sd_tok:
                    continue

                fn = sd_rows[sd_tok]["filename"]
                img_path = DATAROOT / fn
                if not img_path.exists():
                    continue

                img = Image.open(img_path).convert("RGB")
                W, H = img.width, img.height

                # gather boxes + labels
                id_boxes   = [b["bbox_2d"] for b in gt_id.get(sd_tok, [])]
                ood_boxes  = [b["bbox_2d"] for b in gt_ood.get(sd_tok, [])]
                has_ood    = len(ood_boxes) > 0

                boxes, labels = [], []
                if include_id_from_ood_frames or not has_ood:
                    boxes += id_boxes; labels += [0]*len(id_boxes)
                if split != "train":
                    boxes += ood_boxes; labels += [1]*len(ood_boxes)

                if not boxes:
                    continue

                # embeddings (batched + cached)
                emb = embed_crops_batched(img, boxes, sd_tok)  # (N, D)

                # geometry + pos
                geo, pos = [], []
                for x0,y0,x1,y1 in boxes:
                    cx, cy = 0.5*(x0+x1), 0.5*(y0+y1)
                    w, h   = max(1.0, x1-x0), max(1.0, y1-y0)
                    asp, area = w/h, (w*h)/(W*H)
                    geo.append([cx/W, cy/H, w/W, h/H, asp, area])
                    pos.append([cx/W, cy/H])

                X  = torch.from_numpy(np.concatenate([emb, np.array(geo, dtype=np.float32)], axis=1))
                P  = torch.from_numpy(np.array(pos, dtype=np.float32))
                Y  = torch.from_numpy(np.array(labels, dtype=np.int64))

                k = min(knn_k, max(1, len(P)-1))
                ei = knn_graph(P, k=k)

                g = GeoData(x=X, pos=P, y=Y, edge_index=ei)
                g.meta = {"sd_token": sd_tok, "channel": ch, "scene": sc["name"]}
                graphs.append(g)

            # advance safely to next sample (may be "")
            s_tok = sample["next"]
    return graphs


train_graphs = build_graphs("train", knn_k=6, include_id_from_ood_frames=True)
val_graphs   = build_graphs("val",   knn_k=6, include_id_from_ood_frames=True)
print(f"Graphs built: {len(train_graphs)} train | {len(val_graphs)} val")

def todev(g: GeoData):
    return GeoData(
        x=g.x.to(device), pos=g.pos.to(device), y=g.y.to(device),
        edge_index=g.edge_index.to(device)
    )


Graphs built: 2171 train | 2274 val


In [163]:
import torch
import numpy as np

EPS = 1e-8

def is_valid_graph(g):
    return (g.x is not None) and (g.x.numel() > 0) and (g.x.size(0) >= 2) and (g.edge_index.numel() > 0)

def nan_to_num_(t):
    return torch.nan_to_num(t, nan=0.0, posinf=1e4, neginf=-1e4)

# 1) Filter degenerate graphs
train_graphs = [g for g in train_graphs if is_valid_graph(g)]
val_graphs   = [g for g in val_graphs   if is_valid_graph(g)]

if len(train_graphs) == 0:
    raise RuntimeError("No valid training graphs after filtering. Check detection boxes / graph build.")

# 2) Compute feature normalization on train set
with torch.no_grad():
    Xs = []
    for g in train_graphs[:min(200, len(train_graphs))]:
        Xs.append(g.x.float())
    Xs = torch.cat(Xs, dim=0)
    mu = torch.mean(Xs, dim=0)
    sd = torch.std(Xs, dim=0)
    sd = torch.clamp(sd, min=1e-3) 

def normalize_graphs(graphs):
    out = []
    for g in graphs:
        gx = g.x.float()
        gx = (gx - mu) / sd
        gx = nan_to_num_(gx)
        g.x = gx
        # also sanitize positions just in case
        g.pos = nan_to_num_(g.pos.float())
        out.append(g)
    return out

train_graphs = normalize_graphs(train_graphs)
val_graphs   = normalize_graphs(val_graphs)

print(f"After filtering: {len(train_graphs)} train | {len(val_graphs)} val")


After filtering: 1984 train | 2087 val


In [12]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.utils import negative_sampling
from sklearn.metrics import roc_auc_score, roc_curve

if len(train_graphs) == 0 or len(val_graphs) == 0:
    raise RuntimeError("No graphs built. Check paths/JSONs and that frames contain ID/OOD boxes.")

in_dim = train_graphs[0].x.size(1)


class GEncoder(nn.Module):
    def __init__(self, in_dim, hid=256, z=128):
        super().__init__()
        self.g1 = GCNConv(in_dim, hid)
        self.g2 = GCNConv(hid, z)

    def forward(self, x, ei):
        h = F.relu(self.g1(x, ei))
        z = self.g2(h, ei)
        return z


class VGAE(nn.Module):
    def __init__(self, in_dim, hid=256, z=128):
        super().__init__()
        self.mu = GEncoder(in_dim, hid, z)
        self.lv = GEncoder(in_dim, hid, z)
        self.dec = nn.Sequential(
            nn.Linear(z, hid),
            nn.ReLU(),
            nn.Linear(hid, in_dim),
        )

    def forward(self, x, ei):
        mu = self.mu(x, ei)
        logv = self.lv(x, ei)
        std = torch.exp(0.5 * logv)
        eps = torch.randn_like(std)
        z = mu + eps * std
        xrec = self.dec(z)
        return z, xrec, mu, logv


def vgae_loss(x, xrec, mu, logv, ei, z, return_components=False):
    xrec = torch.nan_to_num(xrec)
    mu = torch.nan_to_num(mu)
    logv = torch.clamp(torch.nan_to_num(logv), min=-10.0, max=10.0)

    feat = F.mse_loss(xrec, x)

    if ei.numel() > 0:
        pos = ei
        num_pos = pos.size(1)
        if num_pos > 0:
            neg = negative_sampling(pos, num_nodes=z.size(0), num_neg_samples=num_pos)
            pos_log = (z[pos[0]] * z[pos[1]]).sum(dim=1)
            neg_log = (z[neg[0]] * z[neg[1]]).sum(dim=1)
            pos_tgt = torch.ones_like(pos_log)
            neg_tgt = torch.zeros_like(neg_log)
            logits = torch.cat([pos_log, neg_log], dim=0)
            target = torch.cat([pos_tgt, neg_tgt], dim=0)
            edge = F.binary_cross_entropy_with_logits(torch.nan_to_num(logits), target)
        else:
            edge = torch.tensor(0.0, device=logv.device)
    else:
        edge = torch.tensor(0.0, device=logv.device)

    kl = -0.5 * torch.mean(1 + logv - mu.pow(2) - torch.exp(logv))

    total = feat + 0.1 * edge + 1e-3 * kl
    if torch.isnan(total):
        total = feat + 1e-3 * kl

    if return_components:
        return total, feat.detach().item(), edge.detach().item(), kl.detach().item()
    return total



def eval_vgae(model, graphs, device):
    model.eval()
    losses = []
    with torch.no_grad():
        for g in graphs:
            gd = todev(g)
            z, xrec, mu, logv = model(gd.x, gd.edge_index)
            loss = vgae_loss(gd.x, xrec, mu, logv, gd.edge_index, z)
            if not torch.isnan(loss):
                losses.append(loss.item())
    return float(np.mean(losses)) if losses else float("nan")


model_vgae = VGAE(in_dim).to(device)
opt = torch.optim.Adam(model_vgae.parameters(), lr=5e-5, weight_decay=1e-5)

max_epochs = 100
patience   = 10
best_val = float("inf")
best_state = None
epochs_no_improve = 0

for epoch in range(1, max_epochs + 1):
    np.random.shuffle(train_graphs)
    model_vgae.train()
    train_losses = []

    for g in train_graphs:
        gd = todev(g)
        z, xrec, mu, logv = model_vgae(gd.x, gd.edge_index)
        loss = vgae_loss(gd.x, xrec, mu, logv, gd.edge_index, z)

        opt.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model_vgae.parameters(), max_norm=1.0)
        opt.step()

        if not torch.isnan(loss):
            train_losses.append(loss.item())

    mean_train = float(np.mean(train_losses)) if train_losses else float("nan")
    val_loss = eval_vgae(model_vgae, val_graphs, device)

    print(f"[VGAE] epoch {epoch:03d}  train={mean_train:.6f}  val={val_loss:.6f}")

    if val_loss < best_val:
        best_val = val_loss
        best_state = {k: v.cpu().clone() for k, v in model_vgae.state_dict().items()}
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print(f"Early stopping at epoch {epoch}")
        break

if best_state is not None:
    model_vgae.load_state_dict(best_state)
    model_vgae.to(device)
    print(f"Loaded best model with val_loss={best_val:.6f}")


[VGAE] epoch 001  train=inf  val=9.327306
[VGAE] epoch 002  train=10.396014  val=9.327306
[VGAE] epoch 003  train=10.396014  val=9.327306
[VGAE] epoch 004  train=10.396014  val=9.327306
[VGAE] epoch 005  train=10.396014  val=9.327306
[VGAE] epoch 006  train=10.396014  val=9.327306
[VGAE] epoch 007  train=10.396014  val=9.327306
[VGAE] epoch 008  train=10.396014  val=9.327306
[VGAE] epoch 009  train=10.396014  val=9.327306
[VGAE] epoch 010  train=10.396014  val=9.327306
[VGAE] epoch 011  train=10.396014  val=9.327306
Early stopping at epoch 11
Loaded best model with val_loss=9.327306


In [13]:
import numpy as np, torch
from collections import defaultdict
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.covariance import LedoitWolf

def neighbor_mean_features(g):
    X = g.x.float().cpu()                   # already normalized from earlier stabilize step
    ei = g.edge_index
    N, D = X.shape
    if ei.numel() == 0:
        return X.numpy(), X.numpy(), g.y.cpu().numpy()
    src, dst = ei
    ones = torch.ones_like(src, dtype=torch.float32)
    deg  = torch.zeros(N, dtype=torch.float32).scatter_add_(0, src.cpu(), ones)
    sumN = torch.zeros(N, D, dtype=torch.float32).index_add_(0, src.cpu(), X[dst.cpu()])
    C = torch.where(deg.view(-1,1) > 0, sumN/deg.clamp_min(1.0).view(-1,1), X)  # neighbor mean; fallback to self
    return X.numpy(), C.numpy(), g.y.cpu().numpy()

# === Fit on TRAIN (ID nodes only) ===
X_tr, C_tr = [], []
for g in train_graphs:
    x, c, y = neighbor_mean_features(g)
    m = (y == 0)
    if m.any():
        X_tr.append(x[m]); C_tr.append(c[m])
Z_tr = np.concatenate([np.concatenate(X_tr,0), np.concatenate(C_tr,0)], axis=1)  # (N_id, 2D)

mu = Z_tr.mean(0, keepdims=True)
sd = Z_tr.std(0, keepdims=True).clip(1e-3)
Zs = (Z_tr - mu)/sd

cov = LedoitWolf().fit(Zs)
mu_loc  = cov.location_           # (2D,)
prec    = cov.precision_          # (2D,2D)

def maha2(Z):
    Zs = (Z - mu)/sd
    d  = Zs - mu_loc
    return np.einsum("nd,dd,nd->n", d, prec, d)     # higher = more OOD

def fpr_at_tpr(y, s, target=0.95):
    fpr, tpr, thr = roc_curve(y, s)
    i = np.argmin(np.abs(tpr - target))
    return float(fpr[i]), float(tpr[i]), float(thr[i])

# === Score VAL ===
scores_all, labels_all = [], []
per_cam_scores, per_cam_labels = defaultdict(list), defaultdict(list)

for g in val_graphs:
    x, c, y = neighbor_mean_features(g)
    Z = np.concatenate([x, c], axis=1)
    s = maha2(Z)
    scores_all.append(s); labels_all.append(y)
    cam = g.meta["channel"]
    per_cam_scores[cam].append(s); per_cam_labels[cam].append(y)

scores_all = np.concatenate(scores_all)
labels_all = np.concatenate(labels_all)

auroc = roc_auc_score(labels_all, scores_all)
fpr95, tpr95, _ = fpr_at_tpr(labels_all, scores_all, 0.95)
print(f"[Ctx-Mahalanobis] AUROC={auroc:.4f}  FPR@95={fpr95:.4f} (TPR≈{tpr95:.3f})")

print("\n[Ctx-Mahalanobis] per-camera:")
for cam in sorted(per_cam_scores.keys()):
    s = np.concatenate(per_cam_scores[cam]); y = np.concatenate(per_cam_labels[cam])
    if len(set(y)) < 2:
        print(f"{cam:16s} — not enough pos/neg"); continue
    auc = roc_auc_score(y, s); f95,_,_ = fpr_at_tpr(y, s, 0.95)
    print(f"{cam:16s} AUROC={auc:.4f}  FPR@95={f95:.4f}  n={len(y)}")


[Ctx-Mahalanobis] AUROC=0.8636  FPR@95=0.4091 (TPR≈0.950)

[Ctx-Mahalanobis] per-camera:
CAM_BACK         AUROC=0.8771  FPR@95=0.3839  n=6575
CAM_BACK_LEFT    AUROC=0.7829  FPR@95=0.5963  n=1189
CAM_BACK_RIGHT   AUROC=0.8331  FPR@95=0.5048  n=3450
CAM_FRONT        AUROC=0.8975  FPR@95=0.2755  n=5708
CAM_FRONT_LEFT   AUROC=0.8459  FPR@95=0.4265  n=2176
CAM_FRONT_RIGHT  AUROC=0.8085  FPR@95=0.4987  n=4339


In [14]:
# Build VGAE outputs per graph for later ensembling/plots
v_out = []
for g in val_graphs:
    scores = vgae_node_scores(g)          # shape (N_nodes,)
    labels = g.y.cpu().numpy()            # 0 = ID, 1 = OOD
    v_out.append({"scores": scores, "labels": labels})


NameError: name 'vgae_node_scores' is not defined

In [15]:
sv = np.concatenate([o["scores"] for o in v_out])   # VGAE OOD score
sr = scores_all                                     # Ctx-Maha OOD score
y  = np.concatenate([o["labels"] for o in v_out])

best_auc, best_fpr95, best_a = -1, None, None
from sklearn.metrics import roc_curve, roc_auc_score
def fpr_at_tpr(y, s, target=0.95):
    fpr, tpr, thr = roc_curve(y, s)
    i = np.argmin(np.abs(tpr - target))
    return float(fpr[i]), float(tpr[i]), float(thr[i])

for a in np.linspace(0,1,21):
    s = a*sv + (1-a)*sr
    auc = roc_auc_score(y, s)
    f95,_,_ = fpr_at_tpr(y, s, 0.95)
    if auc > best_auc:
        best_auc, best_fpr95, best_a = auc, f95, a
print(f"[VGAE + CtxMaha] best α={best_a:.2f}  AUROC={best_auc:.4f}  FPR@95={best_fpr95:.4f}")


ValueError: need at least one array to concatenate

In [16]:
# ========================= VISUALIZATION PACK =========================
# Requirements: assumes in memory
# - DATAROOT, JSONDIR, ALL_CAMS
# - sd_rows, samples, scenes, sensor_by, calib_by, sample_to_ch2sd
# - gt_id, gt_ood
# - val_graphs (built earlier)
# - model_vgae, device
#
# Optional but nice-to-have:
# - v_out from your VGAE scoring cell (not required here)

import os, math, random
from pathlib import Path
from typing import List, Tuple
import numpy as np
from PIL import Image, ImageDraw
import cv2
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import torch

RANDOM_SEED_FOR_OOD_SAMPLES = 42
random.seed(RANDOM_SEED_FOR_OOD_SAMPLES)

OUTDIR = Path("/data/Asad/NuScenesMiniNovel/vis_results/")
OUTDIR.mkdir(parents=True, exist_ok=True)

val_graph_by_sd = {g.meta["sd_token"]: g for g in val_graphs}


def get_boxes_and_labels_for_sd(sd_token: str) -> Tuple[List[List[float]], List[int]]:
    boxes, labels = [], []
    for b in gt_id.get(sd_token, []):
        boxes.append(b["bbox_2d"])
        labels.append(0)
    for b in gt_ood.get(sd_token, []):
        boxes.append(b["bbox_2d"])
        labels.append(1)
    return boxes, labels


def get_boxes_labels_classes_for_sd(sd_token: str):
    boxes, labels, classes = [], [], []

    for b in gt_id.get(sd_token, []):
        boxes.append(b["bbox_2d"])
        labels.append(0)
        cls = b.get("detection_name", "ID")
        classes.append(str(cls))

    for b in gt_ood.get(sd_token, []):
        boxes.append(b["bbox_2d"])
        labels.append(1)
        cls = b.get("detection_name", "OOD")
        classes.append(str(cls))

    return boxes, labels, classes


def img_path_for_sd(sd_token: str) -> Path:
    return DATAROOT / sd_rows[sd_token]["filename"]


def channel_for_sd(sd_token: str) -> str:
    try:
        calib = calib_by[sd_rows[sd_token]["calibrated_sensor_token"]]
        return sensor_by[calib["sensor_token"]]["channel"]
    except Exception:
        return "?"


# Colors in BGR for OpenCV
CLR_ID  = (0, 200, 0)
CLR_OOD = (0, 0, 200)
CLR_EDGE = (255, 255, 255)
CLR_TXT = (255, 255, 255)


def draw_frame_with_graph(sd_token: str, show_edges: bool = True) -> np.ndarray:
    ip = img_path_for_sd(sd_token)
    if not ip.exists():
        return np.zeros((720, 1280, 3), dtype=np.uint8)

    rgb = np.array(Image.open(ip).convert("RGB"))
    img = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    H, W = img.shape[:2]

    boxes, labels = get_boxes_and_labels_for_sd(sd_token)
    for (x0, y0, x1, y1), lab in zip(boxes, labels):
        p0 = (max(0, int(x0)), max(0, int(y0)))
        p1 = (min(W - 1, int(x1)), min(H - 1, int(y1)))
        cv2.rectangle(img, p0, p1, CLR_OOD if lab == 1 else CLR_ID, 2)

    g = val_graph_by_sd.get(sd_token, None)
    if show_edges and g is not None and g.pos is not None and g.pos.shape[0] == len(boxes):
        pos = g.pos.cpu().numpy()
        cx = (pos[:, 0] * W).astype(int)
        cy = (pos[:, 1] * H).astype(int)
        ei = g.edge_index.cpu().numpy()
        for s, t in zip(ei[0], ei[1]):
            p_start = (int(cx[s]), int(cy[s]))
            p_end   = (int(cx[t]), int(cy[t]))
            cv2.arrowedLine(
                img,
                p_start,
                p_end,
                CLR_EDGE,
                1,
                tipLength=0.2,
                line_type=cv2.LINE_AA
            )

    tag = f"{channel_for_sd(sd_token)} | {sd_token[:8]}"
    cv2.rectangle(img, (5, 5), (5 + len(tag) * 9, 28), (0, 0, 0), -1)
    cv2.putText(
        img,
        tag,
        (10, 25),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        CLR_TXT,
        1,
        cv2.LINE_AA
    )
    return img


def make_scene_multiview_video(scene_name: str, fps=6, resize=(640, 360),
                               max_frames=None, show_edges=True):
    scene_row = next((s for s in scenes if s["name"] == scene_name), None)
    assert scene_row is not None, f"Scene {scene_name} not found"
    s_tok = scene_row["first_sample_token"]
    w, h = resize
    grid_w, grid_h = 3 * w, 2 * h
    out_path = OUTDIR / f"{scene_name}__multiview.mp4"
    vw = cv2.VideoWriter(
        str(out_path),
        cv2.VideoWriter_fourcc(*"mp4v"),
        fps,
        (grid_w, grid_h)
    )
    n = 0
    while s_tok:
        ch2sd = sample_to_ch2sd.get(s_tok, {})
        tiles = []
        for ch in ALL_CAMS:
            sd_tok = ch2sd.get(ch)
            if sd_tok is None:
                tile = np.zeros((h, w, 3), dtype=np.uint8)
            else:
                frm = draw_frame_with_graph(sd_tok, show_edges=show_edges)
                tile = cv2.resize(frm, (w, h), interpolation=cv2.INTER_AREA)
            tiles.append(tile)
        grid = np.vstack([np.hstack(tiles[:3]), np.hstack(tiles[3:6])])
        cv2.rectangle(
            grid,
            (10, grid_h - 40),
            (10 + 450, grid_h - 10),
            (0, 0, 0),
            -1
        )
        cv2.putText(
            grid,
            f"{scene_name}  |  frame {n}",
            (15, grid_h - 18),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (255, 255, 255),
            1,
            cv2.LINE_AA
        )
        vw.write(grid)
        n += 1
        if max_frames and n >= max_frames:
            break
        s_tok = samples[s_tok]["next"]
    vw.release()
    return str(out_path), n


scenes_to_render = [s["name"] for s in scenes][:2]
video_paths = []
for scn in scenes_to_render:
    p, n = make_scene_multiview_video(
        scn, fps=6, resize=(640, 360), max_frames=80, show_edges=True
    )
    video_paths.append((p, n))

snap_paths = []
for scn in scenes_to_render:
    s_tok = next(s for s in scenes if s["name"] == scn)["first_sample_token"]
    ch2sd = sample_to_ch2sd.get(s_tok, {})
    fig, axes = plt.subplots(2, 3, figsize=(12, 8))
    axes = axes.ravel()
    for i, ch in enumerate(ALL_CAMS):
        ax = axes[i]
        ax.set_title(ch)
        sd_tok = ch2sd.get(ch)
        if sd_tok is None or sd_tok not in val_graph_by_sd:
            ax.text(0.5, 0.5, "(no data)", ha="center", va="center")
            ax.axis("off")
            continue
        g = val_graph_by_sd[sd_tok]
        pos = g.pos.cpu().numpy()
        y = g.y.cpu().numpy()
        ei = g.edge_index.cpu().numpy()

        for s, t in zip(ei[0], ei[1]):
            x_start, y_start = pos[s, 0], pos[s, 1]
            x_end, y_end = pos[t, 0], pos[t, 1]
            ax.annotate(
                "",
                xy=(x_end, y_end),
                xytext=(x_start, y_start),
                arrowprops=dict(
                    arrowstyle="->",
                    linewidth=0.5,
                    color="0.7"
                )
            )

        m_id = (y == 0)
        m_ood = (y == 1)
        ax.scatter(pos[m_id, 0], pos[m_id, 1], s=15, label="ID")
        ax.scatter(pos[m_ood, 0], pos[m_ood, 1], s=20, marker="x", label="OOD")
        ax.set_xlim(0, 1)
        ax.set_ylim(1, 0)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.legend(loc="lower right", fontsize=8)
    fig.suptitle(f"{scn} — Directed Image-plane Graphs (per camera)")
    outp = OUTDIR / f"{scn}__graphs_snapshot.png"
    fig.tight_layout()
    fig.savefig(outp, dpi=150)
    plt.close(fig)
    snap_paths.append(str(outp))

nodes, labels = [], []
max_nodes = 4000
count = 0
for g in val_graphs:
    if count >= max_nodes:
        break
    with torch.no_grad():
        mu = model_vgae.mu(g.x.to(device), g.edge_index.to(device))
        z = mu.detach().cpu().numpy()
    y = g.y.cpu().numpy()
    nodes.append(z)
    labels.append(y)
    count += len(y)

nodes = np.concatenate(nodes, 0)
labels = np.concatenate(labels, 0)
tsne_path = None
if len(nodes) >= 10:
    Z2 = TSNE(
        n_components=2,
        init="pca",
        perplexity=30,
        learning_rate="auto",
        n_iter=1000
    ).fit_transform(nodes)
    fig = plt.figure(figsize=(8, 6))
    m_id = (labels == 0)
    m_ood = (labels == 1)
    plt.scatter(Z2[m_id, 0], Z2[m_id, 1], s=8, label="ID")
    plt.scatter(Z2[m_ood, 0], Z2[m_ood, 1], s=8, label="OOD", marker="x")
    plt.legend()
    plt.title("t-SNE of VGAE Latents (val subset)")
    tsne_path = OUTDIR / "tsne_vgae_val.png"
    plt.tight_layout()
    plt.savefig(tsne_path, dpi=150)
    plt.close(fig)


@torch.no_grad()
def vgae_node_scores(g):
    x = g.x.to(device)
    ei = g.edge_index.to(device)

    z, xrec, _, _ = model_vgae(x, ei)
    err = ((x - xrec) ** 2).sum(dim=1)

    src, dst = ei
    sim = torch.sigmoid((z[src] * z[dst]).sum(dim=1))

    deg = torch.zeros(z.size(0), device=z.device).scatter_add_(
        0, src, torch.ones_like(src, dtype=torch.float32)
    )
    agg = torch.zeros(z.size(0), device=z.device).scatter_add_(0, src, sim)
    mean_sim = torch.where(deg > 0, agg / deg, torch.zeros_like(deg))

    return (err + (1.0 - mean_sim)).cpu().numpy()


# Build pool with class names
pool = []
for g in val_graphs:
    sd = g.meta["sd_token"]
    boxes, labs, clss = get_boxes_labels_classes_for_sd(sd)
    if len(boxes) != g.x.size(0):
        continue
    s = vgae_node_scores(g)
    for bb, lb, cls, ss in zip(boxes, labs, clss, s):
        pool.append((float(ss), sd, bb, int(lb), cls))

# Filter OOD by class name: horse, dog, elephant, forklift, lawn mower


OOD_KEEP_SUBSTRINGS = [
    "horse","zebra", "giraffe", "elephant", "bear", "sheep", "cow", "horse", "bird",
    "dog", "cat","sports ball","chair", "bench","boat","elephant",
    "forklift", "fork lift",
    "lawnmower", "lawn mower",
]
OOD_KEEP_SUBSTRINGS = [s.lower() for s in OOD_KEEP_SUBSTRINGS]

def is_kept_ood_class(cls_name: str) -> bool:
    c = cls_name.lower()
    return any(sub in c for sub in OOD_KEEP_SUBSTRINGS)

# Split and filter
pool_ood_scored, pool_id_scored = [], []
for score, sd, bb, lb, cls in pool:
    if lb == 1:
        if is_kept_ood_class(cls):
            pool_ood_scored.append((score, sd, bb))
    else:
        pool_id_scored.append((score, sd, bb))


# ---- NEW: Deduplicate OOD crops → keep best OOD per frame (sd_token) ----
# This ensures at most one OOD crop per image in the contact sheet.
best_ood_per_frame = {}  # sd_token -> (score, sd, bb)
for score, sd, bb in pool_ood_scored:
    if (sd not in best_ood_per_frame) or (score > best_ood_per_frame[sd][0]):
        best_ood_per_frame[sd] = (score, sd, bb)

unique_ood = list(best_ood_per_frame.values())

# ---- Randomly choose up to 50 OOD crops from unique frames ----
NUM_OOD_PICS = 100
if len(unique_ood) > NUM_OOD_PICS:
    top_ood = random.sample(unique_ood, NUM_OOD_PICS)
else:
    top_ood = unique_ood[:]  # all of them if fewer than 50

# ---- For ID, keep highest scoring ones (e.g., 20) ----
K_ID = 20
pool_id_scored.sort(key=lambda x: -x[0])
top_id = pool_id_scored[:K_ID]

def make_contact_sheet(items, title, out_path, tile=128, cols=10):
    rows = math.ceil(len(items) / cols)
    W, H = cols * tile, rows * tile + 40

    sheet = Image.new("RGB", (W, H), (20, 20, 20))
    draw  = ImageDraw.Draw(sheet)
    draw.text((10, 10), title, fill=(255, 255, 255))
    yoff = 40

    for i, (score, sd, bb) in enumerate(items):
        im = Image.open(img_path_for_sd(sd)).convert("RGB")
        x0, y0, x1, y1 = [int(v) for v in bb]
        x0 = max(0,     min(x0, im.width  - 1))
        y0 = max(0,     min(y0, im.height - 1))
        x1 = max(x0+1,  min(x1, im.width))
        y1 = max(y0+1,  min(y1, im.height))

        crop = im.crop((x0, y0, x1, y1)).resize((tile, tile), Image.BILINEAR)
        r, c = divmod(i, cols)

        sheet.paste(crop, (c * tile, yoff + r * tile))
        draw.text((c * tile + 4, yoff + r * tile + 4),
                  f"{score:.2f}", fill=(255, 255, 0))

    sheet = sheet.convert("RGB")
    sheet.save(out_path, format="PNG")
    return out_path

sheet_ood = OUTDIR / "topK_vgae_OOD_50rand.png"
sheet_id  = OUTDIR / "topK_vgae_ID.png"

if len(top_ood):
    make_contact_sheet(top_ood, "Random 50 VGAE OOD (unique frames, filtered classes)", sheet_ood)
if len(top_id):
    make_contact_sheet(top_id, "Top-K VGAE ID (highest score among ID)", sheet_id)

print("Saved:")
for p, n in video_paths:
    print(f"  - Multiview video: {p}  (frames: {n})")
for p in snap_paths:
    print(f"  - Graph snapshot: {p}")
if tsne_path:
    print(f"  - t-SNE: {tsne_path}")
if sheet_ood.exists():
    print(f"  - Contact sheet (OOD, unique frames): {sheet_ood}")
if sheet_id.exists():
    print(f"  - Contact sheet (ID):                  {sheet_id}")


ValueError: Input X contains NaN.
TSNE does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [17]:
# === Temporal graph evolution videos (per scene × per camera) ===
import os, math
from pathlib import Path
import numpy as np
import cv2
import matplotlib.pyplot as plt

VIS_DIR = Path("/data/Asad/NuScenesMiniNovel/vis_results")
VIS_DIR.mkdir(parents=True, exist_ok=True)

# Quick index: sd_token -> graph (val split)
val_graph_by_sd = {g.meta["sd_token"]: g for g in val_graphs}

# Colors (BGR)
CLR_BG   = (245,245,245)
CLR_EDGE = (180,180,180)
CLR_ID   = (40,170,40)
CLR_OOD  = (30,60,220)

def draw_graph_frame(g, size=800, point_sz=6, draw_edges=True):
    W = H = size
    canvas = np.full((H, W, 3), CLR_BG, dtype=np.uint8)
    pos = g.pos.cpu().numpy()
    y   = g.y.cpu().numpy()
    cx = (pos[:,0]*W).astype(int)
    cy = (pos[:,1]*H).astype(int)
    ei = g.edge_index.cpu().numpy()
    if draw_edges and ei.size > 0:
        for s,t in zip(ei[0], ei[1]):
            cv2.line(canvas, (cx[s],cy[s]), (cx[t],cy[t]), CLR_EDGE, 1, cv2.LINE_AA)
    for i,(x,yid) in enumerate(zip(range(len(cx)), y)):
        color = CLR_OOD if yid==1 else CLR_ID
        cv2.circle(canvas, (cx[i], cy[i]), point_sz, color, -1, lineType=cv2.LINE_AA)
    return canvas

def make_temporal_graph_video(scene_name, camera, fps=6, max_frames=None, size=800, draw_edges=True):
    scene_row = next((s for s in scenes if s["name"]==scene_name), None)
    if scene_row is None:
        raise ValueError(f"Scene {scene_name} not found")
    s_tok = scene_row["first_sample_token"]
    out_path = VIS_DIR / f"{scene_name}__{camera}__graph_temporal.mp4"
    vw = cv2.VideoWriter(str(out_path), cv2.VideoWriter_fourcc(*'mp4v'), fps, (size, size))
    n = 0
    while s_tok:
        ch2sd = sample_to_ch2sd.get(s_tok, {})
        sd_tok = ch2sd.get(camera, None)
        if sd_tok and sd_tok in val_graph_by_sd:
            g = val_graph_by_sd[sd_tok]
            frame = draw_graph_frame(g, size=size, draw_edges=draw_edges)
            # tag
            cv2.rectangle(frame, (10, size-40), (10+520, size-10), (0,0,0), -1)
            cv2.putText(frame, f"{scene_name} | {camera} | t={n}", (16,size-16),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 1, cv2.LINE_AA)
            vw.write(frame)
            n += 1
            if max_frames and n>=max_frames:
                break
        s_tok = samples[s_tok]["next"]
    vw.release()
    return str(out_path), n

# Render a few examples (change the lists as you like)
scenes_to_render = [s["name"] for s in scenes][:10]   # first two scenes; extend if you want more
cams_to_render   = ["CAM_FRONT","CAM_FRONT_LEFT","CAM_FRONT_RIGHT","CAM_BACK","CAM_BACK_LEFT","CAM_BACK_RIGHT"]

made = []
for scn in scenes_to_render:
    for cam in cams_to_render:
        p, n = make_temporal_graph_video(scn, cam, fps=6, max_frames=150, size=800, draw_edges=True)
        made.append((p,n))
print("Temporal graph videos:")
for p,n in made:
    print(f"  {p}  (frames: {n})")


Temporal graph videos:
  /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_FRONT__graph_temporal.mp4  (frames: 39)
  /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_FRONT_LEFT__graph_temporal.mp4  (frames: 39)
  /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_FRONT_RIGHT__graph_temporal.mp4  (frames: 39)
  /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_BACK__graph_temporal.mp4  (frames: 39)
  /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_BACK_LEFT__graph_temporal.mp4  (frames: 37)
  /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_BACK_RIGHT__graph_temporal.mp4  (frames: 39)
  /data/Asad/NuScenesMiniNovel/vis_results/scene-0103__CAM_FRONT__graph_temporal.mp4  (frames: 40)
  /data/Asad/NuScenesMiniNovel/vis_results/scene-0103__CAM_FRONT_LEFT__graph_temporal.mp4  (frames: 40)
  /data/Asad/NuScenesMiniNovel/vis_results/scene-0103__CAM_FRONT_RIGHT__graph_temporal.mp4  (frames: 39)
  /data/Asad/NuScenesMiniNovel/vis_results/scene-0103__C

In [18]:
# === ROC curves (overall + per camera) saved to vis_results ===
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score

VIS_DIR = Path("/data/Asad/NuScenesMiniNovel/vis_results")
VIS_DIR.mkdir(parents=True, exist_ok=True)

# Collect VGAE scores
sv_vgae = np.concatenate([o["scores"] for o in v_out])
y_all   = np.concatenate([o["labels"] for o in v_out])
cams_all= []
for g, o in zip(val_graphs, v_out):
    cams_all.extend([g.meta["channel"]]*len(o["labels"]))
cams_all = np.array(cams_all)

# Optional: context-Mahalanobis (if available)
have_ctx = 'scores_all' in globals() and 'labels_all' in globals() and len(scores_all)==len(y_all)

# Overall ROC
plt.figure(figsize=(6,6))
fpr_v, tpr_v, _ = roc_curve(y_all, sv_vgae); auc_v = roc_auc_score(y_all, sv_vgae)
plt.plot(fpr_v, tpr_v, label=f"VGAE (AUROC={auc_v:.3f})")

if have_ctx:
    fpr_c, tpr_c, _ = roc_curve(y_all, scores_all); auc_c = roc_auc_score(y_all, scores_all)
    plt.plot(fpr_c, tpr_c, label=f"Context-Mahalanobis (AUROC={auc_c:.3f})")

plt.plot([0,1],[0,1],'k--', linewidth=1)
plt.xlim(0,1); plt.ylim(0,1)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Overall ROC")
plt.legend(loc="lower right")
out_overall = VIS_DIR / "roc_overall.png"
plt.tight_layout(); plt.savefig(out_overall, dpi=200); plt.close()
print("Saved:", out_overall)

# Per-camera ROCs
for cam in sorted(set(cams_all)):
    m = (cams_all==cam)
    y_cam = y_all[m]; s_cam = sv_vgae[m]
    if len(set(y_cam))<2: 
        continue
    plt.figure(figsize=(6,6))
    fpr_v, tpr_v, _ = roc_curve(y_cam, s_cam); auc_v = roc_auc_score(y_cam, s_cam)
    plt.plot(fpr_v, tpr_v, label=f"VGAE (AUROC={auc_v:.3f})")
    if have_ctx:
        s_cam_c = scores_all[m]
        fpr_c, tpr_c, _ = roc_curve(y_cam, s_cam_c); auc_c = roc_auc_score(y_cam, s_cam_c)
        plt.plot(fpr_c, tpr_c, label=f"Ctx-Maha (AUROC={auc_c:.3f})")
    plt.plot([0,1],[0,1],'k--', linewidth=1)
    plt.xlim(0,1); plt.ylim(0,1)
    plt.xlabel("False Positive Rate"); plt.ylabel("True Positive Rate")
    plt.title(f"ROC — {cam}")
    plt.legend(loc="lower right")
    out_cam = VIS_DIR / f"roc_{cam}.png"
    plt.tight_layout(); plt.savefig(out_cam, dpi=200); plt.close()
    print("Saved:", out_cam)


ValueError: need at least one array to concatenate

In [19]:


import numpy as np, cv2, math
from pathlib import Path
from PIL import Image
from collections import deque

VIS_DIR = Path("/data/Asad/NuScenesMiniNovel/vis_results")
VIS_DIR.mkdir(parents=True, exist_ok=True)

val_graph_by_sd = {g.meta["sd_token"]: g for g in val_graphs}

# colors (BGR)
CLR_ID   = (40,170,40)
CLR_OOD  = (60,60,230)
CLR_EDGE = (255,255,255)
CLR_TXT  = (255,255,255)
CLR_TRAIL= (255,215,0)   # gold trails

def channel_for_sd(sd_token: str) -> str:
    try:
        calib = calib_by[sd_rows[sd_token]["calibrated_sensor_token"]]
        return sensor_by[calib["sensor_token"]]["channel"]
    except Exception:
        return "?"

def boxes_and_labels(sd_token):
    boxes, labels = [], []
    for b in gt_id.get(sd_token, []):
        boxes.append(b["bbox_2d"]); labels.append(0)
    for b in gt_ood.get(sd_token, []):
        boxes.append(b["bbox_2d"]); labels.append(1)
    return boxes, labels

def load_frame_bgr(sd_token):
    img_path = DATAROOT / sd_rows[sd_token]["filename"]
    if not img_path.exists():
        return None
    rgb = np.array(Image.open(img_path).convert("RGB"))
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

def overlay_graph_on_image(bgr, g, boxes, labels, draw_edges=True):
    H, W = bgr.shape[:2]
    # boxes
    for (x0,y0,x1,y1), lab in zip(boxes, labels):
        p0 = (max(0,int(x0)), max(0,int(y0)))
        p1 = (min(W-1,int(x1)), min(H-1,int(y1)))
        cv2.rectangle(bgr, p0, p1, CLR_OOD if lab==1 else CLR_ID, 2)

    # graph edges in image plane using normalized centers
    if g is not None and g.pos is not None and g.pos.size(0)==len(boxes):
        pos = g.pos.cpu().numpy()
        cx, cy = (pos[:,0]*W).astype(int), (pos[:,1]*H).astype(int)
        if draw_edges and g.edge_index is not None and g.edge_index.numel() > 0:
            ei = g.edge_index.cpu().numpy()
            for s,t in zip(ei[0], ei[1]):
                cv2.line(bgr, (cx[s],cy[s]), (cx[t],cy[t]), CLR_EDGE, 1, cv2.LINE_AA)
        # return centers for tracking
        return bgr, np.stack([cx, cy], axis=1)
    return bgr, None

def associate_tracks(prev_xy, curr_xy, max_dist=60):

    if prev_xy is None or len(prev_xy)==0 or curr_xy is None or len(curr_xy)==0:
        return []
    P, C = prev_xy.shape[0], curr_xy.shape[0]
    d2 = ((prev_xy[:,None,:]-curr_xy[None,:,:])**2).sum(axis=2)  # (P,C)
    pairs = []
    used_p, used_c = set(), set()
    # Flatten and sort by distance
    flat = [(d2[i,j], i, j) for i in range(P) for j in range(C)]
    flat.sort(key=lambda x: x[0])
    for d, i, j in flat:
        if i in used_p or j in used_c: 
            continue
        if d <= max_dist**2:
            used_p.add(i); used_c.add(j); pairs.append((i,j))
    return pairs

def make_temporal_overlay_video(scene_name, camera, fps=6, max_frames=None, trail_len=10, draw_edges=True, resize_to=None):

    scene_row = next((s for s in scenes if s["name"]==scene_name), None)
    assert scene_row is not None, f"Scene {scene_name} not found"
    s_tok = scene_row["first_sample_token"]

    # set up video writer after reading first valid frame to get size
    first_frame = None
    tmp_tok = s_tok
    while tmp_tok and first_frame is None:
        sd_tok = sample_to_ch2sd.get(tmp_tok, {}).get(camera)
        if sd_tok:
            first_frame = load_frame_bgr(sd_tok)
        tmp_tok = samples[tmp_tok]["next"]
    if first_frame is None:
        raise RuntimeError(f"No frames found for {scene_name} {camera}")

    if resize_to is not None:
        W, H = resize_to
        first_frame = cv2.resize(first_frame, (W,H), interpolation=cv2.INTER_AREA)
    else:
        H, W = first_frame.shape[:2][0], first_frame.shape[:2][1]
        H, W = first_frame.shape[0], first_frame.shape[1]

    out_path = VIS_DIR / f"{scene_name}__{camera}__overlay_temporal.mp4"
    vw = cv2.VideoWriter(str(out_path), cv2.VideoWriter_fourcc(*'mp4v'), fps, (W, H))

    prev_centers = None
    trails = []  # list[deque[(x,y)]]

    n = 0
    cur = s_tok
    while cur:
        sd_tok = sample_to_ch2sd.get(cur, {}).get(camera)
        if not sd_tok:
            cur = samples[cur]["next"]; 
            continue

        img = load_frame_bgr(sd_tok)
        if img is None:
            cur = samples[cur]["next"]; 
            continue
        if (img.shape[1], img.shape[0]) != (W, H):
            img = cv2.resize(img, (W,H), interpolation=cv2.INTER_AREA)

        g = val_graph_by_sd.get(sd_tok, None)
        boxes, labels = boxes_and_labels(sd_tok)

        # draw boxes + edges, get centers
        img, centers = overlay_graph_on_image(img, g, boxes, labels, draw_edges=draw_edges)

        # update trails
        if centers is None or len(centers)==0:
            # nothing to track; decay trails
            for dq in trails:
                if dq: dq.append(dq[-1])
                while len(dq) > trail_len: dq.popleft()
        else:
            if prev_centers is None or len(prev_centers)==0 or len(trails)==0:
                trails = [deque(maxlen=trail_len) for _ in range(len(centers))]
                for i,(x,y) in enumerate(centers):
                    trails[i].append((int(x),int(y)))
            else:
                pairs = associate_tracks(prev_centers, centers, max_dist=60)
                # map previous trails to current order; unmatched get new trails
                new_trails = [deque(maxlen=trail_len) for _ in range(len(centers))]
                matched_curr = set()
                for ip, jc in pairs:
                    new_trails[jc] = trails[ip]  # re-use the deque
                    new_trails[jc].append((int(centers[jc,0]), int(centers[jc,1])))
                    matched_curr.add(jc)
                for jc in range(len(centers)):
                    if jc not in matched_curr:
                        # new node → fresh trail
                        new_trails[jc].append((int(centers[jc,0]), int(centers[jc,1])))
                trails = new_trails

        # draw trails with fading alpha
        overlay = img.copy()
        for dq in trails:
            pts = list(dq)
            for t in range(1, len(pts)):
                alpha = t / len(pts)  # older is dimmer
                color = (int(CLR_TRAIL[0]*alpha), int(CLR_TRAIL[1]*alpha), int(CLR_TRAIL[2]*alpha))
                cv2.line(overlay, pts[t-1], pts[t], color, 2, cv2.LINE_AA)
        img = cv2.addWeighted(overlay, 0.6, img, 0.4, 0)

        # tag
        cv2.rectangle(img, (10, H-40), (10+700, H-10), (0,0,0), -1)
        cv2.putText(img, f"{scene_name} | {camera} | t={n} | sd={sd_tok[:8]}", (16, H-16),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, CLR_TXT, 1, cv2.LINE_AA)

        vw.write(img)
        n += 1
        if max_frames and n>=max_frames:
            break

        prev_centers = centers
        cur = samples[cur]["next"]

    vw.release()
    print(f"Saved overlay video: {out_path}  (frames: {n})")
    return str(out_path), n

# ---- render a few examples; tweak lists as needed ----
scenes_to_render = [s["name"] for s in scenes]

cams_to_render   = ["CAM_FRONT","CAM_FRONT_LEFT","CAM_FRONT_RIGHT","CAM_BACK","CAM_BACK_LEFT","CAM_BACK_RIGHT"]

made = []
for scn in scenes_to_render:
    for cam in cams_to_render:
        try:
            p, n = make_temporal_overlay_video(scn, cam, fps=6, max_frames=150, trail_len=12, draw_edges=True, resize_to=None)
            made.append((p,n))
        except Exception as e:
            print(f"Skip {scn} {cam} due to error: {e}")

print("Done. Outputs:")
for p,n in made:
    print(f"  {p}  (frames: {n})")


Saved overlay video: /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_FRONT__overlay_temporal.mp4  (frames: 39)
Saved overlay video: /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_FRONT_LEFT__overlay_temporal.mp4  (frames: 39)
Saved overlay video: /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_FRONT_RIGHT__overlay_temporal.mp4  (frames: 39)
Saved overlay video: /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_BACK__overlay_temporal.mp4  (frames: 39)
Saved overlay video: /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_BACK_LEFT__overlay_temporal.mp4  (frames: 39)
Saved overlay video: /data/Asad/NuScenesMiniNovel/vis_results/scene-0061__CAM_BACK_RIGHT__overlay_temporal.mp4  (frames: 39)
Saved overlay video: /data/Asad/NuScenesMiniNovel/vis_results/scene-0103__CAM_FRONT__overlay_temporal.mp4  (frames: 40)
Saved overlay video: /data/Asad/NuScenesMiniNovel/vis_results/scene-0103__CAM_FRONT_LEFT__overlay_temporal.mp4  (frames: 40)
Saved overlay vi

In [20]:
##############################################################################################################################
##############################################################################################################################
###########################################Testing Ir on Random Images multiview##############################################
##############################################################################################################################
##############################################################################################################################
##############################################################################################################################

In [21]:
# ======================= QUICK TESTER (CUDA) =======================
# VGAE + Context-Mahalanobis (CUDA) overlay visualizations
# Saves annotated PNGs + CSV to: /data/Asad/NuScenesMiniNovel/test_vis

import os, csv, random
from pathlib import Path
from typing import List, Tuple
import numpy as np
import torch
import cv2
from PIL import Image
from sklearn.covariance import LedoitWolf
from sklearn.metrics import roc_curve

# ---- sanity: required globals present ----
req = ["model_vgae","device","train_graphs","val_graphs","DATAROOT","sd_rows","samples","scenes","sample_to_ch2sd","ALL_CAMS","gt_id","gt_ood"]
missing = [k for k in req if k not in globals()]
assert not missing, f"Missing globals: {missing}. Run the earlier setup/training cells first."

OUTDIR = Path("/data/Asad/NuScenesMiniNovel/vis_results/test_results")
OUTDIR.mkdir(parents=True, exist_ok=True)

def fit_ctx_stats_cuda(train_graphs):
    Xs, Cs = [], []
    for g in train_graphs:
        X = g.x.float().cpu()                       # (N,D) CPU for sklearn
        ei = g.edge_index
        N, D = X.shape
        if ei.numel() == 0:
            C = X
        else:
            src, dst = ei
            ones = torch.ones_like(src, dtype=torch.float32)
            deg  = torch.zeros(N, dtype=torch.float32).scatter_add_(0, src.cpu(), ones)
            sumN = torch.zeros(N, D, dtype=torch.float32).index_add_(0, src.cpu(), X[dst.cpu()])
            C = torch.where(deg.view(-1,1) > 0, sumN/deg.clamp_min(1.0).view(-1,1), X)
        y = g.y.cpu().numpy()
        m = (y == 0)                                # ID nodes only
        if m.any():
            Xs.append(X[m].numpy()); Cs.append(C[m].numpy())
    assert Xs, "No ID nodes in train_graphs to fit Mahalanobis stats."

    Z_tr = np.concatenate([np.concatenate(Xs,0), np.concatenate(Cs,0)], axis=1).astype(np.float32)  # (M,2D)
    mu   = Z_tr.mean(0, keepdims=True)
    sd   = np.clip(Z_tr.std(0, keepdims=True), 1e-3, None)
    Zs   = (Z_tr - mu)/sd
    cov  = LedoitWolf().fit(Zs)
    mu_loc = cov.location_.astype(np.float32)       # (2D,)
    prec   = cov.precision_.astype(np.float32)      # (2D,2D)

    # to CUDA tensors for fast scoring
    maha_mu_t     = torch.from_numpy(mu).to(device)            # (1,2D)
    maha_sd_t     = torch.from_numpy(sd).to(device)            # (1,2D)
    maha_mu_loc_t = torch.from_numpy(mu_loc).to(device)        # (2D,)
    maha_prec_t   = torch.from_numpy(prec).to(device)          # (2D,2D)
    return maha_mu_t, maha_sd_t, maha_mu_loc_t, maha_prec_t

# cache stats (fit once)
if not all(k in globals() for k in ["maha_mu_t","maha_sd_t","maha_mu_loc_t","maha_prec_t"]):
    maha_mu_t, maha_sd_t, maha_mu_loc_t, maha_prec_t = fit_ctx_stats_cuda(train_graphs)

@torch.no_grad()
def ctx_maha_scores_cuda(g):
    X = g.x.to(device).float()                                  # (N,D)
    ei = g.edge_index
    N, D = X.size()
    if ei.numel() == 0:
        C = X
    else:
        src, dst = ei.to(device)
        ones = torch.ones_like(src, dtype=torch.float32, device=device)
        deg  = torch.zeros(N, dtype=torch.float32, device=device).scatter_add_(0, src, ones)
        sumN = torch.zeros(N, D, dtype=torch.float32, device=device).index_add_(0, src, X[dst])
        C = torch.where(deg.view(-1,1) > 0, sumN/deg.clamp_min(1.0).view(-1,1), X)
    Z  = torch.cat([X, C], dim=1)                               # (N,2D)
    Zs = (Z - maha_mu_t) / maha_sd_t
    d  = Zs - maha_mu_loc_t
    s  = (d @ maha_prec_t) * d
    return s.sum(dim=1)                                         # (N,)

@torch.no_grad()
def vgae_node_scores(g):
    x = g.x.to(device); ei = g.edge_index.to(device)
    z, xrec, _, _ = model_vgae(x, ei)
    err = ((x - xrec)**2).sum(dim=1)
    src, dst = ei
    sim = torch.sigmoid((z[src]*z[dst]).sum(dim=1))
    deg = torch.zeros(z.size(0), device=z.device).scatter_add_(0, src, torch.ones_like(src, dtype=torch.float32))
    agg = torch.zeros(z.size(0), device=z.device).scatter_add_(0, src, sim)
    mean_sim = torch.where(deg>0, agg/deg, torch.zeros_like(deg))
    return (err + (1 - mean_sim)).cpu().numpy()

def get_boxes_and_labels(sd_token: str) -> Tuple[List[List[float]], List[int]]:
    boxes, labels = [], []
    for b in gt_id.get(sd_token, []):
        boxes.append(b["bbox_2d"]); labels.append(0)
    for b in gt_ood.get(sd_token, []):
        boxes.append(b["bbox_2d"]); labels.append(1)
    return boxes, labels

def img_path_for_sd(sd_token: str) -> Path:
    return DATAROOT / sd_rows[sd_token]["filename"]

val_scores_v, val_labels = [], []
for g in val_graphs:
    val_scores_v.append(vgae_node_scores(g))
    val_labels.append(g.y.cpu().numpy())
val_scores_v = np.concatenate(val_scores_v)
val_labels   = np.concatenate(val_labels)
fpr, tpr, thr = roc_curve(val_labels, val_scores_v)
thr95_vgae = float(thr[np.argmin(np.abs(tpr - 0.95))])
p_lo, p_hi = np.percentile(val_scores_v, [5, 95])
p_lo, p_hi = float(p_lo), float(p_hi)

def score_to_color(score, lo, hi):
    if hi <= lo: hi = lo + 1e-6
    t = float(np.clip((score - lo) / (hi - lo), 0.0, 1.0))
    r = int(255 * t)
    g = int(255 * (1 - 0.5*t))
    b = 0
    return (b, g, r)

random.seed(0)
scene_names = [s["name"] for s in scenes]
pick_scenes = scene_names[:2]
frames = []
for scn in pick_scenes:
    s_tok = next(s for s in scenes if s["name"]==scn)["first_sample_token"]
    cnt = 0
    while s_tok and cnt < 10:
        frames.append((scn, s_tok))
        s_tok = samples[s_tok]["next"]
        cnt += 1

ALPHA = 1.0  

csv_path = OUTDIR / "predictions_index.csv"
with open(csv_path, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["scene","sample_token","sd_token","channel","det_idx","x0","y0","x1","y1",
                     "score_vgae","score_ctx","score_fused","pred_is_ood","gt_label"])
    saved = 0

    for scn, s_tok in frames:
        ch2sd = sample_to_ch2sd.get(s_tok, {})
        for ch in ALL_CAMS:
            sd_tok = ch2sd.get(ch)
            if not sd_tok: 
                continue
            ip = img_path_for_sd(sd_tok)
            if not ip.exists():
                continue
            # graph
            g = next((gg for gg in val_graphs if gg.meta["sd_token"]==sd_tok), None)
            if g is None or g.x.size(0)==0:
                continue

            # scores
            sv = vgae_node_scores(g)
            sc = ctx_maha_scores_cuda(g).detach().cpu().numpy()
            sF = ALPHA*sv + (1-ALPHA)*sc

            # draw
            boxes, labels = get_boxes_and_labels(sd_tok)
            bgr = cv2.cvtColor(np.array(Image.open(ip).convert("RGB")), cv2.COLOR_RGB2BGR)
            H, W = bgr.shape[:2]
            for i, (bb, lab) in enumerate(zip(boxes, labels)):
                x0,y0,x1,y1 = map(int, bb)
                x0 = max(0, min(x0, W-1)); y0 = max(0, min(y0, H-1))
                x1 = max(x0+1, min(x1, W)); y1 = max(y0+1, min(y1, H))
                col = score_to_color(sF[i], p_lo, p_hi)
                cv2.rectangle(bgr, (x0,y0), (x1,y1), col, 2)
                tag = f"{sF[i]:.2f}{' *' if sF[i]>=thr95_vgae else ''}"
                cv2.rectangle(bgr, (x0, max(0,y0-18)), (x0+max(60,8*len(tag)), y0-2), (0,0,0), -1)
                cv2.putText(bgr, tag, (x0+2, y0-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1, cv2.LINE_AA)
                writer.writerow([scn, s_tok, sd_tok, ch, i, x0,y0,x1,y1,
                                 float(sv[i]), float(sc[i]), float(sF[i]),
                                 int(sF[i]>=thr95_vgae), int(lab)])
                saved += 1

            # footer + save
            cv2.rectangle(bgr, (10, H-40), (10+900, H-10), (0,0,0), -1)
            cv2.putText(bgr, f"{scn} | {ch} | s_tok={s_tok[:8]} | sd_tok={sd_tok[:8]} | alpha={ALPHA}",
                        (16, H-16), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,255), 1, cv2.LINE_AA)
            out_img = OUTDIR / f"{scn}__{ch}__{s_tok[:8]}__{sd_tok[:8]}.png"
            cv2.imwrite(str(out_img), bgr)

print(f"Saved annotated PNGs + CSV to: {OUTDIR}")
print(f"CSV index: {csv_path}")
# ===================== END QUICK TESTER (CUDA) ======================


ValueError: Input contains NaN.

In [None]:
from IPython.display import display, Image
from pathlib import Path
IMGDIR = Path("/data/Asad/NuScenesMiniNovel/test_vis")
for p in sorted(IMGDIR.glob("*.png"))[:6]:
    display(Image(filename=str(p)))


In [None]:
import pandas as pd
from pathlib import Path
csv_path = Path("/data/Asad/NuScenesMiniNovel/vis_results/test_results/predictions_index.csv")
df = pd.read_csv(csv_path)

# overall
tp = ((df.pred_is_ood==1) & (df.gt_label==1)).sum()
fp = ((df.pred_is_ood==1) & (df.gt_label==0)).sum()
tn = ((df.pred_is_ood==0) & (df.gt_label==0)).sum()
fn = ((df.pred_is_ood==0) & (df.gt_label==1)).sum()
prec = tp / max(tp+fp,1); rec = tp / max(tp+fn,1); f1 = 2*prec*rec / max(prec+rec,1e-9)
print(f"Confusion @thr95  TP:{tp} FP:{fp} TN:{tn} FN:{fn}  |  Precision:{prec:.3f} Recall:{rec:.3f} F1:{f1:.3f}")

# per-camera
print("\nPer-camera:")
print(df.groupby("channel")[["pred_is_ood","gt_label"]]
      .apply(lambda g: pd.Series({
          "TP": int(((g.pred_is_ood==1)&(g.gt_label==1)).sum()),
          "FP": int(((g.pred_is_ood==1)&(g.gt_label==0)).sum()),
          "TN": int(((g.pred_is_ood==0)&(g.gt_label==0)).sum()),
          "FN": int(((g.pred_is_ood==0)&(g.gt_label==1)).sum()),
      })))


In [22]:
import numpy as np, pandas as pd
csv_path = Path("/data/Asad/NuScenesMiniNovel/vis_results/test_results/predictions_index.csv")
df = pd.read_csv(csv_path)

# recompute fused with different alpha, measure AUROC quickly
from sklearn.metrics import roc_auc_score, roc_curve
best = None
for a in np.linspace(0,1,11):
    s = a*df["score_vgae"].values + (1-a)*df["score_ctx"].values
    auc = roc_auc_score(df["gt_label"].values, s)
    fpr, tpr, thr = roc_curve(df["gt_label"].values, s)
    i = np.argmin(np.abs(tpr-0.95))
    fpr95 = fpr[i]
    best = max(best or (-1,None,None), (auc, a, fpr95), key=lambda x: x[0])
    print(f"alpha={a:.2f}  AUROC={auc:.4f}  FPR@95={fpr95:.4f}")
print("\nBest:", best)

import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import roc_auc_score, roc_curve

csv_path = Path("/data/Asad/NuScenesMiniNovel/vis_results/test_results/predictions_index.csv")
df = pd.read_csv(csv_path)

# ---- 1) Tune alpha on validation (grid search) ----
best_auc, best_alpha, best_fpr95 = -1, None, None

for a in np.linspace(0, 1, 11):  # {0.0, 0.1, ..., 1.0}
    s = a * df["score_vgae"].values + (1 - a) * df["score_ctx"].values

    auc = roc_auc_score(df["gt_label"].values, s)

    fpr, tpr, thr = roc_curve(df["gt_label"].values, s)
    i = np.argmin(np.abs(tpr - 0.95))
    fpr95 = fpr[i]

    print(f"alpha={a:.2f}  AUROC={auc:.4f}  FPR@95={fpr95:.4f}")

    if auc > best_auc:
        best_auc, best_alpha, best_fpr95 = auc, a, fpr95

print(f"\nSelected alpha (validation): {best_alpha:.2f}")
print(f"Best validation AUROC: {best_auc:.4f}  |  FPR@95: {best_fpr95:.4f}")

# ---- 2) Fix alpha and store fused score (this is what paper claims) ----
ALPHA = best_alpha
df["score_fused"] = ALPHA * df["score_vgae"].values + (1 - ALPHA) * df["score_ctx"].values

# optional: save for downstream plotting/reporting
out_path = csv_path.parent / "predictions_index_with_fused.csv"
df.to_csv(out_path, index=False)
print("\nSaved:", out_path)


alpha=0.00  AUROC=0.9707  FPR@95=0.0805
alpha=0.10  AUROC=0.9715  FPR@95=0.0774
alpha=0.20  AUROC=0.9722  FPR@95=0.0750
alpha=0.30  AUROC=0.9731  FPR@95=0.0744
alpha=0.40  AUROC=0.9740  FPR@95=0.0714
alpha=0.50  AUROC=0.9754  FPR@95=0.0665
alpha=0.60  AUROC=0.9771  FPR@95=0.0569
alpha=0.70  AUROC=0.9791  FPR@95=0.0520
alpha=0.80  AUROC=0.9814  FPR@95=0.0448
alpha=0.90  AUROC=0.9847  FPR@95=0.0472
alpha=1.00  AUROC=0.9841  FPR@95=0.0665

Best: (0.984690591885721, 0.9, 0.047186932849364795)
alpha=0.00  AUROC=0.9707  FPR@95=0.0805
alpha=0.10  AUROC=0.9715  FPR@95=0.0774
alpha=0.20  AUROC=0.9722  FPR@95=0.0750
alpha=0.30  AUROC=0.9731  FPR@95=0.0744
alpha=0.40  AUROC=0.9740  FPR@95=0.0714
alpha=0.50  AUROC=0.9754  FPR@95=0.0665
alpha=0.60  AUROC=0.9771  FPR@95=0.0569
alpha=0.70  AUROC=0.9791  FPR@95=0.0520
alpha=0.80  AUROC=0.9814  FPR@95=0.0448
alpha=0.90  AUROC=0.9847  FPR@95=0.0472
alpha=1.00  AUROC=0.9841  FPR@95=0.0665

Selected alpha (validation): 0.90
Best validation AUROC: 0.9847  

In [None]:
from pathlib import Path
out = Path("/data/Asad/NuScenesMiniNovel/vis_results/test_results/index.html")
imgs = sorted(Path("/data/Asad/NuScenesMiniNovel/vis_results/test_results").glob("*.png"))
with open(out, "w") as f:
    f.write("<html><body style='font-family: sans-serif'>\n<h2>OOD Prediction Gallery</h2>\n")
    for p in imgs:
        f.write(f"<div><img src='{p.name}' style='max-width: 100%;'><p>{p.name}</p></div><hr/>\n")
    f.write("</body></html>\n")
print("Gallery:", out)


In [None]:
import torch, pickle, numpy as np, os
save_dir = "data/Asad/NuScenesMiniNovel/vis_results/test_results/checkpoints"
os.makedirs(save_dir, exist_ok=True)
torch.save(model_vgae.state_dict(), f"{save_dir}/vgae.pt")

# context-maha CUDA stats (save CPU copies)
stats = {
    "mu":      (maha_mu_t.detach().cpu().numpy()),
    "sd":      (maha_sd_t.detach().cpu().numpy()),
    "mu_loc":  (maha_mu_loc_t.detach().cpu().numpy()),
    "prec":    (maha_prec_t.detach().cpu().numpy()),
}
with open(f"{save_dir}/ctx_maha.pkl", "wb") as f: pickle.dump(stats, f)
print("Saved:", save_dir)


In [179]:
# === nuScenes-OOD: multi-model OOD evaluation with per-camera AUROC and CSV export ===
# - Dataset layout: <NUSCENES_OOD_ROOT>/<JSONDIR_NAME> with:
#     sample.json, sample_data.json, scene.json, calibrated_sensor.json, sensor.json,
#     detection_id.json, detection_novel.json (both contain {"results": {sd_token: [{"bbox_2d":[x1,y1,x2,y2]}, ...]}})
# - KPIs (overall + per camera): AP@0.5, P@0.5, R@0.5, OOD-FP, AUROC, FPR@95, N, Time(s)
# - Models (try/except): FasterRCNN_R50, FasterRCNN_MBV3, RetinaNet_R50, SSDLite_MBV3, SSD300_VGG16, YOLOv8n/s (optional)

import os, json, time, math, random, warnings, csv
from pathlib import Path
import numpy as np
import torch, torchvision
from PIL import Image, ImageFile
import torchvision.transforms as T

ImageFile.LOAD_TRUNCATED_IMAGES = True

# ------------------ CONFIG ------------------
NUSCENES_OOD_ROOT  = Path("/data/Asad/NuScenesMiniNovel")   
JSONDIR_NAME       = "v1.0-mini"                            
MAX_IMAGES         = 500                                     
IOU_MATCH          = 0.5                                     
SCORE_THRESH       = 0.05                                    
CSV_DIR            = Path("/data/Asad/NuScenesMiniNovel/vis_results/")
CSV_DIR.mkdir(parents=True, exist_ok=True)
CSV_OUT            = CSV_DIR / "ood_report.csv"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Torch: {torch.__version__} | CUDA: {torch.cuda.is_available()} | Device: {device}")

# ------------------ DATASET ------------------
def load_json(p): 
    with open(p,"r") as f: 
        return json.load(f)

def load_frame_index(dataroot: Path):
    jsondir = dataroot / JSONDIR_NAME
    sd_rows = {d["token"]: d for d in load_json(jsondir / "sample_data.json")}
    samples = {s["token"]: s for s in load_json(jsondir / "sample.json")}
    scenes  = load_json(jsondir / "scene.json")
    calib_by  = {c["token"]: c for c in load_json(jsondir / "calibrated_sensor.json")}
    sensor_by = {s["token"]: s for s in load_json(jsondir / "sensor.json")}
    id_path   = jsondir / "detection_id.json"
    ood_path  = jsondir / "detection_novel.json"
    gt_id  = load_json(id_path)["results"] if id_path.exists() else {}
    gt_ood = load_json(ood_path)["results"] if ood_path.exists() else {}

    def channel_of_sd_row(sd_row):
        calib  = calib_by[sd_row["calibrated_sensor_token"]]
        sensor = sensor_by[calib["sensor_token"]]
        return sensor["channel"]

    sample_to_ch2sd = {}
    for sd in sd_rows.values():
        ch = channel_of_sd_row(sd)
        if not ch.startswith("CAM_"): 
            continue
        st = sd["sample_token"]
        sample_to_ch2sd.setdefault(st, {})[ch] = sd["token"]

    frames = []
    ALL_CAMS = ["CAM_FRONT","CAM_FRONT_LEFT","CAM_FRONT_RIGHT","CAM_BACK","CAM_BACK_LEFT","CAM_BACK_RIGHT"]
    for sc in scenes:
        s_tok = sc["first_sample_token"]
        while s_tok:
            sample = samples[s_tok]
            ch2sd  = sample_to_ch2sd.get(s_tok, {})
            for ch in ALL_CAMS:
                sd_tok = ch2sd.get(ch)
                if not sd_tok: 
                    continue
                fn = sd_rows[sd_tok]["filename"]
                img_path = dataroot / fn
                if not img_path.exists():
                    continue
                id_boxes  = [b["bbox_2d"] for b in gt_id.get(sd_tok, [])]
                ood_boxes = [b["bbox_2d"] for b in gt_ood.get(sd_tok, [])]
                frames.append((
                    str(img_path),            # 0
                    sd_tok,                   # 1
                    ch,                       # 2 (camera)
                    np.array(id_boxes,  dtype=np.float32),  # 3
                    np.array(ood_boxes, dtype=np.float32)   # 4
                ))
            s_tok = sample["next"]
    return frames

def pick_subset(frames, k=None, seed=13):
    if (k is None) or (k>=len(frames)): 
        return frames
    rng = random.Random(seed)
    idx = list(range(len(frames)))
    rng.shuffle(idx)
    return [frames[i] for i in idx[:k]]

# ------------------ MODELS ------------------
to_tensor = T.ToTensor()

# Torchvision detectors (COCO-trained)
def load_frcnn_r50():
    m = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_frcnn_mbv3():
    m = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_retinanet_r50():
    m = torchvision.models.detection.retinanet_resnet50_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_ssdlite_mbv3():
    m = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights="DEFAULT")
    return m.to(device).eval()

def load_ssd300_vgg16():
    m = torchvision.models.detection.ssd300_vgg16(weights="DEFAULT")
    return m.to(device).eval()

def infer_torchvision_detector(model, pil_img):
    x = to_tensor(pil_img).to(device)
    with torch.no_grad():
        out = model([x])[0]
    boxes  = out["boxes"].detach().float().cpu().numpy()
    scores = out["scores"].detach().float().cpu().numpy()
    return boxes.astype(np.float32), scores.astype(np.float32), "conf"

try:
    from ultralytics import YOLO
    _has_yolo = True
except Exception:
    _has_yolo = False

def load_yolov8n():
    if not _has_yolo: return None
    return YOLO("yolov8n.pt").to(device)

def load_yolov8s():
    if not _has_yolo: return None
    return YOLO("yolov8s.pt").to(device)

def infer_yolov8(model, pil_img):
    im = np.array(pil_img.convert("RGB"))
    ydev = 0 if device.type == "cuda" else "cpu"
    r = model.predict(source=im, verbose=False, conf=0.001, device=ydev)[0]
    if r is None or r.boxes is None or len(r.boxes) == 0:
        return np.zeros((0,4), dtype=np.float32), np.zeros((0,), dtype=np.float32), "conf"
    xyxy = r.boxes.xyxy.cpu().numpy().astype(np.float32)
    if hasattr(r, "probs") and r.probs is not None:
        scores = r.probs.data.cpu().numpy().max(axis=1).astype(np.float32)
        return xyxy, scores, "MSP"
    else:
        scores = r.boxes.conf.cpu().numpy().astype(np.float32)
        return xyxy, scores, "conf"

# ------------------ METRICS ------------------
def iou_xyxy(a, b):
    Na, Nb = a.shape[0], b.shape[0]
    if Na==0 or Nb==0:
        return np.zeros((Na,Nb), dtype=np.float32)
    ax1, ay1, ax2, ay2 = a[:,0], a[:,1], a[:,2], a[:,3]
    bx1, by1, bx2, by2 = b[:,0], b[:,1], b[:,2], b[:,3]
    inter_x1 = np.maximum(ax1[:,None], bx1[None,:])
    inter_y1 = np.maximum(ay1[:,None], by1[None,:])
    inter_x2 = np.minimum(ax2[:,None], bx2[None,:])
    inter_y2 = np.minimum(ay2[:,None], by2[None,:])
    inter_w = np.clip(inter_x2 - inter_x1, 0, None)
    inter_h = np.clip(inter_y2 - inter_y1, 0, None)
    inter = inter_w * inter_h
    area_a = (ax2-ax1)*(ay2-ay1)
    area_b = (bx2-bx1)*(by2-by1)
    union = area_a[:,None] + area_b[None,:] - inter
    return np.where(union>0, inter/union, 0.0)

def ap50_single_class(all_scores, all_tp, total_gt_pos):
    if len(all_scores)==0:
        return 0.0, 0.0, 0.0
    order = np.argsort(-np.array(all_scores))
    tp = np.array(all_tp)[order].astype(np.float32)
    fp = 1.0 - tp
    cum_tp = np.cumsum(tp)
    cum_fp = np.cumsum(fp)
    recall = cum_tp / max(1, total_gt_pos)
    precision = cum_tp / np.maximum(1, (cum_tp+cum_fp))
    # all-points interpolation
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))
    for i in range(mpre.size-1, 0, -1):
        mpre[i-1] = max(mpre[i-1], mpre[i])
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    ap = float(np.sum((mrec[idx+1]-mrec[idx]) * mpre[idx+1]))
    best_i = int(np.argmax(2*precision*recall/(precision+recall+1e-9)))
    return ap, float(precision[best_i]), float(recall[best_i])

# ------------------ EVAL ------------------
ALL_CAMS = ["CAM_FRONT","CAM_FRONT_LEFT","CAM_FRONT_RIGHT","CAM_BACK","CAM_BACK_LEFT","CAM_BACK_RIGHT"]

def evaluate_dataset(frames, model_name, model, infer_fn, compute_ood_roc=True):
    # global accumulators
    all_scores, all_tp = [], []
    total_id_gt = 0
    det_ood_hits = 0
    total_dets = 0
    roc_scores_glob, roc_labels_glob = [], []

    # per-camera accumulators
    per_cam = {
        cam: {
            "scores": [], "tp": [], "id_gt": 0,
            "OOD_FP_hits": 0, "detections": 0,
            "roc_scores": [], "roc_labels": [], "N": 0
        } for cam in ALL_CAMS
    }

    have_any_ood = any(len(fr[4])>0 for fr in frames)
    t0 = time.time()

    for (img_path, sd_tok, cam, id_boxes, ood_boxes) in frames:
        try:
            img = Image.open(img_path).convert("RGB")
        except Exception:
            continue

        boxes, conf_like, score_type = infer_fn(model, img)
        keep = conf_like >= SCORE_THRESH if conf_like is not None and len(conf_like)>0 else np.array([], dtype=bool)
        boxes = boxes[keep] if boxes.size else boxes
        conf_like = conf_like[keep] if keep.size else conf_like

        # === ID AP accounting (global + per-cam) ===
        total_id_gt += int(id_boxes.shape[0])
        used = np.zeros((id_boxes.shape[0],), dtype=bool)
        if boxes.shape[0] > 0 and id_boxes.shape[0] > 0:
            IoU = iou_xyxy(boxes, id_boxes)
            order = np.argsort(-conf_like)
            for di in order:
                j = int(np.argmax(IoU[di]))
                if IoU[di, j] >= IOU_MATCH and not used[j]:
                    all_scores.append(float(conf_like[di])); all_tp.append(1); used[j]=True
                else:
                    all_scores.append(float(conf_like[di])); all_tp.append(0)
        else:
            for s in (conf_like if conf_like is not None else []):
                all_scores.append(float(s)); all_tp.append(0)

        # per-camera AP accum
        cam_used = np.zeros((id_boxes.shape[0],), dtype=bool)
        if boxes.shape[0] > 0 and id_boxes.shape[0] > 0:
            IoU = iou_xyxy(boxes, id_boxes)
            order = np.argsort(-conf_like)
            for di in order:
                j = int(np.argmax(IoU[di]))
                if IoU[di, j] >= IOU_MATCH and not cam_used[j]:
                    per_cam[cam]["scores"].append(float(conf_like[di]))
                    per_cam[cam]["tp"].append(1)
                    cam_used[j]=True
                else:
                    per_cam[cam]["scores"].append(float(conf_like[di]))
                    per_cam[cam]["tp"].append(0)
        else:
            for s in (conf_like if conf_like is not None else []):
                per_cam[cam]["scores"].append(float(s))
                per_cam[cam]["tp"].append(0)
        per_cam[cam]["id_gt"] += int(id_boxes.shape[0])

        # === OOD-FP rate: any det overlapping any OOD GT ===
        total_dets += int(boxes.shape[0])
        per_cam[cam]["detections"] += int(boxes.shape[0])
        if boxes.shape[0] > 0 and ood_boxes.shape[0] > 0:
            IoU_ood = iou_xyxy(boxes, ood_boxes)
            hits = (IoU_ood.max(axis=1) >= IOU_MATCH).sum()
            det_ood_hits += int(hits)
            per_cam[cam]["OOD_FP_hits"] += int(hits)

        # === AUROC/FPR@95 per-GT (global + per-cam), only if OOD exists ===
        if compute_ood_roc and have_any_ood:
            if boxes.shape[0] > 0:
                IoU_id  = iou_xyxy(id_boxes,  boxes) if id_boxes.shape[0]>0 else np.zeros((0, boxes.shape[0]))
                IoU_ood = iou_xyxy(ood_boxes, boxes) if ood_boxes.shape[0]>0 else np.zeros((0, boxes.shape[0]))
            else:
                IoU_id  = np.zeros((id_boxes.shape[0],  0))
                IoU_ood = np.zeros((ood_boxes.shape[0], 0))
            det_ood_score = (1.0 - conf_like) if (conf_like is not None and len(conf_like)>0) else np.array([])

            # ID GT -> label 0
            for gi in range(id_boxes.shape[0]):
                if IoU_id.shape[1] > 0 and IoU_id[gi].max() >= IOU_MATCH:
                    di = int(IoU_id[gi].argmax())
                    s = float(det_ood_score[di]) if det_ood_score.size>0 else 0.0
                    roc_scores_glob.append(s); roc_labels_glob.append(0)
                    per_cam[cam]["roc_scores"].append(s); per_cam[cam]["roc_labels"].append(0)
                else:
                    roc_scores_glob.append(0.0); roc_labels_glob.append(0)
                    per_cam[cam]["roc_scores"].append(0.0); per_cam[cam]["roc_labels"].append(0)
            # OOD GT -> label 1
            for gi in range(ood_boxes.shape[0]):
                if IoU_ood.shape[1] > 0 and IoU_ood[gi].max() >= IOU_MATCH:
                    di = int(IoU_ood[gi].argmax())
                    s = float(det_ood_score[di]) if det_ood_score.size>0 else 1.0
                    roc_scores_glob.append(s); roc_labels_glob.append(1)
                    per_cam[cam]["roc_scores"].append(s); per_cam[cam]["roc_labels"].append(1)
                else:
                    roc_scores_glob.append(1.0); roc_labels_glob.append(1)
                    per_cam[cam]["roc_scores"].append(1.0); per_cam[cam]["roc_labels"].append(1)

        per_cam[cam]["N"] += 1

    # Aggregate global AP on ID
    ap, p, r = ap50_single_class(all_scores, all_tp, total_id_gt)
    ood_fp_rate = det_ood_hits / max(1, total_dets)
    results = {
        "AP50": ap, "P@0.5": p, "R@0.5": r,
        "OOD_FP_rate": ood_fp_rate,
        "N_imgs": len(frames),
        "time_s": time.time()-t0
    }

    # Global AUROC/FPR@95
    if compute_ood_roc and len(set(roc_labels_glob))>1:
        from sklearn.metrics import roc_auc_score, roc_curve
        scores = np.array(roc_scores_glob); labels = np.array(roc_labels_glob)
        auroc = float(roc_auc_score(labels, scores))
        fpr, tpr, thr = roc_curve(labels, scores)
        i95 = int(np.argmin(np.abs(tpr - 0.95)))
        results["AUROC"]  = auroc
        results["FPR@95"] = float(fpr[i95])

    # Per-camera AP + AUROC/FPR@95 + OOD-FP
    results["per_camera"] = {}
    for cam in ALL_CAMS:
        A = per_cam[cam]
        ap_c, p_c, r_c = ap50_single_class(A["scores"], A["tp"], A["id_gt"])
        oodfp_c = (A["OOD_FP_hits"] / max(1, A["detections"])) if A["detections"]>0 else float('nan')
        cam_res = {"AP50": ap_c, "P@0.5": p_c, "R@0.5": r_c, "OOD_FP": oodfp_c, "N": A["N"]}
        # per-cam AUROC
        if compute_ood_roc and len(A["roc_labels"])>0 and len(set(A["roc_labels"]))>1:
            from sklearn.metrics import roc_auc_score, roc_curve
            sc = np.array(A["roc_scores"]); lb = np.array(A["roc_labels"])
            try:
                au = float(roc_auc_score(lb, sc))
                fpr, tpr, thr = roc_curve(lb, sc)
                i95 = int(np.argmin(np.abs(tpr - 0.95)))
                cam_res["AUROC"]  = au
                cam_res["FPR@95"] = float(fpr[i95])
            except Exception:
                pass
        results["per_camera"][cam] = cam_res

    # Camera-average (macro) for printing/CSV convenience
    def cam_mean(key):
        vals = [results["per_camera"][c].get(key, float('nan')) for c in ALL_CAMS]
        vals = [v for v in vals if not (isinstance(v,float) and math.isnan(v))]
        return float(np.mean(vals)) if len(vals)>0 else float('nan')
    results["camera_avg"] = {
        "AP50": cam_mean("AP50"),
        "P@0.5": cam_mean("P@0.5"),
        "R@0.5": cam_mean("R@0.5"),
        "OOD_FP": cam_mean("OOD_FP"),
        "AUROC": cam_mean("AUROC"),
        "FPR@95": cam_mean("FPR@95")
    }
    return results

def pretty_row(cols, widths):
    return "  ".join(str(c).ljust(w) for c,w in zip(cols, widths))

# Load frames
frames_all = load_frame_index(NUSCENES_OOD_ROOT)
frames = pick_subset(frames_all, MAX_IMAGES)
print(f"Loaded OOD frames: {len(frames)}")

# Build model list
models = []
try:   models.append(("FasterRCNN_R50", load_frcnn_r50(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"FasterRCNN_R50 not available: {e}")
try:   models.append(("FasterRCNN_MBV3", load_frcnn_mbv3(), infer_torchvision_detector))
except Exception as e: warnings.warn(f"FasterRCNN_MBV3 not available: {e}")
try:   models.append(("RetinaNet_R50",  load_retinanet_r50(), infer_torchvision_detector))
except Exception as e: warnings.warn(f"RetinaNet_R50 not available: {e}")
try:   models.append(("SSDLite_MBV3",   load_ssdlite_mbv3(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"SSDLite_MBV3 not available: {e}")
try:   models.append(("SSD300_VGG16",   load_ssd300_vgg16(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"SSD300_VGG16 not available: {e}")

try:
    y8n = load_yolov8n()
    if y8n is not None: models.append(("YOLOv8n", y8n, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8n not available: {e}")
try:
    y8s = load_yolov8s()
    if y8s is not None: models.append(("YOLOv8s", y8s, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8s not available: {e}")

# Print header
hdr = ["Model","AP@0.5","P@0.5","R@0.5","OOD-FP","AUROC","FPR@95","N","Time(s)","CamAvg(AUROC/FPR)"]
w   = [16, 8, 8, 8, 8, 8, 8, 6, 8, 18]
print(pretty_row(hdr, w))
print("-"*120)

rows_for_csv = []
for name, model, infer_fn in models:
    res = evaluate_dataset(frames, name, model, infer_fn, compute_ood_roc=True)
    auroc = res.get("AUROC", float('nan'))
    fpr95 = res.get("FPR@95", float('nan'))
    cam_avg = res["camera_avg"]
    cam_avg_str = f"{cam_avg['AUROC']:.3f}/{cam_avg['FPR@95']:.3f}" if not math.isnan(cam_avg["AUROC"]) else "—"

    print(pretty_row([
        name,
        f"{res['AP50']:.3f}",
        f"{res['P@0.5']:.3f}",
        f"{res['R@0.5']:.3f}",
        f"{res['OOD_FP_rate']:.3f}",
        f"{auroc:.4f}" if not math.isnan(auroc) else "—",
        f"{fpr95:.4f}" if not math.isnan(fpr95) else "—",
        res["N_imgs"],
        f"{res['time_s']:.1f}",
        cam_avg_str
    ], w))

    # Per-camera print
    for cam in ALL_CAMS:
        c = res["per_camera"][cam]
        out_auroc = c.get("AUROC", float('nan'))
        out_fpr95 = c.get("FPR@95", float('nan'))
        print(f"   {cam}: AP={c['AP50']:.3f}, P={c['P@0.5']:.3f}, R={c['R@0.5']:.3f}, "
              f"OOD-FP={c['OOD_FP'] if not isinstance(c['OOD_FP'],float) or not math.isnan(c['OOD_FP']) else '—'}, "
              f"AUROC={out_auroc if not math.isnan(out_auroc) else '—'}, "
              f"FPR@95={out_fpr95 if not math.isnan(out_fpr95) else '—'}, N={c['N']}")

    row = {
        "model": name,
        "overall_AP50": res["AP50"], "overall_P@0.5": res["P@0.5"], "overall_R@0.5": res["R@0.5"],
        "overall_OOD_FP": res["OOD_FP_rate"], "overall_AUROC": auroc if not math.isnan(auroc) else "",
        "overall_FPR@95": fpr95 if not math.isnan(fpr95) else "",
        "N": res["N_imgs"], "time_s": res["time_s"],
        "camera_avg_AP50": cam_avg["AP50"], "camera_avg_P@0.5": cam_avg["P@0.5"], "camera_avg_R@0.5": cam_avg["R@0.5"],
        "camera_avg_OOD_FP": cam_avg["OOD_FP"], "camera_avg_AUROC": cam_avg["AUROC"] if not math.isnan(cam_avg["AUROC"]) else "",
        "camera_avg_FPR@95": cam_avg["FPR@95"] if not math.isnan(cam_avg["FPR@95"]) else ""
    }
    for cam in ALL_CAMS:
        c = res["per_camera"][cam]
        row[f"{cam}_AP50"]   = c["AP50"]
        row[f"{cam}_P@0.5"]  = c["P@0.5"]
        row[f"{cam}_R@0.5"]  = c["R@0.5"]
        row[f"{cam}_OOD_FP"] = c["OOD_FP"] if not (isinstance(c["OOD_FP"], float) and math.isnan(c["OOD_FP"])) else ""
        row[f"{cam}_AUROC"]  = c.get("AUROC","")
        row[f"{cam}_FPR@95"] = c.get("FPR@95","")
        row[f"{cam}_N"]      = c["N"]
    rows_for_csv.append(row)

# Save CSV
if rows_for_csv:
    fieldnames = list(rows_for_csv[0].keys())
    with open(CSV_OUT, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for r in rows_for_csv:
            writer.writerow(r)
    print(f"\nSaved CSV → {CSV_OUT}")

print("\nNotes:")
print("• AP@0.5/P/R use only ID GT (single-class, greedy 1–1 matches at IoU≥0.5).")
print("• OOD-FP is the fraction of detections that overlap any OOD GT (IoU≥0.5).")
print("• AUROC/FPR@95 use per-GT scores: matched → 1−confidence (MSP for YOLO, conf for others),")
print("  missed ID → 0.0, missed OOD → 1.0. Reported overall and per camera, plus camera-average.")


Torch: 1.13.0+cu117 | CUDA: True | Device: cuda
Loaded OOD frames: 500
Model             AP@0.5    P@0.5     R@0.5     OOD-FP    AUROC     FPR@95    N       Time(s)   CamAvg(AUROC/FPR) 
------------------------------------------------------------------------------------------------------------------------
FasterRCNN_R50    0.174     0.421     0.262     0.040     0.7574    0.4115    500     15.2      0.774/0.393       
   CAM_FRONT: AP=0.234, P=0.446, R=0.326, OOD-FP=0.03220654777748203, AUROC=0.7455914500850134, FPR@95=0.4972067039106145, N=79
   CAM_FRONT_LEFT: AP=0.171, P=0.472, R=0.250, OOD-FP=0.03484486873508353, AUROC=0.800706669972725, FPR@95=0.40825688073394495, N=76
   CAM_FRONT_RIGHT: AP=0.192, P=0.372, R=0.292, OOD-FP=0.04242081447963801, AUROC=0.7565264358158794, FPR@95=0.4150513112884835, N=87
   CAM_BACK: AP=0.174, P=0.463, R=0.237, OOD-FP=0.03866745984533016, AUROC=0.761265484714172, FPR@95=0.3786793953858393, N=82
   CAM_BACK_LEFT: AP=0.114, P=0.324, R=0.235, OOD-FP=0.06

In [180]:
# === nuScenes-OOD: multi-model OOD evaluation with per-camera AUROC and CSV export MORE DETECTORS===
# - Dataset layout: <NUSCENES_OOD_ROOT>/<JSONDIR_NAME> with:
#     sample.json, sample_data.json, scene.json, calibrated_sensor.json, sensor.json,
#     detection_id.json, detection_novel.json (both contain {"results": {sd_token: [{"bbox_2d":[x1,y1,x2,y2]}, ...]}})
# - KPIs (overall + per camera): AP@0.5, P@0.5, R@0.5, OOD-FP, AUROC, FPR@95, N, Time(s)
# - Models (try/except): FasterRCNN_R50, FasterRCNN_MBV3, RetinaNet_R50, SSDLite_MBV3, SSD300_VGG16,
#                        YOLOv8n/s/m/l, YOLOv11n/s (if present), YOLOv5s/m (torch.hub),
#                        DETR_R50/DC5/Deformable (torchvision)

import os, json, time, math, random, warnings, csv
from pathlib import Path
import numpy as np
import torch, torchvision
from PIL import Image, ImageFile
import torchvision.transforms as T

ImageFile.LOAD_TRUNCATED_IMAGES = True

# ------------------ CONFIG ------------------
NUSCENES_OOD_ROOT  = Path("/data/Asad/NuScenesMiniNovel")   
JSONDIR_NAME       = "v1.0-mini"                             
MAX_IMAGES         = 500                                     
IOU_MATCH          = 0.5                                    
SCORE_THRESH       = 0.05                                   
CSV_DIR            = Path("/data/Asad/NuScenesMiniNovel/vis_results/")
CSV_DIR.mkdir(parents=True, exist_ok=True)
CSV_OUT            = CSV_DIR / "ood_report.csv"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Torch: {torch.__version__} | CUDA: {torch.cuda.is_available()} | Device: {device}")

# ------------------ DATASET ------------------
def load_json(p): 
    with open(p,"r") as f: 
        return json.load(f)

def load_frame_index(dataroot: Path):
    jsondir = dataroot / JSONDIR_NAME
    sd_rows = {d["token"]: d for d in load_json(jsondir / "sample_data.json")}
    samples = {s["token"]: s for s in load_json(jsondir / "sample.json")}
    scenes  = load_json(jsondir / "scene.json")
    calib_by  = {c["token"]: c for c in load_json(jsondir / "calibrated_sensor.json")}
    sensor_by = {s["token"]: s for s in load_json(jsondir / "sensor.json")}
    id_path   = jsondir / "detection_id.json"
    ood_path  = jsondir / "detection_novel.json"
    gt_id  = load_json(id_path)["results"] if id_path.exists() else {}
    gt_ood = load_json(ood_path)["results"] if ood_path.exists() else {}

    def channel_of_sd_row(sd_row):
        calib  = calib_by[sd_row["calibrated_sensor_token"]]
        sensor = sensor_by[calib["sensor_token"]]
        return sensor["channel"]

    sample_to_ch2sd = {}
    for sd in sd_rows.values():
        ch = channel_of_sd_row(sd)
        if not ch.startswith("CAM_"): 
            continue
        st = sd["sample_token"]
        sample_to_ch2sd.setdefault(st, {})[ch] = sd["token"]

    frames = []
    ALL_CAMS = ["CAM_FRONT","CAM_FRONT_LEFT","CAM_FRONT_RIGHT","CAM_BACK","CAM_BACK_LEFT","CAM_BACK_RIGHT"]
    for sc in scenes:
        s_tok = sc["first_sample_token"]
        while s_tok:
            sample = samples[s_tok]
            ch2sd  = sample_to_ch2sd.get(s_tok, {})
            for ch in ALL_CAMS:
                sd_tok = ch2sd.get(ch)
                if not sd_tok: 
                    continue
                fn = sd_rows[sd_tok]["filename"]
                img_path = dataroot / fn
                if not img_path.exists():
                    continue
                id_boxes  = [b["bbox_2d"] for b in gt_id.get(sd_tok, [])]
                ood_boxes = [b["bbox_2d"] for b in gt_ood.get(sd_tok, [])]
                frames.append((
                    str(img_path),            # 0
                    sd_tok,                   # 1
                    ch,                       # 2 (camera)
                    np.array(id_boxes,  dtype=np.float32),  # 3
                    np.array(ood_boxes, dtype=np.float32)   # 4
                ))
            s_tok = sample["next"]
    return frames

def pick_subset(frames, k=None, seed=13):
    if (k is None) or (k>=len(frames)): 
        return frames
    rng = random.Random(seed)
    idx = list(range(len(frames)))
    rng.shuffle(idx)
    return [frames[i] for i in idx[:k]]

# ------------------ MODELS ------------------
to_tensor = T.ToTensor()

# Torchvision detectors (COCO-trained)
def load_frcnn_r50():
    m = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_frcnn_mbv3():
    m = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_retinanet_r50():
    m = torchvision.models.detection.retinanet_resnet50_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_ssdlite_mbv3():
    m = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights="DEFAULT")
    return m.to(device).eval()

def load_ssd300_vgg16():
    m = torchvision.models.detection.ssd300_vgg16(weights="DEFAULT")
    return m.to(device).eval()

# --- New: DETR family (torchvision) ---
def load_detr_r50():
    m = torchvision.models.detection.detr_resnet50(weights="DEFAULT")
    return m.to(device).eval()

def load_detr_r50_dc5():
    m = torchvision.models.detection.detr_resnet50_dc5(weights="DEFAULT")
    return m.to(device).eval()

def load_deformable_detr_r50():
    m = torchvision.models.detection.deformable_detr_resnet50(weights="DEFAULT")
    return m.to(device).eval()

def infer_torchvision_detector(model, pil_img):
    x = to_tensor(pil_img).to(device)
    with torch.no_grad():
        out = model([x])[0]
    boxes  = out["boxes"].detach().float().cpu().numpy()
    scores = out["scores"].detach().float().cpu().numpy()
    # Return "conf" to mean raw detection confidence (vs MSP)
    return boxes.astype(np.float32), scores.astype(np.float32), "conf"

# YOLOv8 / YOLOv11 (optional via ultralytics)
try:
    from ultralytics import YOLO
    _has_yolo = True
except Exception:
    _has_yolo = False

def load_yolov8n():
    if not _has_yolo: return None
    return YOLO("yolov8n.pt").to(device)
def load_yolov8s():
    if not _has_yolo: return None
    return YOLO("yolov8s.pt").to(device)

# --- New: more YOLOv8 sizes ---
def load_yolov8m():
    if not _has_yolo: return None
    return YOLO("yolov8m.pt").to(device)
def load_yolov8l():
    if not _has_yolo: return None
    return YOLO("yolov8l.pt").to(device)

# --- New: YOLOv11 (if present in your ultralytics version) ---
def load_yolo11n():
    if not _has_yolo: return None
    try:
        return YOLO("yolo11n.pt").to(device)
    except Exception:
        return None
def load_yolo11s():
    if not _has_yolo: return None
    try:
        return YOLO("yolo11s.pt").to(device)
    except Exception:
        return None

def infer_yolov8(model, pil_img):
    import numpy as _np
    im = _np.array(pil_img.convert("RGB"))
    ydev = 0 if device.type == "cuda" else "cpu"
    r = model.predict(source=im, verbose=False, conf=0.001, device=ydev)[0]
    if r is None or r.boxes is None or len(r.boxes) == 0:
        return np.zeros((0,4), dtype=np.float32), np.zeros((0,), dtype=np.float32), "conf"
    xyxy = r.boxes.xyxy.cpu().numpy().astype(np.float32)
    # If class probabilities available, use MSP for OOD ROC (1 - max prob)
    if hasattr(r, "probs") and r.probs is not None:
        scores = r.probs.data.cpu().numpy().max(axis=1).astype(np.float32)
        return xyxy, scores, "MSP"
    else:
        scores = r.boxes.conf.cpu().numpy().astype(np.float32)
        return xyxy, scores, "conf"

# --- New: YOLOv5 via torch.hub (optional; needs internet or local cache) ---
def _try_import_yaml():
    try:
        import yaml  # noqa: F401
    except Exception:
        pass
_try_import_yaml()

def load_yolov5s():
    try:
        m = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)  # noqa: E402
        return m.autoshape().to(device).eval()
    except Exception:
        return None

def load_yolov5m():
    try:
        m = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained=True)  # noqa: E402
        return m.autoshape().to(device).eval()
    except Exception:
        return None

def infer_yolov5(model, pil_img):
    import numpy as _np
    im = _np.array(pil_img.convert("RGB"))
    with torch.no_grad():
        r = model(im, size=640)
    if r is None or len(getattr(r, "xyxy", [])) == 0 or r.xyxy[0].numel() == 0:
        return np.zeros((0,4), np.float32), np.zeros((0,), np.float32), "conf"
    det = r.xyxy[0].detach().cpu().numpy()  # [N,6]: x1,y1,x2,y2,conf,cls
    boxes  = det[:, :4].astype(np.float32)
    scores = det[:, 4].astype(np.float32)
    return boxes, scores, "conf"

# ------------------ METRICS ------------------
def iou_xyxy(a, b):
    Na, Nb = a.shape[0], b.shape[0]
    if Na==0 or Nb==0:
        return np.zeros((Na,Nb), dtype=np.float32)
    ax1, ay1, ax2, ay2 = a[:,0], a[:,1], a[:,2], a[:,3]
    bx1, by1, bx2, by2 = b[:,0], b[:,1], b[:,2], b[:,3]
    inter_x1 = np.maximum(ax1[:,None], bx1[None,:])
    inter_y1 = np.maximum(ay1[:,None], by1[None,:])
    inter_x2 = np.minimum(ax2[:,None], bx2[None,:])
    inter_y2 = np.minimum(ay2[:,None], by2[None,:])
    inter_w = np.clip(inter_x2 - inter_x1, 0, None)
    inter_h = np.clip(inter_y2 - inter_y1, 0, None)
    inter = inter_w * inter_h
    area_a = (ax2-ax1)*(ay2-ay1)
    area_b = (bx2-bx1)*(by2-by1)
    union = area_a[:,None] + area_b[None,:] - inter
    return np.where(union>0, inter/union, 0.0)

def ap50_single_class(all_scores, all_tp, total_gt_pos):
    if len(all_scores)==0:
        return 0.0, 0.0, 0.0
    order = np.argsort(-np.array(all_scores))
    tp = np.array(all_tp)[order].astype(np.float32)
    fp = 1.0 - tp
    cum_tp = np.cumsum(tp)
    cum_fp = np.cumsum(fp)
    recall = cum_tp / max(1, total_gt_pos)
    precision = cum_tp / np.maximum(1, (cum_tp+cum_fp))
    # all-points interpolation
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))
    for i in range(mpre.size-1, 0, -1):
        mpre[i-1] = max(mpre[i-1], mpre[i])
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    ap = float(np.sum((mrec[idx+1]-mrec[idx]) * mpre[idx+1]))
    best_i = int(np.argmax(2*precision*recall/(precision+recall+1e-9)))
    return ap, float(precision[best_i]), float(recall[best_i])

# ------------------ EVAL ------------------
ALL_CAMS = ["CAM_FRONT","CAM_FRONT_LEFT","CAM_FRONT_RIGHT","CAM_BACK","CAM_BACK_LEFT","CAM_BACK_RIGHT"]

def evaluate_dataset(frames, model_name, model, infer_fn, compute_ood_roc=True):
    # global accumulators
    all_scores, all_tp = [], []
    total_id_gt = 0
    det_ood_hits = 0
    total_dets = 0
    roc_scores_glob, roc_labels_glob = [], []

    # per-camera accumulators
    per_cam = {
        cam: {
            "scores": [], "tp": [], "id_gt": 0,
            "OOD_FP_hits": 0, "detections": 0,
            "roc_scores": [], "roc_labels": [], "N": 0
        } for cam in ALL_CAMS
    }

    have_any_ood = any(len(fr[4])>0 for fr in frames)
    t0 = time.time()

    for (img_path, sd_tok, cam, id_boxes, ood_boxes) in frames:
        try:
            img = Image.open(img_path).convert("RGB")
        except Exception:
            continue

        boxes, conf_like, score_type = infer_fn(model, img)
        keep = conf_like >= SCORE_THRESH if conf_like is not None and len(conf_like)>0 else np.array([], dtype=bool)
        boxes = boxes[keep] if boxes.size else boxes
        conf_like = conf_like[keep] if keep.size else conf_like

        # === ID AP accounting (global + per-cam) ===
        total_id_gt += int(id_boxes.shape[0])
        used = np.zeros((id_boxes.shape[0],), dtype=bool)
        if boxes.shape[0] > 0 and id_boxes.shape[0] > 0:
            IoU = iou_xyxy(boxes, id_boxes)
            order = np.argsort(-conf_like)
            for di in order:
                j = int(np.argmax(IoU[di]))
                if IoU[di, j] >= IOU_MATCH and not used[j]:
                    all_scores.append(float(conf_like[di])); all_tp.append(1); used[j]=True
                else:
                    all_scores.append(float(conf_like[di])); all_tp.append(0)
        else:
            for s in (conf_like if conf_like is not None else []):
                all_scores.append(float(s)); all_tp.append(0)

        # per-camera AP accum
        cam_used = np.zeros((id_boxes.shape[0],), dtype=bool)
        if boxes.shape[0] > 0 and id_boxes.shape[0] > 0:
            IoU = iou_xyxy(boxes, id_boxes)
            order = np.argsort(-conf_like)
            for di in order:
                j = int(np.argmax(IoU[di]))
                if IoU[di, j] >= IOU_MATCH and not cam_used[j]:
                    per_cam[cam]["scores"].append(float(conf_like[di]))
                    per_cam[cam]["tp"].append(1)
                    cam_used[j]=True
                else:
                    per_cam[cam]["scores"].append(float(conf_like[di]))
                    per_cam[cam]["tp"].append(0)
        else:
            for s in (conf_like if conf_like is not None else []):
                per_cam[cam]["scores"].append(float(s))
                per_cam[cam]["tp"].append(0)
        per_cam[cam]["id_gt"] += int(id_boxes.shape[0])

        # === OOD-FP rate: any det overlapping any OOD GT ===
        total_dets += int(boxes.shape[0])
        per_cam[cam]["detections"] += int(boxes.shape[0])
        if boxes.shape[0] > 0 and ood_boxes.shape[0] > 0:
            IoU_ood = iou_xyxy(boxes, ood_boxes)
            hits = (IoU_ood.max(axis=1) >= IOU_MATCH).sum()
            det_ood_hits += int(hits)
            per_cam[cam]["OOD_FP_hits"] += int(hits)

        if compute_ood_roc and have_any_ood:
            if boxes.shape[0] > 0:
                IoU_id  = iou_xyxy(id_boxes,  boxes) if id_boxes.shape[0]>0 else np.zeros((0, boxes.shape[0]))
                IoU_ood = iou_xyxy(ood_boxes, boxes) if ood_boxes.shape[0]>0 else np.zeros((0, boxes.shape[0]))
            else:
                IoU_id  = np.zeros((id_boxes.shape[0],  0))
                IoU_ood = np.zeros((ood_boxes.shape[0], 0))
            det_ood_score = (1.0 - conf_like) if (conf_like is not None and len(conf_like)>0) else np.array([])

            # ID GT -> label 0
            for gi in range(id_boxes.shape[0]):
                if IoU_id.shape[1] > 0 and IoU_id[gi].max() >= IOU_MATCH:
                    di = int(IoU_id[gi].argmax())
                    s = float(det_ood_score[di]) if det_ood_score.size>0 else 0.0
                    roc_scores_glob.append(s); roc_labels_glob.append(0)
                    per_cam[cam]["roc_scores"].append(s); per_cam[cam]["roc_labels"].append(0)
                else:
                    roc_scores_glob.append(0.0); roc_labels_glob.append(0)
                    per_cam[cam]["roc_scores"].append(0.0); per_cam[cam]["roc_labels"].append(0)
            # OOD GT -> label 1
            for gi in range(ood_boxes.shape[0]):
                if IoU_ood.shape[1] > 0 and IoU_ood[gi].max() >= IOU_MATCH:
                    di = int(IoU_ood[gi].argmax())
                    s = float(det_ood_score[di]) if det_ood_score.size>0 else 1.0
                    roc_scores_glob.append(s); roc_labels_glob.append(1)
                    per_cam[cam]["roc_scores"].append(s); per_cam[cam]["roc_labels"].append(1)
                else:
                    roc_scores_glob.append(1.0); roc_labels_glob.append(1)
                    per_cam[cam]["roc_scores"].append(1.0); per_cam[cam]["roc_labels"].append(1)

        per_cam[cam]["N"] += 1

    # Aggregate global AP on ID
    ap, p, r = ap50_single_class(all_scores, all_tp, total_id_gt)
    ood_fp_rate = det_ood_hits / max(1, total_dets)
    results = {
        "AP50": ap, "P@0.5": p, "R@0.5": r,
        "OOD_FP_rate": ood_fp_rate,
        "N_imgs": len(frames),
        "time_s": time.time()-t0
    }

    # Global AUROC/FPR@95
    if compute_ood_roc and len(set(roc_labels_glob))>1:
        from sklearn.metrics import roc_auc_score, roc_curve
        scores = np.array(roc_scores_glob); labels = np.array(roc_labels_glob)
        auroc = float(roc_auc_score(labels, scores))
        fpr, tpr, thr = roc_curve(labels, scores)
        i95 = int(np.argmin(np.abs(tpr - 0.95)))
        results["AUROC"]  = auroc
        results["FPR@95"] = float(fpr[i95])

    # Per-camera AP + AUROC/FPR@95 + OOD-FP
    results["per_camera"] = {}
    for cam in ALL_CAMS:
        A = per_cam[cam]
        ap_c, p_c, r_c = ap50_single_class(A["scores"], A["tp"], A["id_gt"])
        oodfp_c = (A["OOD_FP_hits"] / max(1, A["detections"])) if A["detections"]>0 else float('nan')
        cam_res = {"AP50": ap_c, "P@0.5": p_c, "R@0.5": r_c, "OOD_FP": oodfp_c, "N": A["N"]}
        # per-cam AUROC
        if compute_ood_roc and len(A["roc_labels"])>0 and len(set(A["roc_labels"]))>1:
            from sklearn.metrics import roc_auc_score, roc_curve
            sc = np.array(A["roc_scores"]); lb = np.array(A["roc_labels"])
            try:
                au = float(roc_auc_score(lb, sc))
                fpr, tpr, thr = roc_curve(lb, sc)
                i95 = int(np.argmin(np.abs(tpr - 0.95)))
                cam_res["AUROC"]  = au
                cam_res["FPR@95"] = float(fpr[i95])
            except Exception:
                pass
        results["per_camera"][cam] = cam_res

    # Camera-average (macro) for printing/CSV convenience
    def cam_mean(key):
        vals = [results["per_camera"][c].get(key, float('nan')) for c in ALL_CAMS]
        vals = [v for v in vals if not (isinstance(v,float) and math.isnan(v))]
        return float(np.mean(vals)) if len(vals)>0 else float('nan')
    results["camera_avg"] = {
        "AP50": cam_mean("AP50"),
        "P@0.5": cam_mean("P@0.5"),
        "R@0.5": cam_mean("R@0.5"),
        "OOD_FP": cam_mean("OOD_FP"),
        "AUROC": cam_mean("AUROC"),
        "FPR@95": cam_mean("FPR@95")
    }
    return results

# ------------------ RUN ------------------
def pretty_row(cols, widths):
    return "  ".join(str(c).ljust(w) for c,w in zip(cols, widths))

# Load frames
frames_all = load_frame_index(NUSCENES_OOD_ROOT)
frames = pick_subset(frames_all, MAX_IMAGES)
print(f"Loaded OOD frames: {len(frames)}")

# Build model list
models = []
try:   models.append(("FasterRCNN_R50", load_frcnn_r50(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"FasterRCNN_R50 not available: {e}")
try:   models.append(("FasterRCNN_MBV3", load_frcnn_mbv3(), infer_torchvision_detector))
except Exception as e: warnings.warn(f"FasterRCNN_MBV3 not available: {e}")
try:   models.append(("RetinaNet_R50",  load_retinanet_r50(), infer_torchvision_detector))
except Exception as e: warnings.warn(f"RetinaNet_R50 not available: {e}")
try:   models.append(("SSDLite_MBV3",   load_ssdlite_mbv3(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"SSDLite_MBV3 not available: {e}")
try:   models.append(("SSD300_VGG16",   load_ssd300_vgg16(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"SSD300_VGG16 not available: {e}")

# DETR family
try:   models.append(("DETR_R50",          load_detr_r50(),            infer_torchvision_detector))
except Exception as e: warnings.warn(f"DETR_R50 not available: {e}")
try:   models.append(("DETR_R50_DC5",      load_detr_r50_dc5(),        infer_torchvision_detector))
except Exception as e: warnings.warn(f"DETR_R50_DC5 not available: {e}")
try:   models.append(("DeformableDETR_R50",load_deformable_detr_r50(), infer_torchvision_detector))
except Exception as e: warnings.warn(f"DeformableDETR_R50 not available: {e}")

# YOLOv8 baseline sizes
try:
    y8n = load_yolov8n()
    if y8n is not None: models.append(("YOLOv8n", y8n, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8n not available: {e}")
try:
    y8s = load_yolov8s()
    if y8s is not None: models.append(("YOLOv8s", y8s, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8s not available: {e}")

# YOLOv8 larger
try:
    y8m = load_yolov8m()
    if y8m is not None: models.append(("YOLOv8m", y8m, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8m not available: {e}")
try:
    y8l = load_yolov8l()
    if y8l is not None: models.append(("YOLOv8l", y8l, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8l not available: {e}")

# YOLOv11 (if present)
try:
    y11n = load_yolo11n()
    if y11n is not None: models.append(("YOLOv11n", y11n, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv11n not available: {e}")
try:
    y11s = load_yolo11s()
    if y11s is not None: models.append(("YOLOv11s", y11s, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv11s not available: {e}")

# YOLOv5 (torch.hub)
try:
    y5s = load_yolov5s()
    if y5s is not None: models.append(("YOLOv5s", y5s, infer_yolov5))
except Exception as e: warnings.warn(f"YOLOv5s not available: {e}")
try:
    y5m = load_yolov5m()
    if y5m is not None: models.append(("YOLOv5m", y5m, infer_yolov5))
except Exception as e: warnings.warn(f"YOLOv5m not available: {e}")

# Print header
hdr = ["Model","AP@0.5","P@0.5","R@0.5","OOD-FP","AUROC","FPR@95","N","Time(s)","CamAvg(AUROC/FPR)"]
w   = [16, 8, 8, 8, 8, 8, 8, 6, 8, 18]
print(pretty_row(hdr, w))
print("-"*120)

# Evaluate + write CSV
rows_for_csv = []
for name, model, infer_fn in models:
    res = evaluate_dataset(frames, name, model, infer_fn, compute_ood_roc=True)
    auroc = res.get("AUROC", float('nan'))
    fpr95 = res.get("FPR@95", float('nan'))
    cam_avg = res["camera_avg"]
    cam_avg_str = f"{cam_avg['AUROC']:.3f}/{cam_avg['FPR@95']:.3f}" if not math.isnan(cam_avg["AUROC"]) else "—"

    print(pretty_row([
        name,
        f"{res['AP50']:.3f}",
        f"{res['P@0.5']:.3f}",
        f"{res['R@0.5']:.3f}",
        f"{res['OOD_FP_rate']:.3f}",
        f"{auroc:.4f}" if not math.isnan(auroc) else "—",
        f"{fpr95:.4f}" if not math.isnan(fpr95) else "—",
        res["N_imgs"],
        f"{res['time_s']:.1f}",
        cam_avg_str
    ], w))

    # Per-camera print
    for cam in ALL_CAMS:
        c = res["per_camera"][cam]
        out_auroc = c.get("AUROC", float('nan'))
        out_fpr95 = c.get("FPR@95", float('nan'))
        print(f"   {cam}: AP={c['AP50']:.3f}, P={c['P@0.5']:.3f}, R={c['R@0.5']:.3f}, "
              f"OOD-FP={c['OOD_FP'] if not isinstance(c['OOD_FP'],float) or not math.isnan(c['OOD_FP']) else '—'}, "
              f"AUROC={out_auroc if not math.isnan(out_auroc) else '—'}, "
              f"FPR@95={out_fpr95 if not math.isnan(out_fpr95) else '—'}, N={c['N']}")

    row = {
        "model": name,
        "overall_AP50": res["AP50"], "overall_P@0.5": res["P@0.5"], "overall_R@0.5": res["R@0.5"],
        "overall_OOD_FP": res["OOD_FP_rate"], "overall_AUROC": auroc if not math.isnan(auroc) else "",
        "overall_FPR@95": fpr95 if not math.isnan(fpr95) else "",
        "N": res["N_imgs"], "time_s": res["time_s"],
        "camera_avg_AP50": cam_avg["AP50"], "camera_avg_P@0.5": cam_avg["P@0.5"], "camera_avg_R@0.5": cam_avg["R@0.5"],
        "camera_avg_OOD_FP": cam_avg["OOD_FP"], "camera_avg_AUROC": cam_avg["AUROC"] if not math.isnan(cam_avg["AUROC"]) else "",
        "camera_avg_FPR@95": cam_avg["FPR@95"] if not math.isnan(cam_avg["FPR@95"]) else ""
    }
    for cam in ALL_CAMS:
        c = res["per_camera"][cam]
        row[f"{cam}_AP50"]   = c["AP50"]
        row[f"{cam}_P@0.5"]  = c["P@0.5"]
        row[f"{cam}_R@0.5"]  = c["R@0.5"]
        row[f"{cam}_OOD_FP"] = c["OOD_FP"] if not (isinstance(c["OOD_FP"], float) and math.isnan(c["OOD_FP"])) else ""
        row[f"{cam}_AUROC"]  = c.get("AUROC","")
        row[f"{cam}_FPR@95"] = c.get("FPR@95","")
        row[f"{cam}_N"]      = c["N"]
    rows_for_csv.append(row)

# Save CSV
if rows_for_csv:
    fieldnames = list(rows_for_csv[0].keys())
    with open(CSV_OUT, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for r in rows_for_csv:
            writer.writerow(r)
    print(f"\nSaved CSV → {CSV_OUT}")

print("\nNotes:")
print("AP@0.5/P/R use only ID GT (single-class, greedy 1–1 matches at IoU≥0.5).")
print("OOD-FP is the fraction of detections that overlap any OOD GT (IoU≥0.5).")
print("AUROC/FPR@95 use per-GT scores: matched → 1−confidence (MSP for YOLO, conf for others),")
print("missed ID -> 0.0, missed OOD -> 1.0. Reported overall and per camera, plus camera-average.")


Torch: 1.13.0+cu117 | CUDA: True | Device: cuda
Loaded OOD frames: 500
[31m[1mrequirements:[0m Ultralytics requirements ['numpy>=1.23.5', 'tqdm>=4.66.3', 'setuptools>=70.0.0', 'urllib3>=2.5.0 ; python_version > "3.8"'] not found, attempting AutoUpdate...


Using cache found in /home/asad/.cache/torch/hub/ultralytics_yolov5_master

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[31mERROR: Exception:
Traceback (most recent call last):
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/markers.py", line 280, in __init__
    self._markers = _normalize_extra_values(_parse_marker(marker))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 253, in parse_marker
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 257, in _parse_full_marker
    retval = _parse_marker(tokenizer)
  File "/home/asad/miniconda3/envs/py310/lib/pyt

Retry 1/2 failed: Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.
Retry 2/2 failed: Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.
[31m[1mrequirements:[0m ❌ Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[31mERROR: Exception:
Traceback (most recent call last):
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/markers.py", line 280, in __init__
    self._markers = _normalize_extra_values(_parse_marker(marker))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 253, in parse_marker
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 257, in _parse_full_marker
    retval = _parse_marker(tokenizer)
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 266, in _pars

[31m[1mrequirements:[0m Ultralytics requirements ['numpy>=1.23.5', 'tqdm>=4.66.3', 'setuptools>=70.0.0', 'urllib3>=2.5.0 ; python_version > "3.8"'] not found, attempting AutoUpdate...


Using cache found in /home/asad/.cache/torch/hub/ultralytics_yolov5_master

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[31mERROR: Exception:
Traceback (most recent call last):
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/markers.py", line 280, in __init__
    self._markers = _normalize_extra_values(_parse_marker(marker))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 253, in parse_marker
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 257, in _parse_full_marker
    retval = _parse_marker(tokenizer)
  File "/home/asad/miniconda3/envs/py310/lib/pyt

Retry 1/2 failed: Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.
Retry 2/2 failed: Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.
[31m[1mrequirements:[0m ❌ Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[31mERROR: Exception:
Traceback (most recent call last):
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/markers.py", line 280, in __init__
    self._markers = _normalize_extra_values(_parse_marker(marker))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 253, in parse_marker
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 257, in _parse_full_marker
    retval = _parse_marker(tokenizer)
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 266, in _pars

Model             AP@0.5    P@0.5     R@0.5     OOD-FP    AUROC     FPR@95    N       Time(s)   CamAvg(AUROC/FPR) 
------------------------------------------------------------------------------------------------------------------------
FasterRCNN_R50    0.174     0.421     0.262     0.040     0.7574    0.4115    500     15.3      0.774/0.393       
   CAM_FRONT: AP=0.234, P=0.446, R=0.326, OOD-FP=0.03220654777748203, AUROC=0.7455914500850134, FPR@95=0.4972067039106145, N=79
   CAM_FRONT_LEFT: AP=0.171, P=0.472, R=0.250, OOD-FP=0.03484486873508353, AUROC=0.800706669972725, FPR@95=0.40825688073394495, N=76
   CAM_FRONT_RIGHT: AP=0.192, P=0.372, R=0.292, OOD-FP=0.04242081447963801, AUROC=0.7565264358158794, FPR@95=0.4150513112884835, N=87
   CAM_BACK: AP=0.174, P=0.463, R=0.237, OOD-FP=0.03866745984533016, AUROC=0.761265484714172, FPR@95=0.3786793953858393, N=82
   CAM_BACK_LEFT: AP=0.114, P=0.324, R=0.235, OOD-FP=0.061744112030553785, AUROC=0.8225081699346405, FPR@95=0.3088235294117647, 

In [181]:
# === nuScenes (plain) VEHICLE detection benchmark (no OOD labels) ==========================
# - Builds 2D vehicle GT by projecting nuScenes 3D boxes into each camera.
# - KPIs (overall + per camera): AP@0.5, P@0.5, R@0.5, N, Time(s). (No AUROC/FPR here.)
# - Models (try/except): FasterRCNN_R50, FasterRCNN_MBV3, RetinaNet_R50, SSDLite_MBV3, SSD300_VGG16, YOLOv8n/s (optional)

import os, time, math, csv, warnings
from pathlib import Path
import numpy as np
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import torch, torchvision
import torchvision.transforms as T

# --- Paths & Config ---
NUSCENES_ROOT = Path("/data/Asad/NuScenesMini")   # contains v1.0-mini/ or v1.0-trainval/
VERSION       = "v1.0-mini"                        # or "v1.0-trainval"
MAX_IMAGES    = 500                                # cap total frames across all cameras (None for all)
IOU_MATCH     = 0.5
SCORE_THRESH  = 0.05

CSV_DIR = Path("/data/Asad/NuScenesMini/vis_results/")
CSV_DIR.mkdir(parents=True, exist_ok=True)
CSV_OUT = CSV_DIR / "nuscenes_report.csv"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Torch: {torch.__version__} | CUDA: {torch.cuda.is_available()} | Device: {device}")

# --- nuScenes devkit ---
try:
    from nuscenes.nuscenes import NuScenes
    from nuscenes.utils.data_classes import Box
    from nuscenes.utils.geometry_utils import view_points
    from pyquaternion import Quaternion
except Exception as e:
    raise RuntimeError(
        "This cell requires the nuScenes devkit:\n"
        "  pip install nuscenes-devkit pyquaternion"
    ) from e

# ----------------- 1) Build 2D VEHICLE GT (by projecting 3D) -----------------
ALL_CAMS = ["CAM_FRONT","CAM_FRONT_LEFT","CAM_FRONT_RIGHT",
            "CAM_BACK","CAM_BACK_LEFT","CAM_BACK_RIGHT"]

VEHICLE_PREFIXES = (
    "vehicle.car", "vehicle.bus", "vehicle.truck", "vehicle.trailer",
    "vehicle.construction", "vehicle.emergency", "vehicle.other",
    "vehicle.motorcycle", "vehicle.bicycle"
)

def is_vehicle_category(cat: str) -> bool:
    return any(cat.startswith(p) for p in VEHICLE_PREFIXES)

def project_box_to_image(nusc: NuScenes, ann_token: str, sd_token: str, img_hw):
    ann = nusc.get("sample_annotation", ann_token)
    if not is_vehicle_category(ann["category_name"]):
        return None

    box = Box(center=ann["translation"], size=ann["size"],
              orientation=Quaternion(ann["rotation"]),
              name=ann["category_name"], token=ann_token)

    sd = nusc.get("sample_data", sd_token)
    ep = nusc.get("ego_pose", sd["ego_pose_token"])
    cs = nusc.get("calibrated_sensor", sd["calibrated_sensor_token"])

    # World -> ego
    box.translate(-np.array(ep["translation"]))
    box.rotate(Quaternion(ep["rotation"]).inverse)

    # Ego -> camera
    box.translate(-np.array(cs["translation"]))
    box.rotate(Quaternion(cs["rotation"]).inverse)

    if box.center[2] <= 0.1:
        return None

    K = np.array(cs["camera_intrinsic"], dtype=np.float32)
    pts = view_points(box.corners(), K, normalize=True)  # (3,8)

    h, w = img_hw
    xs, ys = pts[0], pts[1]
    x1 = float(np.clip(xs.min(), 0, w-1))
    y1 = float(np.clip(ys.min(), 0, h-1))
    x2 = float(np.clip(xs.max(), 0, w-1))
    y2 = float(np.clip(ys.max(), 0, h-1))
    if x2 <= x1 or y2 <= y1:
        return None
    if (x2-x1)*(y2-y1) < 20:  # tiny
        return None
    return np.array([x1, y1, x2, y2], dtype=np.float32)

def collect_frames_with_vehicle_gt(nusc: NuScenes, max_images=None):
    frames = []
    count = 0
    for sample in nusc.sample:
        anns = sample["anns"]
        for cam in ALL_CAMS:
            sd_tok = sample["data"].get(cam)
            if sd_tok is None: 
                continue
            sd = nusc.get("sample_data", sd_tok)
            img_path = Path(nusc.dataroot) / sd["filename"]
            if not img_path.exists():
                continue
            try:
                with Image.open(img_path) as im:
                    w, h = im.size
            except Exception:
                continue

            gts = []
            for ann_tok in anns:
                bb = project_box_to_image(nusc, ann_tok, sd_tok, (h, w))
                if bb is not None:
                    gts.append(bb)
            if len(gts) == 0:
                continue

            frames.append((str(img_path), sd_tok, cam,
                           np.stack(gts, axis=0).astype(np.float32)))
            count += 1
            if (max_images is not None) and (count >= max_images):
                return frames
    return frames

to_tensor = T.ToTensor()

# COCO vehicle class ids for torchvision models
VEHICLE_COCO_IDS = {2, 3, 4, 6, 7, 8}  # bicycle, car, motorcycle, bus, train, truck

def load_frcnn_r50():
    m = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_frcnn_mbv3():
    m = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_retinanet_r50():
    m = torchvision.models.detection.retinanet_resnet50_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_ssdlite_mbv3():
    m = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights="DEFAULT")
    return m.to(device).eval()

def load_ssd300_vgg16():
    m = torchvision.models.detection.ssd300_vgg16(weights="DEFAULT")
    return m.to(device).eval()

def infer_torchvision_detector(model, pil_img):
    x = to_tensor(pil_img).to(device)
    with torch.no_grad():
        out = model([x])[0]
    boxes  = out["boxes"].detach().float().cpu().numpy()
    scores = out["scores"].detach().float().cpu().numpy()
    labels = out["labels"].detach().cpu().numpy()
    keep   = np.isin(labels, list(VEHICLE_COCO_IDS))
    return boxes[keep].astype(np.float32), scores[keep].astype(np.float32)

# YOLOv8 (optional)
try:
    from ultralytics import YOLO
    _has_yolo = True
except Exception:
    _has_yolo = False

YOLO_VEHICLE_NAMES = {"bicycle","car","motorcycle","bus","train","truck"}

def load_yolov8n():
    if not _has_yolo: return None
    return YOLO("yolov8n.pt").to(device)

def load_yolov8s():
    if not _has_yolo: return None
    return YOLO("yolov8s.pt").to(device)

def infer_yolov8(model, pil_img):
    im = np.array(pil_img.convert("RGB"))
    ydev = 0 if device.type=="cuda" else "cpu"
    r = model.predict(source=im, verbose=False, conf=0.001, device=ydev)[0]
    if r is None or r.boxes is None or len(r.boxes)==0:
        return np.zeros((0,4), np.float32), np.zeros((0,), np.float32)
    xyxy = r.boxes.xyxy.cpu().numpy().astype(np.float32)
    cls  = r.boxes.cls.cpu().numpy().astype(int)
    if hasattr(r, "probs") and r.probs is not None:
        scores = r.probs.data.cpu().numpy().max(axis=1).astype(np.float32)
    else:
        scores = r.boxes.conf.cpu().numpy().astype(np.float32)
    names = r.names if hasattr(r,"names") else model.model.names
    cls_names = [names[c] for c in cls]
    keep = np.array([n in YOLO_VEHICLE_NAMES for n in cls_names], dtype=bool)
    return xyxy[keep], scores[keep]

def iou_xyxy(a, b):
    Na, Nb = a.shape[0], b.shape[0]
    if Na==0 or Nb==0:
        return np.zeros((Na,Nb), dtype=np.float32)
    ax1, ay1, ax2, ay2 = a[:,0], a[:,1], a[:,2], a[:,3]
    bx1, by1, bx2, by2 = b[:,0], b[:,1], b[:,2], b[:,3]
    inter_x1 = np.maximum(ax1[:,None], bx1[None,:])
    inter_y1 = np.maximum(ay1[:,None], by1[None,:])
    inter_x2 = np.minimum(ax2[:,None], bx2[None,:])
    inter_y2 = np.minimum(ay2[:,None], by2[None,:])
    inter_w  = np.clip(inter_x2 - inter_x1, 0, None)
    inter_h  = np.clip(inter_y2 - inter_y1, 0, None)
    inter    = inter_w * inter_h
    area_a   = (ax2-ax1)*(ay2-ay1)
    area_b   = (bx2-bx1)*(by2-by1)
    union    = area_a[:,None] + area_b[None,:] - inter
    return np.where(union>0, inter/union, 0.0)

def ap50_single_class(all_scores, all_tp, total_gt_pos):
    if len(all_scores)==0:
        return 0.0, 0.0, 0.0
    order = np.argsort(-np.array(all_scores))
    tp = np.array(all_tp)[order].astype(np.float32)
    fp = 1.0 - tp
    cum_tp = np.cumsum(tp); cum_fp = np.cumsum(fp)
    recall = cum_tp / max(1, total_gt_pos)
    precision = cum_tp / np.maximum(1, (cum_tp+cum_fp))
    # all-points interpolation
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))
    for i in range(mpre.size-1, 0, -1):
        mpre[i-1] = max(mpre[i-1], mpre[i])
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    ap = float(np.sum((mrec[idx+1]-mrec[idx]) * mpre[idx+1]))
    best_i = int(np.argmax(2*precision*recall/(precision+recall+1e-9)))
    return ap, float(precision[best_i]), float(recall[best_i])

# ----------------- 4) Evaluation -----------------
def evaluate_dataset(frames, model_name, model, infer_fn):
    all_scores, all_tp = [], []
    total_gt = 0
    per_cam = {cam: {"scores":[], "tp":[], "gt":0, "N":0} for cam in ALL_CAMS}

    t0 = time.time()
    for (img_path, sd_tok, cam, gt) in frames:
        try:
            img = Image.open(img_path).convert("RGB")
        except Exception:
            continue
        boxes, scores = infer_fn(model, img)
        keep = scores >= SCORE_THRESH
        boxes = boxes[keep]; scores = scores[keep]

        total_gt += int(gt.shape[0])
        # global
        used = np.zeros((gt.shape[0],), dtype=bool)
        if boxes.shape[0] > 0 and gt.shape[0] > 0:
            IoU = iou_xyxy(boxes, gt)
            order = np.argsort(-scores)
            for di in order:
                j = int(np.argmax(IoU[di]))
                if IoU[di, j] >= IOU_MATCH and not used[j]:
                    all_scores.append(float(scores[di])); all_tp.append(1); used[j]=True
                else:
                    all_scores.append(float(scores[di])); all_tp.append(0)
        else:
            for s in scores:
                all_scores.append(float(s)); all_tp.append(0)

        # per-camera
        cA = per_cam[cam]
        cA["gt"] += int(gt.shape[0]); cA["N"] += 1
        used_c = np.zeros((gt.shape[0],), dtype=bool)
        if boxes.shape[0] > 0 and gt.shape[0] > 0:
            IoU = iou_xyxy(boxes, gt)
            order = np.argsort(-scores)
            for di in order:
                j = int(np.argmax(IoU[di]))
                if IoU[di, j] >= IOU_MATCH and not used_c[j]:
                    cA["scores"].append(float(scores[di])); cA["tp"].append(1); used_c[j]=True
                else:
                    cA["scores"].append(float(scores[di])); cA["tp"].append(0)
        else:
            for s in scores:
                cA["scores"].append(float(s)); cA["tp"].append(0)

    ap, p, r = ap50_single_class(all_scores, all_tp, total_gt)
    results = {
        "AP50": ap, "P@0.5": p, "R@0.5": r,
        "N_imgs": len(frames), "time_s": time.time()-t0,
        "per_camera": {}
    }

    # per-camera metrics
    for cam in ALL_CAMS:
        A = per_cam[cam]
        ap_c, p_c, r_c = ap50_single_class(A["scores"], A["tp"], A["gt"])
        results["per_camera"][cam] = {"AP50": ap_c, "P@0.5": p_c, "R@0.5": r_c, "N": A["N"]}

    # camera-average (macro)
    def cam_mean(key):
        vals = [results["per_camera"][c][key] for c in ALL_CAMS if results["per_camera"][c]["N"]>0]
        return float(np.mean(vals)) if len(vals)>0 else float('nan')
    results["camera_avg"] = {
        "AP50": cam_mean("AP50"), "P@0.5": cam_mean("P@0.5"), "R@0.5": cam_mean("R@0.5")
    }
    return results

# ----------------- 5) Run -----------------
def pretty_row(cols, widths):
    return "  ".join(str(c).ljust(w) for c,w in zip(cols, widths))

# Init nuScenes + collect frames
nusc = NuScenes(version=VERSION, dataroot=str(NUSCENES_ROOT), verbose=True)
frames_all = collect_frames_with_vehicle_gt(nusc, max_images=None)
frames = frames_all[:MAX_IMAGES] if (MAX_IMAGES is not None) else frames_all
print(f"Collected frames with vehicle GT: {len(frames)}")

# Models
models = []
try:   models.append(("FasterRCNN_R50",  load_frcnn_r50(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"FasterRCNN_R50 not available: {e}")
try:   models.append(("FasterRCNN_MBV3", load_frcnn_mbv3(), infer_torchvision_detector))
except Exception as e: warnings.warn(f"FasterRCNN_MBV3 not available: {e}")
try:   models.append(("RetinaNet_R50",   load_retinanet_r50(), infer_torchvision_detector))
except Exception as e: warnings.warn(f"RetinaNet_R50 not available: {e}")
try:   models.append(("SSDLite_MBV3",    load_ssdlite_mbv3(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"SSDLite_MBV3 not available: {e}")
try:   models.append(("SSD300_VGG16",    load_ssd300_vgg16(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"SSD300_VGG16 not available: {e}")

try:
    y8n = load_yolov8n()
    if y8n is not None: models.append(("YOLOv8n", y8n, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8n not available: {e}")
try:
    y8s = load_yolov8s()
    if y8s is not None: models.append(("YOLOv8s", y8s, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8s not available: {e}")

# Header
hdr = ["Model","AP@0.5","P@0.5","R@0.5","N","Time(s)","CamAvg(AP/P/R)"]
w   = [16, 8, 8, 8, 6, 8, 20]
print(pretty_row(hdr, w))
print("-"*100)

# CSV rows
rows_for_csv = []

for name, model, infer_fn in models:
    res = evaluate_dataset(frames, name, model, infer_fn)
    cam_avg = res["camera_avg"]
    cam_avg_str = f"{cam_avg['AP50']:.3f}/{cam_avg['P@0.5']:.3f}/{cam_avg['R@0.5']:.3f}"

    print(pretty_row([
        name,
        f"{res['AP50']:.3f}",
        f"{res['P@0.5']:.3f}",
        f"{res['R@0.5']:.3f}",
        res["N_imgs"],
        f"{res['time_s']:.1f}",
        cam_avg_str
    ], w))

    for cam in ALL_CAMS:
        c = res["per_camera"][cam]
        print(f"   {cam}: AP={c['AP50']:.3f}, P={c['P@0.5']:.3f}, R={c['R@0.5']:.3f}, N={c['N']}")

    # CSV row (overall + per camera + camera-average)
    row = {
        "model": name,
        "overall_AP50": res["AP50"], "overall_P@0.5": res["P@0.5"], "overall_R@0.5": res["R@0.5"],
        "N": res["N_imgs"], "time_s": res["time_s"],
        "camera_avg_AP50": cam_avg["AP50"], "camera_avg_P@0.5": cam_avg["P@0.5"], "camera_avg_R@0.5": cam_avg["R@0.5"],
    }
    for cam in ALL_CAMS:
        c = res["per_camera"][cam]
        row[f"{cam}_AP50"]  = c["AP50"]
        row[f"{cam}_P@0.5"] = c["P@0.5"]
        row[f"{cam}_R@0.5"] = c["R@0.5"]
        row[f"{cam}_N"]     = c["N"]
    rows_for_csv.append(row)

# Save CSV
if rows_for_csv:
    fieldnames = list(rows_for_csv[0].keys())
    with open(CSV_OUT, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for r in rows_for_csv:
            writer.writerow(r)
    print(f"\nSaved CSV → {CSV_OUT}")

print("\nNotes:")
print("This evaluates single-class VEHICLE detection (COCO vehicle-like classes) on RGB cameras.")
print("AP@0.5/P/R use greedy one-to-one matching at IoU≥0.5 against 2D GT projected from 3D boxes.")
print("Reported overall metrics, per-camera metrics, and camera-average (macro).")


Torch: 1.13.0+cu117 | CUDA: True | Device: cuda
Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.193 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.
Collected frames with vehicle GT: 500
Model             AP@0.5    P@0.5     R@0.5     N       Time(s)   CamAvg(AP/P/R)      
----------------------------------------------------------------------------------------------------
FasterRCNN_R50    0.368     0.557     0.385     500     14.9      0.364/0.587/0.373   
   CAM_FRONT: AP=0.398, P=0.549, R=0.435, N=93
   CAM_FRONT_LEFT: AP=0.360, P=0.622, R=0.365, N=80
   CAM_FRONT_RIGHT: AP=0.307, P=0.454, R=0.331, N=82
   CAM_BACK: AP=0.434, P=0.761, R=0.405, N=93
   CAM_BACK_LEFT: AP=0.332, P=0.559, R=0.369, N=76
   CAM_BACK_RIGHT: AP=0.350, P=0.579, R=0.329, N=76
FasterRCNN_MBV3

In [182]:
# === nuScenes (plain) VEHICLE detection benchmark (no OOD labels) with more detectors ==========================
# - Builds 2D vehicle GT by projecting nuScenes 3D boxes into each camera.
# - KPIs (overall + per camera): AP@0.5, P@0.5, R@0.5, N, Time(s). (No AUROC/FPR here.)
# - Models (try/except): FasterRCNN_R50, FasterRCNN_MBV3, RetinaNet_R50, SSDLite_MBV3, SSD300_VGG16,
#                        YOLOv8n/s/m/l, YOLOv11n/s (if available), YOLOv5s/m, DETR_R50/DC5/Deformable

import os, time, math, csv, warnings
from pathlib import Path
import numpy as np
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import torch, torchvision
import torchvision.transforms as T

# --- Paths & Config ---
NUSCENES_ROOT = Path("/data/Asad/NuScenesMini")   
VERSION       = "v1.0-mini"                        
MAX_IMAGES    = 500                               
IOU_MATCH     = 0.5
SCORE_THRESH  = 0.05

CSV_DIR = Path("/data/Asad/NuScenesMini/vis_results/")
CSV_DIR.mkdir(parents=True, exist_ok=True)
CSV_OUT = CSV_DIR / "nuscenes_report.csv"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Torch: {torch.__version__} | CUDA: {torch.cuda.is_available()} | Device: {device}")

# --- nuScenes devkit ---
try:
    from nuscenes.nuscenes import NuScenes
    from nuscenes.utils.data_classes import Box
    from nuscenes.utils.geometry_utils import view_points
    from pyquaternion import Quaternion
except Exception as e:
    raise RuntimeError(
        "This cell requires the nuScenes devkit:\n"
        "  pip install nuscenes-devkit pyquaternion"
    ) from e

# ----------------- 1) Build 2D VEHICLE GT (by projecting 3D) -----------------
ALL_CAMS = ["CAM_FRONT","CAM_FRONT_LEFT","CAM_FRONT_RIGHT",
            "CAM_BACK","CAM_BACK_LEFT","CAM_BACK_RIGHT"]

VEHICLE_PREFIXES = (
    "vehicle.car", "vehicle.bus", "vehicle.truck", "vehicle.trailer",
    "vehicle.construction", "vehicle.emergency", "vehicle.other",
    "vehicle.motorcycle", "vehicle.bicycle"
)

def is_vehicle_category(cat: str) -> bool:
    return any(cat.startswith(p) for p in VEHICLE_PREFIXES)

def project_box_to_image(nusc: NuScenes, ann_token: str, sd_token: str, img_hw):
    ann = nusc.get("sample_annotation", ann_token)
    if not is_vehicle_category(ann["category_name"]):
        return None

    box = Box(center=ann["translation"], size=ann["size"],
              orientation=Quaternion(ann["rotation"]),
              name=ann["category_name"], token=ann_token)

    sd = nusc.get("sample_data", sd_token)
    ep = nusc.get("ego_pose", sd["ego_pose_token"])
    cs = nusc.get("calibrated_sensor", sd["calibrated_sensor_token"])

    # World -> ego
    box.translate(-np.array(ep["translation"]))
    box.rotate(Quaternion(ep["rotation"]).inverse)

    # Ego -> camera
    box.translate(-np.array(cs["translation"]))
    box.rotate(Quaternion(cs["rotation"]).inverse)

    if box.center[2] <= 0.1:
        return None

    K = np.array(cs["camera_intrinsic"], dtype=np.float32)
    pts = view_points(box.corners(), K, normalize=True)  # (3,8)

    h, w = img_hw
    xs, ys = pts[0], pts[1]
    x1 = float(np.clip(xs.min(), 0, w-1))
    y1 = float(np.clip(ys.min(), 0, h-1))
    x2 = float(np.clip(xs.max(), 0, w-1))
    y2 = float(np.clip(ys.max(), 0, h-1))
    if x2 <= x1 or y2 <= y1:
        return None
    if (x2-x1)*(y2-y1) < 20:  # tiny
        return None
    return np.array([x1, y1, x2, y2], dtype=np.float32)

def collect_frames_with_vehicle_gt(nusc: NuScenes, max_images=None):
    frames = []
    count = 0
    for sample in nusc.sample:
        anns = sample["anns"]
        for cam in ALL_CAMS:
            sd_tok = sample["data"].get(cam)
            if sd_tok is None: 
                continue
            sd = nusc.get("sample_data", sd_tok)
            img_path = Path(nusc.dataroot) / sd["filename"]
            if not img_path.exists():
                continue
            try:
                with Image.open(img_path) as im:
                    w, h = im.size
            except Exception:
                continue

            gts = []
            for ann_tok in anns:
                bb = project_box_to_image(nusc, ann_tok, sd_tok, (h, w))
                if bb is not None:
                    gts.append(bb)
            if len(gts) == 0:
                continue

            frames.append((str(img_path), sd_tok, cam,
                           np.stack(gts, axis=0).astype(np.float32)))
            count += 1
            if (max_images is not None) and (count >= max_images):
                return frames
    return frames

# ----------------- 2) Models (lite SOTA) -----------------
to_tensor = T.ToTensor()

VEHICLE_COCO_IDS = {2, 3, 4, 6, 7, 8} 

def load_frcnn_r50():
    m = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_frcnn_mbv3():
    m = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_retinanet_r50():
    m = torchvision.models.detection.retinanet_resnet50_fpn(weights="DEFAULT")
    return m.to(device).eval()

def load_ssdlite_mbv3():
    m = torchvision.models.detection.ssdlite320_mobilenet_v3_large(weights="DEFAULT")
    return m.to(device).eval()

def load_ssd300_vgg16():
    m = torchvision.models.detection.ssd300_vgg16(weights="DEFAULT")
    return m.to(device).eval()

# ----- New: DETR family (torchvision) -----
def load_detr_r50():
    m = torchvision.models.detection.detr_resnet50(weights="DEFAULT")
    return m.to(device).eval()

def load_detr_r50_dc5():
    m = torchvision.models.detection.detr_resnet50_dc5(weights="DEFAULT")
    return m.to(device).eval()

def load_deformable_detr_r50():
    # available in newer torchvision; try/except at callsite too
    m = torchvision.models.detection.deformable_detr_resnet50(weights="DEFAULT")
    return m.to(device).eval()

def infer_torchvision_detector(model, pil_img):
    x = to_tensor(pil_img).to(device)
    with torch.no_grad():
        out = model([x])[0]
    boxes  = out["boxes"].detach().float().cpu().numpy()
    scores = out["scores"].detach().float().cpu().numpy()
    labels = out["labels"].detach().cpu().numpy()
    keep   = np.isin(labels, list(VEHICLE_COCO_IDS))
    return boxes[keep].astype(np.float32), scores[keep].astype(np.float32)

# YOLOv8 / YOLOv11 (optional via ultralytics)
try:
    from ultralytics import YOLO
    _has_yolo = True
except Exception:
    _has_yolo = False

YOLO_VEHICLE_NAMES = {"bicycle","car","motorcycle","bus","train","truck"}

def load_yolov8n():
    if not _has_yolo: return None
    return YOLO("yolov8n.pt").to(device)

def load_yolov8s():
    if not _has_yolo: return None
    return YOLO("yolov8s.pt").to(device)

# ----- New: more YOLOv8 sizes -----
def load_yolov8m():
    if not _has_yolo: return None
    return YOLO("yolov8m.pt").to(device)

def load_yolov8l():
    if not _has_yolo: return None
    return YOLO("yolov8l.pt").to(device)

# ----- New: YOLOv11 (if present in your ultralytics version) -----
def load_yolo11n():
    if not _has_yolo: return None
    try:
        return YOLO("yolo11n.pt").to(device)
    except Exception:
        return None

def load_yolo11s():
    if not _has_yolo: return None
    try:
        return YOLO("yolo11s.pt").to(device)
    except Exception:
        return None

def infer_yolov8(model, pil_img):
    im = np.array(pil_img.convert("RGB"))
    ydev = 0 if device.type=="cuda" else "cpu"
    r = model.predict(source=im, verbose=False, conf=0.001, device=ydev)[0]
    if r is None or r.boxes is None or len(r.boxes)==0:
        return np.zeros((0,4), np.float32), np.zeros((0,), np.float32)
    xyxy = r.boxes.xyxy.cpu().numpy().astype(np.float32)
    cls  = r.boxes.cls.cpu().numpy().astype(int)
    if hasattr(r, "probs") and r.probs is not None:
        scores = r.probs.data.cpu().numpy().max(axis=1).astype(np.float32)
    else:
        scores = r.boxes.conf.cpu().numpy().astype(np.float32)
    names = r.names if hasattr(r,"names") else model.model.names
    cls_names = [names[c] for c in cls]
    keep = np.array([n in YOLO_VEHICLE_NAMES for n in cls_names], dtype=bool)
    return xyxy[keep], scores[keep]

# ----- New: YOLOv5 via torch.hub (optional; needs internet or local cache) -----
def _try_import_yaml():
    try:
        import yaml  # noqa: F401
    except Exception:
        pass
_try_import_yaml()

def load_yolov5s():
    try:
        m = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)  # noqa
        return m.autoshape().to(device).eval()
    except Exception:
        return None

def load_yolov5m():
    try:
        m = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained=True)  # noqa
        return m.autoshape().to(device).eval()
    except Exception:
        return None

YOLOV5_VEHICLE_NAMES = {"bicycle","car","motorcycle","bus","train","truck"}

def infer_yolov5(model, pil_img):
    im = np.array(pil_img.convert("RGB"))
    with torch.no_grad():
        r = model(im, size=640)
    # r.xyxy is a list (one per image)
    if r is None or len(getattr(r, "xyxy", [])) == 0 or r.xyxy[0].numel() == 0:
        return np.zeros((0,4), np.float32), np.zeros((0,), np.float32)
    det = r.xyxy[0].detach().cpu().numpy()  # [N,6]: x1,y1,x2,y2,conf,cls
    boxes  = det[:, :4].astype(np.float32)
    scores = det[:, 4].astype(np.float32)
    cls    = det[:, 5].astype(int)
    names = r.names if hasattr(r, "names") else getattr(model, "names", {})
    # names can be dict or list; normalize get
    if isinstance(names, list):
        get_name = lambda i: names[i] if (0 <= i < len(names)) else str(i)
    else:
        get_name = lambda i: names.get(int(i), str(int(i)))
    cls_names = [get_name(int(c)) for c in cls]
    keep = np.array([n in YOLOV5_VEHICLE_NAMES for n in cls_names], dtype=bool)
    return boxes[keep], scores[keep]

# ----------------- 3) Metrics -----------------
def iou_xyxy(a, b):
    Na, Nb = a.shape[0], b.shape[0]
    if Na==0 or Nb==0:
        return np.zeros((Na,Nb), dtype=np.float32)
    ax1, ay1, ax2, ay2 = a[:,0], a[:,1], a[:,2], a[:,3]
    bx1, by1, bx2, by2 = b[:,0], b[:,1], b[:,2], b[:,3]
    inter_x1 = np.maximum(ax1[:,None], bx1[None,:])
    inter_y1 = np.maximum(ay1[:,None], by1[None,:])
    inter_x2 = np.minimum(ax2[:,None], bx2[None,:])
    inter_y2 = np.minimum(ay2[:,None], by2[None,:])
    inter_w  = np.clip(inter_x2 - inter_x1, 0, None)
    inter_h  = np.clip(inter_y2 - inter_y1, 0, None)
    inter    = inter_w * inter_h
    area_a   = (ax2-ax1)*(ay2-ay1)
    area_b   = (bx2-bx1)*(by2-by1)
    union    = area_a[:,None] + area_b[None,:] - inter
    return np.where(union>0, inter/union, 0.0)

def ap50_single_class(all_scores, all_tp, total_gt_pos):
    if len(all_scores)==0:
        return 0.0, 0.0, 0.0
    order = np.argsort(-np.array(all_scores))
    tp = np.array(all_tp)[order].astype(np.float32)
    fp = 1.0 - tp
    cum_tp = np.cumsum(tp); cum_fp = np.cumsum(fp)
    recall = cum_tp / max(1, total_gt_pos)
    precision = cum_tp / np.maximum(1, (cum_tp+cum_fp))
    # all-points interpolation
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))
    for i in range(mpre.size-1, 0, -1):
        mpre[i-1] = max(mpre[i-1], mpre[i])
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    ap = float(np.sum((mrec[idx+1]-mrec[idx]) * mpre[idx+1]))
    best_i = int(np.argmax(2*precision*recall/(precision+recall+1e-9)))
    return ap, float(precision[best_i]), float(recall[best_i])

# ----------------- 4) Evaluation -----------------
def evaluate_dataset(frames, model_name, model, infer_fn):
    all_scores, all_tp = [], []
    total_gt = 0
    per_cam = {cam: {"scores":[], "tp":[], "gt":0, "N":0} for cam in ALL_CAMS}

    t0 = time.time()
    for (img_path, sd_tok, cam, gt) in frames:
        try:
            img = Image.open(img_path).convert("RGB")
        except Exception:
            continue
        boxes, scores = infer_fn(model, img)
        keep = scores >= SCORE_THRESH
        boxes = boxes[keep]; scores = scores[keep]

        total_gt += int(gt.shape[0])
        # global
        used = np.zeros((gt.shape[0],), dtype=bool)
        if boxes.shape[0] > 0 and gt.shape[0] > 0:
            IoU = iou_xyxy(boxes, gt)
            order = np.argsort(-scores)
            for di in order:
                j = int(np.argmax(IoU[di]))
                if IoU[di, j] >= IOU_MATCH and not used[j]:
                    all_scores.append(float(scores[di])); all_tp.append(1); used[j]=True
                else:
                    all_scores.append(float(scores[di])); all_tp.append(0)
        else:
            for s in scores:
                all_scores.append(float(s)); all_tp.append(0)

        # per-camera
        cA = per_cam[cam]
        cA["gt"] += int(gt.shape[0]); cA["N"] += 1
        used_c = np.zeros((gt.shape[0],), dtype=bool)
        if boxes.shape[0] > 0 and gt.shape[0] > 0:
            IoU = iou_xyxy(boxes, gt)
            order = np.argsort(-scores)
            for di in order:
                j = int(np.argmax(IoU[di]))
                if IoU[di, j] >= IOU_MATCH and not used_c[j]:
                    cA["scores"].append(float(scores[di])); cA["tp"].append(1); used_c[j]=True
                else:
                    cA["scores"].append(float(scores[di])); cA["tp"].append(0)
        else:
            for s in scores:
                cA["scores"].append(float(s)); cA["tp"].append(0)

    ap, p, r = ap50_single_class(all_scores, all_tp, total_gt)
    results = {
        "AP50": ap, "P@0.5": p, "R@0.5": r,
        "N_imgs": len(frames), "time_s": time.time()-t0,
        "per_camera": {}
    }

    # per-camera metrics
    for cam in ALL_CAMS:
        A = per_cam[cam]
        ap_c, p_c, r_c = ap50_single_class(A["scores"], A["tp"], A["gt"])
        results["per_camera"][cam] = {"AP50": ap_c, "P@0.5": p_c, "R@0.5": r_c, "N": A["N"]}

    # camera-average (macro)
    def cam_mean(key):
        vals = [results["per_camera"][c][key] for c in ALL_CAMS if results["per_camera"][c]["N"]>0]
        return float(np.mean(vals)) if len(vals)>0 else float('nan')
    results["camera_avg"] = {
        "AP50": cam_mean("AP50"), "P@0.5": cam_mean("P@0.5"), "R@0.5": cam_mean("R@0.5")
    }
    return results

# ----------------- 5) Run -----------------
def pretty_row(cols, widths):
    return "  ".join(str(c).ljust(w) for c,w in zip(cols, widths))

# Init nuScenes + collect frames
nusc = NuScenes(version=VERSION, dataroot=str(NUSCENES_ROOT), verbose=True)
frames_all = collect_frames_with_vehicle_gt(nusc, max_images=None)
frames = frames_all[:MAX_IMAGES] if (MAX_IMAGES is not None) else frames_all
print(f"Collected frames with vehicle GT: {len(frames)}")

# Models
models = []
try:   models.append(("FasterRCNN_R50",  load_frcnn_r50(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"FasterRCNN_R50 not available: {e}")
try:   models.append(("FasterRCNN_MBV3", load_frcnn_mbv3(), infer_torchvision_detector))
except Exception as e: warnings.warn(f"FasterRCNN_MBV3 not available: {e}")
try:   models.append(("RetinaNet_R50",   load_retinanet_r50(), infer_torchvision_detector))
except Exception as e: warnings.warn(f"RetinaNet_R50 not available: {e}")
try:   models.append(("SSDLite_MBV3",    load_ssdlite_mbv3(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"SSDLite_MBV3 not available: {e}")
try:   models.append(("SSD300_VGG16",    load_ssd300_vgg16(),  infer_torchvision_detector))
except Exception as e: warnings.warn(f"SSD300_VGG16 not available: {e}")

# DETR family
try:   models.append(("DETR_R50",          load_detr_r50(),            infer_torchvision_detector))
except Exception as e: warnings.warn(f"DETR_R50 not available: {e}")
try:   models.append(("DETR_R50_DC5",      load_detr_r50_dc5(),        infer_torchvision_detector))
except Exception as e: warnings.warn(f"DETR_R50_DC5 not available: {e}")
try:   models.append(("DeformableDETR_R50",load_deformable_detr_r50(), infer_torchvision_detector))
except Exception as e: warnings.warn(f"DeformableDETR_R50 not available: {e}")

# YOLOv8 baseline sizes
try:
    y8n = load_yolov8n()
    if y8n is not None: models.append(("YOLOv8n", y8n, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8n not available: {e}")
try:
    y8s = load_yolov8s()
    if y8s is not None: models.append(("YOLOv8s", y8s, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8s not available: {e}")

# YOLOv8 larger
try:
    y8m = load_yolov8m()
    if y8m is not None: models.append(("YOLOv8m", y8m, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8m not available: {e}")
try:
    y8l = load_yolov8l()
    if y8l is not None: models.append(("YOLOv8l", y8l, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv8l not available: {e}")

# YOLOv11 (if present)
try:
    y11n = load_yolo11n()
    if y11n is not None: models.append(("YOLOv11n", y11n, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv11n not available: {e}")
try:
    y11s = load_yolo11s()
    if y11s is not None: models.append(("YOLOv11s", y11s, infer_yolov8))
except Exception as e: warnings.warn(f"YOLOv11s not available: {e}")

# YOLOv5 (torch.hub)
try:
    y5s = load_yolov5s()
    if y5s is not None: models.append(("YOLOv5s", y5s, infer_yolov5))
except Exception as e: warnings.warn(f"YOLOv5s not available: {e}")
try:
    y5m = load_yolov5m()
    if y5m is not None: models.append(("YOLOv5m", y5m, infer_yolov5))
except Exception as e: warnings.warn(f"YOLOv5m not available: {e}")

# Header
hdr = ["Model","AP@0.5","P@0.5","R@0.5","N","Time(s)","CamAvg(AP/P/R)"]
w   = [16, 8, 8, 8, 6, 8, 20]
print(pretty_row(hdr, w))
print("-"*100)

# CSV rows
rows_for_csv = []

for name, model, infer_fn in models:
    res = evaluate_dataset(frames, name, model, infer_fn)
    cam_avg = res["camera_avg"]
    cam_avg_str = f"{cam_avg['AP50']:.3f}/{cam_avg['P@0.5']:.3f}/{cam_avg['R@0.5']:.3f}"

    print(pretty_row([
        name,
        f"{res['AP50']:.3f}",
        f"{res['P@0.5']:.3f}",
        f"{res['R@0.5']:.3f}",
        res["N_imgs"],
        f"{res['time_s']:.1f}",
        cam_avg_str
    ], w))

    for cam in ALL_CAMS:
        c = res["per_camera"][cam]
        print(f"   {cam}: AP={c['AP50']:.3f}, P={c['P@0.5']:.3f}, R={c['R@0.5']:.3f}, N={c['N']}")

    # CSV row (overall + per camera + camera-average)
    row = {
        "model": name,
        "overall_AP50": res["AP50"], "overall_P@0.5": res["P@0.5"], "overall_R@0.5": res["R@0.5"],
        "N": res["N_imgs"], "time_s": res["time_s"],
        "camera_avg_AP50": cam_avg["AP50"], "camera_avg_P@0.5": cam_avg["P@0.5"], "camera_avg_R@0.5": cam_avg["R@0.5"],
    }
    for cam in ALL_CAMS:
        c = res["per_camera"][cam]
        row[f"{cam}_AP50"]  = c["AP50"]
        row[f"{cam}_P@0.5"] = c["P@0.5"]
        row[f"{cam}_R@0.5"] = c["R@0.5"]
        row[f"{cam}_N"]     = c["N"]
    rows_for_csv.append(row)

# Save CSV
if rows_for_csv:
    fieldnames = list(rows_for_csv[0].keys())
    with open(CSV_OUT, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for r in rows_for_csv:
            writer.writerow(r)
    print(f"\nSaved CSV → {CSV_OUT}")

print("\nNotes:")
print("This evaluates single-class VEHICLE detection (COCO vehicle-like classes) on RGB cameras.")
print("AP@0.5/P/R use greedy one-to-one matching at IoU≥0.5 against 2D GT projected from 3D boxes.")
print("Reported overall metrics, per-camera metrics, and camera-average (macro).")


Torch: 1.13.0+cu117 | CUDA: True | Device: cuda
Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.166 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.
Collected frames with vehicle GT: 500
[31m[1mrequirements:[0m Ultralytics requirements ['numpy>=1.23.5', 'tqdm>=4.66.3', 'setuptools>=70.0.0', 'urllib3>=2.5.0 ; python_version > "3.8"'] not found, attempting AutoUpdate...


Using cache found in /home/asad/.cache/torch/hub/ultralytics_yolov5_master

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[31mERROR: Exception:
Traceback (most recent call last):
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/markers.py", line 280, in __init__
    self._markers = _normalize_extra_values(_parse_marker(marker))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 253, in parse_marker
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 257, in _parse_full_marker
    retval = _parse_marker(tokenizer)
  File "/home/asad/miniconda3/envs/py310/lib/pyt

Retry 1/2 failed: Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.
Retry 2/2 failed: Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.
[31m[1mrequirements:[0m ❌ Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[31mERROR: Exception:
Traceback (most recent call last):
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/markers.py", line 280, in __init__
    self._markers = _normalize_extra_values(_parse_marker(marker))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 253, in parse_marker
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 257, in _parse_full_marker
    retval = _parse_marker(tokenizer)
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 266, in _pars

[31m[1mrequirements:[0m Ultralytics requirements ['numpy>=1.23.5', 'tqdm>=4.66.3', 'setuptools>=70.0.0', 'urllib3>=2.5.0 ; python_version > "3.8"'] not found, attempting AutoUpdate...


Using cache found in /home/asad/.cache/torch/hub/ultralytics_yolov5_master

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[31mERROR: Exception:
Traceback (most recent call last):
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/markers.py", line 280, in __init__
    self._markers = _normalize_extra_values(_parse_marker(marker))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 253, in parse_marker
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 257, in _parse_full_marker
    retval = _parse_marker(tokenizer)
  File "/home/asad/miniconda3/envs/py310/lib/pyt

Retry 1/2 failed: Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.
Retry 2/2 failed: Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.
[31m[1mrequirements:[0m ❌ Command 'pip install --no-cache-dir "numpy>=1.23.5" "tqdm>=4.66.3" "setuptools>=70.0.0" "urllib3>=2.5.0 ; python_version > "3.8"" ' returned non-zero exit status 2.



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[31mERROR: Exception:
Traceback (most recent call last):
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/markers.py", line 280, in __init__
    self._markers = _normalize_extra_values(_parse_marker(marker))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 253, in parse_marker
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 257, in _parse_full_marker
    retval = _parse_marker(tokenizer)
  File "/home/asad/miniconda3/envs/py310/lib/python3.10/site-packages/pip/_vendor/packaging/_parser.py", line 266, in _pars

Model             AP@0.5    P@0.5     R@0.5     N       Time(s)   CamAvg(AP/P/R)      
----------------------------------------------------------------------------------------------------
FasterRCNN_R50    0.368     0.557     0.385     500     15.2      0.364/0.587/0.373   
   CAM_FRONT: AP=0.398, P=0.549, R=0.435, N=93
   CAM_FRONT_LEFT: AP=0.360, P=0.622, R=0.365, N=80
   CAM_FRONT_RIGHT: AP=0.307, P=0.454, R=0.331, N=82
   CAM_BACK: AP=0.434, P=0.761, R=0.405, N=93
   CAM_BACK_LEFT: AP=0.332, P=0.559, R=0.369, N=76
   CAM_BACK_RIGHT: AP=0.350, P=0.579, R=0.329, N=76
FasterRCNN_MBV3   0.362     0.612     0.354     500     9.9       0.363/0.658/0.354   
   CAM_FRONT: AP=0.388, P=0.561, R=0.403, N=93
   CAM_FRONT_LEFT: AP=0.379, P=0.696, R=0.348, N=80
   CAM_FRONT_RIGHT: AP=0.322, P=0.507, R=0.347, N=82
   CAM_BACK: AP=0.362, P=0.717, R=0.349, N=93
   CAM_BACK_LEFT: AP=0.379, P=0.835, R=0.345, N=76
   CAM_BACK_RIGHT: AP=0.346, P=0.634, R=0.334, N=76
RetinaNet_R50     0.395     0.603   

In [183]:
# ================================
# Count all road objects that appear
# in ANY of the 6 NuScenes cameras
# ================================

from pathlib import Path
from collections import Counter
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import BoxVisibility

# Your dataset path
DST = Path("/data/Asad/NuScenesMiniNovel")

# Load NuScenes
nusc = NuScenes(version='v1.0-mini', dataroot=str(DST), verbose=True)

# Six cameras in NuScenes
CAM_CHANNELS = [
    "CAM_FRONT",
    "CAM_FRONT_RIGHT",
    "CAM_BACK_RIGHT",
    "CAM_BACK",
    "CAM_BACK_LEFT",
    "CAM_FRONT_LEFT",
]

# Define "road objects": vehicles + pedestrians
def is_road_object(category):
    return category.startswith("vehicle.") or category.startswith("human.pedestrian.")

# Per-camera counts
per_cam_total = Counter()

# Count of unique road objects appearing in ANY camera
unique_objects_seen = set()

print("\nCounting all road objects appearing in camera views...\n")

for sample in nusc.sample:
    sample_ann_tokens = sample["anns"]

    # Track which annotations for this sample appear in at least one camera
    sample_objects_seen_in_any_camera = set()

    for cam in CAM_CHANNELS:
        cam_token = sample["data"][cam]

        # Get boxes that appear in this camera's FOV
        _, boxes, _ = nusc.get_sample_data(
            cam_token,
            box_vis_level=BoxVisibility.ANY,
            selected_anntokens=sample_ann_tokens
        )

        for box in boxes:
            category = box.name

            if not is_road_object(category):
                continue

            # Count per camera
            per_cam_total[cam] += 1

            # Mark that this object's annotation appears in this sample
            sample_objects_seen_in_any_camera.add(box.token)

    # Add these objects to the global set
    unique_objects_seen.update(sample_objects_seen_in_any_camera)


print(f"Unique road objects appearing in cameras: {len(unique_objects_seen)}")


for cam in CAM_CHANNELS:
    print(f"{cam:15s}: {per_cam_total[cam]}")


Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.194 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.

Counting all road objects appearing in camera views...

Unique road objects appearing in cameras: 14682
CAM_FRONT      : 4191
CAM_FRONT_RIGHT: 3428
CAM_BACK_RIGHT : 2467
CAM_BACK       : 5041
CAM_BACK_LEFT  : 811
CAM_FRONT_LEFT : 1692


In [184]:
from pathlib import Path
from collections import Counter
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import BoxVisibility


DST = Path("/data/Asad/NuScenesMiniNovel")

nusc = NuScenes(version='v1.0-mini', dataroot=str(DST), verbose=True)

# 6 camera channels in nuScenes
CAM_CHANNELS = [
    "CAM_FRONT",
    "CAM_FRONT_RIGHT",
    "CAM_BACK_RIGHT",
    "CAM_BACK",
    "CAM_BACK_LEFT",
    "CAM_FRONT_LEFT",
]

def is_road_object(category_name: str) -> bool:
    return category_name.startswith("vehicle.") or category_name.startswith("human.pedestrian.")

per_cam_total = Counter()          
per_cam_visibility = {cam: Counter() for cam in CAM_CHANNELS} 

print("\nCounting road objects and visibility per camera...\n")

for sample in nusc.sample:
    ann_tokens = sample["anns"]

    for cam in CAM_CHANNELS:
        cam_token = sample["data"][cam]

        # Boxes visible in this camera
        _, boxes, _ = nusc.get_sample_data(
            cam_token,
            box_vis_level=BoxVisibility.ANY,
            selected_anntokens=ann_tokens
        )

        for box in boxes:
            category = box.name
            if not is_road_object(category):
                continue

            per_cam_total[cam] += 1

            # Get annotation + visibility level
            ann = nusc.get("sample_annotation", box.token)
            vis_token = ann["visibility_token"]
            vis_level = nusc.get("visibility", vis_token)["level"]   # e.g. 'v40-60', 'v80-100'
            per_cam_visibility[cam][vis_level] += 1


print(f"{'Camera':15s} | {'Total':>7s} | {'Part.Vis':>8s} | {'Part.%':>6s} | {'Full':>7s} | {'Full%':>6s}")
print("-" * 70)

for cam in CAM_CHANNELS:
    total = per_cam_total[cam]
    if total == 0:
        print(f"{cam:15s} | {0:7d} | {0:8d} | {0:6.1f} | {0:7d} | {0:6.1f}")
        continue

    full_visible = per_cam_visibility[cam].get('v80-100', 0)
    partial = total - full_visible   # v0-40, v40-60, v60-80 grouped

    partial_pct = 100.0 * partial / total
    full_pct = 100.0 * full_visible / total

    print(f"{cam:15s} | {total:7d} | {partial:8d} | {partial_pct:6.1f} | {full_visible:7d} | {full_pct:6.1f}")

print("\nNote: 'Part.Vis' = v0-40 + v40-60 + v60-80 (proxy for truncated/occluded).")


Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.190 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.

Counting road objects and visibility per camera...

Camera          |   Total | Part.Vis | Part.% |    Full |  Full%
----------------------------------------------------------------------
CAM_FRONT       |    4191 |     2099 |   50.1 |    2092 |   49.9
CAM_FRONT_RIGHT |    3428 |     2017 |   58.8 |    1411 |   41.2
CAM_BACK_RIGHT  |    2467 |     1476 |   59.8 |     991 |   40.2
CAM_BACK        |    5041 |     2529 |   50.2 |    2512 |   49.8
CAM_BACK_LEFT   |     811 |      321 |   39.6 |     490 |   60.4
CAM_FRONT_LEFT  |    1692 |      680 |   40.2 |    1012 |   59.8

Note: 'Part.Vis' = v0-40 + v40-60 + v60-80 (proxy for truncated/occluded).


In [198]:
import cv2
import numpy as np
from pathlib import Path
from nuscenes.nuscenes import NuScenes

# Paths and setup
DATAROOT = Path("/data/Asad/NuScenesMiniNovel")
VERSION = "v1.0-mini"

OUTDIR = DATAROOT / "multiview_videos"
OUTDIR.mkdir(parents=True, exist_ok=True)

# The 6 camera channels in the order we want them in the grid
ALL_CAMS = [
    "CAM_FRONT",
    "CAM_FRONT_LEFT",
    "CAM_FRONT_RIGHT",
    "CAM_BACK",
    "CAM_BACK_LEFT",
    "CAM_BACK_RIGHT",
]

# Init NuScenes over your *novel* dataset
nusc_novel = NuScenes(version=VERSION, dataroot=str(DATAROOT), verbose=False)


def make_scene_multiview_video(scene_name, fps=6, resize=(640, 360), max_frames=None):
    """
    For a given scene name in the NuScenesMiniNovel dataset, create a multiview
    video (3x2 grid of all 6 cameras) and save it to OUTDIR.
    """
    scene_row = next((s for s in nusc_novel.scene if s["name"] == scene_name), None)
    assert scene_row is not None, f"Scene {scene_name} not found in NuScenesMiniNovel"

    sample_token = scene_row["first_sample_token"]
    w, h = resize

    grid_w, grid_h = 3 * w, 2 * h
    out_path = OUTDIR / f"{scene_name}__multiview.mp4"

    vw = cv2.VideoWriter(
        str(out_path),
        cv2.VideoWriter_fourcc(*"mp4v"),
        fps,
        (grid_w, grid_h),
    )

    frame_idx = 0
    while sample_token:
        sample = nusc_novel.get("sample", sample_token)
        tiles = []

        for ch in ALL_CAMS:
            if ch not in sample["data"]:
                # blank tile if channel missing
                tile = np.zeros((h, w, 3), dtype=np.uint8)
            else:
                sd_token = sample["data"][ch]
                sd = nusc_novel.get("sample_data", sd_token)
                img_path = DATAROOT / sd["filename"]

                img = cv2.imread(str(img_path))
                if img is None:
                    tile = np.zeros((h, w, 3), dtype=np.uint8)
                else:
                    tile = cv2.resize(img, (w, h), interpolation=cv2.INTER_AREA)

            tiles.append(tile)

        # 3x2 grid: first row = FRONT, FRONT_LEFT, FRONT_RIGHT
        #           second row = BACK, BACK_LEFT, BACK_RIGHT
        row1 = np.hstack(tiles[:3])
        row2 = np.hstack(tiles[3:])
        grid = np.vstack([row1, row2])

        # optional overlay: scene + frame idx
        cv2.rectangle(grid, (10, grid_h - 40), (10 + 450, grid_h - 10), (0, 0, 0), -1)
        cv2.putText(
            grid,
            f"{scene_name} | frame {frame_idx}",
            (15, grid_h - 18),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (255, 255, 255),
            1,
            cv2.LINE_AA,
        )

        vw.write(grid)
        frame_idx += 1

        if max_frames is not None and frame_idx >= max_frames:
            break

        sample_token = sample["next"]

    vw.release()
    return str(out_path), frame_idx


# Example usage:
# 1) show available scene names
print("Scenes in NuScenesMiniNovel:")
scene_names = [s["name"] for s in nusc_novel.scene]
for n in scene_names:
    print("  -", n)

# 2) Make multiview videos for all scenes (full length)
video_paths = []
for name in scene_names:
    print(f"\nRendering scene {name}...")
    path, n_frames = make_scene_multiview_video(
        scene_name=name,
        fps=6,
        resize=(640, 360),
        max_frames=None,  # set e.g. 100 if you want to truncate
    )
    print(f"  Saved {path} with {n_frames} frames")
    video_paths.append((name, path, n_frames))

print("\nDone. Videos are in:", OUTDIR)


Scenes in NuScenesMiniNovel:
  - scene-0061
  - scene-0103
  - scene-0553
  - scene-0655
  - scene-0757
  - scene-0796
  - scene-0916
  - scene-1077
  - scene-1094
  - scene-1100

Rendering scene scene-0061...
  Saved /data/Asad/NuScenesMiniNovel/multiview_videos/scene-0061__multiview.mp4 with 39 frames

Rendering scene scene-0103...
  Saved /data/Asad/NuScenesMiniNovel/multiview_videos/scene-0103__multiview.mp4 with 40 frames

Rendering scene scene-0553...
  Saved /data/Asad/NuScenesMiniNovel/multiview_videos/scene-0553__multiview.mp4 with 41 frames

Rendering scene scene-0655...
  Saved /data/Asad/NuScenesMiniNovel/multiview_videos/scene-0655__multiview.mp4 with 41 frames

Rendering scene scene-0757...
  Saved /data/Asad/NuScenesMiniNovel/multiview_videos/scene-0757__multiview.mp4 with 41 frames

Rendering scene scene-0796...
  Saved /data/Asad/NuScenesMiniNovel/multiview_videos/scene-0796__multiview.mp4 with 40 frames

Rendering scene scene-0916...
  Saved /data/Asad/NuScenesMiniNov