In [1]:
import json
import os
import pickle

from optimized_ingestion.camera_config import camera_config
from optimized_ingestion.payload import Payload
from optimized_ingestion.pipeline import Pipeline
from optimized_ingestion.stages.decode_frame.parallel_decode_frame import ParallelDecodeFrame
from optimized_ingestion.stages.decode_frame.decode_frame import DecodeFrame
from optimized_ingestion.stages.detection_2d.yolo_detection import YoloDetection
from optimized_ingestion.stages.detection_3d.from_2d_and_road import From2DAndRoad as FromD2DAndRoad
from optimized_ingestion.stages.filter_car_facing_sideway import FilterCarFacingSideway
from optimized_ingestion.stages.detection_estimation import DetectionEstimation
from optimized_ingestion.stages.tracking_2d.strongsort import StrongSORT
from optimized_ingestion.stages.tracking_2d.tracking_2d import Tracking2D, Tracking2DResult
from optimized_ingestion.stages.tracking_3d.from_2d_and_road import From2DAndRoad
from optimized_ingestion.stages.tracking_3d.tracking_3d import Tracking3DResult
from optimized_ingestion.stages.segment_trajectory import SegmentTrajectory
# from optimized_ingestion.trackers.yolov5_strongsort_osnet_tracker import TrackingResult
from optimized_ingestion.video import Video
from optimized_ingestion.video_skipped import VideoSkipped

  from .autonotebook import tqdm as notebook_tqdm
YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

Using cache found in /data/chanwutk/code/apperception/weights/ultralytics_yolov5_master


Using cuda:0


YOLOv5 🚀 2022-11-22 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [2]:
# from optimized_ingestion.cache import disable_cache
# disable_cache()

In [3]:
# Name of the environment variable that, when set, supplies the data root
# directory (it is looked up in os.environ where DATA_DIR is resolved).
NUSCENES_PROCESSED_DATA = "NUSCENES_PROCESSED_DATA"

# Video names of interest. Entries that are commented out are disabled.
# NOTE(review): in the visible part of this notebook the list is only
# referenced by a commented-out filter — confirm whether it is still needed.
BOSTON_VIDEOS = [
    # "scene-0757-CAM_FRONT",
    # "scene-0103-CAM_FRONT",
    # "scene-0553-CAM_FRONT",
    # "scene-0665-CAM_FRONT",
    # "scene-0655-CAM_FRONT_RIGHT",
    "scene-0655-CAM_BACK_RIGHT",
]

In [4]:
import torch

In [5]:
class DataclassJSONEncoder(json.JSONEncoder):
    """JSON encoder that understands tracking results and torch tensors.

    ``Tracking3DResult`` and ``Tracking2DResult`` objects are turned into
    plain dicts of their fields, ``torch.Tensor`` values into (nested) lists.
    Anything else is handed to ``json.JSONEncoder.default``.
    """

    # Attributes that are copied unchanged, in output order.
    _LEADING_3D = ("frame_idx", "detection_id", "object_id", "point_from_camera")
    _LEADING_2D = ("detection_id", "frame_idx", "object_id")
    _BOX_AND_TYPE = ("bbox_left", "bbox_top", "bbox_w", "bbox_h", "object_type")

    def default(self, o):
        """Return a JSON-serializable replacement for ``o``.

        Falls through to the base class (which raises ``TypeError``) for
        objects that are none of the supported types.
        """
        if isinstance(o, Tracking3DResult):
            encoded = {attr: getattr(o, attr) for attr in self._LEADING_3D}
            # ``point`` is converted with its own ``tolist()``.
            encoded["point"] = o.point.tolist()
            for attr in self._BOX_AND_TYPE:
                encoded[attr] = getattr(o, attr)
            # The timestamp is emitted as its ``str()`` form.
            encoded["timestamp"] = str(o.timestamp)
            return encoded

        if isinstance(o, Tracking2DResult):
            encoded = {
                attr: getattr(o, attr)
                for attr in self._LEADING_2D + self._BOX_AND_TYPE
            }
            encoded["confidence"] = o.confidence
            return encoded

        if isinstance(o, torch.Tensor):
            return o.tolist()

        return super().default(o)

In [6]:
# Data root: taken from the environment variable named by
# NUSCENES_PROCESSED_DATA when it is set, otherwise the default path below.
DATA_DIR = os.environ.get(
    NUSCENES_PROCESSED_DATA,
    "/work/apperception/data/nuScenes/full-dataset-v1.0/Mini",
)

# Load the pickled mapping of video name -> video description
# (later cells read the "filename" and "frames" entries of each value).
# NOTE(review): pickle.load executes arbitrary code from the file; only point
# DATA_DIR at trusted data.
with open(os.path.join(DATA_DIR, "videos/boston-seaport", "frames-skip.pickle"), "rb") as f:
    videos = pickle.load(f)

In [7]:
# Two-stage pipeline (frame decoding, then YOLO 2D detection) that is run over
# every video; the result of each run is discarded.
# NOTE(review): presumably this exists to warm the stage cache — confirm.
cache_pipeline = Pipeline()
cache_pipeline.add_filter(filter=ParallelDecodeFrame())
cache_pipeline.add_filter(filter=YoloDetection())

for name, video in videos.items():
    video_path = os.path.join(DATA_DIR, "videos/boston-seaport", video["filename"])
    # One camera config per recorded frame entry, with a trailing 0 argument.
    camera_configs = [camera_config(*frame, 0) for frame in video["frames"]]
    frames = VideoSkipped(video_path, camera_configs)

    cache_pipeline.run(Payload(frames))

YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

Using cache found in /data/chanwutk/code/weights/ultralytics_yolov5_master
YOLOv5 🚀 2022-11-22 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


Stage:  DecodeFrame.ParallelDecodeFrame
None
389
  filtered frames: 59.383033419023135%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  Detection2D.YoloDetection
None
389
  filtered frames: 59.383033419023135%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  DecodeFrame.ParallelDecodeFrame
None

None
399
  filtered frames: 59.147869674185465%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  Detection2D.YoloDetection
None
399
  filtered frames: 59.147869674185465%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  DecodeFrame.ParallelDecodeFrame
None
399
  filtered fram

None
398
  filtered frames: 59.2964824120603%
K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K
Stage:  Detection2D.YoloDetection
None
398
  filtered frames: 59.2964824120603%
K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K
Stage:  DecodeFrame.ParallelDecodeFrame
None
398
  filtered frames: 59

In [8]:
# Pipeline with detection estimation. Stage order matters:
# decode -> 2D detection -> 3D detection -> detection estimation
# -> StrongSORT 2D tracking -> 3D tracking -> trajectory segmentation.
pipeline = Pipeline()
pipeline.add_filter(ParallelDecodeFrame())
pipeline.add_filter(YoloDetection())

pipeline.add_filter(FromD2DAndRoad())
pipeline.add_filter(DetectionEstimation())
pipeline.add_filter(StrongSORT())

pipeline.add_filter(From2DAndRoad())
pipeline.add_filter(SegmentTrajectory())

# metadata: video name -> SegmentTrajectory result of that video's run.
# outputs_with_estimation: raw pipeline outputs, in processing order.
metadata = {}
outputs_with_estimation = []
for name, video in videos.items():
    # Only videos whose name ends in CAM_FRONT are processed.
    if name.endswith('CAM_FRONT'):
        print(name, '--------------------------------------------------------------------------------')
        video_path = os.path.join(DATA_DIR, "videos/boston-seaport", video["filename"])
        camera_configs = [camera_config(*frame, 0) for frame in video["frames"]]
        frames = VideoSkipped(video_path, camera_configs)

        output = pipeline.run(Payload(frames))
        outputs_with_estimation.append(output)
        metadata[name] = SegmentTrajectory.get(output)

YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

Using cache found in /data/chanwutk/code/weights/ultralytics_yolov5_master
YOLOv5 🚀 2022-11-22 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


scene-0103-CAM_FRONT --------------------------------------------------------------------------------
Stage:  DecodeFrame.ParallelDecodeFrame
None
389
  filtered frames: 58.868894601542415%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK
Stage:  Detection2D.YoloDetection
None
389
  filtered frames: 58.868894601542415%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.

389it [00:00, 4672.58it/s]


None
389
  filtered frames: 58.868894601542415%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK
Stage:  DetectionEstimation


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 388/388 [00:02<00:00, 165.36it/s]
YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)



389
389
  filtered frames: 37.01799485861183%
K..............................................................K.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.
K.K............K............K.............K............K.KK.K.KK.K.KK.K.K............K.KK...........
..K............K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK
Stage:  Tracking2D.StrongSORT
Successfully loaded pretrained weights from "/data/chanwutk/code/apperception/weights/osnet_x0_25_msmt17.pt"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 389/389 [00:26<00:00, 14.83it/s]


None
389
  filtered frames: 37.01799485861183%
K..............................................................K.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.
K.K............K............K.............K............K.KK.K.KK.K.KK.K.K............K.KK...........
..K............K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK
Stage:  Tracking3D.From2DAndRoad


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 389/389 [00:00<00:00, 1587144.22it/s]


None
389
  filtered frames: 37.01799485861183%
K..............................................................K.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.
K.K............K............K.............K............K.KK.K.KK.K.KK.K.K............K.KK...........
..K............K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK
Stage:  SegmentTrajectory
None
389
  filtered frames: 37.01799485861183%
K..............................................................K.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.
K.K............K............K.............K............K.KK.K.KK.K.KK.K.K............K.KK...........
..K............K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.KK
scene-0553-CAM_FRONT -------------------------------------------------------------------------

399it [00:00, 4987.46it/s]


None
399
  filtered frames: 59.3984962406015%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.K..K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  DetectionEstimation


  n /= np.linalg.norm(n, 2)
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 398/398 [00:10<00:00, 39.39it/s]
YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)



399
399
  filtered frames: 59.3984962406015%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.K..K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  Tracking2D.StrongSORT
Successfully loaded pretrained weights from "/data/chanwutk/code/apperception/weights/osnet_x0_25_msmt17.pt"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 399/399 [00:24<00:00, 16.11it/s]


None
399
  filtered frames: 59.3984962406015%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.K..K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  Tracking3D.From2DAndRoad


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 399/399 [00:00<00:00, 1562583.84it/s]


None
399
  filtered frames: 59.3984962406015%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.K..K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  SegmentTrajectory
None
399
  filtered frames: 59.3984962406015%
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.K..K..K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
scene-0655-CAM_FRONT -------------------------------------------------------

397it [00:00, 4764.97it/s]


None
397
  filtered frames: 59.69773299748111%
KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  DetectionEstimation


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 396/396 [00:02<00:00, 141.05it/s]
YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)



397
397
  filtered frames: 52.896725440806044%
KK.K............K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.KK.K.KK.K.............................K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.KK.K.K.....K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  Tracking2D.StrongSORT
Successfully loaded pretrained weights from "/data/chanwutk/code/apperception/weights/osnet_x0_25_msmt17.pt"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 397/397 [00:24<00:00, 16.00it/s]


None
397
  filtered frames: 52.896725440806044%
KK.K............K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.KK.K.KK.K.............................K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.KK.K.K.....K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  Tracking3D.From2DAndRoad


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 397/397 [00:00<00:00, 1463215.02it/s]


None
397
  filtered frames: 52.896725440806044%
KK.K............K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.KK.K.KK.K.............................K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.KK.K.K.....K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
Stage:  SegmentTrajectory
None
397
  filtered frames: 52.896725440806044%
KK.K............K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK...KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.KK.K.KK.K.............................K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.KK.K.K.....K.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.
KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK
scene-0757-CAM_FRONT -------------------------------------------------------

398it [00:00, 4833.10it/s]


None
398
  filtered frames: 59.54773869346734%
K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.K..K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K
Stage:  DetectionEstimation


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 397/397 [00:01<00:00, 217.00it/s]
YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)



398
398
  filtered frames: 53.517587939698494%
K.KK.......................K......................K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K
Stage:  Tracking2D.StrongSORT
Successfully loaded pretrained weights from "/data/chanwutk/code/apperception/weights/osnet_x0_25_msmt17.pt"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 398/398 [00:53<00:00,  7.42it/s]


None
398
  filtered frames: 53.517587939698494%
K.KK.......................K......................K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K
Stage:  Tracking3D.From2DAndRoad


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 398/398 [00:00<00:00, 1548546.37it/s]

None
398
  filtered frames: 53.517587939698494%
K.KK.......................K......................K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K
Stage:  SegmentTrajectory
None
398
  filtered frames: 53.517587939698494%
K.KK.......................K......................K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.
K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.KK.K.K





In [9]:
# Write the per-video results collected in `metadata` to a JSON file.
# DataclassJSONEncoder converts the tracking-result objects and tensors that
# the stock json encoder cannot serialize.
output_path = "./outputs/trackings-with-estimation.json"
# Create the target directory first so a fresh checkout (no ./outputs yet)
# does not fail with FileNotFoundError after the long pipeline run.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, "w") as f:
    json.dump(metadata, f, cls=DataclassJSONEncoder)

In [10]:
# Pipeline without detection estimation:
# decode -> 2D detection -> StrongSORT 2D tracking -> 3D tracking.
pipeline = Pipeline()
pipeline.add_filter(ParallelDecodeFrame())
pipeline.add_filter(YoloDetection())

# DetectionEstimation is left disabled for this run:
# pipeline.add_filter(DetectionEstimation())
pipeline.add_filter(StrongSORT())

pipeline.add_filter(From2DAndRoad())

# Data root: environment override if present, otherwise the default path.
DATA_DIR = os.environ.get(
    NUSCENES_PROCESSED_DATA,
    "/work/apperception/data/nuScenes/full-dataset-v1.0/Mini",
)
# This run reads "frames.pickle"; each entry additionally provides a "start"
# value that is passed to Video below.
with open(os.path.join(DATA_DIR, "videos/boston-seaport", "frames.pickle"), "rb") as f:
    videos = pickle.load(f)

# metadata: video name -> From2DAndRoad (3D tracking) result of that run.
# outputs_without_estimation: raw pipeline outputs, in processing order.
metadata = {}
outputs_without_estimation = []
for name, video in videos.items():
    # Alternative (disabled) filter on an explicit list of videos:
    #     if name not in BOSTON_VIDEOS:
    #         continue
    if name.endswith('CAM_FRONT'):
        print(name, '--------------------------------------------------------------------------------')
        video_path = os.path.join(DATA_DIR, "videos/boston-seaport", video["filename"])
        camera_configs = [camera_config(*frame, 0) for frame in video["frames"]]
        frames = Video(video_path, camera_configs, video["start"])

        output = pipeline.run(Payload(frames))
        outputs_without_estimation.append(output)
        metadata[name] = From2DAndRoad.get(output)

YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

Using cache found in /data/chanwutk/code/weights/ultralytics_yolov5_master
YOLOv5 🚀 2022-11-22 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


scene-0103-CAM_FRONT --------------------------------------------------------------------------------
Stage:  DecodeFrame.ParallelDecodeFrame


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:22<00:00,  3.52it/s]


None
388
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Detection2D.YoloDetection


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 388/388 [00:03<00:00, 103.89it/s]
YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)



None
388
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Tracking2D.StrongSORT
Successfully loaded pretrained weights from "/data/chanwutk/code/apperception/weights/osnet_x0_25_msmt17.pt"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 388/388 [01:50<00:00,  3.51it/s]


None
388
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Tracking3D.From2DAndRoad


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 388/388 [00:00<00:00, 3193.57it/s]

None
388
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
scene-0553-CAM_FRONT --------------------------------------------------------------------------------
Stage:  DecodeFrame.ParallelDecodeFrame



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:16<00:00,  4.97it/s]


None
398
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Detection2D.YoloDetection


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 398/398 [00:04<00:00, 92.51it/s]
YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)



None
398
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Tracking2D.StrongSORT
Successfully loaded pretrained weights from "/data/chanwutk/code/apperception/weights/osnet_x0_25_msmt17.pt"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 398/398 [00:57<00:00,  6.95it/s]


None
398
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Tracking3D.From2DAndRoad


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 398/398 [00:00<00:00, 6537.84it/s]

None
398
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
scene-0655-CAM_FRONT --------------------------------------------------------------------------------
Stage:  DecodeFrame.ParallelDecodeFrame



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:28<00:00,  2.83it/s]


None
396
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Detection2D.YoloDetection


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 396/396 [00:04<00:00, 95.41it/s]
YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)



None
396
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Tracking2D.StrongSORT
Successfully loaded pretrained weights from "/data/chanwutk/code/apperception/weights/osnet_x0_25_msmt17.pt"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 396/396 [01:17<00:00,  5.11it/s]


None
396
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Tracking3D.From2DAndRoad


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 396/396 [00:00<00:00, 7128.18it/s]

None
396
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
scene-0757-CAM_FRONT --------------------------------------------------------------------------------
Stage:  DecodeFrame.ParallelDecodeFrame



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:16<00:00,  4.85it/s]


None
397
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Detection2D.YoloDetection


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 397/397 [00:03<00:00, 103.23it/s]
YOLOv5 🚀 2022-11-10 Python-3.10.8 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)



None
397
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Tracking2D.StrongSORT
Successfully loaded pretrained weights from "/data/chanwutk/code/apperception/weights/osnet_x0_25_msmt17.pt"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 397/397 [01:32<00:00,  4.29it/s]


None
397
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Tracking3D.From2DAndRoad


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 397/397 [00:00<00:00, 4299.27it/s]

None
397
  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK





In [11]:
# Persist `metadata` to ./outputs/trackings-without-estimation.json.
# The output directory is created first so this cell also works on a fresh
# checkout (Restart & Run All) where ./outputs does not exist yet.
os.makedirs("./outputs", exist_ok=True)
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
with open("./outputs/trackings-without-estimation.json", "w") as f:
    # DataclassJSONEncoder is defined elsewhere in this notebook; presumably it
    # teaches json how to serialize the dataclass values in `metadata` -- confirm.
    json.dump(metadata, f, cls=DataclassJSONEncoder)

In [12]:
# Pull the Tracking2D stage result out of the second entry (index 1) of
# `outputs_without_estimation`. The displayed value below is a list of dicts
# keyed by object_id -- presumably one dict per frame; confirm against
# Tracking2D.get.
res = Tracking2D.get(outputs_without_estimation[1])

In [13]:
# Show the Tracking2D result as this cell's output for manual inspection.
res

[{},
 {},
 {1: Tracking2DResult(frame_idx=2, detection_id=DetectionId(frame_idx=2, obj_order=0), object_id=1, bbox_left=1040.0, bbox_top=184.0, bbox_w=32.0, bbox_h=68.0, object_type='traffic light', confidence=0.8043445944786072),
  2: Tracking2DResult(frame_idx=2, detection_id=DetectionId(frame_idx=2, obj_order=1), object_id=2, bbox_left=1091.0, bbox_top=112.0, bbox_w=37.0, bbox_h=90.0, object_type='traffic light', confidence=0.7631613612174988),
  3: Tracking2DResult(frame_idx=2, detection_id=DetectionId(frame_idx=2, obj_order=2), object_id=3, bbox_left=591.0, bbox_top=327.0, bbox_w=18.0, bbox_h=40.0, object_type='traffic light', confidence=0.6741217374801636),
  4: Tracking2DResult(frame_idx=2, detection_id=DetectionId(frame_idx=2, obj_order=3), object_id=4, bbox_left=1172.0, bbox_top=47.0, bbox_w=52.0, bbox_h=108.0, object_type='traffic light', confidence=0.6731081604957581),
  5: Tracking2DResult(frame_idx=2, detection_id=DetectionId(frame_idx=2, obj_order=4), object_id=5, bbox_le