In [1]:
import os
import time
import pickle

import cv2
from shapely.geometry import Polygon
import numpy.typing as npt


In [2]:
from typing import List, Tuple

In [3]:
from optimized_ingestion.camera_config import camera_config, CameraConfig
from optimized_ingestion.video import Video

In [4]:
from optimized_ingestion.detection_estimation.segment_mapping import map_imgsegment_roadsegment, CameraSegmentMapping
from optimized_ingestion.detection_estimation.utils import trajectory_3d
# from optimized_ingestion.detection_estimation.sample_plan_algorithms import *
from optimized_ingestion.detection_estimation.detection_estimation import construct_all_detection_info, detection_to_img_segment, obj_detection, generate_sample_plan, DetectionInfo, samplePlan


In [5]:
from optimized_ingestion.pipeline import Pipeline
from optimized_ingestion.payload import Payload

from optimized_ingestion.stages.decode_frame.parallel_decode_frame import ParallelDecodeFrame
from optimized_ingestion.stages.detection_2d.yolo_detection import YoloDetection

YOLOv5 🚀 2022-11-10 Python-3.10.6 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)



Using cuda:0


Using cache found in /home/eecs/chanwutk/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-11-10 Python-3.10.6 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

YOLOv5 🚀 2022-11-10 Python-3.10.6 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

Fusing layers... 
Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 
Adding AutoShape... 


In [6]:
BOSTON_VIDEO_DIR = os.path.join(os.environ['NUSCENES_PROCESSED_DATA'], 'videos/boston-seaport')
BOSTON_VIDEO_DIR

'/data/apperception-data/processed/nuscenes/full-dataset-v1.0/Mini/videos/boston-seaport'

In [7]:
car_loc3d_ground_truth = [(1991, 874), (1949.181, 873.164)]

In [8]:
with open(os.path.join(BOSTON_VIDEO_DIR, 'frames.pickle'), 'rb') as f:
    videoconfigs = pickle.load(f)

In [9]:
def display_detection(test_file_path: str, full_img_detection):
    test_frame = cv2.imread(test_file_path)
    for obj_idx, detection in full_img_detection.items():
        obj_cls, bbox = detection
        if obj_cls == 'car':
            x,y,w,h = list(map(int,bbox))
            cv2.rectangle(test_frame,(x-w//2,y-h//2),(x+w//2,y+h//2),(0,255,0),2)
            cv2.putText(test_frame, '_'.join([obj_cls, str(obj_idx)]), (x+w//2+5,y+h//2+5),0,0.3,(0,255,0))
    cv2.imshow('detection', test_frame)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [10]:
def get_video(videoname: str, framesdict: dict) -> "Video":
    videodata = framesdict[videoname]
    filename = videodata['filename']
    frames = videodata['frames']
    configs = [camera_config(*f, 0) for f in frames]
    return Video(os.path.join(BOSTON_VIDEO_DIR, filename), configs, videodata['start'])

In [11]:
# ego car trajectory
def prepare_ego(test_video: str) -> "Tuple[Video, List[trajectory_3d]]":
    video = get_video(test_video, videoconfigs)
    ego_trajectory = [trajectory_3d(f.ego_translation, f.timestamp) for f in video]
    return video, ego_trajectory

In [12]:
def generate_sample_plan_once(
    video: "str",
    ego_config: "CameraConfig",
    mapping: "List[CameraSegmentMapping]",
    next_frame_num: "int",
    car_loc3d=None,
    target_car_detection=None,
    all_detection_info: "List[obj_detection]" = None
) -> "Tuple[samplePlan, None]":
    # if all_detection_info is None:
    #     assert target_car_detection and car_loc3d
    #     x,y,w,h = list(map(int, target_car_detection))
    #     car_loc2d = (x, y+h//2)
    #     car_bbox2d = (x-w//2,y-h//2,x+w//2,y+h//2)
    #     car_bbox3d = None
    #     all_detections = []
    #     all_detections.append(obj_detection('car_1', car_loc3d, car_loc2d, car_bbox3d, car_bbox2d))
    #     all_detection_info = construct_all_detection_info(cam_segment_mapping, ego_trajectory, ego_config, all_detections)
    if all_detection_info:
        print(all_detection_info[0].road_type)
    next_sample_plan = generate_sample_plan(video, next_frame_num, all_detection_info,  50)
    # next_frame = None
    next_sample_frame_info = next_sample_plan.get_next_sample_frame_info()
    if next_sample_frame_info:
        next_sample_frame_name, next_sample_frame_num, _ = next_sample_frame_info
        print("next frame name", next_sample_frame_name)
        print("next frame num", next_sample_frame_num)
    #     print(next_sample_plan.action)
        # TODO: should not read next frame -> get the next frame from frames.pickle
        # next_frame = cv2.imread(test_img_base_dir+next_sample_frame_name)
#         cv2.imshow("next_frame", next_frame)
#         cv2.waitKey(0)
#         cv2.destroyAllWindows()
    return next_sample_plan, None

In [13]:
def construct_estimated_all_detection_info(
    detections: "npt.NDArray",
    cam_segment_mapping: "List[CameraSegmentMapping]",
    ego_config: "CameraConfig",
    ego_trajectory: "trajectory_3d"
) -> "List[DetectionInfo]":
    all_detections = []
    for det in detections:
        bbox = det[:4]
        obj_cls = det[5]
        x, y, x2, y2 = list(map(int,bbox))
        w = x2 - x
        h = y2 - y
        car_loc2d = (x + w // 2, y+h//2)
#         print(car_loc2d)
        car_bbox2d = ((x-w//2, y-h//2), (x+w//2, y+h//2))
        car_bbox3d = None
        estimate_3d = detection_to_img_segment(car_loc2d, cam_segment_mapping)
        if estimate_3d and estimate_3d.road_segment_info.segment_type in ['lane', 'laneSection']:
            car_loc3d = tuple(Polygon(estimate_3d.road_segment_info.segment_polygon).centroid.coords)
#             print(tuple(car_loc3d))
            all_detections.append(obj_detection('car_1', car_loc3d, car_loc2d, car_bbox3d, car_bbox2d))
    print("all_detections", all_detections)
    all_detection_info = construct_all_detection_info(cam_segment_mapping, ego_config, ego_trajectory, all_detections)
    return all_detection_info

In [14]:
def dry_run(
    payload: "Payload",
    start_frame_num: "int",
    ego_trajectory: "List[trajectory_3d]",
    video: "str"
):
    skipped_frame_num = []
    next_frame_num = start_frame_num
    action_type_counts = {}
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    display_video = cv2.VideoWriter(f'sampled_frames_{video.replace("/", "_")}.avi',fourcc, 10, (1600, 900))
    start_time = time.time()
    total_detection_time = 0
    total_sample_plan_time = 0
    for i in range(len(payload.video)-1):
        current_ego_config = payload.video[i]
        if i != next_frame_num:
            skipped_frame_num.append(i)
            continue
        next_frame_num = i + 1
        cam_segment_mapping = map_imgsegment_roadsegment(current_ego_config)
        print("mapping length", len(cam_segment_mapping))
        # current_frame = test_img_base_dir + current_ego_config['fileName']
        # display_video.write(cv2.imread(current_frame))
        start_detection_time = time.time()
        all_detection_info = construct_estimated_all_detection_info(YoloDetection.get(payload)[i][0], cam_segment_mapping, current_ego_config, ego_trajectory)
        total_detection_time += time.time()-start_detection_time
        start_generate_sample_plan = time.time()
        next_sample_plan, _ = generate_sample_plan_once(payload.video, current_ego_config, cam_segment_mapping, next_frame_num, all_detection_info=all_detection_info)
        total_sample_plan_time += time.time() - start_generate_sample_plan
        next_action_type = next_sample_plan.get_action_type()
        if next_action_type not in action_type_counts:
            action_type_counts[next_action_type] = 1
        else:
            action_type_counts[next_action_type] += 1
        next_frame_num = next_sample_plan.get_next_frame_num(next_frame_num)

    display_video.release()
    print("sorted_ego_config_length", len(payload.video))
    print("number of skipped", len(skipped_frame_num))
    print(skipped_frame_num)
    print(action_type_counts)
    total_run_time = time.time()-start_time
    num_runs = len(payload.video) - len(skipped_frame_num)
    print("total_run_time", total_run_time)
    print("avg run time", total_run_time/num_runs)
    print("total_detection_time", total_detection_time)
    print("avg detection time", total_detection_time/num_runs)
    print("total_generate_sample_plan_time", total_sample_plan_time)
    print("avg generate_sample_plan time", total_sample_plan_time/num_runs)

In [15]:
pipeline = Pipeline()
pipeline.add_filter(ParallelDecodeFrame())
pipeline.add_filter(YoloDetection())

test_video1 = 'scene-0757-CAM_FRONT'
video1, ego_trajectory1 = prepare_ego(test_video1)
payload1 = pipeline.run(Payload(video1))

YOLOv5 🚀 2022-11-10 Python-3.10.6 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

YOLOv5 🚀 2022-11-10 Python-3.10.6 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

Using cache found in /home/eecs/chanwutk/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-11-10 Python-3.10.6 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

YOLOv5 🚀 2022-11-10 Python-3.10.6 torch-1.13.0+cu117 CUDA:0 (NVIDIA TITAN Xp, 12196MiB)

Fusing layers... 
Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 
Adding AutoShape... 


Stage:  DecodeFrame.ParallelDecodeFrame


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:15<00:00,  5.27it/s]


  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
Stage:  Detection2D.YoloDetection


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 397/397 [00:09<00:00, 43.75it/s]

  filtered frames: 100.0%
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK
KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK





In [16]:
dry_run(payload1, 0, ego_trajectory1, test_video1)

  a, b = line.boundary


total mapping time:  0.26101160049438477
mapping length 24
all_detections []
total mapping time:  0.08991837501525879
mapping length 24
all_detections []
total mapping time:  0.06955862045288086
mapping length 24
all_detections []
total mapping time:  0.0667426586151123
mapping length 24
all_detections [obj_detection(id='car_1', car_loc3d=((341.7109839644813, 658.8090580685604),), car_loc2d=(801, 415), car_bbox3d=None, car_bbox2d=((791, 397), (801, 415)))]
lane
relative_direction_2 18.84942836926756 2018-08-30 12:25:08.412404
next frame name None
next frame num 27
total mapping time:  0.10233020782470703
mapping length 24
all_detections []


  for intersect in intersection:


total mapping time:  0.09741592407226562
mapping length 24
all_detections [obj_detection(id='car_1', car_loc3d=((341.7109839644813, 658.8090580685604),), car_loc2d=(723, 415), car_bbox3d=None, car_bbox2d=((711, 391), (723, 415)))]
lane
relative_direction_2 18.84942836926756 2018-08-30 12:25:09.662404
next frame name None
next frame num 49
total mapping time:  0.08903026580810547
mapping length 21
all_detections [obj_detection(id='car_1', car_loc3d=((272.5255063086455, 677.2313501542442),), car_loc2d=(807, 497), car_bbox3d=None, car_bbox2d=((613, 421), (807, 497)))]
lane
relative_direction_2 7.357008780646023 2018-08-30 12:25:10.712404
next frame name None
next frame num 58
total mapping time:  0.09136438369750977
mapping length 21
all_detections [obj_detection(id='car_1', car_loc3d=((275.7307376440405, 681.7115228383127),), car_loc2d=(870, 490), car_bbox3d=None, car_bbox2d=((654, 412), (870, 490)))]
lane
relative_direction_2 8.560413870929743 2018-08-30 12:25:11.162404
next frame name 

total mapping time:  0.06888651847839355
mapping length 14
all_detections []
total mapping time:  0.05882072448730469
mapping length 14
all_detections []
total mapping time:  0.05796003341674805
mapping length 14
all_detections []
total mapping time:  0.05967521667480469
mapping length 12
all_detections []
total mapping time:  0.05949807167053223
mapping length 12
all_detections []
total mapping time:  0.05967092514038086
mapping length 15
all_detections []
total mapping time:  0.05961036682128906
mapping length 15
all_detections []
total mapping time:  0.06023359298706055
mapping length 15
all_detections [obj_detection(id='car_1', car_loc3d=((275.7307376440405, 681.7115228383127),), car_loc2d=(805, 493), car_bbox3d=None, car_bbox2d=((511, 399), (805, 493)))]
lane


Exception: 2018-08-30 12:25:15.562404 2018-08-30 12:25:16.562404