In [None]:
import os
import pickle
import numpy as np
import cv2
from tqdm import tqdm
from pyquaternion import Quaternion
import pandas as pd

from utils.data_dirs import data_dirs

In [None]:
# Root of the raw nuScenes data, read from the NUSCENES_RAW_DATA environment
# variable (raises KeyError when the variable is unset).
# NOTE: base_dir is reassigned from data_dirs() in the following cell.
base_dir = os.environ['NUSCENES_RAW_DATA']
base_dir

In [None]:
# Resolve the directories/settings used by the rest of the notebook; any
# extra values returned by data_dirs() are ignored.
base_dir, output_dir, folder, EXPERIMENT_DATA, suffix, *_ = data_dirs(False)
# One value per line, in the same order as the assignment above.
print(base_dir, output_dir, folder, EXPERIMENT_DATA, suffix, sep="\n")

In [None]:
# `folder` resolved inside base_dir; its entries are listed in the next cell.
metadata_dir = os.path.join(base_dir, folder)
metadata_dir

In [None]:
# Show every entry of the metadata directory, one per line.
metadata_filenames = os.listdir(metadata_dir)
print(*metadata_filenames, sep="\n")

In [None]:
# Load the annotation table and convert it to a list of per-row dicts.
# NOTE: pd.read_pickle unpickles the file, so only load files written by a
# trusted pipeline.
raw_annotations = pd.read_pickle(os.path.join(output_dir, 'annotation.pkl')).to_dict('records')

In [None]:
# Load the sample-data table and convert it to a list of per-row dicts.
# NOTE: pd.read_pickle unpickles the file, so only load files written by a
# trusted pipeline.
raw_sample_data = pd.read_pickle(os.path.join(output_dir, 'sample_data.pkl')).to_dict('records')

In [None]:
# Peek at one annotation record to see which keys are available.
raw_annotations[0]

In [None]:
# Index annotations by their token (annotation token -> annotation record).
annotation_map = {}
for annotation in raw_annotations:
    annotation_map[annotation['token']] = annotation
# A size mismatch means two annotations share a token.
assert len(annotation_map) == len(raw_annotations), (len(annotation_map), len(raw_annotations))

In [None]:
# Peek at one sample-data record to see which keys are available.
raw_sample_data[0]

In [None]:
# Index sample data (camera config) records by their token.
sample_data_map = dict(
    (record['token'], record)
    for record in raw_sample_data
)
# A size mismatch means two sample-data records share a token.
assert len(sample_data_map) == len(raw_sample_data)

In [None]:
# Every sample data referenced by an annotation must belong to the same
# sample as the annotation itself.
assert all(
    sample_data_map[token]['sample_token'] == annotation['sample_token']
    for annotation in annotation_map.values()
    for token in annotation['sample_data_tokens']
)

In [None]:
# Number of sample-data records flagged as key frames.
sum(1 for record in raw_sample_data if record['is_key_frame'])

In [None]:
# sample_token_to_annotation_map = {}
# for a in raw_annotations:
#     sample_token = a['sample_token']
#     if sample_token not in annotation_map:
#         sample_token_to_annotation_map[sample_token] = []
#     sample_token_to_annotation_map[sample_token].append(a)

In [None]:
# scenes = {}
# for sd in raw_sample_data:
#     scene_name = sd['scene_name']
#     if scene_name not in scenes:
#         scenes[scene_name] = {}
#     scene = scenes[scene_name]

#     sample_token = sd['sample_token']
#     if sample_token not in scene:
#         scene[sample_token] = []
#     scene[sample_token].append(sd)

In [None]:
# [*scenes.keys()]

In [None]:
# [*scenes['scene-0061'].keys()]

In [None]:
# len(scenes['scene-0061']['378a3a3e9af346308ab9dff8ced46d9c'])

In [None]:
# # Check whether, for each scene / camera, the key frame is the frame closest to the sample timestamp
# closest_frame = {}
# for scene_name, scene in scenes.items():
#     for sample_token, sample in scene.items():
#         camera = {}
#         for s in sample:
#             if s['channel'] not in camera:
#                 camera[s['channel']] = []
#             camera[s['channel']].append((
#                 s['is_key_frame'],
#                 s['timestamp'],
#                 s['sample_timestamp'],
#             ))
        
#         for c_name, _sample_data in camera.items():
#             best_is_key_frame, best_timestamp, best_timestamp_diff = None, None, None
#             for is_key_frame, timestamp, sample_timestamp in _sample_data:
#                 timestamp_diff = sample_timestamp - timestamp
#                 if best_timestamp_diff is None or abs(best_timestamp_diff) > abs(timestamp_diff):
#                     best_is_key_frame, best_timestamp, best_timestamp_diff = is_key_frame, timestamp, timestamp_diff
#             key = f'{sample_token}-{c_name}'
#             assert key not in closest_frame
#             closest_frame[key] = (best_is_key_frame, best_timestamp, best_timestamp_diff)

In [None]:
# [cf for cf in closest_frame.values() if not cf[0]]
# # This filter is empty -> All the key frames are the one that is closest to the sample timestamp

In [None]:
# max([(cf[2]) for cf in closest_frame.values()]) / 1_000_000

In [None]:
# Reference view of an annotation record (keys used by the projection code below).
raw_annotations[0]

In [None]:
# Reference view of a sample-data record (keys used by the projection code below).
raw_sample_data[0]

In [None]:
def world2pixel(annotation, sample_data):
    """Project an annotation's position into the pixel grid of one camera.

    Args:
        annotation: mapping with a 3-element ``'translation'``, expressed in
            the same frame as ``sample_data['camera_translation']``.
        sample_data: mapping with ``'camera_translation'`` (3 values),
            ``'camera_rotation'`` (quaternion) and ``'camera_intrinsic'``
            (3x3 matrix).

    Returns:
        A length-3 array ``[x, y, 1]`` of homogeneous pixel coordinates, or
        the sentinel ``[-1, -1, 0]`` when the point is on or behind the
        camera plane and therefore cannot be projected.
    """
    camera_translation = np.array(sample_data['camera_translation'])
    camera_rotation = Quaternion(sample_data['camera_rotation'])
    camera_intrinsic = np.array(sample_data['camera_intrinsic'])
    position = np.array(annotation['translation'])

    # Express the point relative to the camera, as a column vector.
    offset = position - camera_translation
    point_from_camera = camera_rotation.inverse.rotate(offset).reshape((3, 1))

    # `<=` rather than `<`: a depth of exactly zero would otherwise reach the
    # perspective division below and yield inf/nan pixel coordinates
    # (assuming the intrinsic matrix's last row is [0, 0, 1] -- TODO confirm).
    if point_from_camera[2, 0] <= 0:
        return np.array([-1, -1, 0])

    point2d = np.dot(camera_intrinsic, point_from_camera)
    assert point2d.shape == (3, 1)

    # Perspective division by the homogeneous coordinate.
    return (point2d / point2d[2:3]).reshape((3,))

In [None]:
def in_view(annotation: "dict[str, Any]"):
    """Build a predicate telling whether `annotation` is visible in a camera frame.

    The returned function takes a sample data token, looks the record up in
    `sample_data_map`, and is truthy when the annotation's translation
    projects inside the 1600x900 image and lies in front of the camera.
    """
    def fn(sample_data_token: "str"):
        sample_data = sample_data_map[sample_data_token]
        x, y, _ = world2pixel(annotation, sample_data)

        # Depth of the annotation along the camera's z axis.
        camera_rotation = Quaternion(sample_data['camera_rotation'])
        offset = np.array(annotation['translation']) - np.array(sample_data['camera_translation'])
        _, _, depth = camera_rotation.inverse.rotate(offset)

        inside_horizontally = (0 <= x < 1600)
        inside_vertically = (0 <= y < 900)
        return inside_horizontally and inside_vertically and depth > 0
    return fn

In [None]:
# Annotations whose position is not visible in any of their sample data.
not_in_view = []
def split(a):
    """Partition an annotation's sample data tokens by camera visibility.

    Returns a copy of `a` in which ``'sample_data_tokens'`` keeps only the
    tokens where the annotation is in view, plus two new keys:
    ``'out_of_view_sample_data_tokens'`` (the remaining tokens) and
    ``'channels'`` (the channel of each in-view sample data). Annotations
    with no in-view token are also appended to the module-level
    `not_in_view` list.
    """
    in_view_a = in_view(a)
    sample_data_tokens = []
    out_of_view_sample_data_tokens = []
    # Single pass: each token is projected once instead of once per filter.
    for sdt in a['sample_data_tokens']:
        if in_view_a(sdt):
            sample_data_tokens.append(sdt)
        else:
            out_of_view_sample_data_tokens.append(sdt)

    if not sample_data_tokens:
        not_in_view.append(a)
    return {
        **a,
        'sample_data_tokens': sample_data_tokens,
        'out_of_view_sample_data_tokens': out_of_view_sample_data_tokens,
        'channels': [sample_data_map[sdt]['channel'] for sdt in sample_data_tokens],
    }
output_annotations = [
    split(a)
    for a
    in tqdm(raw_annotations)
]

In [None]:
# (annotations not visible in any of their sample data, total annotations)
len(not_in_view), len(output_annotations)

In [None]:
# Example of a car annotation that is not visible in any camera; evaluates to
# None (no output) instead of raising IndexError when there is no such car.
next((a for a in not_in_view if a['category'] == 'vehicle.car'), None)

In [None]:
# Distinct object instances that have at least one never-visible annotation.
missing_objects = {hidden['instance_token'] for hidden in not_in_view}

In [None]:
# Number of distinct instances with at least one never-visible annotation.
len(missing_objects)

In [None]:
# Categories that occur among the never-visible annotations.
{hidden['category'] for hidden in not_in_view}

In [None]:
# Total number of partitioned annotations (one per raw annotation).
len(output_annotations)

In [None]:
# Inverted indexes: sample data token -> annotations that are visible /
# not visible in that sample data.
sample_data_to_annotations = {}
sample_data_to_out_of_view_annotations = {}
for partitioned in output_annotations:
    for token in partitioned['sample_data_tokens']:
        sample_data_to_annotations.setdefault(token, []).append(partitioned)
    for token in partitioned['out_of_view_sample_data_tokens']:
        sample_data_to_out_of_view_annotations.setdefault(token, []).append(partitioned)
print(len(sample_data_to_annotations))
print(len(sample_data_to_out_of_view_annotations))

In [None]:
# Group sample data as scenes[scene_name][channel] -> list of records,
# preserving the order of raw_sample_data within each list.
scenes = {}
for sd in raw_sample_data:
    per_channel = scenes.setdefault(sd['scene_name'], {})
    per_channel.setdefault(sd['channel'], []).append(sd)

In [None]:
def bird_eye_view(annotation, sample_data):
    """Return the annotation's position relative to the ego pose.

    The ego translation is subtracted from the annotation's translation and
    the result is rotated by the inverse of the ego rotation, giving a
    3-vector in the ego's own axes.
    """
    ego_translation = np.array(sample_data['ego_translation'])
    ego_rotation = Quaternion(sample_data['ego_rotation'])
    relative_position = np.array(annotation['translation']) - ego_translation
    return ego_rotation.inverse.rotate(relative_position)
    

In [None]:
# Pairs of corner indices to connect when drawing a 3D box. Corners are
# generated by nested loops over x, then y, then z (low value first), so
# corner index = 4 * x_bit + 2 * y_bit + z_bit.
bbox_lines = [
    # the 12 edges of the box
    [0, 1], [0, 2], [0, 4],
    [1, 5], [1, 3],
    [2, 3], [2, 6],
    [3, 7],
    [4, 5], [4, 6],
    [5, 7],
    [6, 7],
    # diagonals across the high-x face (corners 4-7), marking that face
    [4, 7], [5, 6],
]

In [None]:
import shutil

def _draw_map_marker(img, x, y, color, width=1600, height=900, lower_extent=1):
    """Paint a small filled rectangle on the mini-map, modifying `img` in place.

    `(x, y)` are map coordinates with y pointing up; rows are flipped so a
    larger y is drawn higher in the image. The rectangle is clamped to the
    image and nothing is drawn when it collapses to an empty slice.
    `lower_extent` is how far the rectangle reaches below `y`.
    """
    minx = max(0, min(width - 1, x - 1))
    maxx = max(0, min(width - 1, x + 1))
    miny = (height - 1) - max(0, min(height - 1, y + 1))
    maxy = (height - 1) - max(0, min(height - 1, y - lower_extent))
    img[miny:maxy, minx:maxx] = color


def _project_bbox_corners(a, sd):
    """Return the pixel (x, y) of the 8 box corners of annotation `a` in sample data `sd`.

    Corners are ordered by nested loops over x, y, z (low value first), which
    is the ordering `bbox_lines` indexes into.
    """
    # a['size'] is reordered so that index 1 lies along the box's x axis.
    half_x, half_y, half_z = np.array((a['size'][1], a['size'][0], a['size'][2])) / 2
    rotation = Quaternion(a['rotation'])  # built once instead of once per corner

    corners = []
    for _x in (-half_x, half_x):
        for _y in (-half_y, half_y):
            for _z in (-half_z, half_z):
                corner = rotation.rotate(np.array([_x, _y, _z])) + a['translation']
                corners.append(world2pixel({
                    'translation': corner,
                    'category': a['category'],
                }, sd)[:2])
    return corners


def _render_frame(frame, width, height, mapx, mapy, map_scale):
    """Load one key frame and draw boxes plus the bird's-eye mini-map on it.

    Raises:
        FileNotFoundError: when the frame's image cannot be read.
        Exception: when an annotation indexed as in view fails `in_view`.
    """
    image_path = os.path.join(base_dir, frame['filename'])
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f'Could not read image: {image_path}')

    sample_data_token = frame['token']
    sd = sample_data_map[sample_data_token]
    visible = sample_data_to_annotations.get(sample_data_token, [])
    hidden = sample_data_to_out_of_view_annotations.get(sample_data_token, [])

    # Annotations BBox -> pixel. Colours are in OpenCV's BGR order.
    for a in visible:
        if not in_view(a)(sample_data_token):
            raise Exception(a)

        points = _project_bbox_corners(a, sd)
        color = (255, 0, 0) if a['category'].startswith('vehicle') else (0, 0, 255)
        for _p0, _p1 in bbox_lines:
            x0, y0 = points[_p0].astype(int)
            x1, y1 = points[_p1].astype(int)
            img = cv2.line(img, (int(x0), int(y0)), (int(x1), int(y1)), color, 2)

    # Plot All Annotations on the mini-map: white when out of view, otherwise
    # coloured by category. Visibility comes from which index the annotation
    # was found in, so nothing is re-projected here.
    for annotations, is_visible in ((visible, True), (hidden, False)):
        for a in annotations:
            x, y, _ = (bird_eye_view(a, sd) * map_scale).astype(int)
            if not is_visible:
                color = (255, 255, 255)
            elif a['category'].startswith('vehicle'):
                color = (255, 0, 0)
            else:
                color = (0, 0, 255)
            _draw_map_marker(img, x + mapx, y + mapy, color, width, height)

    # Plot Camera View Lines: back-project the four image corners to a far
    # depth and draw them from the ego position on the mini-map.
    [[fx, _, cx], [_, fy, cy], [_, _, s]] = sd['camera_intrinsic']
    ego_rotation_inv = Quaternion(sd['ego_rotation']).inverse
    camera_rotation = Quaternion(sd['camera_rotation'])
    origin = (mapx, (height - 1) - mapy)
    far = 1000
    for px, py in [(0, 0), (width, height), (width, 0), (0, height)]:
        ray = np.array([(s * px - cx) * far / fx, (s * py - cy) * far / fy, far])
        xx, yy, _ = (ego_rotation_inv.rotate(camera_rotation.rotate(ray)) * map_scale).astype(int)
        end = (int(xx + mapx), int((height - 1) - (yy + mapy)))
        img = cv2.line(img, origin, end, (225, 225, 225), 2)

    # Plot Ego Position
    _draw_map_marker(img, mapx, mapy, (0, 255, 0), width, height)

    # Plot Camera Position (relative to the ego pose); this marker reaches
    # one pixel further down than the others, as in the original drawing code.
    camera_offset = np.array(sd['camera_translation']) - np.array(sd['ego_translation'])
    x, y, _ = (ego_rotation_inv.rotate(camera_offset) * map_scale).astype(int)
    _draw_map_marker(img, x + mapx, y + mapy, (255, 0, 255), width, height, lower_extent=2)

    return img


def annotate_videos():
    """Write one annotated mp4 per (scene, channel) under `output_dir`/annotated-videos.

    Only key frames are rendered, in timestamp order. Each rendered frame is
    written 5 times to a 10 fps video, so it stays on screen for half a
    second. Relies on the notebook-level `scenes`, `sample_data_map`,
    `sample_data_to_annotations`, `sample_data_to_out_of_view_annotations`,
    `base_dir` and `output_dir`.
    """
    # exist_ok replaces a bare `except: pass`, which also hid real failures
    # such as permission errors.
    video_dir = os.path.join(output_dir, 'annotated-videos')
    os.makedirs(video_dir, exist_ok=True)

    width = 1600
    height = 900

    # Mini-map origin (the ego position) in map coordinates, and its scale factor.
    mapx = 1400
    mapy = 200
    map_scale = 3

    for scenename, scene in scenes.items():
        for channel, sds in scene.items():
            frames = sorted(sds, key=lambda x: x['timestamp'])
            filename = f'annotated-{scenename}-{channel}.mp4'
            print(filename)

            out = cv2.VideoWriter(
                os.path.join(video_dir, filename),
                cv2.VideoWriter_fourcc(*'mp4v'),
                10,
                (width, height)
            )
            try:
                for frame in tqdm(frames):
                    if not frame['is_key_frame']:
                        continue
                    img = _render_frame(frame, width, height, mapx, mapy, map_scale)
                    for _ in range(5):
                        out.write(img)
            finally:
                # Always finalise the file, even when a frame fails to render.
                out.release()
            cv2.destroyAllWindows()
annotate_videos()

In [None]:
# Persist the partitioned annotations as a pickled DataFrame, then display it.
partitioned_path = os.path.join(output_dir, 'annotation_partitioned.pkl')
df_output_annotations = pd.DataFrame.from_dict(output_annotations)
df_output_annotations.to_pickle(partitioned_path)

df_output_annotations

In [None]:
def format_data(d):
    """Flatten an (annotation, camera config) pair into one ground-truth record.

    The record is the annotation plus the config's token and timestamp, with
    the sample-level bookkeeping keys removed (KeyError if one is missing).
    """
    a, config = d
    # TODO: config is incorrect
    _d = {**a, 'sample_data_token': config['token'], 'timestamp': config['timestamp']}
    for dropped_key in ('sample_token', 'heading', 'sample_data_tokens', 'out_of_view_sample_data_tokens'):
        del _d[dropped_key]
    return _d

# '<scene>_<camera>' -> (all camera config tokens, ground-truth records of
# the annotations visible in that camera's key frames)
ground_truth_split = {}

for scene_name, scene in scenes.items():
    for camera_name, camera_configs in scene.items():
        print(scene_name, camera_name)
        key = scene_name + '_' + camera_name

        anns = []
        for config in camera_configs:
            token = config['token']
            if not config['is_key_frame']:
                continue
            if token in sample_data_to_annotations:
                anns.extend((a, config) for a in sample_data_to_annotations[token])
            else:
                # Key frame without any visible annotation: report whether it
                # at least has out-of-view ones.
                print(token, token in sample_data_to_out_of_view_annotations)

        config_tokens = [c['token'] for c in camera_configs]
        records = [format_data(pair) for pair in anns]
        ground_truth_split[key] = config_tokens, records

In [None]:
# Each value is a (camera config tokens, ground-truth records) pair, so
# len(val) would always be 2; report the size of both parts instead.
[(key, len(tokens), len(records)) for key, (tokens, records) in ground_truth_split.items()]

In [None]:
import json
# Write the per scene/camera ground truth as JSON. Each (tokens, records)
# tuple is serialised as a two-element JSON array.
with open(os.path.join(output_dir, 'annotation_splitted.json'), 'w') as f:
    json.dump(ground_truth_split, f, indent=2)