In [None]:
import json
import os
from pyquaternion import Quaternion
import pandas as pd
import pickle
import numpy as np
import math
from tqdm import tqdm

from utils.data_dirs import data_dirs
from utils.get_camera_heading import get_camera_heading
from utils.get_camera_position import get_camera_position
from utils.get_camera_rotation import get_camera_rotation
from utils.get_heading_from_north import get_heading_from_north
from utils.get_heading import get_heading
from utils.index import index
from utils.normalize_angle import normalize_angle
from utils.unique import unique

In [None]:
EXPERIMENT = False

In [None]:
# Resolve the directories for the selected mode; any extra values returned by
# `data_dirs` beyond the first five are ignored.
(
    base_dir,
    output_dir,
    folder,
    EXPERIMENT_DATA,
    suffix,
    *_,
) = data_dirs(EXPERIMENT)
# One value per line, in the same order as the unpacking above.
print(base_dir, output_dir, folder, EXPERIMENT_DATA, suffix, sep='\n')

In [None]:
os.listdir(os.path.join(base_dir, folder))

In [None]:
def _load_table(name):
    """Load one JSON table of the dataset.

    Args:
        name: Table name without extension; the file read is
            ``<base_dir>/<folder>/<name>.json``.

    Returns:
        Whatever ``json.load`` produces for that file.
    """
    # JSON is read explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(os.path.join(base_dir, folder, f'{name}.json'), encoding='utf-8') as f:
        return json.load(f)


calibrated_sensor_json = _load_table('calibrated_sensor')
category_json = _load_table('category')
sample_json = _load_table('sample')
sample_data_json = _load_table('sample_data')
sample_annotation_json = _load_table('sample_annotation')
instance_json = _load_table('instance')
scene_json = _load_table('scene')
ego_pose_json = _load_table('ego_pose')
sensor_json = _load_table('sensor')
log_json = _load_table('log')

In [None]:
# Outside experiment mode every sample_data record is a candidate; in
# experiment mode only records whose file (third '/'-separated component of
# `filename`) is present in EXPERIMENT_DATA are kept.
if not EXPERIMENT:
    sample_data_filter = sample_data_json
else:
    files = os.listdir(EXPERIMENT_DATA)
    files_set = set(files)
    sample_data_filter = [
        record
        for record in sample_data_json
        if record['filename'].split('/')[2] in files_set
    ]
# Keep only the records stored as jpg.
sample_data_filter = [
    record for record in sample_data_filter if record['fileformat'] == 'jpg'
]

In [None]:
sample_data_filter[0]

In [None]:
len(log_json)

In [None]:
log_json[0]

In [None]:
# Samples referenced by the selected sample_data records, reduced to the
# fields used later and renamed so they do not clash when merged.
sample_tokens = unique(sample_data_filter, 'sample_token')
sample_filter = [
    dict(
        sample_token=row['token'],
        scene_token=row['scene_token'],
        sample_timestamp=row['timestamp'],
    )
    for row in sample_json
    if row['token'] in sample_tokens
]
len(sample_filter)

In [None]:
# Calibrated sensors referenced by the selected sample_data records, with
# their fields renamed to camera_* / *_token.
calibrated_sensor_tokens = unique(sample_data_filter, 'calibrated_sensor_token')
calibrated_sensor_filter = [
    dict(
        calibrated_sensor_token=row['token'],
        camera_translation=row['translation'],
        camera_rotation=row['rotation'],
        camera_intrinsic=row['camera_intrinsic'],
        sensor_token=row['sensor_token'],
    )
    for row in calibrated_sensor_json
    if row['token'] in calibrated_sensor_tokens
]
len(calibrated_sensor_filter)

In [None]:
# Sensors referenced by the selected calibrated sensors.
sensor_tokens = unique(calibrated_sensor_filter, 'sensor_token')
sensor_filter = [
    dict(
        sensor_token=row['token'],
        channel=row['channel'],
        modality=row['modality'],
    )
    for row in sensor_json
    if row['token'] in sensor_tokens
]

In [None]:
# Ego poses referenced by the selected sample_data records, with their
# fields renamed to ego_*.
ego_pose_tokens = unique(sample_data_filter, 'ego_pose_token')
ego_pose_filter = [
    dict(
        ego_pose_token=row['token'],
        ego_translation=row['translation'],
        ego_rotation=row['rotation'],
    )
    for row in ego_pose_json
    if row['token'] in ego_pose_tokens
]
len(ego_pose_filter)

In [None]:
# Scenes referenced by the selected samples.
scene_tokens = unique(sample_filter, 'scene_token')
scene_filter = [
    dict(
        scene_token=row['token'],
        scene_name=row['name'],
        log_token=row['log_token'],
    )
    for row in scene_json
    if row['token'] in scene_tokens
]
len(scene_filter)

In [None]:
# Logs referenced by the selected scenes; only the location is carried along.
log_tokens = unique(scene_filter, 'log_token')
log_filter = [
    dict(
        log_token=entry['token'],
        location=entry['location'],
    )
    for entry in log_json
    if entry['token'] in log_tokens
]
len(log_filter)

In [None]:
# Build one lookup per filtered table with the project's `index` helper
# (presumably keyed by the given token field — see utils.index). The calls
# run in the same order as the names on the left-hand side.
(
    log_map,
    sample_map,
    calibrated_sensor_map,
    ego_pose_map,
    scene_map,
    sensor_map,
) = (
    index(rows, key)
    for rows, key in (
        (log_filter, 'log_token'),
        (sample_filter, 'sample_token'),
        (calibrated_sensor_filter, 'calibrated_sensor_token'),
        (ego_pose_filter, 'ego_pose_token'),
        (scene_filter, 'scene_token'),
        (sensor_filter, 'sensor_token'),
    )
)

In [None]:
def s_map(s):
    """Flatten one sample_data record together with its related table rows.

    The record is merged with its sample, calibrated sensor, ego pose, scene,
    sensor and log rows (later rows win on key clashes). Heading fields are
    added, the camera translation/rotation are replaced by the values computed
    from the calibration and the ego pose, and join-only keys are removed.

    Args:
        s: A sample_data record; must carry 'sample_token',
            'calibrated_sensor_token' and 'ego_pose_token'.

    Returns:
        A new flat dict; the input dicts are not modified.

    Raises:
        KeyError: If a referenced row or a key to delete is missing.
        AssertionError: If the sensor modality is not 'camera'.
    """
    sample_row = sample_map[s['sample_token']]
    calibration = calibrated_sensor_map[s['calibrated_sensor_token']]
    pose = ego_pose_map[s['ego_pose_token']]
    scene_row = scene_map[sample_row['scene_token']]
    sensor_row = sensor_map[calibration['sensor_token']]
    assert sensor_row['modality'] == 'camera'

    log_row = log_map[scene_row['log_token']]

    ego_heading = get_heading_from_north(Quaternion(pose['ego_rotation']))
    camera_heading = get_camera_heading(Quaternion(calibration['camera_rotation']))

    # Merge in this order so that later tables override earlier ones.
    merged = {}
    for part in (s, sample_row, calibration, pose, scene_row, sensor_row, log_row):
        merged.update(part)

    # Headings are scaled by 180/pi (degrees, assuming the helpers return radians).
    merged['ego_heading'] = ego_heading * 180 / math.pi
    merged['camera_heading'] = normalize_angle(camera_heading + ego_heading) * 180 / math.pi
    # Overwrite the calibration's raw translation/rotation with the values
    # derived from calibration + ego pose.
    merged['camera_translation'] = get_camera_position(
        calibration['camera_translation'],
        pose['ego_translation'],
        pose['ego_rotation'],
    )
    merged['camera_rotation'] = get_camera_rotation(
        calibration['camera_rotation'],
        pose['ego_rotation'],
    )

    # Drop join keys and fields that are not needed in the output.
    for unwanted in (
        'ego_pose_token',
        'calibrated_sensor_token',
        'log_token',
        'fileformat',
        'height',
        'width',
        'prev',
        'next',
        'scene_token',
        'sensor_token',
        'modality',
    ):
        del merged[unwanted]
    return merged

# Flatten every selected sample_data record, with a progress bar.
sample_data_res = [
    s_map(record)
    for record in tqdm(sample_data_filter, total=len(sample_data_filter))
]

len(sample_data_res)

In [None]:
sample_data_res[0]

In [None]:
# Report annotations whose sample is not among the selected samples.
# Printing one line per annotation repeats the same token many times and can
# flood the cell output, so only a de-duplicated count and a short preview
# are shown.
missing_sample_tokens = sorted({
    sa['sample_token']
    for sa in sample_annotation_json
    if sa['sample_token'] not in sample_tokens
})
print(f'{len(missing_sample_tokens)} sample token(s) referenced by annotations are not selected')
if missing_sample_tokens:
    print(*missing_sample_tokens[:10], sep='\n')

In [None]:
# Annotations that belong to one of the selected samples.
sample_annotation_filter = [
    annotation
    for annotation in sample_annotation_json
    if annotation['sample_token'] in sample_tokens
]
len(sample_annotation_filter)

In [None]:
# Instances referenced by the selected annotations.
instance_tokens = unique(sample_annotation_filter, 'instance_token')
instance_filter = [
    dict(
        instance_token=row['token'],
        category_token=row['category_token'],
    )
    for row in instance_json
    if row['token'] in instance_tokens
]
len(instance_filter)

In [None]:
# Categories referenced by the selected instances; `name` is exposed as
# `category`.
category_tokens = unique(instance_filter, 'category_token')
category_filter = [
    dict(
        category_token=row['token'],
        category=row['name'],
    )
    for row in category_json
    if row['token'] in category_tokens
]
len(category_filter)

In [None]:
# Lookups for the annotation join, built with the project's `index` helper.
instance_map, category_map = (
    index(instance_filter, 'instance_token'),
    index(category_filter, 'category_token'),
)

In [None]:
def sa_map(sa):
    """Flatten one sample_annotation record with its instance and category.

    The annotation is merged with its instance row and that instance's
    category row, then given a 'heading', the log 'location' and the
    'scene_name' of the sample it belongs to. Fields not needed downstream
    are removed.

    Args:
        sa: A sample_annotation record; must carry 'instance_token',
            'sample_token' and 'rotation'.

    Returns:
        A new flat dict; the input dict is not modified.

    Raises:
        KeyError: If a referenced row or a key to delete is missing.
    """
    instance_row = instance_map[sa['instance_token']]
    sample_row = sample_map[sa['sample_token']]
    scene_row = scene_map[sample_row['scene_token']]
    log_row = log_map[scene_row['log_token']]

    merged = dict(sa)
    merged.update(instance_row)
    merged.update(category_map[instance_row['category_token']])
    # Scaled by 180/pi (degrees, assuming the helper returns radians).
    merged['heading'] = get_heading_from_north(Quaternion(sa['rotation'])) * 180 / math.pi
    merged['location'] = log_row['location']
    merged['scene_name'] = scene_row['scene_name']

    for unwanted in (
        'visibility_token',
        'attribute_tokens',
        'prev',
        'next',
        'num_lidar_pts',
        'num_radar_pts',
        'category_token',
    ):
        del merged[unwanted]

    return merged

# Flatten every selected annotation, with a progress bar.
sample_annotation_res = [
    sa_map(annotation)
    for annotation in tqdm(sample_annotation_filter, total=len(sample_annotation_filter))
]
len(sample_annotation_res)

In [None]:
# One row per flattened sample_data record; show the row count and a
# one-row preview.
df_sample_data = pd.DataFrame(data=sample_data_res)
print(df_sample_data.shape[0])
df_sample_data.head(1)

In [None]:
# One row per flattened annotation; show the row count and a one-row preview.
df_sample_annotation = pd.DataFrame(data=sample_annotation_res)
print(df_sample_annotation.shape[0])
df_sample_annotation.head(1)

In [None]:
# For every sample, collect the tokens of its key-frame sample_data rows
# into a list (column `sample_data_tokens`).
df_sample_data_keyframe = (df_sample_data
    [df_sample_data["is_key_frame"]]
    [["token", "sample_token"]]
    .groupby('sample_token')
    .agg(list)
    .reset_index()
    .rename(columns={'token': 'sample_data_tokens'})
)

# Attach that list to each annotation via its sample_token. The result
# replaces `df_sample_annotation`, so the column left behind by a previous
# run of this cell is dropped first; without this, re-running the cell fails
# because the joined frames would share the `sample_data_tokens` column.
df_sample_annotation = (df_sample_annotation
    .drop(columns=["sample_data_tokens"], errors="ignore")
    .set_index("sample_token")
    .join(
        df_sample_data_keyframe.set_index("sample_token"),
        on="sample_token",
    )
    .reset_index()
)
print(len(df_sample_annotation))
df_sample_annotation[:1]

In [None]:
len(df_sample_annotation)

In [None]:
len(df_sample_data_keyframe["sample_data_tokens"].tolist())

In [None]:
# Compare the number of distinct sample tokens with the number of rows.
# Both counts are returned as one tuple so that the cell displays them
# together; as two separate statements only the second would be shown.
(
    len({*df_sample_data_keyframe["sample_token"].tolist()}),
    len(df_sample_data_keyframe["sample_token"].tolist()),
)

In [None]:
# Number the frames of each scene: within every `scene_name` group the rows
# are ranked by `timestamp`. `method='first'` gives tied timestamps distinct
# ranks in order of appearance (per the pandas `rank` documentation), and the
# resulting ranks are cast to int.
df_sample_data["frame_order"] = (df_sample_data
    .groupby('scene_name')['timestamp']
    .rank(method='first')
    .astype(int)
)

In [None]:
print(len(df_sample_data))

In [None]:
print(len(df_sample_annotation))

In [None]:
# Persist both frames under output_dir; `suffix` distinguishes the run mode.
df_sample_data.to_pickle(
    os.path.join(output_dir, 'sample_data{}.pkl'.format(suffix))
)
df_sample_annotation.to_pickle(
    os.path.join(output_dir, 'annotation{}.pkl'.format(suffix))
)

In [None]:
# sample_data_final.to_csv(os.path.join(output_dir, f"sample_data{suffix}.csv"), index=False)

In [None]:
# sample_annotation_final.to_csv(os.path.join(output_dir, f"annotation{suffix}.csv"), index=False)

# Experiment with the data + some validation

In [None]:
# Key frames only: frame timestamp minus sample timestamp, divided by 1e6.
(
    df_sample_data.loc[df_sample_data['is_key_frame'], 'timestamp']
    - df_sample_data.loc[df_sample_data['is_key_frame'], 'sample_timestamp']
) / 1_000_000

In [None]:
# Per-sample statistics over the timestamps of its key-frame rows.
(
    df_sample_data
    .loc[df_sample_data['is_key_frame']]
    .groupby('sample_token')
    .agg(
        timestamp_len=("timestamp", lambda ts: len(set(ts))),
        timestamp_avg=("timestamp", lambda ts: f"{sum(ts) / len(ts):.5f}"),
        timestamp_range=("timestamp", lambda ts: max(ts) - min(ts)),
        timestamp_max=("timestamp", lambda ts: max(ts)),
        timestamp_min=("timestamp", lambda ts: min(ts)),
        sample_timestamp=("sample_timestamp", lambda ts: set(ts)),
    )
)


# .agg({
#     'timestamp': (lambda x: len(set(x))),
#     'timestamp': (lambda x: sum(x) / len(x))
# })

In [None]:
42606 / 1_000_000

In [None]:
df_sample_data[df_sample_data['timestamp'] == df_sample_data['sample_timestamp']]

In [None]:
# Work on a deep copy so the exported frame is left untouched.
df = df_sample_data.copy(deep=True)
# Signed and absolute offset of each frame from its sample's timestamp.
df['timediff'] = df['sample_timestamp'].sub(df['timestamp'])
df['timediffabs'] = df['timediff'].abs()

In [None]:
# Non-key frames whose timestamp is later than their sample's timestamp.
df.loc[df['timediff'].lt(0) & df['is_key_frame'].ne(True)]

In [None]:
# Per (sample, channel): list the time offsets and key-frame flags, and check
# whether every other frame's offset is strictly greater than the key frame's.
# Sorting by `is_key_frame` puts False before True, so when a group has a key
# frame it is the last element (`x.iloc[-1]`).
df1 = df.sort_values(['is_key_frame']).groupby(['sample_token', 'channel']).agg(
#     timestamp_len=pd.NamedAgg(column="timestamp", aggfunc=lambda x: len(set(x))),
    timestamp_arr=pd.NamedAgg(column="timediff", aggfunc=lambda x: list(x)),
    iskeyframe_arr=pd.NamedAgg(column="is_key_frame", aggfunc=lambda x: list(x)),
    # Compare only the elements before the last one: including the last
    # element compared it with itself (`v > v`), which is always False and
    # made this column False for every group.
    keyframe_closest=pd.NamedAgg(
        column="timediff",
        aggfunc=lambda x: all(xx > x.iloc[-1] for xx in x.iloc[:-1]),
    ),
)

df1