In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import json
import os
import sys
from glob import glob
from typing import Any, Dict, Optional

sys.path.append(os.path.join(os.getcwd(), "..", "tools"))

In [None]:
from preprocessing.osdar23_create_temporal_split import TemporalSequenceDetails, create_sequence_details

In [None]:
# Label class names; every entry carries the "lidar__cuboid__" prefix.
CLASSES = [
    "lidar__cuboid__person",
    "lidar__cuboid__signal",
    "lidar__cuboid__catenary_pole",
    "lidar__cuboid__signal_pole",
    "lidar__cuboid__train",
    "lidar__cuboid__road_vehicle",
    "lidar__cuboid__buffer_stop",
    "lidar__cuboid__animal",
    "lidar__cuboid__switch",
    "lidar__cuboid__bicycle",
    "lidar__cuboid__crowd",
    "lidar__cuboid__wagons",
    "lidar__cuboid__signal_bridge",
]

# Dataset root, relative to the notebook's working directory.
root_path = "../data/osdar23"

# Names of the dataset splits handled by this notebook.
splits = ["train", "val"]

# Per-split results; each split starts with its own empty dict.
overall_summary = {split_name: {} for split_name in splits}

In [None]:
def _read_frame_record(label_path: str) -> Dict[str, Any]:
    """Load one label JSON file and pull out the fields used for chain analysis.

    Only the first entry of ``openlabel.frames`` is inspected.

    Args:
        label_path: Path to a JSON label file.

    Returns:
        Dict with the keys ``token``, ``prev``, ``next``, ``scene_token`` and
        ``frame_idx`` (all read from the frame's ``frame_properties``) plus
        ``seq_name`` (read from ``openlabel.metadata.tagged_file``).

    Raises:
        KeyError: If an expected key is missing from the file.
        IndexError: If the file contains no frames.
    """
    # Parse inside the ``with`` and do everything else after the file is closed.
    with open(label_path, "r", encoding="utf-8") as label_file:
        label = json.load(label_file)

    openlabel = label["openlabel"]
    frame_properties = list(openlabel["frames"].values())[0]["frame_properties"]
    return {
        "token": frame_properties["token"],
        "prev": frame_properties["prev"],
        "next": frame_properties["next"],
        "scene_token": frame_properties["scene_token"],
        "frame_idx": frame_properties["frame_idx"],
        "seq_name": openlabel["metadata"]["tagged_file"],
    }


def analyze(root_path: str, split: str) -> Optional[Dict[str, Any]]:
    """Print and return per-sequence frame statistics for one dataset split.

    Every file under ``<root_path>/<split>/labels_point_clouds`` is read, the
    frames are grouped by their ``scene_token`` and a report is printed.
    Sequences are listed by ascending frame count, ties broken by scene token.

    Args:
        root_path: Dataset root directory.
        split: Name of the split sub-directory (e.g. ``"train"``).

    Returns:
        ``None`` if ``<root_path>/<split>`` does not exist. Otherwise a dict:

        * ``split``: the split name,
        * ``num_samples``: number of label files read,
        * ``num_sequences``: number of distinct scene tokens,
        * ``no_prev``: samples whose ``prev`` is ``None`` but ``next`` is set,
        * ``no_next``: samples whose ``next`` is ``None`` but ``prev`` is set,
        * ``no_prev_nor_next``: samples where both are ``None``,
        * ``sequences``: ``{scene_token: {"total", "frames", "seq_name"}}`` in
          the same order as the printed listing.
    """
    split_dir = os.path.join(root_path, split)
    if not os.path.exists(split_dir):
        return None

    print()  # for new line
    # NOTE(review): the returned sequence details are not used by this analysis.
    # The call is kept in case it has side effects (e.g. progress output) —
    # confirm, and drop it if it has none.
    create_sequence_details(split_dir, sequences_seperated=False)

    label_paths = sorted(glob(os.path.join(split_dir, "labels_point_clouds", "*")))

    no_prev_only = 0
    no_next_only = 0
    no_prev_nor_next = 0

    # Keyed by frame token; a repeated token overwrites the earlier record.
    records: Dict[Any, Dict[str, Any]] = {}
    for label_path in label_paths:
        record = _read_frame_record(label_path)
        missing_prev = record["prev"] is None
        missing_next = record["next"] is None
        if missing_prev and missing_next:
            no_prev_nor_next += 1
        elif missing_prev:
            no_prev_only += 1
        elif missing_next:
            no_next_only += 1
        records[record["token"]] = record

    # Group frame tokens by scene; the sequence name comes from the first frame
    # seen for that scene.
    sequences: Dict[Any, Dict[str, Any]] = {}
    for token, record in records.items():
        entry = sequences.setdefault(
            record["scene_token"],
            {"total": 0, "frames": [], "seq_name": record["seq_name"]},
        )
        entry["frames"].append(token)
        entry["total"] += 1

    # Sort once, by frame count and then by scene token. Previously the
    # frame-count ordering was thrown away by a second, key-less sort.
    ordered = sorted(sequences.items(), key=lambda item: (item[1]["total"], item[0]))

    print("\n", "-" * 15, "split: ", split, "-" * 15)
    print("No. Sequences with following number of frames: ")
    for scene_token, entry in ordered:
        print(f"{entry['seq_name']:<35}", "\ttoken", scene_token, "\tno_frames:", entry["total"])

    print("\nNumber of sequences: \t\t", len(ordered))
    print("Samples with no prev: \t\t", no_prev_only)
    print("Samples with no next: \t\t", no_next_only)
    print("Samples with no prev nor next: \t", no_prev_nor_next)

    return {
        "split": split,
        "num_samples": len(label_paths),
        "num_sequences": len(ordered),
        "no_prev": no_prev_only,
        "no_next": no_next_only,
        "no_prev_nor_next": no_prev_nor_next,
        "sequences": dict(ordered),
    }

In [None]:
# Run the analysis on the training split and keep whatever it returns.
overall_summary["train"] = analyze(root_path=root_path, split="train")

In [None]:
# Run the analysis on the validation split and keep whatever it returns.
overall_summary["val"] = analyze(root_path=root_path, split="val")