In [None]:
# Enable IPython's autoreload extension in mode 2 so imported modules are
# reloaded before each cell executes; edits to the helper modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import json
import os
import sys
from collections import defaultdict
from glob import glob
from typing import Any, Dict, Optional

sys.path.append(os.path.join(os.getcwd(), "..", "tools"))

In [None]:
from preprocessing.osdar23_find_temporal_split import TemporalSequenceDetails, create_sequence_details
from preprocessing.utils.table import create_header_line, log_table

In [None]:
# Object classes covered by the statistics tables.
CLASSES = [
    "lidar__cuboid__person",
    "lidar__cuboid__signal",
    "lidar__cuboid__catenary_pole",
    "lidar__cuboid__signal_pole",
    "lidar__cuboid__train",
    "lidar__cuboid__road_vehicle",
    "lidar__cuboid__buffer_stop",
    "lidar__cuboid__animal",
    "lidar__cuboid__switch",
    "lidar__cuboid__bicycle",
    "lidar__cuboid__crowd",
    "lidar__cuboid__wagons",
    "lidar__cuboid__signal_bridge",
]

# Occlusion bucket labels used as column groups in the occlusion table.
OCCLUSION_LEVELS = [
    "0-25 %",
    "25-50 %",
    "50-75 %",
    "75-99 %",
    "100 %",
]

# Distance buckets: label -> min/max bounds of the bucket.
DISTANCE_LEVELS = {
    "0-49": dict(min=0, max=50),
    "50-99": dict(min=50, max=100),
    "100-149": dict(min=100, max=150),
    "150-199": dict(min=150, max=200),
    "200-inf": dict(min=200, max=100000),
}

# Point-count buckets: label -> min/max bounds of the bucket.
NUM_POINTS_LEVELS = {
    "0-199": dict(min=0, max=200),
    "200-499": dict(min=200, max=500),
    "500-999": dict(min=500, max=1000),
    "1000-1999": dict(min=1000, max=2000),
    "2000-2999": dict(min=2000, max=3000),
    "3000-inf": dict(min=3000, max=100000),
}

# Dataset location and the splits to analyse; each split starts with its own
# empty result dictionary that the analysis cells below overwrite.
root_path = "../data/osdar23"
splits = ["train", "val"]
overall_summary = {split_name: {} for split_name in ("train", "val")}

In [None]:
def summarize(overall_summary, splits) -> None:
    """Print side-by-side statistics tables for the analysed splits.

    Four tables are rendered with ``log_table`` and printed: class counts, and
    per-class breakdowns by distance level, number of points and occlusion level.

    Args:
        overall_summary: Mapping from split name to the dictionary returned by
            ``analyze`` for that split. Entries that are ``None`` or empty (a
            split whose directory was missing, or one that has not been
            analysed yet) are skipped instead of raising.
        splits: Names of the splits to show, in column order. Each name is
            looked up in ``overall_summary``.
    """
    categories = ("class", "distance", "num_points", "occlusion")
    stat_keys = ("split_counts", "split_ratios")

    # Walk the requested splits rather than the mapping's own order, so the
    # i-th column label always belongs to the i-th counts/ratios entry.
    available_splits = []
    details = {category: {key: [] for key in stat_keys} for category in categories}
    for split in splits:
        data = overall_summary.get(split)
        if not data:
            continue
        available_splits.append(split)
        for category in categories:
            for key in stat_keys:
                details[category][key].append(data[category][key])

    if not available_splits:
        print("No analysed splits available to summarize.")
        return

    # (table header, statistics category, optional per-class level labels)
    tables = [
        ("Number and ratios of classes in splits:", "class", None),
        (
            "Number and ratios of objects with corresponding distance levels in splits:",
            "distance",
            list(DISTANCE_LEVELS.keys()),
        ),
        (
            "Number and ratios of objects with corresponding number of points in splits:",
            "num_points",
            list(NUM_POINTS_LEVELS.keys()),
        ),
        ("Number and ratios of objects with occlusion levels in splits:", "occlusion", OCCLUSION_LEVELS),
    ]

    out = []
    for header, category, levels in tables:
        # The trailing positional arguments are passed to log_table exactly as before.
        out.append(
            log_table(
                header,
                CLASSES,
                available_splits,
                details[category]["split_counts"],
                details[category]["split_ratios"],
                levels,
                15,
                True,
                "",
                "Class",
            )
        )

    print("\n".join(out))

def _to_ratios(counts):
    """Return ``{key: count / total}`` for a flat count mapping, or all zeros when the total is 0."""
    total = sum(counts.values())
    if total == 0:
        return {key: 0 for key in counts}
    return {key: count / total for key, count in counts.items()}


def analyze(root_path: str, split: str) -> Optional[Dict[str, Any]]:
    """Print frame-chain diagnostics for one split and aggregate its object statistics.

    Every file under ``<root_path>/<split>/labels_point_clouds`` is read as
    JSON. The ``frame_properties`` of its first frame entry supply the frame
    token, the ``prev``/``next`` links and the scene token. Frames are grouped
    by scene token and a per-scene frame count is printed, together with how
    many frames lack a ``prev`` and/or ``next`` link. Class, distance,
    number-of-points and occlusion counts are then summed over the sequences
    returned by ``create_sequence_details``.

    Args:
        root_path: Dataset root directory.
        split: Name of the split sub-directory, e.g. ``"train"``.

    Returns:
        ``None`` if ``<root_path>/<split>`` does not exist. Otherwise a dict
        with the keys ``"class"``, ``"distance"``, ``"num_points"`` and
        ``"occlusion"``, each holding ``"split_counts"`` and ``"split_ratios"``.
        Ratios are 0 wherever the corresponding total is 0.

    Raises:
        KeyError: If a label file lacks one of the expected keys, or if a
            sequence reports a class or level that is not in the configured tables.
    """
    split_dir = os.path.join(root_path, split)
    if not os.path.exists(split_dir):
        return None

    print()  # for new line
    sequence_data: Dict[str, TemporalSequenceDetails] = create_sequence_details(
        split_dir, sequences_seperated=False
    )

    lidar_label_paths = sorted(glob(os.path.join(split_dir, "labels_point_clouds", "*")))

    missing_prev_only = 0
    missing_next_only = 0
    missing_both = 0

    # token -> (scene_token, seq_name). Keyed by token so that a token found in
    # more than one label file is only counted once (the last file wins).
    frames_by_token = {}
    for label_path in lidar_label_paths:
        with open(label_path, "r") as f:
            openlabel = json.load(f)["openlabel"]

        seq_name = openlabel["metadata"]["tagged_file"]
        # Only the first frame entry of each label file is inspected.
        first_frame = list(openlabel["frames"].values())[0]
        frame_properties = first_frame["frame_properties"]
        token = frame_properties["token"]
        prev_token = frame_properties["prev"]
        next_token = frame_properties["next"]
        scene_token = frame_properties["scene_token"]

        if prev_token is None and next_token is None:
            missing_both += 1
        elif prev_token is None:
            missing_prev_only += 1
        elif next_token is None:
            missing_next_only += 1

        frames_by_token[token] = (scene_token, seq_name)

    # Group frames by scene; the sequence name of the first frame seen is kept.
    chain_results = {}
    for token, (scene_token, seq_name) in frames_by_token.items():
        scene = chain_results.setdefault(scene_token, {"total": 0, "frames": [], "seq_name": seq_name})
        scene["frames"].append(token)
        scene["total"] += 1

    print("\n", "-" * 15, "split: ", split, "-" * 15)

    print("No. Sequences with following number of frames: ")
    # Listed in scene-token order, the order the previous implementation
    # effectively produced after its second sort.
    for scene_token, scene in sorted(chain_results.items(), key=lambda item: item[0]):
        print(f"{scene['seq_name']:<35}", "\ttoken", scene_token, "\tno_frames:", scene["total"])

    print("\nNumber of sequences: \t\t", len(chain_results))
    print("Samples with no prev: \t\t", missing_prev_only)
    print("Samples with no next: \t\t", missing_next_only)
    print("Samples with no prev nor next: \t", missing_both)

    total_class_count = {cls: 0 for cls in CLASSES}
    total_distance_count = {cls: {level: 0 for level in DISTANCE_LEVELS} for cls in CLASSES}
    total_num_points_count = {cls: {level: 0 for level in NUM_POINTS_LEVELS} for cls in CLASSES}
    total_occlusion_count = {cls: {level: 0 for level in OCCLUSION_LEVELS} for cls in CLASSES}

    for seq in sequence_data.values():
        for cls, count in seq.total_class_stats.items():
            total_class_count[cls] += count
        for cls, levels in seq.total_distance_stats.items():
            for level, count in levels.items():
                total_distance_count[cls][level] += count
        for cls, levels in seq.total_num_points_stats.items():
            for level, count in levels.items():
                total_num_points_count[cls][level] += count
        for cls, levels in seq.total_occlusion_stats.items():
            for level, count in levels.items():
                total_occlusion_count[cls][level] += count

    # Class ratios are relative to all objects in the split; the level ratios
    # are relative to the objects of the same class. All four share the same
    # zero-total guard, so an empty split no longer raises ZeroDivisionError.
    class_count_ratios = _to_ratios(total_class_count)
    distance_count_ratios = {cls: _to_ratios(levels) for cls, levels in total_distance_count.items()}
    num_points_count_ratios = {cls: _to_ratios(levels) for cls, levels in total_num_points_count.items()}
    occlusion_count_ratios = {cls: _to_ratios(levels) for cls, levels in total_occlusion_count.items()}

    return {
        "class": {
            "split_counts": total_class_count,
            "split_ratios": class_count_ratios,
        },
        "distance": {
            "split_counts": total_distance_count,
            "split_ratios": distance_count_ratios,
        },
        "num_points": {
            "split_counts": total_num_points_count,
            "split_ratios": num_points_count_ratios,
        },
        "occlusion": {
            "split_counts": total_occlusion_count,
            "split_ratios": occlusion_count_ratios,
        },
    }

In [None]:
# Analyse the "train" split; analyze() returns None when the split directory does not exist.
overall_summary["train"] = analyze(root_path, "train")

In [None]:
# Analyse the "val" split; analyze() returns None when the split directory does not exist.
overall_summary["val"] = analyze(root_path, "val")

In [None]:
# Print the comparison tables, using the split list from the configuration cell
# rather than repeating the literal here.
summarize(overall_summary, splits)