In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from glob import glob
import os
import sys
import json
from typing import Dict, Any

sys.path.append(os.path.join(os.getcwd(), "..", "tools"))

In [3]:
from preprocessing.create_a9_temporal_split import TemporalSequenceDetails, create_sequence_details

In [4]:
CLASSES = (
        "CAR",
        "TRAILER",
        "TRUCK",
        "VAN",
        "PEDESTRIAN",
        "BUS",
        "MOTORCYCLE",
        "OTHER",
        "BICYCLE",
        "EMERGENCY_VEHICLE",
    )

root_path = "../data/a9_temporal_80_20"
overall_summary = {x:{} for x in ["train", "val", "test"]}
splits = ["train", "val", "test"]

In [5]:
def analyze(root_path:str, split:str) -> Dict[str, Any]:
    if not os.path.exists(os.path.join(root_path, split)):
        return None

    print() # for new line
    sequence_data: Dict[str, TemporalSequenceDetails] = create_sequence_details(os.path.join(root_path, split))

    img_label_s1_folder = os.path.join(
        root_path, split, "labels_point_clouds", "s110_lidar_ouster_south"
    )
    img_label_s1_paths = sorted(glob(os.path.join(img_label_s1_folder, "*")))

    total_not_only_prev = 0
    total_not_only_next = 0
    total_not_both = 0

    data = {}

    for x in img_label_s1_paths:
        img_label_s1_json_path = os.path.join(x)
        json_data = None
        with open(img_label_s1_json_path, "r") as f:
            json_data = json.load(f)
            frame_idx = list(json_data["openlabel"]["frames"].keys())[0]
            frame_properties = json_data["openlabel"]["frames"][frame_idx]["frame_properties"]
            token = frame_properties["token"]
            prev = frame_properties["prev"]
            next = frame_properties["next"]
            scene_token = frame_properties["scene_token"]
            frame_idx = frame_properties["frame_idx"]
            if prev is None and next is None:
                total_not_both += 1
            elif prev is None:
                total_not_only_prev += 1
            elif next is None:
                total_not_only_next += 1

            data[token] = (prev, next, scene_token, frame_idx)

    chain_results = {}

    for token, (_, _, scene_token, _) in data.items():
        if scene_token not in chain_results:
            chain_results[scene_token] = {
                "total": 0,
                "frames": [token]
            }
        else:
            chain_results[scene_token]["frames"].append(token)

    for token, (prev, next, scene_token, frame_idx) in data.items(): 
        chain_results[scene_token]["total"] = len(chain_results[scene_token]["frames"])

    print("\n", "-" * 15, "split: ", split, "-" * 15)
    chain_results = sorted(chain_results.items(), key=lambda x:int(x[1]["total"]))

    print("No. Sequences with following number of frames: ")
    for x, y in sorted(chain_results):
        print("\tscene token", x, "\ttotal no frames: \t", y["total"])

    print("\nNumber of sequences: \t\t", len(chain_results))
    print("Samples with no prev: \t\t", total_not_only_prev)
    print("Samples with no next: \t\t", total_not_only_next)
    print("Samples with no prev nor next: \t", total_not_both)


    # fmt: off
    print("\nNo. Classes with following difficulty levels: ")
    total_difficulty_stats = {x: {"easy": 0, "moderate": 0, "hard": 0} for x in CLASSES}
    for scene_token, data in sequence_data.items():
        for cls, counts in data.total_difficulty_stats.items():
            for x, count in counts.items():
                total_difficulty_stats[cls][x] += count
    print("\n{:<20} {:<15} {:<15} {:<15}".format("Object Type", "easy","moderate","hard"))
    print("-" * 60)
    for cls, counts in total_difficulty_stats.items():
        print ("{:<20} {:<15} {:<15} {:<15}".format(cls, counts["easy"], counts["moderate"], counts["hard"]))
    total_difficulty = sum([x["easy"] for x in total_difficulty_stats.values()]) + sum([x["moderate"] for x in total_difficulty_stats.values()]) + sum([x["hard"] for x in total_difficulty_stats.values()])
    difficulty_counts = [sum([x["easy"] for x in total_difficulty_stats.values()]), sum([x["moderate"] for x in total_difficulty_stats.values()]), sum([x["hard"] for x in total_difficulty_stats.values()])]
    print("{:<20} {:<15} {:<15} {:<15}".format("Total (count)", difficulty_counts[0], difficulty_counts[1], difficulty_counts[2]))
    difficulty_ratios = [sum([x["easy"] for x in total_difficulty_stats.values()])/total_difficulty, sum([x["moderate"] for x in total_difficulty_stats.values()])/total_difficulty, sum([x["hard"] for x in total_difficulty_stats.values()])/total_difficulty]
    print("{:<20} {:<15.3f} {:<15.3f} {:<15.3f}".format("Total (ratio)", difficulty_ratios[0], difficulty_ratios[1], difficulty_ratios[2]))

    print("\nNo. Classes with following distances: ")
    total_distance_stats = {x: {"d<40": 0, "d40-50": 0, "d>50": 0} for x in CLASSES}
    for scene_token, data in sequence_data.items():
        for cls, counts in data.total_distance_stats.items():
            for x, count in counts.items():
                total_distance_stats[cls][x] += count
    print("\n{:<20} {:<15} {:<15} {:<15}".format("Object Type", "d <= 40m","40 < d <= 50","50 < d"))
    print("-" * 60)
    for cls, counts in total_distance_stats.items():
        print ("{:<20} {:<15} {:<15} {:<15}".format(cls, counts["d<40"], counts["d40-50"], counts["d>50"]))
    total_distances = sum([x["d<40"] for x in total_distance_stats.values()]) + sum([x["d40-50"] for x in total_distance_stats.values()]) + sum([x["d>50"] for x in total_distance_stats.values()])
    distance_counts = [sum([x["d<40"] for x in total_distance_stats.values()]), sum([x["d40-50"] for x in total_distance_stats.values()]), sum([x["d>50"] for x in total_distance_stats.values()])]
    print("{:<20} {:<15} {:<15} {:<15}".format("Total (count)", distance_counts[0], distance_counts[1], distance_counts[2]))
    distance_ratios = [sum([x["d<40"] for x in total_distance_stats.values()])/total_distances, sum([x["d40-50"] for x in total_distance_stats.values()])/total_distances, sum([x["d>50"] for x in total_distance_stats.values()])/total_distances]
    print("{:<20} {:<15.3f} {:<15.3f} {:<15.3f}".format("Total (ratio)", distance_ratios[0], distance_ratios[1], distance_ratios[2]))

    print("\nNo. Classes with following number of points: ")
    total_points_stats = {x: {"n<20": 0, "n20-50": 0, "n>50": 0} for x in CLASSES}
    for scene_token, data in sequence_data.items():
        for cls, counts in data.total_num_points_stats.items():
            for x, count in counts.items():
                total_points_stats[cls][x] += count
    print("\n{:<20} {:<15} {:<15} {:<15}".format("Object Type", "n <= 20","20 < n <= 50","50 < n"))
    print("-" * 60)
    for cls, counts in total_points_stats.items():
        print ("{:<20} {:<15} {:<15} {:<15}".format(cls, counts["n<20"], counts["n20-50"], counts["n>50"]))
    total_ratios = sum([x["n<20"] for x in total_points_stats.values()]) + sum([x["n20-50"] for x in total_points_stats.values()]) + sum([x["n>50"] for x in total_points_stats.values()])
    points_counts = [sum([x["n<20"] for x in total_points_stats.values()]), sum([x["n20-50"] for x in total_points_stats.values()]), sum([x["n>50"] for x in total_points_stats.values()])]
    print("{:<20} {:<15} {:<15} {:<15} ".format("Total (count)", points_counts[0], points_counts[1], points_counts[2]))
    points_ratios = [sum([x["n<20"] for x in total_points_stats.values()])/total_ratios, sum([x["n20-50"] for x in total_points_stats.values()])/total_ratios, sum([x["n>50"] for x in total_points_stats.values()])/total_ratios]
    print("{:<20} {:<15.3f} {:<15.3f} {:<15.3f}".format("Total (ratio)", points_ratios[0], points_ratios[1], points_ratios[2]))

    print("\nNo. Classes with following occlusion levels: ")
    total_occlusion_stats = {x: {"NOT_OCCLUDED": 0, "PARTIALLY_OCCLUDED": 0, "MOSTLY_OCCLUDED": 0, "UNKNOWN" : 0} for x in CLASSES}
    for scene_token, data in sequence_data.items():
        for cls, counts in data.total_occlusion_stats.items():
            for x, count in counts.items():
                total_occlusion_stats[cls][x] += count
    print("\n{:<20} {:<20} {:<20} {:<20} {:<20}".format("Object Type", "NOT_OCCLUDED","PARTIALLY_OCCLUDED","MOSTLY_OCCLUDED", "UNKNOWN"))
    print("-" * 100)
    for cls, counts in total_occlusion_stats.items():
        print ("{:<20} {:<20} {:<20} {:<20} {:<20}".format(cls, counts["NOT_OCCLUDED"], counts["PARTIALLY_OCCLUDED"], counts["MOSTLY_OCCLUDED"], counts["UNKNOWN"]))
    total_occlusion = sum([x["NOT_OCCLUDED"] for x in total_occlusion_stats.values()]) + sum([x["PARTIALLY_OCCLUDED"] for x in total_occlusion_stats.values()]) + sum([x["MOSTLY_OCCLUDED"] for x in total_occlusion_stats.values()]) + sum([x["UNKNOWN"] for x in total_occlusion_stats.values()])
    occlusion_counts = [sum([x["NOT_OCCLUDED"] for x in total_occlusion_stats.values()]), sum([x["PARTIALLY_OCCLUDED"] for x in total_occlusion_stats.values()]), sum([x["MOSTLY_OCCLUDED"] for x in total_occlusion_stats.values()]), sum([x["UNKNOWN"] for x in total_occlusion_stats.values()])]
    print("{:<20} {:<20} {:<20} {:<20} {:<20}".format("Total (count)", occlusion_counts[0], occlusion_counts[1], occlusion_counts[2], occlusion_counts[3]))
    occlusion_ratios = [sum([x["NOT_OCCLUDED"] for x in total_occlusion_stats.values()])/total_occlusion, sum([x["PARTIALLY_OCCLUDED"] for x in total_occlusion_stats.values()])/total_occlusion, sum([x["MOSTLY_OCCLUDED"] for x in total_occlusion_stats.values()])/total_occlusion, sum([x["UNKNOWN"] for x in total_occlusion_stats.values()])/total_occlusion]
    print("{:<20} {:<20.3f} {:<20.3f} {:<20.3f} {:<20.3f}".format("Total (ratio)", occlusion_ratios[0], occlusion_ratios[1], occlusion_ratios[2], occlusion_ratios[3]))

    return {
        "difficulty_ratios": difficulty_ratios,
        "difficulty_counts": difficulty_counts,
        "distance_ratios": distance_ratios,
        "distance_counts": distance_counts,
        "points_ratios": points_ratios,
        "points_counts": points_counts,
        "occlusion_ratios": occlusion_ratios,
        "occlusion_counts": occlusion_counts
    }

    # fmt: on

In [6]:
def summarize(overall_summary) -> None:
    print("\n{:<10} {:<15} {:<15} {:<15}".format("Split", "easy", "moderate", "hard"))
    print("-" * 55)
    for split, data in overall_summary.items():
        print ("{:<10}| {:<15} {:<15} {:<15}".format(split, f"{data['difficulty_counts'][0]:<5} ({data['difficulty_ratios'][0]:.3f})", f"{data['difficulty_counts'][1]:<5} ({data['difficulty_ratios'][1]:.3f})", f"{data['difficulty_counts'][2]:<5} ({data['difficulty_ratios'][2]:.3f})"))

    print("\n{:<10} {:<15} {:<15} {:<15}".format("Split", "d <= 40m","40 < d <= 50","50 < d"))
    print("-" * 55)
    for split, data in overall_summary.items():
        print ("{:<10}| {:<15} {:<15} {:<15}".format(split, f"{data['distance_counts'][0]:<5} ({data['distance_ratios'][0]:.3f})", f"{data['distance_counts'][1]:<5} ({data['distance_ratios'][1]:.3f})", f"{data['distance_counts'][2]:<5} ({data['distance_ratios'][2]:.3f})"))

    print("\n{:<10} {:<15} {:<15} {:<15}".format("Split", "n <= 20","20 < n <= 50","50 < n"))
    print("-" * 55)
    for split, data in overall_summary.items():
        print ("{:<10}| {:<15} {:<15} {:<15}".format(split, f"{data['points_counts'][0]:<5} ({data['points_ratios'][0]:.3f})", f"{data['points_counts'][1]:<5} ({data['points_ratios'][1]:.3f})", f"{data['points_counts'][2]:<5} ({data['points_ratios'][2]:.3f})"))

    print("\n{:<10} {:<20} {:<20} {:<20} {:<20}".format("Split", "NOT_OCCLUDED","PARTIALLY_OCCLUDED","MOSTLY_OCCLUDED", "UNKNOWN"))
    print("-" * 90)
    for split, data in overall_summary.items():
        print ("{:<10}| {:<20} {:<20} {:<20} {:<20}".format(split, f"{data['occlusion_counts'][0]:<5} ({data['occlusion_ratios'][0]:.3f})", f"{data['occlusion_counts'][1]:<5} ({data['occlusion_ratios'][1]:.3f})", f"{data['occlusion_counts'][2]:<5} ({data['occlusion_ratios'][2]:.3f})", f"{data['occlusion_counts'][3]:<5} ({data['occlusion_ratios'][3]:.3f})"))


# Train Split

In [7]:
overall_summary["train"] = analyze(root_path, "train")




reading: 100%|██████████| 1920/1920 [00:01<00:00, 1419.90it/s]



 --------------- split:  train ---------------
No. Sequences with following number of frames: 
	scene token 018718e58e3449deaee359dae8eb6c83 	total no frames: 	 80
	scene token 1039ef33bc474d85a9e4ec1c79b514a0 	total no frames: 	 200
	scene token 184f37e3e4af4f499f99267d6d2f2add 	total no frames: 	 20
	scene token 1bef4f062c304b2c8d079e03caa7f2ed 	total no frames: 	 240
	scene token 29994d5d927b476d9a46cb0d16b5efed 	total no frames: 	 60
	scene token 29bb0c3e0c334cc6b106d26a469bbc30 	total no frames: 	 140
	scene token 4018b0126fb5426eaa1d666b38c178a5 	total no frames: 	 80
	scene token 443f4b2197ad413dbec0f89e9bd6901a 	total no frames: 	 20
	scene token 5a181d90680647cf9fb344bea00df2d0 	total no frames: 	 160
	scene token 61e485dd7291466a90dc7e8d5ed0d281 	total no frames: 	 20
	scene token 64eb61913e3240a9a6d52f740eb3cf66 	total no frames: 	 20
	scene token 6d3c9a7a13d24d11b0aed6f69378bced 	total no frames: 	 100
	scene token 70f5bf4a4d254fbe8892f97e3e6ca76a 	total no frames: 	 20
	s

# Validation Split

In [8]:
overall_summary["val"] = analyze(root_path, "val")




reading: 100%|██████████| 480/480 [00:00<00:00, 1155.77it/s]



 --------------- split:  val ---------------
No. Sequences with following number of frames: 
	scene token 06827b2f62fe4ffd9b3b1384af4d25f6 	total no frames: 	 20
	scene token 09ec0d0b9c9d4bad95a17c7685ea4eb5 	total no frames: 	 20
	scene token 0aa762c57ff44b9ab31e1af6dd0dfe4e 	total no frames: 	 20
	scene token 0d849d0c926b409b9652acfe7ed5af9e 	total no frames: 	 20
	scene token 0f17fe2082974f45bf55b2cdf0c7d58b 	total no frames: 	 20
	scene token 24b8d752c6454e4ca6c39e4502202cae 	total no frames: 	 20
	scene token 3b16e694730849e7b2951663e4789f8c 	total no frames: 	 20
	scene token 40279cb1f199419ea54ab55116cea018 	total no frames: 	 20
	scene token 5657513ccd4847cdb8878e53197d639d 	total no frames: 	 20
	scene token 59c66b7144f346138d40c4b472e9d699 	total no frames: 	 20
	scene token 6f6c90ce0b3342559cf783760555f20c 	total no frames: 	 20
	scene token 72468959d9c4456aa3a1b4c23600d711 	total no frames: 	 20
	scene token 748e9ff49cfd4700a30d5eba8c6a24e2 	total no frames: 	 20
	scene to

# Test Split

In [9]:
if os.path.exists(os.path.join(root_path, "test")):
    overall_summary["test"] = analyze(root_path, "test")

# Overall Summary

In [10]:
summarize(overall_summary)


Split      easy            moderate        hard           
-------------------------------------------------------
train     | 12167 (0.413)   9882  (0.335)   7434  (0.252)  
val       | 3624  (0.413)   2892  (0.330)   2254  (0.257)  


KeyError: 'difficulty_counts'