In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from glob import glob
import os
import sys
import json
from typing import Dict

# Put "<working directory>/../tools" on the module search path.
# NOTE(review): presumably this is where the `preprocessing` package imported
# in the next cell lives — confirm. Depends on the notebook's working directory.
sys.path.append(os.path.join(os.getcwd(), "..", "tools"))

In [3]:
from preprocessing.create_a9_temporal_split import TemporalSequenceDetails, create_sequence_details

In [4]:
# Object categories used to pre-populate the per-class statistics tables.
# The tables are printed in this order, so reordering entries reorders rows.
CLASSES = (
    "CAR",
    "TRAILER",
    "TRUCK",
    "VAN",
    "PEDESTRIAN",
    "BUS",
    "MOTORCYCLE",
    "OTHER",
    "BICYCLE",
    "EMERGENCY_VEHICLE",
)

In [5]:
root_path = "../data/a9_temporal"
SPLITS = ("train", "val", "test")


def read_frame_links(label_paths):
    """Read the temporal linkage stored in each label file.

    Every path is parsed as JSON. Only the first entry of
    ``["openlabel"]["frames"]`` is inspected, and its ``frame_properties``
    (``token``, ``prev``, ``next``, ``scene_token``, ``frame_idx``) are read.

    Args:
        label_paths: Iterable of paths to label JSON files.

    Returns:
        ``(links, no_prev, no_next, no_both)`` where ``links`` maps each frame
        token to ``(prev, next, scene_token, frame_idx)`` and the integers count
        the frames that lack only ``prev``, only ``next``, or both.

    Raises:
        KeyError / IndexError: If a file does not have the expected layout.
    """
    links = {}
    no_prev = no_next = no_both = 0
    for label_path in label_paths:
        with open(label_path, "r") as label_file:
            frames = json.load(label_file)["openlabel"]["frames"]
        first_key = list(frames.keys())[0]
        props = frames[first_key]["frame_properties"]
        # Deliberately not named `next`: at notebook top level that would
        # shadow the builtin for every later cell.
        prev_token, next_token = props["prev"], props["next"]
        if prev_token is None and next_token is None:
            no_both += 1
        elif prev_token is None:
            no_prev += 1
        elif next_token is None:
            no_next += 1
        links[props["token"]] = (
            prev_token,
            next_token,
            props["scene_token"],
            props["frame_idx"],
        )
    return links, no_prev, no_next, no_both


def summarize_class_stat(
    sequence_data,
    attr_name,
    title,
    bucket_keys,
    headers,
    classes,
    col_width=15,
    rule_len=60,
    count_row_suffix="",
):
    """Aggregate one per-class statistic over all sequences and print it as a table.

    Args:
        sequence_data: Mapping of scene token to an object exposing ``attr_name``
            as a ``{class: {bucket: count}}`` mapping.
        attr_name: Name of the attribute to aggregate.
        title: Line printed before the table.
        bucket_keys: Bucket names, in column order.
        headers: Column captions, parallel to ``bucket_keys``.
        classes: Class names used to pre-populate the table rows (row order).
        col_width: Width of each value column.
        rule_len: Length of the dashed rule under the header.
        count_row_suffix: Text appended to the "Total (count)" row.

    Returns:
        ``(totals, bucket_counts, bucket_ratios)``: the per-class totals, the
        per-bucket sums over all classes, and each sum divided by the grand
        total. The ratios are ``0.0`` when the grand total is zero.

    Raises:
        KeyError: If a sequence reports a class or bucket that is not listed in
            ``classes`` / ``bucket_keys``.
    """
    print(title)
    totals = {cls: {key: 0 for key in bucket_keys} for cls in classes}
    for details in sequence_data.values():
        for cls, counts in getattr(details, attr_name).items():
            for bucket, count in counts.items():
                totals[cls][bucket] += count

    text_fmt = " ".join(["{:<20}"] + ["{:<%d}" % col_width] * len(bucket_keys))
    ratio_fmt = " ".join(["{:<20}"] + ["{:<%d.3f}" % col_width] * len(bucket_keys))

    print("\n" + text_fmt.format("Object Type", *headers))
    print("-" * rule_len)
    for cls, counts in totals.items():
        print(text_fmt.format(cls, *(counts[key] for key in bucket_keys)))

    bucket_counts = [sum(counts[key] for counts in totals.values()) for key in bucket_keys]
    grand_total = sum(bucket_counts)
    print((text_fmt + count_row_suffix).format("Total (count)", *bucket_counts))
    # A split without any counted object would otherwise divide by zero.
    bucket_ratios = [count / grand_total if grand_total else 0.0 for count in bucket_counts]
    print(ratio_fmt.format("Total (ratio)", *bucket_ratios))
    return totals, bucket_counts, bucket_ratios


# Splits that are missing on disk keep an empty dict here.
overall_summary = {x: {} for x in SPLITS}

for split in SPLITS:
    split_path = os.path.join(root_path, split)
    if not os.path.exists(split_path):
        print(f"split '{split}' not found under {root_path}; skipping")
        continue

    print()
    sequence_data: Dict[str, TemporalSequenceDetails] = create_sequence_details(split_path)

    img_label_s1_folder = os.path.join(
        split_path, "labels_point_clouds", "s110_lidar_ouster_south"
    )
    img_label_s1_paths = sorted(glob(os.path.join(img_label_s1_folder, "*")))

    (
        frame_links,
        total_not_only_prev,
        total_not_only_next,
        total_not_both,
    ) = read_frame_links(img_label_s1_paths)

    # Group frame tokens by the scene they belong to.
    frames_per_scene = {}
    for token, (_, _, scene_token, _) in frame_links.items():
        frames_per_scene.setdefault(scene_token, {"total": 0, "frames": []})["frames"].append(token)
    for scene_info in frames_per_scene.values():
        scene_info["total"] = len(scene_info["frames"])

    print("\n", "-" * 15, "split: ", split, "-" * 15)
    # Kept as a list of (scene_token, info) ordered by frame count; the printed
    # listing below is ordered by scene token.
    chain_results = sorted(frames_per_scene.items(), key=lambda item: int(item[1]["total"]))

    print("No. Sequences with following number of frames: ")
    for scene_token, scene_info in sorted(chain_results, key=lambda item: item[0]):
        print("\tscene token", scene_token, "\ttotal no frames: \t", scene_info["total"])

    print("\nNumber of sequences: \t\t", len(chain_results))
    print("Samples with no prev: \t\t", total_not_only_prev)
    print("Samples with no next: \t\t", total_not_only_next)
    print("Samples with no prev nor next: \t", total_not_both)

    total_difficulty_stats, difficulty_counts, difficulty_ratios = summarize_class_stat(
        sequence_data,
        "total_difficulty_stats",
        "\nNo. Classes with following difficulty levels: ",
        ("easy", "moderate", "hard"),
        ("easy", "moderate", "hard"),
        CLASSES,
    )

    total_distance_stats, distance_counts, distance_ratios = summarize_class_stat(
        sequence_data,
        "total_distance_stats",
        "\nNo. Classes with following distances: ",
        ("d<40", "d40-50", "d>50"),
        ("d <= 40m", "40 < d <= 50", "50 < d"),
        CLASSES,
    )

    total_points_stats, points_counts, points_ratios = summarize_class_stat(
        sequence_data,
        "total_num_points_stats",
        "\nNo. Classes with following number of points: ",
        ("n<20", "n20-50", "n>50"),
        ("n <= 20", "20 < n <= 50", "50 < n"),
        CLASSES,
        # This table's count row has always ended with one extra space.
        count_row_suffix=" ",
    )

    total_occlusion_stats, occlusion_counts, occlusion_ratios = summarize_class_stat(
        sequence_data,
        "total_occlusion_stats",
        "\nNo. Classes with following occlusion levels: ",
        ("NOT_OCCLUDED", "PARTIALLY_OCCLUDED", "MOSTLY_OCCLUDED", "UNKNOWN"),
        ("NOT_OCCLUDED", "PARTIALLY_OCCLUDED", "MOSTLY_OCCLUDED", "UNKNOWN"),
        CLASSES,
        col_width=20,
        rule_len=100,
    )

    overall_summary[split] = {
        "difficulty_ratios": difficulty_ratios,
        "difficulty_counts": difficulty_counts,
        "distance_ratios": distance_ratios,
        "distance_counts": distance_counts,
        "points_ratios": points_ratios,
        "points_counts": points_counts,
        "occlusion_ratios": occlusion_ratios,
        "occlusion_counts": occlusion_counts,
    }




reading: 100%|██████████| 1920/1920 [00:02<00:00, 681.97it/s]



 --------------- split:  train ---------------
No. Sequences with following number of frames: 
	scene token 05043a60ef3d4d0f83a19a4618933555 	total no frames: 	 90
	scene token 0c35754c0898412b989a29dd0ba13424 	total no frames: 	 120
	scene token 18ae50f824cd4ec9b108f296b3ae2070 	total no frames: 	 60
	scene token 341cf65c155c47d58178bbfd4d7b3fb8 	total no frames: 	 60
	scene token 357503b6832e435d9453761ec08a91a0 	total no frames: 	 30
	scene token 68f29212061e4c0f820c90711aee8b5d 	total no frames: 	 90
	scene token 6bfc7d7aa62042619c750b5875e2db8f 	total no frames: 	 60
	scene token 7f1dd6eac5204075995899d71ee9af74 	total no frames: 	 120
	scene token 94f24f15c9d2428cb6510229579cad29 	total no frames: 	 210
	scene token a4c98c1cda4744469018bb8280b488fe 	total no frames: 	 180
	scene token a5ade10d818e468881bb87aac2a445fd 	total no frames: 	 180
	scene token ac4f7646a4964e2081afac53d0c2c51a 	total no frames: 	 90
	scene token d12f2f7292844eec9d70ca44b1a44828 	total no frames: 	 30
	s

reading: 100%|██████████| 240/240 [00:00<00:00, 702.40it/s]



 --------------- split:  val ---------------
No. Sequences with following number of frames: 
	scene token 26d151fa04ea4b4ab7d4802835c931cf 	total no frames: 	 30
	scene token 596a306072334b48b5dba1e2cca77952 	total no frames: 	 30
	scene token 8bf049d8e9c04094af7637909f5a2e9c 	total no frames: 	 30
	scene token 9e57a56222e642b9b634fda25518076c 	total no frames: 	 30
	scene token ad50a4c8fa21431eb4ee27e1f2e4c038 	total no frames: 	 30
	scene token be4f047449174528844bf20a1d9ca1b4 	total no frames: 	 30
	scene token e101a73cc06d46e5b38da0d0dcb464e8 	total no frames: 	 30
	scene token ed9753988f43452781aae79fbee25b49 	total no frames: 	 30

Number of sequences: 		 8
Samples with no prev: 		 8
Samples with no next: 		 8
Samples with no prev nor next: 	 0

No. Classes with following difficulty levels: 

Object Type          easy            moderate        hard           
------------------------------------------------------------
CAR                  96              70              4     

reading: 100%|██████████| 240/240 [00:00<00:00, 684.22it/s]



 --------------- split:  test ---------------
No. Sequences with following number of frames: 
	scene token 2f6fd3fe919d41a48e0fbd6c3bfa51bb 	total no frames: 	 30
	scene token 47df5038abae42b9b311d201564baadb 	total no frames: 	 30
	scene token 562150a20dc04c258c0d8e324012a48e 	total no frames: 	 30
	scene token 9c6960c3a15143f8bdf6834de57f7d27 	total no frames: 	 30
	scene token b7ca756237a7406d8b73681a6840018e 	total no frames: 	 30
	scene token d07256c6b0404ae1b2f9f48dc1ef52f0 	total no frames: 	 30
	scene token e0a984f86f8c4949af227a210cd2a392 	total no frames: 	 30
	scene token e6972a21ff524ed0860098910192d5c2 	total no frames: 	 30

Number of sequences: 		 8
Samples with no prev: 		 8
Samples with no next: 		 8
Samples with no prev nor next: 	 0

No. Classes with following difficulty levels: 

Object Type          easy            moderate        hard           
------------------------------------------------------------
CAR                  85              32              5    

In [6]:

def print_split_table(summary, stat_prefix, headers, col_width=15, rule_len=55):
    """Print one table with a row per split showing ``count (ratio)`` per bucket.

    Args:
        summary: Mapping of split name to a dict holding the lists
            ``"<stat_prefix>_counts"`` and ``"<stat_prefix>_ratios"``.
            A split whose dict is empty gets a placeholder row instead of
            raising ``KeyError``.
        stat_prefix: Prefix selecting which pair of lists to print.
        headers: Column captions; one cell is printed per caption.
        col_width: Width of each value column.
        rule_len: Length of the dashed rule under the header.
    """
    header_fmt = " ".join(["{:<10}"] + ["{:<%d}" % col_width] * len(headers))
    row_fmt = "{:<10}| " + " ".join(["{:<%d}" % col_width] * len(headers))

    print("\n" + header_fmt.format("Split", *headers))
    print("-" * rule_len)
    for split_name, stats in summary.items():
        if not stats:
            # The split had no statistics recorded (e.g. its folder was missing).
            print("{:<10}| {}".format(split_name, "no data"))
            continue
        counts = stats[f"{stat_prefix}_counts"]
        ratios = stats[f"{stat_prefix}_ratios"]
        cells = [f"{counts[i]:<5} ({ratios[i]:.3f})" for i in range(len(headers))]
        print(row_fmt.format(split_name, *cells))


print("\n", "-" * 11, "overall summary", "-" * 11)

print_split_table(overall_summary, "difficulty", ("easy", "moderate", "hard"))
print_split_table(overall_summary, "distance", ("d <= 40m", "40 < d <= 50", "50 < d"))
print_split_table(overall_summary, "points", ("n <= 20", "20 < n <= 50", "50 < n"))
print_split_table(
    overall_summary,
    "occlusion",
    ("NOT_OCCLUDED", "PARTIALLY_OCCLUDED", "MOSTLY_OCCLUDED", "UNKNOWN"),
    col_width=20,
    rule_len=90,
)



 ----------- overall summary -----------

Split      easy            moderate        hard           
-------------------------------------------------------
train     | 1584  (0.794)   299   (0.150)   113   (0.057)  
val       | 186   (0.616)   101   (0.334)   15    (0.050)  
test      | 162   (0.794)   37    (0.181)   5     (0.025)  

Split      d <= 40m        40 < d <= 50    50 < d         
-------------------------------------------------------
train     | 12530 (0.410)   10114 (0.331)   7942  (0.260)  
val       | 1519  (0.394)   1420  (0.369)   912   (0.237)  
test      | 1742  (0.456)   1240  (0.325)   834   (0.219)  

Split      n <= 20         20 < n <= 50    50 < n         
-------------------------------------------------------
train     | 16523 (0.573)   6204  (0.215)   6092  (0.211)  
val       | 1866  (0.510)   752   (0.206)   1041  (0.285)  
test      | 2065  (0.542)   851   (0.223)   896   (0.235)  

Split      NOT_OCCLUDED         PARTIALLY_OCCLUDED   MOSTLY_OCCLUDED 