In [1]:
# Re-import edited project modules automatically before each cell executes,
# so changes to the helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from glob import glob
import os
import sys
import json
from typing import Dict, Any

# Put the sibling ../tools directory (relative to the current working directory)
# on the import path; the `preprocessing` package imported in the next cell is
# presumably located there — TODO confirm.
sys.path.append(os.path.join(os.getcwd(), "..", "tools"))

In [3]:
from preprocessing.tumtraf_find_temporal_split import TemporalSequenceDetails, create_sequence_details

In [4]:
# Object categories that get one row in every per-class table, in display order.
CLASSES = (
    "CAR",
    "TRAILER",
    "TRUCK",
    "VAN",
    "PEDESTRIAN",
    "BUS",
    "MOTORCYCLE",
    "OTHER",
    "BICYCLE",
    "EMERGENCY_VEHICLE",
)

# Dataset root handed to analyze(); relative to the notebook's working directory.
root_path = "../data/tumtraf-i"

# Names of the dataset splits, plus one (initially empty) result slot per split
# that the analysis cells below fill in.
splits = ["train", "val", "test"]
overall_summary = {split_name: {} for split_name in splits}

In [5]:
def _print_class_stats_table(title, per_sequence_stats, buckets, headers,
                             col_width, rule_width, count_row_suffix=""):
    """Aggregate per-sequence class statistics, print them as a table, return the totals.

    Args:
        title: Line printed above the table.
        per_sequence_stats: Iterable of ``{class_name: {bucket: count}}`` mappings,
            one per sequence.
        buckets: Bucket keys, in column order.
        headers: Column captions, parallel to ``buckets``.
        col_width: Width of every value column.
        rule_width: Length of the dashed rule under the header.
        count_row_suffix: Text appended to the "Total (count)" row.

    Returns:
        ``(counts, ratios)``: per-bucket totals over all classes and each total
        divided by the grand total. All ratios are ``0.0`` when nothing was counted.
    """
    print(title)

    # One zero-initialised row per known class so classes without objects still show up.
    totals = {cls: {bucket: 0 for bucket in buckets} for cls in CLASSES}
    for class_stats in per_sequence_stats:
        for cls, bucket_counts in class_stats.items():
            for bucket, count in bucket_counts.items():
                totals[cls][bucket] += count

    row_fmt = "{:<20} " + " ".join(["{:<%d}" % col_width] * len(buckets))
    ratio_fmt = "{:<20} " + " ".join(["{:<%d.3f}" % col_width] * len(buckets))

    print("\n" + row_fmt.format("Object Type", *headers))
    print("-" * rule_width)
    for cls, bucket_counts in totals.items():
        print(row_fmt.format(cls, *[bucket_counts[bucket] for bucket in buckets]))

    counts = [sum(row[bucket] for row in totals.values()) for bucket in buckets]
    print((row_fmt + count_row_suffix).format("Total (count)", *counts))

    grand_total = sum(counts)
    # An empty split has nothing to normalise by; report 0.0 instead of dividing by zero.
    ratios = [count / grand_total if grand_total else 0.0 for count in counts]
    print(ratio_fmt.format("Total (ratio)", *ratios))

    return counts, ratios


def analyze(root_path:str, split:str) -> Dict[str, Any]:
    """Print sequence and per-class statistics for one split and return the totals.

    Every file in ``<root_path>/<split>/labels_point_clouds/s110_lidar_ouster_south``
    is loaded as JSON; the ``frame_properties`` of its first frame provide the
    ``token``/``prev``/``next``/``scene_token`` used for the sequence overview.
    The per-class tables are built from the objects returned by
    ``create_sequence_details`` for the split directory.

    Args:
        root_path: Dataset root directory.
        split: Name of the split sub-directory (e.g. ``"train"``).

    Returns:
        ``None`` when ``<root_path>/<split>`` does not exist. Otherwise a dict with
        ``difficulty_*``, ``distance_*``, ``points_*`` and ``occlusion_*`` entries,
        each available as ``*_counts`` (per-bucket totals) and ``*_ratios``
        (share of the grand total; all ``0.0`` if the split has no counted objects).
    """
    split_path = os.path.join(root_path, split)
    if not os.path.exists(split_path):
        return None

    print() # for new line
    sequence_data: Dict[str, TemporalSequenceDetails] = create_sequence_details(split_path)

    label_folder = os.path.join(split_path, "labels_point_clouds", "s110_lidar_ouster_south")
    label_paths = sorted(glob(os.path.join(label_folder, "*")))

    no_prev_only = 0
    no_next_only = 0
    no_prev_nor_next = 0
    # token -> scene_token; a token that occurs in several files is counted once.
    scene_of_token = {}

    for label_path in label_paths:
        with open(label_path, "r") as f:
            frames = json.load(f)["openlabel"]["frames"]
        # Only the first frame stored in each label file is inspected.
        frame_properties = next(iter(frames.values()))["frame_properties"]
        prev_token = frame_properties["prev"]
        next_token = frame_properties["next"]

        if prev_token is None and next_token is None:
            no_prev_nor_next += 1
        elif prev_token is None:
            no_prev_only += 1
        elif next_token is None:
            no_next_only += 1

        scene_of_token[frame_properties["token"]] = frame_properties["scene_token"]

    frames_per_scene = {}
    for scene_token in scene_of_token.values():
        frames_per_scene[scene_token] = frames_per_scene.get(scene_token, 0) + 1

    print("\n", "-" * 15, "split: ", split, "-" * 15)

    print("No. Sequences with following number of frames: ")
    for scene_token in sorted(frames_per_scene):
        print("\tscene token", scene_token, "\ttotal no frames: \t", frames_per_scene[scene_token])

    print("\nNumber of sequences: \t\t", len(frames_per_scene))
    print("Samples with no prev: \t\t", no_prev_only)
    print("Samples with no next: \t\t", no_next_only)
    print("Samples with no prev nor next: \t", no_prev_nor_next)

    sequences = list(sequence_data.values())

    difficulty_counts, difficulty_ratios = _print_class_stats_table(
        "\nNo. Classes with following difficulty levels: ",
        [seq.total_difficulty_stats for seq in sequences],
        buckets=("easy", "moderate", "hard"),
        headers=("easy", "moderate", "hard"),
        col_width=15,
        rule_width=60,
    )

    distance_counts, distance_ratios = _print_class_stats_table(
        "\nNo. Classes with following distances: ",
        [seq.total_distance_stats for seq in sequences],
        buckets=("d<40", "d40-50", "d>50"),
        headers=("d <= 40m", "40 < d <= 50", "50 < d"),
        col_width=15,
        rule_width=60,
    )

    points_counts, points_ratios = _print_class_stats_table(
        "\nNo. Classes with following number of points: ",
        [seq.total_num_points_stats for seq in sequences],
        buckets=("n<20", "n20-50", "n>50"),
        headers=("n <= 20", "20 < n <= 50", "50 < n"),
        col_width=15,
        rule_width=60,
        # This table has always emitted one extra trailing space on its count row;
        # kept so the printed output stays byte-identical.
        count_row_suffix=" ",
    )

    occlusion_counts, occlusion_ratios = _print_class_stats_table(
        "\nNo. Classes with following occlusion levels: ",
        [seq.total_occlusion_stats for seq in sequences],
        buckets=("NOT_OCCLUDED", "PARTIALLY_OCCLUDED", "MOSTLY_OCCLUDED", "UNKNOWN"),
        headers=("NOT_OCCLUDED", "PARTIALLY_OCCLUDED", "MOSTLY_OCCLUDED", "UNKNOWN"),
        col_width=20,
        rule_width=100,
    )

    return {
        "difficulty_ratios": difficulty_ratios,
        "difficulty_counts": difficulty_counts,
        "distance_ratios": distance_ratios,
        "distance_counts": distance_counts,
        "points_ratios": points_ratios,
        "points_counts": points_counts,
        "occlusion_ratios": occlusion_ratios,
        "occlusion_counts": occlusion_counts
    }

In [6]:
def summarize(overall_summary) -> None:
    """Print one comparison table per statistic with a row for every analyzed split.

    Args:
        overall_summary: Mapping of split name to the dict returned by ``analyze``.
            Splits whose entry is empty or ``None`` (not analyzed, or directory
            missing) are skipped instead of raising.
    """
    # (result-key prefix, column captions, column width, rule length)
    tables = (
        ("difficulty", ("easy", "moderate", "hard"), 15, 55),
        ("distance", ("d <= 40m", "40 < d <= 50", "50 < d"), 15, 55),
        ("points", ("n <= 20", "20 < n <= 50", "50 < n"), 15, 55),
        ("occlusion", ("NOT_OCCLUDED", "PARTIALLY_OCCLUDED", "MOSTLY_OCCLUDED", "UNKNOWN"), 20, 90),
    )

    for prefix, headers, col_width, rule_width in tables:
        columns = " ".join(["{:<%d}" % col_width] * len(headers))

        print(("\n{:<10} " + columns).format("Split", *headers))
        print("-" * rule_width)

        for split, stats in overall_summary.items():
            if not stats:
                # No results for this split; leave it out of the table.
                continue
            counts = stats[prefix + "_counts"]
            ratios = stats[prefix + "_ratios"]
            cells = [f"{counts[i]:<5} ({ratios[i]:.3f})" for i in range(len(headers))]
            print(("{:<10}| " + columns).format(split, *cells))


# Train Split

In [7]:
# Analyze the training split and keep its counts/ratios for the final summary tables.
overall_summary["train"] = analyze(root_path, "train")




reading: 100%|██████████| 1920/1920 [00:02<00:00, 911.86it/s] 



 --------------- split:  train ---------------
No. Sequences with following number of frames: 
	scene token 0298dca5926544d0a066d92ac485c7a5 	total no frames: 	 120
	scene token 1551325f173c49e98ed2bd2a612a60f6 	total no frames: 	 60
	scene token 23576299f5c84bc88512849f6c626546 	total no frames: 	 120
	scene token 2ed8fd8d8adc478a833d9afe8c2f81e0 	total no frames: 	 80
	scene token 3e99be325a1f407c822861c7978c1193 	total no frames: 	 180
	scene token 4532d9702504416d93d18fbc8818481e 	total no frames: 	 200
	scene token 53422031e91a425793af21ba9ebf0ce5 	total no frames: 	 20
	scene token 64a7348a1d2047718c32c7c411a9a719 	total no frames: 	 40
	scene token 6665637092e64bc3a44dd6d9fcba461c 	total no frames: 	 140
	scene token 762ccbd92e2f49b2be98f1ebb375c1fa 	total no frames: 	 20
	scene token 8dc5e76d532944cb9195ff4cff555c19 	total no frames: 	 20
	scene token 8eed7066891f48588ce71ee1ac7c8157 	total no frames: 	 40
	scene token a858f70a5eee4138991887cf027e9651 	total no frames: 	 60
	s

# Validation Split

In [8]:
# Analyze the validation split and keep its counts/ratios for the final summary tables.
overall_summary["val"] = analyze(root_path, "val")




reading: 100%|██████████| 240/240 [00:00<00:00, 712.20it/s]



 --------------- split:  val ---------------
No. Sequences with following number of frames: 
	scene token 007d1f06897e49f0bab1c9295e0000d7 	total no frames: 	 20
	scene token 0b08c2fd01b849ee8daa3429a8a15a9b 	total no frames: 	 40
	scene token 0ea0cad2d8c34f5eb1a8b9f5176079b6 	total no frames: 	 20
	scene token 136a1c7b3d6045afb72c95cdf98ce7b5 	total no frames: 	 20
	scene token 77daab6531964cd1b5b501ca666874b2 	total no frames: 	 20
	scene token 7bf73c63db89451b87da844149ab6d3c 	total no frames: 	 20
	scene token 8d3443e8533b4519a142b97561b97861 	total no frames: 	 20
	scene token 94d9acc6c03a47659a15ea4d85cfc1da 	total no frames: 	 20
	scene token 98f9b1b36e6d45b9bd1696df837fb443 	total no frames: 	 20
	scene token bbe9fdd6b62e4f019482beb28f978b4e 	total no frames: 	 20
	scene token e7dce932bc094f23946a9d32bdeaf562 	total no frames: 	 20

Number of sequences: 		 11
Samples with no prev: 		 11
Samples with no next: 		 11
Samples with no prev nor next: 	 0

No. Classes with following 

# Test Split

In [9]:
# The test split is treated as optional: when its directory is absent, analyze
# is not called and the empty placeholder dict created with overall_summary
# stays in place for "test".
if os.path.exists(os.path.join(root_path, "test")):
    overall_summary["test"] = analyze(root_path, "test")




reading: 100%|██████████| 240/240 [00:00<00:00, 703.96it/s]


 --------------- split:  test ---------------
No. Sequences with following number of frames: 
	scene token 26807fab5ba845d79153ad16bba0658f 	total no frames: 	 20
	scene token 4c7fad743d0e4f479b52386c5c9f7eb3 	total no frames: 	 20
	scene token 75a84e6a43e44a02a882982d28af7643 	total no frames: 	 20
	scene token aaf4dc3e8b784d0da2cd79249afaaefe 	total no frames: 	 20
	scene token b974d37cb9c54a208693130390377f78 	total no frames: 	 20
	scene token c0b0d066e68b4824b451bcb61b633e0c 	total no frames: 	 20
	scene token c622cedce3dd4788bbb9ee48039439a5 	total no frames: 	 20
	scene token c8ed799dd2df4072977553c370916e3c 	total no frames: 	 20
	scene token cf1ba463f1cf4e40bc337342b259b16d 	total no frames: 	 20
	scene token e4cc7a8e919b4a9c8b9387c8703bd8f3 	total no frames: 	 20
	scene token ee6c77af3ea94bf495ce5b668bc774dc 	total no frames: 	 20
	scene token fd508db657544515922c339631489594 	total no frames: 	 20

Number of sequences: 		 12
Samples with no prev: 		 12
Samples with no next:




# Summary

In [10]:
# Print the per-split comparison tables from the results collected by the cells above.
summarize(overall_summary)


Split      easy            moderate        hard           
-------------------------------------------------------
train     | 12516 (0.414)   10174 (0.337)   7512  (0.249)  
val       | 1601  (0.390)   1343  (0.327)   1166  (0.284)  
test      | 1674  (0.425)   1257  (0.319)   1010  (0.256)  

Split      d <= 40m        40 < d <= 50    50 < d         
-------------------------------------------------------
train     | 12516 (0.463)   10174 (0.376)   4333  (0.160)  
val       | 1601  (0.454)   1343  (0.380)   586   (0.166)  
test      | 1674  (0.475)   1257  (0.356)   596   (0.169)  

Split      n <= 20         20 < n <= 50    50 < n         
-------------------------------------------------------
train     | 14103 (0.530)   6023  (0.226)   6495  (0.244)  
val       | 1854  (0.545)   858   (0.252)   690   (0.203)  
test      | 1560  (0.468)   926   (0.278)   844   (0.253)  

Split      NOT_OCCLUDED         PARTIALLY_OCCLUDED   MOSTLY_OCCLUDED      UNKNOWN             
----------------