This script calculates the following dataset statistics:
- number of scenes
- number of images
- number of vehicle positions
- number of instances

The dataset directory is expected to have the following layout (as described on the official Kaggle page):

- root_dir:
    - day
        - test
            - images
            - labels
            - splits
        - train
            - ...
        - val
            - ...
    - night
        - test
            - ...
        - train
            - ...
        - val
            - ...

In [1]:
import os
import json
from tqdm import tqdm

# Adjust root_dir to the location of the dataset on your machine. It must be
# the top-level directory that contains the "day" and "night" subdirectories.
root_dir = "/media/lukas/empty/EODAN_Dataset"


In [2]:
# Resolve the dataset root once so that every path built below is absolute.
root_dir = os.path.abspath(root_dir)
light_cycles = ("day", "night")
splits = ("train", "test", "val")

# Counters tracked at every aggregation level (split, light cycle, total).
# The order here is the order in which the counters are printed.
stat_keys = ("scenes", "images", "vehicles", "vehicle_directs", "instances",
             "instance_directs")

total_stats = dict.fromkeys(stat_keys, 0)

for cycle in light_cycles:
    cycle_stats = dict.fromkeys(stat_keys, 0)
    for split in splits:
        split_stats = dict.fromkeys(stat_keys, 0)
        path = os.path.join(root_dir, cycle, split)

        # every entry below "images" is counted as one scene
        images_dir = os.path.join(path, "images")
        scenes = os.listdir(images_dir)
        split_stats["scenes"] = len(scenes)

        # collect the image file names of all scenes of this split
        images = []
        for scene in scenes:
            images += os.listdir(os.path.join(images_dir, scene))
        split_stats["images"] = len(images)

        # for each image, read the keypoint annotation file of the same name
        kp_annot_path = os.path.join(path, "labels", "keypoints")
        for img in tqdm(images):
            # splitext strips only the extension, so dots inside the image
            # name do not truncate the annotation file name
            img_stem = os.path.splitext(img)[0]
            annot_file = os.path.join(kp_annot_path, f"{img_stem}.json")
            with open(annot_file, encoding="utf-8") as f:
                annot = json.load(f)["annotations"]

            # nbr of vehicles
            split_stats["vehicles"] += len(annot)

            # nbr of vehicles flagged as "direct"
            split_stats["vehicle_directs"] += sum(
                int(vehicle["direct"]) for vehicle in annot)

            # nbr of instances (summed over all vehicles of the image)
            split_stats["instances"] += sum(
                len(vehicle["instances"]) for vehicle in annot)

            # nbr of instances flagged as "direct"
            split_stats["instance_directs"] += sum(
                int(inst["direct"])
                for vehicle in annot
                for inst in vehicle["instances"])

        print(f"\nCycle: {cycle}\tSplit: {split}")
        print(split_stats)

        # fold the finished split into the running totals of its light cycle
        cycle_stats = {k: cycle_stats[k] + v for k, v in split_stats.items()}

    print("---------------------------------------------------")
    print(f"Total stats ({cycle}):")
    print(cycle_stats)
    print("")

    # fold the finished light cycle into the overall totals
    total_stats = {k: total_stats[k] + v for k, v in cycle_stats.items()}

print("\n\nTotal stats:")
print(total_stats)

100%|██████████| 19078/19078 [00:55<00:00, 344.25it/s]
100%|██████████| 3132/3132 [00:08<00:00, 348.13it/s]
100%|██████████| 3898/3898 [00:11<00:00, 345.58it/s]
100%|██████████| 25264/25264 [01:14<00:00, 341.21it/s]
100%|██████████| 4052/4052 [00:11<00:00, 344.64it/s]
100%|██████████| 4322/4322 [00:12<00:00, 338.08it/s]



Cycle: day	Split: train
{'scenes': 113, 'images': 19078, 'vehicles': 15403, 'vehicle_directs': 8138, 'instances': 45765, 'instance_directs': 27732}

Cycle: day	Split: test
{'scenes': 19, 'images': 3132, 'vehicles': 3045, 'vehicle_directs': 1284, 'instances': 9338, 'instance_directs': 5629}

Cycle: day	Split: val
{'scenes': 20, 'images': 3898, 'vehicles': 2602, 'vehicle_directs': 1300, 'instances': 7244, 'instance_directs': 3690}

Total stats (day):
{'scenes': 152, 'images': 26108, 'vehicles': 21050, 'vehicle_directs': 10722, 'instances': 62347, 'instance_directs': 37051}

Cycle: night	Split: train
{'scenes': 145, 'images': 25264, 'vehicles': 26615, 'vehicle_directs': 12300, 'instances': 72304, 'instance_directs': 40192}

Cycle: night	Split: test
{'scenes': 24, 'images': 4052, 'vehicles': 3384, 'vehicle_directs': 1962, 'instances': 10438, 'instance_directs': 5212}

Cycle: night	Split: val
{'scenes': 25, 'images': 4322, 'vehicles': 3600, 'vehicle_directs': 1988, 'instances': 12746, 'ins

In [6]:
# Fraction of all counted vehicles / instances whose "direct" flag is set,
# taken from the overall totals.
rel_vehicle_directs = (
    total_stats["vehicle_directs"] / total_stats["vehicles"]
)
rel_instance_directs = (
    total_stats["instance_directs"] / total_stats["instances"]
)

print("Relative directs/ indirects:")
# one report line per category; the indirect share is the complement
for label, rel in (("Vehicles", rel_vehicle_directs),
                   ("Instances", rel_instance_directs)):
    print(f"{label}:\t Directs: {rel*100}%\tIndirects: {(1-rel)*100}%")

Relative directs/ indirects:
Vehicles:	 Directs: 49.35497447345789%	Indirects: 50.645025526542106%
Instances:	 Directs: 57.072259004656765%	Indirects: 42.927740995343235%
