In [6]:
# Root of the CAT dataset on disk, plus the two split sub-directories visited for each section.
dataset_dir = "/root/repos/vlmaps/data/CAT"
train_dir, test_dir = "Train", "Test"

# One row per dataset section: (section name, image filename regex, annotation filename regex).
# Group 1 of every regex captures the numeric frame index embedded in the filename.
_section_specs = (
    ("Brown_Field", r"img_(\d+)\.png", r"mask_(\d+)\.png"),
    ("Main_Trail", r"img_(\d+)\.png", r"mask_(\d+)\.png"),
    ("mixed", r"img_pln_(\d+)\.png", r"anno_pln_(\d+)\.png"),
    ("Power_Line", r"img_pln_(\d+)\.png", r"anno_pln_(\d+)\.png"),
)

# Parallel lists (same order, same length); the logging loop indexes them by section position.
dataset_sections = [spec[0] for spec in _section_specs]
dataset_templates_img = [spec[1] for spec in _section_specs]
dataset_templates_anno = [spec[2] for spec in _section_specs]

from PIL import Image
import os
import re
from IPython.display import display

from tqdm import tqdm

In [14]:
# Segmentation classes and their (R, G, B) display colors.
# Order matters: the annotation-context cell derives each class id from the
# position of its label in this mapping.
_CLASS_PALETTE = (
    ("Background", (0, 0, 0)),  # black
    ("Sedan", (0, 0, 255)),  # blue
    ("Pickup", (0, 255, 0)),  # green
    ("Offroad", (255, 0, 0)),  # red
)
CLASS_COLORS = dict(_CLASS_PALETTE)

In [15]:
import rerun as rr
import time

# Start a Rerun recording for the whole CAT dataset and stream it to a
# timestamped .rrd file so repeated runs do not overwrite each other.
rr.init("cat_dataset_all", recording_id="cat_dataset_all")
current_time = time.strftime("%Y%m%d_%H%M%S")

# Derive the output folder from dataset_dir instead of repeating the absolute
# path, and create it up front: this cell should not depend on the "rerun"
# directory already existing when rr.save opens the file.
rerun_dir = os.path.join(dataset_dir, "rerun")
os.makedirs(rerun_dir, exist_ok=True)
rr.save(os.path.join(rerun_dir, f"cat_dataset_{current_time}.rrd"))


In [None]:
# Build one Rerun AnnotationInfo per class. The class id is the position of the
# label in CLASS_COLORS; presumably the integer values stored in the annotation
# maps follow that same order (TODO confirm against the dataset).
labels_mapping = []
for class_id, (class_label, class_color) in enumerate(CLASS_COLORS.items()):
    labels_mapping.append(
        rr.AnnotationInfo(id=class_id, label=class_label, color=class_color)
    )

# Attach the class table to the "cat_dataset" entity as static data.
rr.log("cat_dataset", rr.AnnotationContext(labels_mapping), static=True)

In [17]:
# Walk every dataset section and split (Train/Test) and log its raw images and
# integer annotation maps to Rerun. The number embedded in each filename is used
# as the position on the "frame" timeline.


def _list_frame_files(directory, template):
    """Return the filenames in `directory` that fully match `template`, sorted by frame index.

    `template` is a compiled regex whose group 1 is the numeric frame index.
    `fullmatch` is used so that stray files such as "img_12.png.bak" are not
    picked up and logged on top of the real frame 12.
    """
    matching = [name for name in os.listdir(directory) if template.fullmatch(name)]
    return sorted(matching, key=lambda name: int(template.fullmatch(name).group(1)))


for dataset_idx, dataset_section in enumerate(dataset_sections):
    print(f"###### Processing {dataset_section}")

    # The filename patterns depend only on the section, so compile them once per section.
    img_template = re.compile(dataset_templates_img[dataset_idx])
    anno_template = re.compile(dataset_templates_anno[dataset_idx])

    for sub_dir in [train_dir, test_dir]:
        print(f"## Processing {sub_dir}")
        # The trailing "" keeps a trailing separator on both directory strings.
        int_map_dir = os.path.join(dataset_dir, dataset_section, sub_dir, "annos", "int_maps", "")
        raw_imgs_dir = os.path.join(dataset_dir, dataset_section, sub_dir, "imgs", "")

        # Collect the files that follow this section's naming scheme, in frame order.
        img_files = _list_frame_files(raw_imgs_dir, img_template)
        anno_files = _list_frame_files(int_map_dir, anno_template)

        # The two counts are expected to agree; they are printed for a quick visual check.
        print("Number of images: ", len(img_files))
        print("Number of annotations: ", len(anno_files))

        for file in tqdm(img_files):
            img_path = os.path.join(raw_imgs_dir, file)
            idx = img_template.fullmatch(file).group(1)  # e.g. "1" for img_pln_1.png
            rr.set_time(timeline="frame", sequence=int(idx))
            # Close the file handle as soon as the image has been logged.
            with Image.open(img_path) as img:
                rr.log(f"cat_dataset/{dataset_section}/{sub_dir}/imgs", rr.Image(img))

        for file in tqdm(anno_files):
            anno_path = os.path.join(int_map_dir, file)
            idx = anno_template.fullmatch(file).group(1)  # e.g. "1" for anno_pln_1.png
            rr.set_time(timeline="frame", sequence=int(idx))
            with Image.open(anno_path) as anno:
                rr.log(f"cat_dataset/{dataset_section}/{sub_dir}/anno", rr.SegmentationImage(anno))
    print("--------------------------------\n")
        

###### Processing Brown_Field
## Processing Train
Number of images:  140
Number of annotations:  140


100%|██████████| 140/140 [00:03<00:00, 45.59it/s]
100%|██████████| 140/140 [00:00<00:00, 384.22it/s]


## Processing Test
Number of images:  60
Number of annotations:  60


100%|██████████| 60/60 [00:01<00:00, 47.59it/s]
100%|██████████| 60/60 [00:00<00:00, 382.17it/s]


--------------------------------

###### Processing Main_Trail
## Processing Train
Number of images:  133
Number of annotations:  133


100%|██████████| 133/133 [00:02<00:00, 59.69it/s]
100%|██████████| 133/133 [00:00<00:00, 414.09it/s]


## Processing Test
Number of images:  57
Number of annotations:  57


100%|██████████| 57/57 [00:00<00:00, 60.07it/s]
100%|██████████| 57/57 [00:00<00:00, 412.16it/s]


--------------------------------

###### Processing mixed
## Processing Train
Number of images:  925
Number of annotations:  925


100%|██████████| 925/925 [00:10<00:00, 91.17it/s]
100%|██████████| 925/925 [00:01<00:00, 561.22it/s]


## Processing Test
Number of images:  397
Number of annotations:  397


100%|██████████| 397/397 [00:04<00:00, 88.78it/s]
100%|██████████| 397/397 [00:00<00:00, 557.11it/s]


--------------------------------

###### Processing Power_Line
## Processing Train
Number of images:  925
Number of annotations:  925


100%|██████████| 925/925 [00:10<00:00, 88.55it/s]
100%|██████████| 925/925 [00:01<00:00, 563.84it/s]


## Processing Test
Number of images:  397
Number of annotations:  397


100%|██████████| 397/397 [00:04<00:00, 92.01it/s]
100%|██████████| 397/397 [00:00<00:00, 509.38it/s]

--------------------------------






---------

In [5]:
# Log every raw image of one selected section under "cat_dataset/<section>/imgs".
# NOTE(review): `dataset_section_selection` is not assigned in any cell visible here, and
# img_files / raw_imgs_dir / img_template hold whatever the last cell that set them left
# behind. Confirm this cell still has a defined place in the run order.
for img_name in tqdm(img_files):
    img = Image.open(raw_imgs_dir + img_name)
    # Group 1 of the filename pattern is the frame number, e.g. "1" for img_pln_1.png.
    frame_no = int(re.match(img_template, img_name).group(1))
    rr.set_time(timeline="frame", sequence=frame_no)
    rr.log(f"cat_dataset/{dataset_section_selection}/imgs", rr.Image(img))

100%|██████████| 623/623 [00:06<00:00, 94.03it/s]


In [8]:
# Log every annotation map of one selected section under "cat_dataset/<section>/anno".
# NOTE(review): `dataset_section_selection` is not assigned in any cell visible here, and
# anno_files / int_map_dir / anno_template hold whatever the last cell that set them left
# behind. Confirm this cell still has a defined place in the run order.
for anno_name in tqdm(anno_files):
    anno = Image.open(int_map_dir + anno_name)
    # Group 1 of the filename pattern is the frame number, e.g. "1" for anno_pln_1.png.
    frame_no = int(re.match(anno_template, anno_name).group(1))
    rr.set_time(timeline="frame", sequence=frame_no)
    rr.log(f"cat_dataset/{dataset_section_selection}/anno", rr.SegmentationImage(anno))

100%|██████████| 623/623 [00:01<00:00, 558.78it/s]
