In [None]:
# This script converts a standard segmentation dataset into the format used by this project.
# The conversion steps can differ considerably between datasets; this template uses the ADE20K dataset (https://github.com/open-mmlab/mmsegmentation/blob/main/docs/en/user_guides/2_dataset_prepare.md#ade20k) as an example.
# input: img_paths, gt_paths, mapping of idx to cls, dataset_name
# output: generate files used by project

In [None]:
# import package
from matplotlib import pyplot as plt
from PIL import Image
import numpy as np
from pathlib import Path
from omegaconf import OmegaConf
from utils.datasets import convert_dataset
import pandas as pd

In [None]:
# input: folders holding the validation images and their segmentation annotations
img_folder = "./datasets/ade/ADEChallengeData2016/images/validation"
gt_folder = "./datasets/ade/ADEChallengeData2016/annotations/validation"

## Check Segmentation Ground Truth Format

In [None]:
# Inspect one image/annotation pair to confirm how the ground truth is encoded
# before converting the whole dataset.
example_name = 'ADE_val_00000001'

# Build the example paths from the folders configured in the input cell so the
# example cannot drift away from the data that is actually converted.
example_img_path = f'{img_folder}/{example_name}.jpg'
example_gt_path = f'{gt_folder}/{example_name}.png'
example_img = Image.open(example_img_path)
# NOTE(review): convert('L') presumes the annotation PNG stores class indices
# directly as 8-bit grayscale values. For a palette-mode ('P') annotation it
# would map indices through the palette instead — confirm for other datasets.
example_gt = Image.open(example_gt_path).convert('L')

# display example image and ground truth side by side
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
ax[0].imshow(example_img)
ax[0].axis('off')
ax[0].set_title('Image')
ax[1].imshow(example_gt)
ax[1].axis('off')
ax[1].set_title('Ground Truth')
plt.show()

# check which class indices occur in the ground truth and how many pixels each covers
idxes, counts = np.unique(np.asarray(example_gt), return_counts=True)
print('idxes in ground truth:', idxes)
print('counts in ground truth:', counts)

# check that image and ground truth have the same dimensions;
# PIL's .size is (width, height), not a numpy-style (height, width) shape
print('img size (width, height):', example_img.size)
print('gt size (width, height):', example_gt.size)

## Build img_paths, gt_paths, idx2cls, dataset_name

In [None]:
dataset_name = "ade20k"

# img paths and gt paths
# Sort the images once and derive each annotation path in the same pass, so
# img_paths[i] and gt_paths[i] always refer to the same sample. Sorting the two
# lists independently can misalign them, because the differing extensions
# (".jpg" vs ".png") take part in the string comparison (e.g. stems "a" and "a.k").
img_folder_path = Path(img_folder)
gt_folder_path = Path(gt_folder)
img_paths = []
gt_paths = []
for img_path in sorted(img_folder_path.glob("*.jpg"), key=str):
    gt_path = gt_folder_path / (img_path.stem + ".png")
    img_paths.append(str(img_path))
    gt_paths.append(str(gt_path))

# Fail early, with the offending files listed, if an image has no annotation.
missing_gt = [p for p in gt_paths if not Path(p).is_file()]
if missing_gt:
    raise FileNotFoundError(
        f"{len(missing_gt)} ground-truth file(s) not found, e.g. {missing_gt[:5]}"
    )

# idx to cls
# objectInfo150.txt is read as a tab-separated table; only the "Idx" and "Name"
# columns are used. "Name" may list several comma-separated names for a class,
# of which the first is taken as the class name.
idx2cls_file = "./datasets/ade/ADEChallengeData2016/objectInfo150.txt"
df = pd.read_csv(idx2cls_file, delimiter="\t")
idx_name = df[["Idx", "Name"]].to_numpy()  # reused by the post-process cell
idx2cls = {idx: name.split(",")[0].strip() for idx, name in idx_name}

## Convert Dataset to Project Accepted Format

In [None]:
# convert dataset
# Pass the image paths, ground-truth paths, idx -> class-name mapping and the
# dataset name built above to the project's convert_dataset helper.
# NOTE(review): presumably this writes the files under ./metadata/<dataset_name>/
# that the post-process cell loads — confirm against utils.datasets.
convert_dataset(img_paths, gt_paths, idx2cls, dataset_name)

In [None]:
# post-process (dataset-specific: other datasets may need entirely different steps)
info_yaml = "./metadata/ade20k/info.yaml"
metainfo_config = OmegaConf.load(info_yaml)

# Location settings written into the metadata config; {data_root} and
# {img_name} are left as literal placeholders in the stored templates.
location_settings = {
    "data_root": "./datasets/ade/ADEChallengeData2016",
    "data_name_list": "./metadata/ade20k/val_id.txt",
    "name_to_cls_labels": "./metadata/ade20k/cls_labels.npy",
    "img_path": "{data_root}/images/validation/{img_name}.jpg",
    "gt_path": "{data_root}/annotations/validation/{img_name}.png",
}
for setting, value in location_settings.items():
    metainfo_config[setting] = value

# Record, for every class, all of its comma-separated names (empty entries
# dropped) under the class's first listed name.
for idx, full_name in idx_name:
    synonyms = [part.strip() for part in full_name.split(",")]
    cls_name = synonyms[0]
    metainfo_config.category[cls_name] = [syn for syn in synonyms if syn]

# Write the updated config back to the file it was loaded from.
OmegaConf.save(metainfo_config, info_yaml)