In [2]:
import torch
import torchvision
import numpy as np
from PIL import Image
from tqdm import tqdm
from functools import partial

import pandas as pd
import random
from os import path
import os

In [3]:
# Base folder that holds the source VOC data and every derived dataset.
ROOT_DIR = path.join('/home', 'victor', 'datasets')

# Source dataset location and destination folders for the generated variants.
DT_ROOT = path.join(ROOT_DIR, 'VOC')
DT_DEST_BINARY = path.join(ROOT_DIR, 'VOC_FORMS')
DT_DEST_RGB_RANDOM = path.join(ROOT_DIR , 'VOC_FORMS_RGB')


def DT_DEST_RGB_SINGLE_CLASS(c):
    """Return the destination folder of the single-class dataset for category name `c`."""
    return path.join(ROOT_DIR, f'VOC_FORMS_RGB_{c.upper()}')


# Category names; `main` derives each class id as 1 + position in this list,
# so the order must not be changed.
object_categories = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

In [4]:
def gen_example_from_voc(voc):
    """Yield `(mask, area)` pairs built from the segmentation maps of `voc`.

    `voc` is iterated as `(example, segmentation)` pairs; the example image
    itself is ignored. Every non-zero pixel of the segmentation is treated as
    foreground, the resulting mask is resized to 512x512 and converted to PIL
    mode "1", and `area` is the sum over that final mask.

    NOTE(review): value 255 counts as foreground here, whereas the other
    generators in this file exclude it together with 0 - confirm this is
    intended.
    """
    for _, seg_map in voc:
        foreground = np.array(seg_map) > 0
        mask_img = Image.fromarray(foreground)
        mask_img = mask_img.resize((512, 512))
        mask_img = mask_img.convert("1")
        # The area is measured on the resized mask, not on the original one.
        yield (mask_img, np.sum(mask_img))

def random_class_mask_generator(voc, im_shape=(224, 224), rng=None):
    """Yield `(image, area)` pairs where only one randomly chosen class is kept.

    For every `(example, segmentation)` pair in `voc`, both images are resized
    to `im_shape`, one label is drawn at random among the labels present in the
    resized segmentation (0 and 255 are never candidates), and every pixel that
    does not carry that label is painted with 0 in the example image.

    Args:
        voc: iterable of `(example, segmentation)` PIL image pairs.
        im_shape: target size handed to `Image.resize`.
        rng: optional object exposing `choice` (e.g. `np.random.default_rng(seed)`)
            used to pick the class. When omitted, the global `np.random` state
            is used.

    Yields:
        `(example, area)` where `area` is the number of pixels of the chosen
        class in the resized segmentation. Samples whose resized segmentation
        contains no candidate label are skipped instead of raising from
        `choice` on an empty array.
    """
    chooser = np.random if rng is None else rng
    for example, segmentation in voc:
        # Nearest-neighbour keeps label values intact; any interpolation would
        # invent class ids that do not exist in the annotation.
        labels = np.asarray(segmentation.resize(im_shape, resample=Image.NEAREST))
        present_labels = np.setdiff1d(np.unique(labels), [0, 255])
        if present_labels.size == 0:
            continue

        chosen = chooser.choice(present_labels)
        foreground = labels == chosen

        example = example.resize(im_shape)
        # The boolean array is already the right dtype for the paste mask;
        # `np.bool` no longer exists in current NumPy.
        example.paste(0, mask=Image.fromarray(~foreground))
        yield (example, int(foreground.sum()))

def class_mask_generator(voc, cl, im_shape=(224, 224)):
    """Yield `(image, area)` pairs where only pixels labelled `cl` are kept.

    For every `(example, segmentation)` pair in `voc`, the segmentation is
    resized to `im_shape` and compared against `cl`. Samples in which no pixel
    carries that label after resizing are skipped. Otherwise the example is
    resized to `im_shape` and every pixel outside the class is painted with 0.

    Args:
        voc: iterable of `(example, segmentation)` PIL image pairs.
        cl: integer label value to keep.
        im_shape: target size handed to `Image.resize`.

    Yields:
        `(example, area)` where `area` is the number of pixels equal to `cl`
        in the resized segmentation.
    """
    for example, segmentation in voc:
        # Nearest-neighbour keeps label values intact; any interpolation would
        # invent class ids that do not exist in the annotation.
        labels = np.asarray(segmentation.resize(im_shape, resample=Image.NEAREST))
        foreground = labels == cl
        area = int(foreground.sum())
        if area == 0:
            continue

        # Resize the example only once we know the sample is kept.
        example = example.resize(im_shape)
        # The boolean array is already the right dtype for the paste mask;
        # `np.bool` no longer exists in current NumPy.
        example.paste(0, mask=Image.fromarray(~foreground))
        yield (example, area)

def gen_df_from_voc(root_dir, dt, generator_fn, skip=True):
    """Materialise a generated dataset on disk and return its index table.

    Every `(img, area)` pair produced by `generator_fn(dt)` is saved as
    `<root_dir>/images/img_<i>.jpeg`, and a table with the columns `filename`
    and `label` (the area) is written to `<root_dir>/data.csv`.

    Args:
        root_dir: destination folder; created together with its `images`
            sub-folder when missing.
        dt: dataset object forwarded unchanged to `generator_fn`.
        generator_fn: callable taking `dt` and yielding `(img, area)` pairs,
            where `img` exposes a `save(path)` method.
        skip: when true and `data.csv` already exists, nothing is generated
            and the existing CSV is loaded and returned instead.

    Returns:
        A `pandas.DataFrame` with one row per saved image.
    """
    root_dir = path.abspath(root_dir)
    img_dir = path.join(root_dir, "images")
    df_dest = path.join(root_dir, "data.csv")
    if skip and path.exists(df_dest):
        print(f"Found existing dataset, skipping for {root_dir}...")
        return pd.read_csv(df_dest, index_col=0)

    for directory in [root_dir, img_dir]:
        if not path.exists(directory):
            # exist_ok guards against the directory appearing between the
            # existence check and the creation.
            os.makedirs(directory, exist_ok=True)
            print(f"Created directory {directory}")

    # Collect plain records and build the frame once: growing a DataFrame row
    # by row is slow and leaves every column with object dtype, unlike the
    # frame returned by the cached `read_csv` path above.
    records = []
    for i, (img, area) in enumerate(generator_fn(dt)):
        filename = f"img_{i}.jpeg"
        img.save(path.join(img_dir, filename))
        records.append({"filename": filename, "label": area})

    # Passing `columns` keeps the schema even when the generator yields nothing.
    df = pd.DataFrame(records, columns=["filename", "label"])
    df.to_csv(df_dest)
    return df

In [5]:
def main():
    """Generate every derived dataset from the VOC segmentation data.

    Loads the 'train' and 'val' image sets of `VOCSegmentation` (from the
    `train` and `test` sub-folders of `DT_ROOT`) and writes, in this order:
    the binary dataset, the random-class RGB dataset, and one single-class RGB
    dataset per entry of `object_categories`. Each dataset gets a `train` and
    a `test` split.
    """
    dt_train = torchvision.datasets.VOCSegmentation(
        root=path.join(DT_ROOT, 'train'), download=False, image_set='train'
    )
    dt_val = torchvision.datasets.VOCSegmentation(
        root=path.join(DT_ROOT, 'test'), download=False, image_set='val'
    )

    # Destination sub-folder name paired with the dataset that feeds it.
    splits = (("train", dt_train), ("test", dt_val))

    ## Binary, then RGB random class ##
    fixed_variants = (
        (DT_DEST_BINARY, gen_example_from_voc),
        (DT_DEST_RGB_RANDOM, random_class_mask_generator),
    )
    for dest_root, variant_generator in fixed_variants:
        for split_name, dataset in splits:
            gen_df_from_voc(
                path.join(dest_root, split_name), dataset, generator_fn=variant_generator
            )

    ## RGB single class ##
    # Class ids start at 1 and follow the order of `object_categories`.
    for class_id, category in enumerate(tqdm(object_categories), start=1):
        per_class_generator = partial(class_mask_generator, cl=class_id)
        class_root = DT_DEST_RGB_SINGLE_CLASS(category)
        for split_name, dataset in splits:
            gen_df_from_voc(
                path.join(class_root, split_name), dataset, generator_fn=per_class_generator
            )

Found existing dataset, skipping for /home/victor/datasets/VOC_FORMS/train...
Found existing dataset, skipping for /home/victor/datasets/VOC_FORMS/test...
Created directory /home/victor/datasets/VOC_FORMS_RGB/train
Created directory /home/victor/datasets/VOC_FORMS_RGB/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB/test
Created directory /home/victor/datasets/VOC_FORMS_RGB/test/images


  0%|          | 0/20 [00:00<?, ?it/s]

Created directory /home/victor/datasets/VOC_FORMS_RGB_AEROPLANE/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_AEROPLANE/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_AEROPLANE/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_AEROPLANE/test/images


  5%|▌         | 1/20 [00:11<03:34, 11.30s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_BICYCLE/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_BICYCLE/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_BICYCLE/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_BICYCLE/test/images


 10%|█         | 2/20 [00:22<03:22, 11.27s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_BIRD/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_BIRD/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_BIRD/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_BIRD/test/images


 15%|█▌        | 3/20 [00:33<03:12, 11.31s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_BOAT/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_BOAT/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_BOAT/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_BOAT/test/images


 20%|██        | 4/20 [00:45<03:00, 11.29s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_BOTTLE/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_BOTTLE/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_BOTTLE/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_BOTTLE/test/images


 25%|██▌       | 5/20 [00:56<02:49, 11.29s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_BUS/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_BUS/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_BUS/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_BUS/test/images


 30%|███       | 6/20 [01:07<02:38, 11.29s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_CAR/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_CAR/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_CAR/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_CAR/test/images


 35%|███▌      | 7/20 [01:19<02:27, 11.37s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_CAT/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_CAT/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_CAT/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_CAT/test/images


 40%|████      | 8/20 [01:30<02:17, 11.42s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_CHAIR/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_CHAIR/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_CHAIR/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_CHAIR/test/images


 45%|████▌     | 9/20 [01:42<02:06, 11.47s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_COW/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_COW/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_COW/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_COW/test/images


 50%|█████     | 10/20 [01:53<01:53, 11.39s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_DININGTABLE/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_DININGTABLE/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_DININGTABLE/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_DININGTABLE/test/images


 55%|█████▌    | 11/20 [02:04<01:42, 11.35s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_DOG/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_DOG/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_DOG/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_DOG/test/images


 60%|██████    | 12/20 [02:16<01:31, 11.41s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_HORSE/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_HORSE/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_HORSE/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_HORSE/test/images


 65%|██████▌   | 13/20 [02:27<01:19, 11.36s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_MOTORBIKE/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_MOTORBIKE/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_MOTORBIKE/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_MOTORBIKE/test/images


 70%|███████   | 14/20 [02:38<01:08, 11.34s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_PERSON/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_PERSON/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_PERSON/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_PERSON/test/images


 75%|███████▌  | 15/20 [02:52<00:59, 11.94s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_POTTEDPLANT/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_POTTEDPLANT/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_POTTEDPLANT/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_POTTEDPLANT/test/images


 80%|████████  | 16/20 [03:03<00:47, 11.76s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_SHEEP/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_SHEEP/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_SHEEP/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_SHEEP/test/images


 85%|████████▌ | 17/20 [03:14<00:34, 11.60s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_SOFA/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_SOFA/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_SOFA/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_SOFA/test/images


 90%|█████████ | 18/20 [03:26<00:23, 11.54s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_TRAIN/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_TRAIN/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_TRAIN/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_TRAIN/test/images


 95%|█████████▌| 19/20 [03:37<00:11, 11.49s/it]

Created directory /home/victor/datasets/VOC_FORMS_RGB_TVMONITOR/train
Created directory /home/victor/datasets/VOC_FORMS_RGB_TVMONITOR/train/images
Created directory /home/victor/datasets/VOC_FORMS_RGB_TVMONITOR/test
Created directory /home/victor/datasets/VOC_FORMS_RGB_TVMONITOR/test/images


100%|██████████| 20/20 [03:48<00:00, 11.45s/it]


In [None]:
# Run the full dataset generation only when this code is executed as the main program.
if __name__ == "__main__": main()