In [1]:
import os 
import cv2
import sys
import json

import numpy as np
import skimage.io as io

from skimage.draw import polygon
from skimage import img_as_float

In [2]:
# Dataset root directory; the functions below unpack os.listdir(root) into exactly two sub-folders.
root = './anue/'

# Get all labels in the dataset as a dictionary

In [3]:
def _collect_labels(split_dir, label_ids):
    """Register every unseen object label found under ``split_dir``.

    Walks each sub-folder of ``split_dir``, parses every file whose name
    contains ``'json'`` and, for each entry of its ``'objects'`` list, adds
    the entry's ``'label'`` to ``label_ids`` with the next free integer id.

    Args:
        split_dir: Directory whose sub-folders hold the annotation files.
        label_ids: Dict mapping label name -> integer id; updated in place.
    """
    for folder in os.listdir(split_dir):
        curr_seg_folder = os.path.join(split_dir, folder)
        for file in os.listdir(curr_seg_folder):
            if 'json' not in file:
                continue
            # Context manager so the handle is closed even if parsing fails.
            with open(os.path.join(curr_seg_folder, file), 'r') as f:
                data = json.load(f)
            for obj in data['objects']:
                # Dicts keep insertion order, so ids follow first-seen order.
                label_ids.setdefault(obj['label'], len(label_ids))


def get_all_labels(root):
    """Build a mapping from label name to integer id for the whole dataset.

    Labels are numbered from 0 in the order they are first encountered,
    scanning the ``train`` split first and then the ``val`` split.

    Args:
        root: Dataset root; must contain exactly two entries, the second of
            which (in ``os.listdir`` order) holds the annotation JSON files
            in ``train/<folder>/`` and ``val/<folder>/``.

    Returns:
        dict mapping each label string to its integer id.

    Raises:
        ValueError: if ``root`` does not contain exactly two entries.
    """
    # NOTE(review): os.listdir returns entries in arbitrary order, so which
    # folder ends up as the annotation folder is platform dependent - confirm
    # against the actual dataset layout.
    _, segmap_folder = os.listdir(root)

    final_labels = {}
    for split in ('train', 'val'):
        _collect_labels(os.path.join(root, segmap_folder, split), final_labels)
    return final_labels

In [None]:
# Build the label-name -> integer-id mapping from the annotation files under `root`.
labels = get_all_labels(root)

In [None]:
# Display the mapping as the cell output.
labels

# Generate Segmentation Maps

In [None]:
def _rasterize_split(img_dir, seg_dir, split, labels):
    """Write the images and rasterized label maps for one dataset split.

    For every image in ``img_dir/<folder>/`` the matching
    ``<id>_gtFine_polygons.json`` in ``seg_dir/<folder>/`` is loaded, its
    polygons are filled with their label ids, and the image and the label map
    are written to ``./img/<split>/<id>.png`` and ``./seg/<split>/<id>.png``.

    Args:
        img_dir: Directory holding the image sub-folders of this split.
        seg_dir: Directory holding the annotation sub-folders of this split.
        split: Output sub-directory name (``'train'`` or ``'val'``).
        labels: Dict mapping label name -> integer id.
    """
    for folder in os.listdir(seg_dir):
        curr_img_folder = os.path.join(img_dir, folder)
        curr_seg_folder = os.path.join(seg_dir, folder)

        for file in os.listdir(curr_img_folder):
            # The part of the file name before the first '_' identifies the sample.
            sample_id = file.split('_')[0]
            img = cv2.imread(os.path.join(curr_img_folder, file))

            json_path = os.path.join(curr_seg_folder, sample_id + '_gtFine_polygons.json')
            with open(json_path, 'r') as f:
                data = json.load(f)

            # The map is built as (width, height) because the polygon vertices
            # are used as (axis-0, axis-1) coordinates; it is transposed on write.
            # uint8 for both splits (train previously used the float64 default).
            # NOTE(review): uint8 assumes fewer than 256 labels - confirm.
            seg_map = np.zeros((data['imgWidth'], data['imgHeight']), np.uint8)
            for obj in data['objects']:
                poly = np.array(obj['polygon'])
                # An empty vertex list yields a 1-D array that cannot be
                # column-indexed; there is nothing to draw, so skip it.
                if poly.ndim != 2 or poly.shape[0] == 0:
                    continue
                rr, cc = polygon(poly[:, 0], poly[:, 1], seg_map.shape)
                seg_map[rr, cc] = labels[obj['label']]

            cv2.imwrite('./img/' + split + '/' + sample_id + '.png', img)
            cv2.imwrite('./seg/' + split + '/' + sample_id + '.png', seg_map.T)


def create_segmentation_maps(root, labels):
    """Convert polygon annotations into label-id images for train and val.

    Creates ``./img/{train,val}`` and ``./seg/{train,val}`` if needed, then
    writes every dataset image as a PNG alongside a same-named PNG whose pixel
    values are the label ids of the polygons covering them (0 where no polygon
    was drawn). Later polygons overwrite earlier ones where they overlap.

    Args:
        root: Dataset root; must contain exactly two entries which, in
            ``os.listdir`` order, are the image folder and the annotation
            folder, each with ``train`` and ``val`` sub-directories.
        labels: Dict mapping label name -> integer id.

    Raises:
        ValueError: if ``root`` does not contain exactly two entries.
        KeyError: if an annotation uses a label missing from ``labels``.
    """
    # NOTE(review): os.listdir order is arbitrary, so the image/annotation
    # assignment below is platform dependent - confirm against the dataset.
    img_folder, segmap_folder = os.listdir(root)

    for kind in ('img', 'seg'):
        for split in ('train', 'val'):
            os.makedirs(os.path.join(kind, split), exist_ok=True)

    for split in ('train', 'val'):
        _rasterize_split(
            os.path.join(root, img_folder, split),
            os.path.join(root, segmap_folder, split),
            split,
            labels,
        )

In [None]:
# Write the images and label-id maps for both splits under ./img and ./seg.
create_segmentation_maps(root, labels)

# Create Subset

In [4]:
# Seed NumPy's global RNG so the permutations drawn below are repeatable.
np.random.seed(42)

In [9]:
# Number of samples kept in each subset.
train_subset_size = 1000
val_subset_size = 100

In [10]:
# Source directories of the full image / label-map sets for each split.
train_img_path = './img/train/'
train_seg_path = './seg/train/'
val_img_path = './img/val/'
val_seg_path = './seg/val/'

In [11]:
# Shuffle the positions 0..N-1 of each split's image listing and keep the
# first `*_subset_size` of them (train is drawn before val).
num_train_images = len(os.listdir(train_img_path))
train_shuffled = np.random.permutation(num_train_images)
train_random_indices = train_shuffled[:train_subset_size]

num_val_images = len(os.listdir(val_img_path))
val_shuffled = np.random.permutation(num_val_images)
val_random_indices = val_shuffled[:val_subset_size]

In [12]:
# Create the four subset output directories that do not exist yet.
for subset_dir in ('img/subset_train', 'img/subset_val',
                   'seg/subset_train', 'seg/subset_val'):
    if os.path.exists('./' + subset_dir):
        continue
    os.makedirs(subset_dir)

In [13]:
# Copy the selected training samples into the subset folders, named by index.
# The listings are sorted because os.listdir returns entries in arbitrary
# order; without it the seeded indices would pick different files per machine.
train_img_list, train_seg_list = sorted(os.listdir(train_img_path)), sorted(os.listdir(train_seg_path))
for i in train_random_indices:
    # The label map is stored under the same file name as its image, so look
    # it up by name rather than by position in a separately listed directory.
    fname = train_img_list[i]
    img = cv2.imread(os.path.join(train_img_path, fname))
    # NOTE(review): imread's default flag loads the mask as a 3-channel image,
    # so the copy is 3-channel too - confirm that is what downstream expects.
    seg = cv2.imread(os.path.join(train_seg_path, fname))

    cv2.imwrite('./img/subset_train/'+str(i)+'.png', img)
    cv2.imwrite('./seg/subset_train/'+str(i)+'.png', seg)

In [14]:
# Copy the selected validation samples into the subset folders, named by index.
# The listings are sorted because os.listdir returns entries in arbitrary
# order; without it the seeded indices would pick different files per machine.
val_img_list, val_seg_list = sorted(os.listdir(val_img_path)), sorted(os.listdir(val_seg_path))
for i in val_random_indices:
    # The label map is stored under the same file name as its image.
    fname = val_img_list[i]
    img = cv2.imread(os.path.join(val_img_path, fname))
    # NOTE(review): imread's default flag loads the mask as a 3-channel image,
    # so the copy is 3-channel too - confirm that is what downstream expects.
    seg = cv2.imread(os.path.join(val_seg_path, fname))

    cv2.imwrite('./img/subset_val/'+str(i)+'.png', img)
    cv2.imwrite('./seg/subset_val/'+str(i)+'.png', seg)