In [8]:
# Modified from the Berkeley BDD tools on GitHub: https://github.com/ucbdrive/bdd-data

import argparse
import os
from os import path as osp
import sys
from random import shuffle


def gen_list(data_root, data_dir, list_dir, phase, list_type, max_train_images=None, suffix='.jpg', seed=0):
    """Write a shuffled list of file stems found in one dataset split.

    Scans ``<data_root>/<data_dir>/<phase>`` for files whose name ends with
    ``suffix``, drops each file's extension, shuffles the resulting names and
    writes them, one per line, to ``<list_dir>/<phase>_<list_type>.txt``.

    The shuffle is reproducible: names are first sorted by their stem (the
    file name with ``suffix`` removed) and then permuted with a private RNG
    seeded by ``seed``. Two calls over folders holding the same stems (for
    example an images folder and its labels folder) therefore produce lists
    in the same order, and truncation keeps the same subset in both.

    Args:
        data_root: Root folder of the dataset.
        data_dir: Sub-folder of ``data_root`` that contains the phase folders.
        list_dir: Folder the list file is written to; created if missing.
        phase: Split name, e.g. ``'train'``, ``'val'`` or ``'test'``.
        list_type: Tag used in the output file name (``<phase>_<list_type>.txt``).
        max_train_images: If not ``None``, keep at most this many entries for
            the ``'train'`` phase and ``int(max_train_images / 10)`` entries
            for every other phase.
        suffix: Only files whose name ends with this string are listed.
        seed: Seed for the shuffle. Pass ``None`` for a different order on
            every run.

    Raises:
        ValueError: If the phase folder does not exist.
    """
    # Function-scope import: the file only imports ``shuffle`` from ``random``.
    from random import Random

    phase_dir = osp.join(data_root, data_dir, phase)
    print(phase_dir)
    if not osp.exists(phase_dir):
        raise ValueError('Can not find folder {}'.format(phase_dir))

    # os.listdir order is arbitrary, so sort before shuffling. Sorting by the
    # name *without* the suffix keeps the order identical between folders
    # whose files differ only in suffix.
    matching = sorted(
        (n for n in os.listdir(phase_dir) if n.endswith(suffix)),
        key=lambda n: n[:len(n) - len(suffix)])

    # Strip the real extension instead of assuming it is four characters long.
    images = [osp.splitext(n)[0] for n in matching]

    # Private RNG: reproducible and does not disturb the global random state.
    Random(seed).shuffle(images)
    print('Found', len(images), 'items in', data_dir, phase)

    if max_train_images is not None:
        if phase == 'train':
            truncate_to = max_train_images
        else:
            # Non-train splits are capped at a tenth of the train budget.
            truncate_to = int(max_train_images / 10)
        print('Truncating to ', truncate_to)
        images = images[0:truncate_to]

    out_path = osp.join(list_dir, '{}_{}.txt'.format(phase, list_type))
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(list_dir, exist_ok=True)
    print('Writing', out_path)
    with open(out_path, 'w') as fp:
        fp.write('\n'.join(images))


def gen_images(data_root, list_dir, max_train_images=None, image_type='100k'):
    """Write the ``<phase>_images.txt`` list for each dataset split.

    Calls ``gen_list`` once per split ('train', 'val', 'test') on the
    ``images`` sub-folder of ``data_root``, listing files ending in '.jpg'.

    Args:
        data_root: Root folder of the dataset.
        list_dir: Folder the list files are written to.
        max_train_images: Forwarded unchanged to ``gen_list``.
        image_type: Accepted for interface compatibility; not used here.
    """
    splits = ('train', 'val', 'test')
    for split in splits:
        gen_list(
            data_root,
            'images',
            list_dir,
            split,
            'images',
            max_train_images,
            suffix='.jpg',
        )


def gen_drivable(data_root, max_train_images=None):
    """Generate the image and label list files for the drivable-area task.

    Writes ``<phase>_images.txt`` and ``<phase>_labels.txt`` for the 'train',
    'val' and 'test' splits. Images are read from ``<data_root>/images/<phase>``
    and labels (files ending in '_train_id.png') from
    ``<data_root>/labels/<phase>``.

    The output folder is ``data/bdd100k/lists/100k/drivable`` relative to the
    current working directory, not to ``data_root``.

    Args:
        data_root: Root folder of the dataset.
        max_train_images: Optional cap forwarded to ``gen_images`` and
            ``gen_list``; ``None`` keeps every file.
    """
    image_type = '100k'
    label_dir = 'labels'
    # osp.join instead of a hard-coded backslash string: identical result on
    # Windows, and a real nested directory on POSIX systems.
    list_dir = osp.join('data', 'bdd100k', 'lists', '100k', 'drivable')

    gen_images(data_root, list_dir, max_train_images, image_type)

    for p in ['train', 'val', 'test']:
        gen_list(data_root, label_dir, list_dir, p, 'labels', max_train_images=max_train_images, suffix='_train_id.png')


def gen_seg(data_root):
    """Generate the image and label list files for the segmentation task.

    Image lists are written for 'train', 'val' and 'test' via ``gen_images``.
    Label lists are written for 'train' and 'val' only, from files ending in
    '_train_id.png' under ``<data_root>/seg/labels/<phase>``. No truncation is
    applied.

    The output folder is ``data/bdd100k/lists/10k/seg`` relative to the
    current working directory, not to ``data_root``.

    Args:
        data_root: Root folder of the dataset.
    """
    image_type = '10k'
    # osp.join instead of hard-coded backslashes: identical result on Windows,
    # and real nested directories on POSIX systems.
    label_dir = osp.join('seg', 'labels')
    list_dir = osp.join('data', 'bdd100k', 'lists', '10k', 'seg')

    # image_type must go by keyword: the third positional parameter of
    # gen_images is max_train_images, and passing '10k' there makes gen_list
    # slice with a string and raise TypeError.
    gen_images(data_root, list_dir, image_type=image_type)

    for p in ['train', 'val']:
        gen_list(data_root, label_dir, list_dir, p, 'labels', max_train_images=None, suffix='_train_id.png')
        


In [9]:
# Dataset root for this run. gen_drivable reads <data_path>/images/<phase> and
# <data_path>/labels/<phase>; the path is written with Windows-style separators.
data_path = 'data\\bdd100k\\seg\\';
# Write the image/label list files. max_train_images is left at its default
# (None), so no truncation is applied.
gen_drivable(data_path);


data\bdd100k\seg\images\train
Found 7000 items in images train
Writing data\bdd100k\lists\100k\drivable\train_images.txt
data\bdd100k\seg\images\val
Found 900 items in images val
Writing data\bdd100k\lists\100k\drivable\val_images.txt
data\bdd100k\seg\images\test
Found 100 items in images test
Writing data\bdd100k\lists\100k\drivable\test_images.txt
data\bdd100k\seg\labels\train
Found 7000 items in labels train
Writing data\bdd100k\lists\100k\drivable\train_labels.txt
data\bdd100k\seg\labels\val
Found 900 items in labels val
Writing data\bdd100k\lists\100k\drivable\val_labels.txt
data\bdd100k\seg\labels\test
Found 100 items in labels test
Writing data\bdd100k\lists\100k\drivable\test_labels.txt
