In [42]:
# Folder that receives the raw, unpacked contents of both archives.
UNZIP_PATH = './raw_unzip'

# Input archives: the image archive and the label archive.
DATASET_ZIP_PATH = './bdd100k_images_10k.zip'
LABEL_ZIP_PATH = './bdd100k_labels_release.zip'

# Extract the zip files

In [43]:
import zipfile

In [44]:
# Unpack the label archive first, then the image archive, into UNZIP_PATH.
for archive_path in (LABEL_ZIP_PATH, DATASET_ZIP_PATH):
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(UNZIP_PATH)

# Move images into the dataset folders
- folder `A` holds the night images
- folder `B` holds the daytime images

In [45]:
# imports and path / configuration constants used to build the dataset
import json
import os
from tqdm import tqdm
import shutil
import random

# Name of the output dataset folder (created under 'datasets') and the
# probability with which an image is assigned to the train split.
DATASET_NAME = 'BDD100K'
TRAIN_TEST_SPLIT_RATIO = 0.8

# Locations inside the unpacked archives: the image root, then the
# validation and training label files.
SOURCE_PATH = os.path.join(UNZIP_PATH, 'bdd100k', 'images', '10K')
VAL_LABEL_PATH = os.path.join(UNZIP_PATH, 'bdd100k', 'labels', 'bdd100k_labels_images_val.json')
TRAIN_LABEL_PATH = os.path.join(UNZIP_PATH, 'bdd100k', 'labels', 'bdd100k_labels_images_train.json')

In [52]:
# Start from a clean slate: delete any previously generated dataset folder.
dataset_root = os.path.join('datasets', DATASET_NAME)
if os.path.exists(dataset_root):
    shutil.rmtree(dataset_root)

In [46]:
# Create the dataset folder layout: a train and a test folder for each of the
# two image groups (A and B) under datasets/<DATASET_NAME>.
for path in [
    os.path.join('datasets', DATASET_NAME, 'trainA'),
    os.path.join('datasets', DATASET_NAME, 'trainB'),
    os.path.join('datasets', DATASET_NAME, 'testA'),
    os.path.join('datasets', DATASET_NAME, 'testB')
]:
    # os.makedirs also creates the missing parents ('datasets' and
    # 'datasets/<DATASET_NAME>'), so they need no entries of their own.
    # exist_ok=True replaces the separate os.path.exists check, which left a
    # window between the check and the creation, and keeps the cell rerunnable.
    os.makedirs(path, exist_ok=True)

In [47]:
# Load the training and validation label files.
# Each file is opened in a `with` block so its handle is closed as soon as it
# has been parsed (a bare json.load(open(...)) leaves that to the garbage
# collector). Binary mode lets the json module detect the JSON encoding itself
# instead of relying on the platform's default text encoding.
with open(TRAIN_LABEL_PATH, 'rb') as label_file:
    train_label = json.load(label_file)
with open(VAL_LABEL_PATH, 'rb') as label_file:
    val_label = json.load(label_file)

In [48]:
# Map image file name -> attributes for every entry of both label sets.
# Training entries are inserted first, so a validation entry with the same
# name replaces the training one.
image_label = {
    entry['name']: entry['attributes']
    for label_set in (train_label, val_label)
    for entry in label_set
}

# The raw label lists are no longer needed once the lookup table exists.
del train_label, val_label

In [49]:
# Copy every labelled image into the train/test folder matching its time of day:
#   'night'   -> trainA / testA
#   'daytime' -> trainB / testB
# Images with no label entry, or with any other `timeofday` value, are skipped.
TIMEOFDAY_TO_FOLDER_SUFFIX = {'night': 'A', 'daytime': 'B'}

# A dedicated, seeded generator makes the train/test split reproducible across
# reruns of this cell without touching the global `random` state.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

dataset_root = os.path.join('datasets', DATASET_NAME)

# os.listdir returns entries in arbitrary order; sorting keeps each seeded
# draw paired with the same image on every run.
for subdir in sorted(os.listdir(SOURCE_PATH)):
    curr_path = os.path.join(SOURCE_PATH, subdir)
    if not os.path.isdir(curr_path):
        # A stray file next to the image folders would make os.listdir raise.
        continue

    looper = tqdm(sorted(os.listdir(curr_path)), desc=subdir, unit='image')
    for image in looper:
        # set_description refreshes the bar by default, so no explicit
        # refresh() call is needed afterwards.
        looper.set_description(f'{subdir} - {image}')

        if image not in image_label:
            continue

        folder_suffix = TIMEOFDAY_TO_FOLDER_SUFFIX.get(image_label[image]['timeofday'])
        if folder_suffix is None:
            continue

        # Draw once per copied image: below the ratio -> train, otherwise test.
        split = 'train' if split_rng.random() < TRAIN_TEST_SPLIT_RATIO else 'test'
        shutil.copy(
            os.path.join(curr_path, image),
            os.path.join(dataset_root, split + folder_suffix, image),
        )

test - ae7bcd76-00000000.jpg:   0%|          | 0/2000 [00:00<?, ?image/s]

test - fffc7bba-f05de9f5.jpg: 100%|██████████| 2000/2000 [00:04<00:00, 482.75image/s]
train - ff3da814-c3463a43.jpg: 100%|██████████| 7000/7000 [00:42<00:00, 163.79image/s]
val - ff7b98c7-3cb964ac.jpg: 100%|██████████| 1000/1000 [00:02<00:00, 455.79image/s]


# Clear unused folder

In [50]:
# Delete the raw unzip folder (UNZIP_PATH) together with everything in it.
# The copy loop above used shutil.copy, so the files already placed under
# 'datasets' are independent copies and are not affected by this removal.
shutil.rmtree(UNZIP_PATH)