In [None]:
import os
import random
import shutil

import numpy as np

import mmcv
from mmengine import load, dump

In [None]:
# PATHING
# =======
# Directories.
# expanduser('~') resolves the home directory even when $HOME is not set,
# whereas os.getenv('HOME') would hand None to os.path.join and raise.
HOME_ROOT = os.path.expanduser('~')
DATA_ROOT = os.path.join(HOME_ROOT, '.Data/bdd/bdd100k')
IMAGES_ROOT = f"{DATA_ROOT}/images/100k"
train_images_dir = f"{IMAGES_ROOT}/train"
ANNOS_ROOT = f"{DATA_ROOT}/labels/det_20" # det_train.json, det_val.json

# Files
det_train_fpath = f"{ANNOS_ROOT}/det_train.json"
det_val_fpath = f"{ANNOS_ROOT}/det_val.json"

In [None]:
# Load the detection training annotations (det_train.json) into memory.
data = load(det_train_fpath)

In [None]:
# Check what kind of container the loaded annotations are.
type(data)

list

In [None]:
# Check the type of a single annotation entry.
type(data[0])

dict

In [None]:
# Count the annotation entries that were loaded.
len(data)

69863

In [None]:
# List the fields present on one annotation entry.
data[0].keys()

dict_keys(['name', 'attributes', 'timestamp', 'labels'])

In [None]:
# Okay, list[dict], that makes life easy.
# Let's just take however many samples we desire for our overfit test data.
#
# NOTE: random.choices() draws WITH replacement, so in general it can return
# the same frame more than once (random.sample() is the without-replacement
# alternative). It is kept here so that this seed keeps reproducing the same
# subset; the assertion below makes a duplicated draw fail loudly instead of
# silently producing a smaller image set than the label list implies.
k = 13
seed = 42069
random.seed(seed)
overfit_test_data = random.choices(data, k=k)

sampled_names = {datum['name'] for datum in overfit_test_data}
assert len(sampled_names) == k, (
    f"expected {k} distinct images but drew {len(sampled_names)}; "
    "pick another seed or switch to random.sample()"
)

In [None]:
# Confirm how many entries were drawn for the overfit subset.
len(overfit_test_data)

13

In [None]:
# Look at one sampled entry in full to see the label structure.
overfit_test_data[0]

{'name': '966ce84f-e03e0697.jpg',
 'attributes': {'weather': 'clear', 'timeofday': 'night', 'scene': 'highway'},
 'timestamp': 10000,
 'labels': [{'id': '1067402',
   'attributes': {'occluded': False,
    'truncated': False,
    'trafficLightColor': 'R'},
   'category': 'traffic light',
   'box2d': {'x1': 258.401191,
    'y1': 193.816491,
    'x2': 271.58085,
    'y2': 205.897843}},
  {'id': '1067403',
   'attributes': {'occluded': False,
    'truncated': False,
    'trafficLightColor': 'R'},
   'category': 'traffic light',
   'box2d': {'x1': 639.1967567011496,
    'y1': 177.18900711446346,
    'x2': 653.3925255431986,
    'y2': 189.44898929623307}},
  {'id': '1067404',
   'attributes': {'occluded': False,
    'truncated': False,
    'trafficLightColor': 'NA'},
   'category': 'car',
   'box2d': {'x1': 557.6780546602794,
    'y1': 246.5969523205588,
    'x2': 580.2197881846748,
    'y2': 257.58868121495635}},
  {'id': '1067405',
   'attributes': {'occluded': False,
    'truncated': Fals

In [None]:
# The sampled labels are ready to be serialized to JSON.
# Before doing that, resolve every sampled entry to its source image path so
# the images can be copied into a new directory.
image_filenames = []
image_filepaths = []
for datum in overfit_test_data:
    image_filenames.append(datum['name'])
    image_filepaths.append(f"{train_images_dir}/{datum['name']}")
print(image_filepaths)

['/home/evan/.Data/bdd/bdd100k/images/100k/train/966ce84f-e03e0697.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/9a1d74fb-93c56fe0.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/104409d4-c9800471.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/43f373b6-f9e4f050.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/2032bc98-e3827882.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/a0fd0237-4ca56bbe.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/576733c5-7465267d.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/65ee894d-f740d9ca.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/3a7ccba7-cf9980d4.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/ae08e429-98695ed8.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/a6d12e21-fc3e2cb4.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/4172f136-adc862a0.jpg', '/home/evan/.Data/bdd/bdd100k/images/100k/train/2233ded5-e632af44.jpg']


In [None]:
# Setup paths and makedirs.
# The destination lives under DATA_ROOT (not IMAGES_ROOT) so that the images
# land in the same overfit_data/det20_overfit_A tree that is listed afterwards
# and that the labels are written into.
destination_dir = f"{DATA_ROOT}/overfit_data/det20_overfit_A/100k/train"
os.makedirs(destination_dir, exist_ok=True)

# Now copy everything over, reporting each file only once its copy succeeded.
for filepath in image_filepaths:
    shutil.copy(filepath, destination_dir)
    print('Copied: ', filepath)

Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/966ce84f-e03e0697.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/9a1d74fb-93c56fe0.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/104409d4-c9800471.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/43f373b6-f9e4f050.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/2032bc98-e3827882.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/a0fd0237-4ca56bbe.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/576733c5-7465267d.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/65ee894d-f740d9ca.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/3a7ccba7-cf9980d4.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/ae08e429-98695ed8.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/a6d12e21-fc3e2cb4.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/4172f136-adc862a0.jpg
Copied:  /home/evan/.Data/bdd/bdd100k/images/100k/train/2233ded5

In [None]:
# Directory holding the overfit subset's images, derived from DATA_ROOT rather
# than spelled out as a machine-specific absolute path.
destination_dir = f"{DATA_ROOT}/overfit_data/det20_overfit_A/100k/train"

In [None]:
# List the destination directory to check that the images are present.
ls $destination_dir

104409d4-c9800471.jpg  43f373b6-f9e4f050.jpg  a0fd0237-4ca56bbe.jpg
2032bc98-e3827882.jpg  576733c5-7465267d.jpg  a6d12e21-fc3e2cb4.jpg
2233ded5-e632af44.jpg  65ee894d-f740d9ca.jpg  ae08e429-98695ed8.jpg
3a7ccba7-cf9980d4.jpg  966ce84f-e03e0697.jpg
4172f136-adc862a0.jpg  9a1d74fb-93c56fe0.jpg


In [None]:
# Cool, all there.
# Now let's dump our labels next to the copied images, under the same
# overfit_data/det20_overfit_A tree (path derived from DATA_ROOT instead of a
# machine-specific absolute path).
dst_labels = f"{DATA_ROOT}/overfit_data/det20_overfit_A/labels/det_20/train/det_train.json"
# Make sure the target directory exists before writing; this is a no-op when
# it is already there.
os.makedirs(os.path.dirname(dst_labels), exist_ok=True)
dump(overfit_test_data, dst_labels)

In [None]:
# Confirm that the labels file was written.
ls $dst_labels

/home/evan/.Data/bdd/bdd100k/overfit_data/det20_overfit_A/labels/det_20/train/det_train.json


In [None]:
# The overfit dataset was then archived manually from a terminal (not by this notebook), all set!
# /home/evan/.Data/bdd/bdd100k/overfit_data/det20_train_overfit_0xa455.zip