In [1]:
import os
import re
import numpy as np
from PIL import Image, ImageEnhance
import matplotlib.pyplot as plt
from tqdm import tqdm
import random

In [2]:
obj_files = [x for x in os.listdir('objects/') if x[-3:] == 'png']
scene_files = [x for x in os.listdir('scenes/') if x[-3:] in ['png','peg','jpg']]

height, width = [], []
for obj_file in obj_files:
    height.append(Image.open('objects/'+obj_file).height)
    width.append(Image.open('objects/'+obj_file).width)
print('Max width: {}, Max height: {}'.format(max(height), max(width)))

Max width: 332, Max height: 213


In [3]:
def get_class(obj_file):
    return list(filter(None, re.split(r'(\d+)', obj_file)))[0]

In [4]:
def shuffle_rgb(obj):
    order = [0,1,2]
    random.shuffle(order)
    new_obj = np.array(obj)[:,:,order+[3]]
    # if the red channel is switched, take the weighted average of the new one (30%) w original (70%)
    if order[0] != 0:
        new_obj = np.array(new_obj*0.3 + np.array(obj)*0.7, dtype=np.uint8)
    return Image.fromarray(new_obj)

In [5]:
def add_noise(obj):
    w,h = obj.size
    noise = np.random.randint(0,255,size=(h,w,4))
    # set alpha channel to 0
    noise[:,:,3] = 0
    # random threshold for noise (up to 64 in magnitude)
    threshold = np.random.rand()*64
    noise[noise > threshold] = 0
    new_obj = np.array(obj) + noise
    new_obj[new_obj > 255] = 255
    new_obj = np.array(new_obj, dtype=np.uint8)
    return Image.fromarray(new_obj)

In [9]:
print(len(obj_files))
print(len(scene_files))

101
41


In [10]:
frame_w = 480
frame_h = 288
image_id = 0
no_of_images = 4000

for i in tqdm(range(no_of_images)):
    # pick scenes randomly
    scene_file = 'scenes/' + scene_files[np.random.randint(len(scene_files))]
    scene = Image.open(scene_file).resize((480, 302)).crop((0,14,480,302))
    
    """visual augmentation"""
    # 50% horizontal flip
    if np.random.rand() > 0.5:
        scene = scene.transpose(method=Image.FLIP_LEFT_RIGHT)
        
    # 50% enhance contrast by 0.8 to 1.5 (1 = no change)
    if np.random.rand() > 0.5:
        scene = ImageEnhance.Contrast(scene).enhance(np.random.rand()*0.7+0.8)

    # object count
    count = 5
    classes = {'person': '0', 'bicycle': '1', 'motorbike': '2'}
    objs = []
    obj_classes = []
    selected = []
    labels = []

    while len(selected) < count:
        obj_file = obj_files[np.random.randint(len(obj_files))]
        # make sure no duplicate objects
        if obj_file in selected:
            continue
        selected.append(obj_file)
        obj = Image.open('objects/' + obj_file)
        objs.append(obj)
        obj_classes.append(get_class(obj_file))

    for i,obj in enumerate(objs):
        w, h = obj.size
        scale = 1
        
        """scaling augmentation"""
        # resize objects, with max(w,h) of 24-120, up to 1.2x the orignal max(w,h) size
        max_scale = 1.2
        scale = min(max_scale, np.random.randint(24,145)/max(w,h))
        obj = obj.resize((int(scale*w), int(scale*h)), Image.ANTIALIAS)
        w, h = obj.size # update w and h
        
        # downsize max(w,h) to 90%, 50% of the time
        if np.random.rand() > 0.5:
            if max(w,h) == h:
                obj = obj.resize((int(w), int(h*0.9)), Image.ANTIALIAS) 
            else:
                obj = obj.resize((int(w*0.9), int(h)), Image.ANTIALIAS) 
            w, h = obj.size # update w and h
        
        """visual augmentation"""
        # change RGB channels 50% of the time
        if np.random.rand() > 0.5:
            obj = shuffle_rgb(obj)
            
        # add noise 50% of the time
        if np.random.rand() > 0.5:
            obj = add_noise(obj)
            
        # 50% horizontal flip
        if np.random.rand() > 0.5:
            obj = obj.transpose(method=Image.FLIP_LEFT_RIGHT)
            
        # 50% enhance contrast by 0.8 to 1.5 (1 = no change)
        if np.random.rand() > 0.5:
            obj = ImageEnhance.Contrast(obj).enhance(np.random.rand()*0.7+0.8)

        """position settings"""
        # in the center of N equally spaced columns + jitter (+/- 0.25*x_grid)
        x_grid = int(480/count)
        x_pos = int(i*x_grid + x_grid/2 - w/2) + int((np.random.rand()-0.5)*x_grid/2)
        x_pos = min(int(480 - w), max(0,x_pos)) # make sure obj is in image range

        # random vertical position with 30% top margin, 0 btm margin
        y_pos = int((288 - h)*(np.random.rand()*0.7+0.3))
        
        """paste object into scene and create label"""
        alpha = Image.fromarray(np.array(obj)[:,:,3], 'L')
        scene.paste(obj, (x_pos, y_pos), alpha)

        # class, x, y, w, h
        label = ' '.join([classes[obj_classes[i]], str((x_pos + w/2)/frame_w)[:8], str((y_pos + h/2)/frame_h)[:8],
                          str(w/frame_w)[:8], str(h/frame_h)[:8]])
        # print(label, obj_classes[i])
        labels.append(label)
    
    file_id = 'vru_image' + str(int(image_id)).zfill(6)

    # save image and label files
    scene.save('data/'+file_id + ".png", "PNG")
    
    with open('data/'+file_id+'.txt', 'w+') as the_file:
        the_file.writelines([l + '\n' for l in labels])
    image_id += 1

100%|██████████| 4000/4000 [03:53<00:00, 18.37it/s]


---

# VRU21k: 4k vru + 17k nuScenes/BDD
### Select data from nuScenes and BDD100k to mix with augmented data

In [11]:
with open('/home/sf/darknet/data/bdd100k/bdd100k_480_3c_train.txt', 'r') as f:
    bdd_files = [x[:-1] for x in f.readlines()]

with open('/home/sf/darknet/data/nuscenes/train.txt', 'r') as f:
    nuscenes_files = [x[:-1] for x in f.readlines()]

### bdd100k

In [17]:
"""
select bdd100k images with at least N labels, with either a motorbike or bicycle
- 24225 images have at least 1 label
- 17119 have at least 2
- 12178 have at least 3
- 9337 have at least 4
"""
N = 2
bdd_filtered = []
for bdd in tqdm(bdd_files):
    label_file = bdd[:-3] + 'txt'
    if os.path.isfile(label_file):
        with open(label_file) as f:
            labels = f.readlines()
            length = len(labels)
            
        if length >= N*2:
            bdd_filtered.append(bdd)
            continue
        if length >= N:
            classes = [x[0] for x in labels]
            if ('1' in classes) | ('2' in classes):
                bdd_filtered.append(bdd)

100%|██████████| 70000/70000 [00:11<00:00, 5884.99it/s]


In [18]:
len(bdd_filtered)

11477

In [22]:
with open('bdd{}_3c_atleast{}.txt'.format(len(bdd_filtered),N), 'w+') as the_file:
    the_file.writelines([l + '\n' for l in bdd_filtered])

### nuScenes

In [19]:
# select nuSenes images with at least N labels
# 8297 has at least 4 labels
N = 2
nus_filtered = []
for nus in tqdm(nuscenes_files):
    label_file = nus[:-3] + 'txt'
    if os.path.isfile(label_file):
        with open(label_file) as f:
            labels = f.readlines()
            length = len(labels)
        
        if length >= N*2:
            nus_filtered.append(nus)
            continue    
        if (length >= N):
            classes = [x[0] for x in labels]
            if ('1' in classes) | ('2' in classes):
                nus_filtered.append(nus)

100%|██████████| 43503/43503 [00:06<00:00, 6399.69it/s]


In [20]:
len(nus_filtered)

11291

In [21]:
with open('nuscenes{}_3c_atleast{}.txt'.format(len(nus_filtered),N), 'w+') as the_file:
    the_file.writelines([l + '\n' for l in nus_filtered])

## Get names of VRU images, combine with nuscenes and bdd100k

In [44]:
vru_files = ['/media/sf/61CEC7C06EDB83CC/vru/data/vru4000_6jun/'+x for x in os.listdir('/media/sf/61CEC7C06EDB83CC/vru/data/vru4000_6jun/') if x[-3:] == 'png']

In [45]:
nusbdd = nus_filtered + bdd_filtered
random.shuffle(nusbdd)
vru21k = nusbdd[:17000] + vru_files
random.shuffle(vru21k)
print(len(vru21k))

vru21k_val = nusbdd[17000:]
print(len(vru21k_val))

21000
5768


In [46]:
with open('vru21k_train.txt', 'w+') as the_file:
    the_file.writelines([l + '\n' for l in vru21k])

In [47]:
with open('vru21k_val5768.txt', 'w+') as the_file:
    the_file.writelines([l + '\n' for l in vru24k_val])

---

# VRU2k
### Select data from nuScenes and BDD100k to mix with augmented data

In [485]:
with open('/home/sf/darknet/data/bdd100k/bdd100k_480_3c_train.txt', 'r') as f:
    bdd_files = [x[:-1] for x in f.readlines()]

with open('/home/sf/darknet/data/nuscenes/train.txt', 'r') as f:
    nuscenes_files = [x[:-1] for x in f.readlines()]

### bdd100k

In [737]:
"""
select bdd100k images with at least N labels, with either a motorbike or bicycle
- 24225 images have at least 1 label
- 17119 have at least 2
- 12178 have at least 3
- 9337 have at least 4
"""
N = 3
bdd_filtered = []
for bdd in tqdm(bdd_files):
    label_file = bdd[:-3] + 'txt'
    if os.path.isfile(label_file):
        with open(label_file) as f:
            labels = f.readlines()
            length = len(labels)
            
        if (length >= N):
            classes = [x[0] for x in labels]
            if ('1' in classes) | ('2' in classes):
                bdd_filtered.append(bdd)

100%|██████████| 70000/70000 [00:14<00:00, 4781.72it/s]


In [738]:
len(bdd_filtered)

4134

In [534]:
with open('bdd{}_3c_atleast{}.txt'.format(len(bdd_filtered),N), 'w+') as the_file:
    the_file.writelines([l + '\n' for l in bdd_filtered])

### nuScenes

In [739]:
# select nuSenes images with at least N labels
# 8297 has at least 4 labels
N = 3
nus_filtered = []
for nus in tqdm(nuscenes_files):
    label_file = nus[:-3] + 'txt'
    if os.path.isfile(label_file):
        with open(label_file) as f:
            labels = f.readlines()
            length = len(labels)
            
        if (length >= N):
            classes = [x[0] for x in labels]
            if ('1' in classes) | ('2' in classes):
                nus_filtered.append(nus)

100%|██████████| 43503/43503 [00:07<00:00, 5517.27it/s]


In [740]:
len(nus_filtered)

3209

In [539]:
with open('nuscenes{}_3c_atleast{}.txt'.format(len(nus_filtered),N), 'w+') as the_file:
    the_file.writelines([l + '\n' for l in nus_filtered])

## Get names of VRU images, combine with nuscenes and bdd100k

In [590]:
vru_files = ['/media/sf/61CEC7C06EDB83CC/vru/data/'+x for x in os.listdir('/media/sf/61CEC7C06EDB83CC/vru/data/vru200_4jun/') if x[-3:] == 'png']

In [600]:
nusbdd = nus_filtered + bdd_filtered
random.shuffle(nusbdd)
vru2k = nusbdd[:1800] + vru_files
random.shuffle(vru2k)

In [601]:
with open('vru2k.txt', 'w+') as the_file:
    the_file.writelines([l + '\n' for l in vru2k])

---

In [312]:
def create_grid(x,y):
    positions = []
    for i in range(x):
        for j in range(y):
            positions.append((i+1,j+1))
    return positions

In [313]:
def is_large(PILimage):
    return (PILimage.width > 60),(PILimage.height > 48)