# Setup 

In [1]:
from sys import path
path.append('../src/')

import cv2
import numpy as np
import pickle as pkl
import matplotlib.pyplot as plt
from os import makedirs, path
from glob import glob
from global_config import FILE_DUMP, FOLDER_DATA_AUGMENTATION
from mrk_file import MRKFile
from keras.preprocessing.image import ImageDataGenerator

%matplotlib inline

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def data_augmentation(datagen, output_folder, images, mrks, amount, prefix, seed=42):
    """Write `amount` augmented images and their annotations to `output_folder`.

    Samples are drawn one at a time from
    ``datagen.flow(images, mrks, batch_size=1, seed=seed)``. Sample ``i`` is
    stored as ``<prefix><i:04>.png`` (through ``cv2.imwrite``) and
    ``<prefix><i:04>.mrk`` (through the annotation object's ``save`` method).

    Parameters
    ----------
    datagen : object
        Anything exposing ``flow(x, y, batch_size=..., seed=...)`` that yields
        ``(image_batch, annotation_batch)`` pairs, e.g. an ImageDataGenerator.
    output_folder : str
        Destination directory. Created when missing; a trailing path
        separator is optional.
    images : array-like
        Images handed to ``datagen.flow`` as ``x``.
    mrks : array-like
        Annotation objects handed to ``datagen.flow`` as ``y``; each must
        provide ``save(filename)``.
    amount : int
        Number of samples to write.
    prefix : str
        File-name prefix placed before the zero-padded sample index.
    seed : int, optional
        Seed forwarded to ``datagen.flow``.

    Raises
    ------
    IOError
        If ``cv2.imwrite`` reports that an image could not be written.
    """
    # `exist_ok=True` already covers the "folder exists" case, so no prior check is needed.
    makedirs(output_folder, exist_ok=True)

    samples = datagen.flow(images, mrks, batch_size=1, seed=seed)
    for i, (img, mrk) in enumerate(samples):
        # The flow is iterated until we stop it ourselves.
        if i >= amount:
            break

        print('{} of {}'.format(i + 1, amount), end='\r')
        # Join instead of concatenating so a folder given without a trailing
        # separator still receives the files inside it.
        output_file = path.join(output_folder, '{}{:04}'.format(prefix, i))

        image_file = '{}.png'.format(output_file)
        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(image_file, img[0]):
            raise IOError('could not write image file: {}'.format(image_file))

        # NOTE(review): the annotation is saved exactly as received; if .mrk files
        # hold image coordinates they will not reflect the flips/shifts applied
        # to the image above -- confirm this is intended.
        mrk[0].save('{}.mrk'.format(output_file))
    print()

# Load Dump 

In [3]:
# Load the dump: a 3-item pickle whose last two items are the image array and
# the annotation objects (the first item is not used in this notebook).
# NOTE: unpickling can execute arbitrary code -- only load dumps you produced yourself.
with open(FILE_DUMP, 'rb') as dump_file:  # context manager closes the handle
    _, images, mrks = pkl.load(dump_file)
# Array form is required so the boolean masks built below can index `mrks`.
mrks = np.array(mrks)

print(images.shape, images.dtype)
print(mrks.shape)

(5763, 224, 224, 3) float32
(5763,)


# 2. Blurred

In [4]:
# Augmentation settings for the `blurred` requirement.
datagen = ImageDataGenerator(
    fill_mode='constant',
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.blurred.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '2-blurred/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='2_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='2_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '2_d_')

4871 892 0
5000 of 5000
5000 of 5000


# 3. Looking Away

In [5]:
# Augmentation settings for the `looking_away` requirement.
datagen = ImageDataGenerator(
    rotation_range=15,
    brightness_range=[0.9, 1.1],
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.looking_away.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '3-looking_away/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='3_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='3_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '3_d_')

3720 2043 0
5000 of 5000
5000 of 5000


# ~~4. Ink Marked/Creased~~

In [6]:
# Class counts only for `ink_marked_creased`; this cell runs no augmentation.
# Masks: 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.ink_marked_creased.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

5763 0 0


# 5. Unnatural Skin Tone 

In [7]:
# Augmentation settings for the `unnatural_skin_tone` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.1,
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.unnatural_skin_tone.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '5-unnatural_skin_tone/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='5_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='5_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '5_d_')

2540 3223 0
5000 of 5000
5000 of 5000


# 6. Too Dark/Light 

In [8]:
# Augmentation settings for the `too_dark_light` requirement.
datagen = ImageDataGenerator(
    fill_mode='nearest',
    zoom_range=0.1,
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.too_dark_light.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '6-too_dark_light/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='6_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='6_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '6_d_')

4711 1052 0
5000 of 5000
5000 of 5000


# 7. Washed Out

In [9]:
# Augmentation settings for the `washed_out` requirement.
datagen = ImageDataGenerator(
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.washed_out.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '7-washed_out/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='7_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='7_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '7_d_')

5690 73 0
5000 of 5000
5000 of 5000


# *8. Pixelation*

In [10]:
# Augmentation settings for the `pixelation` requirement.
datagen = ImageDataGenerator(
    brightness_range=[0.9, 1.1],
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.pixelation.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '8-pixelation/')

# Only the mask_c and mask_nc subsets are augmented here.
data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='8_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='8_nc_')

5368 395 0
5000 of 5000
5000 of 5000


# 9. Hair Across Eyes 

In [11]:
# Augmentation settings for the `hair_across_eyes` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    brightness_range=[0.9, 1.1],
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.hair_across_eyes.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '9-hair_across_eyes/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='9_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='9_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '9_d_')

4252 1511 0
5000 of 5000
5000 of 5000


# 10. Eyes Closed 

In [12]:
# Augmentation settings for the `eyes_closed` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    brightness_range=[0.9, 1.1],
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.eyes_closed.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '10-eyes_closed/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='10_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='10_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '10_d_')

4440 1323 0
5000 of 5000
5000 of 5000


# 11. Varied Background

In [13]:
# Augmentation settings for the `varied_background` requirement:
# only flips and brightness changes, no shifts/rotation/zoom.
datagen = ImageDataGenerator(
    brightness_range=[0.9, 1.1],
    horizontal_flip=True,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.varied_background.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '11-varied_bgd/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='11_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='11_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '11_d_')

2997 2766 0
5000 of 5000
5000 of 5000


# 12. Roll/pitch/yaw

In [14]:
# Augmentation settings for the `roll_pitch_yaw` requirement (no rotation is configured).
datagen = ImageDataGenerator(
    zoom_range=0.2,
    brightness_range=[0.9, 1.1],
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.roll_pitch_yaw.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '12-roll_pitch_yaw/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='12_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='12_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '12_d_')

4209 1554 0
5000 of 5000
5000 of 5000


# 13. Flash Reflection on Skin

In [15]:
# Augmentation settings for the `flash_reflection_on_skin` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.flash_reflection_on_skin.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '13-flash_reflection_on_skin/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='13_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='13_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '13_d_')

2893 2870 0
5000 of 5000
5000 of 5000


# 14. Red Eyes 

In [16]:
# Augmentation settings for the `red_eyes` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.red_eyes.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '14-red_eyes/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='14_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='14_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '14_d_')

4385 1378 0
5000 of 5000
5000 of 5000


# 15. Shadows Behind Head 

In [17]:
# Augmentation settings for the `shadows_behind_head` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.shadows_behind_head.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '15-shadows_behind_head/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='15_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='15_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '15_d_')

3768 1995 0
5000 of 5000
5000 of 5000


# 16. Shadows Across Face

In [18]:
# Augmentation settings for the `shadows_across_face` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.shadows_across_face.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '16-shadows_across_face/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='16_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='16_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '16_d_')

3136 2627 0
5000 of 5000
5000 of 5000


# 17. Dark Tinted Lenses

In [19]:
# Augmentation settings for the `dark_tinted_lenses` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    brightness_range=[0.9, 1.1],
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.dark_tinted_lenses.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '17-dark_tinted_lenses/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='17_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='17_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '17_d_')

5114 649 0
5000 of 5000
5000 of 5000


# 18. Flash Reflection on Lenses

In [20]:
# Augmentation settings for the `flash_reflection_on_lenses` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.flash_reflection_on_lenses.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '18-flash_reflection_on_lenses/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='18_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='18_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '18_d_')

4618 1145 0
5000 of 5000
5000 of 5000


# 19. Frames Too Heavy

In [21]:
# Augmentation settings for the `frames_too_heavy` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    brightness_range=[0.9, 1.1],
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.frames_too_heavy.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '19-frames_too_heavy/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='19_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='19_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# The prefix uses this section's number (19) so re-enabling it cannot clash with section 20.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '19_d_')

5733 30 0
5000 of 5000
5000 of 5000


# 20. Frame Covering Eyes 

In [22]:
# Augmentation settings for the `frame_covering_eyes` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    brightness_range=[0.9, 1.1],
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.frame_covering_eyes.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '20-frame_covering_eyes/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='20_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='20_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '20_d_')

4086 1677 0
5000 of 5000
5000 of 5000


# 21. Hat/cap 

In [23]:
# Augmentation settings for the `hat_cap` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    brightness_range=[0.9, 1.1],
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.hat_cap.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '21-hat_cap/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='21_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='21_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '21_d_')

4927 836 0
5000 of 5000
5000 of 5000


# *22. Veil Over Face*

In [24]:
# Augmentation settings for the `veil_over_face` requirement.
datagen = ImageDataGenerator(
    zoom_range=0.2,
    brightness_range=[0.9, 1.1],
    rotation_range=15,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.veil_over_face.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '22-veil_over_face/')

# Only the mask_c and mask_nc subsets are augmented here.
data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='22_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='22_nc_')

5400 363 0
5000 of 5000
5000 of 5000


# 23. Mouth Open 

In [25]:
# Augmentation settings for the `mouth_open` requirement.
datagen = ImageDataGenerator(
    brightness_range=[0.9, 1.1],
    zoom_range=0.1,
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=15,
)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([m.photo_reqs.mouth_open.value for m in mrks])
mask_c, mask_nc, mask_d = (req_values == flag for flag in (1, 0, -1))
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = '{}{}'.format(FOLDER_DATA_AUGMENTATION, '23-mouth_open/')

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c],
                  amount=5000, prefix='23_c_')
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc],
                  amount=5000, prefix='23_nc_')
# Augmentation of the mask_d (-1) subset is kept disabled; the recorded run reports 0 such samples.
# data_augmentation(datagen, output_folder, images[mask_d], mrks[mask_d], 5000, '23_d_')

4203 1560 0
5000 of 5000
5000 of 5000


# *24. Presence of Other Faces or Toys too Close to Face*

In [26]:
# Augmentation settings for the `presence_of_other_faces_or_toys` requirement
# (shift ranges are 0.1 here rather than the 0.2 used in most other sections).
datagen = ImageDataGenerator(width_shift_range=0.1, 
                             height_shift_range=0.1, 
                             horizontal_flip=True, 
                             rotation_range=15, 
                             brightness_range=[0.9, 1.1], 
                             zoom_range=0.2)

# One annotation value per sample, split into boolean masks:
# 1 -> mask_c, 0 -> mask_nc, -1 -> mask_d.
req_values = np.array([mrk.photo_reqs.presence_of_other_faces_or_toys.value for mrk in mrks])
mask_c = (req_values == 1)
mask_nc = (req_values == 0)
mask_d = (req_values == -1)
print(mask_c.sum(), mask_nc.sum(), mask_d.sum())

output_folder = FOLDER_DATA_AUGMENTATION + '24-presence_of_other_faces_or_toys/'

data_augmentation(datagen, output_folder, images[mask_c], mrks[mask_c], 5000, '24_c_')
# The mask_nc subset uses the '_nc_' prefix like every other section; it was
# previously written as '24_d_', which mislabels these files as the mask_d subset.
data_augmentation(datagen, output_folder, images[mask_nc], mrks[mask_nc], 5000, '24_nc_')

5656 107 0
5000 of 5000
5000 of 5000
