In [3]:
# Requires the third-party imgaug package; install it once with: %pip install imgaug

In [4]:
import os
import cv2
import imageio
import numpy as np
import imgaug as ia
import imgaug.augmenters as iaa
from PIL import Image
from datetime import datetime
from imgaug.augmentables.batches import UnnormalizedBatch

In [5]:
def count_files_in_folder(folder):
    """Count the files located directly inside a folder.

    Only entries for which os.path.isfile is true are counted, so
    subfolders are ignored and not descended into.

    Arguments:
        folder {string} -- directory to inspect
    """
    total = 0
    for entry_name in os.listdir(folder):
        if os.path.isfile(os.path.join(folder, entry_name)):
            total += 1
    return total


def save_image(image, folder):
    """Save an image as '<number>.png' without overwriting an existing file.

    The number starts at (files currently in the folder + 1) and is increased
    until no file with that name exists.

    Arguments:
        image {Pillow} -- image object to be saved
        folder {string} -- output folder (created if it does not exist)
    """

    # exist_ok=True removes the gap between "check" and "create" that the
    # separate os.path.exists test had
    os.makedirs(folder, exist_ok=True)

    # first candidate name = number of files in the folder + 1
    image_counter = count_files_in_folder(folder) + 1
    image_name = os.path.join(folder, str(image_counter) + '.png')

    # The file count alone does not guarantee a free name: a folder holding
    # 'a.jpg', 'b.jpg' and '4.png' has 3 files, and '4.png' would be
    # overwritten. Keep counting up until the name is unused.
    while os.path.exists(image_name):
        image_counter += 1
        image_name = os.path.join(folder, str(image_counter) + '.png')

    # save image to the dedicated folder (folder name = label)
    image.save(image_name)
    
    
def get_files_in_folder(folder):
    """Return the paths of the files directly inside a folder, sorted by name.

    os.listdir returns entries in arbitrary order; sorting them makes the
    result (and any subset taken from it) the same on every run and platform.

    Arguments:
        folder {string} -- directory to list

    Returns:
        list -- folder-prefixed paths of the entries that are files
    """
    file_paths = []
    for name in sorted(os.listdir(folder)):
        path = os.path.join(folder, name)
        if os.path.isfile(path):
            file_paths.append(path)
    return file_paths


def list_oversample(initial_list, max_size):
    """Build the list of extra items needed to grow a class to max_size.

    The result has exactly (max_size - len(initial_list)) items, obtained by
    repeating initial_list as many whole times as fit and then appending a
    leading part of it for the remainder.

    Arguments:
        initial_list {list} -- array to be resized
        max_size {int} -- majority class size

    Returns:
        list -- items to add; empty when initial_list is empty or already
                holds max_size items or more
    """
    initial_length = len(initial_list)
    new_size = max_size - initial_length

    # Nothing to repeat (empty input would otherwise divide by zero) or
    # nothing to add (a negative size would otherwise act as a negative
    # slice bound and return most of the list).
    if initial_length == 0 or new_size <= 0:
        return []

    # whole copies plus a partial copy, so the target size is hit exactly
    full_copies, remainder = divmod(new_size, initial_length)
    return initial_list * full_copies + initial_list[:remainder]

def save_image_array(image_array, folder):
    """Store every array of a collection as an image file via save_image.

    Arguments:
        image_array {iterable} -- arrays that Image.fromarray can convert
        folder {string} -- output folder, passed through to save_image
    """
    for pixels in image_array:
        pil_image = Image.fromarray(pixels)
        save_image(pil_image, folder)

In [13]:
# Root input folder: each of its subfolders is treated as one image class
# (the subfolder name is the label).
IMAGE_FOLDER = 'data_aug'

In [19]:
# Manual target number of images per class. Set to None to use the size of
# the largest class found on disk instead (e.g. when data is available).
MANUAL_MAX_IMAGE_COUNT = 50

# all subfolders in the initial directory; plain files are skipped because
# they cannot be listed as a class folder, and names are sorted so the
# processing order is the same on every run
image_subfolders = [
    path
    for path in (os.path.join(IMAGE_FOLDER, entry) for entry in sorted(os.listdir(IMAGE_FOLDER)))
    if os.path.isdir(path)
]
# number of files per class, counted once and reused below
image_counts = {subfolder: count_files_in_folder(subfolder) for subfolder in image_subfolders}

if MANUAL_MAX_IMAGE_COUNT is not None:
    max_image_count = MANUAL_MAX_IMAGE_COUNT
else:
    # number of instances in the majority class (0 when there are no classes)
    max_image_count = max(image_counts.values(), default=0)

# only classes below the target size need synthetic images
image_target_subfolders = [subfolder for subfolder in image_subfolders if image_counts[subfolder] < max_image_count]

In [20]:
# Display the target image count per class chosen above.
max_image_count

50

In [21]:
# Set augmenters
ia.seed(1)

# The individual augmentation steps. Their order in this list is not the
# execution order: the Sequential below is built with random_order=True.
augmentation_steps = [
    # horizontal flips
    iaa.Fliplr(0.5),
    # random crops
    iaa.Crop(percent=(0, 0.1)),
    # Small gaussian blur with random sigma between 0 and 0.5,
    # applied to only about 50% of all images.
    iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5))),
    # Strengthen or weaken the contrast in each image.
    iaa.LinearContrast((0.75, 1.5)),
    # Add gaussian noise. For 50% of all images the noise is sampled once
    # per pixel; for the other 50% it is sampled per pixel AND channel,
    # which can change the color (not only brightness) of the pixels.
    iaa.AdditiveGaussianNoise(
        loc=0,
        scale=(0.0, 0.05*255),
        per_channel=0.5,
    ),
    # Make some images brighter and some darker. In 20% of all cases the
    # multiplier is sampled once per channel, which can end up changing
    # the color of the images.
    iaa.Multiply((0.8, 1.2), per_channel=0.2),
    # Affine transformations: scale/zoom, translate/move, rotate and shear.
    iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
        rotate=(-25, 25),
        shear=(-8, 8),
    ),
]

# apply augmenters in random order
seq = iaa.Sequential(augmentation_steps, random_order=True)

In [22]:
for subfolder in image_target_subfolders:
    print(subfolder)

    # start the stopwatch for this folder
    start_time = datetime.now()

    # choose the source files to augment and load them as arrays
    image_files = get_files_in_folder(subfolder)
    synthetic_image_files = list_oversample(image_files, max_image_count)
    images = list(map(imageio.imread, synthetic_image_files))

    # apply image augmentation on the subfolder and store the results
    # in that same subfolder
    augmented_images = seq(images=images)
    save_image_array(augmented_images, subfolder)

    # report the time spent on this folder as whole minutes and seconds
    end_time = datetime.now()
    time_spent = end_time - start_time
    total_seconds = time_spent.days * 86400 + time_spent.seconds
    spent_minutes, spent_seconds = divmod(total_seconds, 60)
    print("{} min {} sec".format(spent_minutes, spent_seconds))

data_aug\activia
0 min 6 sec
data_aug\veloute
1 min 6 sec
