# Augment dataset

Here we use the module ```albumentations``` to perform off-line data augmentation. It is off-line because the augmented images are generated before model training and stored on the hard drive. This differs from Keras augmentation, which is online (or on-the-fly): one batch at a time is loaded, augmented, sent to training (for one epoch) and erased just after, which avoids storing the full augmented dataset.

Be careful: the online augmentation strategy requires more epochs (or more steps per epoch), because each epoch only sees as many augmented samples as there are original images (even though the augmented dataset is virtually infinite).

In [55]:
import cv2
import glob
import imageio
import os
import matplotlib.pyplot as plt
import numpy as np
import napari

import albumentations as A

In [56]:
# NOTE: hard-coded, machine-specific working directory; the relative folder
# names below ("images", "masks") are resolved against it.
os.chdir("D:/Hugo/Budding/Train_Set/")

images_path, masks_path = "images", "masks"

# Link images and their respective masks.
# os.listdir() returns entries in arbitrary order, so both listings are sorted
# to make the index-based pairing deterministic. This assumes an image and its
# mask sort to the same position (e.g. they share a file name) -- TODO confirm.
images = sorted(os.listdir(images_path))
masks = sorted(os.listdir(masks_path))

# Pairing is done by position, so a count mismatch would shift every couple
# after the missing file; fail loudly instead of silently truncating.
if len(images) != len(masks):
    raise ValueError(
        f"Found {len(images)} images but {len(masks)} masks: "
        "cannot pair them by position."
    )

print(f"Number of (image, mask) couples : ({len(images)}, {len(masks)}).")

Number of (image, mask) couples : (65, 65).


In [70]:
# Random augmentation pipeline. Steps are listed in the order they are given
# to A.Compose; each step carries its own probability `p`.

# Additive Gaussian noise with a variance range of (50, 250).
pixel_noise = A.augmentations.transforms.GaussNoise(var_limit=(50, 250), p=0.5)

# Random shift / zoom / rotation over the full circle; BORDER_WRAP is the
# OpenCV border mode used for pixels that fall outside the source frame.
# No `p` is passed, so the library default probability applies.
affine_jitter = A.ShiftScaleRotate(
    shift_limit=0.2,
    scale_limit=[-0.5, 0.2],
    rotate_limit=360,
    border_mode=cv2.BORDER_WRAP,
)

transform = A.Compose(
    [
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.HorizontalFlip(p=0.5),
        pixel_noise,
        affine_jitter,
    ]
)

In [71]:
# Augmented files are written into the same folders as the originals; the
# "aug_" prefix is what tells them apart. Every counter restarts at 0 each time
# this cell runs, so same-named files from a previous run are overwritten.
aug_images_path, aug_masks_path = "images", "masks"
IMG_TO_GENERATE = 10


def _rescale_to_255(arr):
    """Min-max rescale ``arr`` to [0, 255] and return it as uint16.

    The divisor is clamped to at least 1, so a constant array (max == min)
    maps to all zeros instead of triggering a division by zero.
    """
    low = arr.min()
    span = np.max((arr.max() - low, 1))
    return ((arr - low) / span * 255).astype("uint16")


# Number of augmented copies already written for each source image; used as
# the file-name suffix. A 64-bit counter is used because a uint8 one wraps
# back to 0 after 255, which would make later files overwrite earlier ones.
images_count = np.zeros((len(images),), dtype=np.int64)

for i in range(IMG_TO_GENERATE):

    if i % 100 == 0:
        print(f"Prepared {round(i / IMG_TO_GENERATE * 100, 2)}% of the augmented images.")

    # choose one (image, mask) couple at random, with replacement
    idx = np.random.randint(0, len(images))
    img = imageio.imread(os.path.join(images_path, images[idx]))
    msk = imageio.imread(os.path.join(masks_path, masks[idx]))

    # augment it: image and mask go through the same `transform` call
    transformed = transform(image=img, mask=msk)
    aug_im = _rescale_to_255(transformed["image"])
    aug_ms = _rescale_to_255(transformed["mask"])

    # save augmented image and mask under the same file name.
    # splitext() strips only the extension, so names containing extra dots
    # (e.g. "cell.001.tif") keep their full stem and do not collide.
    im_name = os.path.splitext(images[idx])[0]
    save_name = f"aug_{im_name}_{images_count[idx]}.tif"
    imageio.imwrite(os.path.join(aug_images_path, save_name), aug_im)
    imageio.imwrite(os.path.join(aug_masks_path, save_name), aug_ms)

    images_count[idx] += 1

# Reported once the loop has finished, i.e. after the last file is written.
print("Done.")

Prepared 0.0% of the augmented images.
Done.
