# Training Images Augmentation

This script applies data augmentation techniques specifically chosen for webcam images showing different cloud conditions. The images are used to train several CNN models that predict cloud types. The "offline" augmentation approach used here increases the diversity of the image dataset by applying multiple (random) variations to each picture and saving the results as additional copies; it therefore also increases the size of the dataset itself. Image augmentation should only be applied to training images, not to validation images!

Author: Elias Frey, RSGB/Unibe \
Date: 02.10.2023

In [1]:
import os
import cv2
import albumentations as A
import os
import shutil
import numpy as np

### Define Params

In [2]:
# Paths
# Input directory: every file found here is read and augmented
lab_img_path = 'data/cropped_images'
# Output directory: augmented images (and optional "_org" copies) are written here
aug_img_path = 'data/aug_test'

# Choose whether the original pictures should also be copied into the final augmentation set
apply_copy_org = True

### Directory management

In [3]:
# Create the output directory (and any missing parents). `exist_ok=True`
# replaces the separate existence check, which avoids the check-then-create
# race and keeps the cell safe to re-run.
os.makedirs(aug_img_path, exist_ok=True)

In [9]:
# Optionally copy every original image into the augmentation directory,
# tagging the file name with an "_org" suffix.
if apply_copy_org:
    for filename in os.listdir(lab_img_path):
        org_path = os.path.join(lab_img_path, filename)
        # Skip sub-directories and other non-file entries
        if not os.path.isfile(org_path):
            continue
        # splitext strips only the final extension, so names containing extra
        # dots (e.g. "cam.2023-10-02.jpg") keep their full stem instead of
        # collapsing to "cam" and overwriting one another.
        stem, ext = os.path.splitext(filename)
        # shutil.copy does not re-encode the file, so keep the source
        # extension; fall back to ".jpg" only when the name has none.
        new_fn = f"{stem}_org{ext or '.jpg'}"
        aug_path = os.path.join(aug_img_path, new_fn)
        # Original image will be copied to the augmentation directory
        shutil.copy(org_path, aug_path)

### Data Augmentation

In [10]:
def _flip_stage(rnd_int):
    """Build the final flip stage; `rnd_int` is passed to SomeOf as its `n`."""
    return A.SomeOf([A.HorizontalFlip(p=1), A.VerticalFlip(p=1)],
                    rnd_int,
                    p=1)


def _build_pipeline(make_pixel_op, full_frame_rotation, cropped_rotation, rnd_int):
    """Build one pipeline: pixel op + rotation (with or without a crop), then flips.

    `make_pixel_op` is called once per branch so that each branch owns its own
    transform instance, exactly as in the hand-written version.
    """
    # Branch A: pixel-level change followed by a rotation of the full frame
    full_frame_branch = A.Sequential([
        make_pixel_op(),
        A.Rotate(limit=full_frame_rotation, p=1),
    ])
    # Branch B: same pixel-level change, then a random sized crop and a rotation
    cropped_branch = A.Sequential([
        make_pixel_op(),
        A.RandomSizedCrop([250, 250], image_size, image_size, p=1),
        A.Rotate(limit=cropped_rotation, p=1),
    ])
    return A.Compose([
        # Exactly one of the two branches is picked per call
        A.OneOf([full_frame_branch, cropped_branch], p=1),
        # Must stay the LAST element: callers adjust its `.n` via pipeline[-1]
        _flip_stage(rnd_int),
    ])


def get_auglist(rnd_int):
    """
    Build the list of augmentation pipelines used to diversify the dataset.

    Each pipeline applies one pixel-level transform (brightness/contrast,
    Gaussian noise or colour jitter) combined with a rotation - optionally
    preceded by a random sized crop - and finishes with a flip stage.

    Parameters
    ----------
    rnd_int : int
        Passed as `n` to the trailing `A.SomeOf` flip stage of every pipeline.

    Returns
    -------
    list
        Three `A.Compose` pipelines, in the order brightness/contrast (0),
        Gaussian noise (1), colour jitter (2). The last element of each
        pipeline is the `A.SomeOf` flip stage.

    Notes
    -----
    NOTE(review): `image_size` is read from module scope and is not defined in
    the visible part of this notebook - confirm it is set before calling.
    """
    # Factories (rather than shared instances) so every branch gets its own transform
    def brightness_contrast():
        return A.RandomBrightnessContrast(brightness_limit=(-0.15, 0.15),
                                          contrast_limit=(-0.15, 0.15),
                                          p=1)

    def gauss_noise():
        return A.GaussNoise(var_limit=[8, 10], per_channel=False, p=1)

    def color_jitter():
        # Hue range is symmetric in both branches; the full-frame branch
        # previously used (0.05, 0.05), i.e. a constant shift with no jitter.
        return A.ColorJitter(brightness=0, contrast=0, saturation=0.1,
                             hue=(-0.05, 0.05), p=1)

    # Rotation limits are written as (min, max); the sampled intervals are the
    # same as before, only the order of the bounds was normalised.
    augmentations_list = [
        # 0. Random brightness contrast (org/cropped)
        _build_pipeline(brightness_contrast, (-45, -20), (15, 35), rnd_int),

        # 1. Gaussian noise (org/cropped)
        _build_pipeline(gauss_noise, (-45, -20), (15, 35), rnd_int),

        # 2. Color jitter (org/cropped)
        _build_pipeline(color_jitter, (20, 45), (-35, -15), rnd_int),
    ]
    return augmentations_list

In [11]:
# Apply data augmentation
# The pipelines hold no per-image configuration apart from the flip count,
# which is re-drawn for every call below, so they are built once up front.
augmentations_list = get_auglist(1)

for img_filename in os.listdir(lab_img_path):
    img_path = os.path.join(lab_img_path, img_filename)
    # Skip sub-directories and other non-file entries
    if not os.path.isfile(img_path):
        continue

    img = cv2.imread(img_path)
    # cv2.imread returns None (instead of raising) for unreadable/non-image files
    if img is None:
        print(f"Skipping unreadable image: {img_path}")
        continue

    # OpenCV loads BGR, while albumentations' colour transforms expect RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # splitext keeps dots inside the stem (e.g. "cam.2023-10-02.jpg")
    stem = os.path.splitext(img_filename)[0]

    for idx, sel_augmentation in enumerate(augmentations_list):
        # Last stage of each pipeline is the flip SomeOf: apply 1 or 2 flips
        sel_augmentation[-1].n = np.random.randint(low=1, high=3)
        augmented_image = sel_augmentation(image=img)['image']
        augmented_filename = f"{stem}_aug{idx}.jpg"
        augmented_path = os.path.join(aug_img_path, augmented_filename)
        # Convert back to BGR, the channel order cv2.imwrite expects
        cv2.imwrite(augmented_path, cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR))