In [3]:
import imgaug.augmenters as iaa
import os
import numpy as np
from PIL import Image
import glob

In [5]:
# Define the augmentation pipeline that is applied to every batch of images.
# The individual augmenters below are executed in a random order per batch
# (see random_order=True at the end of the list).
seq = iaa.Sequential([
    iaa.Fliplr(0.5),  # horizontally flip 50% of the images
    # Small gaussian blur with a random sigma between 0 and 0.2,
    # applied to about 50% of all images.
    iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.2))),
    # Strengthen or weaken the contrast in each image.
    # NOTE(review): ContrastNormalization appears to be deprecated in newer
    # imgaug releases in favour of LinearContrast - confirm against the
    # installed version before swapping it.
    iaa.ContrastNormalization((0.75, 1.5)),
    # Add gaussian noise with a scale sampled between 0 and 0.05*255.
    # For about 65% of all images, we sample the noise once per pixel.
    # For the other 35% of all images, we sample the noise per pixel AND
    # channel. This can change the color (not only brightness) of the pixels.
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.35),
    # Make some images brighter and some darker (multiplier in 0.8-1.2).
    # In 20% of all cases, we sample the multiplier once per channel,
    # which can end up changing the color of the images.
    iaa.Multiply((0.8, 1.2), per_channel=0.2),
    # Apply affine transformations to each image:
    # scale/zoom to 80-120% per axis, translate by up to 20% per axis,
    # rotate by -25 to +25 degrees and shear by -8 to +8 degrees.
    iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
        rotate=(-25, 25),
        shear=(-8, 8)
    )
], random_order=True)  # apply augmenters in random order

In [6]:
def load_images_from_folder(folder):
    """Load every readable image file in ``folder`` as a NumPy array.

    Entries are visited in sorted filename order so the result is
    deterministic across runs and platforms. Sub-directories are ignored, and
    files that Pillow cannot open or decode (e.g. ``.DS_Store``) are skipped
    with a warning instead of aborting the whole load.

    Args:
        folder: Path of the directory to read images from.

    Returns:
        list of ``numpy.ndarray``: one array per successfully loaded image.

    Raises:
        FileNotFoundError / NotADirectoryError: if ``folder`` itself cannot be
            listed (propagated from ``os.listdir``).
    """
    import warnings  # function-scope import keeps this cell self-contained

    images = []
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            # Nested folders are not images; Image.open would raise on them.
            continue
        try:
            # Image.open never returns None - it raises on failure - and it
            # keeps the file handle open until closed, hence the context
            # manager. np.array() forces the pixel data to be read before
            # the file is closed.
            with Image.open(path) as img:
                images.append(np.array(img))
        except OSError as exc:
            # Pillow's UnidentifiedImageError is an OSError subclass, so this
            # covers both "not an image" and truncated/corrupt files.
            warnings.warn(f"Skipping unreadable image file {path!r}: {exc}")
    return images

def augment_images(image_list, augmentation_pipeline, augmentations_per_image):
    """Run the pipeline over the whole image list several times.

    Args:
        image_list: Batch of images handed to the pipeline on every pass.
        augmentation_pipeline: Callable invoked as ``pipeline(images=...)``
            that returns an iterable of augmented images.
        augmentations_per_image: Number of passes over ``image_list``.

    Returns:
        list: The results of all passes, concatenated in pass order.
    """
    # One pipeline call per pass; the results of each pass are flattened
    # into a single list.
    return [
        augmented
        for _pass in range(augmentations_per_image)
        for augmented in augmentation_pipeline(images=image_list)
    ]

In [7]:
# Balance the dataset: top up every class folder with augmented copies until
# it holds as many .jpg files as the largest class.

# One root for discovery, loading and saving. Counting previously used a
# hard-coded absolute user path while loading/saving used this relative one,
# so the cell only worked from one specific working directory on one machine.
CLASSES_DIR = os.path.join('.', 'data', 'classes')

# count how many images are in each class to determine how many to generate
# (the trailing '' makes the pattern end in a separator, so only directories match)
class_directories = sorted(glob.glob(os.path.join(CLASSES_DIR, '*', '')))
class_counts = {
    os.path.basename(os.path.normpath(class_dir)): len(glob.glob(os.path.join(class_dir, '*.jpg')))
    for class_dir in class_directories
}
if not class_counts:
    # Without this, max() below fails with an unhelpful "empty sequence" error.
    raise FileNotFoundError(f"No class folders found under {CLASSES_DIR!r}")

# calculate how many images you need for class balancing
max_count = max(class_counts.values())
augmentations_per_class = {class_name: max_count - count for class_name, count in class_counts.items()}

# for each class, generate new images and save them
for class_name, additional_images_needed in augmentations_per_class.items():
    if additional_images_needed == 0:
        # Already at the target size (e.g. the largest class, or a re-run).
        continue

    image_folder = os.path.join(CLASSES_DIR, class_name)
    images = load_images_from_folder(image_folder)
    if not images:
        # Nothing to augment from; previously this raised ZeroDivisionError.
        print(f"Skipping class {class_name!r}: no source images to augment")
        continue

    # perform augmentation
    # Whole passes over the class cover most of the deficit; the remainder is
    # filled by augmenting a random subset once. Plain floor division used to
    # drop the remainder, so any class needing fewer new images than it
    # already had received none at all.
    full_rounds, remainder = divmod(additional_images_needed, len(images))
    new_images = list(augment_images(images, seq, augmentations_per_image=full_rounds))
    if remainder:
        picked = np.random.choice(len(images), size=remainder, replace=False)
        subset = [images[i] for i in picked]
        new_images.extend(augment_images(subset, seq, augmentations_per_image=1))

    # save the new images
    for idx, aug_image in enumerate(new_images):
        save_path = os.path.join(image_folder, f"{class_name}_aug_{idx}.jpg")
        Image.fromarray(aug_image).save(save_path)