## Imports

In [64]:
import os
import cv2
import numpy as np

from PIL import Image
from imgaug import augmenters as iaa
import imgaug as ia

## Constants

In [4]:
# Root folder of the raw (un-augmented) character crops; the cells below
# treat every entry of raw_characters as a sub-directory of this folder.
raw_characters_path = r'E:\GitHub\smart-parking-system\dataset\classification-raw'
# Class labels are the sub-directory names. Sort them because os.listdir()
# returns entries in arbitrary order, and keep directories only so that a
# stray file (e.g. Thumbs.db) cannot break the per-class os.listdir() calls.
raw_characters = sorted(
    entry
    for entry in os.listdir(raw_characters_path)
    if os.path.isdir(os.path.join(raw_characters_path, entry))
)

In [59]:
# Root folder for the augmented dataset; the augmentation loop writes each
# character's images into the sub-directory named after that character.
augmented_characters_path = r'E:\GitHub\smart-parking-system\dataset\classification'

In [10]:
# Running maximum [width, height] over the raw samples; updated in place by
# the size scan and later used as the target canvas size for padding.
raw_max_size = [0, 0]

## Functions

In [60]:
def load_images_from_folder(folder, start = 0, end = 100000):
    """Load the readable images of ``folder`` as one array of RGB images.

    Args:
        folder: Directory whose entries are passed to ``cv2.imread``.
        start: Index of the first directory entry to load (inclusive).
        end: Index one past the last directory entry to load.

    Returns:
        ``np.array`` built from the list of loaded images, each converted
        from BGR to RGB channel order. Entries for which ``cv2.imread``
        returns ``None`` are skipped.
    """
    # os.listdir() gives no ordering guarantee, so sort the names first;
    # otherwise the [start:end] window could select different files per run.
    filenames = sorted(os.listdir(folder))[start:end]
    images = []
    for filename in filenames:
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:
            continue
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    return np.array(images)

In [61]:
def save_images_in_folder(output_folder_path, images, start_index = 0):
    """Write ``images`` to ``output_folder_path`` as consecutively numbered PNGs.

    Args:
        output_folder_path: Destination directory; created if it is missing.
        images: Iterable of RGB images; each one is converted to BGR before
            being handed to ``cv2.imwrite``.
        start_index: Number used for the first file name; the image at
            position ``i`` is written to ``<start_index + i>.png``.
    """
    # cv2.imwrite() does not create directories and only signals failure via
    # its return value, so a missing folder would silently drop every image.
    os.makedirs(output_folder_path, exist_ok=True)
    for offset, rgb_image in enumerate(images):
        bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
        target = os.path.join(output_folder_path, str(start_index + offset) + '.png')
        cv2.imwrite(target, bgr_image)

In [62]:
def augment_images(output_folder_path, augmenter, images, start_index = 0, iterations = 10):
    """Run ``augmenter`` over ``images`` several times and save every result.

    Args:
        output_folder_path: Directory passed on to ``save_images_in_folder``.
        augmenter: Callable invoked as ``augmenter(images=images)``; its
            return value is the batch that gets saved.
        images: Batch of input images; its length is the numbering stride
            between two consecutive passes.
        start_index: File number given to the first image of the first pass.
        iterations: Number of augmentation passes over the whole batch.
    """
    batch_size = len(images)
    for iteration in range(iterations):
        augmented_batch = augmenter(images=images)
        # Every pass gets its own contiguous block of file numbers. The
        # caller's start_index is added here; it used to be ignored.
        save_images_in_folder(output_folder_path=output_folder_path,
                              images=augmented_batch,
                              start_index=start_index + iteration * batch_size)

## Get number of samples for raw characters

In [48]:
# Map every class label to the number of entries in its raw directory.
raw_no_samples = {}
for raw_character in raw_characters:
    # List the directory once so the printed and the stored count always agree.
    sample_count = len(os.listdir(os.path.join(raw_characters_path, raw_character)))
    print(f'{raw_character} directory has {sample_count} samples.')
    raw_no_samples[str(raw_character)] = sample_count

0 directory has 91 samples.
1 directory has 73 samples.
2 directory has 32 samples.
3 directory has 47 samples.
4 directory has 18 samples.
5 directory has 28 samples.
6 directory has 12 samples.
7 directory has 15 samples.
8 directory has 30 samples.
9 directory has 38 samples.
A directory has 69 samples.
B directory has 168 samples.
C directory has 44 samples.
D directory has 20 samples.
E directory has 6 samples.
F directory has 20 samples.
G directory has 16 samples.
H directory has 13 samples.
I directory has 28 samples.
J directory has 1 samples.
K directory has 17 samples.
L directory has 44 samples.
M directory has 16 samples.
N directory has 13 samples.
O directory has 14 samples.
P directory has 28 samples.
R directory has 31 samples.
S directory has 44 samples.
T directory has 38 samples.
U directory has 2 samples.
V directory has 40 samples.
W directory has 17 samples.
X directory has 17 samples.
Y directory has 19 samples.
Z directory has 5 samples.


In [49]:
# Show the complete label -> sample count mapping in one line.
print(raw_no_samples)

{'0': 91, '1': 73, '2': 32, '3': 47, '4': 18, '5': 28, '6': 12, '7': 15, '8': 30, '9': 38, 'A': 69, 'B': 168, 'C': 44, 'D': 20, 'E': 6, 'F': 20, 'G': 16, 'H': 13, 'I': 28, 'J': 1, 'K': 17, 'L': 44, 'M': 16, 'N': 13, 'O': 14, 'P': 28, 'R': 31, 'S': 44, 'T': 38, 'U': 2, 'V': 40, 'W': 17, 'X': 17, 'Y': 19, 'Z': 5}


## Add padding to images to make them the same size

### Search for the maximum width and height that we'll later use for padding

In [22]:
# Scan every raw sample and keep the largest width and height seen so far.
for raw_character in raw_characters:
    character_dir = os.path.join(raw_characters_path, raw_character)
    for raw_sample in os.listdir(character_dir):
        # Only the size is needed; the context manager releases the file
        # handle right away instead of leaving it to garbage collection.
        with Image.open(os.path.join(character_dir, raw_sample)) as image:
            width, height = image.size
        raw_max_size[0] = max(raw_max_size[0], width)
        raw_max_size[1] = max(raw_max_size[1], height)

In [23]:
# The "=" specifier prints the expression text together with its value.
print(f'{raw_max_size = }')

raw_max_size = [195, 256]


### Add padding to all images to match the maximum size of a picture

In [35]:
# Centre every raw sample on a (255, 255, 255) canvas of raw_max_size and
# overwrite the original file with the padded version.
for raw_character in raw_characters:
    character_dir = os.path.join(raw_characters_path, raw_character)
    for raw_sample in os.listdir(character_dir):
        sample_path = os.path.join(character_dir, raw_sample)
        # Build the padded copy while the source is open, then close the
        # source before the very same path is overwritten below.
        with Image.open(sample_path) as image:
            width, height = image.size

            # Integer offsets put the image in the middle; when the size
            # difference is odd the extra pixel ends up on the right/bottom.
            x_axis_offset = (raw_max_size[0] - width) // 2
            y_axis_offset = (raw_max_size[1] - height) // 2

            # The canvas always has exactly the maximum size found by the scan.
            result = Image.new(image.mode, (raw_max_size[0], raw_max_size[1]), (255, 255, 255))
            result.paste(image, (x_axis_offset, y_axis_offset))
        result.save(sample_path)

## Add data augmentation to the raw padded dataset

### Create augmenter

In [69]:
# sometimes(aug) wraps an augmenter in iaa.Sometimes(0.3, ...), i.e. the
# augmenter is applied in roughly 30% of all cases,
# e.g. sometimes(GaussianBlur(0.3)) would blur roughly three images out of ten.
sometimes = lambda aug: iaa.Sometimes(0.3, aug)

# Define our sequence of augmentation steps. Steps wrapped in sometimes(...)
# only affect a random subset of the images.
seq = iaa.Sequential(
    [
        # pad some of the images by 0-10% of their height/width, filling
        # with either a constant value of 254-255 or the repeated edge
        # NOTE(review): percent is non-negative, which imgaug documents as
        # padding only (cropping needs negative values) -- confirm.
        sometimes(iaa.CropAndPad(percent=(0, 0.1),
                                 pad_mode=["constant", "edge"],
                                 pad_cval=(254, 255)
        )),

        # Apply affine transformations to some of the images
        # - scale to 80-110% of image height/width (each axis independently)
        # - translate by -10% to +10% relative to height/width (per axis)
        # - rotate by -3 to +3 degrees
        # - shear by -5 to +5 degrees
        # - order: use nearest neighbour or bilinear interpolation (fast)
        # - mode: use any available mode to fill newly created pixels
        #         see API or scikit-image for which modes are available
        # - cval: if the mode is constant, then fill the newly created
        #         pixels with a value of 254-255 (near white for 8-bit
        #         images)
        sometimes(iaa.Affine(
            scale={"x": (0.8, 1.1), "y": (0.8, 1.1)},
            translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
            rotate=(-3, 3),
            shear=(-5, 5),
            order=[0, 1],
            cval=(254, 255),
            mode=ia.ALL
        )),

        #
        # Execute 0 to 3 of the following (less important) augmenters per
        # image. Don't execute all of them, as that would often be way too
        # strong.
        #
        iaa.SomeOf((0, 3),
            [
                # Drop out a random 0-10% of the pixels.
                iaa.Dropout(p=(0, 0.1)),

                # Blur each image with one of the following:
                # gaussian blur (sigma between 0 and 0.3),
                # average/uniform blur (kernel size between 2x2 and 3x3).
                iaa.OneOf([
                    iaa.GaussianBlur((0, .3)),
                    iaa.AverageBlur(k=(2, 3))
                ]),

                # Sharpen each image, overlay the result with the original
                # image using an alpha between 0.2 and 0.8 and a lightness
                # factor between 0.75 and 1.25.
                iaa.Sharpen(alpha=(0.2, 0.8), lightness=(0.75, 1.25)),

                # Search in some images either for all edges or for
                # directed edges. These edges are then marked in a black
                # and white image and overlayed with the original image
                # using an alpha of 0 to 0.1.
                sometimes(iaa.OneOf([
                    iaa.EdgeDetect(alpha=(0, 0.1)),
                    iaa.DirectedEdgeDetect(
                        alpha=(0, 0.1), direction=(0.0, 1.0)
                    ),
                ])),

                # Add a value of -10 to 10 to each pixel.
                iaa.Add((-10, 10)),

                # Change brightness of images (50-120% of original value).
                iaa.Multiply((0.5, 1.2)),

                # Improve or worsen the contrast of images
                # (contrast factor between 0.5 and 1.6).
                iaa.LinearContrast((0.5, 1.6)),

                # Convert each image to grayscale and then overlay the
                # result with the original with random alpha. I.e. remove
                # colors with varying strengths.
                iaa.Grayscale(alpha=(0.0, 1.0)),

                # In some images distort local areas with varying strength.
                sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.03)))
            ],
            # do all of the selected augmentations above in random order
            random_order=True
        )
    ],
    # apply the three top-level steps in random order as well
    random_order=True
)

### Augment dataset

In [55]:
# About 2200 samples per class should be enough: for every class, store how
# many augmentation passes over its raw samples get closest to that total.
aug_iters = {
    label: round(2200 / sample_count)
    for label, sample_count in raw_no_samples.items()
}

In [71]:
# For every class: load its raw images and write the configured number of
# augmentation passes into the output folder of the same name.
for character, passes in aug_iters.items():
    source_dir = os.path.join(raw_characters_path, character)
    target_dir = os.path.join(augmented_characters_path, character)
    imgs = load_images_from_folder(source_dir)
    augment_images(
        output_folder_path=target_dir,
        augmenter=seq,
        images=imgs,
        iterations=passes,
    )