## This notebook runs the full augmentation process for each dataset before the data is fed to the CNN models for experimentation

### Importing the necessary libraries for the augmentation utilities

In [None]:
import os
import glob
import shutil
import random

import numpy as np
import scipy as sp
import pandas as pd

#OpenCV-Python
import cv2
import cv

# imgaug
import imageio
import imgaug as ia
import imgaug.augmenters as iaa

# Keras
from keras.preprocessing.image import ImageDataGenerator,array_to_img, img_to_array, load_img 

#visualisation
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
import seaborn as sns
from IPython.display import HTML, Image

from PIL import Image

In [None]:
# Helper functions for loading, displaying and moving image files

def loadimage_path(source_dir, filename):
    """Read one image from a folder with OpenCV.

    Args:
        source_dir: Folder containing the image (joined to ``filename``
            with a ``'/'``).
        filename: Name of the image file inside ``source_dir``.

    Returns:
        Whatever ``cv2.imread`` returns for the resulting path.
        NOTE(review): OpenCV reports an unreadable file by returning ``None``
        instead of raising, so callers should check the result.
    """
    return cv2.imread('/'.join((source_dir, filename)))

def loadimage_array(img_arr):
    """Display an image array with matplotlib (nothing is loaded or returned).

    Args:
        img_arr: Image data accepted by ``plt.imshow``.
            NOTE(review): arrays coming from ``cv2.imread`` are presumably in
            BGR channel order, so colours may look swapped here -- confirm.
    """
    # 'nearest' interpolation shows the pixels as-is, without smoothing.
    plt.imshow(img_arr, interpolation='nearest')
    plt.show()

def move_file_fromfolder(source, destination):
    """Move every entry of the ``source`` folder into ``destination``.

    Paths are built with ``os.path.join``, so both folders may be given with
    or without a trailing path separator.

    Args:
        source: Folder whose entries are moved.
        destination: Folder that receives the entries; each entry keeps its
            name.
    """
    for entry in os.listdir(source):
        shutil.move(os.path.join(source, entry),
                    os.path.join(destination, entry))

## Augmentation Transformation Pipeline Processing (Randomized)

In [None]:
# Randomized augmentation pipeline (adapted from the imgaug documentation).
# For every image, between 3 and 6 of the augmenters listed inside SomeOf are
# selected and applied.
aug_pipeline = iaa.Sequential([
        iaa.SomeOf((3,6),[
            # Exactly one of the two noise/dropout augmenters is used when
            # this entry is selected.
            iaa.OneOf([
                iaa.SaltAndPepper((0.01, 0.02), per_channel=True),
                iaa.CoarseDropout(p=(0.01, 0.02), size_percent = 0.85, per_channel=0.5),
            ]),
            iaa.Rotate((-9, 9), mode = "symmetric"),
            iaa.Fliplr(0.5),
            iaa.Cutout(nb_iterations=(2, 4), size=0.07, squared=True, fill_mode="gaussian", fill_per_channel=True),
            iaa.Affine(scale = (0.9, 1.2), backend = 'cv2'),
            iaa.GammaContrast((0.85, 1.5)),
            iaa.AddToHueAndSaturation((-40, 40), per_channel=True),
            iaa.pillike.EnhanceColor(factor = (0.6, 1.3)),
            iaa.GaussianBlur(sigma = (0,0.5))
        ],
        # The random order has to be requested here: these are the augmenters
        # whose order should vary from image to image.
        random_order=True),
    ],
    # Kept for compatibility; with a single child (the SomeOf above) this flag
    # alone cannot change the order of anything.
    random_order=True
)

In [None]:
# Resizer used by resize_folder for images that are taller than wide: the
# width is scaled to 256 px (height follows the aspect ratio), then the result
# is center-cropped towards 256x256.
resize_from_width = iaa.Sequential(
    children=[
        iaa.Resize({"height": "keep-aspect-ratio", "width": 256}),
        iaa.CenterCropToFixedSize(width=256, height=256),
    ],
    random_order=False,  # fixed order: resize first, then crop
)

# Resizer used by resize_folder for images that are not taller than wide: the
# height is scaled to 256 px (width follows the aspect ratio), then the result
# is center-cropped towards 256x256.
resize_from_height = iaa.Sequential(
    children=[
        iaa.Resize({"height": 256, "width": "keep-aspect-ratio"}),
        iaa.CenterCropToFixedSize(width=256, height=256),
    ],
    random_order=False,  # fixed order: resize first, then crop
)

# Augmentation Function (preprocessing, augment, save)

In [None]:
def augment_photo(source_file, target_dir, num_augment):
    """Create ``num_augment`` augmented copies of one image and save them.

    Each copy is produced by ``aug_pipeline`` and written to ``target_dir``
    as ``A1.jpg``, ``A2.jpg``, ... (existing files with those names are
    overwritten).

    Args:
        source_file: Path of the image to augment.
        target_dir: Output folder, with or without a trailing separator.
        num_augment: Number of augmented copies to generate.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``source_file``.
    """
    img = cv2.imread(source_file)
    if img is None:
        # cv2.imread returned nothing usable; fail with a clear message
        # instead of an obscure error inside the augmentation pipeline.
        raise FileNotFoundError('Could not read image: ' + str(source_file))

    for idx in range(num_augment):
        augmented = aug_pipeline.augment_image(img)
        # The image was read with OpenCV; convert BGR -> RGB before handing
        # it to PIL for saving.
        rgb = cv2.cvtColor(augmented, cv2.COLOR_BGR2RGB)
        target_filename = os.path.join(target_dir, 'A' + str(idx + 1) + '.jpg')
        Image.fromarray(rgb).save(target_filename)
    print('Augmentation process for file done!')

### Trial Mode for dummy folder (evaluate augmentation quality)

In [None]:
# Trial run on a single image. The two paths are placeholders, and
# number_of_augment_for_each_image is not defined in the visible part of this
# notebook -- replace all three with real values before running this cell.
augment_photo(
    r"source_path",
    r"destination_path",
    number_of_augment_for_each_image,
)

### Function to augment 1 folder at a time, and saving to specified folder

In [None]:
def augment_folder(source_dir, target_dir, num_augment, hard_limit):
    """Augment every image of a folder, saving at most ``hard_limit`` files.

    For each entry of ``source_dir`` up to ``num_augment`` augmented copies
    are generated with ``aug_pipeline`` and saved to ``target_dir`` as
    ``A<k>- <original file name>``. Processing stops as soon as
    ``hard_limit`` images have been saved, so no further files are loaded or
    augmented once the limit is reached.

    Args:
        source_dir: Folder containing the images to augment.
        target_dir: Output folder, with or without a trailing separator. It
            may be the same folder as ``source_dir``; the folder listing is
            taken once, before any file is written.
        num_augment: Augmented copies to create per source image.
        hard_limit: Maximum number of augmented images saved in total.
    """
    saved = 0
    for file in os.listdir(source_dir):
        remaining = hard_limit - saved
        if remaining <= 0:
            # Limit reached: stop here instead of augmenting images that
            # would never be saved.
            break

        img = loadimage_path(source_dir, file)
        if img is None:
            # Entry could not be read as an image (e.g. a sub-folder or a
            # non-image file); skip it rather than crash in the pipeline.
            print('Skipping unreadable file: ' + file)
            continue

        for idx in range(min(num_augment, remaining)):
            augmented = aug_pipeline.augment_image(img)
            # Image was read with OpenCV; convert BGR -> RGB before saving
            # through PIL.
            im = Image.fromarray(cv2.cvtColor(augmented, cv2.COLOR_BGR2RGB))
            target_filename = os.path.join(
                target_dir, 'A' + str(idx + 1) + '- ' + file)
            im.save(target_filename)
            saved += 1
    print('Augmentation process for folder ' + source_dir + ' done!')

In [None]:
def resize_folder(source_dir, target_dir):
    """Resize the images of a folder that are not already 256x256.

    Images taller than wide go through ``resize_from_width``; all others go
    through ``resize_from_height``. Each result is written to ``target_dir``
    under the original file name.

    NOTE(review): images that are already 256x256 are not written to
    ``target_dir``; that is only harmless when ``target_dir`` is the same
    folder as ``source_dir`` -- confirm this is the intended use.

    Args:
        source_dir: Folder containing the images to check.
        target_dir: Folder receiving the resized images.
    """
    for file in os.listdir(source_dir):
        img = cv2.imread(os.path.join(source_dir, file))
        if img is None:
            # Not readable as an image (sub-folder, non-image file, ...).
            print('Skipping unreadable file: ' + file)
            continue

        h, w = img.shape[:2]
        if h == 256 and w == 256:
            continue

        # Scale the shorter side to 256 so the center crop removes the excess
        # along the longer side.
        resizer = resize_from_width if h > w else resize_from_height
        images_resized = resizer.augment_image(img)
        cv2.imwrite(os.path.join(target_dir, file), images_resized)

    # Report the last path component instead of a hard-coded split index,
    # which raised IndexError for paths with fewer than six components.
    folder_name = os.path.basename(os.path.normpath(source_dir))
    print('Resizing process for folder ' + folder_name + ' done!')

### Execution of Augmentation process for each folder 

#### (Model A : HQ, Model B : HP-S, Model C : HP-L , Model D : RS-S , Model E : RS-L)

In [None]:
dists = ["Distribution 1", "Distribution 2", "Distribution 3"]
models = ["Model A", "Model B", "Model C", "Model D", "Model E"]
typologies = ["Confined", "RC Infilled", "Timber", "Unconfined"]

# Each sampling entry is [typology, augmentations per image, hard limit on the
# number of augmented images saved for that folder].
small_sampling = [["Confined",2, 580], ["RC Infilled",8,764], ["Timber",15, 814], ["Unconfined",33, 844]]
hp_sampling = [["Confined",2, 1064], ["RC Infilled",8,1404], ["Timber",13, 1475], ["Unconfined",20, 1519]]
rs_sampling = [["Confined",2, 2160], ["RC Infilled",8,2845], ["Timber",14, 3012], ["Unconfined",38, 3155]]

# Sampling plan per model; any model not listed here uses rs_sampling.
sampling_by_model = {
    "Model A": small_sampling,
    "Model B": small_sampling,
    "Model C": hp_sampling,
    "Model D": small_sampling,
}

# Augment every train/<typology> folder in place: the augmented images are
# written into the same folder the originals are read from.
for dist in dists:
    for model in models:
        for sampling in sampling_by_model.get(model, rs_sampling):
            source_dir = "Categorized Datasets/Dataset/{}/{}/train/{}/".format(
                dist, model, sampling[0])
            print(source_dir)
            augment_folder(source_dir, source_dir, sampling[1], sampling[2])