# 1. Review Dataset and Build Image Loading Function

### 1.1 Import TF and Dependencies

In [None]:
import cv2
import tensorflow as tf
import json
import numpy as np
from matplotlib import pyplot as plt

### 1.2 Load Image into TF Data Pipeline

In [None]:
# Collect every JPEG path that sits exactly one directory below data/images.
# shuffle=False is passed so the listing is not randomly reordered.
image_pattern = 'data/images/*/*.jpg'
images = tf.data.Dataset.list_files(image_pattern, shuffle=False)

In [None]:
# Peek at the first element of the dataset (a file path at this point).
next(images.as_numpy_iterator())

In [None]:
def load_image(x):
    """Read the file at path ``x`` and decode its contents as a JPEG.

    Args:
        x: Path of the image file, in a form accepted by ``tf.io.read_file``.

    Returns:
        The image tensor produced by ``tf.io.decode_jpeg``.
    """
    return tf.io.decode_jpeg(tf.io.read_file(x))

In [None]:
# Turn the dataset of file paths into a dataset of decoded images.
images = images.map(load_image)

### 1.3 View Raw Images with Matplotlib

In [None]:
# Group the images four at a time and iterate over the batches as NumPy arrays.
batched_images = images.batch(4)
image_generator = batched_images.as_numpy_iterator()

In [None]:
# Fetch the next batch of images to display.
plot_images = next(image_generator)

In [None]:
# One row of four axes; each image of the batch is drawn on its own axis.
fig, ax = plt.subplots(figsize=(20, 20), ncols=4)
for axis, picture in zip(ax, plot_images):
    axis.imshow(picture)
plt.show()

# 2. Partition Unaugmented Data

### 2.1 Split Data Into Train (70%), Test (15%), and Validation (15%)

In [None]:
import os
import random
import shutil

In [None]:
# Every sub-directory of data/images is treated as one class label.
# os.listdir raises FileNotFoundError when the directory is missing, which is
# easier to diagnose than the bare StopIteration an exhausted os.walk produces.
imagesRoot = os.path.join('data', 'images')
labels = sorted(
    name for name in os.listdir(imagesRoot)
    if os.path.isdir(os.path.join(imagesRoot, name))
)

# Class ids start at 1; the augmentation pipeline writes class 0 for images
# that end up without a bounding box.
labelMap = {name: classId for classId, name in enumerate(labels, start=1)}

# Number of images moved into each partition, per label.
# 10 + 2 + 3 = 15 images per label, i.e. roughly 67% / 13% / 20%.
# NOTE(review): if an exact 70/15/15 split is intended, these counts need adjusting.
partitions = {
    'train': 10,
    'test': 2,
    'val': 3
}

In [None]:
# Display the label-name -> class-id mapping.
labelMap

In [None]:
# Move a random sample of images (and their label files) out of each label
# directory into data/<partition>/images and data/<partition>/labels.
lblSource = os.path.join('data', 'labels')

for partition, imgCount in partitions.items():
    imgDest = os.path.join('data', partition, 'images')
    lblDest = os.path.join('data', partition, 'labels')
    # shutil.move only moves a file *into* the destination when it is an
    # existing directory; otherwise the destination is used as the new file
    # name and every moved file would land on the same path.
    os.makedirs(imgDest, exist_ok=True)
    os.makedirs(lblDest, exist_ok=True)

    for label in labels:
        imgSource = os.path.join('data', 'images', label)
        # random.sample raises ValueError when the directory holds fewer than
        # imgCount files. Files are moved, so later partitions sample only
        # from what earlier partitions left behind.
        for imgFilename in random.sample(os.listdir(imgSource), imgCount):
            shutil.move(os.path.join(imgSource, imgFilename), imgDest)

            # splitext strips only the final extension, so a name such as
            # "a.b.jpg" maps to "a.b.json" instead of "a.json".
            lblFilename = os.path.splitext(imgFilename)[0] + '.json'
            lblPath = os.path.join(lblSource, lblFilename)
            # Images without a label file are allowed: the augmentation
            # pipeline treats them as having no bounding box.
            if os.path.exists(lblPath):
                shutil.move(lblPath, lblDest)

# 3. Apply Image Augmentation on Images and Labels using Albumentations

### 3.1 Setup Albumentations Transform Pipeline

In [None]:
import albumentations as alb

In [None]:
# Augmentation steps, applied in the order listed.
transforms = [
    alb.RandomCrop(width=450, height=450),
    alb.HorizontalFlip(p=0.5),
    alb.RandomBrightnessContrast(p=0.2),
    alb.RandomGamma(p=0.2),
    alb.RGBShift(p=0.2),
    alb.VerticalFlip(p=0.5)
]

# Bounding boxes are transformed together with the image; their class names
# are supplied through the 'class_labels' argument when the pipeline is called.
# NOTE(review): the 'albumentations' format presumably means normalized
# [x_min, y_min, x_max, y_max] -- confirm against the library documentation.
bbox_config = alb.BboxParams(format='albumentations', label_fields=['class_labels'])

augmentor = alb.Compose(transforms, bbox_params=bbox_config)

### 3.2 Load a Test Image and Annotation with OpenCV and JSON

In [None]:
# Pick one training image at random and load it with OpenCV.
imgSource = os.path.join('data', 'train', 'images')
filename = random.sample(os.listdir(imgSource), 1)[0]
img = cv2.imread(os.path.join(imgSource, filename))

In [None]:
# Load the annotation that belongs to the sampled image.
# splitext removes only the final extension, so dots inside the base name
# (e.g. "a.b.jpg" -> "a.b.json") are preserved.
labelFilename = os.path.splitext(filename)[0] + '.json'
with open(os.path.join('data', 'train', 'labels', labelFilename), 'r') as f:
    label = json.load(f)

### 3.3 Extract Coordinates and Rescale to Match Image Resolution

In [None]:
# Take the two corner points of the first annotated shape (pixel coordinates).
(x1, y1), (x2, y2) = label['shapes'][0]['points'][:2]

# The corners may be stored in either order (presumably the order in which the
# rectangle was drawn), so sort them into [x_min, y_min, x_max, y_max] before
# handing the box to the augmentation pipeline.
coords = [min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)]

In [None]:
# img.shape is (rows, columns, channels): x values are divided by the number
# of columns and y values by the number of rows, giving fractions of the image.
height, width, _ = img.shape
scale = [width, height] * 2
coords = list(np.divide(coords, scale))

In [None]:
# Display the rescaled bounding-box coordinates.
coords

### 3.4 Apply Augmentations and View Results

In [None]:
# Class name attached to the first annotated shape.
first_shape = label['shapes'][0]
imgLabel = first_shape['label']

In [None]:
# Run the pipeline once on the sample image with its single box and class name.
augmented = augmentor(image=img, bboxes=[coords], class_labels=[imgLabel])

In [None]:
# The image was loaded with cv2.imread (BGR channel order) but matplotlib
# displays RGB, so convert first. cvtColor returns a new array, which also
# keeps the rectangle from being drawn onto augmented['image'] itself.
preview = cv2.cvtColor(augmented['image'], cv2.COLOR_BGR2RGB)

# The random crop can drop the box entirely; only draw when one survived.
if len(augmented['bboxes']) > 0:
    # Scale by the actual output size instead of a hard-coded crop size.
    previewHeight, previewWidth = preview.shape[:2]
    bbox = augmented['bboxes'][0]
    cv2.rectangle(
        preview,
        tuple(np.multiply(bbox[:2], [previewWidth, previewHeight]).astype(int).tolist()),
        tuple(np.multiply(bbox[2:4], [previewWidth, previewHeight]).astype(int).tolist()),
        (255, 0, 0), 2
    )

plt.imshow(preview)

# 4. Build and Run Augmentation Pipeline

### 4.1 Run Augmentation Pipeline

In [None]:
# Number of augmented variants written for every source image.
AUGMENTATIONS_PER_IMAGE = 20

# For every image of every partition, write AUGMENTATIONS_PER_IMAGE augmented
# copies to aug_data/<partition>/images and one JSON annotation per copy to
# aug_data/<partition>/labels.
for partition in ['train', 'test', 'val']:
    image_dir = os.path.join('data', partition, 'images')
    label_dir = os.path.join('data', partition, 'labels')
    aug_image_dir = os.path.join('aug_data', partition, 'images')
    aug_label_dir = os.path.join('aug_data', partition, 'labels')
    # Create the output directories up front so writes cannot fail merely
    # because a directory is missing.
    os.makedirs(aug_image_dir, exist_ok=True)
    os.makedirs(aug_label_dir, exist_ok=True)

    for image in os.listdir(image_dir):
        img = cv2.imread(os.path.join(image_dir, image))
        if img is None:
            # cv2.imread returns None instead of raising for unreadable files.
            print(f'Skipping {image}: could not be read as an image')
            continue

        # splitext strips only the final extension, so "a.b.jpg" is matched
        # with "a.b.json" and produces distinct output names.
        stem = os.path.splitext(image)[0]
        label_path = os.path.join(label_dir, f'{stem}.json')
        has_label = os.path.exists(label_path)

        # Defaults for images without an annotation file: a tiny placeholder
        # box and a placeholder class name (never written to the output).
        coords = [0, 0, 0.00001, 0.00001]
        imgLabel = 'background'

        if has_label:
            with open(label_path, 'r') as f:
                label = json.load(f)

            shape = label['shapes'][0]
            imgLabel = shape['label']

            # Corners may be stored in either order; sort them into
            # [x_min, y_min, x_max, y_max] and rescale to fractions of the
            # image size (x by columns, y by rows).
            (x1, y1), (x2, y2) = shape['points'][:2]
            row, col = img.shape[:2]
            coords = list(np.divide(
                [min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)],
                [col, row, col, row],
            ))

        # Best effort: a failure abandons the remaining variants of this
        # image, reports it, and moves on to the next image.
        try:
            for x in range(AUGMENTATIONS_PER_IMAGE):
                augmented = augmentor(image=img, bboxes=[coords], class_labels=[imgLabel])

                aug_image_path = os.path.join(aug_image_dir, f'{stem}.{x}.jpg')
                # cv2.imwrite reports failure through its return value.
                if not cv2.imwrite(aug_image_path, augmented['image']):
                    raise IOError(f'could not write {aug_image_path}')

                # len() rather than truthiness: the boxes may come back as an array.
                if has_label and len(augmented['bboxes']) > 0:
                    annotation = {
                        'image': image,
                        # Plain floats so json.dump never sees NumPy scalars.
                        'bbox': [float(value) for value in augmented['bboxes'][0][:4]],
                        'class': labelMap[imgLabel],
                    }
                else:
                    # No annotation, or the crop removed the box: class 0.
                    annotation = {'image': image, 'bbox': [0, 0, 0, 0], 'class': 0}

                with open(os.path.join(aug_label_dir, f'{stem}.{x}.json'), 'w') as f:
                    json.dump(annotation, f)
        except Exception as e:
            print(f'Augmentation failed for {image}: {e}')
