**Import**

In [None]:
from PIL import Image, ImageOps
import os
import xml.etree.ElementTree as ET
from tqdm.auto import tqdm

import numpy as np
import matplotlib as plt
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import pandas as pd

**Prepare variables and functions**

In [None]:
# Variables for resizing - SET BEFORE USING!
# The folder variables below are written with a trailing '/'.
folder_path = 'raw_new/original-images/' # Input folder with the original images
output_resized = 'raw_new/resized-images/' # Output folder for the resized (padded) images
output_64 = 'raw_new/64/' # Output folder for the tiles cut out of the resized images
path_annot = 'data/annotations/' # Folder with the XML bounding-box annotations (from the KerasCV labeling)
dimensions = (1024,1024) # (width, height) the original images are resized and padded to
target_size = (64, 64) # (width, height) of the tiles the resized images are cut into

In [None]:
# IDs (file names without extension) of all images in the input folder.
# Only '.jpg' entries are kept because load_image() appends exactly that extension;
# any other entry (sub-folder, hidden file, other format) would make it fail later on.
# Sorting makes the processing order reproducible across file systems.
jpg_files = sorted(
    os.path.splitext(file_name)[0]
    for file_name in os.listdir(folder_path)
    if os.path.splitext(file_name)[1] == '.jpg'
)

In [None]:
### Loads an image
def load_image(folder_path, image_id):
    """Open the JPEG file `<folder_path>/<image_id>.jpg` and return it as a PIL image.

    Args:
        folder_path: Folder that contains the image (with or without a trailing separator).
        image_id: File name of the image without the '.jpg' extension.

    Returns:
        The image object returned by `Image.open`. The caller is responsible for
        closing it (e.g. by using it in a `with` statement).
    """
    # os.path.join only inserts a separator when folder_path does not already end with one
    return Image.open(os.path.join(folder_path, image_id + '.jpg'))

In [None]:
### Fits the loaded image into `dimensions` while keeping its aspect ratio and
### pads the remaining area with black so the result is exactly `dimensions` in size
def resize_image(image):
    """Return a copy of `image` scaled to fit `dimensions` and padded with black.

    Args:
        image: PIL image to resize.

    Returns:
        A new PIL image of size `dimensions`. Missing width is added on the left
        edge and missing height on the bottom edge.
    """
    fitted = ImageOps.contain(image, dimensions)
    fitted_width, fitted_height = fitted.size
    missing_width = dimensions[0] - fitted_width
    missing_height = dimensions[1] - fitted_height
    # ImageOps.expand reads a 4-tuple border as (left, top, right, bottom)
    return ImageOps.expand(fitted, border=(missing_width, 0, 0, missing_height), fill='black')

In [None]:
### Chops the image into tiles (64x64px by default) and stores them in the destination folder
def chop_image(image, image_id, output_path, tile_size=None, canvas_size=None):
    """Cut `image` into a grid of tiles and save every tile as a JPEG file.

    Tiles are written to `<output_path>/<image_id>_<x>_<y>.jpg`, where `x` is the
    column index and `y` the row index of the tile in the grid.

    Args:
        image: PIL image to cut (expected to be `canvas_size` large).
        image_id: ID of the source image, used as file name prefix.
        output_path: Destination folder; it is created if it does not exist.
        tile_size: (width, height) of a tile. Defaults to the global `target_size`.
        canvas_size: (width, height) of the area to cut. Defaults to the global `dimensions`.
    """
    tile_width, tile_height = target_size if tile_size is None else tile_size
    canvas_width, canvas_height = dimensions if canvas_size is None else canvas_size

    if output_path:
        os.makedirs(output_path, exist_ok=True)

    for x in range(canvas_width // tile_width):
        for y in range(canvas_height // tile_height):
            left, upper = x * tile_width, y * tile_height
            tile = image.crop((left, upper, left + tile_width, upper + tile_height))
            # `bits` is not a JPEG save option, so only `optimize` is passed on
            tile.save(os.path.join(output_path, f'{image_id}_{x}_{y}.jpg'), optimize=True)

In [None]:
### Save the resized images
def save_resized_image(image, image_id, output_path):
    """Save `image` as `<output_path>/<image_id>.jpg` in JPEG format.

    Args:
        image: PIL image to store.
        image_id: File name of the image without extension.
        output_path: Destination folder; it is created if it does not exist.
    """
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    image.save(os.path.join(output_path, image_id + '.jpg'), format='JPEG')

**Processing files**

In [None]:
### Process images: resize every original, store the resized copy and cut it into tiles
for element in tqdm(jpg_files):
    # The context manager closes the source file once the image has been processed
    with load_image(folder_path, element) as image:
        resized_image = resize_image(image)
        save_resized_image(resized_image, element, output_resized)
        chop_image(resized_image, element, output_64)

In [None]:
### Check if some cut images are fully black and remove them from the data set
# Only '.jpg' entries are inspected; output_64 may also contain sub-folders
# (e.g. the augmentation folder), which cannot be opened as images.
files_64 = sorted(
    os.path.splitext(file_name)[0]
    for file_name in os.listdir(output_64)
    if os.path.splitext(file_name)[1] == '.jpg'
)

for element in files_64:
    # getbbox() returns None when the image has no non-zero (non-black) pixel
    with load_image(output_64, element) as image:
        is_fully_black = image.getbbox() is None
    # The file is deleted only after it has been closed
    if is_fully_black:
        os.remove(os.path.join(output_64, element + '.jpg'))

# Labeling Waldo in 64x64 Images Using Original Bounding Boxes

The original data has already been labeled with bounding boxes for KerasCV processing. To avoid labeling the 8k+ sub-images manually again, the original bounding boxes can be used to determine which 64x64 image file contains Waldo. To do so, the bounding box coordinates must first be transformed in the same way as the images were, and the name of the matching sub-image file must be calculated from them.

In [None]:
### Record the (width, height) of every original image
orig_image_sizes = {}

for element in jpg_files:
    # Close each file again right after reading its size
    with load_image(folder_path, element) as image:
        orig_image_sizes[element] = image.size

In [None]:
# Widths of all original images
x = [size[0] for size in orig_image_sizes.values()]

print(f"Average x dimension for original images is {round(np.mean(x),0)}")

# Heights of all original images
y = [size[1] for size in orig_image_sizes.values()]

print(f"Average y dimension for original images is {round(np.mean(y),0)}")

In [None]:
### Calculate resizing factor for each image
# resize_image() fits an image inside `dimensions` while keeping its aspect ratio, so the
# factor is set by whichever side is larger relative to its target length
# (the width for landscape images, the height for portrait images).
resizing_factor = {}

for element, (width, height) in orig_image_sizes.items():
    resizing_factor[element] = round(max(width / dimensions[0], height / dimensions[1]), 2)

resizing_factor

In [None]:
### Collect the paths of all XML annotation files, sorted alphabetically
xml_files = sorted(
    os.path.join(path_annot, entry)
    for entry in os.listdir(path_annot)
    if entry.endswith(".xml")
)

In [None]:
### Map class names to integer IDs and parse the bounding boxes of every annotation file

# Names of the annotated object classes; a name's position in the list is its integer ID
class_ids = ["Waldo"]

# Integer ID -> class name
class_mapping = dict(enumerate(class_ids))

def parse_annotation(xml_file):
    """Read one XML annotation file and return its image path, boxes and class IDs.

    Args:
        xml_file: Path of the XML annotation file.

    Returns:
        A tuple `(image_path, boxes, class_ids)`:
        `image_path` is `folder_path` joined with the text of the `filename` element,
        `boxes` holds one `[xmin, ymin, xmax, ymax]` list of floats per `object` element,
        `class_ids` holds, per `object` element, the key of `class_mapping` whose value
        equals the object's `name`.

    Raises:
        ValueError: If an object name is not a value of `class_mapping`.
    """
    root = ET.parse(xml_file).getroot()
    image_path = os.path.join(folder_path, root.find("filename").text)

    labels = []
    boxes = []
    for obj in root.iter("object"):
        labels.append(obj.find("name").text)
        bndbox = obj.find("bndbox")
        boxes.append([float(bndbox.find(tag).text) for tag in ("xmin", "ymin", "xmax", "ymax")])

    # Reverse lookup name -> ID; keys and values of a dict share the same order
    known_ids = list(class_mapping.keys())
    known_names = list(class_mapping.values())
    label_ids = [known_ids[known_names.index(label)] for label in labels]
    return image_path, boxes, label_ids

# xml_lib maps an image ID (file name without extension) to the first bounding box of that image;
# image_paths, bbox and classes hold the full parse result of every annotation file, in order.
xml_lib = {}
image_paths = []
bbox = []
classes = []
for xml_file in tqdm(xml_files):
    # A separate name keeps the global class_ids list of class names intact
    image_path, boxes, box_class_ids = parse_annotation(xml_file)
    image_paths.append(image_path)
    bbox.append(boxes)
    classes.append(box_class_ids)
    if not boxes:
        # Annotation without any object: there is no box to register for this image
        continue
    # Same ID derivation as for jpg_files, independent of folder depth and of dots in the name
    image_id = os.path.splitext(os.path.basename(image_path))[0]
    xml_lib[image_id] = boxes[0]

In [None]:
# Turn the input dictionaries into data frames indexed by image ID for further processing
resizing_df = pd.DataFrame(
    data=list(resizing_factor.values()),
    index=list(resizing_factor),
    columns=['resizing_factor'],
)
dimensions_df = pd.DataFrame(
    data=list(xml_lib.values()),
    index=list(xml_lib),
    columns=['x1', 'y1', 'x2', 'y2'],
)

In [None]:
# Left-join the bounding boxes onto the resizing factors using the shared index of image IDs;
# images without an annotation keep their row with empty box columns
bbdf = pd.merge(
    resizing_df,
    dimensions_df,
    how='left',
    left_index=True,
    right_index=True,
    suffixes=('', ''),
)

In [None]:
# Calculate the x_file and y_file coordinates of each box's top-left corner in the target images,
# measured in tiles of 64px (i.e. image 38 will be in file '1f6c8dc6-38_10_9')
# NOTE(review): any padding resize_image adds on the left edge is not included here - confirm
bbdf['x_file'] = bbdf['x1'].div(bbdf['resizing_factor']).div(64).round(2)
bbdf['y_file'] = bbdf['y1'].div(bbdf['resizing_factor']).div(64).round(2)

In [None]:
# Display the joined frame: resizing factor, original box corners and tile coordinates per image
bbdf

# Image Augmentation

There are too few images containing Waldo, so external augmentation is used to generate more labeled data. There are multiple external image augmentation techniques; here we start with image rotation, which turns every Waldo image into 4 images (1 original and three rotated by 90°, 180° and 270°).

In [None]:
# Folder with the Waldo tiles to augment. It is derived from output_64 rather than from an
# absolute, machine-specific path so the notebook also runs on other checkouts.
# The empty last component keeps the trailing separator on the path.
working_folder = os.path.join(output_64, 'waldo_augment', '')

In [None]:
# IDs of the tiles to augment. Only '.jpg' files are used, and files written by a previous
# augmentation run (suffix '_rotate_<angle>') are skipped so that re-running the notebook
# does not create rotations of rotations.
augmented_suffixes = ('_rotate_90', '_rotate_180', '_rotate_270')
aug_files = sorted(
    stem
    for stem, extension in map(os.path.splitext, os.listdir(working_folder))
    if extension == '.jpg' and not stem.endswith(augmented_suffixes)
)

In [None]:
def rotate_image(image, rotation):
    """Rotate `image` by 90, 180 or 270 degrees using `Image.transpose`.

    Args:
        image: PIL image to rotate.
        rotation: Rotation angle, one of '90', '180' or '270' (the integers
            90, 180 and 270 are accepted as well).

    Returns:
        The rotated image. For any other `rotation` a message is printed and
        the image is returned unchanged.
    """
    rotation = str(rotation)
    if rotation == '90':
        image = image.transpose(Image.ROTATE_90)
    elif rotation == '180':
        image = image.transpose(Image.ROTATE_180)
    elif rotation == '270':
        image = image.transpose(Image.ROTATE_270)
    else:
        print('invalid entry! Please enter "90", "180" or "270"')
    return image

In [None]:
def save_augment(image, working_folder, augmentation, image_id):
    """Save an augmented image as `<working_folder>/<image_id>_<augmentation>.jpg`.

    Args:
        image: PIL image to store.
        working_folder: Destination folder (with or without a trailing separator).
        augmentation: Label of the applied augmentation, appended to the file name.
        image_id: ID of the source image the augmentation was created from.
    """
    file_name = image_id + '_' + augmentation + '.jpg'
    image.save(os.path.join(working_folder, file_name), format='JPEG')

In [None]:
### Rotate every tile by 90°, 180° and 270° and store each rotation next to the original
for element in tqdm(aug_files):
    # Each tile is loaded once for all three rotations and closed afterwards
    with load_image(working_folder, element) as image:
        for rotation in ('90', '180', '270'):
            rotated = rotate_image(image, rotation)
            save_augment(rotated, working_folder, 'rotate_' + rotation, element)