In [2]:
import os
from tqdm.notebook import tqdm
from PIL import Image


In [3]:
# list all files in data/flatten
def list_files(dir = 'data/flatten'):
    """Return the names of the entries in ``dir``, sorted alphabetically.

    ``os.listdir`` yields entries in an arbitrary, filesystem-dependent
    order. Sorting makes the listing deterministic, which matters when the
    result is later fed to a seeded random sampler.

    Args:
        dir: Directory to list. Defaults to ``'data/flatten'``.

    Returns:
        A sorted list of entry names (bare names, not full paths). Like
        ``os.listdir``, this includes sub-directories if any exist.

    Raises:
        OSError: If ``dir`` does not exist or cannot be read.
    """
    return sorted(os.listdir(dir))

# Names of the entries in the default source directory ('data/flatten').
# The squaring and resizing loops below iterate over this list.
# NOTE: the random-subset cell at the end rebinds `files`, so re-run this
# cell before re-running those loops.
files = list_files()

In [9]:
# function to pad the picture with white space to make it square
def pad_image(image):
    """Return a square RGB copy of ``image`` centred on a white canvas.

    The canvas side equals the longer side of ``image``. The leftover space
    along the shorter axis is split with floor division, so when it is odd
    the extra pixel of padding lands on the right/bottom edge.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` that
            ``Image.paste`` accepts.

    Returns:
        A new ``"RGB"`` image of size ``(side, side)``.
    """
    w, h = image.size
    side = max(w, h)
    # top-left corner at which the original is placed on the canvas
    offset = ((side - w) // 2, (side - h) // 2)
    canvas = Image.new("RGB", (side, side), (255, 255, 255))
    canvas.paste(image, offset)
    return canvas

# function to resize the image to a square of n x n pixels
def resize_image(image, size):
    """Return ``image`` scaled to ``size`` x ``size`` pixels.

    Both dimensions are forced to ``size``, so the aspect ratio is only
    preserved when the input is already square.

    Args:
        image: Object exposing ``resize((width, height))``.
        size: Edge length, in pixels, of the square result.

    Returns:
        Whatever ``image.resize`` returns for the ``(size, size)`` target.
    """
    target = (size, size)
    return image.resize(target)

def center_crop_to_square(image):
    """Crop the largest centred square out of ``image``.

    The square's side is the shorter side of ``image``. Along the longer
    axis the window is centred using floor division, so when the surplus is
    odd the window sits one pixel closer to the left/top edge.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``crop((left, top, right, bottom))``.

    Returns:
        The result of ``image.crop`` for the centred square box.
    """
    w, h = image.size
    side = min(w, h)
    x0 = (w - side) // 2
    y0 = (h - side) // 2
    # x0 + side == (w + side) // 2 for all integers, likewise for y
    box = (x0, y0, x0 + side, y0 + side)
    return image.crop(box)

import random
def random_center_crop_to_square(image):
    """Crop a randomly positioned square out of ``image``.

    The square's side is the shorter side of ``image``. Despite the name,
    the window is not centred: its top-left corner is drawn uniformly from
    every position that keeps the square inside the image, using the
    module-level ``random`` generator.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``crop((left, top, right, bottom))``.

    Returns:
        The result of ``image.crop`` for the chosen square box.
    """
    w, h = image.size
    side = min(w, h)
    # Horizontal offset is drawn before the vertical one; keeping this order
    # keeps results identical for a given RNG state.
    x0 = random.randint(0, w - side)
    y0 = random.randint(0, h - side)
    return image.crop((x0, y0, x0 + side, y0 + side))

def apply_one_squaring_method(image):
    """Make ``image`` square using one randomly chosen strategy.

    An image that is already square is returned unchanged (the very same
    object). Otherwise one of ``pad_image``, ``center_crop_to_square`` or
    ``random_center_crop_to_square`` is picked with ``random.choice`` and
    applied.

    Args:
        image: Object exposing ``size`` as ``(width, height)``.

    Returns:
        ``image`` itself when square, else the chosen strategy's result.
    """
    w, h = image.size
    if w != h:
        candidates = [pad_image, center_crop_to_square, random_center_crop_to_square]
        chosen = random.choice(candidates)
        return chosen(image)
    # nothing to do for square input
    return image



In [4]:
# Square every source image and write it, under the same file name, to
# data/square. The output folder is created up front so the first save()
# cannot fail on a missing directory.
os.makedirs('data/square', exist_ok=True)
for file in tqdm(files):
    # The context manager closes the underlying file handle after each image
    # instead of leaving it to the garbage collector.
    with Image.open(os.path.join('data/flatten', file)) as image:
        squared = apply_one_squaring_method(image)
        # save inside the `with`: for already-square input `squared` is `image`
        squared.save(os.path.join('data/square', file))


  0%|          | 0/303125 [00:00<?, ?it/s]

In [10]:
# make 256x256 and 128x128 and 64x64 versions
# Edge lengths of the down-scaled copies, in the order they are written.
sizes = (256, 128, 64)

# Create data/square256, data/square128 and data/square64 if they are missing.
for size in sizes:
    os.makedirs('data/square' + str(size), exist_ok=True)

for file in tqdm(files):
    # Open each squared image once and close its file handle when done.
    with Image.open(os.path.join('data/square', file)) as image:
        for size in sizes:
            resized = resize_image(image, size)
            resized.save(os.path.join('data/square' + str(size), file))

  0%|          | 0/303125 [00:00<?, ?it/s]

In [5]:
import random
# make 0.5%, 10% and 25% random subsets of the 256x256, 128x128 and 64x64 versions
def random_choice(images_list, percentage, seed=42):
    """Return a reproducible random subset of ``images_list``.

    A private ``random.Random(seed)`` generator is used, so the subset is the
    same one that ``random.seed(seed)`` followed by ``random.sample`` would
    give, but the state of the global ``random`` module is left untouched.

    Args:
        images_list: Sequence to sample from (without replacement).
        percentage: Fraction of the list to keep, e.g. ``0.1`` for 10%.
            The count is ``int(len(images_list) * percentage)``, i.e. it is
            truncated towards zero.
        seed: Seed for the private generator. Defaults to ``42``.

    Returns:
        A new list with the sampled elements.

    Raises:
        ValueError: If the computed count is negative or larger than the
            list (raised by ``sample``).
    """
    n_images = int(len(images_list) * percentage)
    rng = random.Random(seed)
    return rng.sample(images_list, n_images)

# For every (fraction, size) pair, write a seeded random subset of
# data/square<size> into data/square<size>_random<fraction>.
for percentage in [0.005, 0.1, 0.25]:
    for size in [64, 128, 256]:
        source_dir = f'data/square{size}'
        target_dir = f'data/square{size}_random{percentage}'

        # NOTE: this rebinds the notebook-level `files` name.
        files = list_files(source_dir)
        short_files = random_choice(files, percentage, seed=42)

        # Create the target folder once per subset rather than checking for
        # it on every file. It is created even if the sample is empty.
        os.makedirs(target_dir, exist_ok=True)

        for file in tqdm(short_files):
            # The context manager closes the source file handle after saving.
            with Image.open(os.path.join(source_dir, file)) as image:
                image.save(os.path.join(target_dir, file))

  0%|          | 0/1515 [00:00<?, ?it/s]

  0%|          | 0/1515 [00:00<?, ?it/s]

  0%|          | 0/1515 [00:00<?, ?it/s]