In [10]:
import os
from PIL import Image
from tqdm import tqdm

In [2]:
# Smallest accepted image dimensions, in pixels.
MIN_IMAGE_WIDTH = 256
MIN_IMAGE_HEIGHT = 256

# Largest accepted image dimensions, in pixels.
MAX_IMAGE_WIDTH = 1280
MAX_IMAGE_HEIGHT = 1280

# Directory holding the images that the cells below process.
RESOURCES_PATH = "../resources/"

In [3]:
def is_image(file):
    """Tell whether a file name has one of the supported image extensions.

    The comparison ignores case, so ``photo.JPG`` and ``scan.Png`` are
    recognised just like ``photo.jpg`` and ``scan.png``.

    Args:
        file: File name (or path) to test.

    Returns:
        bool: ``True`` if the name ends with ``.jpg`` or ``.png``.
    """
    # str.endswith accepts a tuple, which avoids chaining `or` checks.
    return file.lower().endswith((".jpg", ".png"))

## Remove unacceptable (too small or too large) images

In [4]:
def check_image_size(image_path):
    """Report whether the image at ``image_path`` is outside the allowed size range.

    Despite the name, a ``True`` result means the image is *unacceptable*:
    its width or height is below ``MIN_IMAGE_WIDTH`` / ``MIN_IMAGE_HEIGHT``
    or above ``MAX_IMAGE_WIDTH`` / ``MAX_IMAGE_HEIGHT``.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        bool: ``True`` if either dimension is out of range, else ``False``.

    Raises:
        Whatever ``Image.open`` raises for a missing or unreadable file.
    """
    # Use a context manager so the file handle is released before returning;
    # the caller may delete the file straight after this check.
    with Image.open(image_path) as image:
        width, height = image.size
    return (
        width < MIN_IMAGE_WIDTH
        or height < MIN_IMAGE_HEIGHT
        or width > MAX_IMAGE_WIDTH
        or height > MAX_IMAGE_HEIGHT
    )

In [5]:
def remove_unacceptable_images(directory):
    """Delete every image in ``directory`` whose dimensions are out of range.

    Only entries accepted by ``is_image`` are considered. An image is deleted
    when ``check_image_size`` returns ``True`` for it. Images that cannot be
    inspected are reported and left on disk, so a single bad file does not
    abort the whole pass. The number of deleted files is printed at the end.

    Args:
        directory: Directory whose direct entries are scanned.

    Raises:
        OSError: If ``directory`` cannot be listed or a file cannot be deleted.
    """
    counter = 0
    for file in tqdm(os.listdir(directory)):
        if not is_image(file):
            continue
        image_path = os.path.join(directory, file)
        try:
            unacceptable = check_image_size(image_path)
        except Exception as e:
            # Best effort, like the other directory passes: report and move on.
            # The file is kept because its size could not be determined.
            print(f"error in check file: {file}: {e}")
            continue
        if unacceptable:
            os.remove(image_path)
            # Count only after the delete succeeded.
            counter += 1
    print(f"{counter} files removed.")

In [6]:
# Destructive: deletes out-of-range images from RESOURCES_PATH on disk.
remove_unacceptable_images(RESOURCES_PATH)

3655 files removed.


## Resize images

In [7]:
def resize_image(image_path):
    """Rescale the image at ``image_path`` in place to a standard short-side length.

    The shorter side becomes 256 (if it is below 512), 512 (if it is below
    1024) or 1024 (otherwise). The longer side is scaled by the same ratio,
    rounded down to a whole pixel. The result overwrites the original file.

    Args:
        image_path: Path of the image file to resize and overwrite.

    Raises:
        Whatever ``Image.open``, ``Image.resize`` or ``Image.save`` raise for
        an unreadable or unwritable file.
    """
    with Image.open(image_path) as image:
        width, height = image.size
        short_side = min(width, height)
        if short_side < 512:
            convert_size = 256
        elif short_side < 1024:
            convert_size = 512
        else:
            convert_size = 1024

        # Integer arithmetic keeps the short side exactly at convert_size.
        # Truncating a float product (side * (convert_size / side)) can land
        # one pixel short when the product rounds just below the integer.
        new_width = width * convert_size // short_side
        new_height = height * convert_size // short_side
        resized = image.resize((new_width, new_height))

    # Save only after the source file is closed, since the same path is
    # overwritten.
    resized.save(image_path)

In [8]:
def resize_dir_images(directory):
    """Resize, in place, every image found directly inside ``directory``.

    Only entries accepted by ``is_image`` are passed to ``resize_image``.
    A failure on one file is reported together with its cause and does not
    stop the remaining files from being processed.

    Args:
        directory: Directory whose direct entries are scanned.

    Raises:
        OSError: If ``directory`` cannot be listed.
    """
    for file in tqdm(os.listdir(directory)):
        if not is_image(file):
            continue
        try:
            resize_image(os.path.join(directory, file))
        except Exception as e:
            # Best effort: keep going, but surface why this file failed.
            print(f"error in resize file: {file}: {e}")
    print("all images resized.")

In [9]:
# Overwrites each image under RESOURCES_PATH in place with its resized version.
resize_dir_images(RESOURCES_PATH)

all images resized.


## Square crop images

In [11]:
def square_crop_image(image_path):
    """Crop the image at ``image_path`` in place to a centred square.

    The square's side equals the shorter of the image's width and height,
    and it is centred along the longer axis. The result overwrites the
    original file.

    Args:
        image_path: Path of the image file to crop and overwrite.

    Raises:
        Whatever ``Image.open``, ``Image.crop`` or ``Image.save`` raise for
        an unreadable or unwritable file.
    """
    with Image.open(image_path) as image:
        width, height = image.size
        crop_size = min(width, height)
        # Offset of the square's top-left corner; zero along the shorter axis.
        crop_x = (width - crop_size) // 2
        crop_y = (height - crop_size) // 2
        cropped_image = image.crop(
            (crop_x, crop_y, crop_x + crop_size, crop_y + crop_size)
        )

    # Save only after the source file is closed, since the same path is
    # overwritten.
    cropped_image.save(image_path)

In [12]:
def square_crop_dir_images(directory):
    """Square-crop, in place, every image found directly inside ``directory``.

    Only entries accepted by ``is_image`` are passed to ``square_crop_image``.
    A failure on one file is reported together with its cause and does not
    stop the remaining files from being processed.

    Args:
        directory: Directory whose direct entries are scanned.

    Raises:
        OSError: If ``directory`` cannot be listed.
    """
    for file in tqdm(os.listdir(directory)):
        if not is_image(file):
            continue
        try:
            square_crop_image(os.path.join(directory, file))
        except Exception as e:
            # Best effort: keep going, but surface why this file failed.
            print(f"error in crop file: {file}: {e}")
    print("all images square cropped.")

In [13]:
# Overwrites each image under RESOURCES_PATH in place with its centred square crop.
square_crop_dir_images(RESOURCES_PATH)

100%|██████████| 20451/20451 [29:17<00:00, 11.63it/s] 

all images square cropped.



