In [1]:
from PIL import Image

def resize_image(image_path, size = (512, 512)):
    """Open the image at ``image_path`` and return a resized copy.

    Args:
        image_path: Path of the image file to open.
        size: Target size, passed straight to ``Image.resize`` (Pillow
            expects a ``(width, height)`` tuple). Defaults to ``(512, 512)``.

    Returns:
        The new image object produced by ``resize``. The file on disk is
        not modified.
    """
    # Image.open keeps the underlying file open until it is closed; the
    # context manager releases it as soon as the resized copy exists.
    with Image.open(image_path) as im:
        return im.resize(size)

In [9]:
# resize all images from the subdirs of real_data
import os

def resize_all_images(dir_, size=(512, 512)):
    """Resize every image below ``dir_`` and overwrite it in place.

    Walks ``dir_`` and all of its subdirectories. Each file whose name ends
    in ``.jpg``, ``.jpeg`` or ``.png`` (matched case-insensitively, so
    ``PHOTO.JPG`` is included) is resized with ``resize_image`` and saved
    back to the same path.

    Args:
        dir_: Root directory to walk.
        size: Target size forwarded to ``resize_image``. Defaults to
            ``(512, 512)``.

    Returns:
        None. The original files are replaced; no backup is kept.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    for root, _dirs, files in os.walk(dir_):
        for file in files:
            # Lower-case the name so upper-case extensions are not skipped.
            if not file.lower().endswith(image_extensions):
                continue
            image_path = os.path.join(root, file)
            im = resize_image(image_path, size)
            im.save(image_path)

In [11]:
# Resize, in place, every image under the odonata training folder.
resize_all_images(dir_="../images/arthropods/train/odonata")

In [17]:
# hold out 20% of the images as the test split (the remaining 80% go to training)
import shutil
from random import shuffle


def split_data(source_directory, training_directory, testing_directory, split_size=0.8, seed=None):
    """Randomly copy the images in ``source_directory`` into train/test folders.

    Only regular files directly inside ``source_directory`` whose name ends
    in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) are considered.
    The source files are copied, never moved or deleted.

    Files already present in the target directories are not removed, so
    re-running with a different shuffle can leave the same image in both
    sets. Pass a fixed ``seed`` to make re-runs produce the same split.

    Args:
        source_directory: Directory holding the images to split.
        training_directory: Destination for the training share; created
            if missing.
        testing_directory: Destination for the remaining files; created
            if missing.
        split_size: Fraction (0..1) of the files that go to training.
            Defaults to 0.8.
        seed: Optional seed for the shuffle. ``None`` (the default) gives
            a different random split on every call.

    Raises:
        ValueError: If ``split_size`` is not between 0 and 1.
    """
    # Local import: the surrounding cell only imports `shuffle`, which
    # cannot be seeded without touching the global random state.
    from random import Random

    if not 0 <= split_size <= 1:
        raise ValueError(f"split_size must be between 0 and 1, got {split_size!r}")

    # List the source first so a bad source path fails before any output
    # directory is created. Sorting makes the result for a given seed
    # independent of the order os.listdir happens to return.
    files = sorted(
        file
        for file in os.listdir(source_directory)
        if file.lower().endswith((".png", ".jpg", ".jpeg"))
        and os.path.isfile(os.path.join(source_directory, file))
    )
    Random(seed).shuffle(files)

    os.makedirs(training_directory, exist_ok=True)
    os.makedirs(testing_directory, exist_ok=True)

    # The first `split_size` share goes to training, the rest to testing.
    split_index = int(len(files) * split_size)
    destinations = (
        (training_directory, files[:split_index]),
        (testing_directory, files[split_index:]),
    )

    for directory, names in destinations:
        for file in names:
            shutil.copy(
                os.path.join(source_directory, file), os.path.join(directory, file)
            )

In [20]:
# Split the images in <main_dir>/odonata into <main_dir>/train/odonata and
# <main_dir>/test/odonata.
name = "odonata"
main_dir = os.path.join("../..", "images", "arthropods", "augmented")
source_dir, train_dir, test_dir = (
    os.path.join(main_dir, *parts)
    for parts in ([name], ["train", name], ["test", name])
)

split_data(
    source_directory=source_dir,
    training_directory=train_dir,
    testing_directory=test_dir,
)

In [13]:
# for each image in the training set, save the unrotated image plus 3 rotated copies (90, 180 and 270 degrees)
import numpy as np

def augment_data(source_directory, target_directory, rotations=(0, 90, 180, 270)):
    """Save rotated JPEG copies of every image in ``source_directory``.

    For each file directly inside ``source_directory`` whose name ends in
    ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive), one file per entry
    in ``rotations`` is written to ``target_directory`` as
    ``<original name without extension>_<rotation>.jpg``.

    Two sources that differ only in extension (``a.png`` and ``a.jpg``)
    still map to the same output names.

    Args:
        source_directory: Directory holding the images to augment.
        target_directory: Directory receiving the rotated copies; created
            if missing.
        rotations: Iterable of angles passed to ``Image.rotate``. Defaults
            to ``(0, 90, 180, 270)``, so the unrotated image is written too.
    """
    # Modes Pillow's JPEG writer accepts; anything else (RGBA, P, LA, ...)
    # must be converted before it can be saved as .jpg.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    os.makedirs(target_directory, exist_ok=True)

    files = [
        file
        for file in os.listdir(source_directory)
        if file.lower().endswith((".png", ".jpg", ".jpeg"))
    ]

    for file in files:
        # splitext strips only the final extension, so "img.v1.png" and
        # "img.v2.png" no longer collapse onto the same output name.
        stem = os.path.splitext(file)[0]
        with Image.open(os.path.join(source_directory, file)) as source_image:
            if source_image.mode in jpeg_modes:
                image = source_image
            else:
                image = source_image.convert("RGB")
            for rotation in rotations:
                rotated_image = image.rotate(rotation)
                rotated_image.save(
                    os.path.join(target_directory, f"{stem}_{rotation}.jpg")
                )


In [23]:
# Write rotated copies of the lepidoptera images into an "augmented" sub-folder.
# NOTE(review): `train_dir` resolves under ".../arthropods/test" here - confirm
# that augmenting the test split (rather than the training split) is intended.
col = "lepidoptera"
main_dir = os.path.join("../..", "images", "arthropods", "test")
train_dir, augmented_dir = (
    os.path.join(main_dir, col),
    os.path.join(main_dir, "augmented", col),
)

augment_data(source_directory=train_dir, target_directory=augmented_dir)