In [30]:
import glob
import os

dataset = "exDark-yolo"
dataset_path_images = f"../../yolo-testing/datasets/{dataset}/images"
dataset_path_labels = f"../../yolo-testing/datasets/{dataset}/labels"

train_folder_images = f"{dataset_path_images}/train"
test_folder_images = f"{dataset_path_images}/test"

train_folder_labels = f"{dataset_path_labels}/train"
test_folder_labels = f"{dataset_path_labels}/test"

formats = ["jpg", "jpeg", "png", "JPG", "JPEG", "PNG"]

# We want to move too big images from the train to the test folder

train_images = []
test_images = []
for format in formats:
    train_images += glob.glob(f"{dataset_path_images}/train/*.{format}")
    test_images += glob.glob(f"{dataset_path_images}/test/*.{format}")

print(f"Train images: {len(train_images)}")
print(f"Test images: {len(test_images)}")

train_labels = glob.glob(f"{dataset_path_labels}/train/*.txt")
test_labels = glob.glob(f"{dataset_path_labels}/test/*.txt")

print(f"Train labels: {len(train_labels)}")
print(f"Test labels: {len(test_labels)}")


Train images: 3000
Test images: 2563
Train labels: 2900
Test labels: 2463


In [26]:
# Make a list of the tuples with filename and corresponding image size
train_images_sizes = []
for image in train_images:
    train_images_sizes.append((image, os.path.getsize(image)))

# Sort the list by image size
train_images_sizes.sort(key=lambda x: x[1])

test_images_sizes = []
for image in test_images:
    test_images_sizes.append((image, os.path.getsize(image)))

# Sort the list by image size
test_images_sizes.sort(key=lambda x: x[1])

# reverse the list to get the biggest images first for training, and the smallest images first for testing
train_images_sizes.reverse()

print(f"Train images sizes: {len(train_images_sizes)}", train_images_sizes[:4])
print(f"Test images sizes: {len(test_images_sizes)}", test_images_sizes[:4])

largest_100_train_images = train_images_sizes[:100]
smallest_100_test_images = test_images_sizes[:100]

Train images sizes: 3000 [('../../yolo-testing/datasets/exDark-yolo/images/train/2015_02633.jpg', 5941701), ('../../yolo-testing/datasets/exDark-yolo/images/train/2015_02634.jpg', 4211874), ('../../yolo-testing/datasets/exDark-yolo/images/train/2015_02637.jpg', 4070877), ('../../yolo-testing/datasets/exDark-yolo/images/train/2015_02638.jpg', 4044594)]
Test images sizes: 2563 [('../../yolo-testing/datasets/exDark-yolo/images/test/2015_00592.jpg', 4312), ('../../yolo-testing/datasets/exDark-yolo/images/test/2015_00561.jpg', 4352), ('../../yolo-testing/datasets/exDark-yolo/images/test/2015_06778.jpg', 4552), ('../../yolo-testing/datasets/exDark-yolo/images/test/2015_03651.jpg', 4562)]


In [29]:
def replace_image_extension(image_path, new_extension):
    image_name = os.path.basename(image_path)
    image_name_without_extension = os.path.splitext(image_name)[0]
    return f"{os.path.dirname(image_path)}/{image_name_without_extension}.{new_extension}"

# Move the largest 100 images with corresponding labels from the train folder to the test folder
for image in largest_100_train_images:
    image_path = image[0]
    image_name = os.path.basename(image_path)
    label_path = replace_image_extension(image_path, "txt").replace("images", "labels")
    target_image_path = f"{test_folder_images}/{image_name}"
    target_label_path = replace_image_extension(target_image_path, "txt")
    print(f"Moving {image_path} to {target_image_path}")
    print(f"Moving {label_path} to {target_label_path}")
    os.rename(image_path, target_image_path)
    os.rename(label_path, target_label_path)

# Move the smallest 100 images with corresponding labels from the test folder to the train folder
for image in smallest_100_test_images:
    image_path = image[0]
    image_name = os.path.basename(image_path)
    label_path = replace_image_extension(image_path, "txt").replace("images", "labels")
    target_image_path = f"{train_folder_images}/{image_name}"
    target_label_path = replace_image_extension(target_image_path, "txt")
    print(f"Moving {image_path} to {target_image_path}")
    print(f"Moving {label_path} to {target_label_path}")
    os.rename(image_path, target_image_path)
    os.rename(label_path, target_label_path)

Moving ../../yolo-testing/datasets/exDark-yolo/images/train/2015_02633.jpg to ../../yolo-testing/datasets/exDark-yolo/images/test/2015_02633.jpg
Moving ../../yolo-testing/datasets/exDark-yolo/labels/train/2015_02633.txt to ../../yolo-testing/datasets/exDark-yolo/images/test/2015_02633.txt
Moving ../../yolo-testing/datasets/exDark-yolo/images/train/2015_02634.jpg to ../../yolo-testing/datasets/exDark-yolo/images/test/2015_02634.jpg
Moving ../../yolo-testing/datasets/exDark-yolo/labels/train/2015_02634.txt to ../../yolo-testing/datasets/exDark-yolo/images/test/2015_02634.txt
Moving ../../yolo-testing/datasets/exDark-yolo/images/train/2015_02637.jpg to ../../yolo-testing/datasets/exDark-yolo/images/test/2015_02637.jpg
Moving ../../yolo-testing/datasets/exDark-yolo/labels/train/2015_02637.txt to ../../yolo-testing/datasets/exDark-yolo/images/test/2015_02637.txt
Moving ../../yolo-testing/datasets/exDark-yolo/images/train/2015_02638.jpg to ../../yolo-testing/datasets/exDark-yolo/images/test/