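Copies images and masks from the newdata source directories (newdata/images, newdata/masks) into the temp/testing and temp/training directories. The new data that is not reserved for testing is kept separate inside training, under temp/training/new_images and temp/training/new_masks.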

In [1]:
import shutil
import random
from pathlib import Path
import os

# Base directory for every path used below: the parent of the current working
# directory (note: this is derived from the CWD, not from this file's location).
script_dir = Path.cwd().parent

In [2]:
# Directory layout -- every location is resolved relative to script_dir

# Inputs: the new dataset (images, annotation JSON, masks) plus the text file
# that lists which samples are reserved for testing
new_images_dir = script_dir.joinpath("newdata/images")
new_json_dir = script_dir.joinpath("newdata/json")
new_masks_dir = script_dir.joinpath("newdata/masks")
test_images_list = script_dir.joinpath("newdata/test_images.txt")

# Outputs: training and testing trees under temp/
train_images_dir = script_dir.joinpath("temp/training/images")
train_masks_dir = script_dir.joinpath("temp/training/masks")
train_new_images_dir = script_dir.joinpath("temp/training/new_images")
train_new_masks_dir = script_dir.joinpath("temp/training/new_masks")
test_images_dir = script_dir.joinpath("temp/testing/images")
test_masks_dir = script_dir.joinpath("temp/testing/masks")

# How many samples to draw for testing; only used when test_images_list does
# not exist yet
test_num = 100

In [3]:
# All output directories, handled uniformly below
output_dirs = (
    train_images_dir,
    train_masks_dir,
    train_new_images_dir,
    train_new_masks_dir,
    test_images_dir,
    test_masks_dir,
)

# First make sure every output directory exists (including missing parents)
for out_dir in output_dirs:
    out_dir.mkdir(parents=True, exist_ok=True)

# Then empty each one so the run starts from a clean state; only files are
# removed, subdirectories are left alone
for out_dir in output_dirs:
    for entry in out_dir.iterdir():
        if not entry.is_file():
            continue
        entry.unlink()

# Candidate test samples: every file in the annotation (JSON) directory
json_files = [f for f in new_json_dir.iterdir() if f.is_file()]


def _leading_number_key(path):
    """Return a sort key that orders by the number before the first "_".

    Stems whose prefix is not an integer sort after all numeric ones instead
    of aborting the run; the stem itself breaks ties so the order is
    deterministic.
    """
    prefix = path.stem.split("_")[0]
    try:
        return (0, int(prefix), path.stem)
    except ValueError:
        return (1, 0, path.stem)


if not test_images_list.exists():
    # No saved split yet: draw a random one. random.sample raises ValueError
    # when asked for more items than exist, so clamp to what is available.
    sample_size = min(test_num, len(json_files))
    if sample_size < test_num:
        print(
            f"Warning: Only {len(json_files)} annotation files available; "
            f"using {sample_size} test images instead of {test_num}"
        )
    test_images = random.sample(json_files, sample_size)
    test_images.sort(key=_leading_number_key)

    # Persist the split so later runs reuse the same test set. An empty
    # selection is not written, otherwise it would pin an empty test set.
    if test_images:
        with open(test_images_list, "w", encoding="utf-8") as f:
            f.writelines(f"{json_file.stem}\n" for json_file in test_images)
else:
    # Reuse the saved split; blank lines (e.g. a trailing newline added by an
    # editor) are skipped so they do not turn into a bogus ".json" entry.
    with open(test_images_list, "r", encoding="utf-8") as f:
        saved_stems = [line.strip() for line in f]
    test_images = [new_json_dir / f"{stem}.json" for stem in saved_stems if stem]

# Populate the testing set: for each selected annotation, copy the image and
# the mask that share its stem (both are looked up with a .jpg extension)
for annotation in test_images:
    stem = annotation.stem
    src_image = new_images_dir / (stem + ".jpg")
    src_mask = new_masks_dir / (stem + ".jpg")

    # A missing file is reported but does not stop the run; image and mask
    # are handled independently of each other
    if not src_image.exists():
        print(f"Warning: Test image not found: {src_image}")
    else:
        shutil.copy(src_image, test_images_dir / src_image.name)

    if not src_mask.exists():
        print(f"Warning: Test mask not found: {src_mask}")
    else:
        shutil.copy(src_mask, test_masks_dir / src_mask.name)

# Every mask in newdata/masks whose stem is not in the test set goes to the
# training/new_images and training/new_masks directories
mask_files = [f for f in new_masks_dir.iterdir() if f.is_file()]
test_image_stems = {json_file.stem for json_file in test_images}

for mask_file_path in mask_files:
    # Samples reserved for testing must not leak into training
    if mask_file_path.stem in test_image_stems:
        continue

    # The matching image is looked up by stem, with a .jpg extension
    image_file_path = new_images_dir / f"{mask_file_path.stem}.jpg"
    if image_file_path.exists():
        shutil.copy(image_file_path, train_new_images_dir / image_file_path.name)
    else:
        print(f"Warning: Newdata training image not found: {image_file_path}")

    # The mask path was just produced by iterdir() and filtered with
    # is_file(), so a separate existence check would always be true
    shutil.copy(mask_file_path, train_new_masks_dir / mask_file_path.name)

print("Setup complete - only new data copied (no supplement data).")

Setup complete - only new data copied (no supplement data).
