In [7]:
import os
import shutil

def organize_dir(dir_path: str, split_ratio: list, new_path: str) -> None:
    """
    Organize an image folder that contains class folders
    into training, validation, and test set folders.

    Args:
    dir_path (str): Path to target directory of data.
    split_ratio (list[float]): Percentage of data to allocate to each split (0.0-1.0). 
                               Example: [0.7, 0.2, 0.1] for train, validation, test.
    """
    # Check if the directory exists
    if os.path.exists(dir_path):
        folders = [os.path.join(dir_path, f) for f in os.listdir(dir_path) if
                   os.path.isdir(os.path.join(dir_path, f))]
        new_path += f"_{str(split_ratio[0])}_{str(split_ratio[1])}_{str(split_ratio[2])}"

        for clss in folders:

            # Get the list of all files in the directory
            file_list = os.listdir(clss)

            # Calculate the number of files in each split
            num_files = len(file_list)
            num_train = int(num_files * split_ratio[0])
            num_val = int(num_files * split_ratio[1])
            num_test = int(num_files * split_ratio[2])

            # Split the data
            train_files = file_list[:num_train]
            val_files = file_list[num_train:num_train+num_val]
            test_files = file_list[num_train+num_val:num_train+num_val+num_test]

            # Create the directories
            os.makedirs(os.path.join(new_path, "train", os.path.basename(clss)), exist_ok=True)
            os.makedirs(os.path.join(new_path, "val", os.path.basename(clss)), exist_ok=True)
            os.makedirs(os.path.join(new_path, "test", os.path.basename(clss)), exist_ok=True)

            # Copy the files
            for file in train_files:
                shutil.copy(os.path.join(clss, file),
                            os.path.join(new_path, "train", os.path.basename(clss), file))
            for file in val_files:
                shutil.copy(os.path.join(clss, file),
                            os.path.join(new_path, "val", os.path.basename(clss), file))
            for file in test_files:
                shutil.copy(os.path.join(clss, file),
                            os.path.join(new_path, "test", os.path.basename(clss), file))
    else:
        print(f"Directory not found at: {dir_path}")

In [8]:
def walk_through_dir(dir_path):
    """
    Print a one-line summary for dir_path and every directory beneath it.

    Args:
    dir_path (str): root directory to traverse

    Returns:
    None. For each directory visited by os.walk, one line is printed with:
        the number of immediate subdirectories
        the number of files it contains (reported as "images",
        whatever their actual type)
        the path of the directory
    """
    for current_dir, subdir_names, file_names in os.walk(dir_path):
        dir_count = len(subdir_names)
        file_count = len(file_names)
        print(f"There are {dir_count} directories and {file_count} images in '{current_dir}'.")

In [3]:
# Split the `tomo` class folders 50/25/25 into train/val/test copies.
# `dir_path` doubles as the output prefix and is reused by the next cell.
dir_path = os.path.join("data", "tomo")
tomo_source = os.path.join(dir_path, "cropped_images_equal_sizes")
organize_dir(tomo_source, [0.5, 0.25, 0.25], dir_path)

In [4]:
# Summarise the split written by the previous cell; the suffix must match the ratios used there.
walk_through_dir(f"{dir_path}_0.5_0.25_0.25")

There are 3 directories and 0 images in 'data/tomo_0.5_0.25_0.25'.
There are 2 directories and 0 images in 'data/tomo_0.5_0.25_0.25/test'.
There are 0 directories and 407 images in 'data/tomo_0.5_0.25_0.25/test/2_root_images'.
There are 0 directories and 407 images in 'data/tomo_0.5_0.25_0.25/test/1_root_images'.
There are 2 directories and 0 images in 'data/tomo_0.5_0.25_0.25/train'.
There are 0 directories and 815 images in 'data/tomo_0.5_0.25_0.25/train/2_root_images'.
There are 0 directories and 815 images in 'data/tomo_0.5_0.25_0.25/train/1_root_images'.
There are 2 directories and 0 images in 'data/tomo_0.5_0.25_0.25/val'.
There are 0 directories and 407 images in 'data/tomo_0.5_0.25_0.25/val/2_root_images'.
There are 0 directories and 407 images in 'data/tomo_0.5_0.25_0.25/val/1_root_images'.


In [10]:
# Split the `pano` class folders 50/25/25 into train/val/test copies,
# then print a per-directory summary of the result.
dir_path = os.path.join("data", "pano")
pano_source = os.path.join(dir_path, "cropped_images")
organize_dir(pano_source, [0.5, 0.25, 0.25], dir_path)
walk_through_dir(f"{dir_path}_0.5_0.25_0.25")

There are 3 directories and 0 images in 'data/pano_0.5_0.25_0.25'.
There are 2 directories and 0 images in 'data/pano_0.5_0.25_0.25/test'.
There are 0 directories and 40 images in 'data/pano_0.5_0.25_0.25/test/2_root_images'.
There are 0 directories and 158 images in 'data/pano_0.5_0.25_0.25/test/1_root_images'.
There are 2 directories and 0 images in 'data/pano_0.5_0.25_0.25/train'.
There are 0 directories and 80 images in 'data/pano_0.5_0.25_0.25/train/2_root_images'.
There are 0 directories and 315 images in 'data/pano_0.5_0.25_0.25/train/1_root_images'.
There are 2 directories and 0 images in 'data/pano_0.5_0.25_0.25/val'.
There are 0 directories and 40 images in 'data/pano_0.5_0.25_0.25/val/2_root_images'.
There are 0 directories and 161 images in 'data/pano_0.5_0.25_0.25/val/1_root_images'.
