A Python function that takes a `main_folder` path and copies its contents into three subfolders of `copy_folder` (`train`, `test`, and `valid`) according to a given percentage split:

In [2]:
import os
import shutil
import random

def split_directory(main_folder, copy_folder, split=(0.8, 0.1, 0.1), seed=None):
    """Copy the files of ``main_folder`` into train/test/valid folders.

    The regular files found directly inside ``main_folder`` are shuffled and
    copied (with ``shutil.copy2``) into ``copy_folder/train``,
    ``copy_folder/test`` and ``copy_folder/valid``. The source files are left
    in place.

    Args:
        main_folder: Existing directory whose files are distributed.
        copy_folder: Existing directory that receives the three subfolders;
            they are created when missing.
        split: Fractions for train and test (first two entries); every
            remaining file goes to valid. The fractions must sum to 1.
        seed: Optional seed. When given, the shuffle uses a private
            ``random.Random(seed)`` so the same folder yields the same split;
            when ``None`` the global ``random`` state is used.

    Raises:
        AssertionError: If the fractions do not sum to 1 or if either path is
            not a directory.

    NOTE(review): a later cell of this notebook defines another function with
    the same name; once that cell has run, it replaces this one.
    """
    # Compare with a tolerance: exact float equality rejects valid splits such
    # as (0.7, 0.2, 0.1), whose float sum is 0.9999999999999999.
    assert abs(sum(split) - 1) <= 1e-9, "Split percentages should add up to 1."
    assert all([os.path.isdir(main_folder), os.path.isdir(copy_folder)]), "Folder paths should be valid directories."

    # Keep regular files only (shutil.copy2 cannot copy a directory) and list
    # them before the output folders exist, so train/test/valid are never
    # picked up as input when copy_folder is main_folder itself.
    # Sorting makes the seeded shuffle independent of os.listdir() ordering.
    files = sorted(
        name for name in os.listdir(main_folder)
        if os.path.isfile(os.path.join(main_folder, name))
    )

    # Create subdirectories if they don't exist
    for subdir in ('train', 'test', 'valid'):
        os.makedirs(os.path.join(copy_folder, subdir), exist_ok=True)

    # Shuffle files randomly
    if seed is None:
        random.shuffle(files)
    else:
        random.Random(seed).shuffle(files)

    # Cut points are floored as before; the small epsilon keeps a product such
    # as 10 * (0.7 + 0.2) == 8.999999999999998 from being truncated to 8.
    total = len(files)
    epsilon = 1e-9
    train_end = int(total * split[0] + epsilon)
    test_end = int(total * (split[0] + split[1]) + epsilon)

    # Copy files to subdirectories based on the cut points
    for index, filename in enumerate(files):
        if index < train_end:
            subset = 'train'
        elif index < test_end:
            subset = 'test'
        else:
            subset = 'valid'
        shutil.copy2(
            os.path.join(main_folder, filename),
            os.path.join(copy_folder, subset, filename),
        )


In [4]:
# Machine-specific absolute paths: the folder to read from and the folder
# that receives the train/test/valid subfolders. Adjust before running on
# another machine.
main_folder = "E:/Export_PersonOnlyCOCO/Export_PersonOnlyCOCO/images/val"
copy_folder = "E:/Export_PersonOnlyCOCO/Export_PersonOnlyCOCO/Person/image2"

# Fractions for train and test; the remaining files go to valid.
split = (0.8, 0.1, 0.1)

split_directory(main_folder=main_folder, copy_folder=copy_folder, split=split)


In [7]:
import os
import shutil
import random

def split_directory(main_folder, copy_folder, split=(0.8, 0.1, 0.1), seed=None, image_ext='.jpg'):
    """Copy label/image pairs from ``main_folder`` into train/test/valid folders.

    Every ``.txt`` file found directly inside ``main_folder`` is treated as a
    label file and is paired with the file of the same base name and the
    extension ``image_ext`` in the same folder. The pairs are shuffled and
    both files of a pair are copied (with ``shutil.copy2``) into the same
    subfolder of ``copy_folder``: ``train``, ``test`` or ``valid``.

    Args:
        main_folder: Existing directory holding the label and image files.
        copy_folder: Existing directory that receives the three subfolders;
            they are created when missing.
        split: Fractions for train and test (first two entries); every
            remaining pair goes to valid. The fractions must sum to 1.
        seed: Optional seed. When given, the shuffle uses a private
            ``random.Random(seed)`` so the same folder yields the same split;
            when ``None`` the global ``random`` state is used.
        image_ext: Extension (including the dot) of the image that belongs
            to each label file.

    Raises:
        AssertionError: If the fractions do not sum to 1 or if either path is
            not a directory.
        FileNotFoundError: If any label file has no matching image. This is
            raised before anything is written, so a failed run does not leave
            a partial split behind.
    """
    # Compare with a tolerance: exact float equality rejects valid splits such
    # as (0.7, 0.2, 0.1), whose float sum is 0.9999999999999999.
    assert abs(sum(split) - 1) <= 1e-9, "Split percentages should add up to 1."
    assert all([os.path.isdir(main_folder), os.path.isdir(copy_folder)]), "Folder paths should be valid directories."

    # Get all label files in the main folder. Sorting makes the seeded
    # shuffle independent of os.listdir() ordering.
    label_files = sorted(
        name for name in os.listdir(main_folder)
        if name.endswith('.txt') and os.path.isfile(os.path.join(main_folder, name))
    )
    image_for = {name: os.path.splitext(name)[0] + image_ext for name in label_files}

    # Check every pair up front instead of failing halfway through the copy.
    missing = [
        image_for[name] for name in label_files
        if not os.path.isfile(os.path.join(main_folder, image_for[name]))
    ]
    if missing:
        raise FileNotFoundError(
            "%d label file(s) in %r have no matching image (e.g. %s)"
            % (len(missing), main_folder, ", ".join(missing[:5]))
        )

    # Create subdirectories if they don't exist
    for subdir in ('train', 'test', 'valid'):
        os.makedirs(os.path.join(copy_folder, subdir), exist_ok=True)

    # Shuffle label files randomly
    if seed is None:
        random.shuffle(label_files)
    else:
        random.Random(seed).shuffle(label_files)

    # Cut points are floored as before; the small epsilon keeps a product such
    # as 10 * (0.7 + 0.2) == 8.999999999999998 from being truncated to 8.
    total = len(label_files)
    epsilon = 1e-9
    train_end = int(total * split[0] + epsilon)
    test_end = int(total * (split[0] + split[1]) + epsilon)

    # Copy each label together with its image into the chosen subdirectory
    for index, label_name in enumerate(label_files):
        if index < train_end:
            subset = 'train'
        elif index < test_end:
            subset = 'test'
        else:
            subset = 'valid'
        target_dir = os.path.join(copy_folder, subset)
        image_name = image_for[label_name]
        shutil.copy2(os.path.join(main_folder, label_name), os.path.join(target_dir, label_name))
        shutil.copy2(os.path.join(main_folder, image_name), os.path.join(target_dir, image_name))


In [9]:
# Machine-specific absolute paths: the folder to read from and the folder
# that receives the train/test/valid subfolders. Adjust before running on
# another machine.
# NOTE(review): the source is an images/val folder; confirm that the .txt
# label files really live next to the images there.
main_folder = "E:/Export_PersonOnlyCOCO/Export_PersonOnlyCOCO/images/val"
copy_folder = "E:/Export_PersonOnlyCOCO/Export_PersonOnlyCOCO/Person/labels"

# Fractions for train and test; the remaining pairs go to valid.
split = (0.8, 0.1, 0.1)

split_directory(main_folder=main_folder, copy_folder=copy_folder, split=split)