In [7]:
import os
import shutil
import random

def split_corresponding_datasets(folder_a, folder_b, output_folder, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1, seed=None):
    """Split two parallel folders of files into train/val/test subsets.

    The regular files in ``folder_a`` and ``folder_b`` are each sorted by
    name and paired by position, so corresponding files are assumed to sort
    into the same order in both folders. The pairs are shuffled, sliced into
    three subsets, and copied to ``output_folder/A/<split>`` and
    ``output_folder/B/<split>`` where ``<split>`` is ``train``, ``val`` or
    ``test``. A short summary of the subset sizes is printed.

    Subset sizes are ``int(total * train_ratio)`` and
    ``int(total * val_ratio)``; the test subset receives every remaining
    pair, so truncation leftovers end up in ``test``.

    Files already present under ``output_folder`` are not removed. Running
    the split again with a different shuffle can therefore leave the same
    file in more than one subset; pass ``seed`` or start from an empty
    output folder to avoid that.

    Args:
        folder_a: Directory holding the first set of files.
        folder_b: Directory holding the second set of files.
        output_folder: Root directory for the split; created if missing.
        train_ratio: Fraction of pairs assigned to the training subset.
        val_ratio: Fraction of pairs assigned to the validation subset.
        test_ratio: Fraction of pairs assigned to the test subset. Only used
            to validate that the three ratios sum to 1.
        seed: Optional seed for a private random generator. When ``None``
            (the default) the module-level ``random`` generator is used, so
            a prior ``random.seed(...)`` call still controls the shuffle.

    Returns:
        None.

    Raises:
        ValueError: If a ratio is negative or the ratios do not sum to 1.
        AssertionError: If the two folders hold a different number of files.
    """
    ratios = (train_ratio, val_ratio, test_ratio)
    if any(ratio < 0 for ratio in ratios):
        raise ValueError("Ratios must be non-negative")
    # Compare with a tolerance: 0.7 + 0.2 + 0.1 evaluates to
    # 0.9999999999999999, so an exact `== 1.0` check rejects the defaults.
    if abs(sum(ratios) - 1.0) > 1e-9:
        raise ValueError("Ratios must sum to 1")

    def _list_files(folder):
        # Sorted names of the regular files (sub-directories are skipped).
        return sorted(
            name for name in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, name))
        )

    # Read and validate both inputs before anything is written to disk.
    images_a = _list_files(folder_a)
    images_b = _list_files(folder_b)
    if len(images_a) != len(images_b):
        # Raised explicitly so the check also runs under `python -O`.
        raise AssertionError("Folders A and B must have the same number of files")

    # Pair corresponding files, then shuffle the pairs so A and B stay aligned.
    paired_images = list(zip(images_a, images_b))
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(paired_images)

    # Calculate split indices
    total_images = len(paired_images)
    train_end = int(total_images * train_ratio)
    val_end = train_end + int(total_images * val_ratio)

    splits = {
        'train': paired_images[:train_end],
        'val': paired_images[train_end:val_end],
        'test': paired_images[val_end:],
    }

    # Create each destination directory and copy the pairs assigned to it.
    for split_name, pairs in splits.items():
        dest_a = os.path.join(output_folder, 'A', split_name)
        dest_b = os.path.join(output_folder, 'B', split_name)
        os.makedirs(dest_a, exist_ok=True)
        os.makedirs(dest_b, exist_ok=True)
        for img_a, img_b in pairs:
            shutil.copy(os.path.join(folder_a, img_a), os.path.join(dest_a, img_a))
            shutil.copy(os.path.join(folder_b, img_b), os.path.join(dest_b, img_b))

    print(f"Total images: {total_images}")
    print(f"Training images: {len(splits['train'])}")
    print(f"Validation images: {len(splits['val'])}")
    print(f"Testing images: {len(splits['test'])}")

In [8]:
# Example usage
# split_corresponding_datasets shuffles with the module-level `random`
# generator, so seed it first: the same inputs then produce the same split on
# every run, and re-running this cell does not scatter a file across several
# subsets of an already-populated output folder.
random.seed(42)

folder_a = 'FacialData/A'
folder_b = 'FacialData/B'
output_folder = 'FacialData/NewData'
split_corresponding_datasets(folder_a, folder_b, output_folder)


Total images: 130
Training images: 91
Validation images: 26
Testing images: 13
