In [7]:
import os
import shutil
import numpy as np
import random

def create_split_directories(path1, path2, tag, output_dir=None, seed=42):
    """Split paired ``.npy`` images and ``.txt`` labels into train/val/test folders.

    Files are paired by base name: ``<stem>.npy`` in ``path1`` matches
    ``<stem>.txt`` in ``path2``. Only stems present in both directories are
    used. The matched stems are shuffled with a seeded generator and divided
    by position: the first ``int(0.8 * n)`` go to train, the next ones up to
    ``int(0.9 * n)`` go to val, and the remainder go to test.

    Six directories are created (existing ones are reused):
    ``{train,val,test}_yolo_labels_<tag>`` and
    ``{train,val,test}_numpy_images_<tag>``. Files are copied with
    ``shutil.copy``; the sources are left untouched.

    Args:
        path1: Directory that is scanned for ``.npy`` files.
        path2: Directory that is scanned for ``.txt`` files.
        tag: Suffix appended to every output directory name.
        output_dir: Optional parent directory for the six output folders.
            ``None`` (default) creates them relative to the current working
            directory, exactly as plain directory names.
        seed: Seed for the shuffle. The default of 42 yields the same
            ordering as calling ``random.seed(42)`` before ``random.shuffle``.

    Returns:
        None. A summary of the output directories is printed.

    Raises:
        OSError: Propagated from ``os.listdir``, ``os.makedirs`` or
            ``shutil.copy`` (e.g. a source directory does not exist).
    """
    image_ext = '.npy'
    label_ext = '.txt'

    # Sorted listings keep the pre-shuffle order independent of the
    # filesystem's enumeration order.
    npy_files = sorted(f for f in os.listdir(path1) if f.endswith(image_ext))
    txt_files = sorted(f for f in os.listdir(path2) if f.endswith(label_ext))

    # Strip only the trailing extension. str.replace() would also rewrite an
    # extension-like substring in the middle of a name and break the pairing.
    npy_stems = [f[:-len(image_ext)] for f in npy_files]
    txt_stems = [f[:-len(label_ext)] for f in txt_files]

    # Walk the shorter listing and look stems up in a set of the other one.
    # Iterating the shorter (sorted) listing fixes the order of the matched
    # stems, which in turn fixes which split each pair lands in.
    if len(npy_stems) > len(txt_stems):
        reference_stems, other_stems = txt_stems, set(npy_stems)
    else:
        reference_stems, other_stems = npy_stems, set(txt_stems)
    common_files = [stem for stem in reference_stems if stem in other_stems]

    # Cumulative split boundaries (80% train, 10% val, 10% test, truncated).
    total_files = len(common_files)
    train_split = int(total_files * 0.8)
    val_split = int(total_files * 0.9)

    # A private generator keeps the split reproducible without reseeding the
    # module-level RNG that the rest of the session may be using.
    indices = list(range(total_files))
    random.Random(seed).shuffle(indices)

    split_indices = {
        'train': indices[:train_split],
        'val': indices[train_split:val_split],
        'test': indices[val_split:],
    }

    # os.path.join('', name) == name, so the default reproduces bare names.
    base_dir = '' if output_dir is None else output_dir
    label_dirs = {
        split: os.path.join(base_dir, f'{split}_yolo_labels_{tag}')
        for split in split_indices
    }
    image_dirs = {
        split: os.path.join(base_dir, f'{split}_numpy_images_{tag}')
        for split in split_indices
    }

    for directory in (*label_dirs.values(), *image_dirs.values()):
        os.makedirs(directory, exist_ok=True)

    def copy_if_present(src_dir, dest_dir, filename):
        # Skip files that disappeared after the directory was listed.
        src = os.path.join(src_dir, filename)
        if os.path.exists(src):
            shutil.copy(src, os.path.join(dest_dir, filename))

    for split, chosen in split_indices.items():
        for idx in chosen:
            stem = common_files[idx]
            copy_if_present(path1, image_dirs[split], stem + image_ext)
            copy_if_present(path2, label_dirs[split], stem + label_ext)

    print(f"Directories created and files split with tag '{tag}':")
    for split in split_indices:
        print(f"{split.capitalize()} YOLO labels: {label_dirs[split]}")
    for split in split_indices:
        print(f"{split.capitalize()} NumPy images: {image_dirs[split]}")

# Driver: build the train/val/test split for the 6-band dataset.
# NOTE: both source locations are machine-specific absolute paths.
path1, path2 = (
    '/Users/vihaan/Workspace/!Datasets/Processed_Data_S24/image_patches',  # .npy images
    '/Users/vihaan/Workspace/!Datasets/Processed_Data_S24/labels',         # .txt labels
)
tag = '6band'  # suffix appended to every output directory name
create_split_directories(path1=path1, path2=path2, tag=tag)

Directories created and files split with tag '6band':
Train YOLO labels: train_yolo_labels_6band
Val YOLO labels: val_yolo_labels_6band
Test YOLO labels: test_yolo_labels_6band
Train NumPy images: train_numpy_images_6band
Val NumPy images: val_numpy_images_6band
Test NumPy images: test_numpy_images_6band
