In [None]:
import os
import random
import shutil

# Source dataset root and the roots that receive the partitioned images / masks
input_folder = r'C:\Users\cinth\Documentos\ams\data_science\actual_thesis\codes\MedSAM\up_dataset\us_data\us_data'
output_folder_images = r'C:\Users\cinth\Documentos\ams\data_science\actual_thesis\codes\MedSAM\up_dataset\us_data\partitioned_dataset\images'
output_folder_masks = r'C:\Users\cinth\Documentos\ams\data_science\actual_thesis\codes\MedSAM\up_dataset\us_data\partitioned_dataset\masks'

# Share of the frames assigned to the training, validation and testing sets
train_ratio = 0.6
val_ratio = 0.2
test_ratio = 0.2

# One sub-folder per split under each output root
train_folder_images = os.path.join(output_folder_images, 'train')
val_folder_images = os.path.join(output_folder_images, 'val')
test_folder_images = os.path.join(output_folder_images, 'test')
train_folder_masks = os.path.join(output_folder_masks, 'train')
val_folder_masks = os.path.join(output_folder_masks, 'val')
test_folder_masks = os.path.join(output_folder_masks, 'test')

# Make sure every destination directory exists before any file is copied
for _directory in (
    output_folder_images,
    output_folder_masks,
    train_folder_images,
    val_folder_images,
    test_folder_images,
    train_folder_masks,
    val_folder_masks,
    test_folder_masks,
):
    os.makedirs(_directory, exist_ok=True)

# Function to copy images and masks while conserving names
def copy_images_and_masks(images, masks, src_image_folder, src_mask_folder, dest_image_folder, dest_mask_folder):
    """Copy paired image and mask files into their destination folders.

    File names are kept unchanged. ``shutil.copyfile`` replaces an existing
    destination file, so a file already present under the same name is
    overwritten.

    Args:
        images: Image file names, relative to ``src_image_folder``.
        masks: Mask file names, relative to ``src_mask_folder``. ``masks[i]``
            is the mask copied together with ``images[i]``.
        src_image_folder: Folder the images are read from.
        src_mask_folder: Folder the masks are read from.
        dest_image_folder: Existing folder the images are copied into.
        dest_mask_folder: Existing folder the masks are copied into.

    Raises:
        ValueError: If ``images`` and ``masks`` do not have the same length.
    """
    images = list(images)
    masks = list(masks)

    # zip() would silently drop the unpaired tail; refuse mismatched input
    # instead of copying an incomplete set of pairs.
    if len(images) != len(masks):
        raise ValueError(
            f"Number of images ({len(images)}) and masks ({len(masks)}) differ"
        )

    for image_name, mask_name in zip(images, masks):
        shutil.copyfile(os.path.join(src_image_folder, image_name),
                        os.path.join(dest_image_folder, image_name))
        shutil.copyfile(os.path.join(src_mask_folder, mask_name),
                        os.path.join(dest_mask_folder, mask_name))

# Traverse through each video folder and split its frames into train/val/test.
# Frames are ordered by their numeric index and split contiguously: the first
# train_ratio of them go to train, the next val_ratio to val, the rest to test.
# No shuffling is applied: the sort that aligns images with masks would undo
# any shuffle performed before it.
# NOTE(review): every video is copied into the same destination folders with
# its original file names, so identically named frames coming from different
# videos would overwrite each other -- confirm names are unique across videos.
for video_folder in os.listdir(input_folder):
    video_folder_path = os.path.join(input_folder, video_folder)
    if not os.path.isdir(video_folder_path):
        continue

    # Only videos that provide a 'mask_enhance' folder are partitioned
    mask_enhance_folder = os.path.join(video_folder_path, 'mask_enhance')
    if not os.path.isdir(mask_enhance_folder):
        continue

    image_folder = os.path.join(video_folder_path, 'image')
    if not os.path.isdir(image_folder):
        print(f"Skipping {video_folder}: no 'image' folder found.")
        continue

    # Sort images and masks based on numerical index so that images[i] and
    # masks[i] line up. The image index is the last '_'-separated token
    # (extension stripped); the mask index is the second-to-last token.
    # NOTE(review): presumably names look like <prefix>_<index>.<ext> and
    # <prefix>_<index>_<suffix>.<ext> -- confirm against the dataset.
    images = sorted(os.listdir(image_folder),
                    key=lambda x: int(x.split('_')[-1].split('.')[0]))
    masks = sorted(os.listdir(mask_enhance_folder),
                   key=lambda x: int(x.split('_')[-2]))

    # Positional pairing is only meaningful when both folders hold the same
    # number of files; otherwise images would be matched with wrong masks.
    if len(images) != len(masks):
        raise ValueError(
            f"{video_folder}: found {len(images)} images but {len(masks)} masks"
        )

    # Calculate the split boundaries; the testing set takes the remainder
    num_train = int(train_ratio * len(images))
    num_val = int(val_ratio * len(images))
    val_end = num_train + num_val

    # Partition images and masks into training, validation, and testing sets
    partitions = (
        (images[:num_train], masks[:num_train],
         train_folder_images, train_folder_masks),
        (images[num_train:val_end], masks[num_train:val_end],
         val_folder_images, val_folder_masks),
        (images[val_end:], masks[val_end:],
         test_folder_images, test_folder_masks),
    )

    # Copy images and masks to corresponding output folders while conserving names
    for split_images, split_masks, dest_images, dest_masks in partitions:
        copy_images_and_masks(split_images, split_masks,
                              image_folder,
                              mask_enhance_folder,
                              dest_images,
                              dest_masks)

print("Dataset partitioning completed.")
