In [1]:
import os
import random
import shutil
from tqdm import tqdm

In [2]:
# Dataset locations (relative paths, resolved against the current working directory)
output_folder = "../Splitted_MyDataset02"  # destination root; one sub-folder per split is created below it
input_folder = "../Resized_argued_MyDataset_BKP"  # source root; read as one sub-folder per category

In [None]:
# Split proportions: the fraction of each category's images assigned to each split.
# Edit the values here (e.g. 0.80 / 0.10 / 0.10); together they must add up to 1.0.
split_ratios = {
    "train": 0.70,
    "val": 0.15,
    "test": 0.15,
}

# Catch typos early: proportions that do not add up to 1.0 would produce a
# split that does not match what is declared in this cell.
assert abs(sum(split_ratios.values()) - 1.0) < 1e-9, "split_ratios must sum to 1.0"

# Categories: each name is used as a sub-folder name under the input folder
# and under every split folder.
categories = ["Anger", "Disgust", "Fear", "Happy", "Neutral", "Sad", "Surprise"]

In [4]:
# Ensure output structure
def ensure_split_structure(output_folder, split_ratios, categories):
    """Create the ``<output_folder>/<split>/<category>`` directory tree.

    Args:
        output_folder: Root directory of the split dataset.
        split_ratios: Mapping whose keys are the split names; the values
            are not used here.
        categories: Iterable of category names, one sub-folder per name
            inside every split folder.

    Directories that already exist are left untouched.
    """
    # Gather every directory top-down (root, then each split followed by its
    # categories) and create them in that order in a single pass.
    wanted_dirs = [output_folder]
    for split_name in split_ratios.keys():
        split_dir = os.path.join(output_folder, split_name)
        wanted_dirs.append(split_dir)
        wanted_dirs.extend(os.path.join(split_dir, label) for label in categories)

    for directory in wanted_dirs:
        os.makedirs(directory, exist_ok=True)

# Build the (initially empty) split/category folders before any file is copied.
ensure_split_structure(
    output_folder=output_folder,
    split_ratios=split_ratios,
    categories=categories,
)

In [5]:
# Split dataset
def split_dataset(input_folder, output_folder, split_ratios, categories, seed=None):
    """Randomly partition each category's images and copy them into split folders.

    For every category, the image files found in ``<input_folder>/<category>``
    are shuffled and copied to ``<output_folder>/<split>/<category>``. Source
    files are copied, not moved, and files already present in a destination
    folder are not removed.

    Args:
        input_folder: Root directory holding one sub-folder per category.
        output_folder: Root directory of the split dataset.
        split_ratios: Mapping of split name to the fraction of images it
            receives, in the order the splits should be filled. Fractions must
            be non-negative and sum to 1.
        categories: Iterable of category (sub-folder) names.
        seed: Optional seed for a reproducible split. When ``None`` (default)
            the module-level ``random`` state is used, as before.

    Raises:
        ValueError: If a ratio is negative or the ratios do not sum to 1.
        FileNotFoundError: If a category folder is missing from ``input_folder``.
    """
    split_names = list(split_ratios)
    ratios = [split_ratios[name] for name in split_names]
    # Validate up front so nothing is copied under an inconsistent configuration.
    if any(ratio < 0 for ratio in ratios) or abs(sum(ratios) - 1.0) > 1e-6:
        raise ValueError(
            f"split_ratios must be non-negative and sum to 1, got {dict(split_ratios)}"
        )

    image_extensions = {'.png', '.jpg', '.jpeg'}
    # A private generator keeps a seeded split independent of other `random` users.
    rng = random.Random(seed) if seed is not None else random

    for category in categories:
        category_path = os.path.join(input_folder, category)
        # os.listdir returns entries in arbitrary order; sorting first makes a
        # seeded shuffle give the same result on every run.
        images = sorted(
            img for img in os.listdir(category_path)
            if os.path.splitext(img)[1].lower() in image_extensions
        )
        rng.shuffle(images)  # Shuffle the images to randomize the split

        total_images = len(images)
        start = 0
        for position, split in enumerate(split_names):
            if position == len(split_names) - 1:
                # The last split takes the remainder so that images left over
                # by the truncation below are not dropped.
                end = total_images
            else:
                end = min(start + int(total_images * split_ratios[split]), total_images)
            split_images = images[start:end]
            start = end

            # Copy images to the respective folder, creating it if needed so
            # this function does not depend on a separate setup step.
            split_category_path = os.path.join(output_folder, split, category)
            os.makedirs(split_category_path, exist_ok=True)
            for img_name in tqdm(split_images, desc=f"Processing {split}/{category}"):
                src_path = os.path.join(category_path, img_name)
                dest_path = os.path.join(split_category_path, img_name)
                shutil.copy(src_path, dest_path)

In [6]:
# Seed the global RNG so the shuffle inside split_dataset repeats on every
# Restart & Run All (provided the directory listing order is unchanged).
# Files are copied without clearing the output folder, so an unseeded re-run
# could leave the same image in more than one split.
random.seed(42)
split_dataset(input_folder, output_folder, split_ratios, categories)

Processing train/Anger:   0%|          | 0/7417 [00:00<?, ?it/s]

Processing train/Anger: 100%|██████████| 7417/7417 [04:00<00:00, 30.83it/s]
Processing val/Anger: 100%|██████████| 1483/1483 [00:41<00:00, 35.59it/s]
Processing test/Anger: 100%|██████████| 990/990 [00:26<00:00, 37.55it/s]
Processing train/Disgust: 100%|██████████| 6747/6747 [03:09<00:00, 35.64it/s]
Processing val/Disgust: 100%|██████████| 1349/1349 [00:33<00:00, 40.26it/s]
Processing test/Disgust: 100%|██████████| 900/900 [00:22<00:00, 40.63it/s]
Processing train/Fear: 100%|██████████| 5451/5451 [01:57<00:00, 46.52it/s]
Processing val/Fear: 100%|██████████| 1090/1090 [00:23<00:00, 45.57it/s]
Processing test/Fear: 100%|██████████| 728/728 [00:16<00:00, 44.93it/s]
Processing train/Happy: 100%|██████████| 27027/27027 [16:54<00:00, 26.65it/s]  
Processing val/Happy: 100%|██████████| 5405/5405 [03:25<00:00, 26.30it/s]
Processing test/Happy: 100%|██████████| 3605/3605 [02:12<00:00, 27.27it/s]
Processing train/Neutral: 100%|██████████| 30287/30287 [19:19<00:00, 26.13it/s]  
Processing val/Ne