In [1]:
import os
import random
import shutil
from tqdm import tqdm

In [2]:
# Dataset locations, given relative to the notebook's working directory
input_folder, output_folder = (
    "../Resized_argued_normed_MyDataset",  # source: the normalized dataset, one sub-folder per category
    "../Splitted_MyDataset",               # destination: root folder for the split dataset
)

In [3]:
# Share of each category assigned to every split (edit here to rebalance).
# Key order is train -> val -> test.
split_ratios = dict(
    train=0.7,  # e.g. 0.8 for 80%, 0.75 for 75%
    val=0.2,    # e.g. 0.15 for 15%, 0.1 for 10%
    test=0.1,   # nominal 10%; split_dataset hands test whatever is left after train and val
)

# Class labels; each one is also used as a sub-folder name.
categories = [
    "Anger",
    "Disgust",
    "Fear",
    "Happy",
    "Neutral",
    "Sad",
    "Surprise",
]

In [4]:
# Ensure output structure
def ensure_split_structure(output_folder, split_ratios, categories):
    """Create the ``<output_folder>/<split>/<category>`` directory tree.

    Args:
        output_folder: Root directory of the split dataset.
        split_ratios: Mapping whose keys name the splits; the values are
            not read here.
        categories: Category names; every split gets one sub-directory
            per category.

    Directories that already exist are left as they are.
    """
    def _wanted_dirs():
        # Yield paths lazily, top-down: root, then each split directory
        # followed immediately by its category directories.
        yield output_folder
        for split_name in split_ratios.keys():
            split_dir = os.path.join(output_folder, split_name)
            yield split_dir
            for label in categories:
                yield os.path.join(split_dir, label)

    for directory in _wanted_dirs():
        os.makedirs(directory, exist_ok=True)

# Build the <output_folder>/<split>/<category> tree before the copy step in the cells below.
ensure_split_structure(
    output_folder=output_folder,
    split_ratios=split_ratios,
    categories=categories,
)

In [5]:
# Split dataset
def split_dataset(input_folder, output_folder, split_ratios, categories, seed=42):
    """Copy every category's images into train/val/test sub-folders.

    For each category, the image files directly inside
    ``<input_folder>/<category>`` are listed, shuffled, and cut into three
    consecutive slices. Each slice is copied (the sources stay in place) to
    ``<output_folder>/<split>/<category>``.

    Args:
        input_folder: Directory holding one sub-folder per category.
        output_folder: Root of the split dataset; missing destination
            folders are created.
        split_ratios: Mapping with ``'train'`` and ``'val'`` fractions. Each
            count is truncated to an integer and ``'test'`` receives every
            remaining image, so a ``'test'`` entry is not read.
        categories: Iterable of category folder names.
        seed: Seed for the shuffle. With the same seed, the same categories
            and unchanged input files, a re-run produces the same split, so
            copying into an already populated output tree does not place an
            image in a second split. Pass ``None`` to get a different split
            on every run.

    Raises:
        FileNotFoundError: If a category folder does not exist.

    Note:
        Files already present in the destination folders are not removed.
    """
    # Dotted suffixes so that only real extensions match (not e.g. "notes_jpg").
    image_suffixes = ('.png', '.jpg', '.jpeg')
    # Private generator: the split neither depends on nor disturbs the
    # global `random` state.
    rng = random.Random(seed)

    for category in categories:
        category_path = os.path.join(input_folder, category)

        # Directory listings come back in arbitrary order, so sort before
        # shuffling; otherwise a fixed seed would not give a fixed split.
        # Sub-directories are skipped because they cannot be copied as files.
        with os.scandir(category_path) as entries:
            images = sorted(
                entry.name
                for entry in entries
                if entry.is_file() and entry.name.lower().endswith(image_suffixes)
            )
        rng.shuffle(images)  # Shuffle the images to randomize the split

        total_images = len(images)
        train_end = int(total_images * split_ratios['train'])
        val_end = train_end + int(total_images * split_ratios['val'])

        images_by_split = {
            'train': images[:train_end],
            'val': images[train_end:val_end],
            'test': images[val_end:],  # remainder, absorbs the truncation above
        }

        # Copy images to their respective folders
        for split, split_images in images_by_split.items():
            split_category_path = os.path.join(output_folder, split, category)
            # Do not rely on the directory tree having been prepared beforehand.
            os.makedirs(split_category_path, exist_ok=True)
            for img_name in tqdm(split_images, desc=f"Processing {split}/{category}"):
                shutil.copy(
                    os.path.join(category_path, img_name),
                    os.path.join(split_category_path, img_name),
                )

In [6]:
# Run the split: copies every category's images into the train/val/test folders.
split_dataset(
    input_folder=input_folder,
    output_folder=output_folder,
    split_ratios=split_ratios,
    categories=categories,
)

Processing train/Anger: 100%|██████████| 6923/6923 [02:26<00:00, 47.34it/s]
Processing val/Anger: 100%|██████████| 1978/1978 [00:42<00:00, 46.75it/s]
Processing test/Anger: 100%|██████████| 989/989 [00:20<00:00, 47.74it/s]
Processing train/Disgust: 100%|██████████| 6297/6297 [02:03<00:00, 51.10it/s]
Processing val/Disgust: 100%|██████████| 1799/1799 [00:34<00:00, 51.98it/s]
Processing test/Disgust: 100%|██████████| 900/900 [00:18<00:00, 49.71it/s]
Processing train/Fear: 100%|██████████| 5088/5088 [01:43<00:00, 49.01it/s]
Processing val/Fear: 100%|██████████| 1453/1453 [00:27<00:00, 52.23it/s]
Processing test/Fear: 100%|██████████| 728/728 [00:13<00:00, 53.93it/s]
Processing train/Happy: 100%|██████████| 25225/25225 [09:00<00:00, 46.64it/s]
Processing val/Happy: 100%|██████████| 7207/7207 [02:51<00:00, 42.01it/s]
Processing test/Happy: 100%|██████████| 3605/3605 [01:43<00:00, 35.00it/s]
Processing train/Neutral: 100%|██████████| 28268/28268 [07:51<00:00, 60.01it/s]
Processing val/Neutra