In [1]:
import os
import shutil
import random
from math import floor

In [2]:

# --- Locations -------------------------------------------------------------
# Source folder: expected to contain one sub-folder per entry in `categories`.
original_dataset_dir = 'dataset'
# Destination root: receives the train/val/test folder tree.
output_dir = 'split_dataset'
# Class labels; each one is also the name of a sub-folder.
categories = ['sparse', 'dense', 'crowded']

# --- Split fractions -------------------------------------------------------
# 70% training, 20% validation, 10% testing.
train_ratio, val_ratio, test_ratio = 0.7, 0.2, 0.1

In [3]:

# Make sure every <output_dir>/<split>/<category> folder exists before any
# files are copied; exist_ok=True keeps re-running this cell harmless.
for split_name in ('train', 'val', 'test'):
    for class_name in categories:
        os.makedirs(os.path.join(output_dir, split_name, class_name), exist_ok=True)

# Function to split the dataset
def split_dataset(seed=None):
    """Copy each category's images into train/val/test sub-folders.

    For every name in the module-level ``categories`` list, the regular files
    found in ``<original_dataset_dir>/<category>`` are shuffled and copied to
    ``<output_dir>/<split>/<category>``. The train and validation sizes are
    ``floor(train_ratio * n)`` and ``floor(val_ratio * n)``; the test split
    receives whatever remains, so no file is ever dropped.

    Args:
        seed: Optional seed for the shuffle. With the default ``None`` the
            global ``random`` state is used. Passing a value makes the split
            reproducible from one run to the next.

    Raises:
        FileNotFoundError: if a category folder is missing from
            ``original_dataset_dir``.

    Note:
        Existing files under ``output_dir`` are never removed. Re-running with
        a different shuffle can therefore leave the same image in more than
        one split; delete ``output_dir`` first, or reuse the same ``seed``.
    """
    # A private generator keeps a seeded run from disturbing the global state.
    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle

    for category in categories:
        category_dir = os.path.join(original_dataset_dir, category)

        # Keep regular files only: os.listdir also returns sub-directories
        # (e.g. ".ipynb_checkpoints"), which shutil.copy cannot handle.
        # Sorting first makes the shuffle independent of the OS listing order.
        images = sorted(
            name for name in os.listdir(category_dir)
            if os.path.isfile(os.path.join(category_dir, name))
        )
        shuffle(images)

        total_images = len(images)
        train_end = floor(train_ratio * total_images)
        val_end = train_end + floor(val_ratio * total_images)

        partitions = {
            'train': images[:train_end],
            'val': images[train_end:val_end],
            'test': images[val_end:],  # Remaining for test
        }

        # Copy images to respective directories
        for split, split_images in partitions.items():
            split_dir = os.path.join(output_dir, split, category)
            # Create the folder here so the function does not depend on
            # another cell having been run first.
            os.makedirs(split_dir, exist_ok=True)
            for image in split_images:
                shutil.copy(os.path.join(category_dir, image),
                            os.path.join(split_dir, image))

    print("Dataset has been successfully split into train, val, and test sets.")

In [4]:

# Run the split function: copies the images of every category into the
# train/val/test folders under `output_dir` and prints a confirmation message.
split_dataset()

Dataset has been successfully split into train, val, and test sets.
