# In [4]:
import os
import shutil
import random

def _move_files(file_names, from_dir, to_dir):
    """Move each named file from ``from_dir`` into ``to_dir``."""
    for file_name in file_names:
        shutil.move(os.path.join(from_dir, file_name), os.path.join(to_dir, file_name))


def split_dataset(source_dir, train_dir, test_dir, split_ratio=0.8, *, seed=None):
    """Split a class-per-folder dataset into train and test folders.

    Every immediate subdirectory of ``source_dir`` is treated as one class.
    The regular files directly inside each class folder are shuffled and
    then MOVED (not copied) into ``train_dir/<class>`` and
    ``test_dir/<class>``; nested subdirectories inside a class folder are
    left untouched.

    Args:
        source_dir: Folder containing one subfolder per class.
        train_dir: Destination root for the training portion.
        test_dir: Destination root for the testing portion.
        split_ratio: Fraction of each class that goes to training. The
            number of training files is ``int(n_files * split_ratio)``, so
            the result is rounded down.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` generator is used, so a prior
            ``random.seed(...)`` call is still honoured.

    Returns:
        None. Progress and per-class counts are printed. If ``source_dir``
        does not exist, an error message is printed and nothing is changed.

    Raises:
        ValueError: If ``split_ratio`` is not within ``[0, 1]``.
    """
    # Reject ratios that would otherwise silently mis-slice the file list
    # (e.g. a negative ratio becomes a negative slice index).
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    if not os.path.exists(source_dir):
        print(f"Error: Source directory '{source_dir}' does not exist.")
        return

    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # The output roots may live inside source_dir; they must not be mistaken
    # for class folders.
    output_roots = {os.path.realpath(train_dir), os.path.realpath(test_dir)}

    rng = random if seed is None else random.Random(seed)

    # Iterate through each class folder (sorted so a seeded run is repeatable)
    for class_name in sorted(os.listdir(source_dir)):
        class_path = os.path.join(source_dir, class_name)
        if not os.path.isdir(class_path):  # Skip if it's not a directory
            continue
        if os.path.realpath(class_path) in output_roots:
            continue

        # Create corresponding train and test directories for the class
        train_class_dir = os.path.join(train_dir, class_name)
        test_class_dir = os.path.join(test_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        # Collect the regular files of this class. os.listdir order is
        # arbitrary, so sort first to make the shuffle depend only on the RNG.
        all_files = sorted(
            f for f in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, f))
        )

        if not all_files:
            print(f"Skipping empty class folder: {class_name}")
            continue

        # Shuffle and split the files
        rng.shuffle(all_files)
        split_index = int(len(all_files) * split_ratio)
        train_files = all_files[:split_index]
        test_files = all_files[split_index:]

        # Move files to respective folders
        _move_files(train_files, class_path, train_class_dir)
        _move_files(test_files, class_path, test_class_dir)

        print(f"Class '{class_name}': {len(train_files)} training files, {len(test_files)} testing files.")

    print("Dataset split complete.")

# Example usage: split each class folder of the dataset with the default ratio.
source_directory = "Yoga_Dataset"  # Point this at your dataset folder
train_directory = "train"
test_directory = "test"

split_dataset(
    source_dir=source_directory,
    train_dir=train_directory,
    test_dir=test_directory,
)


# Output:
# Class 'Anjaneyasana': 2240 training files, 560 testing files.
# Class 'Vrksasana': 2240 training files, 560 testing files.
# Class 'Phalakasana': 1926 training files, 482 testing files.
# Class 'Setu Bandha Sarvangasana': 1713 training files, 429 testing files.
# Class 'Virabhadrasana Two': 2240 training files, 560 testing files.
# Class 'Trikonasana': 2240 training files, 560 testing files.
# Class 'Utkatasana': 2240 training files, 560 testing files.
# Class 'Adho Mukha Svanasana': 2228 training files, 558 testing files.
# Dataset split complete.
