In [3]:
import glob
import os
import shutil
import warnings

from sklearn.model_selection import train_test_split

In [4]:
def _split_or_keep(paths, holdout_fraction, random_state):
    """
    Splits paths into (kept, held_out) using train_test_split.
    Lists with fewer than two items cannot be split, so they are kept whole
    and the held-out part is empty instead of raising ValueError.
    """
    if len(paths) < 2:
        return list(paths), []
    kept, held_out = train_test_split(paths, test_size=holdout_fraction, random_state=random_state)
    return kept, held_out


def create_splits(source_dir, train_dir, val_dir, test_dir, test_size=0.15, val_size=0.15,
                  extensions=('.jpg',), random_state=42):
    """
    Splits dataset into training, validation, and test sets.

    Each class sub-directory of source_dir is split independently and its images
    are copied (the originals are left in place) into a sub-directory of the same
    name under train_dir, val_dir and test_dir. Nothing is returned.

    - source_dir: Directory with the augmented dataset, organized in class-based subdirectories.
      Regular files and hidden entries directly inside it are ignored.
    - train_dir, val_dir, test_dir: Directories to store the train, validation, and test splits.
      They are created if missing.
    - test_size: Fraction of each class to be used as test set.
    - val_size: Fraction of what remains after the test split to be used as validation set,
      i.e. the validation share of the whole class is roughly val_size * (1 - test_size).
    - extensions: File extensions that count as images. Matching is case-insensitive and the
      leading dot is optional, so the default also picks up files named '*.JPG'.
    - random_state: Seed forwarded to train_test_split for both splits.

    A class folder without any matching image is skipped with a warning. A class too small
    to fill every split keeps its images in the earlier splits (train first) and a warning
    is issued, instead of aborting the whole run.
    """
    if isinstance(extensions, str):
        # A bare string would otherwise be iterated character by character.
        extensions = (extensions,)
    suffixes = {'.' + ext.lower().lstrip('.') for ext in extensions}

    # glob.escape keeps characters such as '[' in the directory name literal.
    # The '*' pattern skips hidden entries (e.g. .ipynb_checkpoints); isdir drops stray files.
    class_folders = sorted(
        entry
        for entry in glob.glob(os.path.join(glob.escape(source_dir), '*'))
        if os.path.isdir(entry)
    )

    for class_folder in class_folders:
        class_name = os.path.basename(class_folder)

        # Sorting matters: glob order depends on the filesystem, and a fixed
        # random_state only reproduces the same split for the same input order.
        images = sorted(
            path
            for path in glob.glob(os.path.join(glob.escape(class_folder), '*'))
            if os.path.isfile(path) and os.path.splitext(path)[1].lower() in suffixes
        )
        if not images:
            warnings.warn(f"No images found in class folder '{class_folder}'; skipping it.")
            continue

        train_images, test_images = _split_or_keep(images, test_size, random_state)
        train_images, val_images = _split_or_keep(train_images, val_size, random_state)

        if not val_images or not test_images:
            warnings.warn(
                f"Class '{class_name}' has only {len(images)} image(s); "
                "its validation and/or test split is empty."
            )

        # Create the class directory in every split (even an empty one, so all
        # splits expose the same set of classes) and copy the images into it.
        for split_root, split_images in (
            (train_dir, train_images),
            (val_dir, val_images),
            (test_dir, test_images),
        ):
            destination = os.path.join(split_root, class_name)
            os.makedirs(destination, exist_ok=True)
            for image_path in split_images:
                shutil.copy(image_path, destination)

In [7]:
# Define your dataset directories.
# The base directory can be overridden through the CORAL_PREPROCESSING_DIR
# environment variable so the notebook is not tied to one machine; when the
# variable is unset, the original absolute location is used.
base_dir = os.environ.get(
    'CORAL_PREPROCESSING_DIR',
    '/Users/shagundeepsingh/Documents/coral/preprocessing',
)

# The final '' component keeps the trailing path separator of the original literals.
source_dir = os.path.join(base_dir, 'data', 'classes', '')
train_dir = os.path.join(base_dir, 'new', 'train', '')
val_dir = os.path.join(base_dir, 'new', 'val', '')
test_dir = os.path.join(base_dir, 'new', 'test', '')

# 20% of each class goes to test, then 25% of the remaining 80% goes to
# validation, i.e. roughly a 60/20/20 train/val/test split overall.
create_splits(source_dir, train_dir, val_dir, test_dir, test_size=0.2, val_size=0.25)
