# Split dataset

# In [2]:
import os
import shutil
import random

# Root of the source dataset that will be split
dataset_folder = "./photos2"

# Fraction of the images assigned to the training set;
# the remainder (here 20%) goes to the test set
train_ratio = 0.8

# Destination roots for the two resulting subsets
output_train_folder = "./train"
output_test_folder = "./test"

# Make sure both destination roots exist (no error if they already do)
for _output_root in (output_train_folder, output_test_folder):
    os.makedirs(_output_root, exist_ok=True)

# Split every class of the dataset into a train and a test subset.
# Each sub-directory of dataset_folder is treated as one class; its files are
# shuffled and COPIED (the originals are left untouched) into
# <output_train_folder>/<class> and <output_test_folder>/<class>.
for class_name in os.listdir(dataset_folder):
    class_folder = os.path.join(dataset_folder, class_name)

    # Plain files lying next to the class folders are not classes
    if not os.path.isdir(class_folder):
        continue

    # Create class folders in the output train and test folders
    output_train_class_folder = os.path.join(output_train_folder, class_name)
    output_test_class_folder = os.path.join(output_test_folder, class_name)

    os.makedirs(output_train_class_folder, exist_ok=True)
    os.makedirs(output_test_class_folder, exist_ok=True)

    # Keep regular files only: shutil.copy() fails on a directory, so a nested
    # folder inside a class would otherwise abort the whole split.
    # os.listdir() returns entries in arbitrary order, so sort first; the
    # shuffle below is then reproducible whenever random.seed() was called.
    images = sorted(
        entry
        for entry in os.listdir(class_folder)
        if os.path.isfile(os.path.join(class_folder, entry))
    )

    # Shuffle the images randomly
    random.shuffle(images)

    # int() truncates, so very small classes may end up with no train images
    # (e.g. a single image with train_ratio = 0.8 goes to the test set)
    split_index = int(len(images) * train_ratio)

    # Split the images into train and test sets
    train_images = images[:split_index]
    test_images = images[split_index:]

    # Copy each subset into its matching output class folder
    for subset, output_class_folder in (
        (train_images, output_train_class_folder),
        (test_images, output_test_class_folder),
    ):
        for image_name in subset:
            src_path = os.path.join(class_folder, image_name)
            dst_path = os.path.join(output_class_folder, image_name)
            shutil.copy(src_path, dst_path)
