## Making a split dataset with the same subfolders

In [1]:
import os
import shutil
import random

# ===================== CONFIG =====================
SOURCE_DIR = "boat_type_classification_dataset"          # original folder
DEST_DIR = "dataset_split"      # new output folder
TRAIN_RATIO = 0.8               # 80% train, 20% test
RANDOM_SEED = 42
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')   # compared against the lower-cased file name
# ==================================================

# Output locations of the two subsets under DEST_DIR.
train_dir = os.path.join(DEST_DIR, "train")
test_dir = os.path.join(DEST_DIR, "test")


def split_dataset(source_dir: str = SOURCE_DIR,
                  dest_dir: str = DEST_DIR,
                  train_ratio: float = TRAIN_RATIO,
                  seed: int = RANDOM_SEED) -> dict:
    """Copy each class folder of *source_dir* into train/test subsets.

    Every immediate sub-directory of *source_dir* is treated as one class.
    Its image files (names ending in one of ``IMAGE_EXTENSIONS``, ignoring
    case) are shuffled and the first ``int(len(images) * train_ratio)`` of
    them are copied to ``<dest_dir>/train/<class>``; the remainder go to
    ``<dest_dir>/test/<class>``. One summary line is printed per class.

    Files already present in *dest_dir* are not removed, so re-running with
    a different seed or ratio into the same folder can leave one image in
    both subsets.

    Args:
        source_dir: Folder containing one sub-folder per class.
        dest_dir: Folder in which ``train`` and ``test`` are created.
        train_ratio: Fraction of each class copied to ``train``; must lie
            in ``[0, 1]``.
        seed: Value passed to ``random.seed`` before any shuffling.

    Returns:
        A dict mapping each class name to ``(train_count, test_count)``,
        in the (sorted) order the classes were processed.

    Raises:
        ValueError: If *train_ratio* is outside ``[0, 1]``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    random.seed(seed)

    train_root = os.path.join(dest_dir, "train")
    test_root = os.path.join(dest_dir, "test")
    os.makedirs(train_root, exist_ok=True)
    os.makedirs(test_root, exist_ok=True)

    counts = {}

    # os.listdir() yields entries in arbitrary order. Sorting the class list
    # (and each image list below) makes the shuffle input identical on every
    # machine, so the same seed always produces the same split.
    for class_name in sorted(os.listdir(source_dir)):
        class_path = os.path.join(source_dir, class_name)

        # Stray files next to the class folders are not classes.
        if not os.path.isdir(class_path):
            continue

        images = sorted(
            img for img in os.listdir(class_path)
            if img.lower().endswith(IMAGE_EXTENSIONS)
        )
        random.shuffle(images)

        split_index = int(len(images) * train_ratio)
        train_images = images[:split_index]
        test_images = images[split_index:]

        # Both class folders are created even when a subset ends up empty.
        for subset_root, subset_images in ((train_root, train_images),
                                           (test_root, test_images)):
            class_dest = os.path.join(subset_root, class_name)
            os.makedirs(class_dest, exist_ok=True)
            for img in subset_images:
                shutil.copy2(os.path.join(class_path, img),
                             os.path.join(class_dest, img))

        counts[class_name] = (len(train_images), len(test_images))
        print(f"{class_name}: {len(train_images)} train | {len(test_images)} test")

    return counts


# True both when run as a script and when executed as a notebook cell.
if __name__ == "__main__":
    split_dataset(SOURCE_DIR, DEST_DIR, TRAIN_RATIO, RANDOM_SEED)
    print("\nDataset split completed successfully.")


buoy: 42 train | 11 test
cruise_ship: 152 train | 39 test
ferry_boat: 50 train | 13 test
freight_boat: 18 train | 5 test
gondola: 154 train | 39 test
inflatable_boat: 12 train | 4 test
kayak: 162 train | 41 test
paper_boat: 24 train | 7 test
sailboat: 311 train | 78 test

Dataset split completed successfully.
