In [3]:
import os
import shutil
import random
import argparse
def split_data(src_dir, dest_dir, train_ratio, val_ratio, test_ratio, seed=42):
    """Copy a class-per-folder dataset into train/valid/test subsets.

    Every immediate sub-directory of ``src_dir`` is treated as one class.
    The regular files inside each class directory are shuffled and copied
    (the originals are left in place) to
    ``dest_dir/<split>/<class>/<file>``, where ``<split>`` is one of
    ``train``, ``valid`` or ``test``.

    Args:
        src_dir: Parent directory whose sub-directories each hold one class.
        dest_dir: Parent directory that receives the split copies; it is
            created if it does not exist.
        train_ratio: Fraction of each class copied to ``train``.
        val_ratio: Fraction of each class copied to ``valid``.
        test_ratio: Fraction of each class copied to ``test``.
        seed: Seed passed to ``random.seed`` so the split can be reproduced.

    Raises:
        ValueError: If the three ratios do not sum to 1.0 (within 1e-6) or
            any of them is negative.

    Note:
        The train and valid counts are truncated with ``int()``, so any
        leftover files of a class end up in ``test``.
    """
    ratios = (train_ratio, val_ratio, test_ratio)
    if abs(sum(ratios) - 1.0) > 1e-6:
        raise ValueError("train_ratio + val_ratio + test_ratio must sum to 1.0")
    if min(ratios) < 0:
        raise ValueError("train_ratio, val_ratio and test_ratio must be non-negative")

    random.seed(seed)

    # os.listdir() returns entries in arbitrary order, so sort both the class
    # folders and the file names; otherwise the same seed could yield a
    # different split on another machine or filesystem.
    classes = sorted(
        d for d in os.listdir(src_dir)
        if os.path.isdir(os.path.join(src_dir, d))
    )

    for cls in classes:
        cls_path = os.path.join(src_dir, cls)
        images = sorted(
            f for f in os.listdir(cls_path)
            if os.path.isfile(os.path.join(cls_path, f))
        )
        random.shuffle(images)

        total = len(images)
        train_end = int(train_ratio * total)
        val_end = train_end + int(val_ratio * total)

        splits = {
            'train': images[:train_end],
            'valid': images[train_end:val_end],
            'test': images[val_end:],  # also absorbs the truncation remainder
        }

        for split_name, split_files in splits.items():
            out_dir = os.path.join(dest_dir, split_name, cls)
            # Create the folder even for an empty split so every class is
            # present under every split directory.
            os.makedirs(out_dir, exist_ok=True)
            for img in split_files:
                shutil.copy2(os.path.join(cls_path, img),
                             os.path.join(out_dir, img))

    print("Dataset split complete.")

In [4]:
# Source dataset (one sub-folder per class) and destination for the split copies.
# Raw string literals keep the Windows backslashes literal: sequences such as
# "\s", "\C" and "\d" are not valid escapes and trigger invalid-escape warnings
# in ordinary string literals. The resulting path values are unchanged.
src = r"E:\★★★★★AI★★★★★\음식 ai data\selectStart음식DATA\Computer Vision Lab"
dest = r"E:\★★★★★AI★★★★★\음식 ai data\selectStart음식DATA\dataset_splits"
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# Copies every class folder into dest/{train,valid,test}/<class>.
split_data(src, dest, train_ratio, val_ratio, test_ratio, seed=42)

KeyboardInterrupt: 