In [1]:
import os
import random
import shutil

### 학습 데이터셋에서 테스트 데이터셋 추출
- Training 데이터에서 클래스(질병번호) 폴더별로 split_ratio 비율만큼 랜덤으로 추출해 Test 데이터로 이동 (폴더당 최소 1개).
- 인자
    - training_dir (str): Training 데이터 경로 (./class_dataset/Training/{질병번호} 구조)
    - test_dir (str): Test 데이터 경로 (./class_dataset/Test/{질병번호} 구조)
    - split_ratio (float): 이동할 데이터 비율 (default: 0.1, 즉 10%)

In [None]:
def split_data(training_dir, test_dir, split_ratio=0.1, seed=None):
    """Move a random share of each class folder's images from Training to Test.

    Every immediate sub-directory of ``training_dir`` is treated as one class
    folder. For each of them, ``split_ratio`` of its image files (``.jpg``,
    ``.jpeg``, ``.png``; matched case-insensitively) are picked at random and
    *moved* into a same-named sub-directory of ``test_dir``. Because files are
    moved rather than copied, calling this again moves a further share.

    Args:
        training_dir (str): Directory holding one sub-folder per class.
        test_dir (str): Destination directory; created if missing.
        split_ratio (float): Fraction (0-1) of each folder's images to move.
            At least one image is moved from every non-empty folder.
        seed (int | None): When given, a private ``random.Random(seed)`` is
            used so the same directory contents yield the same selection.
            When ``None`` (default) the module-level ``random`` generator is
            used, exactly as before.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]``. Raised before
            any file is touched so a bad ratio cannot leave a partial split.
    """
    # Bail out quietly (message only) when there is nothing to split.
    if not os.path.exists(training_dir):
        print(f"Training directory '{training_dir}' not found!")
        return

    # Validate up front: previously a ratio > 1 only failed once a large
    # enough folder was reached, after earlier folders were already moved.
    if not 0 <= split_ratio <= 1:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    # Fall back to the global generator so callers relying on random.seed()
    # keep working unchanged.
    rng = random if seed is None else random.Random(seed)

    # Make sure the Test root exists.
    os.makedirs(test_dir, exist_ok=True)

    image_extensions = ('.jpg', '.png', '.jpeg')

    # Walk the class folders; sorted because os.listdir order is arbitrary and
    # would otherwise defeat seeding.
    for disease_folder in sorted(os.listdir(training_dir)):
        disease_path = os.path.join(training_dir, disease_folder)

        if not os.path.isdir(disease_path):
            continue  # skip stray files next to the class folders

        # Candidate image files, in a stable order.
        image_files = sorted(
            f for f in os.listdir(disease_path)
            if f.lower().endswith(image_extensions)
        )

        total_files = len(image_files)
        if total_files == 0:
            # The "at least one" rule below would ask random.sample for one
            # item out of an empty list and raise ValueError.
            print(f"Skipping '{disease_folder}' - no image files found.")
            continue

        # Number of files to move; never fewer than one per folder.
        num_to_move = max(1, int(total_files * split_ratio))

        print(f"Processing '{disease_folder}' - Total: {total_files}, To Move: {num_to_move}")

        # Random selection without replacement.
        files_to_move = rng.sample(image_files, num_to_move)

        # Matching class folder under Test.
        test_disease_path = os.path.join(test_dir, disease_folder)
        os.makedirs(test_disease_path, exist_ok=True)

        # Move the selected files.
        for file_name in files_to_move:
            src_path = os.path.join(disease_path, file_name)
            dest_path = os.path.join(test_disease_path, file_name)
            shutil.move(src_path, dest_path)

        print(f"Moved {len(files_to_move)} files from '{disease_folder}' to Test directory.")

    print("Data splitting completed.")

In [3]:
# Run the split.
# NOTE: split_data moves files (it does not copy them), so re-running this
# cell takes a further share out of Training.
training_dir = "./class_dataset/Training"  # source: one sub-folder per disease ID
test_dir = "./class_dataset/Test"          # destination: same sub-folder layout
split_ratio = 0.1                          # fraction of each folder to move (10%)

split_data(
    training_dir=training_dir,
    test_dir=test_dir,
    split_ratio=split_ratio,
)

Processing '0' - Total: 1500, To Move: 150
Moved 150 files from '0' to Test directory.
Processing '1' - Total: 973, To Move: 97
Moved 97 files from '1' to Test directory.
Processing '11' - Total: 391, To Move: 39
Moved 39 files from '11' to Test directory.
Processing '12' - Total: 389, To Move: 38
Moved 38 files from '12' to Test directory.
Processing '16' - Total: 964, To Move: 96
Moved 96 files from '16' to Test directory.
Processing '17' - Total: 259, To Move: 25
Moved 25 files from '17' to Test directory.
Processing '18' - Total: 894, To Move: 89
Moved 89 files from '18' to Test directory.
Processing '2' - Total: 915, To Move: 91
Moved 91 files from '2' to Test directory.
Processing '3' - Total: 470, To Move: 47
Moved 47 files from '3' to Test directory.
Processing '4' - Total: 227, To Move: 22
Moved 22 files from '4' to Test directory.
Processing '5' - Total: 802, To Move: 80
Moved 80 files from '5' to Test directory.
Processing '6' - Total: 458, To Move: 45
Moved 45 files from '6