In [35]:
import os
import shutil
from sklearn.model_selection import train_test_split

# 함수: 클래스 폴더 내의 이미지 수를 가져오는 함수
def count_images_in_class(class_folder):
    image_count = 0
    for root, dirs, files in os.walk(class_folder):
        for file in files:
            if file.endswith(('.jpg', '.jpeg', '.png', '.bmp', '.gif')):
                image_count += 1
    return image_count

# 함수: 클래스 폴더를 기준으로 가장 하위 폴더의 이미지 개수를 가져오는 함수
def max_images_count_in_subfolders(data_folder):
    max_count = 0
    for class_name in os.listdir(data_folder):
        class_folder = os.path.join(data_folder, class_name)
        if os.path.isdir(class_folder):
            for subfolder_name in os.listdir(class_folder):
                subfolder_path = os.path.join(class_folder, subfolder_name)
                if os.path.isdir(subfolder_path):
                    image_count = count_images_in_class(subfolder_path)
                    max_count = max(max_count, image_count)
    return max_count

# 데이터 폴더 경로 설정
data_folder = 'C:/Users/Sienna/Desktop/pet_eye_disease'
train_folder = 'C:/Users/Sienna/Desktop/train'
test_folder = 'C:/Users/Sienna/Desktop/test'
val_folder = 'C:/Users/Sienna/Desktop/val'

# 폴더 초기화
for folder in [train_folder, test_folder, val_folder]:
    if os.path.exists(folder):
        shutil.rmtree(folder)
    os.makedirs(folder)

# 함수: 이미지 복사
def copy_images(image_paths, target_folder):
    for image_path in image_paths:
        class_name = os.path.basename(os.path.dirname(os.path.dirname(image_path)))
        subfolder_name = os.path.basename(os.path.dirname(image_path))
        class_folder = os.path.join(target_folder, class_name, subfolder_name)
        if not os.path.exists(class_folder):
            os.makedirs(class_folder)
        shutil.copy(image_path, class_folder)

# 함수: 데이터 분할
def split_data(data_folder, train_size=0.8, test_size=0.1, val_size=0.1):
    train_paths = []
    test_paths = []
    val_paths = []

    for class_name in os.listdir(data_folder):
        class_folder = os.path.join(data_folder, class_name)
        if os.path.isdir(class_folder):
            for subfolder_name in os.listdir(class_folder):
                subfolder_path = os.path.join(class_folder, subfolder_name)
                if os.path.isdir(subfolder_path):
                    images = [os.path.join(subfolder_path, f) for f in os.listdir(subfolder_path) if f.endswith(('.jpg', '.jpeg', '.png', '.bmp', '.gif'))]
                    train, test_val = train_test_split(images, test_size=test_size+val_size, random_state=42)
                    test, val = train_test_split(test_val, test_size=val_size/(test_size+val_size), random_state=42)

                    train_paths.extend(train)
                    # 수정된 부분: 테스트 폴더에는 증강된 데이터를 포함시키지 않음
                    test_paths.extend([p for p in test if "augmented" not in os.path.basename(p)])
                    val_paths.extend([p for p in val if "augmented" not in os.path.basename(p)])

    return train_paths, test_paths, val_paths

# 가장 하위 폴더의 이미지 개수 기준으로 데이터 분할
max_count = max_images_count_in_subfolders(data_folder)
train_paths, test_paths, val_paths = split_data(data_folder)

# 이미지 복사
copy_images(train_paths, train_folder)
copy_images(test_paths, test_folder)
copy_images(val_paths, val_folder)

print(f"Train: {len(train_paths)}, Test: {len(test_paths)}, Val: {len(val_paths)}")


Train: 56480, Test: 6038, Val: 6149


In [None]:
# 클래스별로 따로 분할한 파일만들기^^

import os
import shutil

def copy_data(data_folder, save_folder):
    # train, test, val 폴더를 반복하면서 데이터를 복사하기
    for split in ['train', 'test', 'val']:
        split_folder = os.path.join(data_folder, split)
        if not os.path.exists(split_folder):
            continue
        
        for class_name in os.listdir(split_folder):
            class_folder = os.path.join(split_folder, class_name)
            if os.path.isdir(class_folder):
                # 클래스 폴더 아래에 train, test, val 폴더 생성
                for symptom in ['positive', 'negative']:
                    symptom_folder = os.path.join(class_folder, symptom)
                    if not os.path.exists(symptom_folder):
                        continue
                    
                    for file_name in os.listdir(symptom_folder):
                        src = os.path.join(symptom_folder, file_name)
                        dst = os.path.join(save_folder, class_name, split, symptom, file_name)
                        shutil.copy(src, dst)

# 원본 데이터 폴더 및 복사할 폴더 경로 설정
data_folder = 'C:/Users/Sienna/Desktop/pet_eye_disease'
save_folder = 'C:/Users/Sienna/Desktop/splited_pet_eye_disease'

# 데이터 복사 작업 수행
copy_data(data_folder, save_folder)

print("Data copied successfully!")




In [8]:
import os
import shutil
from sklearn.model_selection import train_test_split

def split_data(data_folder, save_folder, train_size=0.8, test_size=0.1, val_size=0.1):
    # 클래스 폴더 1~10을 반복하며 데이터 분할 및 저장
    for class_name in os.listdir(data_folder):
        class_folder = os.path.join(data_folder, class_name)
        if os.path.isdir(class_folder):
            # 클래스 폴더 아래에 train, test, val 폴더 생성
            for split in ['train', 'test', 'val']:
                split_folder = os.path.join(save_folder, class_name, split)
                os.makedirs(split_folder, exist_ok=True)

            # 데이터 분할
            class_train, class_test = train_test_split(os.listdir(class_folder), test_size=test_size, random_state=42)
            class_test, class_val = train_test_split(class_test, test_size=val_size/(test_size+val_size), random_state=42)
            
            # 데이터를 분할된 폴더에 복사
            for file_name in class_train:
                source_path = os.path.join(class_folder, file_name)
                dest_path = os.path.join(save_folder, class_name, 'train', file_name)
                shutil.copy(source_path, dest_path)
            for file_name in class_test:
                source_path = os.path.join(class_folder, file_name)
                dest_path = os.path.join(save_folder, class_name, 'test', file_name)
                shutil.copy(source_path, dest_path)
            for file_name in class_val:
                source_path = os.path.join(class_folder, file_name)
                dest_path = os.path.join(save_folder, class_name, 'val', file_name)
                shutil.copy(source_path, dest_path)

# 데이터 폴더 및 저장할 폴더 경로 설정
data_folder = 'C:/Users/Sienna/Desktop/pet_eye_disease'
save_folder = 'C:/Users/Sienna/Desktop/splited_pet_eye_disease'

# 데이터 분할 및 저장
split_data(data_folder, save_folder)

print("Data split and saved successfully!")


ValueError: With n_samples=1, test_size=0.5 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [5]:
# 파일 개수 확인

import os

path = "C:/Users/Sienna/Desktop/ew"

# # 서브 디렉토리 목록 출력
# for root, subdirs, files in os.walk(path):

#     for d in subdirs:
#         fullpath = root + '/' + d
#         print(fullpath)

# print()

# 서브 디렉토리별 파일 개수 출력
for root, subdirs, files in os.walk(path):

    if len(files) > 0:
        print(root, len(files))

C:/Users/Sienna/Desktop/ew\test\C01_Corneal-Ulcer\negative 353
C:/Users/Sienna/Desktop/ew\test\C01_Corneal-Ulcer\positive 353
C:/Users/Sienna/Desktop/ew\test\C02_Sequestrum\negative 353
C:/Users/Sienna/Desktop/ew\test\C02_Sequestrum\positive 353
C:/Users/Sienna/Desktop/ew\test\C03_Conjunctivitis\negative 353
C:/Users/Sienna/Desktop/ew\test\C03_Conjunctivitis\positive 353
C:/Users/Sienna/Desktop/ew\test\C04_Nonulcerative-keratitis\negative 353
C:/Users/Sienna/Desktop/ew\test\C04_Nonulcerative-keratitis\positive 353
C:/Users/Sienna/Desktop/ew\test\C05_Blepharitis\negative 353
C:/Users/Sienna/Desktop/ew\test\C05_Blepharitis\positive 353
C:/Users/Sienna/Desktop/ew\test\D06_Conjunctivitis\negative 353
C:/Users/Sienna/Desktop/ew\test\D06_Conjunctivitis\positive 353
C:/Users/Sienna/Desktop/ew\test\D07_Ulcerative-Keratitis\negative 353
C:/Users/Sienna/Desktop/ew\test\D07_Ulcerative-Keratitis\positive 353
C:/Users/Sienna/Desktop/ew\test\D08_Cataract\negative 353
C:/Users/Sienna/Desktop/ew\test\