In [5]:
import os
import random
from PIL import Image
import torchvision.transforms as transforms

## 데이터 증강 처리
- 각 클래스 폴더에서 이미지 개수를 확인하고, 부족한 개수를 증강 처리하여 저장
- 정규화 및 토큰화는 학습 시 진행

### 전처리 과정
1. **데이터 전처리**
    - `torchvision.transforms` 모듈을 활용하여 학습 및 테스트 데이터를 각각 전처리
    - **학습 데이터(`train_transform`)**: 데이터 증강을 포함한 다양한 변환 적용 및 정규화
        - **Resize**: 이미지를 `(224, 224)`로 크기 조정
        - **RandomResizedCrop**: 이미지를 무작위로 크롭하고 크기를 `(224, 224)`로 조정
        - **RandomHorizontalFlip**: 수평(좌우) 방향으로 랜덤 반전(p=0.5)
        - **RandomVerticalFlip**: 수직(상하) 방향으로 랜덤 반전(p=0.5)
        - **RandomRotation**: 미리 지정한 고정 각도(±6.92°, ±20.77°, ±48.46°, ±76.15°) 각각으로 이미지를 회전

In [None]:
def augment_and_save_images(class_folder, target_count, augment_count=1000):
    """
    Top up one class folder to ``target_count`` images using augmented copies.

    Files ending in .jpg/.jpeg/.png (case-insensitive) inside ``class_folder``
    are counted. When there are fewer than ``target_count``, augmented variants
    of randomly sampled source images are written back into the same folder as
    ``<source stem>_aug_<n>.jpg`` until the shortfall is covered.

    Every sampled source image yields up to 13 variants: one per entry of the
    basic transform list (resize, random resized crop, random translation,
    random horizontal flip, random vertical flip) plus one per fixed rotation
    angle. Each transform is applied on its own to the source image; they are
    not chained.

    Args:
        class_folder: Directory holding the images of a single class.
        target_count: Desired number of image files in the folder.
        augment_count: Maximum number of source images sampled per pass.

    Returns:
        None. Progress is reported through ``print``.

    Raises:
        ValueError: If augmentation is required and ``augment_count`` is < 1.
    """
    image_files = [
        f for f in os.listdir(class_folder)
        if f.lower().endswith(('.jpg', '.png', '.jpeg'))
    ]
    current_count = len(image_files)

    if current_count >= target_count:
        print(f"'{class_folder}' already has {current_count} images, no augmentation needed.")
        return

    if not image_files:
        # With no source image the generation loop could never make progress
        # and would spin forever, so bail out explicitly.
        print(f"'{class_folder}' has no source images to augment, skipping.")
        return

    if augment_count < 1:
        # A zero sample size would also loop forever; negative values already
        # raised ValueError inside random.sample, so report both the same way.
        raise ValueError(f"augment_count must be >= 1, got {augment_count}")

    needed = target_count - current_count
    sample_size = min(augment_count, current_count)

    print(f"'{class_folder}' has {current_count} images, need to generate {needed} more.")

    # Fixed rotation angles (degrees); each one produces its own variant.
    rotation_angles = [-76.15, -48.46, -20.77, -6.92, 6.92, 20.77, 48.46, 76.15]

    # Basic augmentations, each applied independently to the source image.
    augment_transforms = [
        transforms.Resize((224, 224)),
        # NOTE(review): scale is a fraction of the source image area, so the
        # upper bound of 1.2 asks for crops larger than the image - confirm
        # whether (0.8, 1.0) was intended.
        transforms.RandomResizedCrop(224, scale=(0.8, 1.2)),  # Scaling
        transforms.RandomAffine(degrees=0, translate=(0.2, 0.2)),  # Translation
        transforms.RandomHorizontalFlip(p=0.5),  # Left-right mirror
        transforms.RandomVerticalFlip(p=0.5),  # Top-bottom mirror
    ]
    # A (angle, angle) range pins RandomRotation to exactly that angle.
    # Built once here instead of once per source image.
    rotation_transforms = [
        transforms.RandomRotation((angle, angle)) for angle in rotation_angles
    ]

    new_images = 0
    while new_images < needed:
        for image_file in random.sample(image_files, sample_size):
            img_path = os.path.join(class_folder, image_file)

            # Close the file handle right away; keep an in-memory copy only.
            # Modes other than L/RGB (e.g. RGBA or palette PNGs) are converted
            # to RGB because the variants are saved as JPEG.
            with Image.open(img_path) as opened:
                if opened.mode in ("L", "RGB"):
                    img = opened.copy()
                else:
                    img = opened.convert("RGB")

            # Same order as before: basic transforms first, then rotations.
            augmented_imgs = [transform(img) for transform in augment_transforms]
            augmented_imgs.extend(rotate(img) for rotate in rotation_transforms)

            stem = os.path.splitext(image_file)[0]
            # Slice so that no more than the remaining shortfall is written.
            for augmented_img in augmented_imgs[:needed - new_images]:
                # Skip names that already exist so that a later run with a
                # higher target never overwrites earlier augmented files.
                serial = new_images + 1
                while True:
                    out_path = os.path.join(class_folder, f"{stem}_aug_{serial}.jpg")
                    if not os.path.exists(out_path):
                        break
                    serial += 1

                augmented_img.save(out_path)
                new_images += 1

            if new_images >= needed:
                break

    print(f"Total {new_images} augmented images generated for class '{class_folder}'.")

In [7]:
def process_all_classes(base_dir, target_count):
    """
    Run the augmentation step for every class directory under ``base_dir``.

    Each immediate child of ``base_dir`` that is a directory is handed to
    ``augment_and_save_images`` together with ``target_count``; entries that
    are not directories are ignored.
    """
    candidate_paths = (os.path.join(base_dir, entry) for entry in os.listdir(base_dir))
    # filter() is lazy, so each path is checked right before it is processed.
    for class_dir in filter(os.path.isdir, candidate_paths):
        augment_and_save_images(class_dir, target_count)

In [9]:
# Usage example
# Every class sub-directory of training_dir holding fewer than 500 images is
# topped up with augmented copies; the new files are written into those folders.
training_dir = "../mtl_transform_dataset/Training"  # Training data directory
process_all_classes(training_dir, target_count=500)

'../mtl_transform_dataset/Training\0' already has 1350 images, no augmentation needed.
'../mtl_transform_dataset/Training\1' already has 876 images, no augmentation needed.
'../mtl_transform_dataset/Training\11' has 352 images, need to generate 148 more.
Total 148 augmented images generated for class '../mtl_transform_dataset/Training\11'.
'../mtl_transform_dataset/Training\12' has 351 images, need to generate 149 more.
Total 149 augmented images generated for class '../mtl_transform_dataset/Training\12'.
'../mtl_transform_dataset/Training\16' already has 868 images, no augmentation needed.
'../mtl_transform_dataset/Training\17' has 234 images, need to generate 266 more.
Total 266 augmented images generated for class '../mtl_transform_dataset/Training\17'.
'../mtl_transform_dataset/Training\18' already has 805 images, no augmentation needed.
'../mtl_transform_dataset/Training\2' already has 824 images, no augmentation needed.
'../mtl_transform_dataset/Training\3' has 423 images, need t