In [20]:
import os

import pandas as pd
import numpy as np

import albumentations as A
from PIL import Image

from tqdm import tqdm

In [21]:
# Root folder of the competition data; every path below is derived from it.
PRE_PATH = '/upstage-cv-classification-cv2/data/'

# Inputs and outputs of the augmentation run.
# NOTE(review): the source images are read from the 'test' folder, presumably
# because train_semi.csv lists pseudo-labelled test images -- confirm.
TRAIN_IMAGE_PATH = f"{PRE_PATH}test"
TRAIN_CSV_PATH = f"{PRE_PATH}train_semi.csv"
TRAIN_DF = pd.read_csv(TRAIN_CSV_PATH)
TRAIN_AUG_IMAGE_PATH = f"{PRE_PATH}train_semi_aug"
TRAIN_AUG_CSV_PATH = f"{PRE_PATH}train_semi_aug.csv"

# Validation split (loaded here, not augmented in the cells shown).
VALID_IMAGE_PATH = f"{PRE_PATH}valid"
VALID_CSV_PATH = f"{PRE_PATH}valid.csv"
VALID_DF = pd.read_csv(VALID_CSV_PATH)


In [22]:
# Variance range handed to GaussNoise (var_limit) in every flipped pipeline.
noise_value_min = 0
noise_value_max = 500


def _fixed_rotation(angle):
    """Return the Rotate step for a fixed angle as a list, or [] for angle 0.

    limit=(angle, angle) collapses the sampling range to a single value, so the
    rotation is always exactly `angle` degrees. Angle 0 yields no step at all,
    which keeps the 0-degree pipelines free of any Rotate transform.
    """
    if angle == 0:
        return []
    return [A.Rotate(limit=(angle, angle), p=1)]


def _flip_rotate_noise(angle):
    """Build HorizontalFlip -> fixed Rotate (skipped for 0) -> GaussNoise, all with p=1."""
    return A.Compose([
        A.HorizontalFlip(p=1),
        *_fixed_rotation(angle),
        A.GaussNoise(var_limit=(noise_value_min, noise_value_max), mean=0, p=1),
    ])


def _rotate_only(angle):
    """Build a pipeline holding only the fixed Rotate step (empty for angle 0)."""
    return A.Compose(_fixed_rotation(angle))


# Horizontal flip + rotation + Gaussian noise, one pipeline per 30-degree step.
hf_rotate_000_noise_000_500 = _flip_rotate_noise(0)
hf_rotate_030_noise_000_500 = _flip_rotate_noise(30)
hf_rotate_060_noise_000_500 = _flip_rotate_noise(60)
hf_rotate_090_noise_000_500 = _flip_rotate_noise(90)
hf_rotate_120_noise_000_500 = _flip_rotate_noise(120)
hf_rotate_150_noise_000_500 = _flip_rotate_noise(150)
hf_rotate_180_noise_000_500 = _flip_rotate_noise(180)
hf_rotate_210_noise_000_500 = _flip_rotate_noise(210)
hf_rotate_240_noise_000_500 = _flip_rotate_noise(240)
hf_rotate_270_noise_000_500 = _flip_rotate_noise(270)
hf_rotate_300_noise_000_500 = _flip_rotate_noise(300)
hf_rotate_330_noise_000_500 = _flip_rotate_noise(330)

# Rotation only; rotate_000 is an empty pipeline.
rotate_000 = _rotate_only(0)
rotate_030 = _rotate_only(30)
rotate_060 = _rotate_only(60)
rotate_090 = _rotate_only(90)
rotate_120 = _rotate_only(120)
rotate_150 = _rotate_only(150)
rotate_180 = _rotate_only(180)
rotate_210 = _rotate_only(210)
rotate_240 = _rotate_only(240)
rotate_270 = _rotate_only(270)
rotate_300 = _rotate_only(300)
rotate_330 = _rotate_only(330)

# List of (prefix, pipeline) tuples. The prefix is prepended to the source file
# name to form the augmented image's file name; the pipeline is the transform
# that produces that image.
# NOTE(review): the 'n0500~0500' tag in the prefixes does not match the
# (0, 500) var_limit configured above. The strings are left unchanged because
# they become file names -- confirm before renaming.
base_aug_types = [
    ("hf_r000_n0500~0500_", hf_rotate_000_noise_000_500),
    ("hf_r030_n0500~0500_", hf_rotate_030_noise_000_500),
    ("hf_r060_n0500~0500_", hf_rotate_060_noise_000_500),
    ("hf_r090_n0500~0500_", hf_rotate_090_noise_000_500),
    ("hf_r120_n0500~0500_", hf_rotate_120_noise_000_500),
    ("hf_r150_n0500~0500_", hf_rotate_150_noise_000_500),
    ("hf_r180_n0500~0500_", hf_rotate_180_noise_000_500),
    ("hf_r210_n0500~0500_", hf_rotate_210_noise_000_500),
    ("hf_r240_n0500~0500_", hf_rotate_240_noise_000_500),
    ("hf_r270_n0500~0500_", hf_rotate_270_noise_000_500),
    ("hf_r300_n0500~0500_", hf_rotate_300_noise_000_500),
    ("hf_r330_n0500~0500_", hf_rotate_330_noise_000_500),
    ("r000_", rotate_000),
    ("r030_", rotate_030),
    ("r060_", rotate_060),
    ("r090_", rotate_090),
    ("r120_", rotate_120),
    ("r150_", rotate_150),
    ("r180_", rotate_180),
    ("r210_", rotate_210),
    ("r240_", rotate_240),
    ("r270_", rotate_270),
    ("r300_", rotate_300),
    ("r330_", rotate_330),
]

In [23]:
# File names and labels of every augmented image; they become the rows of the
# CSV written at the end of this cell.
ids = []
targets = []

# Image.save() does not create missing directories, so make sure the output
# folder exists before the long-running loop starts.
os.makedirs(TRAIN_AUG_IMAGE_PATH, exist_ok=True)

# Each row of TRAIN_DF is unpacked positionally as (file name, label), so the
# frame must hold exactly those two columns in that order.
for ID, target in tqdm(TRAIN_DF.itertuples(index=False, name=None), total=TRAIN_DF.shape[0], desc='Image augmentation'):
    image_path = os.path.join(TRAIN_IMAGE_PATH, ID)
    # Decode inside a context manager so the file handle is released right
    # away instead of staying open until garbage collection.
    with Image.open(image_path) as source_image:
        image = np.array(source_image)

    # Apply every pipeline in base_aug_types to the same source image.
    for prefix, aug_function in base_aug_types:
        transformed_image = aug_function(image=image)['image']
        # The augmented file keeps the source name, tagged with the pipeline prefix.
        new_ID = prefix + ID
        ids.append(new_ID)
        targets.append(target)
        Image.fromarray(transformed_image).save(os.path.join(TRAIN_AUG_IMAGE_PATH, new_ID))

# One row per augmented image: its file name and the label of its source image.
aug_data = pd.DataFrame({'ID': ids, 'target': targets})
aug_data.to_csv(TRAIN_AUG_CSV_PATH, index=False)

Image augmentation: 100%|██████████| 2714/2714 [13:00<00:00,  3.48it/s]
