In [1]:
import os

import pandas as pd
import numpy as np

import albumentations as A
from PIL import Image
import cv2

from tqdm import tqdm

INFO:albumentations.check_version:A new version of Albumentations is available: 1.4.13 (you have 1.4.12). Upgrade using: pip install -U albumentations. To disable automatic update checks, set the environment variable NO_ALBUMENTATIONS_UPDATE to 1.


In [3]:
def create_base_aug_types(rotate_unit = 45, noise_range = 500, isnoise = True, isshift = True):
    base_aug_types = []
    
    # 단순 회전
    for rotate_size in range(0, 360, rotate_unit):
        name = f"rotate_{rotate_size:03}"
        func = A.Compose([A.Rotate(
            limit = (rotate_size, rotate_size),
            p=1.0,
            border_mode=cv2.BORDER_CONSTANT,
            value=(255, 255, 255))])
        
        base_aug_types.append((name, func))

    # 회전 + flip
    for rotate_size in range(0, 360, rotate_unit):
        name = f"rotate_{rotate_size:03}"
        func = A.Compose([
            A.Rotate(
                limit = (rotate_size, rotate_size),
                p=1.0,
                border_mode=cv2.BORDER_CONSTANT,
                value=(255, 255, 255)),
            A.HorizontalFlip(p=1)])
        
        base_aug_types.append((name, func))


    # 회전 & 움직임 & scale
    if isshift:
        for rotate_size in range(0, 360, rotate_unit):
            name = f"roate_shift_{rotate_size:03}"
            func = A.Compose([
                A.ShiftScaleRotate(
                    shift_limit = 0.2,
                    scale_limit = 0.2,
                    rotate_limit = (rotate_size, rotate_size),
                    p = 1.0,
                    border_mode = cv2.BORDER_CONSTANT,
                    value = (255, 255, 255)),
                A.HorizontalFlip(p=0.5)
            ])

            base_aug_types.append((name, func))

    # 노이즈
    if isnoise:
        for rotate_size in range(0, 360, rotate_unit):
            name = f"roate_shift_noise_{rotate_size:03}"
            func = A.Compose([A.ShiftScaleRotate(
                shift_limit = 0.1,
                scale_limit = 0.1,
                rotate_limit = (rotate_size, rotate_size),
                p = 1.0,
                border_mode = cv2.BORDER_CONSTANT,
                value = (255, 255, 255)
            ),
            A.GaussNoise(var_limit=(0, noise_range))])

            base_aug_types.append((name, func))

    return base_aug_types

In [4]:
def agumentation7(train_df, train_image_path, train_aug_image_path, base_aug_types):
    ids = []
    targets = []

    for row in tqdm(train_df.itertuples(), total = train_df.shape[0]):
        id, target = row.ID, row.target
        image_path = os.path.join(train_image_path, id)
        image = np.array(Image.open(image_path))

        for prefix, aug_function in base_aug_types:
            transformed_image = aug_function(image = image)['image']
            new_id = prefix + id

            ids.append(new_id)
            targets.append(target)

            Image.fromarray(transformed_image).save(os.path.join(train_aug_image_path, new_id))

    aug_df = pd.DataFrame()
    aug_df['ID'] = ids
    aug_df['target'] = targets

    return aug_df


In [24]:
train_df = pd.read_csv('/upstage-cv-classification-cv2/data/train_semi_37.csv')
train_image_path = '/upstage-cv-classification-cv2/data/test'
train_aug_image_path = '/upstage-cv-classification-cv2/data/train_semi_aug2'
base_aug_types = create_base_aug_types(rotate_unit = 45, noise_range = 500, isnoise = False, isshift = False)

semi_37_aug_df = agumentation7(train_df, train_image_path, train_aug_image_path, base_aug_types)
semi_37_aug_df

100%|██████████| 120/120 [00:03<00:00, 37.11it/s]


Unnamed: 0,ID,target
0,rotate_00001c918594307c6f2.jpg,3
1,rotate_04501c918594307c6f2.jpg,3
2,rotate_09001c918594307c6f2.jpg,3
3,rotate_13501c918594307c6f2.jpg,3
4,rotate_18001c918594307c6f2.jpg,3
...,...,...
1915,rotate_135fda82cd92fe332e3.jpg,7
1916,rotate_180fda82cd92fe332e3.jpg,7
1917,rotate_225fda82cd92fe332e3.jpg,7
1918,rotate_270fda82cd92fe332e3.jpg,7


In [5]:
train_df = pd.read_csv('/upstage-cv-classification-cv2/data/train.csv')
train_image_path = '/upstage-cv-classification-cv2/data/train'
train_aug_image_path = '/upstage-cv-classification-cv2/data/train_aug'
base_aug_types = create_base_aug_types(rotate_unit = 45, noise_range = 1000, isnoise = True, isshift = True)

train_aug_df = agumentation7(train_df, train_image_path, train_aug_image_path, base_aug_types)
train_aug_df

  0%|          | 0/1570 [00:00<?, ?it/s]

100%|██████████| 1570/1570 [03:37<00:00,  7.23it/s]


Unnamed: 0,ID,target
0,rotate_000002f99746285dfdd.jpg,16
1,rotate_045002f99746285dfdd.jpg,16
2,rotate_090002f99746285dfdd.jpg,16
3,rotate_135002f99746285dfdd.jpg,16
4,rotate_180002f99746285dfdd.jpg,16
...,...,...
50235,roate_shift_noise_135ffc22136f958deb1.jpg,9
50236,roate_shift_noise_180ffc22136f958deb1.jpg,9
50237,roate_shift_noise_225ffc22136f958deb1.jpg,9
50238,roate_shift_noise_270ffc22136f958deb1.jpg,9


In [6]:
train_aug_df.to_csv('/upstage-cv-classification-cv2/data/train_aug.csv', index=False)

In [31]:
train_semi_aug2_df = pd.concat([train_aug_df, semi_37_aug_df], axis=0)
train_semi_aug2_df.to_csv('/upstage-cv-classification-cv2/data/train_semi_aug2.csv', index=False)