In [25]:
import pandas as pd 
import numpy as np 

import os 
from glob import glob 

from PIL import Image
import albumentations as A

from tqdm import tqdm

In [26]:
# Fix known-incorrect labels in the training CSV and write the file back in place.
train_df = pd.read_csv("data/train.csv")

# Row index label -> corrected target class.
label_corrections = {
    428: 7,
    1095: 14,
    862: 3,
    192: 7,
    1237: 14,
    38: 10,
    340: 10,
}
for row_label, fixed_target in label_corrections.items():
    train_df.loc[row_label, 'target'] = fixed_target

# index=False keeps the on-disk column layout unchanged (no extra index column).
train_df.to_csv("data/train.csv", index=False)

In [27]:
# Load the training table; later cells iterate over it and append augmented rows to it.
df = pd.read_csv('data/train.csv')

In [28]:
# Directory the training images are read from; augmented images are saved into this same directory.
data_path = 'data/train/'

In [29]:
# --- Flip pipelines: every transform uses p=1, so it is applied on every call ---
horizontal_flip = A.Compose([A.HorizontalFlip(p=1)])
vertical_flip = A.Compose([A.VerticalFlip(p=1)])
double_flip = A.Compose([A.HorizontalFlip(p=1), A.VerticalFlip(p=1)])

# --- The same flips, applied after a transpose ---
transpose = A.Compose([A.Transpose(p=1)])
transpose_hflip = A.Compose([A.Transpose(p=1), A.HorizontalFlip(p=1)])
transpose_vflip = A.Compose([A.Transpose(p=1), A.VerticalFlip(p=1)])
transpose_dflip = A.Compose([A.Transpose(p=1), A.HorizontalFlip(p=1), A.VerticalFlip(p=1)])

# --- Randomised pipeline: noise is always added, the other steps fire with the given p ---
noise_rotate = A.Compose([
    A.GaussNoise(var_limit=(100, 1000), per_channel=False, p=1),
    A.Blur(blur_limit=(2, 4), p=0.5),
    A.ToGray(p=0.2),
    A.ShiftScaleRotate(
        shift_limit=0.2,
        scale_limit=0,  # no rescaling, only shift and rotation
        rotate_limit=30,
        border_mode=0,  # 0 is OpenCV's BORDER_CONSTANT: exposed border is filled with `value`
        value=(255, 255, 255),  # white fill
        p=0.4,
    ),
])

In [30]:
# File-name prefix -> pipeline that produces the image saved under that prefix.
prefix_to_pipeline = {
    "hf_": horizontal_flip,
    "vf_": vertical_flip,
    "df_": double_flip,
    "tp_": transpose,
    "tphf_": transpose_hflip,
    "tpvf_": transpose_vflip,
    "tpdf_": transpose_dflip,
}

# Ordered list of (prefix, pipeline) pairs consumed by the augmentation loop.
base_aug_types = list(prefix_to_pipeline.items())

In [31]:
# For every row in `df`, apply each pipeline in `base_aug_types` to the image,
# save the result under the prefixed file name, and append one (ID, target)
# row per saved image to `df`.
# NOTE: `df` is reassigned at the end, so re-running this cell without first
# reloading `df` would also augment the rows this cell appended.
ids = []
targets = []
# Read the columns by name rather than unpacking itertuples() positionally, so
# the loop does not depend on the frame having exactly two columns.
for ID, target in tqdm(zip(df['ID'], df['target']), total=df.shape[0], desc='Image augmentation'):
    image_path = os.path.join(data_path, ID)
    # The context manager closes the file handle once the pixels are copied out.
    with Image.open(image_path) as source_image:
        image = np.array(source_image)

    for prefix, aug_function in base_aug_types:
        transformed_image = aug_function(image=image)['image']
        new_ID = prefix + ID
        ids.append(new_ID)
        targets.append(target)
        # Saved alongside the originals under the prefixed name.
        Image.fromarray(transformed_image).save(os.path.join(data_path, new_ID))

aug_data = pd.DataFrame({'ID': ids, 'target': targets})
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# the appended rows repeat index labels already used by the original rows.
df = pd.concat([df, aug_data], ignore_index=True)

Image augmentation: 100%|██████████| 1570/1570 [01:07<00:00, 23.27it/s]


In [32]:
# For every row currently in `df`, call `noise_rotate` seven times on the
# image, save each result as nb{i}_<ID>, and append the new rows to `df`.
# NOTE: `noise_rotate` contains transforms with p < 1 and no random seed is set
# in the visible cells, so the generated images presumably differ between runs.
# NOTE: `df` is reassigned at the end; re-running this cell without rebuilding
# `df` would also process the rows this cell appended.
ids = []
targets = []
# Read the columns by name rather than unpacking itertuples() positionally, so
# the loop does not depend on the frame having exactly two columns.
for ID, target in tqdm(zip(df['ID'], df['target']), total=df.shape[0], desc='Image augmentation'):
    image_path = os.path.join(data_path, ID)
    # The context manager closes the file handle once the pixels are copied out.
    with Image.open(image_path) as source_image:
        image = np.array(source_image)

    # Seven separate calls per source image, each saved under its own prefix.
    for i in range(7):
        transformed_image = noise_rotate(image=image)['image']
        image_ID = f'nb{i}_' + ID
        ids.append(image_ID)
        targets.append(target)
        Image.fromarray(transformed_image).save(os.path.join(data_path, image_ID))

nb_data = pd.DataFrame({'ID': ids, 'target': targets})
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# the appended rows repeat index labels already used by the existing rows.
df = pd.concat([df, nb_data], ignore_index=True)

Image augmentation: 100%|██████████| 12560/12560 [25:57<00:00,  8.07it/s]


In [33]:
# Write the combined table (original rows plus all appended augmented rows) to a new CSV, without the index column.
df.to_csv('data/aug_train.csv', index=False)

In [34]:
# Row-count check: 1570 source rows -> x8 after the flip/transpose cell (12560) -> x8 after the noise cell (100480).
df.shape

(100480, 2)