In [109]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

import os 
from glob import glob 

from PIL import Image, ImageOps
import albumentations as A
import cv2

from tqdm import tqdm 

In [32]:
# Load the training table; the augmentation cell below unpacks each row
# positionally as (index, ID, target).
df = pd.read_csv(filepath_or_buffer='../data/train.csv')

In [53]:
# Augmentation pipeline, built stage by stage and chained by A.Compose at the
# bottom. Stages are constructed in the same order in which they are composed.
# NOTE(review): the bare integer `interpolation` / `border_mode` arguments are
# presumably OpenCV enum values (e.g. 1 == cv2.BORDER_REPLICATE) — confirm.

# Flip stage: a OneOf group holding the horizontal and the vertical flip.
_flip_stage = A.OneOf([A.HorizontalFlip(), A.VerticalFlip()])

# Colour stage: brightness/contrast and hue/saturation/value changes.
_colour_stage = [
    A.RandomBrightnessContrast(
        brightness_limit=(-0.3, 0.3),
        contrast_limit=(-0.3, 0.3),
        p=0.5,
    ),
    A.HueSaturationValue(p=0.5),
]

# Rigid geometry stage: transpose, then rotation with replicated borders.
_geometry_stage = [
    A.Transpose(p=0.5),
    A.Rotate(limit=90, border_mode=cv2.BORDER_REPLICATE),
]

# Occlusion and elastic warp stage: 8 to 30 holes of 8x8, then ElasticTransform.
_occlusion_stage = [
    A.CoarseDropout(
        p=0.5,
        max_holes=30,
        max_height=8,
        max_width=8,
        min_holes=8,
        min_height=8,
        min_width=8,
    ),
    A.ElasticTransform(
        p=0.5,
        alpha=1.0,
        sigma=50.0,
        alpha_affine=50.0,
        interpolation=0,
        border_mode=1,
        value=(0, 0, 0),
        mask_value=None,
        approximate=False,
    ),
]

# Degradation stage: a SomeOf group (n=2, p=0.85) over blur, motion blur,
# downscaling and Gaussian noise.
_degradation_stage = A.SomeOf(
    [
        A.Blur(blur_limit=3),
        A.MotionBlur(blur_limit=3),
        A.Downscale(
            scale_min=0.699999988079071,
            scale_max=0.9900000095367432,
            interpolation=3,
        ),
        A.GaussNoise(var_limit=(0, 700), per_channel=True),
    ],
    n=2,
    p=0.85,
)

# Final stage: grid distortion.
_grid_stage = A.GridDistortion(
    p=0.5,
    num_steps=5,
    distort_limit=(-0.029999999329447746, 0.05000000074505806),
    interpolation=2,
    border_mode=0,
    value=(0, 0, 0),
    mask_value=None,
    normalized=True,
)

transforms = A.Compose([
    _flip_stage,
    *_colour_stage,
    *_geometry_stage,
    *_occlusion_stage,
    _degradation_stage,
    _grid_stage,
])

In [26]:
# Offline augmentation: for every row of `df`, write several randomly
# transformed copies of the image into `data_path` (next to the original) and
# record an (ID, target) row for each copy. The new rows are appended to `df`.
#
# NOTE(review): this cell is not safe to run twice on the same kernel. `df` is
# reassigned to include the augmented rows and the copies are saved into the
# same directory they are read from, so a second run would also augment the
# already-augmented `tf{i}_...` images. Reload `df` from the CSV before re-running.
ids = []
targets = []
data_path = '../data/train/'

# Number of augmented copies generated per source image, keyed by target label.
# Labels that are not listed fall back to `default_copies`.
copies_per_target = {13: 25, 14: 35, 1: 45}
default_copies = 20

# Rows are unpacked positionally as (ID, target), so `df` must have exactly
# these two columns in this order. `total` gives tqdm a percentage and an ETA,
# which it cannot infer from the bare itertuples() generator.
for ID, target in tqdm(
    df.itertuples(index=False, name=None),
    total=len(df),
    desc='Image augmentation',
    mininterval=0.1,
):
    image_path = os.path.join(data_path, ID)
    # Close the file handle as soon as the pixels are copied into the array;
    # otherwise one descriptor per source image stays open until garbage collection.
    with Image.open(image_path) as source_image:
        image = np.array(source_image)

    n = copies_per_target.get(target, default_copies)
    for i in range(n):
        transformed_image = transforms(image=image)['image']
        # Prefix with the copy number so each copy gets a distinct file name.
        image_ID = f'tf{i}_' + ID
        ids.append(image_ID)
        targets.append(target)
        Image.fromarray(transformed_image).save(os.path.join(data_path, image_ID))

aug_data = {
    'ID' : ids,
    'target' : targets
}
aug_df = pd.DataFrame(aug_data)
# ignore_index avoids duplicate row labels (both frames start at 0), which
# would make label-based lookups on the combined frame ambiguous.
df = pd.concat([df, aug_df], ignore_index=True)

Image augmentation: 1570it [54:55,  2.10s/it]


In [29]:
# Write the combined table (original rows plus augmented rows) without the
# DataFrame index column.
df.to_csv(path_or_buf='../data/add_transformed_train.csv', index=False)