In [1]:
import cv2
from PIL import Image
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import torch
from torchvision import transforms
from tqdm import tqdm

# Directory that receives the RGB copies of the source images and their
# augmented variants.
augmented_path = '/home/jack/Mounts/DiskOne/kona_coffee/augmented'

# Create the directory (and any missing parents) up front; exist_ok=True makes
# re-running this cell a no-op when the directory is already there.
os.makedirs(name=augmented_path, exist_ok=True)

In [2]:
# Seed torch's global RNG so repeated runs draw the same random numbers.
torch.manual_seed(17)

def get_transformer():
    """Build the augmentation pipeline applied to each source image.

    The steps run in this order:
      1. ColorJitter with brightness, contrast and saturation set to 0.2 and
         hue set to 0.0.
      2. GaussianBlur with kernel size 3 and sigma range (0.1, 2.0).
      3. RandomAdjustSharpness with factor 2.0, applied with probability 0.5.
      4. RandomVerticalFlip with probability 0.7.
      5. RandomHorizontalFlip with probability 0.7.

    Returns:
        A torch.nn.Sequential wrapping the five transforms above.
    """
    augmentation_steps = [
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.0),
        transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
        transforms.RandomAdjustSharpness(sharpness_factor=2.0, p=0.5),
        transforms.RandomVerticalFlip(p=0.7),
        transforms.RandomHorizontalFlip(p=0.7),
    ]
    return torch.nn.Sequential(*augmentation_steps)

# Load the image list and keep only the rows whose `keep` flag is True.
# The explicit `== True` comparison is intentional: it evaluates to False for
# missing values instead of failing on a non-boolean mask.
df = pd.read_csv('filtered.csv')
df = df.loc[df['keep'] == True]

df

Unnamed: 0,image,keep
0,/home/jack/Mounts/DiskOne/kona_coffee/splits/1...,True
1,/home/jack/Mounts/DiskOne/kona_coffee/splits/1...,True
2,/home/jack/Mounts/DiskOne/kona_coffee/splits/1...,True
3,/home/jack/Mounts/DiskOne/kona_coffee/splits/1...,True
4,/home/jack/Mounts/DiskOne/kona_coffee/splits/1...,True
...,...,...
1838,/home/jack/Mounts/DiskOne/kona_coffee/splits/1...,True
1840,/home/jack/Mounts/DiskOne/kona_coffee/splits/1...,True
1842,/home/jack/Mounts/DiskOne/kona_coffee/splits/1...,True
1844,/home/jack/Mounts/DiskOne/kona_coffee/splits/1...,True


In [3]:
# Build the augmented dataset on disk: for every kept source image, store an
# RGB copy of the original plus three randomly transformed variants, and record
# the path of every file in `data` (one single-element row per file).
data = []

# The pipeline is identical for every image, so it is built once instead of
# once per augmented file.
transformer = get_transformer()

for row in tqdm(df.itertuples(), total=df.shape[0]):
    # Open inside a context manager so the source file handle is released as
    # soon as the RGB copy has been produced.
    with Image.open(row.image) as source_image:
        image = source_image.convert('RGB')

    source_name = os.path.basename(row.image)

    # save original image
    file_path = os.path.join(augmented_path, source_name)

    if not os.path.exists(file_path):
        image.save(file_path)

    data.append([file_path])

    # splitext removes only the final extension. Splitting on the first '.'
    # would map "a.1.jpg" and "a.2.jpg" to the same stem, and the second
    # image's augmentations would then be silently skipped as "already existing".
    stem = os.path.splitext(source_name)[0]

    # create 3 augmented images
    for i in range(3):
        file_path = os.path.join(augmented_path, f'{stem}.aug.{i}.png')

        try:
            # Existing files are kept as-is, so an interrupted run can be resumed.
            if not os.path.exists(file_path):
                trans_image = transformer(image)
                trans_image.save(file_path)

            # Only reached when the file exists or was written successfully.
            data.append([file_path])

        except Exception as e:
            # Best effort: report which file failed and move on to the next one.
            print(f'Failed to create {file_path}: {e}')

100%|██████████| 691/691 [00:42<00:00, 16.16it/s] 


In [4]:
# Turn the paths collected during this run into a one-column frame.
df_aug = pd.DataFrame(data, columns=['image'])

# If a manifest from an earlier run exists, put its rows first so they are
# merged with the paths from this run.
old_df = pd.read_csv('augmented.csv') if os.path.exists('augmented.csv') else None
if old_df is not None:
    df_aug = pd.concat([old_df, df_aug], ignore_index=True)

# A path may appear in both the old manifest and this run; keep one row each.
df_aug = df_aug.drop_duplicates(subset=['image'])

# Persist the merged manifest without the index column.
df_aug.to_csv('augmented.csv', index=False)

df_aug

Unnamed: 0,image
0,/home/jack/Mounts/DiskOne/kona_coffee/augmente...
1,/home/jack/Mounts/DiskOne/kona_coffee/augmente...
2,/home/jack/Mounts/DiskOne/kona_coffee/augmente...
3,/home/jack/Mounts/DiskOne/kona_coffee/augmente...
4,/home/jack/Mounts/DiskOne/kona_coffee/augmente...
...,...
4807,/home/jack/Mounts/DiskOne/kona_coffee/augmente...
4808,/home/jack/Mounts/DiskOne/kona_coffee/augmente...
4809,/home/jack/Mounts/DiskOne/kona_coffee/augmente...
4810,/home/jack/Mounts/DiskOne/kona_coffee/augmente...
