# Data augmentation

In [1]:
import glob
from skimage.io import imread
import numpy as np
import albumentations as A

In [24]:
# Paths of the patch images for each class.
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the file order (and anything selected by list index downstream) reproducible.
files_leak = sorted(glob.glob("../data/patches_clean/leak/*.tif"))
files_nonleak = sorted(glob.glob("../data/patches_clean/non_leak/*.tif"))
# Leak paths first, then non-leak paths.
files = files_leak + files_nonleak

In [18]:
# Define augmentation pipeline.
# Each transform is meant to fire with probability 0.5. The probability is
# passed by keyword: in albumentations releases whose transform signature is
# (always_apply=False, p=0.5), a positional 0.5 binds to `always_apply`
# (truthy), which makes every transform run on every call.
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Flip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Transpose(p=0.5),
])

In [19]:
# Read the leak patches, keeping only those with the expected (21, 21, 14) shape.
# Patches of any other shape are skipped. (The former `else: next` branch was a
# no-op expression rather than a `continue`, so it is simply dropped.)
X_leaks = []
for file in files_leak:
    image = imread(file)
    if image.shape == (21, 21, 14):
        X_leaks.append(image)

print("Data without augmentation: ", np.array(X_leaks).shape)

# Augmentation: range(0, 826, 25) yields 34 rounds; the loop value itself is unused.
# In each round every 50th entry of the *current* list (originals plus any
# augmented patches appended in earlier rounds) is transformed and appended.
# NOTE(review): the same source indices are revisited every round and the
# pipeline only composes flips / 90-degree rotations / transposes, so many of the
# appended patches are likely exact duplicates -- confirm this is intended.
for _round in range(0, 826, 25):
    # The index range is fixed at the start of the round, so patches appended
    # during this round are not themselves transformed until a later round.
    for idx in range(0, len(X_leaks), 50):
        image_tr = transform(image=X_leaks[idx])["image"]
        X_leaks.append(image_tr)

    # Snapshot after each round; the value left after the last round is the
    # array used by later cells.
    X_leaks_raw = np.array(X_leaks)
    print("Data with augmentation:", X_leaks_raw.shape)

Data without augmentation:  (825, 21, 21, 14)
Data with augmentation: (842, 21, 21, 14)
Data with augmentation: (859, 21, 21, 14)
Data with augmentation: (877, 21, 21, 14)
Data with augmentation: (895, 21, 21, 14)
Data with augmentation: (913, 21, 21, 14)
Data with augmentation: (932, 21, 21, 14)
Data with augmentation: (951, 21, 21, 14)
Data with augmentation: (971, 21, 21, 14)
Data with augmentation: (991, 21, 21, 14)
Data with augmentation: (1011, 21, 21, 14)
Data with augmentation: (1032, 21, 21, 14)
Data with augmentation: (1053, 21, 21, 14)
Data with augmentation: (1075, 21, 21, 14)
Data with augmentation: (1097, 21, 21, 14)
Data with augmentation: (1119, 21, 21, 14)
Data with augmentation: (1142, 21, 21, 14)
Data with augmentation: (1165, 21, 21, 14)
Data with augmentation: (1189, 21, 21, 14)
Data with augmentation: (1213, 21, 21, 14)
Data with augmentation: (1238, 21, 21, 14)
Data with augmentation: (1263, 21, 21, 14)
Data with augmentation: (1289, 21, 21, 14)
Data with augment

In [26]:
# Read the non-leak patches, keeping only those with the expected (21, 21, 14) shape.
# Patches of any other shape are skipped. (The former `else: next` branch was a
# no-op expression rather than a `continue`, so it is simply dropped.)
X_nonleaks = []
for file in files_nonleak:
    image = imread(file)
    if image.shape == (21, 21, 14):
        X_nonleaks.append(image)

print("Data without augmentation: ", np.array(X_nonleaks).shape)

# Augmentation: range(0, 826, 25) yields 34 rounds; the loop value itself is unused.
# In each round every 50th entry of the *current* list (originals plus any
# augmented patches appended in earlier rounds) is transformed and appended.
# NOTE(review): the same source indices are revisited every round and the
# pipeline only composes flips / 90-degree rotations / transposes, so many of the
# appended patches are likely exact duplicates -- confirm this is intended.
for _round in range(0, 826, 25):
    # The index range is fixed at the start of the round, so patches appended
    # during this round are not themselves transformed until a later round.
    for idx in range(0, len(X_nonleaks), 50):
        image_tr = transform(image=X_nonleaks[idx])["image"]
        X_nonleaks.append(image_tr)

    # Snapshot after each round; the value left after the last round is the
    # array used by later cells.
    X_nonleaks_raw = np.array(X_nonleaks)
    print("Data with augmentation:", X_nonleaks_raw.shape)

Data without augmentation:  (825, 21, 21, 14)
Data with augmentation: (842, 21, 21, 14)
Data with augmentation: (859, 21, 21, 14)
Data with augmentation: (877, 21, 21, 14)
Data with augmentation: (895, 21, 21, 14)
Data with augmentation: (913, 21, 21, 14)
Data with augmentation: (932, 21, 21, 14)
Data with augmentation: (951, 21, 21, 14)
Data with augmentation: (971, 21, 21, 14)
Data with augmentation: (991, 21, 21, 14)
Data with augmentation: (1011, 21, 21, 14)
Data with augmentation: (1032, 21, 21, 14)
Data with augmentation: (1053, 21, 21, 14)
Data with augmentation: (1075, 21, 21, 14)
Data with augmentation: (1097, 21, 21, 14)
Data with augmentation: (1119, 21, 21, 14)
Data with augmentation: (1142, 21, 21, 14)
Data with augmentation: (1165, 21, 21, 14)
Data with augmentation: (1189, 21, 21, 14)
Data with augmentation: (1213, 21, 21, 14)
Data with augmentation: (1238, 21, 21, 14)
Data with augmentation: (1263, 21, 21, 14)
Data with augmentation: (1289, 21, 21, 14)
Data with augment

In [31]:
# Join the two classes along the first axis: leak patches first, then non-leak.
X = np.concatenate((X_leaks_raw, X_nonleaks_raw), axis=0)
X.shape

(3282, 21, 21, 14)

In [38]:
# Build the label column: 1 for every leak patch, 0 for every non-leak patch,
# in the same leak-then-non-leak order used to build X.
# Counts are taken from the per-class arrays rather than assuming X splits into
# two equal halves, and the column length is inferred instead of hard-coded.
n_leak = X_leaks_raw.shape[0]
n_nonleak = X_nonleaks_raw.shape[0]
Y = np.concatenate([
    np.ones(n_leak, dtype=int),
    np.zeros(n_nonleak, dtype=int),
]).reshape(-1, 1)
# Guard against X and Y drifting apart if an upstream cell changes.
assert Y.shape[0] == X.shape[0], "number of labels must match number of samples"

In [41]:
# Persist the stacked patches (X) and their labels (Y) as .npy files.
np.save(file="../data/clean/X.npy", arr=X)
np.save(file="../data/clean/Y.npy", arr=Y)