# Data augmentation

In [1]:
import glob
from skimage.io import imread
import numpy as np
import albumentations as A

In [2]:
# Paths of the image patches, one list per class.
# glob.glob returns matches in arbitrary (filesystem-dependent) order, so the
# lists are sorted to make index-based selection in later cells reproducible.
files_leak = sorted(glob.glob("../data/patches_clean/leak/*.tif"))
files_nonleak = sorted(glob.glob("../data/patches_clean/non_leak/*.tif"))
files = files_leak + files_nonleak

In [3]:
# Define augmentation pipeline
# Each transform is meant to fire independently with probability 0.5.
# The probability is passed by keyword on purpose: in albumentations releases
# whose transform constructors are (always_apply=False, p=0.5), a positional
# 0.5 binds to `always_apply` (truthy), so the transform would run on every
# call instead of half of them.
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Flip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Transpose(p=0.5),
])

In [4]:
# Read the leak patches and augment them.
PATCH_SHAPE = (20, 20, 14)  # only patches with exactly this shape are kept
N_PASSES = 34               # number of augmentation passes (== len(range(0, 826, 25)))
SAMPLE_STEP = 50            # each pass augments every 50th image of the current list

X_leaks = []
n_skipped = 0
for file in files_leak:
    image = imread(file)
    if image.shape == PATCH_SHAPE:
        X_leaks.append(image)
    else:
        # Count rejected patches so the loss is visible rather than silent.
        n_skipped += 1

if n_skipped:
    print("Skipped patches with unexpected shape:", n_skipped)
print("Data without augmentation: ", np.array(X_leaks).shape)

# Augmentation
for _ in range(N_PASSES):
    # The index range is fixed when the pass starts, but the list keeps the
    # images appended by earlier passes, so later passes also sample (and
    # re-augment) previously augmented images.
    for idx in range(0, len(X_leaks), SAMPLE_STEP):
        X_leaks.append(transform(image=X_leaks[idx])["image"])

    # Report progress from the list length; the full array is built once below.
    print("Data with augmentation:", (len(X_leaks),) + PATCH_SHAPE)

# Stack everything into a single array after all passes are done.
X_leaks_raw = np.array(X_leaks)

Data without augmentation:  (984, 20, 20, 14)
Data with augmentation: (1004, 20, 20, 14)
Data with augmentation: (1025, 20, 20, 14)
Data with augmentation: (1046, 20, 20, 14)
Data with augmentation: (1067, 20, 20, 14)
Data with augmentation: (1089, 20, 20, 14)
Data with augmentation: (1111, 20, 20, 14)
Data with augmentation: (1134, 20, 20, 14)
Data with augmentation: (1157, 20, 20, 14)
Data with augmentation: (1181, 20, 20, 14)
Data with augmentation: (1205, 20, 20, 14)
Data with augmentation: (1230, 20, 20, 14)
Data with augmentation: (1255, 20, 20, 14)
Data with augmentation: (1281, 20, 20, 14)
Data with augmentation: (1307, 20, 20, 14)
Data with augmentation: (1334, 20, 20, 14)
Data with augmentation: (1361, 20, 20, 14)
Data with augmentation: (1389, 20, 20, 14)
Data with augmentation: (1417, 20, 20, 14)
Data with augmentation: (1446, 20, 20, 14)
Data with augmentation: (1475, 20, 20, 14)
Data with augmentation: (1505, 20, 20, 14)
Data with augmentation: (1536, 20, 20, 14)
Data wit

In [5]:
# Read the non-leak patches and augment them.
PATCH_SHAPE = (20, 20, 14)  # only patches with exactly this shape are kept
N_PASSES = 34               # number of augmentation passes (== len(range(0, 826, 25)))
SAMPLE_STEP = 50            # each pass augments every 50th image of the current list

X_nonleaks = []
n_skipped = 0
for file in files_nonleak:
    image = imread(file)
    if image.shape == PATCH_SHAPE:
        X_nonleaks.append(image)
    else:
        # Count rejected patches so the loss is visible rather than silent.
        n_skipped += 1

if n_skipped:
    print("Skipped patches with unexpected shape:", n_skipped)
print("Data without augmentation: ", np.array(X_nonleaks).shape)

# Augmentation
for _ in range(N_PASSES):
    # The index range is fixed when the pass starts, but the list keeps the
    # images appended by earlier passes, so later passes also sample (and
    # re-augment) previously augmented images.
    for idx in range(0, len(X_nonleaks), SAMPLE_STEP):
        X_nonleaks.append(transform(image=X_nonleaks[idx])["image"])

    # Report progress from the list length; the full array is built once below.
    print("Data with augmentation:", (len(X_nonleaks),) + PATCH_SHAPE)

# Stack everything into a single array after all passes are done.
X_nonleaks_raw = np.array(X_nonleaks)

Data without augmentation:  (985, 20, 20, 14)
Data with augmentation: (1005, 20, 20, 14)
Data with augmentation: (1026, 20, 20, 14)
Data with augmentation: (1047, 20, 20, 14)
Data with augmentation: (1068, 20, 20, 14)
Data with augmentation: (1090, 20, 20, 14)
Data with augmentation: (1112, 20, 20, 14)
Data with augmentation: (1135, 20, 20, 14)
Data with augmentation: (1158, 20, 20, 14)
Data with augmentation: (1182, 20, 20, 14)
Data with augmentation: (1206, 20, 20, 14)
Data with augmentation: (1231, 20, 20, 14)
Data with augmentation: (1256, 20, 20, 14)
Data with augmentation: (1282, 20, 20, 14)
Data with augmentation: (1308, 20, 20, 14)
Data with augmentation: (1335, 20, 20, 14)
Data with augmentation: (1362, 20, 20, 14)
Data with augmentation: (1390, 20, 20, 14)
Data with augmentation: (1418, 20, 20, 14)
Data with augmentation: (1447, 20, 20, 14)
Data with augmentation: (1476, 20, 20, 14)
Data with augmentation: (1506, 20, 20, 14)
Data with augmentation: (1537, 20, 20, 14)
Data wit

In [6]:
# Combine both classes along the first axis: leak samples first, then non-leak.
class_arrays = (X_leaks_raw, X_nonleaks_raw)
X = np.vstack(class_arrays)
X.shape

(3909, 20, 20, 14)

In [7]:
# Labels in the same order as X: 1 for every leak sample, then 0 for every
# non-leak sample, shaped as a single column.
n_leak = int(X_leaks_raw.shape[0])
n_nonleak = int(X_nonleaks_raw.shape[0])
labels = [1] * n_leak + [0] * n_nonleak
Y = np.array(labels).reshape(X.shape[0], 1)
Y.shape

(3909, 1)

In [9]:
# Persist the complete feature and label arrays as .npy files
outputs = (
    ("../data/clean/X_full.npy", X),
    ("../data/clean/Y_full.npy", Y),
)
for out_path, out_array in outputs:
    np.save(out_path, out_array)