In [1]:
import os
import glob
import shutil

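# The original dataset ships images and masks together in a single directory, so we first
# copy them into separate `images/` and `labels/` sub-directories.
# The `_anno` suffix is also dropped from each mask's file name (`x_anno.bmp` -> `x.bmp`),
# presumably so that every mask ends up with the same file name as its image.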

def label_file_name(file_name, suffix="_anno.bmp", extension=".bmp"):
    """Return the file name a mask should have inside the labels directory.

    Only a *trailing* ``suffix`` is swapped for ``extension``. A plain
    ``str.replace`` would also rewrite any earlier occurrence of ``suffix``
    inside the name, which silently produces the wrong destination name.

    Args:
        file_name: Base name of the source file (no directory part).
        suffix: Ending that marks a file as a mask/annotation.
        extension: Ending the returned name should carry instead of ``suffix``.

    Returns:
        ``file_name`` with the trailing ``suffix`` replaced by ``extension``,
        or ``file_name`` unchanged when it does not end with ``suffix``.
    """
    if file_name.endswith(suffix):
        return file_name[:-len(suffix)] + extension
    return file_name


# Directory that holds every .bmp of the original dataset (images and masks mixed).
base_dir = 'dataset/original'

images_dir = os.path.join(base_dir, "images")
labels_dir = os.path.join(base_dir, "labels")

# exist_ok=True keeps the cell re-runnable after the folders have been created.
os.makedirs(images_dir, exist_ok=True)
os.makedirs(labels_dir, exist_ok=True)

# The pattern is not recursive, so files already copied into images/ and labels/
# are not picked up again on a re-run.
bmp_files = glob.glob(os.path.join(base_dir, "*.bmp"))
for bmp_path in bmp_files:
    file_name = os.path.basename(bmp_path)

    if file_name.endswith("_anno.bmp"):
        # Mask: goes to labels/ under the name without the `_anno` marker.
        dest = os.path.join(labels_dir, label_file_name(file_name))
    else:
        # Everything else is treated as an input image and keeps its name.
        dest = os.path.join(images_dir, file_name)

    # copy2 copies (does not move) the file; the originals stay in base_dir.
    shutil.copy2(bmp_path, dest)

# Counts are taken from disk, so they also include files left by earlier runs.
print(f"{len(glob.glob(os.path.join(images_dir, '*.bmp')))} images in 'images/'")
print(f"{len(glob.glob(os.path.join(labels_dir, '*.bmp')))} masks in 'labels/'")

165 images in 'images/'
165 masks in 'labels/'


In [2]:
import albumentations as A
import os
import random
import numpy as np
import config
import sys
import cv2
sys.path.append('../util')
from DatasetAugmentation import *


# Root directory of the original dataset (input) and root of the generated
# dataset (output, taken from the project's `config` module).
original_dataset_path = './dataset/original'
output_base           = config.dataset_path

# -----------------------------
# Parameters
# -----------------------------
# Number of augmentations. In the recorded run, 116 training images with N=15
# yield 1856 training files, i.e. 16 files per training image.
N = 15
num_to_valid = 24    # number of images to move from train to valid
num_to_test  = 25    # number of images to move from train to test


# Size handed to the resize step of the augmentation pipeline.
target_size  = (256, 256)

# Seed every global RNG available here, not only Python's `random`, so that
# NumPy-based randomness downstream is repeatable as well.
# NOTE(review): recent Albumentations releases reportedly keep their own RNG that
# ignores global seeds; if so, the pipeline itself must be seeded too — confirm
# against the installed version.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# -----------------------------
# Input and output locations
# -----------------------------
# Training images and masks come from the `images/` and `labels/` sub-directories
# of the original dataset.
orig_train_img_dir, orig_train_mask_dir = (
    os.path.join(original_dataset_path, sub_dir) for sub_dir in ('images', 'labels')
)

# All validation/test inputs point at the same `blank` directory. The recorded run
# reports 0 images copied from the original valid/test folders, so this is
# presumably a placeholder for a dataset that has no predefined split.
placeholder_dir = os.path.join(original_dataset_path, 'blank')
orig_valid_img_dir = orig_valid_mask_dir = placeholder_dir
orig_test_img_dir = orig_test_mask_dir = placeholder_dir

# Map of '<split>_<kind>' -> '<output_base>/<kind>/<split>'.
# NOTE(review): not referenced again in the cells visible here.
output_dirs = {
    f'{split}_{kind}': os.path.join(output_base, f'{kind}/{split}')
    for split in ('train', 'valid', 'test')
    for kind in ('images', 'labels')
}

# Augmentation pipeline: every sample is first resized to `target_size`, then each
# following step is applied with its own probability `p`.
# NOTE(review): `interpolation` on Resize appears to control how the *image* is
# resampled (masks are handled separately by the library) — confirm that
# nearest-neighbour is really intended for images.
# NOTE(review): recent Albumentations releases reportedly ignore global seeds such
# as `random.seed`; check whether the pipeline needs its own seed to be repeatable.
resize_height, resize_width = target_size
shift_scale_rotate_args = dict(
    shift_limit=0.1,
    scale_limit=0.1,
    rotate_limit=30,
    p=0.7,
    border_mode=cv2.BORDER_REFLECT,
)

augmentation_steps = [
    A.Resize(resize_height, resize_width, interpolation=cv2.INTER_NEAREST),
    # Geometric changes
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(**shift_scale_rotate_args),
    # Intensity / deformation changes
    A.RandomBrightnessContrast(p=0.5),
    A.ElasticTransform(p=0.2),
    A.GaussianBlur(p=0.3),
    A.GridDistortion(p=0.2),
]

transforms = A.Compose(augmentation_steps)

def fix_mask(mask):
    """Binarize a mask: strictly positive values become 255, all others 0.

    Args:
        mask: NumPy array holding the mask values.

    Returns:
        A new ``uint8`` array with the same shape as ``mask`` containing only
        0 and 255. The input array is not modified.
    """
    foreground = mask > 0
    binary = foreground.astype(np.uint8)  # fresh array of 0/1
    binary *= 255                         # scale 1 -> 255; stays uint8
    return binary


# Build the final dataset under `output_base`. Per the recorded output, the helper
# copies images from the original valid/test folders, copies `num_to_valid` and
# `num_to_test` training images to validation and test, and augments the remaining
# training images. `fix_mask` is passed as the function to apply to masks.
source_dirs = (
    orig_train_img_dir, orig_train_mask_dir,
    orig_valid_img_dir, orig_valid_mask_dir,
    orig_test_img_dir, orig_test_mask_dir,
)

augment_dataset(
    N, num_to_valid, num_to_test,
    *source_dirs,
    output_base,
    transforms,
    function_to_apply_to_masks=fix_mask,
)

  from .autonotebook import tqdm as notebook_tqdm


Total images in the original dataset: 165
→ Training: 116
→ Validation (of training): 24
→ Test (training): 25

With N=15, total images generated in training will be: 1856


  original_init(self, **validated_kwargs)


→ 0 images copied from the original valid folder.
→ 0 images copied from the original test folder.


Copiando valid: 100%|██████████| 24/24 [00:00<00:00, 97.16it/s]


→ 24 images copied from the train for validation.


Copiando test: 100%|██████████| 25/25 [00:00<00:00, 109.76it/s]


→ 25 images copied from the train for testing.


Enlarging workout images: 100%|██████████| 116/116 [00:13<00:00,  8.50it/s]


Final summary:
train_images: 1856 files
train_labels: 1856 files
valid_images: 24 files
valid_labels: 24 files
test_images: 25 files
test_labels: 25 files



