In [None]:
import albumentations as A
import os
import random
import config
import sys
import cv2
import numpy as np
import shutil
sys.path.append('../util')
from DatasetAugmentation import *

# Prerequisite: download the FUSEG (Foot Ulcer Segmentation Challenge) dataset from
# https://github.com/uwm-bigdata/wound-segmentation/tree/master/data/Foot%20Ulcer%20Segmentation%20Challenge


# Root folder of the untouched download. The cells below read the
# train/, validation/ and test/ sub-folders (each with images/ and labels/) from it.
original_dataset_path = "/mnt/TUDAO/0Datasets/fuseg/original"
# Root folder handed to augment_dataset() as the destination of the generated dataset.
output_base = "/mnt/TUDAO/0Datasets/fuseg/augmented-v3-512"

  from .autonotebook import tqdm as notebook_tqdm


### Removing images with empty (all-black) masks from the dataset

In [None]:


def move_black_masks(
    masks_path,
    imgs_path,
    output_path,
    extensions=(".png", ".jpg", ".jpeg", ".tif")
):
    """Move every all-black mask (and its matching image) into ``output_path``.

    ``masks_path`` is walked recursively. Each file whose lower-cased name ends
    with one of ``extensions`` is read with OpenCV; a mask counts as black when
    every stored value is exactly 0 (a fourth/alpha channel is ignored).

    A black mask is moved to ``output_path`` as ``<stem>_mask.png``. The file is
    only renamed, never re-encoded. The matching image is looked up directly in
    ``imgs_path`` as ``<stem><ext>``, trying ``extensions`` in order, and is moved
    to ``output_path`` under its own file name.

    Args:
        masks_path: Directory tree containing the mask files.
        imgs_path: Directory containing the images (not searched recursively).
        output_path: Destination directory; created if it does not exist.
        extensions: File suffixes to consider. A single string or any iterable
            of strings is accepted.

    Returns:
        list[str]: Stems (file names without extension) of the masks that were
        moved, in the order they were found.
    """
    os.makedirs(output_path, exist_ok=True)

    # str.endswith() accepts only a str or a tuple, so normalise other iterables.
    if isinstance(extensions, str):
        extensions = (extensions,)
    else:
        extensions = tuple(extensions)

    black_masks = []

    for root, _, files in os.walk(masks_path):
        for file in files:
            if not file.lower().endswith(extensions):
                continue

            mask_path = os.path.join(root, file)
            mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
            if mask is None:
                print(f"[WARNING] Unable to read {mask_path}")
                continue

            # Only colour/grey values decide whether a mask is empty; drop alpha.
            if mask.ndim == 3 and mask.shape[2] == 4:
                mask = mask[..., :3]

            # Test the raw values instead of a grayscale conversion: the weighted
            # conversion can round faint single-channel values (e.g. B=1) down
            # to 0 and would then misreport a non-empty mask as black.
            if mask.any():
                continue

            base_name = os.path.splitext(file)[0]
            black_masks.append(base_name)

            # Resolve the matching image BEFORE moving anything; first hit wins.
            img_path = next(
                (
                    candidate
                    for candidate in (
                        os.path.join(imgs_path, base_name + ext) for ext in extensions
                    )
                    if os.path.exists(candidate)
                ),
                None,
            )

            # Move the mask (renamed with a "_mask.png" suffix).
            shutil.move(mask_path, os.path.join(output_path, f"{base_name}_mask.png"))

            # Report exactly what was moved so the log cannot claim an image
            # was relocated when none was found.
            if img_path is None:
                print(f"[WARNING] Matching image not found for {file}")
                print(f"[OK] Moved: {file} (mask only)")
            else:
                shutil.move(
                    img_path,
                    os.path.join(output_path, os.path.basename(img_path)),
                )
                print(f"[OK] Moved: {file} and corresponding image")

    print(f"\nTotal black masks moved:{len(black_masks)}")
    return black_masks



# Training split: masks folder, images folder, and the sibling "train-fix"
# folder that receives every all-black mask together with its image.
dir_mascaras, dir_imagens, dir_destino = (
    f"{original_dataset_path}/train/labels",
    f"{original_dataset_path}/train/images",
    f"{original_dataset_path}/train-fix/",
)

move_black_masks(
    masks_path=dir_mascaras,
    imgs_path=dir_imagens,
    output_path=dir_destino,
)


In [3]:
# Test split: masks folder, images folder, and the sibling "test-fix"
# folder that receives every all-black mask together with its image.
dir_mascaras, dir_imagens, dir_destino = (
    f"{original_dataset_path}/test/labels",
    f"{original_dataset_path}/test/images",
    f"{original_dataset_path}/test-fix/",
)

move_black_masks(
    masks_path=dir_mascaras,
    imgs_path=dir_imagens,
    output_path=dir_destino,
)

[OK] Moved: 0417.png and corresponding image
[OK] Moved: 0483.png and corresponding image
[OK] Moved: 0869.png and corresponding image

Total black masks moved:3


['0417', '0483', '0869']

In [4]:
# Validation split: masks folder, images folder, and the sibling "validation-fix"
# folder that receives every all-black mask together with its image.
dir_mascaras, dir_imagens, dir_destino = (
    f"{original_dataset_path}/validation/labels",
    f"{original_dataset_path}/validation/images",
    f"{original_dataset_path}/validation-fix/",
)

move_black_masks(
    masks_path=dir_mascaras,
    imgs_path=dir_imagens,
    output_path=dir_destino,
)

[OK] Moved: 0128.png and corresponding image
[OK] Moved: 0533.png and corresponding image

Total black masks moved:2


['0128', '0533']

### Moving 100 images from validation to test

In [3]:
# Transfer a fixed-size random subset of image/label pairs from the validation
# split to the test split.
# NOTE: this cell is not idempotent. Every execution moves another batch.
num_val_to_test = 100  # number of image/label pairs to transfer

from_img_dir = os.path.join(original_dataset_path, "validation", "images")
from_lbl_dir = os.path.join(original_dataset_path, "validation", "labels")
to_img_dir = os.path.join(original_dataset_path, "test", "images")
to_lbl_dir = os.path.join(original_dataset_path, "test", "labels")

os.makedirs(to_img_dir, exist_ok=True)
os.makedirs(to_lbl_dir, exist_ok=True)

# os.listdir() returns entries in arbitrary order, so sort them to make the
# sampled subset depend only on the folder contents and the seed.
candidates = sorted(os.listdir(from_img_dir))

# A label is expected under the same file name as its image. Keep only complete
# pairs so that a missing label can never leave an image moved on its own.
images = [
    name for name in candidates
    if os.path.isfile(os.path.join(from_lbl_dir, name))
]
if len(images) != len(candidates):
    print(f"[WARNING] {len(candidates) - len(images)} image(s) without a matching label were skipped")

if len(images) < num_val_to_test:
    raise ValueError(
        f"Cannot move {num_val_to_test} pairs: only {len(images)} complete "
        f"image/label pairs found in {from_img_dir}"
    )

# A dedicated, seeded generator gives the same subset on every fresh run and
# leaves the global `random` state untouched.
selected = random.Random(42).sample(images, num_val_to_test)

for img in selected:
    shutil.move(os.path.join(from_img_dir, img), os.path.join(to_img_dir, img))
    shutil.move(os.path.join(from_lbl_dir, img), os.path.join(to_lbl_dir, img))

### Augmentation

In [None]:

# -----------------------------
# Augmentation settings
# -----------------------------
# Number of augmented variants generated for each image of the training set.
N = 10
# Number of training images to re-assign to the validation / test splits.
num_to_valid = 0
num_to_test = 0

# Size passed positionally to A.Resize; every sample ends up 512 x 512.
target_size = (512, 512)

# Seeds Python's built-in `random` module only.
# NOTE(review): whether albumentations / augment_dataset draw from this generator
# depends on the installed versions. Confirm before relying on reproducibility.
random.seed(42)

# -----------------------------
# Input folders (original dataset)
# -----------------------------
orig_train_img_dir = os.path.join(original_dataset_path, "train/images")
orig_train_mask_dir = os.path.join(original_dataset_path, "train/labels")
orig_valid_img_dir = os.path.join(original_dataset_path, "validation/images")
orig_valid_mask_dir = os.path.join(original_dataset_path, "validation/labels")
orig_test_img_dir = os.path.join(original_dataset_path, "test/images")
orig_test_mask_dir = os.path.join(original_dataset_path, "test/labels")

# -----------------------------
# Output folders: "<split>_<kind>" -> <output_base>/<kind>/<split>
# -----------------------------
output_dirs = {
    f"{split}_{kind}": os.path.join(output_base, f"{kind}/{split}")
    for split in ("train", "valid", "test")
    for kind in ("images", "labels")
}

# -----------------------------
# Augmentation pipeline (applied in the listed order)
# -----------------------------
transforms = A.Compose([
    # Bring every sample to the 512 x 512 target resolution.
    # NOTE(review): INTER_NEAREST is passed as the image interpolation here.
    # Confirm this is intended for the photographs and not only for the masks.
    A.Resize(*target_size, interpolation=cv2.INTER_NEAREST),
    # Random horizontal and vertical flips.
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    # Random rotation by a multiple of 90 degrees.
    A.RandomRotate90(p=0.5),
    # Translation up to 10%, scaling up to 10% and rotation up to 30 degrees;
    # reflective padding keeps the borders continuous.
    A.ShiftScaleRotate(
        shift_limit=0.1,
        scale_limit=0.1,
        rotate_limit=30,
        border_mode=cv2.BORDER_REFLECT,
        p=0.7,
    ),
    # Brightness and contrast adjustments (p=0.5).
    A.RandomBrightnessContrast(p=0.5),
    # Elastic deformations (p=0.2).
    A.ElasticTransform(p=0.2),
    # Gaussian blur (p=0.3).
    A.GaussianBlur(p=0.3),
    # Grid distortions (p=0.2).
    A.GridDistortion(p=0.2),
])

# Build the augmented dataset under `output_base`.
augment_dataset(
    N, num_to_valid, num_to_test,
    orig_train_img_dir, orig_train_mask_dir,
    orig_valid_img_dir, orig_valid_mask_dir,
    orig_test_img_dir, orig_test_mask_dir,
    output_base,
    transforms,
)

Total images in the original dataset: 791
→ Training: 791
→ Validation (of training): 0
→ Test (training): 0

With N=10, total images generated in training will be: 8701


  original_init(self, **validated_kwargs)
Copiando valid: 100%|██████████| 100/100 [00:02<00:00, 34.62it/s]
Copiando test: 100%|██████████| 300/300 [00:01<00:00, 164.50it/s]


→ 100 images copied from the original valid folder.
→ 100 images copied from the original test folder.


Enlarging workout images: 100%|██████████| 791/791 [04:50<00:00,  2.73it/s]


Final summary:
train_images: 8701 files
train_labels: 8701 files
valid_images: 100 files
valid_labels: 100 files
test_images: 100 files
test_labels: 100 files



