### Data processing

1. Dataset loading: images and masks
2. Dataset splitting:
   1. Training set
   2. Validation set
   3. Test set
3. Data augmentation
4. Save the dataset

In [1]:
import os
import numpy as np
import cv2
from glob import glob
from tqdm import tqdm
from albumentations import ( Compose, RandomBrightnessContrast, RandomRain, RandomFog, RandomSunFlare, CoarseDropout )

# import os
# import numpy as np
# import cv2
# from glob import glob
# from tqdm import tqdm
# from albumentations.augmentations.transforms import (
#     RandomBrightnessContrast, RandomRain, RandomFog, RandomSunFlare,
#     CoarseDropout
# )

### Dataset loading: images and masks

In [2]:
def load_training_data(path):
    """
    Collect the sorted image and mask file paths from the training directory.

    Args:
        path: Dataset root that contains ``train/images`` and ``train/masks``.

    Returns:
        Tuple ``(images, masks)``: two sorted lists of ``*.png`` file paths.

    Raises:
        ValueError: If either folder contains no ``*.png`` files, or if the two
            folders contain a different number of files.
    """
    images = sorted(glob(os.path.join(path, 'train/images', '*.png')))
    masks = sorted(glob(os.path.join(path, 'train/masks', '*.png')))

    if not images or not masks:
        raise ValueError("No training images or masks found. Check your dataset paths.")

    # Images and masks are paired purely by their position in the sorted lists,
    # so a count mismatch would silently misalign or drop samples downstream.
    if len(images) != len(masks):
        raise ValueError(
            f"Found {len(images)} training images but {len(masks)} masks. "
            "Each image needs exactly one mask."
        )

    return images, masks

In [4]:
def augment_training_data(images, masks, save_dir):
    """
    Apply each augmentation to every training pair and save the results.

    For every (image, mask) pair, one augmented copy per augmentation is written to
    ``<save_dir>/images/<name>_<aug>.png`` and ``<save_dir>/masks/<name>_<aug>.png``.
    The mask is passed through albumentations' standard ``mask`` target, so the
    pixel-level effects (brightness/contrast, rain, fog, sun flare) are applied to
    the image only and the mask values are preserved.

    Args:
        images: List of image file paths.
        masks: List of mask file paths, aligned by position with ``images``.
        save_dir: Output root; its ``images`` and ``masks`` subfolders are created
            if they do not exist.

    Raises:
        ValueError: If ``images`` and ``masks`` have different lengths.
        OSError: If an augmented image or mask cannot be written.
    """
    import warnings

    # zip() would silently truncate to the shorter list and hide missing pairs.
    if len(images) != len(masks):
        raise ValueError(
            f"Got {len(images)} images but {len(masks)} masks; pairs would be misaligned."
        )

    # NOTE(review): the max_holes/max_height/max_width arguments are reported as
    # deprecated by recent albumentations releases - confirm against the installed version.
    augmentation_list = [
        ("brightness_contrast", RandomBrightnessContrast(p=1.0)),
        ("rain", RandomRain(p=1.0)),
        ("fog", RandomFog(p=1.0)),
        ("sunflare", RandomSunFlare(p=1.0)),
        ("coarsedropout", CoarseDropout(p=1.0, max_holes=10, max_height=32, max_width=32))
    ]

    # Build each pipeline once instead of once per image per augmentation.
    # No additional_targets override: remapping 'mask' to 'image' would make the
    # pixel-level effects alter the mask as well.
    # NOTE(review): CoarseDropout is a dual transform; with no mask fill value set it
    # is expected to leave the mask untouched - confirm for the installed version.
    pipelines = [(aug_name, Compose([aug])) for aug_name, aug in augmentation_list]

    image_out_dir = os.path.join(save_dir, 'images')
    mask_out_dir = os.path.join(save_dir, 'masks')
    os.makedirs(image_out_dir, exist_ok=True)
    os.makedirs(mask_out_dir, exist_ok=True)

    for img_path, mask_path in tqdm(zip(images, masks), total=len(images), desc="Augmenting training data"):
        # splitext keeps dotted stems intact ("frame.001.png" -> "frame.001"),
        # so different inputs cannot collapse onto the same output name.
        img_name = os.path.splitext(os.path.basename(img_path))[0]

        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        mask = cv2.imread(mask_path, cv2.IMREAD_COLOR)

        # cv2.imread returns None for unreadable files; skip the pair but say so.
        if img is None or mask is None:
            warnings.warn(f"Skipping unreadable pair: {img_path} / {mask_path}")
            continue

        for aug_name, pipeline in pipelines:
            augmented = pipeline(image=img, mask=mask)
            out_name = f"{img_name}_{aug_name}.png"

            # Save augmented image and mask under the same file name in their own folders.
            outputs = (
                (os.path.join(image_out_dir, out_name), augmented['image']),
                (os.path.join(mask_out_dir, out_name), augmented['mask']),
            )
            for out_path, data in outputs:
                # cv2.imwrite signals failure through its return value.
                if not cv2.imwrite(out_path, data):
                    raise OSError(f"Failed to write augmented file: {out_path}")

In [5]:
# Execute augmentation
# Dataset root. Set the UNET_AUG_DATASET_PATH environment variable to run the
# notebook on another machine; otherwise the original local path is used.
dataset_path = os.environ.get(
    'UNET_AUG_DATASET_PATH',
    '/home/ahsan/University/Thesis/UNet_Directory/Datasets/second_phase/processed_dataset/aug',
)
train_images, train_masks = load_training_data(dataset_path)

In [6]:
# Prepare the output folders for the augmented images and masks.
# NOTE(review): this is the same train/ folder the input paths were globbed from,
# so reloading the data after augmentation will also pick up the augmented files.
augmented_dir = os.path.join(dataset_path, 'train')
for subfolder in ('images', 'masks'):
    os.makedirs(os.path.join(augmented_dir, subfolder), exist_ok=True)

In [7]:
# Write one augmented copy per augmentation for every training image/mask pair.
augment_training_data(images=train_images, masks=train_masks, save_dir=augmented_dir)

  ("coarsedropout", CoarseDropout(p=1.0, max_holes=10, max_height=32, max_width=32))
  ("coarsedropout", CoarseDropout(p=1.0, max_holes=10, max_height=32, max_width=32))
  ("coarsedropout", CoarseDropout(p=1.0, max_holes=10, max_height=32, max_width=32))
Augmenting training data: 100%|██████████| 8/8 [00:01<00:00,  4.87it/s]
