# Data Preprocessing and Augmentation

In [5]:
import os
import numpy as np
import torch
from skimage.io import imread
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [6]:
# Dataset locations, given relative to the current working directory.
# Each split provides a "color" folder and a matching "label" folder.
train_path, train_label_path = "../Dataset/TrainVal/color", "../Dataset/TrainVal/label"
test_path, test_label_path = "../Dataset/Test/color", "../Dataset/Test/label"

In [7]:

class SegmentationDataset(Dataset):
    """Paired image/mask dataset read from two directories.

    Every regular, non-hidden file in ``image_dir`` is one sample. Its mask is
    looked up in ``mask_dir`` under the same file name; when no such file
    exists, a mask sharing the same stem but a different extension
    (e.g. ``cat.jpg`` -> ``cat.png``) is used instead.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the segmentation masks.
        transform: Optional callable invoked as
            ``transform(image=image, mask=mask)`` and expected to return a
            mapping with ``"image"`` and ``"mask"`` entries. ``None`` returns
            the arrays exactly as loaded.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order, so sort to keep the
        # index -> sample mapping reproducible between runs. Hidden entries
        # and sub-directories (e.g. ".ipynb_checkpoints") are not samples.
        self.image_filenames = sorted(
            name
            for name in os.listdir(image_dir)
            if not name.startswith(".")
            and os.path.isfile(os.path.join(image_dir, name))
        )
        # stem -> mask file name; built on first use so that construction
        # never touches mask_dir.
        self._mask_by_stem = None

    def __len__(self):
        """Return the number of image files found in ``image_dir``."""
        return len(self.image_filenames)

    def _mask_path(self, idx):
        """Return the path of the mask belonging to sample ``idx``.

        Raises:
            FileNotFoundError: If ``mask_dir`` holds neither a file with the
                image's name nor one with the same stem.
        """
        filename = self.image_filenames[idx]

        # An exact file-name match always wins.
        same_name = os.path.join(self.mask_dir, filename)
        if os.path.isfile(same_name):
            return same_name

        # Otherwise fall back to a mask with the same stem, whatever its
        # extension. The listing is sorted so ties resolve deterministically.
        if self._mask_by_stem is None:
            self._mask_by_stem = {
                os.path.splitext(name)[0]: name
                for name in sorted(os.listdir(self.mask_dir))
                if not name.startswith(".")
            }
        stem = os.path.splitext(filename)[0]
        try:
            return os.path.join(self.mask_dir, self._mask_by_stem[stem])
        except KeyError:
            raise FileNotFoundError(
                f"No mask for image {filename!r} in {self.mask_dir!r}"
            ) from None

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, mask)``.

        Both files are read with ``imread``; when a transform is configured
        its ``"image"`` and ``"mask"`` outputs are returned instead of the
        raw arrays.
        """
        image = imread(os.path.join(self.image_dir, self.image_filenames[idx]))
        mask = imread(self._mask_path(idx))

        # Compare against None explicitly: an object that defines __len__ or
        # __bool__ can be falsy and would otherwise be skipped silently.
        if self.transform is not None:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented["image"], augmented["mask"]

        return image, mask

# Albumentations-based transformation pipeline.
# NOTE: uses the range-style arguments (std_range, num_holes_range,
# hole_height_range, hole_width_range); older albumentations releases that
# only know var_limit / max_holes / max_height / max_width will reject them.
transform = A.Compose([
    A.Resize(300, 300),  # Resize to a fixed 300x300 (aspect ratio is NOT preserved)
    A.RandomCrop(256, 256),  # Random crop to the fixed 256x256 output size
    A.HorizontalFlip(p=0.5),  # Flip images & masks with 50% probability
    A.Rotate(limit=20, p=0.5),  # Random rotation (-20° to 20°)
    # `alpha_affine` is no longer passed: recent albumentations releases
    # deprecate/reject it on ElasticTransform.
    A.ElasticTransform(alpha=1, sigma=50, p=0.3),  # Elastic distortion
    A.GridDistortion(p=0.3),  # Slight grid warping
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5),  # Color jitter
    A.GaussianBlur(blur_limit=(3, 7), p=0.2),  # Random blur
    # Former var_limit=(10, 50), a variance on the 0-255 scale, expressed as
    # a standard deviation relative to the maximum pixel value.
    A.GaussNoise(std_range=(10 ** 0.5 / 255, 50 ** 0.5 / 255), p=0.2),  # Random noise
    # Former max_holes=2, max_height=50, max_width=50.
    # NOTE(review): equal bounds give exactly two 50x50 holes; lower the first
    # value of a range if a variable count/size is wanted - confirm intent.
    A.CoarseDropout(
        num_holes_range=(2, 2),
        hole_height_range=(50, 50),
        hole_width_range=(50, 50),
        p=0.3,
    ),  # Cutout occlusion
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # Standard normalization
    ToTensorV2(),  # Convert to PyTorch tensor
])

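# Cell output residue: the three calls below were echoed beneath the cell,
# presumably by albumentations warnings about their arguments
# (alpha_affine, var_limit, max_holes/max_height/max_width).
#   A.ElasticTransform(alpha=1, sigma=50, alpha_affine=50, p=0.3),  # Elastic distortion
#   A.GaussNoise(var_limit=(10, 50), p=0.2),  # Random noise
#   A.CoarseDropout(max_holes=2, max_height=50, max_width=50, p=0.3),  # Cutout occlusion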


In [8]:
# Example usage: wrap the TrainVal images/labels in the dataset, then batch
# them eight at a time in shuffled order.
dataset = SegmentationDataset(train_path, train_label_path, transform)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=8)