# Data Augmentation - Image Manipulation
1. Geometric Transformations (Rotations, Translation, Shearing, Flipping)
2. Non-Geometric Transformations (Cropping, Noise injection, Color Space, Jitter, Kernel)

In [None]:
import torch
import shutil
import os
import numpy as np
import cv2
import albumentations as A #this supposedly is much faster than using torchvision
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm

### Create augmentation pipeline

In [None]:
# Stage 1 - geometric transformations. Each transform fires independently
# with its own probability `p`.
geometric_transforms = [
    # Rotate by a random angle within +-25 degrees.
    A.Rotate(limit=25, p=0.5),
    # Translate (up to 30%) and rescale (up to 20%); rotate_limit=0 so no extra rotation is added here.
    A.ShiftScaleRotate(shift_limit=0.3, scale_limit=0.2, rotate_limit=0, p=0.3),
    # Mild shear (5 degrees) - probably of limited use for this dataset.
    A.Affine(shear=5, p=0.5),
    # Mirror left <-> right.
    A.HorizontalFlip(p=0.5),
]

# Stage 2 - non-geometric transformations (crop, noise, contrast, color, blur).
non_geometric_transforms = [
    # Take a random 70-100% crop of the image and resize it to 524x524.
    A.RandomResizedCrop(size=[524, 524], scale=(0.7, 1.0), p=0.5),
    # Light Gaussian noise (variance limited to the 2-10 range).
    A.GaussNoise(var_limit=(2, 10), p=0.2),
    # CLAHE contrast enhancement on color images.
    A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.2),
    # Small brightness/contrast/saturation/hue changes to mimic different lighting conditions.
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05, p=0.3),
    # Kernel blurring with a kernel size between 3 and 7.
    A.GaussianBlur(blur_limit=(3, 7), p=0.2),
]

# Stage 3 - always applied: force a fixed 524x524 output (the random crop above
# is skipped half of the time) and convert the image to a torch tensor.
output_transforms = [
    A.Resize(524, 524),
    ToTensorV2(),
]

augmentation_pipeline = A.Compose(
    geometric_transforms + non_geometric_transforms + output_transforms
)

### Use the pipeline to balance the dataset
### Idea: after augmentation, every category must contain the same number of images, equal to majority_class * 2

In [None]:
# Define the paths: one sub-folder per class is expected under `input_dir`.
input_dir = "../data/dataset_split/train"
output_dir = "../data/dataset_balanced/train"

# Validate the input BEFORE wiping the previous output, so a wrong path
# cannot destroy an existing balanced dataset and then fail.
if not os.path.isdir(input_dir):
    raise FileNotFoundError(f"Input directory not found: {input_dir}")

# Count the entries of every class folder. Non-directory entries (e.g. stray
# files such as .DS_Store) are ignored instead of crashing os.listdir().
class_counts = {}
for class_folder in sorted(os.listdir(input_dir)):
    class_path = os.path.join(input_dir, class_folder)
    if not os.path.isdir(class_path):
        continue
    class_counts[class_folder] = len(os.listdir(class_path))

if not class_counts:
    raise ValueError(f"No class folders found in {input_dir}")

# Start from a clean output directory so files from earlier runs never mix in.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)

os.makedirs(output_dir, exist_ok=True)

# Every class is expanded to `change_factor` times the size of the largest class.
change_factor = 2

max_class_size = max(class_counts.values())
new_target_size = max_class_size * change_factor

print(class_counts)
print(f"max category is {max_class_size} of class {max(class_counts, key=class_counts.get)}")

def _to_uint8_hwc(image):
    """Convert a pipeline output into an HWC ``uint8`` array that OpenCV can save.

    Args:
        image: Either a CHW ``torch.Tensor`` or an HWC ``np.ndarray``.

    Returns:
        A C-contiguous HWC ``np.uint8`` array.
    """
    if isinstance(image, torch.Tensor):
        # CHW -> HWC
        image = image.permute(1, 2, 0).cpu().numpy()
    if np.issubdtype(image.dtype, np.floating):
        # Float images are assumed to be in [0, 1] - TODO confirm if a
        # normalisation step is ever added to the pipeline.
        image = np.clip(image * 255.0, 0, 255)
    # Integer images are left untouched: they already hold 0-255 values, and
    # multiplying a uint8 array by 255 wraps modulo 256 (v -> -v mod 256),
    # which would save a negated image.
    return np.ascontiguousarray(image.astype(np.uint8))


for class_folder in tqdm(class_counts, desc="Balancing & Expanding Classes"):
    class_path = os.path.join(input_dir, class_folder)
    augmented_class_path = os.path.join(output_dir, class_folder)
    os.makedirs(augmented_class_path, exist_ok=True)

    # Pass 1: copy every readable original image into the output folder and
    # remember which files can be used as augmentation sources.
    source_images = []
    for img_name in sorted(os.listdir(class_path)):
        src_path = os.path.join(class_path, img_name)
        original = cv2.imread(src_path)
        if original is None:
            # cv2.imread returns None for anything it cannot decode.
            tqdm.write(f"Skipping unreadable file: {src_path}")
            continue
        cv2.imwrite(os.path.join(augmented_class_path, img_name), original)
        source_images.append(img_name)

    if not source_images:
        # Nothing to augment from; without this guard the while-loop below
        # would never terminate.
        tqdm.write(f"No readable images in {class_path}; class left empty")
        continue

    # Pass 2: cycle over the source images until the class reaches the target
    # size. The count is based on the images actually copied, so skipped files
    # do not leave the class short.
    num_needed = new_target_size - len(source_images)

    while num_needed > 0:
        for img_name in source_images:
            if num_needed <= 0:
                break

            img_path = os.path.join(class_path, img_name)
            # OpenCV loads BGR; the pipeline works on RGB.
            image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)

            augmented = _to_uint8_hwc(augmentation_pipeline(image=image)["image"])

            # num_needed decreases by one per saved file, so it doubles as a
            # unique suffix within the class.
            output_filename = f"{os.path.splitext(img_name)[0]}_aug_{num_needed}.png"
            output_path = os.path.join(augmented_class_path, output_filename)

            cv2.imwrite(output_path, cv2.cvtColor(augmented, cv2.COLOR_RGB2BGR))
            num_needed -= 1

print(f"Dataset balanced & expanded! New images saved in {output_dir}")

{'cardboard': 368, 'glass': 336, 'metal': 632, 'paper': 400, 'plastic': 736, 'trash': 396}
max category is 736 of class plastic


Balancing & Expanding Classes: 100%|██████████| 6/6 [01:48<00:00, 18.06s/it]

✅ Dataset balanced & expanded! New images saved in ../data/dataset_balanced/train



