# Data Augmentation - Image Manipulation
1. Geometric Transformations (Rotations, Translation, Shearing, Flipping)
2. Non-Geometric Transformations (Cropping, Noise Injection, Color Space, Jitter, Kernel)

In [16]:
import torch
import shutil
import torchvision
from torchvision import models,datasets,transforms
import os
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import cv2
import random
import albumentations as A #this supposedly is much faster than using torchvision
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm

In [None]:
#version 2
def hide_and_seek(image, **kwargs):
    """
    Applies a controlled Hide-and-Seek augmentation to prevent excessive information loss.

    The image is divided into square grid cells (cell size picked at random
    from a small set) and each cell is independently replaced, with a fixed
    probability, by the mean value of the *original* image.

    Args:
        image: ``np.ndarray`` in HWC layout or ``torch.Tensor`` in CHW layout.
            Arrays and tensors are copied, so the caller's data is left
            untouched. Any other object is used as-is (and must expose a
            CHW ``shape``, ``mean()`` and slice assignment).
        **kwargs: Ignored. Accepted so the function can be passed as a
            callback that receives extra keyword arguments.

    Returns:
        The augmented image in the same layout and dtype as the input.
    """
    is_numpy = isinstance(image, np.ndarray)

    # The fill value is computed exactly once, from the untouched pixels.
    # Recomputing the mean inside the loop made every hidden cell trigger a
    # full-image reduction and let the fill value drift as cells were erased.
    if is_numpy:
        img = np.transpose(image, (2, 0, 1)).copy()  # HWC -> CHW working copy
        fill_value = img.mean()
    elif isinstance(image, torch.Tensor):
        img = image.clone()
        # mean() is not defined for integer tensors, so reduce in float.
        fill_value = img.mean() if img.is_floating_point() else img.float().mean()
    else:
        img = image
        fill_value = img.mean()

    _, h, w = img.shape
    grid_sizes = [16, 32, 44]  # Removed large grid sizes
    hide_prob = 0.3  # Lowered probability to avoid excessive hiding

    grid_size = random.choice(grid_sizes)

    # Column-major walk over the grid; the last row/column of cells is
    # clipped to the image border.
    for x in range(0, w, grid_size):
        for y in range(0, h, grid_size):
            x_end = min(w, x + grid_size)
            y_end = min(h, y + grid_size)
            if random.random() <= hide_prob:
                # Soft erasing: mean value instead of black
                img[:, y:y_end, x:x_end] = fill_value

    if is_numpy:
        img = np.transpose(img, (1, 2, 0))  # back to HWC

    return img

# New Augmentation Pipeline (Balanced Erasing)
# The transforms run in the order listed; each one fires with its own
# probability `p`, and `A.OneOf` picks one of its children when it fires.
augmentation_pipeline = A.Compose([
    A.CoarseDropout(max_holes=2, max_height=30, max_width=30, fill_value=0, p=0.4),  # Lowered impact
    A.GridDropout(ratio=0.3, p=0.3),  # Reduced erasing coverage
    A.Lambda(image=hide_and_seek, p=0.3),  # Hide-and-Seek with reduced probability
    A.OneOf([
        A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),  # Introduce soft noise
        A.Blur(blur_limit=3, p=0.3)  # Use blur instead of hard cutout
    ], p=0.4),
    # A.RandomErasing(p=0.3, scale=(0.02, 0.2), ratio=(0.3, 3.3), value=0),  # Corrected RandomErasing
    ToTensorV2(),
])


# Scratch notes: stronger settings that are used by the version 3 pipeline.
# They are kept as comments because, as bare indented statements outside the
# Compose list, they made this cell fail with an IndentationError.
#   A.CoarseDropout(max_holes=7, max_height=40, max_width=40, fill_value=0, p=0.7),  # Increase erasure intensity
#   A.Erasing(p=0.65, scale=(0.03, 0.4), ratio=(0.3, 3.5), value=0),  # Increased upper erasure limit


In [None]:
#version 3
import albumentations as A
from albumentations.pytorch import ToTensorV2
import random
import torch
import numpy as np

def hide_and_seek(image, **kwargs):
    """
    Enhanced Hide-and-Seek with better structured occlusions.

    The image is divided into square grid cells (cell size picked at random
    from a small set) and each cell is independently replaced, with a fixed
    probability, by the mean value of the *original* image.

    Args:
        image: ``np.ndarray`` in HWC layout or ``torch.Tensor`` in CHW layout.
            Arrays and tensors are copied, so the caller's data is left
            untouched. Any other object is used as-is (and must expose a
            CHW ``shape``, ``mean()`` and slice assignment).
        **kwargs: Ignored. Accepted so the function can be passed as a
            callback that receives extra keyword arguments.

    Returns:
        The augmented image in the same layout and dtype as the input.
    """
    is_numpy = isinstance(image, np.ndarray)

    # The fill value is computed exactly once, from the untouched pixels.
    # With fine grids and a high hide probability, recomputing the mean per
    # hidden cell meant thousands of full-image reductions per call and a
    # fill value that drifted as more cells were erased.
    if is_numpy:
        img = np.transpose(image, (2, 0, 1)).copy()  # HWC -> CHW working copy
        fill_value = img.mean()
    elif isinstance(image, torch.Tensor):
        img = image.clone()
        # mean() is not defined for integer tensors, so reduce in float.
        fill_value = img.mean() if img.is_floating_point() else img.float().mean()
    else:
        img = image
        fill_value = img.mean()

    _, h, w = img.shape
    grid_sizes = [6, 12, 24, 36]  # More fine-grained occlusions
    hide_prob = 0.65  # Increase probability to erase more images

    grid_size = random.choice(grid_sizes)

    # Column-major walk over the grid; the last row/column of cells is
    # clipped to the image border.
    for x in range(0, w, grid_size):
        for y in range(0, h, grid_size):
            x_end = min(w, x + grid_size)
            y_end = min(h, y + grid_size)
            if random.random() <= hide_prob:
                img[:, y:y_end, x:x_end] = fill_value  # Smooth erasure

    if is_numpy:
        img = np.transpose(img, (1, 2, 0))  # back to HWC

    return img

# **FINAL Augmentation Pipeline for Maximum Erasing Impact**
# The erasing stages are collected in a list first and then wrapped into a
# single Compose; they run in the order listed, each with its own probability.
# NOTE(review): confirm that `value` is an accepted keyword of A.Erasing in
# the installed Albumentations version.
_erasing_stages = [
    # Increase erasure intensity
    A.CoarseDropout(max_holes=7, max_height=40, max_width=40, fill_value=0, p=0.7),
    # More structured dropout
    A.GridDropout(ratio=0.35, p=0.7),
    # Stronger Hide-and-Seek
    A.Lambda(image=hide_and_seek, p=0.65),
    # Increased upper erasure limit
    A.Erasing(p=0.65, scale=(0.03, 0.4), ratio=(0.3, 3.5), value=0),
    ToTensorV2(),
]
augmentation_pipeline = A.Compose(_erasing_stages)


In [18]:
# use the pipeline to also balance the dataset
# idea : augment images until limit set as majority_class*2
#
# Expected layout: input_dir/<class_name>/<image files>. Every class folder is
# mirrored into output_dir and then topped up with augmented PNGs until it
# holds `new_target_size` images.


# define the paths
input_dir = "../data/dataset_split/train"  
output_dir = "../data/dataset_erasing_02/train"  # for balanced augmented dataset

# overwrite it
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)  # delete contents

os.makedirs(output_dir, exist_ok=True)

# Count the entries of every class folder. Stray files that sit next to the
# class folders are skipped instead of crashing os.listdir().
class_counts = {}
for class_folder in os.listdir(input_dir):
    class_path = os.path.join(input_dir, class_folder)
    if not os.path.isdir(class_path):
        continue
    class_counts[class_folder] = len(os.listdir(class_path))

if not class_counts:
    raise ValueError(f"No class folders found in {input_dir}")

# change this
change_factor = 2

# Find the largest class (majority class)
max_class_size = max(class_counts.values())
new_target_size = max_class_size * change_factor

print(class_counts)
print(f"max category is {max_class_size} of class {max(class_counts, key=class_counts.get)}") 

for class_folder, current_count in tqdm(class_counts.items(), desc="Balancing & Expanding Classes"):
    class_path = os.path.join(input_dir, class_folder)
    augmented_class_path = os.path.join(output_dir, class_folder) #augmented images will be saved here
    os.makedirs(augmented_class_path, exist_ok=True)

    # decide later but this will copy all the non-augmented images as well
    # Only files OpenCV can decode are copied and later used as augmentation
    # sources; anything else is reported and ignored.
    images = []
    for img_name in os.listdir(class_path):
        src_path = os.path.join(class_path, img_name)
        dst_path = os.path.join(augmented_class_path, img_name)
        src_image = cv2.imread(src_path)
        if src_image is None:
            tqdm.write(f"Skipping unreadable file: {src_path}")
            continue
        cv2.imwrite(dst_path, src_image)  # Copy image
        images.append(img_name)

    if not images:
        # Nothing to augment from; the while-loop below would never finish.
        tqdm.write(f"No readable images in {class_path}; class left empty")
        continue

    # Compute number of extra images needed
    num_needed = new_target_size - len(images)  # balancing all classes to 2*majority

    # Augment existing images, cycling through them as often as necessary
    while num_needed > 0:
        produced_this_pass = 0
        for img_name in images:
            if num_needed <= 0:
                break 

            img_path = os.path.join(class_path, img_name)
            image = cv2.imread(img_path)
            if image is None:
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR (OpenCV) to RGB

            augmented = augmentation_pipeline(image=image)["image"]  # Albumentations output

            # Save Augmented Image (Directly as PNG)
            output_filename = f"{os.path.splitext(img_name)[0]}_aug_{num_needed}.png"
            output_path = os.path.join(augmented_class_path, output_filename)

            # Ensure the output is a NumPy array before saving
            if isinstance(augmented, torch.Tensor):  
                augmented = augmented.permute(1, 2, 0).cpu().numpy()  # Convert CHW -> HWC
                # ToTensorV2 keeps the input dtype, so a uint8 image is still
                # in [0,255]; multiplying it by 255 would wrap around in uint8
                # and corrupt the picture. Only float data is rescaled.
                if np.issubdtype(augmented.dtype, np.floating):
                    augmented = np.clip(augmented * 255, 0, 255).astype(np.uint8)  # [0,1] -> [0,255]
                # permute() yields a strided view; OpenCV gets a contiguous array
                augmented = np.ascontiguousarray(augmented)

            # Save the augmented image
            cv2.imwrite(output_path, cv2.cvtColor(augmented, cv2.COLOR_RGB2BGR))
            num_needed -= 1
            produced_this_pass += 1

        if produced_this_pass == 0:
            # A full pass without a single new image means no progress is possible.
            tqdm.write(f"Could not create more images for {class_folder}; {num_needed} still missing")
            break

print(f"✅ Dataset balanced & expanded! New images saved in {output_dir}")

{'cardboard': 368, 'glass': 336, 'metal': 632, 'paper': 400, 'plastic': 736, 'trash': 396}
max category is 736 of class plastic


Balancing & Expanding Classes: 100%|██████████| 6/6 [1:31:37<00:00, 916.21s/it]   

✅ Dataset balanced & expanded! New images saved in ../data/dataset_erasing_02/train





In [19]:
import os

# Root folder whose class sub-directories are tallied

main_dir = "../data/dataset_erasing_02/train"

# Map each sub-directory name to the number of files in it whose
# (case-insensitive) extension is a common image format.
image_counts = {
    subfolder: sum(
        1
        for file in os.listdir(os.path.join(main_dir, subfolder))
        if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff'))
    )
    for subfolder in os.listdir(main_dir)
    if os.path.isdir(os.path.join(main_dir, subfolder))  # plain files are ignored
}

# Report one line per class
for subfolder, count in image_counts.items():
    print(f"{subfolder}: {count} images")

cardboard: 1472 images
glass: 1472 images
metal: 1472 images
paper: 1472 images
plastic: 1472 images
trash: 1472 images


In [None]:
#sos remember to change the directory for the tensors
# Root of the augmented dataset; the 'train' sub-folder is appended below.
# The path previously ended in '/train' already, so the join produced
# '.../train/train'. It now also names the folder written by the balancing
# cell (dataset_erasing_02).
dir = '../data/dataset_erasing_02'

params = { 'batch_size':16,
           'shuffle':True,
           'num_workers':4 }

# Channel-wise mean/std shared by the training and evaluation transforms
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Training: random resized crop on top of the offline erasing augmentation
train_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.RandomResizedCrop(256), #Augmented
                                      transforms.ToTensor(),
                                      normalize])

#the training dataset will be from the augmented (erased + balanced) folder
train_dataset = datasets.ImageFolder(os.path.join(dir, 'train'),transform = train_transform )

# Evaluation: deterministic center crop. `transform` keeps this value after
# the cell has run, exactly as before.
transform = transforms.Compose([transforms.Resize(256),
                                transforms.CenterCrop(256),
                                transforms.ToTensor(),
                                normalize])


#change the dir for validation (and test)
dir = '../data/dataset_split'
val_dataset = datasets.ImageFolder(os.path.join(dir, 'val'),transform = transform )


#data loaders
train_dataloader = torch.utils.data.DataLoader(train_dataset, **params)
val_dataloader = torch.utils.data.DataLoader(val_dataset, **params)

class_names = train_dataset.classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")