In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
import random

# Dataset locations (edit these to match your machine)
input_dir = r"E:\CLD_project\CLD_dataset_split"  # source dataset laid out as train/val/test
output_dir = r"E:\CLD_project\CLD_dataset_split_augmented"  # root of the augmented copy

# Number of images each class should end up with (originals + augmented)
TARGET_IMAGES = 30000

# Make sure the destination root exists before anything is written below it
os.makedirs(output_dir, exist_ok=True)

# ✅ Augmentation Functions
def augment_brightness(image):
    """Shift the brightness of a float image by a random offset.

    A single offset is drawn uniformly from [-0.4, 0.4] and added to every
    pixel; the result is clipped back into [0, 1].

    This is plain NumPy arithmetic, equivalent to what
    ``tf.image.adjust_brightness`` does for float input, but it avoids
    building a TensorFlow tensor and converting it back for every image.

    Args:
        image: float array with values expected in [0, 1].

    Returns:
        A new array of the same shape as ``image`` with values in [0, 1].
    """
    delta = random.uniform(-0.4, 0.4)
    return np.clip(np.asarray(image) + delta, 0, 1)

def augment_contrast(image):
    """Scale the contrast of a float image by a random factor.

    A factor is drawn uniformly from [0.9, 1.2]. Each channel is adjusted
    around its own mean as ``(x - mean) * factor + mean`` (the formula
    documented for ``tf.image.adjust_contrast``), then clipped to [0, 1].

    Implemented in NumPy so no TensorFlow tensor has to be created and
    converted back for every image.

    Args:
        image: float array of shape (height, width, channels) with values
            expected in [0, 1].

    Returns:
        A new array of the same shape as ``image`` with values in [0, 1].
    """
    factor = random.uniform(0.9, 1.2)
    pixels = np.asarray(image)
    # Per-channel mean over the two spatial axes; keepdims lets it broadcast.
    channel_mean = pixels.mean(axis=(0, 1), keepdims=True)
    return np.clip((pixels - channel_mean) * factor + channel_mean, 0, 1)

def augment_flip(image):
    """Mirror the image left-to-right (reverse the order of its columns)."""
    mirrored = np.fliplr(image)
    return mirrored

def augment_rotate(image):
    """Rotate the image about its centre by a random angle in [-30, 30) degrees.

    The scale is left at 1 and the destination size passed to
    ``cv2.warpAffine`` is the input's own (width, height).
    """
    rows, cols, _ = image.shape
    degrees = np.random.uniform(-30, 30)
    centre = (cols / 2, rows / 2)
    rotation = cv2.getRotationMatrix2D(centre, degrees, 1)
    return cv2.warpAffine(image, rotation, (cols, rows))

def augment_shift(image):
    """Translate the image by a random offset of up to 20% of each dimension.

    The horizontal offset is drawn first, then the vertical one; both are
    applied with a 2x3 affine translation matrix and the output keeps the
    input's (width, height).
    """
    rows, cols, _ = image.shape
    dx = cols * np.random.uniform(-0.2, 0.2)
    dy = rows * np.random.uniform(-0.2, 0.2)
    translation = np.array([[1, 0, dx], [0, 1, dy]], dtype=np.float32)
    return cv2.warpAffine(image, translation, (cols, rows))

def augment_zoom(image):
    """Zoom in or out around the image centre by a random factor in [0.8, 1.2).

    The previous implementation resized the whole image to a new size and
    then straight back to 224x224, which leaves the framing unchanged and
    only blurs the picture. Here the image is scaled about its centre with
    an affine warp while the canvas keeps the input's size, so factors above
    1 crop into the centre and factors below 1 shrink the content and leave
    a border (filled by ``cv2.warpAffine``'s default border handling, the
    same as the rotate and shift augmentations).

    Args:
        image: array of shape (height, width, channels).

    Returns:
        The zoomed image with the same height and width as the input.
    """
    h, w = image.shape[:2]
    zoom_factor = np.random.uniform(0.8, 1.2)
    # Angle 0 with a non-unit scale yields a pure centre-anchored scaling matrix.
    M_zoom = cv2.getRotationMatrix2D((w / 2, h / 2), 0, zoom_factor)
    return cv2.warpAffine(image, M_zoom, (w, h))

def augment_cover_part(image):
    """Black out one randomly placed square patch of the image.

    The patch side is 30% of the image height (the original drew it from
    ``randint(a, a)``, which always returns ``a``), capped at the image width
    so the placement range can never become negative on narrow images.

    The input is no longer modified in place: the patch is written into a
    copy, matching how ``augment_salt_pepper_noise`` treats its argument.

    Args:
        image: array of shape (height, width, channels).

    Returns:
        A new array equal to ``image`` except for the zeroed patch.
    """
    h, w, _ = image.shape
    mask_size = min(int(h * 0.3), w)
    x = random.randint(0, w - mask_size)
    y = random.randint(0, h - mask_size)
    covered = image.copy()
    covered[y:y + mask_size, x:x + mask_size, :] = 0
    return covered

def augment_gaussian_noise(image):
    """Add zero-mean Gaussian noise to the image and clip the result to [0, 1].

    The standard deviation is drawn uniformly from [10, 30]; the sampled
    noise is cast to float32 and divided by 255 before being added.
    """
    sigma = random.uniform(10, 30)
    raw_noise = np.random.normal(0, sigma, image.shape)
    scaled_noise = raw_noise.astype(np.float32) / 255.0
    noisy = image + scaled_noise
    return np.clip(noisy, 0, 1)

def augment_salt_pepper_noise(image, salt_prob=0.2, pepper_prob=0.2):
    """Return a copy of the image with random pixels forced to 1.0 or 0.0.

    ``salt_prob`` and ``pepper_prob`` set how many coordinates are drawn for
    each kind, as a fraction of height * width. Coordinates are sampled with
    replacement, and pepper is written after salt, so a pixel hit by both
    ends up at 0.0. All channels of a chosen pixel are overwritten.
    """
    rows, cols, _ = image.shape
    n_salt = int(salt_prob * rows * cols)
    n_pepper = int(pepper_prob * rows * cols)

    # Draw order (salt rows, salt cols, pepper rows, pepper cols) is kept fixed.
    salt_rows = np.random.randint(0, rows, n_salt)
    salt_cols = np.random.randint(0, cols, n_salt)
    pepper_rows = np.random.randint(0, rows, n_pepper)
    pepper_cols = np.random.randint(0, cols, n_pepper)

    result = image.copy()
    result[salt_rows, salt_cols, :] = 1.0
    result[pepper_rows, pepper_cols, :] = 0.0
    return result

# Pool of augmentation callables; the processing loop samples from this list.
AUGMENTATIONS = [
    augment_brightness,
    augment_contrast,
    augment_flip,
    augment_rotate,
    augment_shift,
    augment_zoom,
    augment_cover_part,
    augment_gaussian_noise,
    augment_salt_pepper_noise,
]

# ✅ Process 'train' and 'val' (Skip 'test')
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')


def _load_rgb_224(image_path):
    """Read an image from disk as a 224x224 RGB array, or None if it cannot be read."""
    image = cv2.imread(image_path)
    if image is None:
        return None
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return cv2.resize(image, (224, 224))


def _save_rgb(image, save_path):
    """Write an RGB image to disk via OpenCV (which expects BGR); return True on success."""
    return bool(cv2.imwrite(save_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR)))


for split in ['train', 'val']:  # Apply only to train and val
    split_input_path = os.path.join(input_dir, split)
    split_output_path = os.path.join(output_dir, split)
    os.makedirs(split_output_path, exist_ok=True)

    for class_name in os.listdir(split_input_path):
        class_input_path = os.path.join(split_input_path, class_name)
        # Stray files next to the class folders would make os.listdir fail below.
        if not os.path.isdir(class_input_path):
            continue
        class_output_path = os.path.join(split_output_path, class_name)
        os.makedirs(class_output_path, exist_ok=True)

        print(f"Processing class '{class_name}' in '{split}'...")

        # Case-insensitive match so files such as 'IMG_001.JPG' are not dropped.
        images = [
            img for img in os.listdir(class_input_path)
            if img.lower().endswith(IMAGE_EXTENSIONS)
        ]
        num_original = len(images)

        if num_original == 0:
            print(f"⚠️ Skipping '{class_name}' (no images found).")
            continue

        print(f"🔹 Found {num_original} original images.")

        # Copy originals (resized to 224x224) and remember which ones were
        # actually readable; only those are used as augmentation sources.
        usable_images = []
        for image_name in images:
            image = _load_rgb_224(os.path.join(class_input_path, image_name))
            if image is None:
                print(f"⚠️ Could not read '{image_name}', skipping it.")
                continue
            if not _save_rgb(image, os.path.join(class_output_path, f"orig_{image_name}")):
                print(f"⚠️ Could not write copy of '{image_name}', skipping it.")
                continue
            usable_images.append(image_name)

        if not usable_images:
            # Without this guard the generation loop below would spin forever.
            print(f"⚠️ Skipping '{class_name}' (no readable images).")
            continue

        # Base the shortfall on what was really copied, and never go negative
        # when a class already meets or exceeds the target.
        num_needed = max(0, TARGET_IMAGES - len(usable_images))
        print(f"🔹 Generating {num_needed} augmented images.")

        aug_count = 0
        while aug_count < num_needed:
            count_before_pass = aug_count
            for image_name in usable_images:
                image = _load_rgb_224(os.path.join(class_input_path, image_name))
                if image is None:
                    continue

                # Apply 1-3 distinct augmentations, in random order, on a [0, 1] float copy.
                num_augmentations = random.randint(1, 3)
                chosen_augmentations = random.sample(AUGMENTATIONS, num_augmentations)

                aug_image = image.astype(np.float32) / 255.0
                for aug_func in chosen_augmentations:
                    aug_image = aug_func(aug_image)

                aug_image = (aug_image * 255).astype(np.uint8)

                save_path = os.path.join(class_output_path, f"aug_{aug_count}_{image_name}")
                if not _save_rgb(aug_image, save_path):
                    continue

                aug_count += 1
                if aug_count >= num_needed:
                    break

            if aug_count == count_before_pass:
                # A whole pass produced nothing (e.g. sources vanished); stop instead of looping forever.
                print(f"⚠️ No augmented images could be produced for '{class_name}'; stopping early.")
                break

        total_images = len(usable_images) + aug_count
        print(f"✅ Class '{class_name}' in '{split}' now contains {total_images} images.")

print("🎉 Data augmentation for train and val complete!")


Processing class 'Cerscospora' in 'train'...
🔹 Found 6144 original images.
🔹 Generating 23856 augmented images.


KeyboardInterrupt: 

In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
import random

# Dataset locations
input_dir = r"E:\CLD_project\CLD_dataset_split"  # source dataset, already split
output_dir = r"E:\CLD_project\CLD_dataset_split_augmented"  # root of the augmented copy

# ✅ Per-split image count that every class is brought up to
TARGET_IMAGES = {
    "train": 25000,  # 25,000 images per class in train
    "val": 5000,     # 5,000 images per class in val
}

# Make sure the destination root exists before anything is written below it
os.makedirs(output_dir, exist_ok=True)

# ✅ Augmentation Functions
def augment_brightness(image):
    """Shift the brightness of a float image by a random offset.

    A single offset is drawn uniformly from [-0.4, 0.4] and added to every
    pixel; the result is clipped back into [0, 1].

    This is plain NumPy arithmetic, equivalent to what
    ``tf.image.adjust_brightness`` does for float input, but it avoids
    building a TensorFlow tensor and converting it back for every image.

    Args:
        image: float array with values expected in [0, 1].

    Returns:
        A new array of the same shape as ``image`` with values in [0, 1].
    """
    delta = random.uniform(-0.4, 0.4)
    return np.clip(np.asarray(image) + delta, 0, 1)

def augment_contrast(image):
    """Scale the contrast of a float image by a random factor.

    A factor is drawn uniformly from [0.9, 1.2]. Each channel is adjusted
    around its own mean as ``(x - mean) * factor + mean`` (the formula
    documented for ``tf.image.adjust_contrast``), then clipped to [0, 1].

    Implemented in NumPy so no TensorFlow tensor has to be created and
    converted back for every image.

    Args:
        image: float array of shape (height, width, channels) with values
            expected in [0, 1].

    Returns:
        A new array of the same shape as ``image`` with values in [0, 1].
    """
    factor = random.uniform(0.9, 1.2)
    pixels = np.asarray(image)
    # Per-channel mean over the two spatial axes; keepdims lets it broadcast.
    channel_mean = pixels.mean(axis=(0, 1), keepdims=True)
    return np.clip((pixels - channel_mean) * factor + channel_mean, 0, 1)

def augment_flip(image):
    """Mirror the image left-to-right (reverse the order of its columns)."""
    mirrored = np.fliplr(image)
    return mirrored

def augment_rotate(image):
    """Rotate the image about its centre by a random angle in [-30, 30) degrees.

    The scale is left at 1 and the destination size passed to
    ``cv2.warpAffine`` is the input's own (width, height).
    """
    rows, cols, _ = image.shape
    degrees = np.random.uniform(-30, 30)
    centre = (cols / 2, rows / 2)
    rotation = cv2.getRotationMatrix2D(centre, degrees, 1)
    return cv2.warpAffine(image, rotation, (cols, rows))

def augment_shift(image):
    """Translate the image by a random offset of up to 20% of each dimension.

    The horizontal offset is drawn first, then the vertical one; both are
    applied with a 2x3 affine translation matrix and the output keeps the
    input's (width, height).
    """
    rows, cols, _ = image.shape
    dx = cols * np.random.uniform(-0.2, 0.2)
    dy = rows * np.random.uniform(-0.2, 0.2)
    translation = np.array([[1, 0, dx], [0, 1, dy]], dtype=np.float32)
    return cv2.warpAffine(image, translation, (cols, rows))

def augment_zoom(image):
    """Zoom in or out around the image centre by a random factor in [0.8, 1.2).

    The previous implementation resized the whole image to a new size and
    then straight back to 224x224, which leaves the framing unchanged and
    only blurs the picture. Here the image is scaled about its centre with
    an affine warp while the canvas keeps the input's size, so factors above
    1 crop into the centre and factors below 1 shrink the content and leave
    a border (filled by ``cv2.warpAffine``'s default border handling, the
    same as the rotate and shift augmentations).

    Args:
        image: array of shape (height, width, channels).

    Returns:
        The zoomed image with the same height and width as the input.
    """
    h, w = image.shape[:2]
    zoom_factor = np.random.uniform(0.8, 1.2)
    # Angle 0 with a non-unit scale yields a pure centre-anchored scaling matrix.
    M_zoom = cv2.getRotationMatrix2D((w / 2, h / 2), 0, zoom_factor)
    return cv2.warpAffine(image, M_zoom, (w, h))

def augment_cover_part(image):
    """Black out one randomly placed square patch of the image.

    The patch side is 30% of the image height (the original drew it from
    ``randint(a, a)``, which always returns ``a``), capped at the image width
    so the placement range can never become negative on narrow images.

    The input is no longer modified in place: the patch is written into a
    copy, matching how ``augment_salt_pepper_noise`` treats its argument.

    Args:
        image: array of shape (height, width, channels).

    Returns:
        A new array equal to ``image`` except for the zeroed patch.
    """
    h, w, _ = image.shape
    mask_size = min(int(h * 0.3), w)
    x = random.randint(0, w - mask_size)
    y = random.randint(0, h - mask_size)
    covered = image.copy()
    covered[y:y + mask_size, x:x + mask_size, :] = 0
    return covered

def augment_gaussian_noise(image):
    """Add zero-mean Gaussian noise to the image and clip the result to [0, 1].

    The standard deviation is drawn uniformly from [10, 30]; the sampled
    noise is cast to float32 and divided by 255 before being added.
    """
    sigma = random.uniform(10, 30)
    raw_noise = np.random.normal(0, sigma, image.shape)
    scaled_noise = raw_noise.astype(np.float32) / 255.0
    noisy = image + scaled_noise
    return np.clip(noisy, 0, 1)

def augment_salt_pepper_noise(image, salt_prob=0.2, pepper_prob=0.2):
    """Return a copy of the image with random pixels forced to 1.0 or 0.0.

    ``salt_prob`` and ``pepper_prob`` set how many coordinates are drawn for
    each kind, as a fraction of height * width. Coordinates are sampled with
    replacement, and pepper is written after salt, so a pixel hit by both
    ends up at 0.0. All channels of a chosen pixel are overwritten.
    """
    rows, cols, _ = image.shape
    n_salt = int(salt_prob * rows * cols)
    n_pepper = int(pepper_prob * rows * cols)

    # Draw order (salt rows, salt cols, pepper rows, pepper cols) is kept fixed.
    salt_rows = np.random.randint(0, rows, n_salt)
    salt_cols = np.random.randint(0, cols, n_salt)
    pepper_rows = np.random.randint(0, rows, n_pepper)
    pepper_cols = np.random.randint(0, cols, n_pepper)

    result = image.copy()
    result[salt_rows, salt_cols, :] = 1.0
    result[pepper_rows, pepper_cols, :] = 0.0
    return result

# Pool of augmentation callables; the processing loop samples from this list.
AUGMENTATIONS = [
    augment_brightness,
    augment_contrast,
    augment_flip,
    augment_rotate,
    augment_shift,
    augment_zoom,
    augment_cover_part,
    augment_gaussian_noise,
    augment_salt_pepper_noise,
]

# ✅ Process 'train' and 'val' (Skip 'test')
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')


def _load_rgb_224(image_path):
    """Read an image from disk as a 224x224 RGB array, or None if it cannot be read."""
    image = cv2.imread(image_path)
    if image is None:
        return None
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return cv2.resize(image, (224, 224))


def _save_rgb(image, save_path):
    """Write an RGB image to disk via OpenCV (which expects BGR); return True on success."""
    return bool(cv2.imwrite(save_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR)))


for split in ["train", "val"]:  # Apply augmentation only to train and val
    split_input_path = os.path.join(input_dir, split)
    split_output_path = os.path.join(output_dir, split)
    os.makedirs(split_output_path, exist_ok=True)

    target_count = TARGET_IMAGES[split]

    for class_name in os.listdir(split_input_path):
        class_input_path = os.path.join(split_input_path, class_name)
        # Stray files next to the class folders would make os.listdir fail below.
        if not os.path.isdir(class_input_path):
            continue
        class_output_path = os.path.join(split_output_path, class_name)
        os.makedirs(class_output_path, exist_ok=True)

        print(f"Processing class '{class_name}' in '{split}'...")

        # Case-insensitive match so files such as 'IMG_001.JPG' are not dropped.
        images = [
            img for img in os.listdir(class_input_path)
            if img.lower().endswith(IMAGE_EXTENSIONS)
        ]
        num_original = len(images)

        if num_original == 0:
            print(f"⚠️ Skipping '{class_name}' (no images found).")
            continue

        print(f"🔹 Found {num_original} original images.")

        # Copy originals (resized to 224x224) and remember which ones were
        # actually readable; only those are used as augmentation sources.
        usable_images = []
        for image_name in images:
            image = _load_rgb_224(os.path.join(class_input_path, image_name))
            if image is None:
                print(f"⚠️ Could not read '{image_name}', skipping it.")
                continue
            if not _save_rgb(image, os.path.join(class_output_path, f"orig_{image_name}")):
                print(f"⚠️ Could not write copy of '{image_name}', skipping it.")
                continue
            usable_images.append(image_name)

        if not usable_images:
            # Without this guard the generation loop below would spin forever.
            print(f"⚠️ Skipping '{class_name}' (no readable images).")
            continue

        # Base the shortfall on what was really copied, and never go negative
        # when a class already meets or exceeds the target.
        num_needed = max(0, target_count - len(usable_images))
        print(f"🔹 Generating {num_needed} augmented images.")

        aug_count = 0
        while aug_count < num_needed:
            count_before_pass = aug_count
            for image_name in usable_images:
                image = _load_rgb_224(os.path.join(class_input_path, image_name))
                if image is None:
                    continue

                # Apply 1-3 distinct augmentations, in random order, on a [0, 1] float copy.
                num_augmentations = random.randint(1, 3)
                chosen_augmentations = random.sample(AUGMENTATIONS, num_augmentations)

                aug_image = image.astype(np.float32) / 255.0
                for aug_func in chosen_augmentations:
                    aug_image = aug_func(aug_image)

                aug_image = (aug_image * 255).astype(np.uint8)

                save_path = os.path.join(class_output_path, f"aug_{aug_count}_{image_name}")
                if not _save_rgb(aug_image, save_path):
                    continue

                aug_count += 1
                if aug_count >= num_needed:
                    break

            if aug_count == count_before_pass:
                # A whole pass produced nothing (e.g. sources vanished); stop instead of looping forever.
                print(f"⚠️ No augmented images could be produced for '{class_name}'; stopping early.")
                break

        total_images = len(usable_images) + aug_count
        print(f"✅ Class '{class_name}' in '{split}' now contains {total_images} images.")

print("🎉 Data augmentation for train and val complete!")


Processing class 'Cerscospora' in 'train'...
🔹 Found 6144 original images.
🔹 Generating 18856 augmented images.
✅ Class 'Cerscospora' in 'train' now contains 25000 images.
Processing class 'Healthy' in 'train'...
🔹 Found 15187 original images.
🔹 Generating 9813 augmented images.
✅ Class 'Healthy' in 'train' now contains 25000 images.
Processing class 'Leaf rust' in 'train'...
🔹 Found 6668 original images.
🔹 Generating 18332 augmented images.
✅ Class 'Leaf rust' in 'train' now contains 25000 images.
Processing class 'Miner' in 'train'...
🔹 Found 13582 original images.
🔹 Generating 11418 augmented images.
✅ Class 'Miner' in 'train' now contains 25000 images.
Processing class 'Phoma' in 'train'...
🔹 Found 5256 original images.
🔹 Generating 19744 augmented images.
✅ Class 'Phoma' in 'train' now contains 25000 images.
Processing class 'Cerscospora' in 'val'...
🔹 Found 768 original images.
🔹 Generating 4232 augmented images.
✅ Class 'Cerscospora' in 'val' now contains 5000 images.
Processing