In [None]:
import Augmentor
import os
from tqdm import tqdm
from pathlib import Path


# File suffixes treated as images when counting.
# NOTE(review): intended to mirror the file types Augmentor scans for --
# confirm against the installed Augmentor version.
_IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tif', '.tiff')


def _count_images(directory) -> int:
    """Count non-hidden image files directly inside ``directory`` (0 if it is absent)."""
    directory = Path(directory)
    if not directory.is_dir():
        return 0
    return sum(
        1 for entry in directory.iterdir()
        if entry.is_file()
        and not entry.name.startswith('.')
        and entry.suffix.lower() in _IMAGE_SUFFIXES
    )


def data_augmentation(class_path: str, target_count: int,
                      output_dir: str = 'augmented_output'):
    """Top up one class folder to ``target_count`` images using Augmentor.

    The images directly inside ``class_path`` are the originals; generated
    samples are written to ``output_dir`` inside that folder. Samples that are
    already present in the output folder count towards the target, so running
    the cell again does not generate a second batch.

    Args:
        class_path: Directory holding the original images of a single class.
        target_count: Desired total number of images (originals + augmented).
        output_dir: Name of the sub-folder (inside ``class_path``) that
            receives the generated samples.

    Returns:
        None. Progress and skip/error reasons are reported with ``print``.
    """
    class_path = Path(class_path)
    if not class_path.is_dir():
        print(f"Error: Directory not found at {class_path}")
        return

    class_name = class_path.name
    output_path = class_path / output_dir

    # Originals live directly in the class folder; earlier runs may already
    # have left generated samples in the output sub-folder.
    original_count = _count_images(class_path)
    augmented_count = _count_images(output_path)
    current_count = original_count + augmented_count

    if current_count >= target_count:
        print(
            f"Skipping {class_name}: Current count ({current_count}) is already >= target ({target_count}).")
        return

    if original_count == 0:
        # Nothing to sample from; report it here rather than failing inside Augmentor.
        print(f"Skipping {class_name}: No source images found in {class_path}.")
        return

    # How many new images are still required to reach the target.
    generate_count = target_count - current_count

    # The relative output directory is created inside the source directory.
    pipeline = Augmentor.Pipeline(source_directory=str(class_path),
                                  output_directory=output_dir)

    # Geometric augmentations, each applied with its own probability.
    pipeline.rotate(probability=0.7, max_left_rotation=20, max_right_rotation=20)
    pipeline.zoom_random(probability=0.5, percentage_area=0.9)
    pipeline.flip_left_right(probability=0.5)
    pipeline.flip_top_bottom(probability=0.3)
    pipeline.random_distortion(probability=0.5, grid_width=4,
                               grid_height=4, magnitude=2)
    pipeline.shear(probability=0.5, max_shear_left=10, max_shear_right=10)

    print(f"Class: {class_name}")
    print(f"Original Count: {original_count}")
    print(f"Existing Augmented: {augmented_count}")
    print(f"Generating: {generate_count}")
    print(f"New Target Total: {target_count}")

    # Generate exactly the missing number of samples.
    pipeline.sample(generate_count)

    print(
        f"Augmentation complete for {class_name}. Generated files in {output_path}")

In [3]:
# Class folder to augment, given relative to the notebook's working directory.
data_dir = "soybean_Sudden Death Syndrome"

# Bring this class up to 220 images in total.
data_augmentation(class_path=data_dir, target_count=220)

Initialised with 110 image(s) found.
Output directory set to soybean_Sudden Death Syndrome\augmented_output.-> Class: soybean_Sudden Death Syndrome
   Original Count: 110
   Generating: 110
   New Target Total: 220


Processing <PIL.Image.Image image mode=RGB size=3456x4608 at 0x2115A3A4370>: 100%|██████████| 110/110 [02:34<00:00,  1.41s/ Samples]

Augmentation complete for soybean_Sudden Death Syndrome. Generated files in soybean_Sudden Death Syndrome\augmented_output



