In [3]:
import os
import cv2
import numpy as np
import random
from pathlib import Path
from collections import defaultdict

# Resolve the project root. When run as a script, it is the parent of the
# directory containing this file. In an interactive session `__file__` is
# undefined (NameError), so fall back to the parent of the current working
# directory.
try:
    PROJECT_ROOT = Path(__file__).resolve().parent.parent
except NameError:
    PROJECT_ROOT = Path.cwd().parent

# Input locations: leaf images to augment and per-split farm background photos.
SEGMENTED_DIR = PROJECT_ROOT / "plant_images" / "segmented"
FARM_BG_ROOT = PROJECT_ROOT / "farm_backgrounds"
# Output roots, one per background type.
OUTPUT_RANDOM_PIXELS_DIR = PROJECT_ROOT / "augmented_random_pixels"
OUTPUT_FARM_PHOTO_DIR = PROJECT_ROOT / "augmented_farm_backgrounds"

# Per-class split fractions. split_dataset reads only "train" and "val";
# whatever is left over goes to "test".
SPLITS = {"train": 0.6, "val": 0.2, "test": 0.2}
# Seed passed to set_seed() at the start of main().
SEED = 4896
# Lower-case file suffixes accepted as images (matched case-insensitively).
ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png"}

def set_seed(seed):
    """Seed Python's ``random`` module and NumPy's global RNG with *seed*."""
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

def get_all_leaf_images(segmented_dir):
    """Collect image paths under *segmented_dir*, grouped by class name.

    The class of a file is the name of the directory that directly contains
    it. A file counts as an image when its lower-cased name ends with one of
    ``ALLOWED_EXTENSIONS``.

    Directories and files are visited in sorted order. ``os.walk`` otherwise
    yields them in arbitrary filesystem order, which would make the seeded
    shuffle in the later split depend on the machine the script runs on.

    Args:
        segmented_dir: Root directory to walk recursively.

    Returns:
        A ``defaultdict(list)`` mapping class name to a list of file paths
        (strings). Classes without any matching file get no entry.
    """
    extensions = tuple(ALLOWED_EXTENSIONS)  # hoisted: invariant across files
    leaf_images_by_class = defaultdict(list)
    for root, dirs, files in os.walk(segmented_dir):
        # Sorting in place makes os.walk descend into subdirectories in a
        # stable order.
        dirs.sort()
        cls = Path(root).name
        for file in sorted(files):
            if file.lower().endswith(extensions):
                leaf_images_by_class[cls].append(os.path.join(root, file))
    return leaf_images_by_class

def split_dataset(image_dict, splits):
    """Split each class's paths into train/val/test partitions.

    Every class is shuffled with the global ``random`` generator and cut
    independently, so each class is divided by the same fractions. The input
    lists are left untouched: a copy is shuffled.

    Args:
        image_dict: Mapping of class name to a sequence of image paths.
        splits: Mapping with fractional sizes under the keys ``"train"`` and
            ``"val"``. Any ``"test"`` entry is not read; the test partition
            receives whatever remains after train and val are taken.

    Returns:
        A dict with keys ``"train"``, ``"val"`` and ``"test"``, each a list of
        ``(path, class_name)`` tuples.
    """
    split_data = {"train": [], "val": [], "test": []}
    for cls, paths in image_dict.items():
        # Shuffle a copy so the caller's lists are not reordered as a side
        # effect. The random draws are the same as for an in-place shuffle.
        shuffled = list(paths)
        random.shuffle(shuffled)

        n = len(shuffled)
        # int() truncates, so rounding leftovers fall into the test split.
        n_train = int(n * splits["train"])
        n_val = int(n * splits["val"])
        val_end = n_train + n_val

        split_data["train"].extend((p, cls) for p in shuffled[:n_train])
        split_data["val"].extend((p, cls) for p in shuffled[n_train:val_end])
        split_data["test"].extend((p, cls) for p in shuffled[val_end:])
    return split_data

def load_backgrounds(split_name):
    """List the background image paths available for one split.

    Looks in ``FARM_BG_ROOT / split_name`` (non-recursively) and keeps entries
    whose lower-cased name ends with one of ``ALLOWED_EXTENSIONS``.

    Names are sorted because ``os.listdir`` returns them in arbitrary order.
    Without a stable order, a seeded ``random.choice`` over the result would
    pick different backgrounds on different machines.

    Args:
        split_name: Name of the sub-directory under ``FARM_BG_ROOT``.

    Returns:
        A sorted list of path strings; empty if no entry matches.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. ``FileNotFoundError``
            when the split directory does not exist).
    """
    bg_dir = FARM_BG_ROOT / split_name
    extensions = tuple(ALLOWED_EXTENSIONS)
    return [
        os.path.join(bg_dir, f)
        for f in sorted(os.listdir(bg_dir))
        if f.lower().endswith(extensions)
    ]

def segment_leaf_black_bg(img_bgr, threshold=20):
    """Build a binary foreground mask for a leaf on a black background.

    Args:
        img_bgr: BGR image to segment.
        threshold: Grayscale cut-off; pixels brighter than this become 255 in
            the mask, all others 0.

    Returns:
        The single-channel mask after one erosion pass and two dilation
        passes with a 3x3 kernel.
    """
    grayscale = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    leaf_mask = cv2.threshold(grayscale, threshold, 255, cv2.THRESH_BINARY)[1]

    # Clean-up: erode once to drop specks, then dilate twice to grow back.
    structuring = np.ones((3, 3), np.uint8)
    eroded = cv2.erode(leaf_mask, structuring, iterations=1)
    return cv2.dilate(eroded, structuring, iterations=2)

def pick_farm_color_hsv():
    """Draw one random farm-like colour and return it as a BGR pixel.

    A colour family is chosen first (60% green, 20% brown, 20% sky blue),
    then hue, saturation and value are drawn uniformly, in that order, from
    the family's integer ranges. The HSV triple is converted to BGR.

    Returns:
        A length-3 ``uint8`` array holding the B, G, R components.
    """
    draw = random.random()
    # Each entry is (hue range, saturation range, value range), inclusive.
    if draw < 0.6:
        ranges = ((30, 85), (50, 255), (50, 255))      # green
    elif draw < 0.8:
        ranges = ((10, 30), (40, 200), (40, 120))      # brown
    else:
        ranges = ((90, 130), (20, 255), (100, 255))    # blue (sky)

    # A list comprehension keeps the draw order h -> s -> v.
    hue, sat, val = [random.randint(low, high) for low, high in ranges]

    pixel_hsv = np.array([[[hue, sat, val]]], dtype=np.uint8)
    pixel_bgr = cv2.cvtColor(pixel_hsv, cv2.COLOR_HSV2BGR)
    return pixel_bgr[0, 0, :]

def create_random_pixel_background(height, width):
    """Generate a background in which every pixel is a random farm colour.

    Each pixel is drawn independently with ``pick_farm_color_hsv``, in
    row-major order (left to right, top to bottom).

    Args:
        height: Number of rows; must be non-negative.
        width: Number of columns; must be non-negative.

    Returns:
        A ``uint8`` array of shape ``(height, width, 3)``. The shape has this
        rank even when ``height`` or ``width`` is zero.

    Raises:
        ValueError: If ``height`` or ``width`` is negative.
    """
    if height < 0 or width < 0:
        raise ValueError(
            f"height and width must be non-negative, got {height}x{width}"
        )
    # Build a flat list and reshape it. Nested lists would lose the trailing
    # dimensions when a size is zero.
    pixels = [pick_farm_color_hsv() for _ in range(height * width)]
    return np.array(pixels, dtype=np.uint8).reshape(height, width, 3)

def apply_transformations(image, mask):
    """Apply random geometric and photometric augmentations to a leaf image.

    Steps, in order:
      1. Horizontal flip (probability 0.5), applied to image and mask.
      2. Rotation (+/-25 degrees), scaling (0.7-1.3) and translation (up to
         20% of each dimension), applied to image and mask with the same
         matrix so they stay aligned.
      3. HSV jitter on the image: brightness scaled by 0.85-1.15 and hue
         shifted by up to +/-10.
      4. Cutout (probability 0.1): a random rectangle of the image is zeroed.
      5. Shadow (probability 0.1): a random filled ellipse is darkened.

    The mask is changed only by steps 1 and 2. Cutout and shadow alter the
    image only, so they remain visible wherever the mask selects the leaf.

    Args:
        image: BGR ``uint8`` image.
        mask: Single-channel mask with the same height and width as *image*.

    Returns:
        ``(image, mask)`` after augmentation, same size as the input.
    """
    # Horizontal Flip
    if random.random() < 0.5:
        image = cv2.flip(image, 1)
        mask = cv2.flip(mask, 1)

    # Rotation, Scaling, Translation
    h, w = image.shape[:2]
    scale = random.uniform(0.7, 1.3)
    angle = random.uniform(-25, 25)
    tx = random.randint(-int(0.2 * w), int(0.2 * w))
    ty = random.randint(-int(0.2 * h), int(0.2 * h))
    M = cv2.getRotationMatrix2D((w / 2, h / 2), angle, scale)
    M[:, 2] += [tx, ty]  # add the translation to the affine offset column
    image = cv2.warpAffine(image, M, (w, h), borderValue=(0, 0, 0))
    mask = cv2.warpAffine(mask, M, (w, h), borderValue=0)

    # HSV Color Jitter
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).astype(np.float32)
    hsv[..., 2] *= random.uniform(0.85, 1.15)  # brightness
    hsv[..., 0] += random.uniform(-10, 10)     # hue shift
    # For 8-bit images OpenCV stores hue as degrees / 2, i.e. in [0, 180).
    # Hue is circular, so wrap the shifted value rather than clipping it.
    # Clipping would collapse negative hues to 0 and let values above 179
    # leave the valid range.
    hsv[..., 0] %= 180
    # Guard against float rounding producing exactly 180.0 after the modulo.
    hsv[..., 0] = np.minimum(hsv[..., 0], 179)
    hsv = np.clip(hsv, 0, 255).astype(np.uint8)
    image = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

    # Cutout: black out a rectangle covering 5-15% of each dimension
    if random.random() < 0.1:
        cut_w = int(random.uniform(0.05, 0.15) * w)
        cut_h = int(random.uniform(0.05, 0.15) * h)
        x = random.randint(0, w - cut_w)
        y = random.randint(0, h - cut_h)
        image[y:y + cut_h, x:x + cut_w] = 0

    # Simulated Shadow Mask: blend a black ellipse in at 40% weight, so
    # pixels inside the ellipse keep 60% of their intensity.
    if random.random() < 0.1:
        overlay = image.copy()
        x1, y1 = random.randint(0, w), random.randint(0, h)
        x2, y2 = random.randint(0, w), random.randint(0, h)
        center = ((x1 + x2) // 2, (y1 + y2) // 2)
        axes = (abs(x2 - x1) // 2, abs(y2 - y1) // 2)
        cv2.ellipse(overlay, center, axes, 0, 0, 360, (0, 0, 0), -1)
        image = cv2.addWeighted(overlay, 0.4, image, 0.6, 0)

    return image, mask

def overlay_leaf_on_background(leaf_bgr, leaf_mask, background_bgr):
    """Composite a leaf onto a background using its mask as alpha.

    The background is resized to the leaf image's size. The mask (0-255) is
    scaled to 0-1 and used as a per-pixel weight: the leaf contributes
    ``alpha`` and the background ``1 - alpha``.

    Args:
        leaf_bgr: BGR leaf image.
        leaf_mask: Single-channel mask, same height and width as *leaf_bgr*.
        background_bgr: BGR background image of any size.

    Returns:
        The blended BGR ``uint8`` image, same size as *leaf_bgr*.
    """
    rows, cols = leaf_bgr.shape[:2]
    resized_bg = cv2.resize(background_bgr, (cols, rows), interpolation=cv2.INTER_AREA)

    # Replicate the mask across three channels and normalise to [0, 1].
    weight = cv2.cvtColor(leaf_mask, cv2.COLOR_GRAY2BGR).astype(float) / 255.0

    foreground = (leaf_bgr * weight).astype(np.uint8)
    backdrop = (resized_bg * (1 - weight)).astype(np.uint8)
    return cv2.add(foreground, backdrop)

def simulate_jpeg_artifact(img, q_min=60, q_max=95):
    """Round-trip *img* through in-memory JPEG encoding at a random quality.

    Args:
        img: Image to degrade.
        q_min: Lowest JPEG quality that may be drawn (inclusive).
        q_max: Highest JPEG quality that may be drawn (inclusive).

    Returns:
        The image decoded back from the JPEG buffer as a 3-channel BGR image.

    Raises:
        RuntimeError: If OpenCV reports that the JPEG encoding failed.
    """
    quality = random.randint(q_min, q_max)
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
    ok, enc = cv2.imencode('.jpg', img, encode_param)
    # Check the success flag rather than decoding a buffer that may be invalid.
    if not ok:
        raise RuntimeError(f"JPEG encoding failed at quality {quality}")
    return cv2.imdecode(enc, cv2.IMREAD_COLOR)

def save_augmented_image(output_dir, split, cls, base_filename, suffix, image):
    """Write *image* to ``output_dir/split/cls/<base_filename>_<suffix>.jpg``.

    The class directory is created if it does not exist. An existing file
    with the same name is overwritten.

    Args:
        output_dir: Root output directory (``Path`` or string).
        split: Split sub-directory name.
        cls: Class sub-directory name.
        base_filename: File name stem for the saved image.
        suffix: Tag appended to the stem after an underscore.
        image: Image array to write.

    Raises:
        OSError: If OpenCV reports that the file could not be written.
    """
    class_dir = Path(output_dir) / split / cls
    class_dir.mkdir(parents=True, exist_ok=True)
    save_path = class_dir / f"{base_filename}_{suffix}.jpg"
    # cv2.imwrite signals failure through its return value, not an exception.
    # Check it so a failed write is not silently lost.
    if not cv2.imwrite(str(save_path), image):
        raise OSError(f"Failed to write image to {save_path}")

def main():
    """Run the full augmentation pipeline.

    Seeds the RNGs, gathers the leaf images, splits them per class, and for
    every readable leaf image writes two augmented composites: one on a
    random-pixel background, and one on a randomly chosen farm photo from the
    same split when such a photo is available and readable. Unreadable leaf
    images are skipped.
    """
    set_seed(SEED)
    split_data = split_dataset(get_all_leaf_images(SEGMENTED_DIR), SPLITS)

    for split_name, items in split_data.items():
        bg_paths = load_backgrounds(split_name)

        for leaf_path, cls in items:
            leaf = cv2.imread(leaf_path)
            if leaf is None:
                # Unreadable file: nothing to augment.
                continue
            base = Path(leaf_path).stem

            # Segment the leaf, then augment image and mask together.
            leaf, leaf_mask = apply_transformations(leaf, segment_leaf_black_bg(leaf))

            # Variant 1: farm-coloured random pixel background.
            height, width = leaf.shape[:2]
            noise_bg = create_random_pixel_background(height, width)
            img_random = simulate_jpeg_artifact(
                overlay_leaf_on_background(leaf, leaf_mask, noise_bg)
            )
            save_augmented_image(OUTPUT_RANDOM_PIXELS_DIR, split_name, cls, base, "random", img_random)

            # Variant 2: real farm photo background, if any exist for the split.
            if not bg_paths:
                continue
            bg_img = cv2.imread(random.choice(bg_paths))
            if bg_img is None:
                continue
            img_farm = simulate_jpeg_artifact(
                overlay_leaf_on_background(leaf, leaf_mask, bg_img)
            )
            save_augmented_image(OUTPUT_FARM_PHOTO_DIR, split_name, cls, base, "farm", img_farm)

        print(f"Completed: {split_name} set with {len(items)} images")

# Run the pipeline only when executed as a script (or notebook cell), not
# when this module is imported.
if __name__ == "__main__":
    main()


Completed: train set with 32572 images
Completed: val set with 10849 images
Completed: test set with 10885 images
