# We are working on half of the dataset containing lab images. 
We will preprocess the images to ensure they are suitable for training our model.

In [1]:
import cv2
import numpy as np
from skimage import exposure
from pathlib import Path

# preprocess_leaf — concise description

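- **White balance** — reduces global color cast using OpenCV's `xphoto` SimpleWB (stretches each color channel's histogram independently; note this is not the gray-world algorithm, which is `createGrayworldWB`).
- **CLAHE (LAB L-channel)** — improves local contrast without over-amplifying noise.
- **Denoise** — removes color noise using `fastNlMeansDenoisingColored`.
- **Sharpen** — enhances local detail via unsharp masking (Gaussian blur + `addWeighted` blending).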

Outputs: returns an RGB numpy array (H, W, 3) and writes `<stem>_final.png` to `output_dir`.

Notes: no segmentation or resizing; operates on the full input image and writes only the final image.


In [4]:
def preprocess_leaf(image_path, output_dir="processed"):
    """Enhance a single leaf image and save it as ``<stem>_final.png``.

    The whole image is processed (no segmentation, no resizing) through:
      1. white balance (OpenCV ``xphoto`` SimpleWB),
      2. CLAHE on the L channel in LAB space,
      3. non-local-means colour denoising,
      4. unsharp-mask sharpening.

    Args:
        image_path: Path to the input image (``str`` or ``pathlib.Path``).
        output_dir: Folder that receives the output file; created if missing.

    Returns:
        The processed image as an RGB numpy array of shape (H, W, 3).

    Raises:
        FileNotFoundError: ``image_path`` does not point to an existing file.
        ValueError: OpenCV could not decode the file as an image.
        OSError: the processed image could not be written.
    """
    src_path = Path(image_path)
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # ----- Read image -----
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an obscure assertion inside cvtColor.
    if not src_path.is_file():
        raise FileNotFoundError(f"Input image not found: {src_path}")
    img_bgr = cv2.imread(str(src_path))
    if img_bgr is None:
        raise ValueError(f"Could not decode image: {src_path}")
    img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    # ----- Step 1: White balance (SimpleWB: per-channel histogram stretch) -----
    img_wb = cv2.xphoto.createSimpleWB().balanceWhite(img)

    # ----- Step 2: CLAHE in LAB -----
    # Equalise only the lightness channel so the colour channels (a, b) are untouched.
    lab = cv2.cvtColor(img_wb, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    l_clahe = clahe.apply(l)
    lab_clahe = cv2.merge((l_clahe, a, b))
    img_clahe = cv2.cvtColor(lab_clahe, cv2.COLOR_LAB2RGB)

    # ----- Step 3: Non-local-means denoise -----
    # Positional args: h=10, hColor=10, templateWindowSize=7, searchWindowSize=21.
    img_denoised = cv2.fastNlMeansDenoisingColored(img_clahe, None, 10, 10, 7, 21)

    # NOTE: the former segmentation (GrabCut) step was intentionally removed;
    # everything below operates on the whole denoised image.

    # ----- Step 4: Sharpen whole image (unsharp mask) -----
    # ksize=(0, 0) lets OpenCV derive the kernel size from sigma=1.
    blur = cv2.GaussianBlur(img_denoised, (0, 0), 1)
    img_sharp = cv2.addWeighted(img_denoised, 1.5, blur, -0.5, 0)

    # No resize/pad is applied — assumes inputs are already 256x256 (TODO confirm).
    final = img_sharp

    # ----- Save single final output -----
    out_path = out_dir / f"{src_path.stem}_final.png"
    # cv2.imwrite expects BGR and reports failure via its return value.
    if not cv2.imwrite(str(out_path), cv2.cvtColor(final, cv2.COLOR_RGB2BGR)):
        raise OSError(f"Failed to write processed image: {out_path}")

    return final

Define the image path, pass the images to the function, and save the processed results.

In [5]:
# Batch-process every image under `root` (keeps class subfolders in output).
root = Path(r"Other Half")
# Accepted extensions; matching is done on the lower-cased suffix, so the
# upper-case entries are redundant but kept for anything else that reads `exts`.
exts = {".jpg", ".jpeg", ".png", ".JPG", ".JPEG"}
total = 0       # image files encountered (successful or not)
processed = 0   # images that went through preprocess_leaf without error
failed = []     # (path, repr(exception)) for every image that raised

if not root.is_dir():
    # Without this, a wrong path just reports "total: 0" with no hint why.
    print(f"Warning: input folder {str(root)!r} not found - nothing to process.")

for p in sorted(root.rglob("*")):
    if p.is_file() and p.suffix.lower() in exts:
        total += 1
        try:
            # Keep the class subfolder name in output (e.g. processed/Apple___Apple_scab/...).
            # Only the immediate parent folder name is used; outputs are named
            # <stem>_final.png, so inputs sharing a stem and parent name overwrite each other.
            out_dir = Path('processed') / p.parent.name
            preprocess_leaf(str(p), output_dir=str(out_dir))
            processed += 1
        except Exception as e:
            # Best-effort batch: record the failure and continue with the next image.
            failed.append((str(p), repr(e)))
        if total % 100 == 0:
            print(f"Processed {total} images so far...")

print(f"Done — total: {total}, success: {processed}, failed: {len(failed)}")
if failed:
    print('First failures:')
    for failed_path, err in failed[:10]:
        print('-', failed_path, err)


Processed 100 images so far...
Processed 200 images so far...
Processed 300 images so far...
Processed 400 images so far...
Processed 500 images so far...
Processed 600 images so far...
Processed 700 images so far...
Processed 800 images so far...
Processed 900 images so far...
Processed 1000 images so far...
Processed 1100 images so far...
Processed 1200 images so far...
Processed 1300 images so far...
Processed 1400 images so far...
Processed 1500 images so far...
Processed 1600 images so far...
Processed 1700 images so far...
Processed 1800 images so far...
Processed 1900 images so far...
Processed 2000 images so far...
Processed 2100 images so far...
Processed 2200 images so far...
Processed 2300 images so far...
Processed 2400 images so far...
Processed 2500 images so far...
Processed 2600 images so far...
Processed 2700 images so far...
Processed 2800 images so far...
Processed 2900 images so far...
Processed 3000 images so far...
Processed 3100 images so far...
Processed 3200 im