#                                                   PLANT DISEASE DETECTION


# Step 01
Convert each image to a PNG that contains only the cropped leaf (the background is made transparent through the alpha channel).

In [None]:
import os
import cv2
import numpy as np

# ─── CONFIGURATION ────────────────────────────────────────────────────────────
# Both roots are relative paths, so they resolve against the working directory.
INPUT_ROOT  = 'original_dataset'   # top-level folder with one subfolder per class
OUTPUT_ROOT = 'Leaf_Crops_original'         # receives one subfolder of leaf crops per class
# Inclusive lower/upper HSV bounds handed to cv2.inRange() to build the
# green-leaf mask. OpenCV stores 8-bit hue as 0-179, so 25-85 spans roughly
# yellow-green through green.
HSV_LOWER   = np.array([25,  40, 40])
HSV_UPPER   = np.array([85, 255,255])
# 7x7 elliptical structuring element used for the morphological close/open
# cleanup of that mask.
KERNEL      = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7,7))

def makedirs(path):
    """Create ``path`` together with any missing parent directories.

    Calling it on a directory that already exists is a no-op rather than an
    error.
    """
    os.makedirs(name=path, exist_ok=True)

# ensure top-level output folder exists
makedirs(OUTPUT_ROOT)

# loop over each class subfolder
for class_name in os.listdir(INPUT_ROOT):
    class_in  = os.path.join(INPUT_ROOT,  class_name)
    class_out = os.path.join(OUTPUT_ROOT, class_name)
    if not os.path.isdir(class_in):
        continue
    makedirs(class_out)

    # process each image in that class
    for fname in os.listdir(class_in):
        # Every suffix carries its leading dot so only real extensions match
        # (a bare 'png' would also accept a name such as "notes_png").
        if not fname.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        img_path = os.path.join(class_in, fname)
        img      = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None
            print(f"Could not read {fname}, skipping.")
            continue

        # 1) Build green-leaf mask: threshold in HSV, then close small holes
        #    and open away small specks
        hsv  = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, HSV_LOWER, HSV_UPPER)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, KERNEL)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN,  KERNEL)

        # 2) Find the largest contour = leaf
        cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not cnts:
            print(f"No leaf found in {fname}, skipping.")
            continue
        leaf_cnt = max(cnts, key=cv2.contourArea)

        # 3) Convex hull for smooth boundary
        hull = cv2.convexHull(leaf_cnt)

        # 4) Rasterize hull to alpha mask (255 inside the hull, 0 outside) and
        #    take its bounding box straight from the hull vertices instead of
        #    scanning the whole raster for non-zero pixels
        hull_mask = np.zeros_like(mask)
        cv2.drawContours(hull_mask, [hull], -1, 255, -1)
        x, y, w, h = cv2.boundingRect(hull)

        # 5) Crop BGR + alpha to bbox and stack them into one 4-channel image
        crop_bgr   = img[y:y + h, x:x + w]
        crop_alpha = hull_mask[y:y + h, x:x + w]
        rgba       = np.dstack((crop_bgr, crop_alpha))

        # 6) Save PNG in the proper class folder
        base     = os.path.splitext(fname)[0]
        out_path = os.path.join(class_out, f"{base}_leaf.png")
        if not cv2.imwrite(out_path, rgba):
            # cv2.imwrite reports failure through its return value
            print(f"Failed to write {out_path}")
            continue
        print(f"Saved crop: {out_path}")

print("All done! Cropped leaves in:", OUTPUT_ROOT)


# Step 02
This step walks through each class folder, counts the images, and records each image's format and size. It then saves this information to the CSV file `image_index.csv`.

In [None]:
import os
import pandas as pd
from PIL import Image

# ─── assuming you run this script from DIP_PROJECT ─────────────────────────────
base_dir = os.path.join(os.getcwd(), "Leaf_Crops_original")
# or whatever your top-level folder is

# One dict per readable image; becomes one row of image_index.csv.
records = []
# os.listdir() returns entries in arbitrary order. Sorting keeps the row order
# of the index stable across runs and machines, which matters for any seeded
# split that is later drawn from it.
for class_name in sorted(os.listdir(base_dir)):
    class_folder = os.path.join(base_dir, class_name)
    if not os.path.isdir(class_folder):
        continue

    # look for any common image extension
    img_files = sorted(
        f for f in os.listdir(class_folder)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    print(f"Found {len(img_files):2d} images in '{class_name}'")

    for fname in img_files:
        path = os.path.join(class_folder, fname)
        try:
            with Image.open(path) as img:
                w, h = img.size
                fmt = img.format
        except Exception as e:
            # Best effort: report the unreadable file and keep indexing the rest.
            print(f"  ✗ Couldn’t open {fname}: {e}")
            continue

        records.append({
            'filepath': path,
            'label':    class_name,
            'format':   fmt,
            'width':    w,
            'height':   h
        })

# Explicit columns keep the header in place even if no image was indexed.
df = pd.DataFrame(records, columns=['filepath','label','format','width','height'])

print("\nSample rows:")
# DataFrame.sample(n) raises ValueError when n exceeds the number of rows, so
# cap the preview size and fall back to printing the (empty) frame itself.
n_preview = min(5, len(df))
print(df.sample(n_preview) if n_preview else df)
print("\nCounts per class:")
print(df['label'].value_counts())

df.to_csv('image_index.csv', index=False)
print("\nWrote image_index.csv")


Found 997 images in 'Pepper__bell___Bacterial_spot'
Found 1478 images in 'Pepper__bell___healthy'
Found 1000 images in 'Potato___Early_blight'
Found 152 images in 'Potato___healthy'
Found 1000 images in 'Potato___Late_blight'
Found 2127 images in 'Tomato_Bacterial_spot'
Found 425 images in 'Tomato_Early_blight'

Sample rows:
                                               filepath  \
6961  c:\Users\hp\Downloads\DIP_PROJECT\Leaf_Crops_o...   
94    c:\Users\hp\Downloads\DIP_PROJECT\Leaf_Crops_o...   
4677  c:\Users\hp\Downloads\DIP_PROJECT\Leaf_Crops_o...   
6798  c:\Users\hp\Downloads\DIP_PROJECT\Leaf_Crops_o...   
6436  c:\Users\hp\Downloads\DIP_PROJECT\Leaf_Crops_o...   

                              label format  width  height  
6961            Tomato_Early_blight    PNG    156     204  
94    Pepper__bell___Bacterial_spot    PNG    226     245  
4677          Tomato_Bacterial_spot    PNG    140     206  
6798            Tomato_Early_blight    PNG    188     218  
6436          Toma

# Step 03: Data Preprocessing
The dataset is divided into train, validation, and test splits with a ratio of 70%, 15%, and 15% respectively.

1. **Configure** parameters: set `TARGET_SIZE`, `INDEX_CSV`, `OUTPUT_DIR`, `SPLIT_RATIOS`, and `RANDOM_STATE`.  
2. **Load** your image index CSV (with columns `filepath,label,format,width,height`) into a DataFrame.  
3. **Split** the DataFrame into train, validation, and test sets via `train_test_split`, stratified by `label`.  
4. **Process & save** each split:  
   - Read each image from `filepath` and resize it to `TARGET_SIZE`.
   - Write the resized BGR image to `Processed/<split>/<label>/<filename>`.  
5. **Log & finish**: print progress for each split, then confirm completion.  




In [None]:
import os
import cv2
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# ─── PARAMETERS ────────────────────────────────────────────────────────────────
# Output size handed to cv2.resize(); OpenCV reads this tuple as (width, height).
TARGET_SIZE   = (256, 256)
INDEX_CSV     = "image_index.csv"      # image index; the code below reads its 'filepath' and 'label' columns
OUTPUT_DIR    = "Processed"            # resized images go to <OUTPUT_DIR>/<split>/<label>/
SPLIT_RATIOS  = (0.70, 0.15, 0.15)     # train / val / test fractions
RANDOM_STATE  = 42                     # seed passed to both train_test_split calls

# ─── UTILITY: make sure directory exists ───────────────────────────────────────
def makedirs(path):
    """Create ``path`` together with any missing parent directories.

    Calling it on a directory that already exists is a no-op rather than an
    error.
    """
    os.makedirs(name=path, exist_ok=True)

# ─── STEP 1: load index CSV ────────────────────────────────────────────────────
df = pd.read_csv(INDEX_CSV)  # columns: filepath,label,format,width,height

# ─── STEP 2: train/val/test split ───────────────────────────────────────────────
# Carve off the training share first, then divide the remainder between
# validation and test in proportion to their ratios. Both calls stratify by
# label and share the same seed.
train_df, temp_df = train_test_split(
    df,
    test_size=1 - SPLIT_RATIOS[0],
    stratify=df['label'],
    random_state=RANDOM_STATE
)
val_df, test_df = train_test_split(
    temp_df,
    test_size = SPLIT_RATIOS[2] / (SPLIT_RATIOS[1] + SPLIT_RATIOS[2]),
    stratify   = temp_df['label'],
    random_state = RANDOM_STATE
)

splits = {
    'train': train_df,
    'val':   val_df,
    'test':  test_df
}

# ─── STEP 3: process & save ────────────────────────────────────────────────────
for split_name, subset in splits.items():
    print(f"\nProcessing {split_name} ({len(subset)} images)…")
    for _, row in subset.iterrows():
        src_path = row['filepath']
        label    = row['label']

        # Read with cv2.imread's default flag, i.e. as a 3-channel BGR image;
        # an alpha channel stored in the source file is not carried over.
        img = cv2.imread(src_path)
        if img is None:
            print(" ✗ failed to read:", src_path)
            continue
        img_resized = cv2.resize(img, TARGET_SIZE)

        # Save the BGR resized image. RGB/HSV views are cheap to derive when
        # loading later, so they are not computed here.
        out_dir  = os.path.join(OUTPUT_DIR, split_name, label)
        makedirs(out_dir)
        fname    = os.path.basename(src_path)
        out_path = os.path.join(out_dir, fname)
        if not cv2.imwrite(out_path, img_resized):
            # cv2.imwrite reports failure through its return value
            print(" ✗ failed to write:", out_path)


print("\nDone resizing & splitting into train/val/test folders.")




Processing train (5025 images)…

Processing val (1077 images)…

Processing test (1077 images)…

Done resizing & splitting into train/val/test folders.


# Step 04: Segmentation of Infected Regions

1. **Configure** input/output paths (`Leaf_Crops_original` → `Segmented`) and set `masks/`, `overlays/` subfolders plus a 5×5 elliptical kernel.  
2. **Define** `segment_lesions(img_bgr)`:  
   - Apply CLAHE on the Lab L-channel  
   - Otsu threshold the a-channel to isolate brown/yellow lesions  
   - Morphologically open/close to clean up  
   - Blend a red overlay via `cv2.addWeighted()`  
3. **Iterate** over each class folder in `INPUT_DIR`, creating matching `Segmented/masks/<class>` and `Segmented/overlays/<class>` directories.  
4. **Process** each `*.png` leaf crop:  
   - Load as RGBA, split into BGR + alpha  
   - Call `segment_lesions()` on the BGR image  
   - Zero out mask pixels where `alpha == 0`, and restore the original (untinted) image pixels in the overlay at those positions  
   - Save `<base>_mask.png` and `<base>_overlay.png` to their respective folders  

In [None]:
import os
import cv2
import numpy as np

# ─── CONFIGURATION ────────────────────────────────────────────────────────────
INPUT_DIR    = 'Leaf_Crops_original'              # top-level folder of leaf crops, one subfolder per class
OUTPUT_ROOT  = 'Segmented'    # root under which the masks/ and overlays/ trees are written
MASK_SUBDIR  = 'masks'        # subfolder of OUTPUT_ROOT for the binary lesion masks
OVER_SUBDIR  = 'overlays'     # subfolder of OUTPUT_ROOT for the red-tinted overlays
KERNEL_SIZE  = (5, 5)         # size of the elliptical structuring element built in segment_lesions()

# Make sure both output subfolders exist before any class is processed
for sub in (MASK_SUBDIR, OVER_SUBDIR):
    os.makedirs(os.path.join(OUTPUT_ROOT, sub), exist_ok=True)

def segment_lesions(img_bgr):
    """Segment candidate lesion pixels and tint them red.

    The image is converted to Lab, its a-channel is binarised with Otsu's
    method, and the result is cleaned with a morphological open followed by
    a close (two iterations each, elliptical ``KERNEL_SIZE`` kernel).

    Args:
        img_bgr: 3-channel BGR image (presumably 8-bit, since the channels
            go through CLAHE and Otsu thresholding; TODO confirm).

    Returns:
        ``(mask, blended)``: the cleaned binary mask (0/255) and a copy of
        ``img_bgr`` blended 70/30 with a version whose masked pixels are
        pure red.
    """
    lab_img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
    l_chan, a_chan, b_chan = cv2.split(lab_img)

    # Contrast-equalise the lightness channel only.
    # NOTE(review): the threshold below reads the untouched a-channel and the
    # overlay is built from img_bgr, so the equalised L channel does not
    # appear to influence either returned value. Confirm the intent.
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    lab_equalized = cv2.merge([equalizer.apply(l_chan), a_chan, b_chan])

    # Otsu picks the a-channel cut automatically; the 0 passed as the
    # threshold is ignored when THRESH_OTSU is set.
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, lesion_mask = cv2.threshold(lab_equalized[:, :, 1], 0, 255, otsu_flags)

    # Open first (drop specks), then close (fill small holes).
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, KERNEL_SIZE)
    for morph_op in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        lesion_mask = cv2.morphologyEx(lesion_mask, morph_op, ellipse, iterations=2)

    # Paint masked pixels pure red (BGR order) and blend with the original.
    tinted = img_bgr.copy()
    tinted[lesion_mask > 0] = (0, 0, 255)
    blended = cv2.addWeighted(img_bgr, 0.7, tinted, 0.3, 0)

    return lesion_mask, blended

# ─── PROCESS ─────────────────────────────────────────────────────────────────
for class_name in os.listdir(INPUT_DIR):
    class_in_dir  = os.path.join(INPUT_DIR, class_name)
    if not os.path.isdir(class_in_dir):
        continue

    # mirror the class folder under both output trees
    mask_out_dir = os.path.join(OUTPUT_ROOT, MASK_SUBDIR, class_name)
    over_out_dir = os.path.join(OUTPUT_ROOT, OVER_SUBDIR, class_name)
    os.makedirs(mask_out_dir, exist_ok=True)
    os.makedirs(over_out_dir, exist_ok=True)

    for fname in os.listdir(class_in_dir):
        if not fname.lower().endswith('.png'):
            continue
        in_path = os.path.join(class_in_dir, fname)

        # Load the leaf crop with its alpha channel (IMREAD_UNCHANGED).
        # cv2.imread returns None for unreadable files and a 2-D array for
        # single-channel images, so ndim is checked before shape[2] is read.
        rgba = cv2.imread(in_path, cv2.IMREAD_UNCHANGED)
        if rgba is None or rgba.ndim != 3 or rgba.shape[2] != 4:
            print(f"Skipping {fname}: not a valid RGBA PNG")
            continue
        b, g, r, alpha = cv2.split(rgba)
        img_bgr        = cv2.merge([b, g, r])

        # segment lesions
        mask, overlay = segment_lesions(img_bgr)

        # Outside the leaf (alpha == 0): clear the lesion mask and put the
        # original pixels back, so the red tint never lands on background.
        outside_leaf = alpha == 0
        mask[outside_leaf]    = 0
        overlay[outside_leaf] = img_bgr[outside_leaf]

        # save
        base = os.path.splitext(fname)[0]
        cv2.imwrite(os.path.join(mask_out_dir, base + '_mask.png'),    mask)
        cv2.imwrite(os.path.join(over_out_dir, base + '_overlay.png'), overlay)

        print(f"Processed {class_name}/{fname}")

print("Done! Check your infected masks & overlays under:", OUTPUT_ROOT)


Processed Pepper__bell___Bacterial_spot/0022d6b7-d47c-4ee2-ae9a-392a53f48647___JR_B.Spot 8964_leaf.png
Processed Pepper__bell___Bacterial_spot/006adb74-934f-448f-a14f-62181742127b___JR_B.Spot 3395_leaf.png
Processed Pepper__bell___Bacterial_spot/00f2e69a-1e56-412d-8a79-fdce794a17e4___JR_B.Spot 3132_leaf.png
Processed Pepper__bell___Bacterial_spot/01613cd0-d3cd-4e96-945c-a312002037bf___JR_B.Spot 3262_leaf.png
Processed Pepper__bell___Bacterial_spot/0169b9ac-07b9-4be1-8b85-da94481f05a4___NREC_B.Spot 9169_leaf.png
Processed Pepper__bell___Bacterial_spot/018e494e-d2eb-468b-9d02-40219d9f4921___JR_B.Spot 9045_leaf.png
Processed Pepper__bell___Bacterial_spot/01940b6d-7dea-4889-a7b8-a35f4e9bba34___NREC_B.Spot 9120_leaf.png
Processed Pepper__bell___Bacterial_spot/01dfb88b-cd5a-420c-b163-51f5fe07b74d___JR_B.Spot 9091_leaf.png
Processed Pepper__bell___Bacterial_spot/01ebc916-4793-40a3-b5e4-a32687e4fa3d___NREC_B.Spot 9125_leaf.png
Processed Pepper__bell___Bacterial_spot/024623ab-be81-4d99-a653-c3b

# Step 05: Feature Extraction

1. **Configure** input directories (`Processed/<split>/<class>`) and mask root (`Segmented/masks/<class>`), plus splits `["train","val","test"]`.  
2. **Iterate** over each split and class folder, listing images and corresponding `_mask.png` files.  
3. **Load** each image and mask, **resize** the mask (nearest‐neighbor) to match the image if needed.  
4. **Threshold** the mask to a boolean `lesion` map; skip if no lesion pixels.  
5. **Extract color features**: mean & standard deviation per BGR channel over the lesion region.  
6. **Extract shape features**: contour area, perimeter, roundness, and bounding‐box aspect ratio.  
7. **Append** all features plus `split`, `label`, `filename` into a records list, then **build** a DataFrame and **save** to `features_all_splits.csv`.  


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from math import pi

# ─── CONFIGURATION ────────────────────────────────────────────────────────────
PROCESSED_DIR = "Processed"   # root of resized train/val/test subfolders
MASK_ROOT     = "Segmented"   # contains `masks/{class_name}/..._mask.png`
SPLITS        = ["train", "val", "test"]

# Column order of features_all_splits.csv. Passing it to the DataFrame keeps the
# header in place even when no image produced a feature row.
FEATURE_COLUMNS = [
    'split', 'label', 'filename',
    'mean_B', 'mean_G', 'mean_R',
    'std_B', 'std_G', 'std_R',
    'area', 'perimeter', 'roundness', 'bbox_aspect',
]


def _lesion_features(img, mask):
    """Compute colour and shape features of the masked region of one image.

    Args:
        img: colour image array as returned by ``cv2.imread`` (channel order
            B, G, R).
        mask: single-channel mask; non-zero pixels mark the lesion region. It
            is resized (nearest neighbour) when its size differs from ``img``.

    Returns:
        A dict with the keys ``mean_*``, ``std_*``, ``area``, ``perimeter``,
        ``roundness`` and ``bbox_aspect``, or ``None`` when the mask contains
        no non-zero pixel.
    """
    # ─── ensure mask & image share the same dimensions ────────────────────
    h, w = img.shape[:2]
    if mask.shape[:2] != (h, w):
        mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)

    lesion = mask > 0
    if not lesion.any():
        return None

    # ─── COLOR FEATURES: per-channel mean / std over lesion pixels ────────
    means = [img[:, :, c][lesion].mean() for c in range(3)]
    stds  = [img[:, :, c][lesion].std()  for c in range(3)]

    # ─── SHAPE FEATURES: summed over all external contours ────────────────
    cnts, _    = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    total_area = sum(cv2.contourArea(c) for c in cnts)
    total_per  = sum(cv2.arcLength(c, True) for c in cnts)
    # 4*pi*A / P^2 equals 1 for a perfect circle; guard the zero-perimeter case
    roundness  = (4 * pi * total_area / (total_per**2)) if total_per > 0 else 0

    # mean width/height ratio of the contours' bounding boxes
    ratios = []
    for c in cnts:
        _x, _y, w_cnt, h_cnt = cv2.boundingRect(c)
        if h_cnt > 0:
            ratios.append(w_cnt / float(h_cnt))
    bbox_aspect = float(np.mean(ratios)) if ratios else 0

    return {
        'mean_B':      means[0], 'mean_G':  means[1], 'mean_R':  means[2],
        'std_B':       stds[0],  'std_G':   stds[1],  'std_R':   stds[2],
        'area':        total_area,
        'perimeter':   total_per,
        'roundness':   roundness,
        'bbox_aspect': bbox_aspect,
    }


records = []

for split in SPLITS:
    proc_split_dir = os.path.join(PROCESSED_DIR, split)
    if not os.path.isdir(proc_split_dir):
        print(f"Skipping missing split: {split}")
        continue

    # os.listdir() order is arbitrary; sort so the CSV row order is reproducible
    for class_name in sorted(os.listdir(proc_split_dir)):
        proc_class_dir = os.path.join(proc_split_dir, class_name)
        if not os.path.isdir(proc_class_dir):
            continue

        # masks are in Segmented/masks/<class_name>/
        seg_mask_dir = os.path.join(MASK_ROOT, "masks", class_name)
        if not os.path.isdir(seg_mask_dir):
            print(f"  No mask folder for class '{class_name}', skipping.")
            continue

        # Every suffix carries its leading dot so only real extensions match.
        images = sorted(
            f for f in os.listdir(proc_class_dir)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        )
        # Listed only for the summary line; the masks directory is shared by
        # all splits, so this count covers the whole class.
        masks  = [f for f in os.listdir(seg_mask_dir)
                  if f.lower().endswith('_mask.png')]
        print(f"{split}/{class_name}: {len(images)} images, {len(masks)} masks")

        for fname in images:
            base      = os.path.splitext(fname)[0]
            img_path  = os.path.join(proc_class_dir, fname)
            mask_path = os.path.join(seg_mask_dir, f"{base}_mask.png")

            if not os.path.exists(mask_path):
                print(f"    ✗ missing mask for {fname}")
                continue

            img  = cv2.imread(img_path)
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if img is None or mask is None:
                print(f"    ✗ failed to load {fname}")
                continue

            features = _lesion_features(img, mask)
            if features is None:
                print(f"    ⚠ no lesion pixels in {fname}")
                continue

            # ─── APPEND FEATURE ROW ─────────────────────────────────────────
            records.append({
                'split':    split,
                'label':    class_name,
                'filename': fname,
                **features,
            })

# ─── BUILD DATAFRAME & SAVE ───────────────────────────────────────────────────
df = pd.DataFrame(records, columns=FEATURE_COLUMNS)
df.to_csv('features_all_splits.csv', index=False)
print(f"\nExtracted features for {len(df)} images across splits {SPLITS}")
print("Saved to features_all_splits.csv")


train/Pepper__bell___Bacterial_spot: 698 images, 997 masks
train/Pepper__bell___healthy: 1035 images, 1478 masks
train/Potato___Early_blight: 700 images, 1000 masks
train/Potato___healthy: 106 images, 152 masks
train/Potato___Late_blight: 700 images, 1000 masks
train/Tomato_Bacterial_spot: 1489 images, 2127 masks
train/Tomato_Early_blight: 297 images, 425 masks
val/Pepper__bell___Bacterial_spot: 149 images, 997 masks
val/Pepper__bell___healthy: 222 images, 1478 masks
val/Potato___Early_blight: 150 images, 1000 masks
val/Potato___healthy: 23 images, 152 masks
val/Potato___Late_blight: 150 images, 1000 masks
val/Tomato_Bacterial_spot: 319 images, 2127 masks
val/Tomato_Early_blight: 64 images, 425 masks
test/Pepper__bell___Bacterial_spot: 150 images, 997 masks
test/Pepper__bell___healthy: 221 images, 1478 masks
test/Potato___Early_blight: 150 images, 1000 masks
test/Potato___healthy: 23 images, 152 masks
test/Potato___Late_blight: 150 images, 1000 masks
test/Tomato_Bacterial_spot: 319 ima

# Step 06: Disease Prediction Model

1. **Load** `features_all_splits.csv` and split it into `train_df` / `val_df` / `test_df` by the `split` column.  
2. **Scale** features with `StandardScaler` fitted on train, applied to test.  
3. **Train** an SVM via `GridSearchCV(cv=5)` on the scaled train set, selecting best hyperparameters.  
4. **Evaluate** on test: print accuracy, per-class precision/recall/F1, and confusion matrix.  
5. **Define** `percent_white_on_leaf(mask_path, img_path)` to  
   - resize mask to match image,  
   - threshold to isolate leaf area,  
   - compute `% white (lesion) pixels over leaf pixels`.  
6. **Loop** over every sample in the train, val, and test splits, call that function, and collect the percentages.  
7. **Add** an `infected_pct_leaf` value to every row and save the result to `results_with_infected_area_all_splits.csv`.  


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import joblib

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ─── CONFIGURATION ────────────────────────────────────────────────────────────
FEATURES_CSV   = 'features_all_splits.csv'  # feature table; must carry a 'split' column
PROCESSED_DIR  = 'Processed'                # has train/, val/, test/ subfolders
MASK_ROOT      = os.path.join('Segmented', 'masks')  # masks/<class_name>/*.png

# ─── STEP 1: LOAD FEATURES & SPLIT ─────────────────────────────────────────────
# Read the whole table once, then slice it by the value of its 'split' column.
df = pd.read_csv(FEATURES_CSV)
train_df = df.loc[df['split'].eq('train')]
val_df   = df.loc[df['split'].eq('val')]
test_df  = df.loc[df['split'].eq('test')]

def prepare_xy(df_split):
    """Split a feature table into model inputs and targets.

    Args:
        df_split: DataFrame holding the bookkeeping columns ``split``,
            ``label`` and ``filename`` next to the feature columns.

    Returns:
        ``(X, y)`` where ``X`` is ``df_split`` without the three bookkeeping
        columns and ``y`` is its ``label`` column.
    """
    bookkeeping = ['split', 'label', 'filename']
    features = df_split.drop(columns=bookkeeping)
    return features, df_split['label']

X_train, y_train = prepare_xy(train_df)
X_test,  y_test  = prepare_xy(test_df)

# ─── STEP 2: SCALE FEATURES ───────────────────────────────────────────────────
# Fit the scaler on the training features only and reuse its statistics for
# the test features, so no test information enters the fit.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# ─── STEP 3: TRAIN SVM ────────────────────────────────────────────────────────
svc = SVC(probability=True, random_state=42)
# `gamma` is a coefficient of the RBF kernel and is ignored by the linear one,
# so crossing it with kernel='linear' only repeats identical fits. One sub-grid
# per kernel searches the same distinct models with fewer candidates.
# When the linear kernel wins, best_params_ therefore carries no 'gamma' key.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['rbf'],    'C': [0.1, 1, 10], 'gamma': ['scale', 'auto']},
]
# 5-fold cross-validated search scored by accuracy, using all CPU cores
grid = GridSearchCV(svc, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid.fit(X_train_scaled, y_train)
print("Best SVM parameters:", grid.best_params_)
model = grid.best_estimator_

# ─── STEP 4: EVALUATE ON TEST SET ─────────────────────────────────────────────
y_pred = model.predict(X_test_scaled)
print(f"\nTest Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# ─── STEP 5: PERCENT WHITE PIXELS OVER LEAF AREA ───────────────────────────────
def percent_white_on_leaf(mask_path, img_path, leaf_thr=10):
    """Return the percentage of leaf pixels that the lesion mask marks white.

    Both files are read as grayscale. A pixel counts as leaf when its gray
    value in ``img_path`` exceeds ``leaf_thr``, and as lesion when the mask is
    non-zero there.
    NOTE(review): this presumes non-leaf background in ``img_path`` is darker
    than ``leaf_thr``; confirm against how those images are produced.

    Args:
        mask_path: path to the lesion mask image.
        img_path: path to the leaf image.
        leaf_thr: gray level above which a pixel is treated as leaf.

    Returns:
        ``np.nan`` if either file cannot be read, ``0.0`` if no pixel exceeds
        ``leaf_thr``, otherwise ``lesion-on-leaf pixels / leaf pixels * 100``.
    """
    lesion_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if lesion_mask is None or gray is None:
        return np.nan

    # Bring the mask to the image's size; nearest neighbour keeps it binary.
    height, width = gray.shape
    if lesion_mask.shape != (height, width):
        lesion_mask = cv2.resize(
            lesion_mask, (width, height), interpolation=cv2.INTER_NEAREST
        )

    on_leaf = gray > leaf_thr
    leaf_px = on_leaf.sum()
    if leaf_px == 0:
        return 0.0

    # Count only lesion pixels that also lie on the leaf.
    lesion_px = ((lesion_mask > 0) & on_leaf).sum()
    return lesion_px / leaf_px * 100

# ─── STEP 6: COMPUTE FOR ALL SPLITS ────────────────────────────────────────────
# For every feature row, rebuild the image and mask paths from its split,
# label and filename, then measure the infected share of the leaf.
all_splits = [('train', train_df), ('val', val_df), ('test', test_df)]
pct_list = []
rows = []
for split, subset in all_splits:
    for _, row in subset.iterrows():
        label, fname = row['label'], row['filename']
        base = os.path.splitext(fname)[0]
        img_path  = os.path.join(PROCESSED_DIR, split, label, fname)
        mask_path = os.path.join(MASK_ROOT, label, base + "_mask.png")

        pct = percent_white_on_leaf(mask_path, img_path)
        pct_list.append(pct)

        # original feature columns plus the new percentage
        record = row.to_dict()
        record['infected_pct_leaf'] = pct
        rows.append(record)
        print(f"{split}/{label}/{fname} → {pct:.2f}%")

# ─── STEP 7: SAVE COMPLETE RESULTS ────────────────────────────────────────────
out_df = pd.DataFrame(rows)
out_df.to_csv('results_with_infected_area_all_splits.csv', index=False)
print("\nSaved all-splits results to 'results_with_infected_area_all_splits.csv'")


Best SVM parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}

Test Accuracy: 0.8449

Classification Report:
                                precision    recall  f1-score   support

Pepper__bell___Bacterial_spot       0.79      0.83      0.81       150
       Pepper__bell___healthy       0.85      0.92      0.88       221
        Potato___Early_blight       0.92      0.83      0.87       150
         Potato___Late_blight       0.83      0.78      0.80       150
             Potato___healthy       0.79      0.48      0.59        23
        Tomato_Bacterial_spot       0.86      0.95      0.91       319
          Tomato_Early_blight       0.70      0.41      0.51        64

                     accuracy                           0.84      1077
                    macro avg       0.82      0.74      0.77      1077
                 weighted avg       0.84      0.84      0.84      1077

Confusion Matrix:
 [[125  15   3   4   1   1   1]
 [  6 203   2   0   0   9   1]
 [  8   4 124   5   1