# Leaf Disease Classification and Spot Counting with YOLOv8

This notebook trains a YOLOv8 Classification model to distinguish between Healthy and Diseased leaves. Additionally, for leaves classified as 'Diseased', it uses image processing to estimate the number of disease spots.

In [None]:
# Install Ultralytics YOLOv8
!pip install ultralytics

import os
import shutil
import random
import cv2
import numpy as np
from ultralytics import YOLO
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive so files stored there are reachable from this runtime
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
# Unzip the dataset
# Assuming the file is at /content/drive/MyDrive/Final-Leaf-Datasets.zip
zip_path = '/content/drive/MyDrive/Final-Leaf-Datasets.zip'
extract_path = '/content/temp_dataset'

if os.path.exists(zip_path):
    # Extract with the standard library rather than the `unzip` shell command:
    # re-running the cell overwrites existing files without an interactive
    # prompt, and an unreadable archive raises instead of being reported as
    # a successful extraction.
    shutil.unpack_archive(zip_path, extract_path, format='zip')
    print("Dataset extracted successfully.")
else:
    print(f"File not found at {zip_path}. Please check the path.")

## Data Preprocessing

We need to reorganize the dataset into the standard YOLO classification structure:
```
datasets/leaf_dataset/
  train/
    healthy/
    diseased/
  val/
    healthy/
    diseased/
```
The script below recursively finds all images in your extracted `Healthy` and `Diseased` folders and splits them into train/val sets.

In [None]:
# Configuration
output_dir = '/content/datasets/leaf_dataset'
train_ratio = 0.8  # fraction of each class used for training; the rest is validation

# Remove any previous output so stale files cannot end up in the new split
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)

# Create <output_dir>/<split>/<category> for every split/category pair
for split in ('train', 'val'):
    for category in ('healthy', 'diseased'):
        class_dir = os.path.join(output_dir, split, category)
        os.makedirs(class_dir, exist_ok=True)

def get_image_files(directory):
    """Recursively collect the paths of image files under ``directory``.

    A file counts as an image when its extension, compared
    case-insensitively, is one of .jpg, .jpeg, .png or .bmp.

    Returns:
        A list of paths (each joined onto the folder ``os.walk`` reported),
        in the order ``os.walk`` visits them. Empty when nothing matches.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if os.path.splitext(name)[1].lower() in image_suffixes
    ]

# Locate the source directories
# We look for 'Healthy' and 'Diseased' folders (case-insensitive) in the extracted path.
# If several folders share one of these names, the last one visited by os.walk is used.
source_healthy = None
source_diseased = None

for root, dirs, files in os.walk(extract_path):
    for d in dirs:
        if d.lower() == 'healthy':
            source_healthy = os.path.join(root, d)
        elif d.lower() == 'diseased':
            source_diseased = os.path.join(root, d)


def _copy_into_splits(image_paths, category):
    """Shuffle ``image_paths`` in place and copy them into the train/val folders.

    The first ``int(len(image_paths) * train_ratio)`` shuffled images go to
    ``<output_dir>/train/<category>``, the remainder to
    ``<output_dir>/val/<category>``.

    Args:
        image_paths: List of source image paths; it is shuffled in place.
        category: Class sub-folder name ('healthy' or 'diseased').
    """
    random.shuffle(image_paths)
    split_idx = int(len(image_paths) * train_ratio)
    used_destinations = set()

    for i, img_path in enumerate(image_paths):
        split = 'train' if i < split_idx else 'val'
        dest_dir = os.path.join(output_dir, split, category)
        stem, ext = os.path.splitext(os.path.basename(img_path))
        dest_path = os.path.join(dest_dir, stem + ext)

        # Images are gathered recursively, so two sub-folders can contain files
        # with the same name. Copying both under the bare basename would let
        # the second silently overwrite the first, so add a numeric suffix
        # until the destination is unique within this run.
        suffix = 1
        while dest_path in used_destinations:
            dest_path = os.path.join(dest_dir, f"{stem}_{suffix}{ext}")
            suffix += 1
        used_destinations.add(dest_path)

        shutil.copy(img_path, dest_path)


if not source_healthy or not source_diseased:
    print("Error: Could not find 'Healthy' and 'Diseased' folders. Please check the zip structure.")
    # Print the directory structure to help debug if this happens
    print("Found directories:")
    for root, dirs, files in os.walk(extract_path):
        for d in dirs:
            print(os.path.join(root, d))
else:
    print(f"Found Healthy folder at: {source_healthy}")
    print(f"Found Diseased folder at: {source_diseased}")

    # Process Healthy Images
    healthy_images = get_image_files(source_healthy)
    _copy_into_splits(healthy_images, 'healthy')
    print(f"Processed {len(healthy_images)} healthy images.")

    # Process Diseased Images
    diseased_images = get_image_files(source_diseased)
    _copy_into_splits(diseased_images, 'diseased')
    print(f"Processed {len(diseased_images)} diseased images.")

## Train YOLOv8 Classification Model

In [None]:
# Training settings, gathered in one place so they are easy to adjust
train_settings = dict(
    data=output_dir,  # dataset root holding the train/ and val/ folders
    epochs=20,
    imgsz=224,
    name='leaf_disease_model',
)

# Load a pre-trained YOLOv8n-cls model and train it on the leaf dataset
model = YOLO('yolov8n-cls.pt')
results = model.train(**train_settings)

## Spot Counting Logic
Because the dataset has no bounding-box labels for individual spots, we cannot train an object detector to count them directly. Instead, we use classical image processing with OpenCV (a fixed darkness threshold followed by contour detection) to estimate the number of spots on leaves classified as 'diseased'.

In [None]:
def count_spots(image_path, dark_threshold=200, min_spot_area=10):
    """Estimate the number of dark spots in a leaf image.

    The image is converted to grayscale and inverted, then a fixed threshold
    keeps only the darkest regions. Each sufficiently large outer contour of
    the resulting mask is counted as one spot.

    Args:
        image_path: Path of the image to analyse.
        dark_threshold: Threshold (0-255) applied to the inverted grayscale
            image. A pixel belongs to a spot when its inverted intensity is
            above this value, i.e. its original gray level is below
            ``255 - dark_threshold``. May need tuning per dataset.
        min_spot_area: Contours whose ``cv2.contourArea`` is not greater than
            this value are discarded as noise.

    Returns:
        A ``(count, vis_img)`` tuple: the number of kept contours and a copy
        of the image with those contours outlined in red. ``(0, None)`` is
        returned when the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        return 0, None

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Invert so dark spots become bright
    gray_inv = cv2.bitwise_not(gray)

    # Fixed (not adaptive) threshold: we assume spots are significantly darker
    # than the leaf.
    # NOTE(review): any other dark region, such as a dark background, would
    # also pass this threshold - confirm against the actual images.
    _, thresh = cv2.threshold(gray_inv, dark_threshold, 255, cv2.THRESH_BINARY)

    # Only outer contours are retrieved, so holes inside a spot are not counted
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Filter small noise
    spot_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > min_spot_area]

    # Draw contours on a copy of the image for visualization (BGR red, 2 px)
    vis_img = img.copy()
    cv2.drawContours(vis_img, spot_contours, -1, (0, 0, 255), 2)

    return len(spot_contours), vis_img

def predict_and_count(image_path, model):
    """Classify a leaf image and, if it is diseased, estimate its spot count.

    Prints the predicted class. For a 'diseased' prediction it also prints the
    spot estimate from ``count_spots`` and displays the annotated image;
    otherwise it displays the unmodified image.

    Args:
        image_path: Path of the image to classify.
        model: Callable classification model; ``model(image_path)[0]`` must
            expose ``probs.top1`` and a ``names`` mapping.

    Returns:
        None. Results are printed and displayed.
    """
    # 1. Run Classification
    prediction = model(image_path)[0]
    class_name = prediction.names[prediction.probs.top1]

    print(f"Prediction: {class_name.upper()}")

    # 2. If Diseased, Count Spots
    if class_name == 'diseased':
        count, vis_img = count_spots(image_path)
        if vis_img is None:
            # count_spots signals an unreadable image with (0, None); passing
            # None on to cv2_imshow would raise instead of explaining why.
            print(f"Could not read image for spot counting: {image_path}")
            return
        print(f"Estimated Spot Count: {count}")
        cv2_imshow(vis_img)
    else:
        print("Leaf is Healthy. No spots to count.")
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None rather than raising on failure
            print(f"Could not read image for display: {image_path}")
            return
        cv2_imshow(img)

In [None]:
# Test on a random validation image
import glob

# Accept every extension the dataset split keeps (case-insensitive), not only
# lowercase ".jpg"; otherwise a dataset of PNG/JPEG/BMP or ".JPG" files is
# wrongly reported as having no validation images.
image_extensions = {'.jpg', '.jpeg', '.png', '.bmp'}
val_diseased = [
    path
    for path in glob.glob(f"{output_dir}/val/diseased/*")
    if os.path.splitext(path)[1].lower() in image_extensions
]
if val_diseased:
    test_image = random.choice(val_diseased)
    predict_and_count(test_image, model)
else:
    print("No validation images found.")