<a href="https://colab.research.google.com/github/fajaralfad/klasifikasi-penyakit-daun-mangga-yolo11-flutter/blob/main/Model_Deteksi_Penyakit_Mangga_YoloV11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Splitting data**

In [None]:
# Mount Google Drive at /content/drive so later cells can read and write files
# under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
# Show what is stored at the Drive root.
os.listdir("/content/drive/MyDrive")

['Video dan Foto', 'DeteksiPenyakitDaunMangga', 'data']

In [None]:
# Show the contents of the project folder.
os.listdir("/content/drive/MyDrive/DeteksiPenyakitDaunMangga")

['data']

In [None]:
# Show the contents of the project's data folder.
os.listdir("/content/drive/MyDrive/DeteksiPenyakitDaunMangga/data")

['MangoLeafBD', 'Model_Deteksi_Penyakit_Mangga_YoloV11.ipynb']

In [None]:
# Show the entries of the MangoLeafBD dataset folder (one sub-folder per class).
os.listdir("/content/drive/MyDrive/DeteksiPenyakitDaunMangga/data/MangoLeafBD")

['Anthracnose',
 'Die Back',
 'Cutting Weevil',
 'Bacterial Canker',
 'Powdery Mildew',
 'Healthy',
 'Sooty Mould',
 'Gall Midge']

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

In [None]:
# Source dataset: the folder whose sub-folders are the class directories.
dataset_dir = "/content/drive/MyDrive/DeteksiPenyakitDaunMangga/data/MangoLeafBD"
# Destination root for the train/val/test copies. Note that this lives under
# MyDrive/data, not under the DeteksiPenyakitDaunMangga project folder.
base_dir = "/content/drive/MyDrive/data/MangoLeafBD_Split"

In [None]:
# Pre-create the three split roots under base_dir; exist_ok=True makes it safe
# to re-run this cell when the folders are already there.
for split in ('train', 'val', 'test'):
    split_root = os.path.join(base_dir, split)
    os.makedirs(split_root, exist_ok=True)

In [None]:
# Collect every class name (one sub-folder per class). Sorted so that the
# processing order does not depend on the order the filesystem reports.
classes = sorted(os.listdir(dataset_dir))

for cls in classes:
    cls_path = os.path.join(dataset_dir, cls)
    if not os.path.isdir(cls_path):
        continue  # skip stray files that sit next to the class folders

    # os.listdir() returns entries in arbitrary order. Without sorting,
    # random_state=42 would not reproduce the same split from run to run, and
    # a second run could copy the same image into a different split (leakage
    # between train/val/test). Only regular files are kept because
    # shutil.copy() cannot copy a nested directory.
    imgs = sorted(
        name
        for name in os.listdir(cls_path)
        if os.path.isfile(os.path.join(cls_path, name))
    )

    # Split 70% train, 15% val, 15% test: hold out 30% first, then cut that
    # hold-out in half.
    train_files, temp_files = train_test_split(imgs, test_size=0.3, random_state=42)
    val_files, test_files = train_test_split(temp_files, test_size=0.5, random_state=42)

    # Copy each subset into <base_dir>/<split>/<class>/.
    for split, files in zip(['train', 'val', 'test'], [train_files, val_files, test_files]):
        split_dir = os.path.join(base_dir, split, cls)
        os.makedirs(split_dir, exist_ok=True)
        for f in files:
            shutil.copy(os.path.join(cls_path, f), os.path.join(split_dir, f))

print("Dataset berhasil di-split menjadi train, val, dan test.")


In [None]:
# Report how many files ended up in each split by walking its whole tree.
for split in ('train', 'val', 'test'):
    split_root = os.path.join(base_dir, split)
    total_images = 0
    for _dirpath, _dirnames, filenames in os.walk(split_root):
        total_images += len(filenames)
    print(f"{split}: {total_images} gambar")

train: 2800 gambar
val: 600 gambar
test: 600 gambar


**Train Model dengan YOLOv11**

In [None]:
!pip install ultralytics

In [None]:
import torch
import torchvision.transforms as T

from ultralytics import YOLO
from ultralytics.data.dataset import ClassificationDataset
from ultralytics.models.yolo.classify import ClassificationTrainer, ClassificationValidator


class CustomizedDataset(ClassificationDataset):
    """Classification dataset that replaces ``torch_transforms`` with a hand-built pipeline.

    After the parent initializer runs, ``self.torch_transforms`` is overwritten
    with a torchvision ``Compose``: a random-augmentation pipeline when
    ``augment`` is true, otherwise a plain resize/tensor/normalize pipeline.
    """

    def __init__(self, root: str, args, augment: bool = False, prefix: str = ""):
        """Build the dataset and install the custom transform pipeline.

        Args:
            root: Dataset directory, forwarded unchanged to the parent class.
            args: Configuration object, forwarded to the parent class; its
                ``imgsz`` attribute gives the square output size.
            augment: Selects the augmentation pipeline when true, the
                deterministic evaluation pipeline when false.
            prefix: Forwarded unchanged to the parent class.
        """
        super().__init__(root, args, augment, prefix)

        # Steps shared by both pipelines.
        resize = T.Resize((args.imgsz, args.imgsz))
        to_tensor = T.ToTensor()
        # NOTE(review): these are the widely used ImageNet channel statistics.
        # Confirm that every inference/export path applies the same mean/std
        # that the model was trained with.
        normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

        if augment:
            # Enhanced training transforms: geometric and colour augmentation
            # first, then tensor conversion, normalization and random erasing.
            pipeline = [
                resize,
                T.RandomHorizontalFlip(p=0.5),
                T.RandomVerticalFlip(p=0.2),
                T.RandomRotation(degrees=15),
                T.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
                T.RandAugment(num_ops=2, magnitude=9, interpolation=T.InterpolationMode.BILINEAR),
                T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
                to_tensor,
                normalize,
                T.RandomErasing(p=0.25, scale=(0.02, 0.33), ratio=(0.3, 3.3), inplace=True),
            ]
        else:
            # Evaluation transforms: no randomness at all.
            pipeline = [resize, to_tensor, normalize]

        self.torch_transforms = T.Compose(pipeline)


class CustomizedTrainer(ClassificationTrainer):
    """Classification trainer whose datasets are ``CustomizedDataset`` instances."""

    def build_dataset(self, img_path: str, mode: str = "train", batch=None):
        """Return a ``CustomizedDataset`` rooted at ``img_path``.

        Augmentation is switched on only when ``mode`` equals ``"train"``;
        ``mode`` is also passed as the dataset prefix. ``batch`` is accepted
        but not used here.
        """
        use_augmentation = mode == "train"
        return CustomizedDataset(
            root=img_path,
            args=self.args,
            augment=use_augmentation,
            prefix=mode,
        )


class CustomizedValidator(ClassificationValidator):
    """Classification validator whose dataset is a ``CustomizedDataset`` without augmentation."""

    def build_dataset(self, img_path: str, mode: str = "train"):
        """Return a non-augmented ``CustomizedDataset`` for standalone validation.

        Args:
            img_path: Directory of the split to evaluate.
            mode: Kept for signature compatibility; it no longer influences
                augmentation.

        Returns:
            A ``CustomizedDataset`` using the deterministic evaluation
            transforms, with ``self.args.split`` as its prefix.
        """
        # Evaluation must never use random augmentation. Previously
        # ``augment`` was derived from ``mode``, whose default is "train", so
        # any caller passing only the path enabled the training augmentations
        # during validation/testing and produced noisy, non-reproducible
        # metrics.
        # NOTE(review): the stock ClassificationValidator appears to call
        # build_dataset(dataset_path) without a mode -- confirm against the
        # installed Ultralytics version.
        return CustomizedDataset(root=img_path, args=self.args, augment=False, prefix=self.args.split)


# Load the yolo11n-cls.pt checkpoint as the starting point.
model = YOLO("yolo11n-cls.pt")

# Training with a minimal set of parameters: the augmentation itself is
# defined in CustomizedDataset, so only optimisation/runtime settings are here.
train_settings = {
    # Basic parameters
    "epochs": 50,
    "imgsz": 224,
    "batch": 32,
    # Regularization
    "dropout": 0.3,
    "weight_decay": 0.0005,
    # Learning rate
    "lr0": 0.001,
    "lrf": 0.01,
    "momentum": 0.937,
    "optimizer": 'AdamW',
    # Early stopping
    "patience": 20,
    # Validation & saving
    "val": True,
    "plots": True,
    "save": True,
    "save_period": 10,
    # Hardware
    "device": 0,
    "workers": 8,
    # Reproducibility
    "seed": 42,
    "deterministic": True,
    "verbose": True,
}

model.train(
    data="/content/drive/MyDrive/data/MangoLeafBD_Split",
    trainer=CustomizedTrainer,
    **train_settings,
)

# Evaluate on the held-out 'test' split with the customized validator.
results = model.val(
    data="/content/drive/MyDrive/data/MangoLeafBD_Split",
    validator=CustomizedValidator,
    imgsz=224,
    batch=32,
    split='test',
)

print("\nTest Results:")
print(f"Top-1 Accuracy: {results.top1:.4f}")
print(f"Top-5 Accuracy: {results.top5:.4f}")

In [None]:
# Copy the local /content/runs folder into the Drive dataset folder
# (scp is used here for a purely local, recursive copy).
!scp -r /content/runs '/content/drive/MyDrive/data/MangoLeafBD_Split'

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

def print_best_metrics(results_csv_path):
    """Print the best and final epoch metrics recorded in a results CSV.

    Args:
        results_csv_path: Path (or file-like object) of a CSV containing the
            columns ``epoch``, ``train/loss``, ``val/loss``,
            ``metrics/accuracy_top1`` and ``metrics/accuracy_top5``. Column
            names may carry surrounding whitespace; it is stripped.

    Returns:
        The loaded DataFrame with stripped column names. When the CSV has no
        rows, a short notice is printed and the empty DataFrame is returned.
    """
    df = pd.read_csv(results_csv_path)
    df.columns = df.columns.str.strip()

    # idxmin()/idxmax() and index[-1] all fail on an empty frame.
    if df.empty:
        print("results.csv contains no epochs; nothing to summarize.")
        return df

    best_val_loss = df.loc[df['val/loss'].idxmin()]
    best_top1 = df.loc[df['metrics/accuracy_top1'].idxmax()]
    best_top5 = df.loc[df['metrics/accuracy_top5'].idxmax()]
    final = df.loc[df.index[-1]]

    print("\n" + "="*60)
    print("BEST METRICS SUMMARY")
    print("="*60)

    print("\nBest Validation Loss:")
    print(f"   Epoch: {best_val_loss['epoch']:.0f}")
    print(f"   Val Loss: {best_val_loss['val/loss']:.4f}")
    print(f"   Train Loss: {best_val_loss['train/loss']:.4f}")
    print(f"   Top-1 Acc: {best_val_loss['metrics/accuracy_top1']:.4f}")
    print(f"   Top-5 Acc: {best_val_loss['metrics/accuracy_top5']:.4f}")

    print("\nBest Top-1 Accuracy:")
    print(f"   Epoch: {best_top1['epoch']:.0f}")
    print(f"   Top-1 Acc: {best_top1['metrics/accuracy_top1']:.4f}")
    print(f"   Top-5 Acc: {best_top1['metrics/accuracy_top5']:.4f}")
    print(f"   Val Loss: {best_top1['val/loss']:.4f}")
    print(f"   Train Loss: {best_top1['train/loss']:.4f}")

    print("\nBest Top-5 Accuracy:")
    print(f"   Epoch: {best_top5['epoch']:.0f}")
    print(f"   Top-5 Acc: {best_top5['metrics/accuracy_top5']:.4f}")
    print(f"   Top-1 Acc: {best_top5['metrics/accuracy_top1']:.4f}")
    print(f"   Val Loss: {best_top5['val/loss']:.4f}")

    print("\nFinal Epoch Metrics:")
    print(f"   Epoch: {final['epoch']:.0f}")
    print(f"   Train Loss: {final['train/loss']:.4f}")
    print(f"   Val Loss: {final['val/loss']:.4f}")
    print(f"   Top-1 Acc: {final['metrics/accuracy_top1']:.4f}")
    print(f"   Top-5 Acc: {final['metrics/accuracy_top5']:.4f}")

    final_loss_gap = final['train/loss'] - final['val/loss']
    print("\nOverfitting Analysis:")
    print(f"   Loss Gap (Train - Val): {final_loss_gap:.4f}")
    # The gap is train minus val. A clearly negative gap means the validation
    # loss is higher than the training loss, i.e. the model fits the training
    # data better than unseen data: that is the overfitting case. The verdicts
    # were previously attached to the opposite signs.
    if final_loss_gap < -0.1:
        print("   Status: Overfitting detected")
    elif final_loss_gap < 0.1:
        print("   Status: Slight underfitting/good fit")
    else:
        print("   Status: Good - Model generalizes well")

    print("="*60 + "\n")

    return df


def plot_training_metrics(results_csv_path, save_path="training_metrics.png"):
    """Plot loss, accuracy, learning rate and train/val loss gap from a results CSV.

    Args:
        results_csv_path: Path of a CSV containing the columns ``epoch``,
            ``train/loss``, ``val/loss``, ``metrics/accuracy_top1``,
            ``metrics/accuracy_top5`` and ``lr/pg0``. Column names may carry
            surrounding whitespace; it is stripped.
        save_path: Where the 2x2 figure is written (300 dpi).

    Returns:
        The loaded DataFrame with stripped column names. When the CSV has no
        rows, a short notice is printed and nothing is plotted.
    """
    df = pd.read_csv(results_csv_path)
    df.columns = df.columns.str.strip()

    # idxmin()/idxmax() below fail on an empty frame.
    if df.empty:
        print("results.csv contains no epochs; nothing to plot.")
        return df

    sns.set_style("whitegrid")

    fig, axes = plt.subplots(2, 2, figsize=(16, 10))
    fig.suptitle('Training Metrics Overview', fontsize=16, fontweight='bold')

    # Panel 1: training vs validation loss, with the best validation epoch starred.
    ax1 = axes[0, 0]
    ax1.plot(df['epoch'], df['train/loss'], label='Train Loss', linewidth=2.5, color='#2E86AB', marker='o', markersize=4)
    ax1.plot(df['epoch'], df['val/loss'], label='Val Loss', linewidth=2.5, color='#A23B72', marker='s', markersize=4)
    ax1.set_xlabel('Epoch', fontsize=12, fontweight='bold')
    ax1.set_ylabel('Loss', fontsize=12, fontweight='bold')
    ax1.set_title('Training vs Validation Loss', fontsize=13, fontweight='bold')
    ax1.grid(True, alpha=0.3)

    best_val_idx = df['val/loss'].idxmin()
    ax1.scatter(df.loc[best_val_idx, 'epoch'], df.loc[best_val_idx, 'val/loss'],
                color='red', s=150, zorder=5, marker='*', edgecolors='black', linewidth=1.5,
                label=f"Best Val Loss: {df.loc[best_val_idx, 'val/loss']:.4f}")
    # The legend is created once, after the marker exists, so it lists all
    # three entries.
    ax1.legend(fontsize=10, loc='best')

    # Panel 2: top-1 / top-5 accuracy, with the best top-1 epoch starred.
    ax2 = axes[0, 1]
    ax2.plot(df['epoch'], df['metrics/accuracy_top1'], label='Top-1 Accuracy',
             linewidth=2.5, color='#06A77D', marker='o', markersize=4)
    ax2.plot(df['epoch'], df['metrics/accuracy_top5'], label='Top-5 Accuracy',
             linewidth=2.5, color='#F18F01', marker='s', markersize=4)
    ax2.set_xlabel('Epoch', fontsize=12, fontweight='bold')
    ax2.set_ylabel('Accuracy', fontsize=12, fontweight='bold')
    ax2.set_title('Accuracy Metrics', fontsize=13, fontweight='bold')
    ax2.grid(True, alpha=0.3)
    ax2.set_ylim([0, 1.05])

    best_acc_idx = df['metrics/accuracy_top1'].idxmax()
    ax2.scatter(df.loc[best_acc_idx, 'epoch'], df.loc[best_acc_idx, 'metrics/accuracy_top1'],
                color='red', s=150, zorder=5, marker='*', edgecolors='black', linewidth=1.5,
                label=f"Best Top-1: {df.loc[best_acc_idx, 'metrics/accuracy_top1']:.4f}")
    ax2.legend(fontsize=10, loc='best')

    # Panel 3: learning-rate schedule on a logarithmic axis.
    ax3 = axes[1, 0]
    ax3.plot(df['epoch'], df['lr/pg0'], linewidth=2.5, color='#D62828', marker='o', markersize=4)
    ax3.set_xlabel('Epoch', fontsize=12, fontweight='bold')
    ax3.set_ylabel('Learning Rate', fontsize=12, fontweight='bold')
    ax3.set_title('Learning Rate Schedule', fontsize=13, fontweight='bold')
    ax3.grid(True, alpha=0.3)
    ax3.set_yscale('log')

    # Panel 4: train loss minus validation loss per epoch.
    ax4 = axes[1, 1]
    loss_gap = df['train/loss'] - df['val/loss']
    ax4.plot(df['epoch'], loss_gap, linewidth=2.5, color='#C1121F', marker='o', markersize=4)
    ax4.axhline(y=0, color='black', linestyle='--', linewidth=2, alpha=0.7)
    # loss_gap < 0 means validation loss is higher than training loss, which
    # is the overfitting side; loss_gap > 0 means the model does at least as
    # well on validation data. The two labels/colours were previously swapped.
    ax4.fill_between(df['epoch'], loss_gap, 0, where=(loss_gap < 0),
                      alpha=0.3, color='red', label='Overfitting')
    ax4.fill_between(df['epoch'], loss_gap, 0, where=(loss_gap > 0),
                      alpha=0.3, color='green', label='Generalizing Well')
    ax4.set_xlabel('Epoch', fontsize=12, fontweight='bold')
    ax4.set_ylabel('Train Loss - Val Loss', fontsize=12, fontweight='bold')
    ax4.set_title('Overfitting Detection', fontsize=13, fontweight='bold')
    ax4.legend(fontsize=10, loc='best')
    ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    print(f"Plot saved to: {save_path}")
    plt.show()

    return df


def analyze_training_progress(results_csv_path):
    """Print first-to-last improvement and late-epoch stability from a results CSV.

    Args:
        results_csv_path: Path (or file-like object) of a CSV containing the
            columns ``val/loss`` and ``metrics/accuracy_top1``. Column names
            may carry surrounding whitespace; it is stripped.

    Returns:
        None. Everything is reported via ``print``.
    """
    df = pd.read_csv(results_csv_path)
    df.columns = df.columns.str.strip()

    print("\n" + "="*60)
    print("TRAINING PROGRESS ANALYSIS")
    print("="*60)

    # Nothing to compare when no epoch was recorded.
    if df.empty:
        print("\nNo epochs recorded; nothing to analyze.")
        print("="*60 + "\n")
        return

    def _percent(change, baseline):
        """Format change/baseline as a signed percentage, or 'n/a' if undefined."""
        # A zero (or missing) baseline would otherwise print '+inf%' / 'nan%'.
        if pd.isna(baseline) or baseline == 0:
            return "n/a"
        return f"{change / baseline * 100:+.2f}%"

    # Positional access so the first/last rows are used whatever the index is.
    first_epoch = df.iloc[0]
    last_epoch = df.iloc[-1]

    initial_val_loss = first_epoch['val/loss']
    final_val_loss = last_epoch['val/loss']
    val_loss_improvement = _percent(initial_val_loss - final_val_loss, initial_val_loss)

    initial_acc = first_epoch['metrics/accuracy_top1']
    final_acc = last_epoch['metrics/accuracy_top1']
    acc_improvement = _percent(final_acc - initial_acc, initial_acc)

    print("\nOverall Improvements:")
    print(f"   Val Loss: {initial_val_loss:.4f} -> {final_val_loss:.4f} ({val_loss_improvement})")
    print(f"   Top-1 Acc: {initial_acc:.4f} -> {final_acc:.4f} ({acc_improvement})")

    last_10_epochs = df.tail(10)
    print(f"\nLast {len(last_10_epochs)} Epochs Stability:")

    if len(last_10_epochs) < 2:
        # The sample standard deviation of a single value is NaN, which would
        # print 'nan' and fall through to a misleading status.
        print("   Status: Not enough epochs to measure stability")
    else:
        val_loss_std = last_10_epochs['val/loss'].std()
        acc_std = last_10_epochs['metrics/accuracy_top1'].std()

        print(f"   Val Loss Std Dev: {val_loss_std:.4f}")
        print(f"   Accuracy Std Dev: {acc_std:.4f}")

        if val_loss_std < 0.01 and acc_std < 0.01:
            print("   Status: Training has plateaued")
        else:
            print("   Status: Still improving")

    print("="*60 + "\n")


import glob

# Collect every runs/classify/train* folder, oldest first by modification time,
# then analyse the newest one.
result_dirs = glob.glob('runs/classify/train*')
result_dirs.sort(key=lambda run_dir: Path(run_dir).stat().st_mtime)

if not result_dirs:
    print("No training results found. Please check your training directory.")
else:
    latest_result = result_dirs[-1]
    results_csv = Path(latest_result) / "results.csv"

    print(f"Using results from: {latest_result}\n")

    if not results_csv.exists():
        print(f"results.csv not found at {results_csv}")
    else:
        df = print_best_metrics(results_csv)
        analyze_training_progress(results_csv)
        # The figure is stored next to results.csv inside the run folder.
        save_plot = results_csv.with_name("training_analysis.png")
        plot_training_metrics(results_csv, save_path=save_plot)

**Prediksi**

In [None]:
from ultralytics import YOLO

# Load a model
# NOTE(review): this path points at a TFLite file inside best_saved_model/,
# presumably an artifact of the TFLite export cell further down -- confirm, and
# run that export first, otherwise the file will not exist.
model = YOLO("/content/drive/MyDrive/data/MangoLeafBD_Split/runs/classify/train/weights/best_saved_model/best_float32.tflite")  # load a custom model

# Predict with the model
results = model("/content/drive/MyDrive/data/MangoLeafBD_Split/test/Die Back/IMG_20211028_003845 (Custom).jpg")  # predict on an image

Loading /content/drive/MyDrive/data/MangoLeafBD_Split/runs/classify/train/weights/best_saved_model/best_float32.tflite for TensorFlow Lite inference...

image 1/1 /content/drive/MyDrive/data/MangoLeafBD_Split/test/Die Back/IMG_20211028_003845 (Custom).jpg: 224x224 Die Back 0.60, Sooty Mould 0.18, Gall Midge 0.12, Healthy 0.04, Powdery Mildew 0.03, 8.4ms
Speed: 4.8ms preprocess, 8.4ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)


**Export dan Convert ke TFLite**

In [None]:
from ultralytics import YOLO

# Load the model produced by training (best.pt from the run copied to Drive)
model = YOLO("/content/drive/MyDrive/data/MangoLeafBD_Split/runs/classify/train/weights/best.pt")

# Export to TFLite format with INT8 quantization
# NOTE(review): the prediction cell loads best_float32.tflite, not an INT8
# file -- confirm which exported file is the one intended for deployment.
model.export(format="tflite", int8=True)