# Klasifikasi Gambar Cabai dengan Model yang Sudah Dilatih

Notebook ini digunakan untuk melakukan klasifikasi gambar cabai menggunakan model SVM atau KNN yang sudah dilatih sebelumnya.

## 1. Import Libraries

In [33]:
import cv2
import joblib
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from skimage import measure
from skimage.feature import graycomatrix, graycoprops

# Konfigurasi
MODEL_DIR = Path("../models")
IMG_SIZE = (224, 224)
CLAHE = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

## 2. Load Model dan Preprocessing Components

In [34]:
# Load kedua model (SVM dan KNN)
import pandas as pd

models = {}

for model_type in ["svm", "knn"]:
    model_path = MODEL_DIR / f"{model_type}_model.pkl"
    print(f"Loading model {model_type.upper()} dari: {model_path}")
    
    model_artifacts = joblib.load(model_path)
    
    models[model_type] = {
        "model": model_artifacts["model"],
        "scaler": model_artifacts["scaler"],
        "pca": model_artifacts["pca"],
        "label_encoder": model_artifacts["label_encoder"],
        "class_names": model_artifacts["class_names"],
        "feature_names": model_artifacts["feature_names"],
        "n_features": model_artifacts["n_features"],
    }
    
    print(f"  ✓ Model {model_type.upper()} berhasil dimuat")
    print(f"  - Kelas: {model_artifacts['class_names']}")
    print(f"  - Jumlah fitur: {model_artifacts['n_features']}")
    print(f"  - Komponen PCA: {model_artifacts['pca'].n_components_}\n")

print("=" * 50)
print("Semua model berhasil dimuat!")
print("=" * 50)

Loading model SVM dari: ..\models\svm_model.pkl
  ✓ Model SVM berhasil dimuat
  - Kelas: ['belum-matang', 'matang', 'setengah-matang']
  - Jumlah fitur: 128
  - Komponen PCA: 36

Loading model KNN dari: ..\models\knn_model.pkl
  ✓ Model KNN berhasil dimuat
  - Kelas: ['belum-matang', 'matang', 'setengah-matang']
  - Jumlah fitur: 128
  - Komponen PCA: 36

Semua model berhasil dimuat!


## 3. Fungsi Preprocessing (sama dengan training)

In [35]:
def gray_world_white_balance(img_bgr: np.ndarray) -> np.ndarray:
    img = img_bgr.astype(np.float32)
    avg_bgr = img.mean(axis=(0, 1))
    gray_val = avg_bgr.mean()
    scale = gray_val / (avg_bgr + 1e-6)
    balanced = img * scale
    balanced = np.clip(balanced, 0, 255).astype(np.uint8)
    return balanced


def keep_largest_component(binary_mask: np.ndarray) -> np.ndarray:
    labels = measure.label(binary_mask, connectivity=2)
    if labels.max() == 0:
        return binary_mask
    counts = np.bincount(labels.ravel())
    counts[0] = 0
    largest = counts.argmax()
    mask = (labels == largest).astype(np.uint8) * 255
    return mask


def build_fruit_mask(hsv_img: np.ndarray) -> np.ndarray:
    bg_green = cv2.inRange(hsv_img, (30, 40, 0), (90, 255, 200))
    bg_brown = cv2.inRange(hsv_img, (5, 30, 0), (25, 200, 150))
    background = cv2.bitwise_or(bg_green, bg_brown)

    fruit_candidate = cv2.inRange(hsv_img, (0, 40, 40), (179, 255, 255))
    mask = cv2.bitwise_and(fruit_candidate, cv2.bitwise_not(background))

    kernel = np.ones((7, 7), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=2)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)
    mask = keep_largest_component(mask)
    return mask


def mask_aware_crop(img: np.ndarray, mask: np.ndarray, pad_ratio: float = 0.05):
    ys, xs = np.where(mask > 0)
    if len(xs) == 0 or len(ys) == 0:
        fallback_mask = np.ones(img.shape[:2], dtype=np.uint8) * 255
        return img.copy(), fallback_mask
    x_min, x_max = xs.min(), xs.max()
    y_min, y_max = ys.min(), ys.max()
    h, w = img.shape[:2]
    pad_x = int((x_max - x_min) * pad_ratio)
    pad_y = int((y_max - y_min) * pad_ratio)
    x_min = max(x_min - pad_x, 0)
    x_max = min(x_max + pad_x, w)
    y_min = max(y_min - pad_y, 0)
    y_max = min(y_max + pad_y, h)
    cropped_img = img[y_min:y_max, x_min:x_max]
    cropped_mask = mask[y_min:y_max, x_min:x_max]
    return cropped_img, cropped_mask


def preprocess_image(path: Path) -> dict:
    img_bgr = cv2.imread(str(path))
    if img_bgr is None:
        raise ValueError(f"Gagal membaca {path}")

    img_rgb_original = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    wb = gray_world_white_balance(img_bgr)
    denoised = cv2.bilateralFilter(wb, d=9, sigmaColor=75, sigmaSpace=75)

    hsv = cv2.cvtColor(denoised, cv2.COLOR_BGR2HSV)
    mask = build_fruit_mask(hsv)

    cropped_bgr, cropped_mask = mask_aware_crop(denoised, mask)
    resized_bgr = cv2.resize(cropped_bgr, IMG_SIZE)
    resized_mask = cv2.resize(cropped_mask, IMG_SIZE, interpolation=cv2.INTER_NEAREST)

    hsv_resized = cv2.cvtColor(resized_bgr, cv2.COLOR_BGR2HSV)
    hsv_resized[:, :, 2] = CLAHE.apply(hsv_resized[:, :, 2])
    enhanced_bgr = cv2.cvtColor(hsv_resized, cv2.COLOR_HSV2BGR)

    gray = cv2.cvtColor(enhanced_bgr, cv2.COLOR_BGR2GRAY)
    gray_equalized = cv2.equalizeHist(gray)

    enhanced_rgb = cv2.cvtColor(enhanced_bgr, cv2.COLOR_BGR2RGB)

    return {
        "original": img_rgb_original,
        "mask": resized_mask,
        "enhanced": enhanced_rgb,
        "hsv": hsv_resized,
        "gray_equalized": gray_equalized,
    }

print("✓ Fungsi preprocessing berhasil didefinisikan")

✓ Fungsi preprocessing berhasil didefinisikan


## 4. Fungsi Ekstraksi Fitur (sama dengan training)

In [36]:
def extract_hsv_features(hsv_img: np.ndarray, bins: int = 16) -> np.ndarray:
    h_hist = cv2.calcHist([hsv_img], [0], None, [bins], [0, 180])
    s_hist = cv2.calcHist([hsv_img], [1], None, [bins], [0, 256])
    v_hist = cv2.calcHist([hsv_img], [2], None, [bins], [0, 256])
    hist = np.concatenate([h_hist.flatten(), s_hist.flatten(), v_hist.flatten()])
    hist = hist / (hist.sum() + 1e-6)
    return hist.astype(np.float32)


def extract_ccd_features(mask: np.ndarray, num_points: int = 32) -> np.ndarray:
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    if not contours:
        return np.zeros(num_points, dtype=np.float32)
    cnt = max(contours, key=cv2.contourArea)
    moments = cv2.moments(cnt)
    if moments["m00"] == 0:
        return np.zeros(num_points, dtype=np.float32)

    cx = moments["m10"] / moments["m00"]
    cy = moments["m01"] / moments["m00"]
    centroid = np.array([cx, cy])
    points = cnt.reshape(-1, 2)

    vectors = points - centroid
    radii = np.linalg.norm(vectors, axis=1)
    angles = (np.arctan2(vectors[:, 1], vectors[:, 0]) + 2 * np.pi) % (2 * np.pi)

    bins = np.linspace(0, 2 * np.pi, num_points + 1)
    descriptor = np.zeros(num_points, dtype=np.float32)
    for i in range(num_points):
        mask_angle = (angles >= bins[i]) & (angles < bins[i + 1])
        if np.any(mask_angle):
            descriptor[i] = radii[mask_angle].max()
    if descriptor.max() > 0:
        descriptor /= descriptor.max()
    return descriptor


def extract_glcm_features(gray_img: np.ndarray, mask: np.ndarray) -> np.ndarray:
    masked = cv2.bitwise_and(gray_img, gray_img, mask=mask)
    glcm = graycomatrix(
        masked,
        distances=[1, 2, 3],
        angles=[0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],
        levels=256,
        symmetric=True,
        normed=True,
    )
    props = []
    for prop in ("contrast", "correlation", "energy", "homogeneity"):
        props.extend(graycoprops(glcm, prop).flatten())
    return np.array(props, dtype=np.float32)


def extract_features_from_path(path: Path) -> np.ndarray:
    processed = preprocess_image(path)
    hsv_feat = extract_hsv_features(processed["hsv"])
    gray = cv2.cvtColor(processed["enhanced"], cv2.COLOR_RGB2GRAY)
    glcm_feat = extract_glcm_features(gray, processed["mask"])
    ccd_feat = extract_ccd_features(processed["mask"])
    return np.concatenate([hsv_feat, ccd_feat, glcm_feat])

print("✓ Fungsi ekstraksi fitur berhasil didefinisikan")

✓ Fungsi ekstraksi fitur berhasil didefinisikan


## 5. Fungsi Prediksi

In [37]:
def predict_with_both_models(image_path: Path):
    """
    Melakukan prediksi pada satu gambar menggunakan kedua model (SVM dan KNN)
    
    Returns:
        dict: {
            'svm': {...},
            'knn': {...},
            'processed': dict (gambar yang sudah diproses)
        }
    """
    # 1. Preprocessing dan ekstraksi fitur (sekali saja)
    processed = preprocess_image(image_path)
    features = extract_features_from_path(image_path)
    features = features.reshape(1, -1)
    
    results = {'processed': processed}
    
    # 2. Prediksi dengan kedua model
    for model_type, artifacts in models.items():
        # Scaling
        features_scaled = artifacts["scaler"].transform(features)
        
        # PCA transform
        features_pca = artifacts["pca"].transform(features_scaled)
        
        # Prediksi
        prediction = artifacts["model"].predict(features_pca)[0]
        predicted_class = artifacts["label_encoder"].inverse_transform([prediction])[0]
        
        # Probabilitas (jika model support)
        if hasattr(artifacts["model"], 'predict_proba'):
            proba = artifacts["model"].predict_proba(features_pca)[0]
            confidence = proba.max()
            probabilities = {class_name: prob for class_name, prob in zip(artifacts["class_names"], proba)}
        else:
            confidence = 1.0
            probabilities = {predicted_class: 1.0}
        
        results[model_type] = {
            'predicted_class': predicted_class,
            'confidence': confidence,
            'probabilities': probabilities
        }
    
    return results


def visualize_prediction_comparison(image_path: Path, result: dict):
    """
    Visualisasi hasil prediksi dengan perbandingan kedua model
    """
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    
    # Row 1: Gambar preprocessing
    axes[0, 0].imshow(result['processed']['original'])
    axes[0, 0].set_title('Gambar Original', fontsize=12, fontweight='bold')
    axes[0, 0].axis('off')
    
    axes[0, 1].imshow(result['processed']['mask'], cmap='gray')
    axes[0, 1].set_title('Mask (Segmentasi)', fontsize=12, fontweight='bold')
    axes[0, 1].axis('off')
    
    axes[0, 2].imshow(result['processed']['enhanced'])
    axes[0, 2].set_title('Gambar Enhancement', fontsize=12, fontweight='bold')
    axes[0, 2].axis('off')
    
    # Row 2: Bar chart perbandingan probabilitas untuk setiap model
    for idx, model_type in enumerate(['svm', 'knn']):
        ax = axes[1, idx]
        model_result = result[model_type]
        
        # Siapkan data untuk bar chart
        classes = list(model_result['probabilities'].keys())
        probs = [model_result['probabilities'][cls] for cls in classes]
        
        # Warna bar berdasarkan prediksi
        colors = ['green' if cls == model_result['predicted_class'] else 'lightblue' for cls in classes]
        
        ax.barh(classes, probs, color=colors)
        ax.set_xlabel('Probability', fontsize=10)
        ax.set_title(f'{model_type.upper()} - Prediksi: {model_result["predicted_class"]}\nConfidence: {model_result["confidence"]:.2%}', 
                     fontsize=11, fontweight='bold')
        ax.set_xlim([0, 1])
        
        # Tambahkan nilai di bar
        for i, (cls, prob) in enumerate(zip(classes, probs)):
            ax.text(prob + 0.02, i, f'{prob:.2%}', va='center', fontsize=9)
    
    # Comparison table di posisi [1, 2]
    axes[1, 2].axis('off')
    comparison_data = []
    for model_type in ['svm', 'knn']:
        comparison_data.append([
            model_type.upper(),
            result[model_type]['predicted_class'],
            f"{result[model_type]['confidence']:.2%}"
        ])
    
    table = axes[1, 2].table(
        cellText=comparison_data,
        colLabels=['Model', 'Prediksi', 'Confidence'],
        cellLoc='center',
        loc='center',
        colWidths=[0.25, 0.4, 0.35]
    )
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1, 2.5)
    
    # Style header
    for i in range(3):
        table[(0, i)].set_facecolor('#4CAF50')
        table[(0, i)].set_text_props(weight='bold', color='white')
    
    # Style cells
    for i in range(1, 3):
        for j in range(3):
            table[(i, j)].set_facecolor('#f0f0f0' if i % 2 == 0 else 'white')
    
    axes[1, 2].set_title('Perbandingan Model', fontsize=12, fontweight='bold')
    
    fig.suptitle(f'Hasil Prediksi: {image_path.name}', fontsize=16, fontweight='bold', y=0.98)
    plt.tight_layout()
    plt.show()
    
    # Print detail
    print(f"\n{'='*70}")
    print(f"File: {image_path.name}")
    print(f"{'='*70}")
    
    for model_type in ['svm', 'knn']:
        print(f"\n{model_type.upper()} Model:")
        print(f"  Prediksi: {result[model_type]['predicted_class']}")
        print(f"  Confidence: {result[model_type]['confidence']:.2%}")
        print(f"  Probabilitas per kelas:")
        for cls, prob in sorted(result[model_type]['probabilities'].items(), key=lambda x: x[1], reverse=True):
            bar = "█" * int(prob * 30)
            print(f"    {cls:20s}: {prob:.2%} {bar}")


def predict_image(image_path: Path):
    """Wrapper untuk kompatibilitas dengan kode lama - menggunakan kedua model"""
    return predict_with_both_models(image_path)


def visualize_prediction(image_path: Path, result: dict):
    """Wrapper untuk kompatibilitas dengan kode lama"""
    return visualize_prediction_comparison(image_path, result)

print("✓ Fungsi prediksi dengan kedua model berhasil didefinisikan")

✓ Fungsi prediksi dengan kedua model berhasil didefinisikan


## 6. Test Prediksi dengan Gambar Contoh

In [38]:
# Contoh 1: Prediksi gambar dari folder data/example
# Ganti path ini dengan path gambar yang ingin diprediksi
test_image_path = Path("../data/example/test_image.jpg")

# Cek apakah file ada
if test_image_path.exists():
    result = predict_image(test_image_path)
    visualize_prediction(test_image_path, result)
else:
    print(f"❌ File tidak ditemukan: {test_image_path}")
    print("Silakan ganti path dengan file gambar yang tersedia")
    print("\nContoh penggunaan:")
    print('test_image_path = Path("../data/example/your_image.jpg")')
    print('result = predict_image(test_image_path)')
    print('visualize_prediction(test_image_path, result)')

❌ File tidak ditemukan: ..\data\example\test_image.jpg
Silakan ganti path dengan file gambar yang tersedia

Contoh penggunaan:
test_image_path = Path("../data/example/your_image.jpg")
result = predict_image(test_image_path)
visualize_prediction(test_image_path, result)


## 7. Prediksi Multiple Gambar

In [39]:
def predict_multiple_images(image_folder: Path, extensions=[".jpg", ".jpeg", ".png", ".bmp"]):
    """
    Prediksi multiple gambar dari sebuah folder menggunakan kedua model
    """
    # Cari semua gambar di folder (hindari duplikasi)
    image_files = []
    seen_files = set()
    
    for ext in extensions:
        # Cari dengan case-insensitive
        for img_path in image_folder.glob(f"**/*{ext}"):
            if img_path not in seen_files:
                image_files.append(img_path)
                seen_files.add(img_path)
        
        # Cari uppercase extension
        ext_upper = ext.upper()
        for img_path in image_folder.glob(f"**/*{ext_upper}"):
            if img_path not in seen_files:
                image_files.append(img_path)
                seen_files.add(img_path)
    
    if not image_files:
        print(f"❌ Tidak ada gambar ditemukan di: {image_folder}")
        return None
    
    print(f"Ditemukan {len(image_files)} gambar\n")
    results = []
    
    for img_path in image_files:
        try:
            result = predict_with_both_models(img_path)
            results.append({
                'filename': img_path.name,
                'knn_prediction': result['knn']['predicted_class'],
                'knn_confidence': result['knn']['confidence'],
                'svm_prediction': result['svm']['predicted_class'],
                'svm_confidence': result['svm']['confidence'],
                'agreement': '✓' if result['svm']['predicted_class'] == result['knn']['predicted_class'] else '✗'
            })
            
            # Status untuk setiap gambar
            agreement_icon = "✓" if result['svm']['predicted_class'] == result['knn']['predicted_class'] else "✗"
            print(f"{agreement_icon} {img_path.name:35s} | SVM: {result['svm']['predicted_class']:15s} ({result['svm']['confidence']:.1%}) | KNN: {result['knn']['predicted_class']:15s} ({result['knn']['confidence']:.1%})")
            
        except Exception as e:
            print(f"❌ Error pada {img_path.name}: {str(e)}")
    
    # Buat DataFrame untuk tabel hasil
    if results:
        df_results = pd.DataFrame(results)
        
        print("\n" + "="*100)
        print("RINGKASAN HASIL PREDIKSI")
        print("="*100)
        
        # Hitung statistik
        agreement_count = (df_results['agreement'] == '✓').sum()
        total_count = len(df_results)
        agreement_rate = agreement_count / total_count * 100
        
        print(f"\nTotal gambar: {total_count}")
        print(f"Prediksi sama (SVM = KNN): {agreement_count} ({agreement_rate:.1f}%)")
        print(f"Prediksi berbeda: {total_count - agreement_count} ({100 - agreement_rate:.1f}%)")
        
        # Tampilkan tabel
        print("\n" + "="*100)
        print("DETAIL HASIL PREDIKSI")
        print("="*100)
        display(df_results)
        
        # Statistik per model
        print("\n" + "="*100)
        print("STATISTIK PER MODEL")
        print("="*100)
        
        for model_type in ['svm', 'knn']:
            print(f"\n{model_type.upper()} Model:")
            pred_col = f'{model_type}_prediction'
            conf_col = f'{model_type}_confidence'
            
            print(f"  Rata-rata confidence: {df_results[conf_col].mean():.2%}")
            print(f"  Min confidence: {df_results[conf_col].min():.2%}")
            print(f"  Max confidence: {df_results[conf_col].max():.2%}")
            print(f"  Distribusi prediksi:")
            for cls, count in df_results[pred_col].value_counts().items():
                print(f"    - {cls}: {count} gambar ({count/total_count*100:.1f}%)")
        
        return df_results
    
    return None


# Contoh penggunaan: prediksi semua gambar di folder data/example
example_folder = Path("../data/example")

if example_folder.exists():
    results_df = predict_multiple_images(example_folder)
else:
    print(f"Folder tidak ditemukan: {example_folder}")
    print("Silakan sesuaikan path folder")

Ditemukan 15 gambar

✓ cabai-dataset-belum-matang.jpg      | SVM: belum-matang    (99.5%) | KNN: belum-matang    (100.0%)
✓ cabai-dataset-belum-matang.jpg      | SVM: belum-matang    (99.5%) | KNN: belum-matang    (100.0%)
✓ cabai-dataset-matang.jpg            | SVM: matang          (99.9%) | KNN: matang          (100.0%)
✓ cabai-dataset-matang.jpg            | SVM: matang          (99.9%) | KNN: matang          (100.0%)
✓ cabai-dataset-setengah-matang.jpg   | SVM: setengah-matang (99.6%) | KNN: setengah-matang (100.0%)
✓ cabai-dataset-setengah-matang.jpg   | SVM: setengah-matang (99.6%) | KNN: setengah-matang (100.0%)
✓ cabai-belum-matang-1.jpeg           | SVM: setengah-matang (54.7%) | KNN: setengah-matang (63.2%)
✓ cabai-belum-matang-1.jpeg           | SVM: setengah-matang (54.7%) | KNN: setengah-matang (63.2%)
✗ cabai-keriting-matang-1.jpeg        | SVM: belum-matang    (96.9%) | KNN: matang          (100.0%)
✗ cabai-keriting-matang-1.jpeg        | SVM: belum-matang    (96.9%) | K

Unnamed: 0,filename,knn_prediction,knn_confidence,svm_prediction,svm_confidence,agreement
0,cabai-dataset-belum-matang.jpg,belum-matang,1.0,belum-matang,0.995259,✓
1,cabai-dataset-matang.jpg,matang,1.0,matang,0.999152,✓
2,cabai-dataset-setengah-matang.jpg,setengah-matang,1.0,setengah-matang,0.995784,✓
3,cabai-belum-matang-1.jpeg,setengah-matang,0.632181,setengah-matang,0.546954,✓
4,cabai-keriting-matang-1.jpeg,matang,1.0,belum-matang,0.968909,✗
5,cabai-matang-1.jpeg,matang,1.0,matang,0.495445,✓
6,cabai-matang.jpeg,matang,1.0,belum-matang,0.632589,✗
7,cabai-setengah-matang-1.jpeg,setengah-matang,0.556602,belum-matang,0.896863,✗
8,cabai-setengah-matang.jpeg,belum-matang,0.907794,belum-matang,0.969479,✓
9,carolina-matang.jpeg,matang,0.644873,belum-matang,0.962119,✗



STATISTIK PER MODEL

SVM Model:
  Rata-rata confidence: 86.50%
  Min confidence: 49.54%
  Max confidence: 99.92%
  Distribusi prediksi:
    - belum-matang: 11 gambar (73.3%)
    - matang: 2 gambar (13.3%)
    - setengah-matang: 2 gambar (13.3%)

KNN Model:
  Rata-rata confidence: 78.74%
  Min confidence: 53.05%
  Max confidence: 100.00%
  Distribusi prediksi:
    - matang: 8 gambar (53.3%)
    - setengah-matang: 4 gambar (26.7%)
    - belum-matang: 3 gambar (20.0%)


## 8. Prediksi dengan Custom Image Path

Gunakan cell ini untuk melakukan prediksi pada gambar spesifik yang Anda tentukan.

In [40]:
# Ganti dengan path gambar yang ingin Anda prediksi
custom_image = Path("../data/augmented/matang/matang_001.jpg")

if custom_image.exists():
    print(f"Melakukan prediksi pada: {custom_image}\n")
    result = predict_image(custom_image)
    visualize_prediction(custom_image, result)
else:
    print(f"File tidak ditemukan: {custom_image}")
    print("\nSilakan ganti dengan path yang valid, contoh:")
    print('  custom_image = Path("../data/example/your_image.jpg")')
    print('  result = predict_image(custom_image)')
    print('  visualize_prediction(custom_image, result)')

File tidak ditemukan: ..\data\augmented\matang\matang_001.jpg

Silakan ganti dengan path yang valid, contoh:
  custom_image = Path("../data/example/your_image.jpg")
  result = predict_image(custom_image)
  visualize_prediction(custom_image, result)
