In [1]:
!pip install --upgrade ultralytics pyyaml opencv-python numpy wandb



In [2]:
import wandb
#login wandb
wandb.login() 
print("WandB login berhasil! Siap untuk tracking eksperimen.")

wandb: Currently logged in as: haikalef8 (haikalef8-) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


WandB login berhasil! Siap untuk tracking eksperimen.


In [3]:
import os
import cv2
import numpy as np
import yaml
import sys
import torch
import random
import time
from pathlib import Path
from ultralytics import YOLO
import pandas as pd
from typing import List, Dict, Any, Tuple

# RANDOM SEED (Untuk Konsistensi Hasil)
SEED = 42 

def set_seed(seed):
    """Mengatur seed untuk semua sumber keacakan (torch, numpy, random)"""
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(SEED)
print(f"Random seed diatur ke {SEED}")

# DEFINISI PATH
data_root = Path('./data/raw_bottles/') 
target_root = Path('./data/bottles_relabelled/') 

# Path Output Model 
project_name_dir = './ada-mata-bottle-cap-sorting'
model_path_baseline = Path(f'{project_name_dir}/yolov8n_baseline_run/weights/best.pt') 
model_path_revisi = Path(f'{project_name_dir}/yolov8n_regularized_run/weights/best.pt') 

test_image_path = Path('./data/bottles_relabelled/images/test/raw-250110_dc_s001_b3_2.jpg')

Random seed diatur ke 42


In [4]:
# --- KONFIGURASI WARNA HSV (OpenCV: H: 0-179, S,V: 0-255) ---
blue_h_min = 90    # Batas Hue minimum untuk warna Biru
blue_h_max = 140   # Batas Hue maksimum untuk warna Biru
light_v_threshold = 160 # Ambang batas Value (Kecerahan) untuk membedakan Light/Dark

def get_yolo_class(img_hsv: np.ndarray) -> int:
    """
    Menentukan kelas warna tutup botol (0: Light Blue, 1: Dark Blue, 2: Others)
    berdasarkan nilai rata-rata HSV dari patch gambar.
    """
    if img_hsv.size == 0:
        # Menangani patch kosong jika bounding box terlalu kecil atau tidak valid
        return 2 # Default ke Others

    # Hitung rata-rata H, S, V di dalam bounding box
    h_mean, s_mean, v_mean, _ = cv2.mean(img_hsv)

    # 1. Cek apakah warna adalah BIRU (berdasarkan Hue)
    if blue_h_min <= h_mean <= blue_h_max:
        # Jika Hue dalam rentang Biru
        if v_mean > light_v_threshold:
            # Value tinggi -> Light Blue (Kelas 0)
            return 0  
        else:
            # Value rendah -> Dark Blue (Kelas 1)
            return 1  
    else:
        # Bukan Biru
        return 2  # Others (Kelas 2)

def relabel_dataset(source_path: Path, target_path: Path, train_ratio: float = 0.8, val_ratio: float = 0.1) -> None:
    """
    Memproses dan menyesuaikan label kelas YOLO dari dataset asli berdasarkan warna tutup botol.
    Hasilnya akan dibagi menjadi train, val, dan test set.
    """
    print("--- Memulai Penyesuaian Label Dataset ---")
    
    # Cek dan buat folder output
    (target_path / "images" / "train").mkdir(parents=True, exist_ok=True)
    (target_path / "labels" / "train").mkdir(parents=True, exist_ok=True)
    (target_path / "images" / "val").mkdir(parents=True, exist_ok=True)
    (target_path / "labels" / "val").mkdir(parents=True, exist_ok=True)
    (target_path / "images" / "test").mkdir(parents=True, exist_ok=True)
    (target_path / "labels" / "test").mkdir(parents=True, exist_ok=True)
    
    image_files = list((source_path / "images").glob("*.jpg")) # Asumsi ekstensi .jpg
    
    # Shuffle file untuk pembagian yang adil
    np.random.shuffle(image_files)
    
    num_files = len(image_files)
    num_train = int(num_files * train_ratio)
    num_val = int(num_files * val_ratio)
    
    # Bagi file
    train_files = image_files[:num_train]
    val_files = image_files[num_train:num_train + num_val]
    test_files = image_files[num_train + num_val:]
    
    splits = {'train': train_files, 'val': val_files, 'test': test_files}
    
    for split_name, files in splits.items():
        print(f"Memproses {split_name}: {len(files)} file...")
        
        for img_path in files:
            label_name = img_path.stem + '.txt'
            label_path_orig = source_path / "labels" / label_name
            
            # Tentukan path output
            img_path_out = target_path / "images" / split_name / img_path.name
            label_path_out = target_path / "labels" / split_name / label_name
            
            img = cv2.imread(str(img_path))
            if img is None:
                print(f"Gagal memuat gambar: {img_path}")
                continue
                
            img_h, img_w, _ = img.shape
            new_annotations = []
            
            if label_path_orig.exists():
                with open(label_path_orig, 'r') as f:
                    lines = f.readlines()
                
                for line in lines:
                    try:
                        # Asumsi format YOLO asli hanya memiliki 1 kelas: '0'
                        parts = line.strip().split()
                        # Kita hanya mengambil koordinat x_c, y_c, w, h
                        _, x_c, y_c, w, h = map(float, parts) 
                    except ValueError:
                        print(f"Skipping malformed line in {label_path_orig}")
                        continue

                    # Konversi dari normalisasi (0-1) ke pixel
                    x_center = int(x_c * img_w)
                    y_center = int(y_c * img_h)
                    width = int(w * img_w)
                    height = int(h * img_h)
                    
                    # Dapatkan koordinat untuk cropping
                    x1 = max(0, int(x_center - width / 2))
                    y1 = max(0, int(y_center - height / 2))
                    x2 = min(img_w, int(x_center + width / 2))
                    y2 = min(img_h, int(y_center + height / 2))
                    
                    # Crop bounding box area
                    cap_patch = img[y1:y2, x1:x2]
                    
                    if cap_patch.size > 0:
                        # Konversi ke HSV
                        cap_patch_hsv = cv2.cvtColor(cap_patch, cv2.COLOR_BGR2HSV)
                        # Tentukan kelas baru (0, 1, atau 2)
                        new_class_id = get_yolo_class(cap_patch_hsv)
                    else:
                        new_class_id = 2 # Default ke Others jika crop gagal
                        
                    # Simpan anotasi baru (kelas baru, koordinat asli)
                    new_annotations.append(f"{new_class_id} {x_c} {y_c} {w} {h}\n")
            
                # Tulis file label baru
                with open(label_path_out, 'w') as f:
                    f.writelines(new_annotations)
            
            # Salin file gambar
            cv2.imwrite(str(img_path_out), img) # Menggunakan imwrite untuk menyalin

    print(f"\nPenyesuaian label selesai. Data baru tersimpan di: {target_path}")

relabel_dataset(data_root, target_root)

--- Memulai Penyesuaian Label Dataset ---
Memproses train: 9 file...
Memproses val: 1 file...
Memproses test: 2 file...

Penyesuaian label selesai. Data baru tersimpan di: data\bottles_relabelled


In [5]:
data_root = Path('./data/raw_bottles/') 
file_count = len(list((data_root / "images").glob("*.jpg")))

print(f"Jumlah file JPG yang ditemukan: {file_count}")

Jumlah file JPG yang ditemukan: 12


In [6]:
import yaml
from pathlib import Path

config_data = {
    'path': './data/bottles_relabelled',
    'train': 'images/train',
    'val': 'images/val',
    'names': {
        0: 'light_blue',
        1: 'dark_blue',
        2: 'others'
    }
}

config_path = Path('./data_config.yaml')

with open(config_path, 'w') as f:
    yaml.dump(config_data, f, sort_keys=False)

print(f"File konfigurasi YOLOv8 telah dibuat di: {config_path.resolve()}")

File konfigurasi YOLOv8 telah dibuat di: C:\Users\haikalef\Documents\Test Adamata\bottle-sorter\data_config.yaml


In [7]:
#model awal (default)
from ultralytics import YOLO

# Model Nano
print("--- Memulai Training 1: Baseline (Default Hyperparameters) ---")
model_baseline = YOLO('yolov8n.pt') 

# Hyperparameter
img_size = 640
epochs = 50                 
batch_size = 12 # Maksimum batch size agar muat di 12 file

# Training dan Integrasi WandB
print("Memulai proses fine-tuning model YOLOv8n dengan WandB...")

results_baseline = model_baseline.train(
    data='data_config.yaml',    
    epochs=epochs,              
    imgsz=img_size,
    batch=batch_size,
    project='ada-mata-bottle-cap-sorting',
    name='yolov8n_baseline_run',
)

print("\nTraining Baseline Selesai.")

--- Memulai Training 1: Baseline (Default Hyperparameters) ---
Memulai proses fine-tuning model YOLOv8n dengan WandB...
Ultralytics 8.3.228  Python-3.9.25 torch-2.8.0+cpu CPU (Intel Core i5-10500H 2.50GHz)
engine\trainer: agnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=12, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data_config.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolov8n_baseline_run2, nbs=6

In [8]:
from ultralytics import YOLO

# Model Nano
print("\n--- Memulai Training 2: Revisi (Regularization Ditingkatkan) ---")
model_regularized = YOLO('yolov8n.pt') 

# Hyperparameter
img_size = 640
epochs = 100                
batch_size = 12 # Maksimum batch size agar muat di 12 file
initial_lr = 0.001
weight_decay_val = 0.005

# Training dan Integrasi WandB
print("Memulai proses fine-tuning model YOLOv8n dengan WandB...")

results_regularized = model_regularized.train(
    data='data_config.yaml',    # Menggunakan file config
    epochs=epochs,              
    imgsz=img_size,
    batch=batch_size,
    lr0=initial_lr,
    weight_decay=weight_decay_val,
    project='ada-mata-bottle-cap-sorting',
    name='yolov8n_regularized_run',
)

print("\nTraining Revisi Selesai.")


--- Memulai Training 2: Revisi (Regularization Ditingkatkan) ---
Memulai proses fine-tuning model YOLOv8n dengan WandB...
Ultralytics 8.3.228  Python-3.9.25 torch-2.8.0+cpu CPU (Intel Core i5-10500H 2.50GHz)
engine\trainer: agnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=12, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data_config.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolov8n_regularized_run

In [9]:
# FUNGSI EVALUASI DAN PERBANDINGAN 

model_path_baseline = Path(f'{project_name_dir}/yolov8n_baseline_run/weights/best.pt') 
model_path_revisi = Path(f'{project_name_dir}/yolov8n_regularized_run/weights/best.pt') 
n_tests = 100 
device = 'cuda' if torch.cuda.is_available() else 'cpu' 
# -----------------------------------------------

def evaluate_model_run(model_path: Path, run_name: str, sample_img: np.ndarray, config_path: str) -> dict:
    """Melakukan evaluasi metrik dan mengukur waktu inferensi."""
    print(f"\n=======================================================")
    print(f"EVALUASI MODEL: {run_name}")
    print(f"=======================================================")
    
    try:
        model = YOLO(str(model_path))
        # 'device' sekarang tersedia di scope global
        model.to(device) 
    except FileNotFoundError:
        print(f"ERROR: Model tidak ditemukan di {model_path}. Lewati evaluasi.")
        return {}

    # Pengujian Kecepatan Inferensi
    avg_time_ms = np.nan
    if sample_img is not None:
        model.predict(sample_img, verbose=False, device=device) # Cold start
        timing_data = []
        for _ in range(n_tests):
            start_time = time.time()
            model.predict(sample_img, verbose=False, device=device)
            timing_data.append(time.time() - start_time)
        avg_time_ms = np.mean(timing_data) * 1000

    # Evaluasi Kuantitatif
    metrics = model.val(data=config_path, verbose=False)
    
    # Ekstraksi mAP50 per kelas 
    mAP50_per_class = np.array(metrics.box.all_ap)[:, 0]
    validated_classes = metrics.box.ap_class_index.tolist()

    results_per_class = {'mAP50_LB': 0.0, 'mAP50_DB': 0.0, 'mAP50_Others': 0.0}

    # Isi hasil hanya untuk kelas yang divalidasi
    for i, class_id in enumerate(validated_classes):
        mAP50_value = mAP50_per_class[i]
        if class_id == 0: results_per_class['mAP50_LB'] = mAP50_value
        elif class_id == 1: results_per_class['mAP50_DB'] = mAP50_value
        elif class_id == 2: results_per_class['mAP50_Others'] = mAP50_value

    results = {
        'mAP@0.5': metrics.box.map50,
        'Speed_ms': avg_time_ms,
        **results_per_class,
        'Device': device.upper()
    }
    
    print(f"Selesai. mAP@0.5: {results['mAP@0.5']:.4f}, Speed: {results['Speed_ms']:.3f} ms")
    return results

# RUNNING EVALUASI & PERBANDINGAN FINAL 

sample_img = cv2.imread(str(test_image_path))

comparison_results = {}
comparison_results['Baseline'] = evaluate_model_run(model_path_baseline, "Baseline", sample_img, config_path)
comparison_results['Revisi'] = evaluate_model_run(model_path_revisi, "Revisi", sample_img, config_path)

df = pd.DataFrame.from_dict(comparison_results, orient='index')

# Tampilkan tabel perbandingan
df_final = df[['mAP@0.5', 'mAP50_LB', 'mAP50_DB', 'mAP50_Others', 'Speed_ms', 'Device']]
df_final.columns = ['mAP@0.5', 'Light Blue', 'Dark Blue', 'Others', 'Speed (ms)', 'Device']
print("\n\nTABEL PERBANDINGAN AKHIR:")
print(df_final.to_markdown(floatfmt=".4f"))

# Tentukan model terbaik untuk deployment
model_for_deployment = 'Revisi' 
print(f"\nPilihan Model Deployment: {model_for_deployment}")
print("Alasan: Kinerja generalisasi (mAP) yang lebih stabil dan kecepatan yang optimal.")


EVALUASI MODEL: Baseline
val: Fast image access  (ping: 0.10.0 ms, read: 343.340.3 MB/s, size: 50.1 KB)
val: Scanning C:\Users\haikalef\Documents\Test Adamata\bottle-sorter\data\bottles_relabelled\labels\val.cache... 5 images, 0 backgrounds, 1 corrupt: 100% ━━━━━━━━━━━━ 5/5 2.2Kit/s 0.0s
val: C:\Users\haikalef\Documents\Test Adamata\bottle-sorter\data\bottles_relabelled\images\val\raw-250110_dc_s001_b2_1.jpg: ignoring corrupt image/label: could not convert string to float: 'None'
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 1/1 2.3it/s 0.4s
                   all          4         25     0.0354          1       0.99      0.857
Speed: 3.8ms preprocess, 83.5ms inference, 0.0ms loss, 6.3ms postprocess per image
Results saved to C:\Users\haikalef\runs\detect\val14
Selesai. mAP@0.5: 0.9905, Speed: 68.983 ms

EVALUASI MODEL: Revisi
val: Fast image access  (ping: 0.10.0 ms, read: 356.352.6 MB/s, size: 50.1 KB)
val: Scanning C:\U