In [None]:
# Instalasi dependensi dari requirements.txt
#!pip install torch==2.0.1 torchvision==0.15.2 tqdm==4.65.0 pandas==1.5.3 matplotlib==3.7.1 scikit-learn==1.2.2 torchmetrics==0.11.4 albumentations==1.3.0 opencv-python-headless==4.8.0.76

# Impor pustaka dasar yang akan digunakan di beberapa sel
import os
import torch
import shutil
import glob
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torchvision
import albumentations as A
from xml.etree import ElementTree as ET
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from albumentations.pytorch import ToTensorV2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from sklearn.metrics import precision_recall_fscore_support

print("✅ Instalasi & Impor Awal Selesai.")

  from .autonotebook import tqdm as notebook_tqdm


✅ Instalasi & Impor Awal Selesai.


In [2]:
# --- Cell 2: split_data.py (Diperbaiki dengan Pengecekan File) ---

# Konfigurasi 
DATASET_DIR = "../data/Gabungan"
IMAGE_DIR_NAME = "Image"
ANNOT_DIR_NAME = "Annotation"
OUTPUT_DIR = "../data/Katarak2_Split"
TRAIN_RATIO = 0.8

# Sisa kode akan berjalan otomatis berdasarkan konfigurasi di atas
IMAGE_DIR = os.path.join(DATASET_DIR, IMAGE_DIR_NAME)
ANNOT_DIR = os.path.join(DATASET_DIR, ANNOT_DIR_NAME)

if os.path.exists(IMAGE_DIR) and os.path.exists(ANNOT_DIR):
    TRAIN_IMG_DIR = os.path.join(OUTPUT_DIR, "train", "Image")
    TRAIN_ANNOT_DIR = os.path.join(OUTPUT_DIR, "train", "Annotation")
    VAL_IMG_DIR = os.path.join(OUTPUT_DIR, "val", "Image")
    VAL_ANNOT_DIR = os.path.join(OUTPUT_DIR, "val", "Annotation")
    
    os.makedirs(TRAIN_IMG_DIR, exist_ok=True)
    os.makedirs(TRAIN_ANNOT_DIR, exist_ok=True)
    os.makedirs(VAL_IMG_DIR, exist_ok=True)
    os.makedirs(VAL_ANNOT_DIR, exist_ok=True)
    
    image_filenames = [f for f in os.listdir(IMAGE_DIR) if f.endswith((".jpg", ".png"))]
    train_images, val_images = train_test_split(image_filenames, train_size=TRAIN_RATIO, random_state=42)
    
    # --- FUNGSI copy_files YANG DIPERBAIKI ---
    def copy_files(file_list, src_img, src_annot, dst_img, dst_annot):
        copied_count = 0
        for filename in file_list:
            annot_filename = os.path.splitext(filename)[0] + ".xml"
            src_annot_path = os.path.join(src_annot, annot_filename)
            
            # PERBAIKAN: Cek apakah file anotasi ada sebelum menyalin
            if os.path.exists(src_annot_path):
                # Salin gambar
                shutil.copy(os.path.join(src_img, filename), os.path.join(dst_img, filename))
                # Salin anotasi
                shutil.copy(src_annot_path, os.path.join(dst_annot, annot_filename))
                copied_count += 1
            else:
                # Beri peringatan jika anotasi tidak ditemukan
                print(f"⚠️ Peringatan: Anotasi '{annot_filename}' tidak ditemukan, gambar '{filename}' dilewati.")
        return copied_count

    print("Memproses data latih...")
    num_train = copy_files(train_images, IMAGE_DIR, ANNOT_DIR, TRAIN_IMG_DIR, TRAIN_ANNOT_DIR)
    
    print("\nMemproses data validasi...")
    num_val = copy_files(val_images, IMAGE_DIR, ANNOT_DIR, VAL_IMG_DIR, VAL_ANNOT_DIR)
    
    print("\n--- Ringkasan ---")
    print(f"Total gambar yang dipertimbangkan untuk latih: {len(train_images)}")
    print(f"Total gambar yang berhasil disalin (dengan anotasi): {num_train}")
    print(f"\nTotal gambar yang dipertimbangkan untuk validasi: {len(val_images)}")
    print(f"Total gambar yang berhasil disalin (dengan anotasi): {num_val}")
    print(f"\n✅ Pemisahan data selesai. Data disimpan di: {OUTPUT_DIR}")
else:
    print(f"❌ Direktori data sumber tidak ditemukan di '{IMAGE_DIR}' atau '{ANNOT_DIR}'. Periksa kembali path Anda.")

Memproses data latih...
⚠️ Peringatan: Anotasi 'immature-107_jpg.rf.e6c1dbeaacc81094cc0c733ccf439233(1).xml' tidak ditemukan, gambar 'immature-107_jpg.rf.e6c1dbeaacc81094cc0c733ccf439233(1).jpg' dilewati.

Memproses data validasi...

--- Ringkasan ---
Total gambar yang dipertimbangkan untuk latih: 1296
Total gambar yang berhasil disalin (dengan anotasi): 1295

Total gambar yang dipertimbangkan untuk validasi: 325
Total gambar yang berhasil disalin (dengan anotasi): 325

✅ Pemisahan data selesai. Data disimpan di: ../data/Katarak2_Split


In [3]:
# --- Cell 3: config.py ---

BATCH_SIZE = 8
RESIZE_TO = 640
NUM_EPOCHS = 10
NUM_WORKERS = 4 # Sesuaikan dengan CPU Anda
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# PENTING: Sesuaikan path ini dengan struktur folder Anda
TRAIN_IMG = '../data/train/Image'
TRAIN_ANNOT = '../data/train/Annotation'
VALID_IMG = '../data/Katarak2_Split/val/Image'
VALID_ANNOT = '../data/Katarak2_Split/val/Annotation'

CLASSES = ['__background__', 'Immature', 'Mature', 'Normal']
NUM_CLASSES = len(CLASSES)
OUT_DIR = 'outputs'

os.makedirs(OUT_DIR, exist_ok=True)

print(f"Konfigurasi dimuat. Device: {DEVICE}, Output Dir: {OUT_DIR}")

Konfigurasi dimuat. Device: cuda, Output Dir: outputs


In [4]:
# --- Cell 4: custom_utils.py ---

class Averager:
    def __init__(self): self.reset()
    def send(self, value): self.current_total += value; self.iterations += 1
    @property
    def value(self): return self.current_total / self.iterations if self.iterations > 0 else 0
    def reset(self): self.current_total = 0.0; self.iterations = 0.0

class SaveBestModel:
    def __init__(self, best_valid_map=0.0): self.best_valid_map = best_valid_map
    def __call__(self, model, current_valid_map, epoch, out_dir):
        if current_valid_map > self.best_valid_map:
            self.best_valid_map = current_valid_map
            file_name = f"{out_dir}/best_model_mAP_{current_valid_map:.4f}.pth"
            print(f"\nValid mAP meningkat. Menyimpan model ke {file_name}")
            torch.save({'epoch': epoch + 1, 'model_state_dict': model.state_dict()}, file_name)

def get_train_transform():
    return A.Compose([
        A.HorizontalFlip(p=0.5), A.RandomBrightnessContrast(p=0.3), A.Blur(blur_limit=3, p=0.1),
        ToTensorV2(p=1.0)], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    return A.Compose([ToTensorV2(p=1.0)], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def save_loss_plot(out_dir, train_loss_list):
    plt.figure(figsize=(10, 7)); plt.plot(train_loss_list, label='Train Loss')
    plt.xlabel('Epochs'); plt.ylabel('Loss'); plt.title("Training Loss Plot"); plt.legend()
    plt.savefig(f"{out_dir}/train_loss.png"); plt.close()

def save_mAP(out_dir, map_50_list, map_95_list):
    plt.figure(figsize=(10, 7)); plt.plot(map_50_list, label='mAP@0.5', color='orange')
    plt.plot(map_95_list, label='mAP@0.5:0.95', color='red'); plt.xlabel('Epochs')
    plt.ylabel('mAP'); plt.title('mAP Over Epochs'); plt.legend()
    plt.savefig(f"{out_dir}/map_plot.png"); plt.close()

print("✅ Fungsi utilitas ('custom_utils.py') berhasil didefinisikan.")

✅ Fungsi utilitas ('custom_utils.py') berhasil didefinisikan.


In [5]:
# --- Cell 5: datasets.py ---

class CustomDataset(Dataset):
    def __init__(self, img_path, annot_path, width, height, classes, transforms=None):
        self.img_path, self.annot_path = img_path, annot_path
        self.width, self.height = width, height
        self.classes, self.transforms = classes, transforms
        self.all_images = sorted(glob.glob(os.path.join(img_path, "*.jpg")))
    def __len__(self): return len(self.all_images)
    def __getitem__(self, idx):
        image_name = os.path.basename(self.all_images[idx])
        annot_path = os.path.join(self.annot_path, image_name.replace('.jpg', '.xml'))
        image = cv2.imread(self.all_images[idx])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        
        tree = ET.parse(annot_path)
        root = tree.getroot()
        boxes, labels = [], []
        for obj in root.findall("object"):
            label = self.classes.index(obj.find("name").text)
            bndbox = obj.find("bndbox")
            xmin, ymin, xmax, ymax = map(int, [bndbox.find(tag).text for tag in ["xmin", "ymin", "xmax", "ymax"]])
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(label)
        
        # Buat dictionary target
        target = {}

        # Lakukan transformasi jika ada
        if self.transforms:
            # Konversi ke NumPy array SEBELUM augmentasi (INI PERBAIKANNYA)
            sample = self.transforms(
                image=image,
                bboxes=boxes,
                labels=labels
            )
            image = sample['image']
            # Perbarui boxes dan labels DARI HASIL augmentasi (INI PERBAIKANNYA)
            target['boxes'] = torch.tensor(sample['bboxes'], dtype=torch.float32) if len(sample['bboxes']) > 0 else torch.empty((0, 4))
            target['labels'] = torch.tensor(sample['labels'], dtype=torch.int64) if len(sample['labels']) > 0 else torch.empty(0, dtype=torch.int64)
        else:
            # Jika tidak ada transformasi, konversi langsung ke tensor
            target['boxes'] = torch.tensor(boxes, dtype=torch.float32)
            target['labels'] = torch.tensor(labels, dtype=torch.int64)
            
        return image, target

def collate_fn(batch): return tuple(zip(*batch))

print("✅ Kelas Dataset ('datasets.py') berhasil didefinisikan.")

✅ Kelas Dataset ('datasets.py') berhasil didefinisikan.


In [6]:
# Jalankan ini di sel baru untuk diagnosis
try:
    debug_valid_dataset = CustomDataset(VALID_IMG, VALID_ANNOT, RESIZE_TO, RESIZE_TO, CLASSES, get_valid_transform())
    debug_valid_loader = DataLoader(debug_valid_dataset, batch_size=BATCH_SIZE, collate_fn=collate_fn)

    print(f"Jumlah data di validation dataset: {len(debug_valid_dataset)}")
    if len(debug_valid_dataset) > 0:
        print("✅ Berhasil membuat validation loader. Seharusnya masalah path sudah benar.")
    else:
        print("🔴 Peringatan: Validation dataset kosong! Periksa isi folder validasi Anda.")
except Exception as e:
    print(f"❌ Terjadi error saat membuat validation loader: {e}")

Jumlah data di validation dataset: 325
✅ Berhasil membuat validation loader. Seharusnya masalah path sudah benar.


  self._set_keys()


In [7]:
# --- Cell 6: model.py ---

def create_model(num_classes, min_size=640, max_size=640):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights='DEFAULT')
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    model.transform.min_size, model.transform.max_size = (min_size,), max_size
    return model

print("✅ Fungsi pembuatan model ('model.py') berhasil didefinisikan.")

✅ Fungsi pembuatan model ('model.py') berhasil didefinisikan.


In [None]:
# --- Cell 7: train.py ---

# Inisialisasi datasets dan loaders
train_dataset = CustomDataset(TRAIN_IMG, TRAIN_ANNOT, RESIZE_TO, RESIZE_TO, CLASSES, get_train_transform())
valid_dataset = CustomDataset(VALID_IMG, VALID_ANNOT, RESIZE_TO, RESIZE_TO, CLASSES, get_valid_transform())
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, collate_fn=collate_fn)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, collate_fn=collate_fn)
print(f"Data latih: {len(train_dataset)} | Data validasi: {len(valid_dataset)}\n")

# Inisialisasi model dan optimizer
model = create_model(NUM_CLASSES, min_size=RESIZE_TO, max_size=RESIZE_TO).to(DEVICE)
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.0001, alpha=0.99, weight_decay=0.0005)

# Inisialisasi pelacak metrik
train_loss_hist = Averager()
map_metric = MeanAveragePrecision()
save_best_model = SaveBestModel()
metrics_data, map_50_list, map_95_list = [], [], []

# Loop Pelatihan
for epoch in range(NUM_EPOCHS):
    print(f"\n--- Epoch {epoch + 1}/{NUM_EPOCHS} ---")
    train_loss_hist.reset(); model.train()
    prog_bar = tqdm(train_loader, total=len(train_loader))
    
    # Fase Training
    for images, targets in prog_bar:
        images = [img.to(DEVICE) for img in images]
        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward(); optimizer.step()
        train_loss_hist.send(losses.item())
        prog_bar.set_description(f"Loss: {losses.item():.4f}")
    print(f"Loss Pelatihan: {train_loss_hist.value:.4f}")

    # Fase Validasi
    model.eval(); map_metric.reset()
    all_true_labels, all_pred_labels = [], []
    with torch.no_grad():
        for images, targets in tqdm(valid_loader, desc="Validating"):
            images = [img.to(DEVICE) for img in images]; outputs = model(images)
            for i in range(len(images)):
                true_labels, true_boxes = targets[i]["labels"].cpu(), targets[i]["boxes"].cpu()
                pred_scores, pred_labels, pred_boxes = outputs[i]["scores"].cpu(), outputs[i]["labels"].cpu(), outputs[i]["boxes"].cpu()
                mask = pred_scores > 0.5
                map_metric.update([{"boxes": pred_boxes[mask], "scores": pred_scores[mask], "labels": pred_labels[mask]}], 
                                  [{"boxes": true_boxes, "labels": true_labels}])
                all_true_labels.extend(true_labels.numpy())
                all_pred_labels.extend(pred_labels[mask].numpy())

    # Kalkulasi Metrik
    map_result = map_metric.compute()
    min_len = min(len(all_true_labels), len(all_pred_labels))
    precision, recall, f1, _ = precision_recall_fscore_support(all_true_labels[:min_len], all_pred_labels[:min_len], average="weighted", zero_division=0)
    print(f"mAP@0.5: {map_result['map_50']:.4f}, mAP@0.5:0.95: {map_result['map']:.4f}, Precision: {precision:.4f}, F1: {f1:.4f}")

    # Simpan hasil
    map_50_list.append(map_result['map_50'].item()); map_95_list.append(map_result['map'].item())
    metrics_data.append({"Epoch": epoch+1, "Train Loss": train_loss_hist.value, "mAP@0.5": map_result['map_50'].item(), "mAP@0.5:0.95": map_result['map'].item(), "Precision": precision, "Recall": recall, "F1-Score": f1})
    train_losses = [e["Train Loss"] for e in metrics_data]
    save_loss_plot(OUT_DIR, train_losses); save_mAP(OUT_DIR, map_50_list, map_95_list)
    save_best_model(model, map_result['map'].item(), epoch, OUT_DIR)

# Simpan metrik ke Excel
pd.DataFrame(metrics_data).to_excel(f"{OUT_DIR}/all_metrics.xlsx", index=False)
print("\n✅ Pelatihan Selesai!")

Data latih: 1500 | Data validasi: 325


--- Epoch 1/10 ---


Loss: 0.0804: 100%|██████████| 188/188 [02:16<00:00,  1.37it/s]


Loss Pelatihan: 0.1983


Validating: 100%|██████████| 41/41 [00:12<00:00,  3.23it/s]


mAP@0.5: 0.7205, mAP@0.5:0.95: 0.5157, Precision: 0.4428, F1: 0.4330

Valid mAP meningkat. Menyimpan model ke outputs/best_model_mAP_0.5157.pth

--- Epoch 2/10 ---


Loss: 0.0546: 100%|██████████| 188/188 [02:12<00:00,  1.42it/s]


Loss Pelatihan: 0.0732


Validating: 100%|██████████| 41/41 [00:12<00:00,  3.34it/s]


mAP@0.5: 0.8981, mAP@0.5:0.95: 0.6821, Precision: 0.8926, F1: 0.8777

Valid mAP meningkat. Menyimpan model ke outputs/best_model_mAP_0.6821.pth

--- Epoch 3/10 ---


Loss: 0.0623: 100%|██████████| 188/188 [02:12<00:00,  1.42it/s]


Loss Pelatihan: 0.0547


Validating: 100%|██████████| 41/41 [00:12<00:00,  3.32it/s]


mAP@0.5: 0.9373, mAP@0.5:0.95: 0.6881, Precision: 0.7643, F1: 0.7146

Valid mAP meningkat. Menyimpan model ke outputs/best_model_mAP_0.6881.pth

--- Epoch 4/10 ---


Loss: 0.0663: 100%|██████████| 188/188 [02:12<00:00,  1.42it/s]


Loss Pelatihan: 0.0753


Validating: 100%|██████████| 41/41 [00:15<00:00,  2.67it/s]


mAP@0.5: 0.8529, mAP@0.5:0.95: 0.6548, Precision: 0.6135, F1: 0.4093

--- Epoch 5/10 ---


Loss: 0.0421:  98%|█████████▊| 185/188 [02:07<00:02,  1.28it/s] 