In [None]:
import os
import random
import xml.etree.ElementTree as ET

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision.models.detection import ssdlite320_mobilenet_v3_large
from torchvision.transforms import functional as F

from torch.utils.data import Dataset, DataLoader, random_split

import albumentations as A
from albumentations.pytorch import ToTensorV2


import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from tqdm import tqdm

In [None]:
# Dataset paths (Kaggle layout; point these at local folders when running elsewhere)
IMG_DIR = '/kaggle/input/tajwid-dataset/images'
ANN_DIR = '/kaggle/input/tajwid-dataset/annotations'

# Device: use the GPU when one is visible, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Class names; the list position is the integer label used by the detector
CLASS_NAMES = ['background', 'ikhfa', 'idgham', 'idzhar', 'iklab']  # 0 is background
NUM_CLASSES = len(CLASS_NAMES)

# Random seed: seed every global RNG, not only torch's. The augmentation
# pipeline may draw from Python's `random` or NumPy's global generator
# (depends on the installed albumentations version), so seeding torch alone
# does not make a run repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
class TajwidDataset(Dataset):
    """Detection dataset pairing each image with a Pascal-VOC style XML file.

    Every file name found in ``img_dir`` is treated as an image; its annotation
    is looked up in ``ann_dir`` under the same stem with an ``.xml`` extension.

    Args:
        img_dir: Directory containing the image files.
        ann_dir: Directory containing one ``<stem>.xml`` per image.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., class_labels=...)`` and expected
            to return a mapping with the keys ``image`` (a tensor), ``bboxes``
            and ``class_labels``.
        class_names: Optional list mapping label index -> class name. When
            omitted, the module-level ``CLASS_NAMES`` is used.

    Each item is ``(image, target)`` where ``target['boxes']`` is a float32
    tensor of shape ``(N, 4)`` and ``target['labels']`` an int64 tensor of
    shape ``(N,)``. ``N`` may be 0.
    """

    _BOX_TAGS = ('xmin', 'ymin', 'xmax', 'ymax')

    def __init__(self, img_dir, ann_dir, transforms=None, class_names=None):
        self.img_dir = img_dir
        self.ann_dir = ann_dir
        self.transforms = transforms
        # None means "resolve against the module-level CLASS_NAMES at lookup time"
        self.class_names = class_names
        self.images = sorted(os.listdir(img_dir))

    def __len__(self):
        return len(self.images)

    def _label_index(self, name):
        """Translate a class name into its integer label.

        Raises:
            ValueError: If ``name`` is not a known class.
        """
        names = CLASS_NAMES if self.class_names is None else self.class_names
        try:
            return names.index(name)
        except ValueError:
            raise ValueError(
                f"Unknown class name {name!r}; expected one of {list(names)}"
            ) from None

    def _parse_annotation(self, ann_path):
        """Read every ``<object>`` of one XML file into ``(boxes, labels)`` lists."""
        boxes, labels = [], []
        for obj in ET.parse(ann_path).getroot().findall('object'):
            bbox = obj.find('bndbox')
            # int(float(...)) also accepts coordinates written as "12.0"
            boxes.append([int(float(bbox.find(tag).text)) for tag in self._BOX_TAGS])
            labels.append(self._label_index(obj.find('name').text.strip()))
        return boxes, labels

    @staticmethod
    def _make_target(boxes, labels):
        """Pack boxes/labels into tensors, keeping the (N, 4) box shape even when N == 0.

        torchvision detection models assert that ``boxes`` is two-dimensional
        with a last dimension of 4; a plain empty list would otherwise become a
        tensor of shape ``(0,)`` and abort training.
        """
        return {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.as_tensor(labels, dtype=torch.int64).reshape(-1),
        }

    def __getitem__(self, idx):
        file_name = self.images[idx]
        stem = os.path.splitext(file_name)[0]

        # The context manager releases the file handle; convert() loads the
        # pixel data and returns an independent image before the file closes.
        with Image.open(os.path.join(self.img_dir, file_name)) as handle:
            img = np.array(handle.convert("RGB"))

        boxes, labels = self._parse_annotation(os.path.join(self.ann_dir, stem + '.xml'))

        if self.transforms:
            sample = self.transforms(image=img, bboxes=boxes, class_labels=labels)
            # The transform returns an unscaled image tensor; scale it to [0, 1]
            img = sample['image'].float() / 255.0
            # Augmentation can drop every box, so the empty case must be handled
            target = self._make_target(sample['bboxes'], sample['class_labels'])
        else:
            img = F.to_tensor(img)
            target = self._make_target(boxes, labels)

        return img, target

In [None]:
def get_train_transform():
    """Build the augmentation pipeline applied to training samples.

    Boxes are passed in Pascal-VOC format (``xmin, ymin, xmax, ymax``) with
    their classes in the ``class_labels`` field. The pipeline ends with a
    resize to 224x224 followed by conversion to a tensor.
    """
    # Very small / very large text, or a zoom onto the annotated objects
    scale_or_zoom = A.OneOf([
        A.RandomScale(scale_limit=(-0.6, 0.4), p=0.6),  # smaller/larger text
        A.RandomSizedBBoxSafeCrop(height=224, width=224, erosion_rate=0.2, p=0.3),  # zoom in on objects
    ], p=1.0)

    # One of several blur flavours
    blur = A.OneOf([
        A.Blur(blur_limit=5, p=0.3),
        A.GaussianBlur(blur_limit=5, p=0.3),
        A.MotionBlur(blur_limit=5, p=0.3),
    ], p=0.4)

    # Extra degradation for poor texture or noisy images
    degrade = A.OneOf([
        A.ISONoise(color_shift=(0.01, 0.05), intensity=(0.1, 0.3), p=0.3),  # simulated ISO noise
        A.ImageCompression(quality_lower=30, quality_upper=60, p=0.3),     # simulated compression artefacts
    ], p=0.5)

    steps = [
        scale_or_zoom,
        # Horizontal flip as a variation in appearance
        A.HorizontalFlip(p=0.5),
        # Lighting and contrast
        A.RandomBrightnessContrast(p=0.3),
        A.RandomGamma(p=0.3),
        blur,
        degrade,
        # Rotation and shift
        A.ShiftScaleRotate(shift_limit=0.03, scale_limit=0.2, rotate_limit=15, p=0.6),
        A.Resize(224, 224),
        ToTensorV2(),
    ]
    box_config = A.BboxParams(format='pascal_voc', label_fields=['class_labels'])
    return A.Compose(steps, bbox_params=box_config)



def get_val_transform():
    """Build the deterministic validation pipeline: resize to 224x224, then to tensor.

    Boxes use the Pascal-VOC format with their classes in ``class_labels``,
    the same convention as the training pipeline.
    """
    steps = [A.Resize(224, 224), ToTensorV2()]
    box_config = A.BboxParams(format='pascal_voc', label_fields=['class_labels'])
    return A.Compose(steps, bbox_params=box_config)

In [None]:
# Two dataset objects over the same files, one per transform. Both list the
# image directory in sorted order, so an index refers to the same image in each.
# A single shared dataset would not work here: the subsets produced by
# random_split reference the same underlying object, so overwriting its
# `transforms` for validation would also strip augmentation from training.
full_dataset = TajwidDataset(IMG_DIR, ANN_DIR, transforms=get_train_transform())
eval_dataset = TajwidDataset(IMG_DIR, ANN_DIR, transforms=get_val_transform())

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size

# Draw the split once, then replay the validation indices on the eval dataset
train_dataset, val_split = random_split(full_dataset, [train_size, val_size])
val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)


def collate_detection_batch(batch):
    """Turn ``[(img, target), ...]`` into ``((img, ...), (target, ...))``.

    Samples carry a variable number of boxes, so they cannot be stacked into a
    single tensor by the default collate function. A named function is used
    instead of a lambda so the loader can be pickled by worker processes.
    """
    return tuple(zip(*batch))


train_loader = DataLoader(train_dataset, batch_size=3, shuffle=True, collate_fn=collate_detection_batch)
val_loader = DataLoader(val_dataset, batch_size=3, shuffle=False, collate_fn=collate_detection_batch)

In [None]:
# Assigning `classification_head.num_classes` only sets a Python attribute; the
# convolution layers keep their 91-class COCO output. Instead, build the network
# with the desired number of classes and copy over every pretrained tensor whose
# shape still matches (everything except the final class-logit layers).
coco_model = ssdlite320_mobilenet_v3_large(pretrained=True)

# pretrained_backbone=False keeps the architecture identical to the COCO
# checkpoint, so its tensors line up with the new model's state dict.
model = ssdlite320_mobilenet_v3_large(
    pretrained=False,
    pretrained_backbone=False,
    num_classes=NUM_CLASSES,
)

fresh_state = model.state_dict()
transferable = {
    key: tensor
    for key, tensor in coco_model.state_dict().items()
    if key in fresh_state and tensor.shape == fresh_state[key].shape
}
# strict=False: the class-logit layers are intentionally left at their fresh initialisation
model.load_state_dict(transferable, strict=False)
del coco_model, fresh_state, transferable

model = model.to(device)

In [None]:
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Detection model that returns a dict of losses when called with
            ``(images, targets)`` in training mode.
        optimizer: Optimizer stepping the model's parameters.
        data_loader: Iterable of ``(images, targets)`` batches; must support ``len()``.
        device: Device the images and target tensors are moved to.
        epoch: Epoch number, used only in the progress-bar label.

    Returns:
        The sum of per-batch total losses divided by the number of batches,
        or ``0.0`` when the loader yields no batches.
    """
    model.train()

    num_batches = len(data_loader)
    if num_batches == 0:
        # Nothing to train on; avoid dividing by zero below
        return 0.0

    running_loss = 0.0
    pbar = tqdm(data_loader, desc=f"Epoch {epoch}", leave=False)

    for batch_idx, (images, targets) in enumerate(pbar, start=1):
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns one loss per component; optimise their sum
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the total and the progress bar
        loss_value = losses.item()
        running_loss += loss_value
        pbar.set_postfix({
            'Batch': f"{batch_idx}/{num_batches}",
            'Loss': loss_value,
        })

    return running_loss / num_batches

def _pairwise_iou(boxes_a, boxes_b):
    """Return the ``(len(a), len(b))`` IoU matrix for ``[x1, y1, x2, y2]`` boxes."""
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    top_left = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])
    bottom_right = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])
    inter = (bottom_right - top_left).clamp(min=0).prod(dim=2)
    union = area_a[:, None] + area_b[None, :] - inter
    # Degenerate boxes give a zero union; report IoU 0 instead of NaN
    return torch.where(union > 0, inter / union, torch.zeros_like(inter))


def _count_detected_targets(output, gt_boxes, gt_labels, iou_threshold, score_threshold):
    """Count ground-truth objects claimed by a confident, same-class, overlapping prediction."""
    confident = output['scores'] >= score_threshold
    pred_boxes = output['boxes'][confident].reshape(-1, 4)
    pred_labels = output['labels'][confident]
    pred_scores = output['scores'][confident]
    if pred_labels.numel() == 0 or gt_labels.numel() == 0:
        return 0

    # Greedy matching: the most confident prediction chooses first
    order = torch.argsort(pred_scores, descending=True)
    iou = _pairwise_iou(pred_boxes[order], gt_boxes)
    same_class = pred_labels[order].unsqueeze(1) == gt_labels.unsqueeze(0)
    iou = iou.masked_fill(~same_class, -1.0)

    claimed = torch.zeros(gt_labels.numel(), dtype=torch.bool, device=gt_labels.device)
    detected = 0
    for row in iou:
        # Each ground-truth object can be claimed by one prediction only
        best_iou, best_idx = row.masked_fill(claimed, -1.0).max(dim=0)
        if best_iou > 0 and best_iou >= iou_threshold:
            claimed[best_idx] = True
            detected += 1
    return detected


def evaluate(model, data_loader, device, iou_threshold=0.5, score_threshold=0.5):
    """Return the fraction of ground-truth objects the model detects correctly.

    A ground-truth object counts as detected when a prediction with
    ``score >= score_threshold`` has the same label and an IoU of at least
    ``iou_threshold`` with it; each object is matched at most once.

    Predictions and ground truth generally differ in length, so their label
    tensors cannot be compared element-wise. Objects are matched by overlap.

    Args:
        model: Detection model returning, in eval mode, one dict per image
            with the keys ``boxes``, ``labels`` and ``scores``.
        data_loader: Iterable of ``(images, targets)`` batches.
        device: Device the images and target tensors are moved to.
        iou_threshold: Minimum IoU for a prediction to claim an object.
        score_threshold: Minimum confidence for a prediction to be considered.

    Returns:
        ``detected / total`` over all ground-truth objects, or ``0`` when the
        loader contains no ground-truth objects.
    """
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for images, targets in data_loader:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            outputs = model(images)

            for output, target in zip(outputs, targets):
                gt_labels = target['labels'].reshape(-1)
                gt_boxes = target['boxes'].reshape(-1, 4)
                total += gt_labels.numel()
                correct += _count_detected_targets(
                    output, gt_boxes, gt_labels, iou_threshold, score_threshold
                )
    return correct / total if total > 0 else 0

In [None]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)

num_epochs = 100
best_acc = 0.0        # best validation score seen so far
train_losses = []     # mean training loss per epoch (plotted later)
val_accuracies = []   # validation score per epoch (plotted later)

for epoch in range(num_epochs):
    train_loss = train_one_epoch(model, optimizer, train_loader, device, epoch+1)
    val_acc = evaluate(model, val_loader, device)

    train_losses.append(train_loss)
    val_accuracies.append(val_acc)
    print(f"[Epoch {epoch+1}/{num_epochs}] Train Loss: {train_loss:.4f} - Val Acc: {val_acc:.4f}")

    # Keep only the checkpoint with the best validation score
    if val_acc > best_acc:
        best_acc = val_acc
        # NOTE(review): this pickles the whole module, so loading it later needs
        # the same torchvision class definitions. Saving model.state_dict() is
        # more portable. Left unchanged because the file name suggests a
        # full-model checkpoint is intended.
        torch.save(model, 'TajwidModelCNNSSD_FULL.pth')

# The original message contained a mis-encoded emoji; restored to the intended character
print("✅ Training Completed!")

In [None]:
import torchvision.ops as ops

def plot_prediction(model, image_path, threshold=0.5, iou_thresh=0.3, input_size=224):
    """Run the detector on one image and draw the surviving boxes on the original.

    Args:
        model: Detection model returning, in eval mode, one dict per image
            with the keys ``boxes``, ``labels`` and ``scores``.
        image_path: Path of the image to load.
        threshold: Minimum score for a box to be drawn.
        iou_thresh: IoU threshold for the per-class non-maximum suppression.
        input_size: Side length the image is resized to before inference.
            The default of 224 matches the resize used by this notebook's
            transforms.
    """
    model.eval()

    # Run on the device the model actually lives on; fall back to the
    # notebook-level `device` only for a model without parameters.
    first_param = next(iter(model.parameters()), None)
    run_device = first_param.device if first_param is not None else device

    # Load the image; convert() returns an independent copy, so the file handle
    # can be closed straight away.
    with Image.open(image_path) as handle:
        original_img = handle.convert("RGB")
    orig_w, orig_h = original_img.size
    img_resized = original_img.resize((input_size, input_size))
    img_tensor = F.to_tensor(img_resized).unsqueeze(0).to(run_device)

    with torch.no_grad():
        outputs = model(img_tensor)[0]

    # Raw detections
    all_boxes = outputs['boxes']
    all_labels = outputs['labels']
    all_scores = outputs['scores']

    # Filtered detections as (box, class index, score) triples
    detections = []

    # Per-class NMS over every class except background
    for class_idx in range(1, len(CLASS_NAMES)):
        cls_mask = all_labels == class_idx
        cls_boxes = all_boxes[cls_mask]
        cls_scores = all_scores[cls_mask]

        if cls_boxes.size(0) == 0:
            continue

        keep = ops.nms(cls_boxes, cls_scores, iou_thresh)
        for idx in keep.tolist():
            if cls_scores[idx] >= threshold:
                detections.append((cls_boxes[idx], class_idx, cls_scores[idx]))

    # Draw the results on the original image
    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    ax.imshow(original_img)

    # Boxes are predicted in resized-image coordinates; map them back
    scale_x = orig_w / input_size
    scale_y = orig_h / input_size
    if not detections:
        # The original message contained a mis-encoded emoji; restored here
        print("⚠️ Tidak ada prediksi valid yang ditemukan.")
    else:
        for box, label, score in detections:
            x1, y1, x2, y2 = box.cpu().numpy()
            x1 *= scale_x
            x2 *= scale_x
            y1 *= scale_y
            y2 *= scale_y

            ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                       fill=False, edgecolor='red', linewidth=2))
            ax.text(x1, y1, f"{CLASS_NAMES[label]} ({float(score)*100:.1f}%)",
                    fontsize=12, color='red')

    ax.axis('off')
    plt.show()

In [None]:
import matplotlib.pyplot as plt

# === Plot training loss and validation accuracy ===
epochs = range(1, len(train_losses) + 1)

# One figure, two side-by-side panels
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: loss
loss_ax.plot(epochs, train_losses, marker='o', label='Train Loss')
loss_ax.set_title('Training Loss per Epoch')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.grid(True)
loss_ax.legend()

# Right panel: accuracy
acc_ax.plot(epochs, val_accuracies, marker='s', color='green', label='Validation Accuracy')
acc_ax.set_title('Validation Accuracy per Epoch')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.grid(True)
acc_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two ``[x1, y1, x2, y2]`` boxes.

    The result is ``0`` when the boxes do not overlap or when their union has
    no area.
    """
    # Corners of the overlap rectangle (may be inverted when the boxes are disjoint)
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])

    # Clamp each side at zero so disjoint boxes contribute no overlap
    overlap = max(0, right - left) * max(0, bottom - top)

    first_area = (box1[2]-box1[0]) * (box1[3]-box1[1])
    second_area = (box2[2]-box2[0]) * (box2[3]-box2[1])
    combined = first_area + second_area - overlap

    return overlap / combined if combined != 0 else 0

In [None]:
from collections import defaultdict

def evaluate_detection(model, data_loader, device, iou_threshold=0.5, score_threshold=0.5):
    """Compute per-class and macro-averaged precision, recall and F1 for a detector.

    Each prediction with ``score >= score_threshold`` is matched to the
    not-yet-matched ground-truth box of the same label with the highest IoU.
    It is a true positive when that IoU reaches ``iou_threshold`` and a false
    positive otherwise. Unmatched ground-truth boxes are false negatives.

    Args:
        model: Detection model returning, in eval mode, one dict per image
            with the keys ``boxes``, ``labels`` and ``scores``.
        data_loader: Iterable of ``(images, targets)`` batches.
        device: Device the images and target tensors are moved to.
        iou_threshold: Minimum IoU for a match to count as a true positive.
        score_threshold: Predictions scoring below this are ignored.

    Returns:
        ``(precision, recall, f1_score, macro_precision, macro_recall, macro_f1)``.
        The first three are dicts keyed by label index, covering every index
        of ``CLASS_NAMES``. The macro values average labels ``1..`` only,
        skipping background.
    """
    model.eval()

    true_positives = defaultdict(int)
    false_positives = defaultdict(int)
    false_negatives = defaultdict(int)

    with torch.no_grad():
        for images, targets in tqdm(data_loader, desc="Evaluating"):
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            outputs = model(images)
            for output, target in zip(outputs, targets):
                # Move everything to the host once per image instead of once
                # per (prediction, ground-truth) pair.
                pred_boxes = output['boxes'].cpu().numpy()
                pred_labels = output['labels'].tolist()
                pred_scores = output['scores'].cpu()

                gt_boxes = target['boxes'].cpu().numpy()
                gt_labels = target['labels'].tolist()

                matched_gt = set()

                for pred_box, pred_label, pred_score in zip(pred_boxes, pred_labels, pred_scores):
                    if pred_score < score_threshold:
                        continue

                    best_iou = 0
                    best_gt_idx = -1
                    for gt_idx, (gt_box, gt_label) in enumerate(zip(gt_boxes, gt_labels)):
                        if gt_idx in matched_gt or pred_label != gt_label:
                            continue

                        iou = calculate_iou(pred_box, gt_box)
                        if iou > best_iou:
                            best_iou = iou
                            best_gt_idx = gt_idx

                    if best_iou >= iou_threshold and best_gt_idx != -1:
                        true_positives[pred_label] += 1
                        matched_gt.add(best_gt_idx)
                    else:
                        false_positives[pred_label] += 1

                # Count false negatives: ground-truth objects no prediction matched
                for gt_idx, gt_label in enumerate(gt_labels):
                    if gt_idx not in matched_gt:
                        false_negatives[gt_label] += 1

    precision = {}
    recall = {}
    f1_score = {}

    for label in range(len(CLASS_NAMES)):
        tp = true_positives[label]
        fp = false_positives[label]
        fn = false_negatives[label]

        prec = tp / (tp + fp) if (tp + fp) > 0 else 0
        rec = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * prec * rec / (prec + rec) if (prec + rec) > 0 else 0

        precision[label] = prec
        recall[label] = rec
        f1_score[label] = f1

    # Macro averages are computed once, after every per-class value exists
    valid_labels = range(1, len(CLASS_NAMES))  # skip background (label 0)

    macro_precision = np.mean([precision.get(i, 0.0) for i in valid_labels])
    macro_recall = np.mean([recall.get(i, 0.0) for i in valid_labels])
    macro_f1 = np.mean([f1_score.get(i, 0.0) for i in valid_labels])

    return precision, recall, f1_score, macro_precision, macro_recall, macro_f1

In [None]:
# Score the current in-memory model on the validation loader
precision, recall, f1_score, macro_precision, macro_recall, macro_f1 = evaluate_detection(model, val_loader, device)

# The headings below originally contained mis-encoded emoji; restored to the intended characters
print("\n📊 Evaluation Results (Validation Set)")
print("="*60)
for label_idx in range(1, len(CLASS_NAMES)):  # skip background
    print(f"Class: {CLASS_NAMES[label_idx]}")
    print(f" - Precision: {precision[label_idx]:.4f}")
    print(f" - Recall   : {recall[label_idx]:.4f}")
    print(f" - F1-Score : {f1_score[label_idx]:.4f}")
    print("-" * 30)

print("\n📈 Macro Averages:")
print(f"Overall Precision: {macro_precision:.4f}")
print(f"Overall Recall   : {macro_recall:.4f}")
print(f"Overall F1-Score : {macro_f1:.4f}")

In [None]:
# Image to run inference on. When running locally instead of on Kaggle,
# change this path (and the dataset paths defined earlier) to local paths.
TEST_IMAGE_PATH = '/kaggle/input/data-test/data_test/other2.png'
plot_prediction(model, TEST_IMAGE_PATH)