<a href="https://colab.research.google.com/github/Vloker/machine-learning/blob/main/mobilenetV3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision
from tqdm import tqdm
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision import models
import matplotlib.pyplot as plt
from PIL import Image
import time

In [2]:
# Joint (image, target) transform applied by the dataset
class CocoTransform:
    """Callable that turns the image into a tensor and passes the target through."""

    def __call__(self, image, target):
        # Only the image is converted; the annotation payload is returned as received.
        return F.to_tensor(image), target

In [3]:
# Dataset factory
def get_coco_dataset(img_dir, ann_file):
    """Create a ``CocoDetection`` dataset that applies ``CocoTransform`` to each sample.

    Args:
        img_dir: Passed to ``CocoDetection`` as ``root``.
        ann_file: Passed to ``CocoDetection`` as ``annFile``.

    Returns:
        The constructed ``CocoDetection`` instance.
    """
    sample_transform = CocoTransform()
    dataset = CocoDetection(root=img_dir, annFile=ann_file, transforms=sample_transform)
    return dataset

# Load datasets
# Single place to change when the data lives somewhere else.
DATA_ROOT = "/content/drive/MyDrive/machine-learning/database/data-1"


def collate_detection_batch(batch):
    """Regroup a list of (image, target) samples into an (images, targets) pair of tuples.

    Defined as a named module-level function (instead of a lambda) so it can be
    pickled if the loaders are ever given worker processes.
    """
    return tuple(zip(*batch))


train_dataset = get_coco_dataset(
    img_dir=f"{DATA_ROOT}/train",
    ann_file=f"{DATA_ROOT}/train/annotations/instances_default.json"
)

val_dataset = get_coco_dataset(
    img_dir=f"{DATA_ROOT}/val",
    ann_file=f"{DATA_ROOT}/val/annotations/instances_default.json"
)

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_detection_batch)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_detection_batch)

loading annotations into memory...
Done (t=0.04s)
creating index...
index created!
loading annotations into memory...
Done (t=0.19s)
creating index...
index created!


In [4]:
def get_model_with_mobilenet_v3(num_classes):
    """Build a Faster R-CNN detector on top of a MobileNetV3-Large feature extractor.

    Args:
        num_classes: Number of classes the box predictor outputs (the caller in this
            notebook counts the background class in this number).

    Returns:
        The assembled ``FasterRCNN`` model.
    """
    # Load the MobileNetV3 backbone with pretrained weights and keep only its feature layers.
    # Newer torchvision exposes a weights enum and warns on ``pretrained=True``;
    # IMAGENET1K_V1 is the checkpoint the legacy flag selects. Fall back to the
    # legacy flag when the enum is not available.
    weights_enum = getattr(models, "MobileNet_V3_Large_Weights", None)
    if weights_enum is not None:
        features = models.mobilenet_v3_large(weights=weights_enum.IMAGENET1K_V1).features
    else:
        features = models.mobilenet_v3_large(pretrained=True).features

    # Append a 1x1 convolution that changes the channel count from 960 to 1280
    backbone = nn.Sequential(
        features,
        nn.Conv2d(960, 1280, kernel_size=1),  # Output channel adjustment
    )
    backbone.out_channels = 1280  # FasterRCNN reads this attribute from the backbone

    # Define the anchor generator for the RPN.
    # The backbone yields a single feature map, so both arguments carry exactly one
    # entry: five anchor sizes combined with three aspect ratios at each location.
    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),),  # Anchor sizes
        aspect_ratios=((0.5, 1.0, 2.0),)  # Anchor aspect ratios
    )

    # Define ROI Pooling
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],  # Feature maps to consider
        output_size=10,  # Spatial size of the output
        sampling_ratio=2
    )

    # Create the Faster R-CNN model
    model = FasterRCNN(
        backbone=backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler
    )

    # Replace the classifier head
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

In [5]:
# Initialize the model
# Five classes: background + healthy (sehat), branched (bercabang), cracked (retak), black spot (bercak hitam)
num_classes = 5
model = get_model_with_mobilenet_v3(num_classes=num_classes)



In [6]:
# Run on the GPU when one is available, otherwise on the CPU
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
model.to(device)

# Optimizer over the trainable parameters only, plus a step-wise learning rate schedule
params = list(filter(lambda param: param.requires_grad, model.parameters()))
optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=0.001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [7]:
def IoU(box1, box2):
    """Intersection-over-union of two boxes given as (x1, y1, x2, y2) sequences.

    Returns ``0`` when the union area is not positive.
    """
    def _area(box):
        # Width times height of a corner-format box
        return (box[2] - box[0]) * (box[3] - box[1])

    # Corners of the overlap rectangle
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    # Clamp at zero so disjoint boxes contribute no overlap
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    overlap = overlap_w * overlap_h

    union = _area(box1) + _area(box2) - overlap

    if union > 0:
        return overlap / union
    return 0

In [8]:
def evaluate_with_metrics(model, data_loader, device, iou_threshold=0.5):
    """Score the detector with greedy, label-aware IoU matching.

    Predictions are visited in the order the model returns them. A prediction is
    counted as correct when it shares its label with a ground-truth box that has
    not been matched yet and their IoU exceeds ``iou_threshold``; each prediction
    and each ground-truth box is matched at most once.

    Args:
        model: Detection model. It is switched to eval mode and called as
            ``model(images)``; each returned item must provide ``'boxes'`` and
            ``'labels'``.
        data_loader: Iterable of ``(images, targets)`` batches. Each target is a
            sequence of annotation dicts with ``'bbox'`` as ``[x, y, w, h]`` and
            ``'category_id'``.
        device: Device the images are moved to before inference.
        iou_threshold: Minimum IoU (exclusive) for a match. Defaults to 0.5.

    Returns:
        ``(avg_loss, accuracy, mean_iou)`` where ``avg_loss`` is the summed
        SmoothL1 box loss averaged over matched pairs, ``accuracy`` is the
        percentage of ground-truth boxes that were matched, and ``mean_iou`` is
        the mean IoU of the matched pairs. Each value is 0.0 when its
        denominator is zero.
    """
    model.eval()
    matched_count = 0
    total_true_boxes = 0
    total_loss = 0.0
    total_iou = 0.0

    loss_fn = torch.nn.SmoothL1Loss(reduction="sum")

    with torch.no_grad():
        for images, targets in tqdm(data_loader, desc="Evaluating"):
            images = [img.to(device) for img in images]
            outputs = model(images)

            for output, target in zip(outputs, targets):
                # Copy predictions to the host once per image rather than once per
                # (prediction, ground-truth) pair inside the matching loop.
                pred_boxes = output['boxes'].cpu()
                pred_coords = pred_boxes.tolist()
                pred_labels = output['labels'].tolist()

                # Convert COCO [x, y, w, h] to corner format. Boxes with a
                # non-positive width or height are dropped, the same filter the
                # training loop applies, so they do not count as missed objects.
                true_coords = []
                true_labels = []
                for obj in target:
                    x, y, w, h = obj['bbox']
                    if w > 0 and h > 0:
                        true_coords.append([x, y, x + w, y + h])
                        true_labels.append(obj['category_id'])

                total_true_boxes += len(true_coords)
                if not true_coords or not pred_coords:
                    continue

                true_boxes = torch.tensor(true_coords, dtype=torch.float32)
                matched_true_indices = set()

                for pred_idx, (p_box, p_label) in enumerate(zip(pred_coords, pred_labels)):
                    for true_idx, (t_box, t_label) in enumerate(zip(true_coords, true_labels)):
                        # Skip ground truth that is already taken or has another label
                        # before paying for the IoU computation.
                        if true_idx in matched_true_indices or t_label != p_label:
                            continue

                        iou_score = IoU(p_box, t_box)
                        if iou_score > iou_threshold:
                            matched_true_indices.add(true_idx)
                            matched_count += 1
                            total_iou += iou_score
                            total_loss += loss_fn(pred_boxes[pred_idx], true_boxes[true_idx]).item()
                            break  # this prediction is consumed; move to the next one

    accuracy = (matched_count / total_true_boxes * 100) if total_true_boxes > 0 else 0.0
    avg_loss = total_loss / matched_count if matched_count > 0 else 0.0
    mean_iou = total_iou / matched_count if matched_count > 0 else 0.0
    return avg_loss, accuracy, mean_iou

In [9]:
def train_one_epoch(model, optimizer, data_loader, device, epoch, num_epochs):
    """Run one optimization pass over ``data_loader``.

    Annotations are converted from COCO ``[x, y, w, h]`` boxes to corner format.
    Boxes with a non-positive width or height are dropped, images left without
    any box are removed from the batch, and batches left without any image are
    skipped entirely.

    Args:
        model: Detection model; called as ``model(images, targets)`` in train mode
            and expected to return a dict of loss tensors.
        optimizer: Optimizer stepped once per processed batch.
        data_loader: Iterable of ``(images, targets)`` batches, each target being a
            sequence of annotation dicts with ``"bbox"`` and ``"category_id"``.
        device: Device that images and targets are moved to.
        epoch: Current epoch number, used only in the progress-bar label.
        num_epochs: Total number of epochs, used only in the progress-bar label.

    Returns:
        ``(avg_loss, minutes, seconds)``: mean summed loss over the processed
        batches (0.0 if none were processed) and the elapsed wall time split into
        whole minutes and seconds.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    start_time = time.time()

    progress = tqdm(data_loader, desc=f"Epoch [{epoch}/{num_epochs}]", leave=True)
    for images, targets in progress:
        batch_images = []
        batch_targets = []
        for image, annotations in zip(images, targets):
            boxes = []
            labels = []
            for obj in annotations:
                x, y, w, h = obj["bbox"]
                if w > 0 and h > 0:
                    boxes.append([x, y, x + w, y + h])
                    labels.append(obj["category_id"])

            if not boxes:
                continue

            # Transfer the image only once we know it stays in the batch.
            batch_images.append(image.to(device))
            batch_targets.append({
                "boxes": torch.tensor(boxes, dtype=torch.float32, device=device),
                "labels": torch.tensor(labels, dtype=torch.int64, device=device),
            })

        if not batch_targets:
            continue

        loss_dict = model(batch_images, batch_targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Read the scalar back once and reuse it for both the running total and the display.
        loss_value = losses.item()
        total_loss += loss_value
        num_batches += 1
        progress.set_postfix(loss=loss_value)

    avg_loss = total_loss / num_batches if num_batches > 0 else 0.0
    elapsed_time = time.time() - start_time
    minutes, seconds = divmod(int(elapsed_time), 60)

    return avg_loss, minutes, seconds

In [10]:
# Training loop with formatted output
num_epochs = 5
for epoch in range(1, num_epochs + 1):
    train_loss, train_minutes, train_seconds = train_one_epoch(model, optimizer, train_loader, device, epoch, num_epochs)

    # Advance the StepLR schedule once per epoch, after the optimizer updates.
    # Without this call the configured learning rate decay never takes effect.
    lr_scheduler.step()

    val_loss, val_accuracy, val_mean_iou = evaluate_with_metrics(model, val_loader, device)

    print(f"Epoch [{epoch}/{num_epochs}]")
    print(f"[Train] Loss: {train_loss:.4f} | Time: {train_minutes}m {train_seconds}s")
    print(f"[Validation] Loss: {val_loss:.4f} | Accuracy: {val_accuracy:.2f}% | Mean IoU: {val_mean_iou:.3f}")

    # Save the model's state dictionary after every epoch
    model_path = f"fasterrcnn_mobilenetV3_epoch_{epoch}.pth"
    torch.save(model.state_dict(), model_path)
    print(f"Model saved: {model_path}")

Epoch [1/5]:   1%|          | 1/173 [06:48<19:32:24, 408.98s/it, loss=2.58]


KeyboardInterrupt: 