In [1]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
# Define transformations
class CocoTransform:
    def __call__(self, image, target):
        image = F.to_tensor(image)  # Convert PIL image to tensor
        return image, target

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Dataset class
def get_coco_dataset(img_dir, ann_file):
    return CocoDetection(
        root=img_dir,
        annFile=ann_file,
        transforms=CocoTransform()
    )

# Load datasets
train_dataset = get_coco_dataset(
    img_dir="/content/drive/MyDrive/MaskRCNN_Custom/train",
    ann_file="/content/drive/MyDrive/MaskRCNN_Custom/train/_annotations.coco.json"
)


val_dataset = get_coco_dataset(
    img_dir="/content/drive/McyDrive/MaskRCNN_Custom/valid",
    ann_file="/content/drive/MyDrive/MaskRCNN_Custom/valid/_annotations.coco.json"
)
# DataLoader
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

loading annotations into memory...
Done (t=0.64s)
creating index...
index created!
loading annotations into memory...
Done (t=0.36s)
creating index...
index created!


In [5]:
# Load Faster R-CNN with ResNet-50 backbone
def get_model(num_classes):
    # Load pre-trained Faster R-CNN
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # Get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

In [6]:
# Initialize the model
num_classes = 2 # Background + chair, human, table
model = get_model(num_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 190MB/s]


In [7]:
# Move model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Define optimizer and learning rate scheduler
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [8]:
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    model.train()
    for images, targets in data_loader:
        # Move images to the device
        images = [img.to(device) for img in images]

        # Validate and process targets
        processed_targets = []
        valid_images = []
        for i, target in enumerate(targets):
            boxes = []
            labels = []
            for obj in target:
                # Extract bbox
                bbox = obj["bbox"]  # Format: [x, y, width, height]
                x, y, w, h = bbox

                # Ensure the width and height are positive
                if w > 0 and h > 0:
                    boxes.append([x, y, x + w, y + h])  # Convert to [x_min, y_min, x_max, y_max]
                    labels.append(obj["category_id"])

            # Only process if there are valid boxes
            if boxes:
                processed_target = {
                    "boxes": torch.tensor(boxes, dtype=torch.float32).to(device),
                    "labels": torch.tensor(labels, dtype=torch.int64).to(device),
                }
                processed_targets.append(processed_target)
                valid_images.append(images[i])  # Add only valid images

        # Skip iteration if no valid targets
        if not processed_targets:
            continue

        # Ensure images and targets are aligned
        images = valid_images

        # Forward pass
        loss_dict = model(images, processed_targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backpropagation
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    print(f"Epoch [{epoch}] Loss: {losses.item():.4f}")

In [None]:
# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch)
    lr_scheduler.step()

    # Save the model's state dictionary after every epoch
    model_path = f"fasterrcnn_resnet50_epoch_{epoch + 1}.pth"
    torch.save(model.state_dict(), model_path)
    print(f"Model saved: {model_path}")

Epoch [0] Loss: 0.1049
Model saved: fasterrcnn_resnet50_epoch_1.pth
Epoch [1] Loss: 0.0819
Model saved: fasterrcnn_resnet50_epoch_2.pth
Epoch [2] Loss: 0.0720
Model saved: fasterrcnn_resnet50_epoch_3.pth
Epoch [3] Loss: 0.1120
Model saved: fasterrcnn_resnet50_epoch_4.pth
Epoch [4] Loss: 0.1629
Model saved: fasterrcnn_resnet50_epoch_5.pth
Epoch [5] Loss: 0.0373
Model saved: fasterrcnn_resnet50_epoch_6.pth
Epoch [6] Loss: 0.1488
Model saved: fasterrcnn_resnet50_epoch_7.pth
Epoch [7] Loss: 0.0729
Model saved: fasterrcnn_resnet50_epoch_8.pth
Epoch [8] Loss: 0.0339
Model saved: fasterrcnn_resnet50_epoch_9.pth
Epoch [9] Loss: 0.0580
Model saved: fasterrcnn_resnet50_epoch_10.pth
Epoch [10] Loss: 0.0470
Model saved: fasterrcnn_resnet50_epoch_11.pth
Epoch [11] Loss: 0.1550
Model saved: fasterrcnn_resnet50_epoch_12.pth
Epoch [12] Loss: 0.1489
Model saved: fasterrcnn_resnet50_epoch_13.pth
Epoch [13] Loss: 0.1108
Model saved: fasterrcnn_resnet50_epoch_14.pth
Epoch [14] Loss: 0.0396
Model saved: fa

In [None]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image

# Load Faster R-CNN with ResNet-50 backbone
def get_model(num_classes):
    # Load pre-trained Faster R-CNN
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    # Get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # Replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


# Initialize the model
num_classes = 6  # Background + small + medium + large

# Move model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the trained model
model = get_model(num_classes)
model.load_state_dict(torch.load("fasterrcnn_resnet50_epoch_18.pth", map_location=device, weights_only=True))
model.to(device)
model.eval()  # Set the model to evaluation mode


# Function to preprocess image
def prepare_image(image_path):
    image = Image.open(image_path).convert("RGB")  # Open image
    image_tensor = F.to_tensor(image).unsqueeze(0)  # Convert image to tensor and add batch dimension
    return image_tensor.to(device)

# Define class names
# COCO_CLASSES = {0: "Background", 1: "yellofish", 2: "bluefish", 3: "goldfish"}
COCO_CLASSES = {0: "Background", 1: "Bluetang", 2: "MoorishIdol", 3: "Nemo", 4: "YellowTang", 5: "Goldfish"}

def get_class_name(class_id):
    return COCO_CLASSES.get(class_id, "Unknown")

# Function to draw bounding boxes
def draw_boxes(image, prediction, fig_size=(12, 10)):
    plt.figure(figsize=fig_size)  # Set figure size
    plt.imshow(image)  # Display the image first

    boxes = prediction[0]['boxes'].cpu().numpy()  # Get bounding boxes
    labels = prediction[0]['labels'].cpu().numpy()  # Get labels
    scores = prediction[0]['scores'].cpu().numpy()  # Get confidence scores

    threshold = 0.5  # Lowered threshold for testing

    print("Boxes Shape:", boxes.shape)
    print("Labels Shape:", labels.shape)
    print("Scores Shape:", scores.shape)

    for box, label, score in zip(boxes, labels, scores):
        if score > threshold:
            x_min, y_min, x_max, y_max = box
            class_name = get_class_name(label)  # Get class name

            # Ensure coordinates are within image bounds
            x_min = max(0, x_min)
            y_min = max(0, y_min)
            x_max = min(image.size[0], x_max)
            y_max = min(image.size[1], y_max)

            # Draw bounding box
            plt.gca().add_patch(plt.Rectangle(
                (x_min, y_min), x_max - x_min, y_max - y_min,
                linewidth=2, edgecolor='r', facecolor='none'
            ))

            # Add text label
            plt.text(x_min, y_min, f"{class_name} ({score:.2f})", color='r', fontsize=12,
                     bbox=dict(facecolor='white', alpha=0.5))

    plt.axis('off')  # Hide axes
    plt.show()


# Load the unseen image
image_path = "/content/drive/MyDrive/coco/test/7117_Caranx_sexfasciatus_juvenile_f000022_RGHS_jpg.rf.2b7c2322f58bf67d49e96446c2b2e560.jpg"
image_tensor = prepare_image(image_path)

# Run inference
with torch.no_grad():  # Disable gradient computation
    prediction = model(image_tensor)

# Display results
draw_boxes(Image.open(image_path), prediction)


In [None]:
# Move the model to Google Drive
!mv fasterrcnn_resnet50_epoch_15.pth /content/drive/MyDrive/coco/


In [None]:
# Download Locally
from google.colab import files
files.download("fasterrcnn_resnet50_epoch_7.pth")
