# Object Detection using Transfer Learning (PyTorch + COCO)

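This notebook demonstrates transfer learning for object detection: it loads a Faster R-CNN model (ResNet-50 FPN backbone) with weights pretrained on COCO, replaces its box-prediction head, and fine-tunes the model on the COCO `train2017` split.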

In [None]:
!pip install torch torchvision pycocotools matplotlib opencv-python tqdm

In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os

## Download COCO Dataset (if needed)

In [None]:
!mkdir -p coco
!cd coco && wget http://images.cocodataset.org/zips/train2017.zip
!cd coco && wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!cd coco && unzip -q train2017.zip
!cd coco && unzip -q annotations_trainval2017.zip

In [None]:
# Locations of the extracted train2017 images and their instance annotations
root = "coco/train2017"
annFile = "coco/annotations/instances_train2017.json"

# Images are converted to tensors on load; each sample's target is the list of
# raw COCO annotation dicts for that image (converted later, at training time).
train_dataset = CocoDetection(
    transform=F.to_tensor,
    annFile=annFile,
    root=root,
)
print(f"Dataset size: {len(train_dataset)}")

In [None]:
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``. Nothing is stacked, so the samples in a
    batch may differ in size. An empty batch yields an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Shuffled mini-batches of two images; collate_fn keeps samples as tuples
# instead of stacking them into a single tensor.
train_loader = DataLoader(
    train_dataset,
    collate_fn=collate_fn,
    num_workers=2,
    shuffle=True,
    batch_size=2,
)

In [None]:
# Build the detector: pretrained Faster R-CNN with a newly initialised box head
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 91  # COCO classes + background

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

# Replace the classification/regression head with a fresh one of the same input width
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.to(device)

In [None]:
# Training loop (short demo)
def _coco_to_target(annotations, device):
    """Convert one image's raw COCO annotations into a Faster R-CNN target dict.

    Args:
        annotations: List of COCO annotation dicts, each with a ``'bbox'``
            given as ``[x, y, width, height]`` and a ``'category_id'``.
        device: Device the resulting tensors are moved to.

    Returns:
        Dict with ``'boxes'`` (float32, shape ``(N, 4)``, ``[x1, y1, x2, y2]``)
        and ``'labels'`` (int64, shape ``(N,)``). Boxes with zero width or
        height are dropped together with their labels; an image with no
        usable annotations yields empty ``(0, 4)`` / ``(0,)`` tensors.
    """
    boxes = torch.as_tensor(
        [obj['bbox'] for obj in annotations], dtype=torch.float32
    ).reshape(-1, 4)  # reshape keeps the (0, 4) shape when there are no annotations
    labels = torch.as_tensor(
        [obj['category_id'] for obj in annotations], dtype=torch.int64
    )
    # COCO stores [x, y, w, h]; the detector expects corner format [x1, y1, x2, y2].
    boxes[:, 2:] += boxes[:, :2]
    # Faster R-CNN rejects any target box without positive width and height,
    # so filter those out instead of letting one bad annotation abort training.
    keep = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
    return {'boxes': boxes[keep].to(device), 'labels': labels[keep].to(device)}


optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 1

model.train()
for epoch in range(num_epochs):
    progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
    for images, targets in progress:
        images = [img.to(device) for img in images]
        formatted_targets = [_coco_to_target(t, device) for t in targets]

        # In train mode the model returns a dict of loss components; optimise their sum.
        loss_dict = model(images, formatted_targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        progress.set_postfix(loss=losses.item())

In [None]:
# Inference on one sample
model.eval()

# train_dataset applies F.to_tensor, so a sample's first element is already an
# image tensor (not a file path) and cannot be handed to Image.open. Use the
# tensor for the model and build a PIL copy of it for plotting.
test_tensor, _ = train_dataset[0]
test_img = F.to_pil_image(test_tensor)

with torch.no_grad():
    prediction = model([test_tensor.to(device)])

prediction

In [None]:
def plot_detections(img, prediction, score_threshold=0.5):
    """Display ``img`` with the detections from ``prediction[0]`` drawn on top.

    Args:
        img: Image to show; it is converted with ``np.array`` before plotting.
        prediction: Sequence whose first element is a dict holding ``'boxes'``,
            ``'scores'`` and ``'labels'`` tensors.
        score_threshold: Detections scoring below this value are not drawn.
    """
    detections = prediction[0]
    boxes, scores, labels = (
        detections[key].cpu().numpy() for key in ('boxes', 'scores', 'labels')
    )

    _, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(np.array(img))

    for box, score, label in zip(boxes, scores, labels):
        if not score < score_threshold:
            left, top, right, bottom = box.astype(int)
            outline = plt.Rectangle(
                (left, top),
                right - left,
                bottom - top,
                fill=False,
                color='lime',
                linewidth=2,
            )
            ax.add_patch(outline)
            # Caption at the box's top-left corner: "<label id>:<score>"
            ax.text(
                left,
                top,
                f'{label}:{score:.2f}',
                color='yellow',
                fontsize=8,
                backgroundcolor='black',
            )

    ax.axis('off')
    plt.show()

# Visualise the detections for the sample image at the default 0.5 score cut-off
plot_detections(test_img, prediction, score_threshold=0.5)