In [1]:
!pip install torch torchvision pycocotools



You should consider upgrading via the 'C:\Users\harri\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


In [2]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
from pycocotools.coco import COCO
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [3]:
class CocoDetectionDataset(Dataset):
    def __init__(self, root, ann_file, transforms=None):
        self.root = root
        self.coco = COCO(ann_file)
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.transforms = transforms

    def __getitem__(self, index):

        img_id = self.ids[index]
        path = self.coco.loadImgs(img_id)[0]['file_name']
        img = Image.open(os.path.join(self.root, path)).convert("RGB")  # ✅ Load image here

        
        coco = self.coco
        img_id = self.ids[index]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anns = coco.loadAnns(ann_ids)
    
        # If there are no annotations (i.e., negative image), skip it
        if len(anns) == 0:
            return None
    
        boxes = []
        labels = []
        for ann in anns:
            x, y, w, h = ann['bbox']
            boxes.append([x, y, x + w, y + h])
            labels.append(ann['category_id'])
    
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
    
        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([img_id])
        }
    
        if self.transforms:
            img = self.transforms(img)
    
        return img, target


    def __len__(self):
        return len(self.ids)

In [4]:
def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    return T.Compose(transforms)

In [5]:
image_dir_pos = "./data/FLC2019/trainval/JPEGImages_pos"
image_dir_neg = "./data/FLC2019/trainval/JPEGImages"

image_dir = image_dir_pos
annotation_file = "data/FLC2019/trainval/coco_annotations/instances_trainval_pos.json"

dataset = CocoDetectionDataset(image_dir, annotation_file, transforms=get_transform(train=True))

# Define collate function
def collate_fn(batch):
    batch = list(filter(lambda x: x is not None, batch))
    return tuple(zip(*batch))

data_loader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)

loading annotations into memory...
Done (t=0.06s)
creating index...
index created!


In [6]:
def get_model(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

num_classes = 2  # e.g., 1 class + background
model = get_model(num_classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)



FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [7]:
optimizer = torch.optim.SGD(
    [p for p in model.parameters() if p.requires_grad],
    lr=0.005, momentum=0.9, weight_decay=0.0005
)

num_epochs = 10

model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for images, targets in data_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        total_loss += losses.item()

    print(f"Epoch {epoch + 1}, Loss: {total_loss:.4f}")

Epoch 1, Loss: 101.5753
Epoch 2, Loss: 71.6530
Epoch 3, Loss: 59.1493
Epoch 4, Loss: 51.1365
Epoch 5, Loss: 48.1964
Epoch 6, Loss: 46.1072
Epoch 7, Loss: 39.3699
Epoch 8, Loss: 35.9705
Epoch 9, Loss: 33.7941
Epoch 10, Loss: 32.2781


In [8]:
model.eval()
with torch.no_grad():
    img, _ = dataset[0]
    prediction = model([img.to(device)])
    print(prediction)

[{'boxes': tensor([[1918.1157, 1102.5902, 2116.3806, 1289.3855]], device='cuda:0'), 'labels': tensor([1], device='cuda:0'), 'scores': tensor([0.9985], device='cuda:0')}]


In [9]:
torch.save(model.state_dict(), "fasterrcnn_model.pth")