In [1]:
from roboflow import Roboflow
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.utils.data import DataLoader
import os
import torch
import torchvision
from torchvision.transforms import ToTensor
import xml.etree.ElementTree as ET
from PIL import Image


In [2]:
rf = Roboflow(api_key="QmzA8vyVJAsptHIaUGx5")
project = rf.workspace("penalty-detection").project("handball-detection-op71z")
version = project.version(8)
dataset = version.download("voc")
                

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in handball-detection-8 to voc:: 100%|██████████| 470581/470581 [04:16<00:00, 1832.33it/s]





Extracting Dataset Version Zip to handball-detection-8 in voc:: 100%|██████████| 4647/4647 [00:02<00:00, 1574.39it/s]


In [2]:
class VOCDataset(torch.utils.data.Dataset):
    def __init__(self, image_dir, annotation_dir, classes):
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.image_files = list(sorted(os.listdir(image_dir)))
        self.classes = classes

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        ann_path = os.path.join(self.annotation_dir, self.image_files[idx].replace(".jpg", ".xml"))
        
        img = Image.open(img_path).convert("RGB")
        tree = ET.parse(ann_path)
        root = tree.getroot()

        boxes = []
        labels = []

        for obj in root.findall("object"):
            label = obj.find("name").text
            if label not in self.classes:
                continue
            labels.append(self.classes.index(label))

            bbox = obj.find("bndbox")
            box = [
                float(bbox.find("xmin").text),
                float(bbox.find("ymin").text),
                float(bbox.find("xmax").text),
                float(bbox.find("ymax").text)
            ]
            boxes.append(box)

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels
        }

        return ToTensor()(img), target

In [4]:
# Class names, make sure the order matches your annotations
classes = ["__background__", "post", "handball"]

# Load datasets
train_dataset = VOCDataset("C:\\Users\\Jacob\\Desktop\\Thesis\\Code\\training-rcnn\\handball-detection-8\\train\\images", "C:\\Users\\Jacob\\Desktop\\Thesis\\Code\\training-rcnn\\handball-detection-8\\train\\annotations", classes)
valid_dataset = VOCDataset("C:\\Users\\Jacob\\Desktop\\Thesis\\Code\\training\\handball-detection-8\\valid\\images", "C:\\Users\\Jacob\\Desktop\\Thesis\\Code\\training-rcnn\\handball-detection-8\\valid\\annotations", classes)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
valid_loader = DataLoader(valid_dataset, batch_size=2, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

# Model
model = fasterrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, len(classes))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for images, targets in train_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {losses.item():.4f}")

Epoch 1, Loss: 0.0977
Epoch 2, Loss: 0.1919
Epoch 3, Loss: 0.1960
Epoch 4, Loss: 0.1173
Epoch 5, Loss: 0.1379
Epoch 6, Loss: 0.0307
Epoch 7, Loss: 0.1201
Epoch 8, Loss: 0.0299
Epoch 9, Loss: 0.1698
Epoch 10, Loss: 0.0660


In [5]:
torch.save(model.state_dict(), "faster_rcnn_handball.pth")