In [26]:
from roboflow import Roboflow
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.utils.data import DataLoader
import os
import torch
import torchvision
from torchvision.transforms import ToTensor
import xml.etree.ElementTree as ET
from PIL import Image
import copy
import csv
import time
from torchvision.ops import box_iou


In [2]:
rf = Roboflow(api_key="QmzA8vyVJAsptHIaUGx5")
project = rf.workspace("penalty-detection").project("handball-detection-op71z")
version = project.version(8)
dataset = version.download("voc")
                

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in handball-detection-8 to voc:: 100%|██████████| 470581/470581 [04:16<00:00, 1832.33it/s]





Extracting Dataset Version Zip to handball-detection-8 in voc:: 100%|██████████| 4647/4647 [00:02<00:00, 1574.39it/s]


In [27]:
class VOCDataset(torch.utils.data.Dataset):
    def __init__(self, image_dir, annotation_dir, classes):
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.image_files = list(sorted(os.listdir(image_dir)))
        self.classes = classes

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        ann_path = os.path.join(self.annotation_dir, self.image_files[idx].replace(".jpg", ".xml"))
        
        img = Image.open(img_path).convert("RGB")
        tree = ET.parse(ann_path)
        root = tree.getroot()

        boxes = []
        labels = []

        for obj in root.findall("object"):
            label = obj.find("name").text
            if label not in self.classes:
                continue
            labels.append(self.classes.index(label))

            bbox = obj.find("bndbox")
            box = [
                float(bbox.find("xmin").text),
                float(bbox.find("ymin").text),
                float(bbox.find("xmax").text),
                float(bbox.find("ymax").text)
            ]
            boxes.append(box)

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels
        }

        return ToTensor()(img), target

In [28]:
def evaluate_loss(model, valid_loader, device):
    model.train()  # keep in train mode so it returns a loss dict
    val_loss = 0.0
    
    with torch.no_grad():  # no gradients, but still returns losses
        for images, targets in valid_loader:
            images = list(img.to(device) for img in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            val_loss += losses.item()
    
    return val_loss / len(valid_loader)

In [12]:
csv_file = "faster_rcnn_results.csv"
with open(csv_file, mode="w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow([
        "epoch", "time", "train_loss", "val_loss",
        "precision", "recall", "mAP50", "mAP50-95"
    ])

In [29]:
def evaluate_metrics(model, valid_loader, device):
    model.eval()
    all_precisions, all_recalls, all_map50, all_map5095 = [], [], [], []

    with torch.no_grad():
        for images, targets in valid_loader:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            outputs = model(images)

            for output, target in zip(outputs, targets):
                gt_boxes = target["boxes"]
                pred_boxes = output["boxes"]
                scores = output["scores"]

                if len(gt_boxes) == 0 or len(pred_boxes) == 0:
                    continue

                # Sort predictions by confidence
                sorted_idx = scores.argsort(descending=True)
                pred_boxes = pred_boxes[sorted_idx]

                matched_gt = set()
                tp, fp = 0, 0

                for pb in pred_boxes:
                    ious = box_iou(pb.unsqueeze(0), gt_boxes).squeeze(0)
                    max_iou, max_idx = ious.max(0)

                    if max_iou > 0.5 and max_idx.item() not in matched_gt:
                        tp += 1
                        matched_gt.add(max_idx.item())
                    else:
                        fp += 1

                fn = len(gt_boxes) - tp

                # Precision/Recall
                precision = tp / (tp + fp + 1e-6)
                recall = tp / (tp + fn + 1e-6)

                all_precisions.append(precision)
                all_recalls.append(recall)

                # mAP@0.5
                all_map50.append(precision if tp > 0 else 0.0)

                # mAP@0.5:0.95 (still simplified but avoids inflation)
                map_scores = []
                for thr in [x/100 for x in range(50, 100, 5)]:
                    matched_gt = set()
                    tp_thr, fp_thr = 0, 0
                    for pb in pred_boxes:
                        ious = box_iou(pb.unsqueeze(0), gt_boxes).squeeze(0)
                        max_iou, max_idx = ious.max(0)
                        if max_iou > thr and max_idx.item() not in matched_gt:
                            tp_thr += 1
                            matched_gt.add(max_idx.item())
                        else:
                            fp_thr += 1
                    fn_thr = len(gt_boxes) - tp_thr
                    prec_thr = tp_thr / (tp_thr + fp_thr + 1e-6)
                    map_scores.append(prec_thr)
                all_map5095.append(sum(map_scores) / len(map_scores))

    return (
        sum(all_precisions) / len(all_precisions) if all_precisions else 0,
        sum(all_recalls) / len(all_recalls) if all_recalls else 0,
        sum(all_map50) / len(all_map50) if all_map50 else 0,
        sum(all_map5095) / len(all_map5095) if all_map5095 else 0
    )

In [31]:
# Class names, make sure the order matches your annotations
classes = ["__background__", "post", "handball"]

# Load datasets
train_dataset = VOCDataset("C:\\Users\\Jacob\\Desktop\\Thesis\\Code\\training-rcnn\\handball-detection-8\\train\\images", "C:\\Users\\Jacob\\Desktop\\Thesis\\Code\\training-rcnn\\handball-detection-8\\train\\annotations", classes)
valid_dataset = VOCDataset("C:\\Users\\Jacob\\Desktop\\Thesis\\Code\\training\\handball-detection-8\\valid\\images", "C:\\Users\\Jacob\\Desktop\\Thesis\\Code\\training-rcnn\\handball-detection-8\\valid\\annotations", classes)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
valid_loader = DataLoader(valid_dataset, batch_size=2, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

# Model
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, len(classes))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Training loop
num_epochs = 200
patience = 10
best_loss = float("inf")
epochs_no_improve = 0
start_time = time.time()

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, targets in train_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        running_loss += losses.item()

    # Train/validation loss
    epoch_loss = running_loss / len(train_loader)
    val_loss = evaluate_loss(model, valid_loader, device)   # ✅ fixed

    # Metrics
    precision, recall, mAP50, mAP5095 = evaluate_metrics(model, valid_loader, device)

    # Epoch time
    elapsed = time.time() - start_time

    print(f"Epoch {epoch+1}, Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}, "
          f"P: {precision:.4f}, R: {recall:.4f}, mAP50: {mAP50:.4f}, mAP50-95: {mAP5095:.4f}")

    # Save results to CSV
    with open(csv_file, mode="a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow([
            epoch+1, round(elapsed, 2), round(epoch_loss, 4), round(val_loss, 4),
            round(precision, 4), round(recall, 4), round(mAP50, 4), round(mAP5095, 4)
        ])

    # Early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        best_model_wts = copy.deepcopy(model.state_dict())
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

# Restore best model
model.load_state_dict(best_model_wts)


Epoch 1, Train Loss: 0.2457, Val Loss: 0.2034, P: 0.6832, R: 1.0000, mAP50: 0.6832, mAP50-95: 0.4808
Epoch 2, Train Loss: 0.1653, Val Loss: 0.1619, P: 0.7961, R: 0.9750, mAP50: 0.7961, mAP50-95: 0.6133
Epoch 3, Train Loss: 0.1478, Val Loss: 0.1537, P: 0.8042, R: 0.9822, mAP50: 0.8042, mAP50-95: 0.6303
Epoch 4, Train Loss: 0.1304, Val Loss: 0.1384, P: 0.8120, R: 0.9895, mAP50: 0.8120, mAP50-95: 0.6340
Epoch 5, Train Loss: 0.1225, Val Loss: 0.1456, P: 0.8044, R: 0.9895, mAP50: 0.8044, mAP50-95: 0.6431
Epoch 6, Train Loss: 0.1129, Val Loss: 0.1371, P: 0.8359, R: 0.9804, mAP50: 0.8359, mAP50-95: 0.6520
Epoch 7, Train Loss: 0.1086, Val Loss: 0.1506, P: 0.8365, R: 0.9815, mAP50: 0.8365, mAP50-95: 0.6462
Epoch 8, Train Loss: 0.1032, Val Loss: 0.1425, P: 0.8952, R: 0.9756, mAP50: 0.8952, mAP50-95: 0.7127
Epoch 9, Train Loss: 0.0991, Val Loss: 0.1252, P: 0.8782, R: 0.9814, mAP50: 0.8782, mAP50-95: 0.7164
Epoch 10, Train Loss: 0.0945, Val Loss: 0.1334, P: 0.8523, R: 0.9892, mAP50: 0.8523, mAP50-

<All keys matched successfully>

In [24]:
val_loss = evaluate_loss(model, valid_loader, device)
print(f"Epoch {epoch+1}, Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}")


Epoch 11, Train Loss: 0.0890, Val Loss: 0.0000


In [25]:
torch.save(model.state_dict(), "faster_rcnn_handball.pth")