In [8]:
import torch
from torch.utils.data import DataLoader
from torchvision.models.detection import ssd300_vgg16
from torchvision.transforms import functional as F
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from PIL import Image
import os
import json
import copy
from roboflow import Roboflow
from copy import deepcopy

# Download Dataset

In [5]:
rf = Roboflow(api_key="QmzA8vyVJAsptHIaUGx5")
project = rf.workspace("penalty-detection").project("handball-detection-op71z")
version = project.version(8)
dataset = version.download("coco")
                

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in handball-detection-8 to coco:: 100%|██████████| 469233/469233 [00:28<00:00, 16484.99it/s]





Extracting Dataset Version Zip to handball-detection-8 in coco:: 100%|██████████| 2329/2329 [00:01<00:00, 1633.98it/s]


# Loading Dataset (COCO format)

In [6]:
class CocoDataset(torch.utils.data.Dataset):
    def __init__(self, images_dir, annotations_file, transforms=None):
        self.images_dir = images_dir
        self.coco = COCO(annotations_file)
        self.img_ids = self.coco.getImgIds()
        self.transforms = transforms

    def __getitem__(self, idx):
        img_id = self.img_ids[idx]
        img_info = self.coco.loadImgs(img_id)[0]
        img_path = os.path.join(self.images_dir, img_info['file_name'])
        img = Image.open(img_path).convert("RGB")
        img_tensor = F.to_tensor(img)

        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anns = self.coco.loadAnns(ann_ids)

        boxes = []
        labels = []
        for ann in anns:
            x, y, w, h = ann['bbox']
            boxes.append([x, y, x+w, y+h])
            labels.append(ann['category_id'])
        
        target = {
            "boxes": torch.tensor(boxes, dtype=torch.float32),
            "labels": torch.tensor(labels, dtype=torch.int64)
        }

        if self.transforms:
            img_tensor = self.transforms(img_tensor)
        
        return img_tensor, target

    def __len__(self):
        return len(self.img_ids)

# Paths
TRAIN_IMAGES = "handball-detection-8/train"
TRAIN_ANNOTATIONS = "handball-detection-8/train/_annotations.coco.json"
VAL_IMAGES = "handball-detection-8/valid"
VAL_ANNOTATIONS = "handball-detection-8/valid/_annotations.coco.json"

# Dataset and DataLoader
train_dataset = CocoDataset(TRAIN_IMAGES, TRAIN_ANNOTATIONS)
val_dataset = CocoDataset(VAL_IMAGES, VAL_ANNOTATIONS)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))


loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


# Setting up Model

In [7]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
NUM_CLASSES = 3  # including background

model = ssd300_vgg16(pretrained=True)
model.head.classification_head.num_classes = NUM_CLASSES
model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=SSD300_VGG16_Weights.COCO_V1`. You can also use `weights=SSD300_VGG16_Weights.DEFAULT` to get the most up-to-date weights.
Downloading: "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth" to C:\Users\Jacob/.cache\torch\hub\checkpoints\ssd300_vgg16_coco-b556d3b4.pth
100%|██████████| 136M/136M [00:08<00:00, 16.6MB/s] 


# Training Model

In [10]:
NUM_EPOCHS = 200
PATIENCE = 10

best_model_wts = deepcopy(model.state_dict())
best_loss = float('inf')
patience_counter = 0

for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0

    for images, targets in train_loader:
        images = list(img.to(DEVICE) for img in images)
        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        loss = sum(loss for loss in loss_dict.values())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {epoch_loss:.4f}")

    # Validation loss for early stopping
    model.train()  # <-- keep in training mode for loss computation
    val_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            images = list(img.to(DEVICE) for img in images)
            targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)  # returns dict of losses
            loss = sum(loss for loss in loss_dict.values())
            val_loss += loss.item()
    val_loss /= len(val_loader)
    print(f"Validation Loss: {val_loss:.4f}")

    # Early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        best_model_wts = deepcopy(model.state_dict())
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= PATIENCE:
            print("Early stopping triggered.")
            break

# Load best model weights
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), "ssd_best_model.pth")


Epoch [1/200], Loss: 2.6357
Validation Loss: 2.1991
Epoch [2/200], Loss: 2.2189
Validation Loss: 1.8472
Epoch [3/200], Loss: 1.9325
Validation Loss: 1.6412
Epoch [4/200], Loss: 1.7673
Validation Loss: 1.5740
Epoch [5/200], Loss: 1.6775
Validation Loss: 1.5304
Epoch [6/200], Loss: 1.5605
Validation Loss: 1.4344
Epoch [7/200], Loss: 1.4353
Validation Loss: 1.3649
Epoch [8/200], Loss: 1.4146
Validation Loss: 1.4523
Epoch [9/200], Loss: 1.5537
Validation Loss: 1.3689
Epoch [10/200], Loss: 1.3102
Validation Loss: 1.3334
Epoch [11/200], Loss: 1.2056
Validation Loss: 1.2886
Epoch [12/200], Loss: 1.1636
Validation Loss: 1.3083
Epoch [13/200], Loss: 1.4899
Validation Loss: 1.3925
Epoch [14/200], Loss: 1.1556
Validation Loss: 1.2645
Epoch [15/200], Loss: 1.1297
Validation Loss: 1.1624
Epoch [16/200], Loss: 1.0327
Validation Loss: 1.1990
Epoch [17/200], Loss: 1.9788
Validation Loss: 1.5875
Epoch [18/200], Loss: 1.4660
Validation Loss: 1.2681
Epoch [19/200], Loss: 1.1193
Validation Loss: 1.2607
Ep

# Evaluation Metrics (Precision, Recall, mAP@0.5, mAP@0.5:0.95)

In [11]:
# Paths
TEST_IMAGES_DIR = 'handball-detection-8/test'
TEST_ANNOTATIONS = 'handball-detection-8/test/_annotations.coco.json'
CONF_THRESHOLD = 0.05

coco = COCO(TEST_ANNOTATIONS)
img_ids = coco.getImgIds()

predictions = []

model.eval()
with torch.no_grad():
    for img_id in img_ids:
        img_info = coco.loadImgs(img_id)[0]
        img_path = os.path.join(TEST_IMAGES_DIR, img_info['file_name'])
        image = F.to_tensor(Image.open(img_path).convert("RGB")).to(DEVICE).unsqueeze(0)

        outputs = model(image)[0]
        keep = outputs['scores'] > CONF_THRESHOLD
        boxes = outputs['boxes'][keep]
        labels = outputs['labels'][keep]
        scores = outputs['scores'][keep]

        for box, label, score in zip(boxes, labels, scores):
            x_min, y_min, x_max, y_max = box.tolist()
            predictions.append({
                'image_id': img_id,
                'category_id': int(label),
                'bbox': [x_min, y_min, x_max - x_min, y_max - y_min],
                'score': float(score)
            })

# COCO Evaluation
coco_pred = coco.loadRes(predictions)
coco_eval = COCOeval(coco, coco_pred, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()


loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.04s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.568
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.852
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.620
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.114
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.502
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.761
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.508
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.660
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets