In [17]:
# fasterrcnn_pretrained_test.ipynb
# Purpose: Use pretrained Faster R-CNN to detect players on SoccerNet dataset frames

import os
import torch
import torchvision
import torchvision.transforms as T
import matplotlib.pyplot as plt
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.utils import draw_bounding_boxes
from PIL import Image
from tqdm import tqdm
from collections import defaultdict

# --- CONFIG ---
# Compute device: use CUDA when PyTorch reports it as available, else the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Dataset layout: every sub-directory of BASE_DATA_DIR is treated as one
# sequence holding its frames in IMAGE_FOLDER and annotations in gt/GT_FILENAME.
BASE_DATA_DIR = "../soccernet_data/tracking/test"
IMAGE_FOLDER = "img1"
GT_FILENAME = "gt.txt"
IMAGE_EXTS = ['.jpg', '.png']  # matched case-insensitively against file names

# Evaluation knobs.
NUM_IMAGES = 10      # upper bound on the number of example visualisations kept
SCORE_THRESH = 0.8   # detections must score strictly above this to be kept
IOU_THRESH = 0.5     # a kept detection counts as a hit when its best IoU exceeds this

In [18]:
# ----- UTILS -----

# --- LOAD MODEL ---
# Build Faster R-CNN with pretrained weights, then switch it to evaluation
# mode and move its parameters to DEVICE (both calls return the module itself).
model = fasterrcnn_resnet50_fpn(pretrained=True)
model = model.eval().to(DEVICE)

# --- IMAGE PREPROCESSING ---
# The only preprocessing step is the PIL -> tensor conversion.
transform = T.Compose([T.ToTensor()])

def load_gt_boxes(gt_path, device=None):
    """Read a comma-separated ground-truth file into per-frame box lists.

    Each non-blank line must provide at least six fields:
    ``frame, <ignored>, x, y, w, h``. Any further columns are ignored.
    Boxes are converted from (x, y, w, h) to (x1, y1, x2, y2).

    Args:
        gt_path: Path of the annotation text file.
        device: Device on which the box tensors are created. When ``None``
            (the default) the notebook-level ``DEVICE`` is used.

    Returns:
        ``defaultdict(list)`` mapping the integer frame number to a list of
        float32 tensors of shape [4].

    Raises:
        ValueError: If a non-blank line has fewer than six fields or a field
            cannot be parsed as a number.
    """
    # Resolve lazily so the global is only needed when no device is passed.
    target_device = DEVICE if device is None else device

    gt_dict = defaultdict(list)
    with open(gt_path, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            parts = line.split(',')
            if len(parts) < 6:
                raise ValueError(
                    f"{gt_path}:{line_no}: expected at least 6 comma-separated "
                    f"fields, got {len(parts)}"
                )

            frame = int(parts[0])
            x, y, w, h = (float(value) for value in parts[2:6])

            # float32 so the boxes can be compared and stacked with
            # floating-point prediction boxes without dtype promotion.
            box = torch.tensor(
                [x, y, x + w, y + h],
                dtype=torch.float32,
                device=target_device,
            )
            gt_dict[frame].append(box)
    return gt_dict

# --- IOU CALCULATION ---
def compute_iou(box1, box2):
    """Pairwise intersection-over-union of two box sets.

    Args:
        box1: Tensor of shape [N, 4] in (x1, y1, x2, y2) order.
        box2: Tensor of shape [M, 4] in (x1, y1, x2, y2) order.

    Returns:
        Floating-point tensor of shape [N, M] where entry (i, j) is the IoU of
        ``box1[i]`` and ``box2[j]``. Pairs whose union area is not positive
        (e.g. two zero-area boxes) yield 0 instead of NaN.
    """
    # Bring both inputs to one floating dtype so the element-wise max/min
    # below never has to mix integer and floating-point tensors.
    common = torch.promote_types(box1.dtype, box2.dtype)
    if not common.is_floating_point:
        common = torch.get_default_dtype()
    box1 = box1.to(common)
    box2 = box2.to(common)

    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])

    # Broadcast [N, 1, 2] against [M, 2] to get every pair's overlap corners.
    lt = torch.max(box1[:, None, :2], box2[:, :2])
    rb = torch.min(box1[:, None, 2:], box2[:, 2:])

    # Non-overlapping pairs have a negative extent; clamp it to zero.
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, :, 0] * wh[:, :, 1]
    union = area1[:, None] + area2 - inter

    # Guard the division: a non-positive union would otherwise give NaN/inf.
    valid = union > 0
    safe_union = torch.where(valid, union, torch.ones_like(union))
    iou = torch.where(valid, inter / safe_union, torch.zeros_like(inter))
    return iou

# --- DRAW DETECTIONS + GT ---
def plot_gt_and_detections(image_tensor, detections, gt_boxes, score_thresh=0.8):
    """Render confident predictions (red) and ground-truth boxes (green).

    Args:
        image_tensor: Image tensor; it is scaled by 255 and cast to bytes
            before drawing.
        detections: Mapping with 'boxes' and 'scores' entries.
        gt_boxes: Iterable of ground-truth box tensors.
        score_thresh: Predictions must score strictly above this to be drawn.

    Returns:
        A PIL image. When there is nothing to draw, the input image is
        converted to PIL without any overlay.
    """
    candidate_boxes = detections['boxes']
    confident = detections['scores'] > score_thresh

    kept_preds = list(candidate_boxes[confident])
    truth = list(gt_boxes)
    merged = kept_preds + truth

    # Nothing to overlay: hand back the plain frame.
    if not merged:
        return T.ToPILImage()(image_tensor)

    # Predictions come first, ground truth second, so labels/colours line up.
    tags = ["pred"] * len(kept_preds) + ["gt"] * len(truth)
    palette = ["red"] * len(kept_preds) + ["green"] * len(truth)

    canvas = (image_tensor * 255).byte().cpu()
    rendered = draw_bounding_boxes(
        canvas,
        boxes=torch.stack(merged).to("cpu"),
        labels=tags,
        colors=palette,
        width=2,
    )
    return T.ToPILImage()(rendered)

In [None]:

# --- EXECUTION ---
# Runs the detector over every frame of every sequence under BASE_DATA_DIR.
# `results` collects one score per frame; `sample_frames` keeps the first
# frame of each sequence (up to NUM_IMAGES) rendered with its boxes.
results = []
sample_frames = []
seq_dirs = sorted(
    d for d in os.listdir(BASE_DATA_DIR)
    if os.path.isdir(os.path.join(BASE_DATA_DIR, d))
)

for seq_id in tqdm(seq_dirs):
    seq_path = os.path.join(BASE_DATA_DIR, seq_id)
    img_dir = os.path.join(seq_path, IMAGE_FOLDER)
    gt_path = os.path.join(seq_path, "gt", GT_FILENAME)
    gt_dict = load_gt_boxes(gt_path)

    all_img_paths = sorted(
        os.path.join(img_dir, file)
        for file in os.listdir(img_dir)
        if any(file.lower().endswith(ext) for ext in IMAGE_EXTS)
    )

    for idx, path in enumerate(all_img_paths):
        # Open inside a context manager so the file handle is released as
        # soon as the pixels are decoded instead of lingering until GC.
        with Image.open(path) as raw_img:
            img = raw_img.convert("RGB")
        img_tensor = transform(img).unsqueeze(0).to(DEVICE)

        # The frame number is the part of the file name before the first dot.
        filename = os.path.basename(path)
        frame_id = int(filename.split('.')[0])
        gt_boxes = gt_dict.get(frame_id, [])

        with torch.no_grad():
            output = model(img_tensor)[0]

        pred_boxes = output['boxes'][output['scores'] > SCORE_THRESH]

        # Per-frame score: the share of kept predictions whose best IoU with
        # any ground-truth box exceeds IOU_THRESH. Frames without ground truth
        # score 1.0 only when nothing was predicted.
        if gt_boxes:
            gt_tensor = torch.stack(gt_boxes)
            if len(pred_boxes) > 0:
                ious = compute_iou(pred_boxes, gt_tensor)
                max_ious = ious.max(dim=1)[0]
                acc = (max_ious > IOU_THRESH).float().mean().item()
            else:
                acc = 0.0
        else:
            acc = 1.0 if len(pred_boxes) == 0 else 0.0

        results.append(acc)

        # Save one visual example per sequence. Pass SCORE_THRESH explicitly
        # so the drawn predictions are the same ones that were scored above.
        if idx == 0 and len(sample_frames) < NUM_IMAGES:
            img_vis = plot_gt_and_detections(
                img_tensor[0], output, gt_boxes, score_thresh=SCORE_THRESH
            )
            sample_frames.append((seq_id, filename, img_vis))


  0%|          | 0/49 [00:38<?, ?it/s]


KeyboardInterrupt: 

In [None]:

# --- DISPLAY SUMMARY ---
# Mean of the per-frame scores; falls back to 0 when no frame was processed.
if results:
    avg_acc = sum(results) / len(results)
else:
    avg_acc = 0
print(f"\nAverage Detection Accuracy over {len(results)} frames: {avg_acc * 100:.2f}%")

# --- SHOW EXAMPLES ---
# One figure per stored example, with the axes hidden.
for seq_id, filename, img in sample_frames:
    caption = f"Sequence {seq_id}, Frame {filename}\nRed = Prediction, Green = Ground Truth"
    plt.imshow(img)
    plt.axis("off")
    plt.title(caption)
    plt.show()
