
# Full solution — E-ELAN backbone → Object Detection (Faster R-CNN) using Roboflow dataset

**Nội dung:**

- Triển khai E-ELAN (minh họa) và bọc nó thành backbone cho Faster R-CNN.
- Dùng **Roboflow** API để tải dataset (YOLOv7 export), chuyển YOLO labels -> per-image JSON
- Huấn luyện Faster R-CNN trên dataset nhỏ, lưu weights, nạp weights, chạy inference trên thư mục ảnh, in ra boxes + scores
- Tính **precision** và **recall** dựa trên so khớp IoU (threshold configurable)
- Vẽ đồ thị biến thiên loss trong quá trình train



## 1. Imports, seed, device

In [None]:
import os, json, time
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from PIL import ImageDraw

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# from sklearn.metrics import precision_score, recall_score

# Seed both RNGs used by this notebook so weight init and shuffling repeat.
torch.manual_seed(42)
np.random.seed(42)

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device:', DEVICE)
print(torch.__version__)
# Querying the GPU name raises on CPU-only machines, so only ask when CUDA is in use.
if DEVICE == 'cuda':
    print(f"Name of GPU: {torch.cuda.get_device_name(DEVICE)}")
else:
    print("Name of GPU: none (running on CPU)")

Device: cuda
2.8.0+cu126
Name of GPU: Tesla T4


## 2. E-ELAN (lightweight educational implementation)

In [None]:
class ConvBNAct(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU building block.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        k: Kernel size of the convolution.
        stride: Stride of the convolution.
        padding: Explicit padding; when ``None`` it defaults to ``(k - 1) // 2``.
        groups: Number of convolution groups.
    """

    def __init__(self, in_ch, out_ch, k=3, stride=1, padding=None, groups=1):
        super().__init__()
        pad = (k - 1) // 2 if padding is None else padding
        # The convolution carries no bias; it is followed directly by BatchNorm.
        self.conv = nn.Conv2d(
            in_ch,
            out_ch,
            kernel_size=k,
            stride=stride,
            padding=pad,
            groups=groups,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_ch)
        self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply convolution, batch normalization and ReLU in sequence."""
        out = self.conv(x)
        out = self.bn(out)
        return self.act(out)


class EELANBlock(nn.Module):
    """Simplified E-ELAN block: expand, split into branches, concatenate, fuse.

    A 1x1 convolution expands the input to ``in_channels * expansion``
    channels. The result is split along the channel axis into ``n_branches``
    equal groups, each processed by its own 3x3 ``ConvBNAct``. The branch
    outputs are concatenated and fused to ``out_channels`` with a 1x1
    convolution.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        expansion: Channel multiplier applied by the expand convolution.
        n_branches: Number of parallel 3x3 branches.

    Raises:
        ValueError: If ``n_branches`` is not positive or does not evenly
            divide ``in_channels * expansion``.
    """

    def __init__(self, in_channels, out_channels, expansion=2, n_branches=4):
        super().__init__()
        cexp = in_channels * expansion
        # torch.chunk only yields n_branches equal pieces when the division is
        # exact; otherwise the chunk widths no longer match the branch convs
        # and forward() fails with an opaque channel-mismatch error.
        if n_branches < 1 or cexp % n_branches != 0:
            raise ValueError(
                f"in_channels * expansion ({cexp}) must be divisible by "
                f"n_branches ({n_branches})"
            )
        csplit = cexp // n_branches
        self.expand = ConvBNAct(in_channels, cexp, k=1)
        self.branches = nn.ModuleList([ConvBNAct(csplit, csplit, k=3) for _ in range(n_branches)])
        self.fuse = ConvBNAct(cexp, out_channels, k=1)

    def forward(self, x):
        """Return the fused feature map for input ``x``."""
        x = self.expand(x)
        splits = torch.chunk(x, len(self.branches), dim=1)
        outs = [branch(part) for branch, part in zip(self.branches, splits)]
        return self.fuse(torch.cat(outs, dim=1))


# Smoke test: build one block (16 -> 32 channels), push a random batch through
# it and print the output shape and the module structure.
b = EELANBlock(16, 32)
xx = torch.randn(2, 16, 64, 64)
yy = b(xx)
print('E-ELAN output shape:', yy.shape)
print(b)

E-ELAN output shape: torch.Size([2, 32, 64, 64])
EELANBlock(
  (expand): ConvBNAct(
    (conv): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): ReLU(inplace=True)
  )
  (branches): ModuleList(
    (0-3): 4 x ConvBNAct(
      (conv): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): ReLU(inplace=True)
    )
  )
  (fuse): ConvBNAct(
    (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): ReLU(inplace=True)
  )
)


## 3. Wrap E-ELAN as backbone for Faster R-CNN

In [None]:
from collections import OrderedDict
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator


class BackboneWrapper(nn.Module):
    """Adapter that exposes a feature extractor as a detection backbone.

    The wrapped module's single output is returned as a one-entry
    ``OrderedDict`` keyed ``'0'``, and the channel count is stored on
    ``out_channels``.

    Args:
        eelan_backbone: Module mapping an image batch to one feature map.
        out_channels: Number of channels in that feature map.
    """

    def __init__(self, eelan_backbone: nn.Module, out_channels: int):
        super().__init__()
        self.backbone = eelan_backbone
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``OrderedDict({'0': features})`` for input batch ``x``."""
        features = OrderedDict()
        features['0'] = self.backbone(x)
        return features


def build_detector_with_eelan(eelan_backbone, backbone_out_channels, num_classes, **frcnn_kwargs):
    """Build a Faster R-CNN detector on top of a custom single-scale backbone.

    Args:
        eelan_backbone: Module mapping an image batch to one feature map.
        backbone_out_channels: Channel count of that feature map.
        num_classes: Number of classes, including background.
        **frcnn_kwargs: Extra keyword arguments forwarded unchanged to
            ``FasterRCNN`` (for example ``min_size`` / ``max_size`` to shrink
            the internally resized input). ``rpn_anchor_generator`` may be
            supplied here to replace the default single-level generator.

    Returns:
        The constructed ``FasterRCNN`` model.
    """
    backbone = BackboneWrapper(eelan_backbone, backbone_out_channels)
    anchor_generator = frcnn_kwargs.pop('rpn_anchor_generator', None)
    if anchor_generator is None:
        # One feature level -> one tuple of sizes and one tuple of ratios.
        anchor_generator = AnchorGenerator(
            sizes=((32, 64, 128, 256, 512),),
            aspect_ratios=((0.5, 1.0, 2.0),),
        )
    return FasterRCNN(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        **frcnn_kwargs,
    )


# Build a small backbone for demonstration.
# The detector resizes inputs to roughly 800x1333 internally. A stride-1
# backbone therefore makes the RPN place anchors on every pixel, which is what
# exhausted GPU memory during training. Four stride-2 convolutions bring the
# total output stride to 16 while keeping 64 output channels.
small_backbone = nn.Sequential(
    ConvBNAct(3, 16, stride=2),
    ConvBNAct(16, 32, stride=2),
    EELANBlock(32, 64),
    ConvBNAct(64, 64, stride=2),
    ConvBNAct(64, 64, stride=2),
)
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs — DataParallel on E-ELAN backbone only.")
    small_backbone = torch.nn.DataParallel(small_backbone)
else:
    print("Using single GPU or CPU.")
# num_classes includes background; this preview model is rebuilt later with the
# class count derived from the dataset.
det_model = build_detector_with_eelan(small_backbone, backbone_out_channels=64, num_classes=31).to(
    DEVICE)
print(det_model)

Using single GPU or CPU.
FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWrapper(
    (backbone): Sequential(
      (0): ConvBNAct(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): ReLU(inplace=True)
      )
      (1): EELANBlock(
        (expand): ConvBNAct(
          (conv): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): ReLU(inplace=True)
        )
        (branches): ModuleList(
          (0-3): 4 x ConvBNAct(
            (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn): BatchNorm2d(16, eps=1

In [None]:
!pip install roboflow

from roboflow import Roboflow

rf = Roboflow(api_key="wdM97i7Q3ORIQiEJN8JL")
project = rf.workspace("cropdataset").project("plant-doc-dgqyu")
version = project.version(1)
dataset = version.download("yolov7")

dataset_dir = Path(dataset.location) if hasattr(dataset, 'location') else Path("plant-doc")
print(f"Dataset downloaded to: {dataset_dir.resolve()}")


def yolo_to_coco_json_split(split_dir: Path):
    """Convert the YOLO label files of one split into per-image JSON files.

    Reads ``<split_dir>/labels/*.txt`` and the matching file in
    ``<split_dir>/images`` and writes ``<split_dir>/annotations/image_<i>.json``,
    where ``i`` is the position of the label file in sorted order. Each JSON
    holds:

    * ``boxes``  - ``[x1, y1, x2, y2]`` in absolute pixels, clamped to the image,
    * ``labels`` - YOLO class id + 1 (0 is reserved for background),
    * ``image``  - file name of the source image, so readers can pair the
      annotation with its image without relying on directory ordering.

    Label files without a matching image are skipped, as are lines that do not
    consist of exactly five numeric fields.

    Args:
        split_dir: Directory containing ``images`` and ``labels`` sub-folders.
    """
    split_dir = Path(split_dir)
    img_dir = split_dir / "images"
    label_dir = split_dir / "labels"
    ann_dir = split_dir / "annotations"
    ann_dir.mkdir(exist_ok=True, parents=True)

    # Same lookup priority as before (.jpg, then .png), extended with .jpeg.
    image_suffixes = (".jpg", ".png", ".jpeg")

    converted = 0
    for i, txt_path in enumerate(sorted(label_dir.glob("*.txt"))):
        img_path = next(
            (img_dir / f"{txt_path.stem}{suffix}"
             for suffix in image_suffixes
             if (img_dir / f"{txt_path.stem}{suffix}").exists()),
            None,
        )
        if img_path is None:
            continue

        with Image.open(img_path) as img:
            w, h = img.size

        boxes, labels = [], []
        with open(txt_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue
                try:
                    cls_id, xc, yc, bw, bh = map(float, parts)
                except ValueError:
                    # A non-numeric field: treat like any other malformed line.
                    continue
                # Normalized YOLO centre/size -> absolute corner coordinates,
                # clamped so rounding overshoot never leaves the image.
                x1 = min(max((xc - bw / 2) * w, 0.0), float(w))
                y1 = min(max((yc - bh / 2) * h, 0.0), float(h))
                x2 = min(max((xc + bw / 2) * w, 0.0), float(w))
                y2 = min(max((yc + bh / 2) * h, 0.0), float(h))
                boxes.append([x1, y1, x2, y2])
                labels.append(int(cls_id) + 1)  # +1 because background = 0

        ann = {"boxes": boxes, "labels": labels, "image": img_path.name}
        with open(ann_dir / f"image_{i:06d}.json", "w") as f:
            json.dump(ann, f)
        converted += 1

    print(f"Converted {converted} annotations in {split_dir}")


# Convert each split that the downloaded export actually contains.
for split in ("train", "valid", "test"):
    split_path = dataset_dir / split
    if not split_path.exists():
        print(f" Split '{split}' not found, skipping...")
        continue
    yolo_to_coco_json_split(split_path)
print("Done converting all available splits!")

Collecting roboflow
  Downloading roboflow-1.2.11-py3-none-any.whl.metadata (9.7 kB)
Collecting idna==3.7 (from roboflow)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting opencv-python-headless==4.10.0.84 (from roboflow)
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting pi-heif<2 (from roboflow)
  Downloading pi_heif-1.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (6.5 kB)
Collecting pillow-avif-plugin<2 (from roboflow)
  Downloading pillow_avif_plugin-1.5.2-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting filetype (from roboflow)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading roboflow-1.2.11-py3-none-any.whl (89 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading idna-3.7-py3-none-any.whl (66 kB)
[2K   [90m━━━━━━━━━━━━━

Downloading Dataset Version Zip in plant-doc-1 to yolov7pytorch:: 100%|██████████| 128358/128358 [00:08<00:00, 15588.38it/s]





Extracting Dataset Version Zip to plant-doc-1 in yolov7pytorch:: 100%|██████████| 5146/5146 [00:00<00:00, 6146.15it/s]


Dataset downloaded to: /content/plant-doc-1
Converted 1926 annotations in /content/plant-doc-1/train
Converted 513 annotations in /content/plant-doc-1/valid
Converted 128 annotations in /content/plant-doc-1/test
Done converting all available splits!


## 4. Detection dataset class and dataloader helper

In [None]:
class SimpleDetectionDataset(Dataset):
    """Detection dataset backed by one JSON annotation file per image.

    ``root`` is a split directory containing ``images/`` and ``annotations/``
    (and, for annotations written without an ``image`` field, ``labels/``).
    Each ``annotations/image_*.json`` holds ``boxes`` (``[x1, y1, x2, y2]``)
    and ``labels``, and optionally ``image`` (the image file name).

    Args:
        root: Path of the split directory.
        split: Name used only in the log message.
        transforms: Optional callable ``(image, target) -> (image, target)``.

    Raises:
        ValueError: If no ``image_*.json`` file is found.
    """

    _IMAGE_SUFFIXES = ('.jpg', '.png', '.jpeg')

    def __init__(self, root: str, split: str = 'train', transforms=None):
        self.root = Path(root)
        self.split = split
        self.img_dir = self.root / "images"
        self.ann_dir = self.root / 'annotations'
        self.json_files = sorted(self.ann_dir.glob('image_*.json'))
        if not self.json_files:
            raise ValueError(f"No JSON annotation files found in {self.ann_dir}. Check the directory structure.")
        self.transforms = transforms

        # Directory listings used by the fallback image lookups; filled on
        # first use so the folders are scanned at most once.
        self._label_stems = None
        self._image_files = None

        # The class count is the largest label seen plus one for background.
        all_labels = []
        for json_path in self.json_files:
            with open(json_path, 'r') as f:
                all_labels.extend(json.load(f)['labels'])
        self.num_classes = max(all_labels) + 1 if all_labels else 1

        print(
            f"Dataset for split '{split}' found {len(self.json_files)} annotation files with {self.num_classes} classes (including background).")

    def __len__(self):
        return len(self.json_files)

    def _resolve_image_path(self, json_path, ann, idx):
        """Return the image that annotation ``json_path`` belongs to.

        Tried in order: the ``image`` name stored in the JSON; the label file
        whose sorted position equals the number in the JSON name (how the
        converter numbers its output); an image literally named by that
        number; and finally the ``idx``-th image in sorted order.
        """
        name = ann.get('image')
        if name:
            candidate = self.img_dir / name
            if candidate.exists():
                return candidate

        number = json_path.stem.replace('image_', '')

        if number.isdigit():
            if self._label_stems is None:
                self._label_stems = [p.stem for p in sorted((self.root / 'labels').glob('*.txt'))]
            position = int(number)
            if position < len(self._label_stems):
                for suffix in self._IMAGE_SUFFIXES:
                    candidate = self.img_dir / f"{self._label_stems[position]}{suffix}"
                    if candidate.exists():
                        return candidate

        numbered = sorted(self.img_dir.glob(f"{number}.*"))
        if numbered:
            return numbered[0]

        # Last resort: positional pairing. It is only correct when every image
        # has an annotation, so it is used when nothing better is available.
        if self._image_files is None:
            self._image_files = sorted(self.img_dir.glob("*.*"))
        if idx < len(self._image_files):
            return self._image_files[idx]
        raise FileNotFoundError(
            f"Could not find corresponding image for {json_path.name} in {self.img_dir}. Sequential mapping failed.")

    @staticmethod
    def _build_target(ann, idx):
        """Turn a parsed annotation dict into a detection target dict."""
        # reshape keeps an image without objects as a (0, 4) tensor instead of
        # a 1-D tensor that cannot be column-indexed.
        boxes = torch.tensor(ann['boxes'], dtype=torch.float32).reshape(-1, 4)
        labels = torch.tensor(ann['labels'], dtype=torch.int64)
        # Drop boxes with zero (or negative) width or height.
        keep = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        return {'boxes': boxes[keep], 'labels': labels[keep], 'image_id': torch.tensor([idx])}

    def __getitem__(self, idx):
        json_path = self.json_files[idx]
        with open(json_path, 'r') as f:
            ann = json.load(f)

        img_path = self._resolve_image_path(json_path, ann, idx)
        with Image.open(img_path) as img:
            image = transforms.ToTensor()(img.convert('RGB'))

        target = self._build_target(ann, idx)
        if self.transforms:
            image, target = self.transforms(image, target)
        return image, target


def collate_fn_detection(batch):
    """Regroup ``[(image, target), ...]`` into ``([images...], [targets...])``.

    Images are kept in a list rather than stacked, so their sizes may differ.
    """
    images, targets = [list(column) for column in zip(*batch)]
    return images, targets

## 5. Training & evaluation for detection (Faster R-CNN)

In [None]:
from torchvision.ops import box_iou


def compute_iou_ap(gt_boxes_list, pred_boxes_list, pred_scores_list, iou_thresh=0.5):
    """Return mean per-image precision and recall from IoU matching.

    Despite the name this is not average precision: for every image the
    predictions are matched greedily to ground truth, precision and recall are
    computed for that image, and both are averaged over all images. Matching
    is class-agnostic (labels are not an input).

    A prediction is a true positive when its best-overlapping ground-truth box
    has IoU >= ``iou_thresh`` and that box has not been claimed by an earlier
    prediction; otherwise it is a false positive. Predictions are visited in
    descending score order so the most confident one claims a box first.

    Args:
        gt_boxes_list: One tensor of ground-truth boxes per image.
        pred_boxes_list: One tensor of predicted boxes per image.
        pred_scores_list: One tensor of prediction scores per image.
        iou_thresh: Minimum IoU for a match.

    Returns:
        ``(precision, recall)`` as floats; ``(0.0, 0.0)`` when no images are
        given.
    """
    all_precisions = []
    all_recalls = []
    for gt_boxes, pred_boxes, scores in zip(gt_boxes_list, pred_boxes_list, pred_scores_list):
        num_gt = gt_boxes.size(0) if gt_boxes.numel() > 0 else 0
        num_pred = pred_boxes.size(0) if pred_boxes.numel() > 0 else 0

        tp = 0
        if num_gt > 0 and num_pred > 0:
            if scores is not None and scores.numel() == num_pred:
                # Stable sort: input that is already ordered keeps its order.
                order = torch.sort(scores, descending=True, stable=True).indices
                pred_boxes = pred_boxes[order]
            best_ious, best_idxs = box_iou(pred_boxes, gt_boxes).max(dim=1)
            matched_gt = set()
            for best_iou, best_idx in zip(best_ious.tolist(), best_idxs.tolist()):
                if best_iou >= iou_thresh and best_idx not in matched_gt:
                    matched_gt.add(best_idx)
                    tp += 1
        fp = num_pred - tp
        fn = num_gt - tp

        all_precisions.append(tp / (tp + fp) if (tp + fp) > 0 else 0.0)
        all_recalls.append(tp / (tp + fn) if (tp + fn) > 0 else 0.0)

    if not all_precisions:
        # np.mean([]) would return NaN and emit a RuntimeWarning.
        return 0.0, 0.0
    return float(np.mean(all_precisions)), float(np.mean(all_recalls))


def train_detection_one_epoch(model, optimizer, data_loader, device, epoch):
    """Train ``model`` for one pass over ``data_loader``.

    The model is called as ``model(images, targets)`` and must return a dict
    of loss tensors; their sum is back-propagated.

    Args:
        model: Detection model returning a loss dict in training mode.
        optimizer: Optimizer over the model parameters.
        data_loader: Iterable of ``(images, targets)`` batches, where
            ``targets`` is a sequence of dicts of tensors.
        device: Device the batch is moved to.
        epoch: Epoch number, used only in the log line.

    Returns:
        The mean summed loss per iteration (0.0 for an empty loader).
    """
    model.train()
    running_loss = 0.0
    iters = 0

    for images, targets in data_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Detach before converting: float() on a tensor that still requires
        # grad triggers an autograd warning on recent PyTorch versions.
        running_loss += losses.detach().item()
        iters += 1

    avg_loss = running_loss / max(1, iters)
    print(f"Epoch {epoch} — Average Loss: {avg_loss:.4f}")
    return avg_loss


@torch.no_grad()
def evaluate_detection(model, data_loader, device, iou_thresh=0.5):
    """Run ``model`` over ``data_loader`` and report precision and recall.

    The model is switched to eval mode. Ground-truth boxes, predicted boxes
    and scores are gathered on the CPU for every image and passed to
    ``compute_iou_ap``.

    Returns:
        ``(precision, recall)`` as returned by ``compute_iou_ap``.
    """
    model.eval()
    all_gt, all_pred, all_scores = [], [], []
    for images, targets in data_loader:
        batch = [image.to(device) for image in images]
        predictions = model(batch)
        for target, prediction in zip(targets, predictions):
            all_gt.append(target["boxes"].cpu())
            all_pred.append(prediction["boxes"].detach().cpu())
            all_scores.append(prediction["scores"].detach().cpu())
    prec, rec = compute_iou_ap(all_gt, all_pred, all_scores, iou_thresh)
    print(f"Evaluation: Precision={prec:.4f}, Recall={rec:.4f} (IoU>{iou_thresh})")
    return prec, rec

In [None]:
DATA_ROOT = '/content/plant-doc-1'

# The detector's own GeneralizedRCNNTransform normalizes and resizes the
# images, so the datasets apply no transforms. `split` is passed explicitly so
# each dataset logs its real split name instead of the default 'train'.
train_ds = SimpleDetectionDataset(f"{DATA_ROOT}/train", split='train', transforms=None)
val_ds = SimpleDetectionDataset(f"{DATA_ROOT}/valid", split='valid', transforms=None)
test_ds = SimpleDetectionDataset(f"{DATA_ROOT}/test", split='test', transforms=None)

# Number of classes (background included) as found in the training annotations.
num_classes = train_ds.num_classes
print(f"Number of classes determined from training dataset: {num_classes}")

# Rebuild the detector so its box head matches the dataset's class count.
det_model = build_detector_with_eelan(small_backbone, backbone_out_channels=64, num_classes=num_classes).to(DEVICE)
print("Rebuilt detector model with correct number of classes:")
print(det_model)

# Use the named collate function rather than a lambda: it is the helper the
# notebook already defines and, unlike a lambda, it can be pickled if worker
# processes are enabled later.
train_loader = DataLoader(train_ds, batch_size=1, shuffle=True, collate_fn=collate_fn_detection)
val_loader = DataLoader(val_ds, batch_size=1, shuffle=False, collate_fn=collate_fn_detection)
test_loader = DataLoader(test_ds, batch_size=1, shuffle=False, collate_fn=collate_fn_detection)

optimizer = torch.optim.SGD(det_model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 10

history = {'train_loss': [], 'val_prec': [], 'val_rec': []}

# Create the checkpoint folder once instead of on every epoch.
Path("weights").mkdir(exist_ok=True)

for epoch in range(num_epochs):
    t0 = time.time()
    # Pass the 1-based epoch so the training log agrees with the summary below.
    train_loss = train_detection_one_epoch(det_model, optimizer, train_loader, DEVICE, epoch + 1)
    prec, rec = evaluate_detection(det_model, val_loader, DEVICE, iou_thresh=0.5)

    history['train_loss'].append(train_loss)
    history['val_prec'].append(prec)
    history['val_rec'].append(rec)

    print(f"Epoch {epoch + 1}/{num_epochs} completed in {time.time() - t0:.1f}s")
    print(f"\t Train loss: {train_loss:.4f} | Val Precision: {prec:.4f} | Val Recall: {rec:.4f}")
    print("-" * 60)

    torch.save({'model_state_dict': det_model.state_dict()},
               f'weights/det_eelan_epoch{epoch + 1}.pth')

# Left panel: training loss per epoch. Right panel: validation precision/recall.
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(history['train_loss'], marker='o', color='tab:red', label='Train Loss')
plt.title('Training Loss per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history['val_prec'], marker='s', label='Validation Precision', color='tab:blue')
plt.plot(history['val_rec'], marker='^', label='Validation Recall', color='tab:green')
plt.title('Validation Precision/Recall')
plt.xlabel('Epoch')
plt.ylabel('Score')
plt.legend()
plt.tight_layout()
plt.show()

print("\n Evaluating on Test Set...")
prec_test, rec_test = evaluate_detection(det_model, test_loader, DEVICE, iou_thresh=0.5)
print(f"Test Precision: {prec_test:.4f}, Test Recall: {rec_test:.4f}")

Dataset for split 'train' found 1926 annotation files with 31 classes (including background).
Dataset for split 'train' found 513 annotation files with 31 classes (including background).
Dataset for split 'train' found 128 annotation files with 31 classes (including background).
Number of classes determined from training dataset: 31
Rebuilt detector model with correct number of classes:
FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWrapper(
    (backbone): Sequential(
      (0): ConvBNAct(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): ReLU(inplace=True)
      )
      (1): EELANBlock(
        (expand): ConvBNAct(
          (conv): Conv2d(32, 64, kernel_size=(1, 1), strid

Converting a tensor with requires_grad=True to a scalar may lead to unexpected behavior.
Consider using tensor.detach() first. (Triggered internally at /pytorch/torch/csrc/autograd/generated/python_variable_methods.cpp:835.)


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.01 GiB. GPU 0 has a total capacity of 14.74 GiB of which 2.61 GiB is free. Process 2719 has 12.13 GiB memory in use. Of the allocated memory 10.97 GiB is allocated by PyTorch, and 1.02 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

## 6. Inference: load saved weights, run on folder of images, print predictions; compute precision & recall

In [None]:
def load_detector_weights(model, path, device='cpu'):
    """Load a checkpoint's ``model_state_dict`` into ``model``.

    Args:
        model: Module that receives the weights.
        path: Checkpoint file; may be empty or ``None``.
        device: ``map_location`` used when reading the checkpoint.

    Returns:
        ``True`` when the weights were loaded, ``False`` when ``path`` is
        empty or does not exist.
    """
    if not path or not os.path.exists(path):
        print(f'No weights found at {path}')
        return False

    checkpoint = torch.load(path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    print(f'Loaded weights from {path}')
    return True


@torch.no_grad()
def infer_folder_detector(model, folder, device='cpu', score_thresh=0.5):
    """Run the detector on every image in ``folder`` and collect detections.

    Files are visited in sorted order; only ``.jpg``, ``.jpeg`` and ``.png``
    (case-insensitive) are processed. One log line is printed per image with
    that image's detection count and its first three detections.

    Args:
        model: Detection model returning ``boxes`` and ``scores`` (and
            optionally ``labels``) per image in eval mode.
        folder: Directory containing the images.
        device: Device used for inference.
        score_thresh: Detections scoring below this value are dropped.

    Returns:
        A list of dicts with keys ``image`` (path string), ``box``
        (``[x1, y1, x2, y2]``), ``score`` and ``label`` (``None`` when the
        model returns no labels), covering all images.
    """
    model = model.to(device)
    model.eval()
    results = []
    for p in sorted(Path(folder).glob('*')):
        if p.suffix.lower() not in ('.jpg', '.png', '.jpeg'):
            continue
        with Image.open(p) as img:
            x = transforms.ToTensor()(img.convert('RGB')).to(device)
        outputs = model([x])[0]
        boxes = outputs['boxes'].cpu()
        scores = outputs['scores'].cpu()
        labels = outputs.get('labels', None)
        labels = labels.cpu().tolist() if labels is not None else [None] * len(scores)

        # Collected per image so the log line reports this image only, not
        # the running total over the whole folder.
        detections = [
            {'image': str(p), 'box': b.tolist(), 'score': float(s), 'label': lbl}
            for b, s, lbl in zip(boxes, scores, labels)
            if float(s) >= score_thresh
        ]
        results.extend(detections)
        print(f"{p.name}: {len(detections)} detections (score>{score_thresh}) — sample top: {detections[:3]}")
    return results


def _index_annotations_by_image(ann_dir):
    """Map image stem -> annotation path for JSON files that name their image."""
    index = {}
    for ann_path in sorted(Path(ann_dir).glob('*.json')):
        with open(ann_path, 'r') as f:
            record = json.load(f)
        name = record.get('image') if isinstance(record, dict) else None
        if name:
            index.setdefault(Path(name).stem, ann_path)
    return index


def infer_and_metrics_detector(model, images_folder, annotations_folder, device='cpu', score_thresh=0.5,
                               iou_thresh=0.5):
    """Run the detector on a folder and compute precision/recall against JSON labels.

    For every ``.jpg``/``.jpeg``/``.png`` image the annotation is looked up as
    ``<image stem>.json``; when that file is absent, annotation files that
    carry an ``image`` field naming the picture are used instead. Images with
    no annotation are skipped. Predictions scoring below ``score_thresh`` are
    dropped before matching.

    Args:
        model: Detection model returning ``boxes`` and ``scores`` in eval mode.
        images_folder: Directory with the images.
        annotations_folder: Directory with the JSON annotations (``boxes`` key).
        device: Device used for inference.
        score_thresh: Minimum score for a prediction to be kept.
        iou_thresh: IoU threshold handed to ``compute_iou_ap``.

    Returns:
        ``(precision, recall)``; ``(0.0, 0.0)`` when no image/annotation pair
        was found.
    """
    model = model.to(device)
    model.eval()
    ann_dir = Path(annotations_folder)
    ann_index = None  # built lazily, only if a direct stem match is missing

    gt_boxes_list, pred_boxes_list, pred_scores_list = [], [], []
    # Inference only: skip autograd bookkeeping, as the sibling helpers do.
    with torch.no_grad():
        for img_path in sorted(Path(images_folder).glob('*')):
            if img_path.suffix.lower() not in ('.jpg', '.png', '.jpeg'):
                continue
            ann_path = ann_dir / (img_path.stem + '.json')
            if not ann_path.exists():
                if ann_index is None:
                    ann_index = _index_annotations_by_image(ann_dir)
                ann_path = ann_index.get(img_path.stem)
                if ann_path is None:
                    continue
            with open(ann_path, 'r') as f:
                j = json.load(f)
            # reshape keeps "no objects" as a (0, 4) tensor.
            gt_boxes = torch.tensor(j['boxes'], dtype=torch.float32).reshape(-1, 4)

            with Image.open(img_path) as img:
                x = transforms.ToTensor()(img.convert('RGB')).to(device)
            out = model([x])[0]
            pred_boxes = out['boxes'].detach().cpu()
            scores = out['scores'].detach().cpu()

            # Boolean indexing already yields empty tensors when nothing passes.
            keep = scores >= score_thresh
            gt_boxes_list.append(gt_boxes)
            pred_boxes_list.append(pred_boxes[keep])
            pred_scores_list.append(scores[keep])

    if not gt_boxes_list:
        print(f'No image/annotation pairs found in {images_folder} and {annotations_folder}; metrics not computed.')
        return 0.0, 0.0

    prec, rec = compute_iou_ap(gt_boxes_list, pred_boxes_list, pred_scores_list, iou_thresh)
    print(f'Overall precision={prec:.4f}, recall={rec:.4f} at score>={score_thresh} and IoU>={iou_thresh}')
    return prec, rec


In [None]:
# Load the checkpoint written after the last training epoch. Training saves
# `det_eelan_epoch{1..num_epochs}.pth`, so a hard-coded epoch number can point
# at a file that was never written.
weights_path = f'weights/det_eelan_epoch{num_epochs}.pth'
if not load_detector_weights(det_model, weights_path, device=DEVICE):
    print("Continuing with the weights currently held in memory.")

# Evaluate on the test split of the dataset used for training, not on a
# folder from a different dataset.
TEST_IMG_DIR = str(Path(DATA_ROOT) / 'test' / 'images')
TEST_ANN_DIR = str(Path(DATA_ROOT) / 'test' / 'annotations')

print("\n Running inference on test set...")
results = infer_folder_detector(det_model, TEST_IMG_DIR, device=DEVICE, score_thresh=0.5)

if os.path.exists(TEST_ANN_DIR):
    print("\n Evaluating Precision & Recall...")
    prec, rec = infer_and_metrics_detector(det_model, TEST_IMG_DIR, TEST_ANN_DIR,
                                           device=DEVICE, score_thresh=0.5, iou_thresh=0.5)
    print(f"Final Test Precision={prec:.4f}, Recall={rec:.4f}")
else:
    print("No annotation folder found for test set — skipping metric computation.")


def visualize_detections(results, num_images=3):
    """Plot detections, one figure per image, for up to ``num_images`` images.

    Detections are grouped by their ``image`` path (in first-seen order) so
    every box found on an image is drawn on the same figure; ``num_images``
    limits the number of images, not the number of boxes.

    Args:
        results: Iterable of dicts with keys ``image``, ``box``
            (``[x1, y1, x2, y2]``) and ``score``.
        num_images: Maximum number of distinct images to show.
    """
    by_image = {}
    for res in results:
        by_image.setdefault(res['image'], []).append(res)

    for shown, (img_path, detections) in enumerate(by_image.items()):
        if shown >= num_images:
            break
        with Image.open(img_path) as src:
            img = src.convert("RGB")
        draw = ImageDraw.Draw(img)
        for det in detections:
            box = det['box']
            draw.rectangle(box, outline='red', width=3)
            # Keep the score label inside the picture for boxes at the top edge.
            draw.text((box[0], max(box[1] - 10, 0)), f"{det['score']:.2f}", fill='red')
        best_score = max(det['score'] for det in detections)
        plt.figure(figsize=(6, 6))
        plt.imshow(img)
        plt.axis("off")
        plt.title(f"{Path(img_path).name} | {len(detections)} detections | best score={best_score:.2f}")
        plt.show()


# Preview the detections collected by the inference run above.
print("\n Visualizing sample detections...")
visualize_detections(results, num_images=10)