In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Select the compute device once for the whole notebook: CUDA when PyTorch
# reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Last expression of the cell, so the chosen device is shown as the cell output.
device

device(type='cuda')

In [2]:
# Image directories to evaluate, relative to the notebook's working directory.
# Directories that do not exist are skipped by the evaluation loop.
EVAL_DIRS = [
    "borui_output_guided",
    "borui_output_nlm",
    "dichen_output_otsu",
    "haixin_output_canny",
    "haixin_output_scharr",
    "output_canny_custom",
    "dichen_output_adaptiveGaussian"
]

# Checkpoint files handed to build_model() for the two models being compared.
BASELINE_CKPT = "final_model.pth"
FINETUNED_CKPT = "finetuned_model.pth"

# Size of the detection head's class dimension.
# NOTE(review): presumably background + number plate -- confirm against training.
NUM_CLASSES = 2
# DataLoader settings used by make_loader().
BATCH_SIZE = 1
NUM_WORKERS = 0
# Predictions scoring below SCORE_THRESH are discarded before matching.
SCORE_THRESH = 0.5
# An image counts as "detected" when the chosen prediction reaches this IoU.
IOU_THRESH = 0.5

print("device:", device)
print("eval dirs:", EVAL_DIRS)


device: cuda
eval dirs: ['borui_output_guided', 'borui_output_nlm', 'dichen_output_otsu', 'haixin_output_canny', 'haixin_output_scharr', 'output_canny_custom', 'dichen_output_adaptiveGaussian']


In [3]:
class NumberPlateDataset(Dataset):
    """Folder-backed image dataset that yields ``(image, target)`` pairs.

    Every file in ``images_dir`` whose extension (case-insensitive) is one of
    ``.jpg``, ``.jpeg``, ``.png`` or ``.bmp`` becomes one sample; samples are
    ordered by sorted file name.

    The target of each sample is synthesized rather than read from an
    annotation file: a single box covering the whole image frame
    ``[0, 0, width, height]`` with label ``1``.
    NOTE(review): this presumably assumes every image is a tight crop of a
    plate -- confirm, because all IoU figures are measured against this box.

    Args:
        images_dir: Directory to scan for image files (not recursive).
        transform: Optional callable applied to the RGB PIL image only; the
            target is never transformed.
    """

    def __init__(self, images_dir, transform=None):
        allowed_suffixes = (".jpg", ".jpeg", ".png", ".bmp")

        def _is_image(name):
            # Compare on the lower-cased extension so "X.JPG" is accepted too.
            _, suffix = os.path.splitext(name)
            return suffix.lower() in allowed_suffixes

        self.images_dir = images_dir
        self.transform = transform
        # Sorted so that sample indices are stable across runs.
        self.files = sorted(filter(_is_image, os.listdir(images_dir)))

    def __len__(self):
        """Number of image files found in the directory."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)``.

        ``target`` holds ``boxes`` (1x4 float32, the full image frame),
        ``labels`` (int64 ``[1]``) and ``image_id`` (``[idx]``).
        """
        path = os.path.join(self.images_dir, self.files[idx])
        image = Image.open(path).convert("RGB")
        width, height = image.size

        target = {
            "boxes": torch.tensor(
                [[0.0, 0.0, float(width), float(height)]], dtype=torch.float32
            ),
            "labels": torch.tensor([1], dtype=torch.int64),
            "image_id": torch.tensor([idx]),
        }

        if self.transform is None:
            return image, target
        return self.transform(image), target


def collate_fn(batch):
    """Split a batch of ``(image, target)`` samples into two parallel lists.

    Nothing is stacked: the images are returned as a plain list and the
    targets as a second list in the same order.
    """
    return [sample[0] for sample in batch], [sample[1] for sample in batch]


# Evaluation-time preprocessing: the pipeline contains only ToTensor, i.e. no
# resizing or augmentation is applied before the image reaches the model.
eval_transform = T.Compose([T.ToTensor()])


def make_loader(images_dir):
    """Create the dataset and its evaluation DataLoader for one directory.

    The loader iterates in file order (no shuffling), uses the notebook-level
    ``BATCH_SIZE`` / ``NUM_WORKERS`` settings and ``collate_fn``, and pins host
    memory only when the global ``device`` is a CUDA device.

    Returns:
        ``(dataset, loader)`` -- the dataset is returned too so callers can
        query its length.
    """
    dataset = NumberPlateDataset(images_dir, transform=eval_transform)
    use_pinned_memory = device.type == "cuda"
    return dataset, DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        collate_fn=collate_fn,
        num_workers=NUM_WORKERS,
        pin_memory=use_pinned_memory,
    )


# Cell sentinel: a bare string as the last expression, shown as the cell output
# once the dataset helpers above have been defined.
"dataset_ready"


'dataset_ready'

In [4]:
def build_model(num_classes, ckpt_path):
    """Build a Faster R-CNN (ResNet50-FPN v2) detector and load a checkpoint.

    The network is created with torchvision's ``"DEFAULT"`` weights, its box
    predictor is replaced by a ``FastRCNNPredictor`` sized for ``num_classes``,
    and the checkpoint (if any) is loaded non-strictly on top.  The model is
    then moved to the notebook-level ``device`` and put in eval mode.

    Problems that would otherwise go unnoticed are reported as warnings
    instead of being ignored, because each of them makes the evaluation
    numbers meaningless:

    * ``ckpt_path`` is given but the file does not exist (the replaced
      predictor head then keeps its fresh initialization);
    * the checkpoint has missing or unexpected keys.

    Args:
        num_classes: Number of output classes of the box predictor head.
        ckpt_path: Path to a checkpoint file, or ``None`` to skip loading.
            The file may contain a bare ``state_dict``, a dict wrapping it
            under ``"model_state_dict"``, ``"state_dict"`` or ``"model"``,
            or a whole pickled ``nn.Module``.

    Returns:
        The model, on ``device`` and in eval mode.
    """
    import warnings
    from collections.abc import Mapping

    model = fasterrcnn_resnet50_fpn_v2(weights="DEFAULT")
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    if ckpt_path is not None:
        if not os.path.exists(ckpt_path):
            # Keep the original best-effort behavior (no exception), but make
            # it visible that no trained weights were loaded.
            warnings.warn(
                f"checkpoint {ckpt_path!r} not found; the model keeps the default "
                "backbone weights and a freshly initialized box predictor",
                RuntimeWarning,
                stacklevel=2,
            )
        else:
            # NOTE(security): torch.load unpickles the file, which can execute
            # arbitrary code -- only load checkpoints from a trusted source.
            checkpoint = torch.load(ckpt_path, map_location="cpu")

            state_dict = checkpoint
            if isinstance(checkpoint, torch.nn.Module):
                # The whole model object was saved rather than its state_dict.
                state_dict = checkpoint.state_dict()
            elif isinstance(checkpoint, Mapping):
                # Training scripts often wrap the weights together with
                # optimizer/epoch state; unwrap the common layouts.  A bare
                # state_dict maps names to tensors, so the Mapping check on
                # the value cannot misfire on it.
                for wrapper_key in ("model_state_dict", "state_dict", "model"):
                    if isinstance(checkpoint.get(wrapper_key), Mapping):
                        state_dict = checkpoint[wrapper_key]
                        break

            # strict=False tolerates key mismatches; report them so that a
            # checkpoint that only partially applies does not go unnoticed.
            load_result = model.load_state_dict(state_dict, strict=False)
            if load_result.missing_keys or load_result.unexpected_keys:
                warnings.warn(
                    f"checkpoint {ckpt_path!r} did not match the model exactly: "
                    f"{len(load_result.missing_keys)} missing key(s) "
                    f"(e.g. {list(load_result.missing_keys)[:3]}), "
                    f"{len(load_result.unexpected_keys)} unexpected key(s) "
                    f"(e.g. {list(load_result.unexpected_keys)[:3]})",
                    RuntimeWarning,
                    stacklevel=2,
                )

    model.to(device)
    model.eval()
    return model


# Build the two detectors being compared; both go through build_model() with
# the same NUM_CLASSES and differ only in the checkpoint path.
baseline_model = build_model(NUM_CLASSES, BASELINE_CKPT)
finetuned_model = build_model(NUM_CLASSES, FINETUNED_CKPT)

# build_model() already calls model.eval(), so these two calls are redundant
# but harmless.
baseline_model.eval()
finetuned_model.eval()

# Cell sentinel: shown as the cell output once both models have been built.
"models_ready"


'models_ready'

In [5]:
def box_iou(a, b):
    """Intersection-over-union of two axis-aligned boxes.

    Args:
        a, b: 4-element sequences ``(x1, y1, x2, y2)``.

    Returns:
        ``intersection / union``.  Negative widths and heights are clamped to
        zero, and ``0.0`` is returned when the union area is not positive.
    """

    def _extent(lo, hi):
        # Length of the interval [lo, hi], clamped to zero when hi < lo.
        return max(0.0, hi - lo)

    a_left, a_top, a_right, a_bottom = a
    b_left, b_top, b_right, b_bottom = b

    overlap_w = _extent(max(a_left, b_left), min(a_right, b_right))
    overlap_h = _extent(max(a_top, b_top), min(a_bottom, b_bottom))
    overlap = overlap_w * overlap_h

    size_a = _extent(a_left, a_right) * _extent(a_top, a_bottom)
    size_b = _extent(b_left, b_right) * _extent(b_top, b_bottom)
    combined = size_a + size_b - overlap

    # Guard written as "<= 0.0" so that a NaN union still falls through to the
    # division, matching the comparison semantics of the plain guard clause.
    return 0.0 if combined <= 0.0 else overlap / combined


def evaluate_metrics(model, data_loader, device, score_thresh=0.5, iou_thresh=0.5):
    """Compute average IoU and detection rate of ``model`` over ``data_loader``.

    For every image that has at least one ground-truth box, the predictions
    scoring at least ``score_thresh`` are kept, the highest-scoring one is
    compared with the FIRST ground-truth box, and the resulting IoU is
    recorded.  An image with no prediction above the threshold records an IoU
    of 0.0.  Images without ground-truth boxes are ignored entirely.

    Args:
        model: Detection model; called with a list of image tensors and
            expected to return one dict with ``"scores"`` and ``"boxes"`` per
            image.  It is switched to eval mode.
        data_loader: Iterable of ``(images, targets)`` batches.
        device: Device the images are moved to before inference.
        score_thresh: Minimum score for a prediction to be considered.
        iou_thresh: Minimum IoU for an image to count as detected.

    Returns:
        ``(avg_iou, det_rate)`` -- the mean of the recorded IoUs (0.0 when
        none were recorded) and the fraction of scored images whose IoU
        reached ``iou_thresh``.
    """
    model.eval()
    per_image_iou = []
    n_hits = 0
    n_scored = 0

    with torch.no_grad():
        for batch_images, batch_targets in data_loader:
            predictions = model([img.to(device) for img in batch_images])

            for prediction, target in zip(predictions, batch_targets):
                gt_boxes = target["boxes"].cpu().numpy()
                if gt_boxes.shape[0] == 0:
                    # Nothing to match against: the image is not scored.
                    continue
                n_scored += 1

                pred_scores = prediction["scores"].detach().cpu().numpy()
                pred_boxes = prediction["boxes"].detach().cpu().numpy()

                confident = pred_scores >= score_thresh
                if not confident.any():
                    # No usable prediction counts as a complete miss.
                    per_image_iou.append(0.0)
                    continue

                # Index of the best score *within* the filtered predictions.
                best = pred_scores[confident].argmax()
                overlap = box_iou(pred_boxes[confident][best], gt_boxes[0])
                per_image_iou.append(overlap)

                if overlap >= iou_thresh:
                    n_hits += 1

    avg_iou = float(np.mean(per_image_iou)) if per_image_iou else 0.0
    # max(1, ...) avoids a division by zero when no image was scored.
    det_rate = n_hits / max(1, n_scored)
    return avg_iou, det_rate


In [6]:
# Evaluate both models on every directory in EVAL_DIRS and collect one summary
# row per directory.  Directories that do not exist are reported and skipped.
results = []

# Thresholds shared by every evaluate_metrics() call below.
eval_kwargs = {"score_thresh": SCORE_THRESH, "iou_thresh": IOU_THRESH}

for images_dir in EVAL_DIRS:
    if os.path.isdir(images_dir):
        dataset, loader = make_loader(images_dir)

        # The same loader is reused so both models see identical inputs.
        baseline_iou, baseline_det = evaluate_metrics(
            baseline_model, loader, device, **eval_kwargs
        )
        finetuned_iou, finetuned_det = evaluate_metrics(
            finetuned_model, loader, device, **eval_kwargs
        )

        row = {
            "dir": images_dir,
            "n_images": len(dataset),
            "baseline_iou": baseline_iou,
            "baseline_det": baseline_det,
            "finetuned_iou": finetuned_iou,
            "finetuned_det": finetuned_det,
        }
        results.append(row)

        print("==========", images_dir, "==========")
        print("num images:", len(dataset))
        print("Baseline  avg IoU:", baseline_iou)
        print("Baseline  det rate:", baseline_det)
        print("Finetuned avg IoU:", finetuned_iou)
        print("Finetuned det rate:", finetuned_det)
    else:
        print(f"[skip] {images_dir} not found")

# Last expression of the cell: display the collected rows.
results


num images: 201
Baseline  avg IoU: 0.056965666868265215
Baseline  det rate: 0.03980099502487562
Finetuned avg IoU: 0.007144599532087644
Finetuned det rate: 0.004975124378109453
num images: 201
Baseline  avg IoU: 0.07853886309723875
Baseline  det rate: 0.06467661691542288
Finetuned avg IoU: 0.006786071459081636
Finetuned det rate: 0.004975124378109453
num images: 201
Baseline  avg IoU: 0.005732011519701438
Baseline  det rate: 0.004975124378109453
Finetuned avg IoU: 0.0006610024181792317
Finetuned det rate: 0.0
num images: 201
Baseline  avg IoU: 0.02690859484215225
Baseline  det rate: 0.009950248756218905
Finetuned avg IoU: 0.02572658176149302
Finetuned det rate: 0.0
num images: 201
Baseline  avg IoU: 0.0033537792180901144
Baseline  det rate: 0.0
Finetuned avg IoU: 4.312993300643133e-05
Finetuned det rate: 0.0
num images: 201
Baseline  avg IoU: 0.02408449731788028
Baseline  det rate: 0.004975124378109453
Finetuned avg IoU: 0.024763343373860294
Finetuned det rate: 0.0
num images: 201
Base

[{'dir': 'borui_output_guided',
  'n_images': 201,
  'baseline_iou': 0.056965666868265215,
  'baseline_det': 0.03980099502487562,
  'finetuned_iou': 0.007144599532087644,
  'finetuned_det': 0.004975124378109453},
 {'dir': 'borui_output_nlm',
  'n_images': 201,
  'baseline_iou': 0.07853886309723875,
  'baseline_det': 0.06467661691542288,
  'finetuned_iou': 0.006786071459081636,
  'finetuned_det': 0.004975124378109453},
 {'dir': 'dichen_output_otsu',
  'n_images': 201,
  'baseline_iou': 0.005732011519701438,
  'baseline_det': 0.004975124378109453,
  'finetuned_iou': 0.0006610024181792317,
  'finetuned_det': 0.0},
 {'dir': 'haixin_output_canny',
  'n_images': 201,
  'baseline_iou': 0.02690859484215225,
  'baseline_det': 0.009950248756218905,
  'finetuned_iou': 0.02572658176149302,
  'finetuned_det': 0.0},
 {'dir': 'haixin_output_scharr',
  'n_images': 201,
  'baseline_iou': 0.0033537792180901144,
  'baseline_det': 0.0,
  'finetuned_iou': 4.312993300643133e-05,
  'finetuned_det': 0.0},
 {'