In [None]:
import os
import json
import torch
import numpy as np
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm

In [None]:
class CocoDataset(torch.utils.data.Dataset):
    """COCO-style detection dataset yielding ``(image, target)`` pairs.

    ``target`` is a dict with:
        * ``"boxes"``    -- float32 tensor of shape (N, 4) in ``[x1, y1, x2, y2]``
        * ``"labels"``   -- int64 tensor of shape (N,) holding ``category_id`` values
        * ``"image_id"`` -- the image's ``id`` from the annotation file

    Args:
        image_dir: Directory that contains the image files.
        annotation_path: Path to the COCO JSON annotation file.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, image_dir, annotation_path, transform=None):
        with open(annotation_path, "r") as f:
            coco = json.load(f)

        self.image_dir = image_dir
        self.images = {img["id"]: img for img in coco["images"]}
        self.annotations = coco["annotations"]
        self.transform = transform

        # Fixed index -> image id order, built once instead of on every lookup.
        self.image_ids = list(self.images.keys())

        # image id -> its annotations, so fetching one sample does not have to
        # scan the whole annotation list.
        self._annotations_by_image = {}
        for ann in self.annotations:
            self._annotations_by_image.setdefault(ann["image_id"], []).append(ann)

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.images)

    def _load_target(self, image_id):
        """Build the ground-truth target dict for ``image_id``.

        COCO stores boxes as ``[x, y, width, height]``; they are converted to
        ``[x1, y1, x2, y2]`` corners here. Images without annotations get a
        ``(0, 4)`` boxes tensor and a ``(0,)`` labels tensor.
        """
        anns = self._annotations_by_image.get(image_id, [])

        corner_boxes = []
        for ann in anns:
            x, y, w, h = ann["bbox"]
            corner_boxes.append([x, y, x + w, y + h])

        # reshape keeps the (N, 4) layout even when N == 0.
        gt_boxes = torch.tensor(corner_boxes, dtype=torch.float32).reshape(-1, 4)
        gt_labels = torch.tensor(
            [ann["category_id"] for ann in anns], dtype=torch.int64
        )

        return {"boxes": gt_boxes, "labels": gt_labels, "image_id": image_id}

    def __getitem__(self, index):
        """Load the image at ``index`` and its ground truth."""
        image_id = self.image_ids[index]
        img_info = self.images[image_id]
        img_path = os.path.join(self.image_dir, img_info["file_name"])
        image = Image.open(img_path).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, self._load_target(image_id)

In [None]:
def calculate_iou(box1, box2):
    """Intersection-over-union of two boxes given as ``[x1, y1, x2, y2]``.

    Only indices 0-3 of each box are read. Returns ``0`` when the union is
    not positive (for example two zero-area boxes).
    """
    # Extent of the overlapping region along each axis, clamped at zero so
    # disjoint boxes contribute no intersection.
    overlap_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    overlap_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    intersection = overlap_w * overlap_h

    first_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    second_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = first_area + second_area - intersection

    if union > 0:
        return intersection / union
    return 0

In [None]:
def _iou_one_to_many(box, boxes):
    """IoU of one ``[x1, y1, x2, y2]`` box against every row of ``boxes``.

    Args:
        box: float tensor of shape (4,).
        boxes: float tensor of shape (N, 4).

    Returns:
        Float tensor of shape (N,); entries whose union is not positive are 0.
    """
    top_left = torch.max(box[:2], boxes[:, :2])
    bottom_right = torch.min(box[2:4], boxes[:, 2:4])
    overlap = (bottom_right - top_left).clamp(min=0)
    intersection = overlap[:, 0] * overlap[:, 1]

    box_area = (box[2] - box[0]) * (box[3] - box[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    union = box_area + boxes_area - intersection

    return torch.where(union > 0, intersection / union, torch.zeros_like(union))


def calculate_map(predictions, ground_truths, iou_threshold=0.5):
    """Mean of the per-class 11-point interpolated average precision.

    Args:
        predictions: List of dicts with ``"bbox"`` (``[x1, y1, x2, y2]``),
            ``"label"``, ``"score"`` and optionally ``"image_id"``.
        ground_truths: List of dicts with ``"bbox"`` (``[x1, y1, x2, y2]``),
            ``"label"`` and optionally ``"image_id"``.
        iou_threshold: A prediction is a true positive when its best IoU with
            a not-yet-matched ground truth is strictly greater than this.

    Returns:
        The mean AP as a Python float, taken over the classes that appear in
        ``ground_truths``. With no ground truths there is nothing to average
        and the result is NaN.

    A prediction is only compared with ground truths of the same class *and*
    the same ``image_id``. Records without an ``"image_id"`` key all share the
    key ``None`` and are therefore matched against each other.
    """
    # label -> image_id -> list of ground-truth boxes
    gt_index = {}
    for gt in ground_truths:
        per_image = gt_index.setdefault(gt["label"], {})
        per_image.setdefault(gt.get("image_id"), []).append(gt["bbox"])

    # label -> predictions, restricted to classes that have ground truth
    preds_by_class = {}
    for pred in predictions:
        if pred["label"] in gt_index:
            preds_by_class.setdefault(pred["label"], []).append(pred)

    average_precisions = []
    for class_id, boxes_by_image in gt_index.items():
        gt_tensors = {
            image_key: torch.tensor(boxes, dtype=torch.float32)
            for image_key, boxes in boxes_by_image.items()
        }
        # Tracks which ground truths have already been claimed by a prediction.
        matched = {
            image_key: torch.zeros(len(boxes), dtype=torch.bool)
            for image_key, boxes in boxes_by_image.items()
        }
        num_gts = sum(len(boxes) for boxes in boxes_by_image.values())

        # Highest confidence first.
        preds = sorted(
            preds_by_class.get(class_id, []),
            key=lambda p: p["score"],
            reverse=True,
        )

        tp = torch.zeros(len(preds), dtype=torch.float32)
        fp = torch.zeros(len(preds), dtype=torch.float32)

        for pred_idx, pred in enumerate(preds):
            image_key = pred.get("image_id")
            candidates = gt_tensors.get(image_key)
            if candidates is None:
                # No ground truth of this class in the prediction's image.
                fp[pred_idx] = 1
                continue

            ious = _iou_one_to_many(
                torch.tensor(pred["bbox"], dtype=torch.float32), candidates
            )
            # Already-matched ground truths may not be claimed a second time.
            ious[matched[image_key]] = float("-inf")
            best_iou, best_gt_idx = ious.max(dim=0)

            if best_iou > iou_threshold:
                tp[pred_idx] = 1
                matched[image_key][best_gt_idx] = True
            else:
                fp[pred_idx] = 1

        # Precision / recall after each prediction in score order.
        tp_cumsum = torch.cumsum(tp, dim=0)
        fp_cumsum = torch.cumsum(fp, dim=0)
        precisions = tp_cumsum / (tp_cumsum + fp_cumsum + 1e-6)
        recalls = tp_cumsum / num_gts

        # 11-point interpolation: best precision at recall >= 0, 0.1, ..., 1.
        ap = torch.tensor(0.0)
        for t in torch.linspace(0, 1, 11):
            reached = recalls >= t
            if reached.any():
                ap += precisions[reached].max()
        ap /= 11

        average_precisions.append(ap.item())

    return torch.tensor(average_precisions).mean().item()


In [None]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Faster R-CNN (ResNet-50 FPN backbone) with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` -- confirm against the installed version before switching.
model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model = model.to(device)
# Inference mode: the model returns detections instead of training losses.
model.eval()

In [None]:
# torchvision's Faster R-CNN expects float image tensors in the [0, 1] range and
# applies its own resizing and ImageNet mean/std normalisation internally, then
# reports boxes in the coordinates of the tensor it was given. Resizing or
# normalising here would normalise twice, distort the aspect ratio, and move the
# predictions into a different coordinate frame from the ground-truth boxes.
# So only convert the PIL image to a tensor.
transform = transforms.Compose([
    transforms.ToTensor(),
])

In [None]:
# Root of the COCO dataset. Set the COCO_ROOT environment variable to point the
# notebook at another copy; the default keeps the original location.
COCO_ROOT = os.environ.get("COCO_ROOT", "/nas/user/jieui/LG/YOLOX/datasets/COCO")
coco_image_dir = os.path.join(COCO_ROOT, "val2017")  # COCO image directory
coco_annotation_path = os.path.join(COCO_ROOT, "annotations", "instances_val2017.json")

dataset = CocoDataset(coco_image_dir, coco_annotation_path, transform)
# batch_size=1: the default collate function stacks the per-image targets, which
# only works when every image in a batch has the same number of boxes.
data_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)

In [None]:
predictions = []
ground_truths = []

with torch.no_grad():
    for images, targets in tqdm(data_loader):
        images = [img.to(device) for img in images]
        outputs = model(images)

        # The default collate function stacks every target field, so each one
        # carries a leading batch dimension that is indexed away per image.
        for batch_idx, output in enumerate(outputs):
            image_id = targets["image_id"][batch_idx].item()

            # Store ground truth: one record per annotated box. Images without
            # annotations simply contribute no records.
            gt_boxes = targets["boxes"][batch_idx].tolist()
            gt_labels = targets["labels"][batch_idx].tolist()
            for box, label in zip(gt_boxes, gt_labels):
                ground_truths.append({
                    "image_id": image_id,
                    "bbox": box,
                    "label": label,
                })

            # Store predictions: one record per detected box. Converting each
            # tensor once avoids a device sync per element.
            pred_boxes = output["boxes"].tolist()
            pred_labels = output["labels"].tolist()
            pred_scores = output["scores"].tolist()
            for box, label, score in zip(pred_boxes, pred_labels, pred_scores):
                predictions.append({
                    "image_id": image_id,
                    "bbox": box,
                    "label": label,
                    "score": score,
                })


In [None]:
# Score the collected detections against the ground truth at the default IoU
# threshold and report the result to four decimals.
map_score = calculate_map(predictions=predictions, ground_truths=ground_truths)
print(f"mAP: {map_score:.4f}")