In [1]:
from copy import deepcopy

In [2]:
def create_dummy_coco_dataset(num_images=10, num_annotations=20,image_height=100, image_width=100, bbox_height=10, bbox_width=10):
    images = []
    annotations = []
    categories = [{"id": 1, "name": "object"}]

    for i in range(num_images):
        image = {
            "id": i,
            "width": image_width,
            "height": image_height,
            "file_name": f"image_{i}.jpg"
        }
        images.append(image)#

        assert image_height * image_width >= num_annotations * bbox_height * bbox_width, "Not enough space for annotations in the image."
        for j in range(num_annotations):
            height_index  = j // (image_width // bbox_width)
            width_index = j % (image_width // bbox_width)
            annotation = {
                "id": i * num_annotations + j,
                "image_id": i,
                "category_id": categories[0]["id"],
                "bbox": [width_index * bbox_width, height_index * bbox_height, bbox_width, bbox_height],
                "bbox_xyxy": [width_index * bbox_width, height_index * bbox_height, (width_index + 1) * bbox_width, (height_index + 1) * bbox_height],
                "area": bbox_width * bbox_height,
                "iscrowd": 0,
                "score": 1.0  # Assuming all annotations are perfect for dummy data
            }
            annotations.append(annotation)

    return {"images": images, "annotations": annotations, "categories": categories}

In [3]:
bbox_height = 10
bbox_width = 10
dummy_data = create_dummy_coco_dataset(num_images=2,num_annotations=5,bbox_height=bbox_height, bbox_width=bbox_width)

In [4]:
dummy_images = dummy_data["images"]
dummy_annotations = dummy_data["annotations"]
dummy_categories = dummy_data["categories"]

In [5]:
annotations = dummy_annotations
for index, annotation in enumerate(annotations):
    print(f"Annotation {index}: {annotation}")

Annotation 0: {'id': 0, 'image_id': 0, 'category_id': 1, 'bbox': [0, 0, 10, 10], 'bbox_xyxy': [0, 0, 10, 10], 'area': 100, 'iscrowd': 0, 'score': 1.0}
Annotation 1: {'id': 1, 'image_id': 0, 'category_id': 1, 'bbox': [10, 0, 10, 10], 'bbox_xyxy': [10, 0, 20, 10], 'area': 100, 'iscrowd': 0, 'score': 1.0}
Annotation 2: {'id': 2, 'image_id': 0, 'category_id': 1, 'bbox': [20, 0, 10, 10], 'bbox_xyxy': [20, 0, 30, 10], 'area': 100, 'iscrowd': 0, 'score': 1.0}
Annotation 3: {'id': 3, 'image_id': 0, 'category_id': 1, 'bbox': [30, 0, 10, 10], 'bbox_xyxy': [30, 0, 40, 10], 'area': 100, 'iscrowd': 0, 'score': 1.0}
Annotation 4: {'id': 4, 'image_id': 0, 'category_id': 1, 'bbox': [40, 0, 10, 10], 'bbox_xyxy': [40, 0, 50, 10], 'area': 100, 'iscrowd': 0, 'score': 1.0}
Annotation 5: {'id': 5, 'image_id': 1, 'category_id': 1, 'bbox': [0, 0, 10, 10], 'bbox_xyxy': [0, 0, 10, 10], 'area': 100, 'iscrowd': 0, 'score': 1.0}
Annotation 6: {'id': 6, 'image_id': 1, 'category_id': 1, 'bbox': [10, 0, 10, 10], 'bbo

In [6]:
import json
import argparse
import os
import datetime
from tqdm import tqdm
import regex as re
from torchvision.ops import box_iou
import torch
from transformers import AutoProcessor
import imgviz
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import math
def save_json(file_path, data):
    """
    Save data to a JSON file.

    Args:
        file_path (str): Path to the JSON file.
        data (dict): Data to save.
    """
    with open(file_path, 'w') as f:
        json.dump(data, f, indent=4)

def load_json(file_path):
    """
    Load data from a JSON file.

    Args:
        file_path (str): Path to the JSON file.

    Returns:
        dict: Data loaded from the file.
    """
    with open(file_path, 'r') as f:
        return json.load(f)

def extract_bbox_from_text(ans):
    pattern = re.compile(r'\[(((0|1)\.(\d){3}\,\s*){3}((0|1)\.(\d){3}))\]')
    match_list = pattern.findall(ans)

    if len(match_list) > 0:
        answer = [list(map(float,match[0].split(","))) for match in match_list]
    else:
        answer = "FAILED"
    return answer

def calculate_iou(gt_bbox_list, pred_bbox_list):
    iou_matrix = box_iou(torch.tensor(gt_bbox_list).float(), torch.tensor(pred_bbox_list).float())
    iou_matrix = torch.nan_to_num(iou_matrix, nan=0.0)  # NaNを0に置き換える
    iou_argsort_matrix = torch.argsort(iou_matrix.flatten(),descending=True).argsort().reshape(iou_matrix.shape)#iouが大きい順にソートしたインデックスを取得
    # print(iou_argsort_matrix)
    # print("-" * 50)
    # print(iou_matrix)
    pred_index_list =  torch.full((len(pred_bbox_list),), False, dtype=torch.bool)
    gt_index_list = torch.full((len(gt_bbox_list),), False, dtype=torch.bool)

    iou_info_list = []

    for i in range(len(gt_bbox_list)):
        max_iou_index = torch.where(iou_argsort_matrix == i)
        if not gt_index_list[max_iou_index[0]] and not pred_index_list[max_iou_index[1]]:
            iou_info_list.append( {
                "gt_index": max_iou_index[0].item(),
                "pred_index": max_iou_index[1].item(),
                "iou_value": iou_matrix[max_iou_index].item()
            })
            gt_index_list[max_iou_index[0]] = True
            pred_index_list[max_iou_index[1]] = True
    # print(iou_info_list)
    # for iou_info in iou_info_list:
    #     if math.isnan(iou_info["iou_value"]):
    #         print(f"IOU value is NaN for gt index {iou_info['gt_index']} and pred index {iou_info['pred_index']}")
    #         print(iou_matrix[iou_info['gt_index'], iou_info['pred_index']])
    #         print(iou_matrix[iou_info['gt_index'], iou_info['pred_index']].item())
    #         print(iou_info["iou_value"])
    #         print(iou_matrix)
    
    return iou_info_list

def sort_list_of_dicts(data, key, reverse=False):
    """
    Sort a list of dictionaries by the specified key.

    Args:
        data (list): List of dictionaries to sort.
        key (str): Key to sort by.
        reverse (bool): Sort in descending order if True, ascending if False.

    Returns:
        list: Sorted list of dictionaries.
    """
    return sorted(data, key=lambda x: x[key], reverse=reverse)

def bbox_relative_to_absolute(relative_bbox, image_width_height):
    width, height = image_width_height
    x1 = relative_bbox[0] * width
    y1 = relative_bbox[1] * height
    x2 = relative_bbox[2] * width
    y2 = relative_bbox[3] * height
    absolute_bbox = [x1, y1, x2, y2]
    return absolute_bbox

def visualize_bbox(image, bbox_list, bbox_name_list,bbox_is_relative=True,with_id=False):
    assert len(bbox_list) == len(bbox_name_list), "bbox_list and bbox_name_list must have the same length"
    if isinstance(image, str):
        image = Image.open(image).convert("RGB")

    if bbox_is_relative:
        # 画像のサイズを取得
        image_width_height = (image.width, image.height)
        # 相対座標を絶対座標に変換
        bbox_list = [bbox_relative_to_absolute(bbox, image_width_height) for bbox in bbox_list]
        
    #bbox_name_listをソート、bbox_listも同じ順番にソート
    # bbox_name_list, bbox_list = zip(*sorted(zip(bbox_name_list, bbox_list), key=lambda x: x[0]))
    # bbox_name_list = list(bbox_name_list)
    # bbox_list = list(bbox_list)
    name_to_label_id_dict = {}
    label_id = 0
    for bbox_name in bbox_name_list:
        if bbox_name not in name_to_label_id_dict:
            name_to_label_id_dict[bbox_name] = label_id
            label_id += 1    
    
    # bbox_listの座標をy1, x1, y2, x2の形式に変換
    bboxes = []
    labels = []
    # label_id = -1
    # old_label = None
    count_object_dict = {}
    id_bbox_name_list = []
    for bbox ,bbox_name in zip(bbox_list, bbox_name_list):
        x1, y1, x2, y2 = bbox
        bboxes.append([y1, x1, y2, x2])
        # if old_label != bbox_name:
        #     label_id += 1
        #     old_label = bbox_name
        label_id = name_to_label_id_dict[bbox_name]
        if bbox_name not in count_object_dict:
            count_object_dict[bbox_name] = 0
        else:
            count_object_dict[bbox_name] += 1
        if with_id:
            bbox_name = f"{bbox_name}_{count_object_dict[bbox_name]}"
            id_bbox_name_list.append(bbox_name)
        labels.append(label_id)
    # bboxes = np.array([bbox[1],bbox[0],bbox[3],bbox[2]]).astype(np.int32).reshape(-1, 4)
    
    base_resolution = 100 * 100
    base_font_size = 3
    image_resolution = image.width * image.height
    font_size = int( base_font_size * (image_resolution / base_resolution) ** 0.5)
    
    if with_id:
        bbox_name_list = id_bbox_name_list
    image = imgviz.instances2rgb(np.array(image), bboxes=bboxes, labels=labels,font_size=font_size,captions=bbox_name_list)

    plt.imshow(image)
    plt.show()

  from .autonotebook import tqdm as notebook_tqdm


In [21]:
correct_json_path = "/data_ssd/mscoco-detection/val_for-kosmos2_mscoco2017-detection.json"
correct_data = load_json(correct_json_path)

generated_json_path = "/data_ssd/mscoco-detection/val_for-kosmos2_mscoco2017-detection.json"
generated_json_path = "/home/omote/omote-data-ssd/iam-llms-finetune/experiment_output/kosmos-2_mscoco2017-detection/mscoco2017-detection_train-vision-proj-llm_cross-entropy_2025-07-03T12_51_20/checkpoint-10536/eval_output/val_for-kosmos2_mscoco2017-detection/2025-07-04T10_36_40/eval_output.json"
generated_json_path = "/home/omote/omote-data-ssd/iam-llms-finetune/experiment_output/kosmos-2_mscoco2017-detection/mscoco2017-detection_train-vision-proj-llm_distance-loss_2025-07-03T12_52_38/checkpoint-10536/eval_output/val_for-kosmos2_mscoco2017-detection/2025-07-04T11_16_55/eval_output.json"
generated_json_path = "/home/omote/omote-data-ssd/iam-llms-finetune/experiment_output/kosmos-2_mscoco2017-detection/mscoco2017-detection_train-vision-proj-llm_distance-forward-kl-loss_2025-07-03T16_46_51/checkpoint-10536/eval_output/val_for-kosmos2_mscoco2017-detection/2025-07-04T11_56_05/eval_output.json"
generated_data = load_json(generated_json_path)

assert len(correct_data) == len(generated_data), "Length of correct and generated data does not match."

correct_data = sort_list_of_dicts(correct_data, "id")
generated_data = sort_list_of_dicts(generated_data, "id")

for correct, generated in zip(correct_data, generated_data):
    assert correct["id"] == generated["id"], f"ID mismatch: {correct['id']} != {generated['id']}"

In [22]:
processor = AutoProcessor.from_pretrained("/data_ssd/huggingface_model_weights/microsoft/kosmos-2-patch14-224")

In [23]:
item = correct_data[0]
for key,value in item.items():
    print(f"{key}: {value}")

id: mscoco2017-detection_train-100083
image: mscoco2017/coco/images/train2017/000000100083.jpg
conversations: [{'from': 'human', 'value': '<image><grounding> Please carefully check the image and detect the following objects: [person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop sign, parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake, chair, couch, potted plant, bed, dining table, toilet, tv, laptop, mouse, remote, keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair drier, toothbrush].'}, {'from': 'gpt', 'value': '<phrase> cup</phrase><object><patch_index_0

In [24]:
from pycocotools.coco import COCO

split = "val"
anno_path = f"/data_ssd/mscoco2017/coco/annotations/instances_{split}2017.json"
#/data_ssd/mscoco2017/coco/annotations/instances_train2017.json
coco_dataset = COCO(anno_path)
cat_name2id = {c["name"]: c["id"] for c in coco_dataset.loadCats(coco_dataset.getCatIds())}

image_folder_root = "/data_ssd"
cat_name2id.update({"unknown": -1})

iou_threshold = 0.5


loading annotations into memory...
Done (t=0.59s)
creating index...
index created!


In [25]:
print(len(cat_name2id))

81


In [26]:
from PIL import Image
from tqdm import tqdm

def create_images_for_coco(conversation_dataset, image_folder_root="/data_ssd"):
    return_images = []
    num_images = len(conversation_dataset)
    
    for i in tqdm(range(num_images)):
        image_name = conversation_dataset[i]["image"]
        image_path = os.path.join(image_folder_root, image_name)
        image = Image.open(image_path)
        image_height = image.height
        image_width = image.width
        image_info = {
            "id": i,
            "width": image_width,
            "height": image_height,
            "file_name": image_name
        }
        return_images.append(image_info)
    
    return return_images

def create_annotations_for_coco(conversation_dataset,categories,processor):
    return_annotations = {}
    
    num_images = len(conversation_dataset)
    
    id_index = 0
    for i in tqdm(range(num_images)):
        caption, entities = processor.post_process_generation(conversation_dataset[i]["conversations"][1]["value"])
        for name,_,bbox_list in entities:
            for bbox in bbox_list:
                annotation = {
                    "id": id_index,
                    "image_id": i,
                    "category_id": categories[name] if name in categories else categories["unknown"],
                    "bbox": [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]],  # [x, y, width, height]
                    "area": (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]),
                    "iscrowd": 0,
                    "score": 1.0,  # Assuming all annotations are perfect for dummy data
                    "category_name": name,
                    "bbox_xyxy": bbox,  # [x1, y1, x2, y2]
                    "is_unknown": 1 if name not in categories else 0
                }
                
                if i not in return_annotations:
                    return_annotations[i] = []
                return_annotations[i].append(annotation)
                id_index += 1

    return return_annotations


In [27]:
images = create_images_for_coco(correct_data, image_folder_root)
all_gt_annotations = create_annotations_for_coco(correct_data, cat_name2id, processor)
all_pred_annotations = create_annotations_for_coco(generated_data, cat_name2id, processor)

100%|██████████| 5000/5000 [00:02<00:00, 1782.78it/s]
100%|██████████| 5000/5000 [00:01<00:00, 4957.86it/s]
100%|██████████| 5000/5000 [00:00<00:00, 13368.38it/s]


In [28]:
for index, annotation_list in all_gt_annotations.items():
    print(f"Image {index} has {len(annotation_list)} ground truth annotations.")
    for annotation in annotation_list:
        print(f"  Annotation ID: {annotation['id']}, Category: {annotation['category_name']}, BBox: {annotation['bbox_xyxy']}")
    break

Image 0 has 5 ground truth annotations.
  Annotation ID: 0, Category: cup, BBox: (0.046875, 0.109375, 0.171875, 0.234375)
  Annotation ID: 1, Category: cup, BBox: (0.171875, 0.109375, 0.265625, 0.234375)
  Annotation ID: 2, Category: fork, BBox: (0.265625, 0.109375, 0.328125, 0.234375)
  Annotation ID: 3, Category: fork, BBox: (0.453125, 0.328125, 0.546875, 0.390625)
  Annotation ID: 4, Category: dining table, BBox: (0.015625, 0.015625, 0.984375, 0.984375)


In [29]:
import sys
sys.path.append("/home/omote/cluster_project/iam2/eval")
from eval_utils.custom_oc_cost import get_cmap,get_ot_cost,DetectedInstance

def oc_cost(pred_instance_list,tgt_instance_list, alpha=0.5,beta=0.6):
    cmap_func = lambda x, y: get_cmap(x, y, alpha=alpha, beta=beta,label_or_sim="label")
    otc = get_ot_cost(pred_instance_list, tgt_instance_list, cmap_func)
    return otc

In [30]:
import math
per_image_result_dict = {}
break_index = 10
debug_mode = False
oc_cost_list = []
for index, gt_per_image_annotation_list in tqdm(all_gt_annotations.items()):
    pred_per_image_annotation_list = all_pred_annotations.get(index, [])
    
    # 画像ごとの評価
    pred_instance_list = [DetectedInstance(
        label=ann["category_id"],
        x1=ann["bbox_xyxy"][0],
        y1=ann["bbox_xyxy"][1],
        x2=ann["bbox_xyxy"][2],
        y2=ann["bbox_xyxy"][3]) for ann in pred_per_image_annotation_list]
    tgt_instance_list = [DetectedInstance(
        label=ann["category_id"],
        x1=ann["bbox_xyxy"][0],
        y1=ann["bbox_xyxy"][1],
        x2=ann["bbox_xyxy"][2],
        y2=ann["bbox_xyxy"][3]) for ann in gt_per_image_annotation_list]
    
    oc_cost_value = oc_cost(pred_instance_list, tgt_instance_list, alpha=0.5, beta=0.6)
    oc_cost_list.append(oc_cost_value)
    
    #画像ごと・カテゴリごとの評価準備
    gt_per_category_dict = {}
    pred_per_category_dict = {}
    per_category_result_dict = {}
    
    for category_id in cat_name2id.values():
        gt_per_category_dict[category_id] = None
        pred_per_category_dict[category_id] = None
        per_category_result_dict[category_id] = None
        
    for annotation in gt_per_image_annotation_list:
        if gt_per_category_dict[annotation["category_id"]] is None:
            gt_per_category_dict[annotation["category_id"]] = []
        gt_per_category_dict[annotation["category_id"]].append(annotation)
    
    for annotation in pred_per_image_annotation_list:
        if pred_per_category_dict[annotation["category_id"]] is None:
            pred_per_category_dict[annotation["category_id"]] = []
        pred_per_category_dict[annotation["category_id"]].append(annotation)
    
    
    for category_id, gt_annotations in gt_per_category_dict.items():
        pred_annotations = pred_per_category_dict[category_id]
        if gt_annotations is None and  pred_annotations is None:
            continue
        
        per_category_result = {
            "iou_list": [],
            "tp_num": 0,
            "fp_num": 0,
            "fn_num": 0,
        }
        if gt_annotations is None and pred_per_category_dict[category_id] is not None:
            per_category_result["fp_num"] = len(pred_per_category_dict[category_id])
        elif gt_annotations is not None:
            if pred_per_category_dict[category_id] is None:
                per_category_result["fn_num"] = len(gt_annotations)
                per_category_result["iou_list"] = [0.0] * len(gt_annotations)
            else: 
                gt_bbox_list = [ann["bbox_xyxy"] for ann in gt_annotations]
                pred_bbox_list = [ann["bbox_xyxy"] for ann in pred_annotations]
                iou_info_list = calculate_iou(gt_bbox_list, pred_bbox_list)
                iou_list = [info["iou_value"] for info in iou_info_list] if not debug_mode else [1.0] * len(gt_bbox_list)
                for iou in iou_list:
                    assert not math.isnan(iou), f"IOU value is NaN in category {category_id}, index {index}"
                if len(iou_list) < len(gt_bbox_list):
                    iou_list += [0.0] * (len(gt_bbox_list) - len(iou_list))
                # for iou in iou_list:
                #     assert not math.isnan(iou), f"IOU value is NaN in category {category_id}, index {index}"
                per_category_result["iou_list"] = iou_list
                tp_num = sum(1 for iou in iou_list if iou >= iou_threshold) if not debug_mode else len(gt_bbox_list)
                per_category_result["tp_num"] = tp_num
                per_category_result["fp_num"] = len(pred_bbox_list) - tp_num
                per_category_result["fn_num"] = len(gt_bbox_list) - tp_num
                # if index == 9 and category_id == 84:
                #     visualize_bbox(
                #         os.path.join(image_folder_root, images[index]["file_name"]),
                #         pred_bbox_list,
                #         [ann["category_name"] for ann in pred_annotations],
                #         bbox_is_relative=True,
                #         with_id=True
                #     )
                #     print(per_category_result)
                #     print(pred_bbox_list == gt_bbox_list)
                #     print(len(pred_bbox_list), len(gt_bbox_list))
                #     print(iou_info_list)
        
        per_category_result_dict[category_id] = per_category_result
    
    per_image_result_dict[index] = per_category_result_dict
    # if index >= break_index:
    #     break


100%|██████████| 5000/5000 [00:05<00:00, 848.24it/s]


In [31]:
for key ,value in per_image_result_dict.items():
    print(f"Image {key} has {len(value)} categories.")
    print(f"OC Cost: {oc_cost_list[key]}")
    for category_id, result in value.items():
        if result is None:
            continue
        print(f"  Category ID: {category_id}, TP: {result['tp_num']}, FP: {result['fp_num']}, FN: {result['fn_num']}, IOU List: {result['iou_list']}")
    if key >= break_index:
        break

Image 0 has 81 categories.
OC Cost: 0.4946666634082794
  Category ID: 47, TP: 0, FP: 0, FN: 2, IOU List: [0.0, 0.0]
  Category ID: 48, TP: 1, FP: 0, FN: 1, IOU List: [0.6666666865348816, 0.0]
  Category ID: 51, TP: 0, FP: 1, FN: 0, IOU List: []
  Category ID: 67, TP: 0, FP: 0, FN: 1, IOU List: [0.0]
Image 1 has 81 categories.
OC Cost: 0.25312500397364296
  Category ID: 10, TP: 2, FP: 0, FN: 1, IOU List: [0.800000011920929, 0.5625, 0.0]
Image 2 has 81 categories.
OC Cost: 0.0
  Category ID: 13, TP: 1, FP: 0, FN: 0, IOU List: [1.0]
Image 3 has 81 categories.
OC Cost: 0.4039682586987813
  Category ID: 15, TP: 1, FP: 0, FN: 0, IOU List: [0.9523809552192688]
  Category ID: 47, TP: 0, FP: 0, FN: 2, IOU List: [0.0, 0.0]
Image 4 has 81 categories.
OC Cost: 0.06020115315914154
  Category ID: 17, TP: 1, FP: 0, FN: 0, IOU List: [1.0]
  Category ID: 64, TP: 1, FP: 0, FN: 0, IOU List: [0.5517241358757019]
Image 5 has 81 categories.
OC Cost: 0.19880952656269074
  Category ID: 1, TP: 6, FP: 1, FN: 0,

In [32]:
per_category_result_dict = {}
for index, per_image_result in per_image_result_dict.items():
    for category_id, result in per_image_result.items():
        if category_id not in per_category_result_dict:
            per_category_result_dict[category_id] = {
                "iou_list": [],
                "tp_num": 0,
                "fp_num": 0,
                "fn_num": 0,
            }
        
        if result is None:
            continue
        # if result["fp_num"] > 0 or result["fn_num"]:
        #     print(index, category_id, result)
        per_category_result_dict[category_id]["tp_num"] += result["tp_num"]
        per_category_result_dict[category_id]["fp_num"] += result["fp_num"]
        per_category_result_dict[category_id]["fn_num"] += result["fn_num"]
        per_category_result_dict[category_id]["iou_list"].extend(result["iou_list"])

for category_id, result in per_category_result_dict.items():
    print(f"Category ID: {category_id}, TP: {result['tp_num']}, FP: {result['fp_num']}, FN: {result['fn_num']}, IOU List: {result['iou_list']}")

Category ID: 1, TP: 5875, FP: 4304, FN: 4619, IOU List: [1.0, 0.8571428656578064, 0.8333333134651184, 0.7142857313156128, 0.6666666865348816, 0.6428571343421936, 0.6666666865348816, 0.0, 0.95652174949646, 0.875, 0.8333333134651184, 0.800000011920929, 0.5714285969734192, 0.5555555820465088, 0.4444444477558136, 0.3333333432674408, 0.25, 0.0, 0.0, 1.0, 0.9642857313156128, 0.855555534362793, 0.9473684430122375, 0.9473684430122375, 0.9473684430122375, 0.7200000286102295, 0.0, 1.0, 0.949999988079071, 0.8571428656578064, 0.8333333134651184, 0.800000011920929, 0.4444444477558136, 0.0, 0.0, 1.0, 1.0, 0.25, 0.9104166626930237, 1.0, 0.5454545617103577, 0.5, 0.5, 0.5, 0.3333333432674408, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.9166666865348816, 0.8571428656578064, 0.75, 0.6111111044883728, 0.0, 0.8947368264198303, 1.0, 0.4761904776096344, 0.8888888955116272, 1.0, 1.0, 0.949999988079071, 0.8888888955116272, 0.800000011920929, 0.800000011920929, 0.800000011920929, 0.5, 0.5, 0.0, 0.0, 0.0, 1.0, 0.142857

In [33]:
import math
per_category_score_dict = {}

dataset_score = {
    "summary_scores":{
      "accuracy": 0.0,
      "m_iou": [],
      "oc_cost": np.mean(oc_cost_list) if len(oc_cost_list) > 0 else 0.0,
      "macro_precision": [],
      "macro_recall": [],
      "macro_f1": [],
      "cm_iou": [],
    },
    "summary_data_num":{
        "tp_num": 0,
        "fp_num": 0,
        "fn_num": 0,
    },
}

for category_id, result in per_category_result_dict.items():
    #クラスごと
    tp_num = result["tp_num"]
    fp_num = result["fp_num"]
    fn_num = result["fn_num"]
    iou_list = result["iou_list"]
    m_iou = np.mean(iou_list) if len(iou_list) > 0 else 0.0
    # if math.isnan(m_iou):
    #     print(f"Category ID: {category_id} has NaN mIoU. Check the IOU list: {iou_list}")
    #     for iou in iou_list:
    #         assert not math.isnan(iou), f"IOU value is NaN in category {category_id}"
    precision = tp_num / (tp_num + fp_num) if (tp_num + fp_num) > 0 else 0.0
    recall = tp_num / (tp_num + fn_num) if (tp_num + fn_num) > 0 else 0.0
    f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
    per_category_score = {
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score,
        "m_iou": m_iou,
        "tp_num": tp_num,
        "fp_num": fp_num,
        "fn_num": fn_num,
    }
    per_category_score_dict[category_id] = per_category_score
    
    #データセット全体
    dataset_score["summary_data_num"]["tp_num"] += tp_num
    dataset_score["summary_data_num"]["fp_num"] += fp_num
    dataset_score["summary_data_num"]["fn_num"] += fn_num
    dataset_score["summary_scores"]["m_iou"].extend(iou_list)
    
    if category_id != -1:
        dataset_score["summary_scores"]["macro_precision"].append(precision)
        dataset_score["summary_scores"]["macro_recall"].append(recall)
        dataset_score["summary_scores"]["macro_f1"].append(f1_score)
        dataset_score["summary_scores"]["cm_iou"].append(m_iou)

#データセット全体
dataset_score["summary_scores"]["accuracy"] = dataset_score["summary_data_num"]["tp_num"] / (dataset_score["summary_data_num"]["tp_num"] + dataset_score["summary_data_num"]["fp_num"] + dataset_score["summary_data_num"]["fn_num"]) if (dataset_score["summary_data_num"]["tp_num"] + dataset_score["summary_data_num"]["fp_num"] + dataset_score["summary_data_num"]["fn_num"]) > 0 else 0.0
dataset_score["summary_scores"]["m_iou"] = np.mean(dataset_score["summary_scores"]["m_iou"]) if len(dataset_score["summary_scores"]["m_iou"]) > 0 else 0.0
dataset_score["summary_scores"]["macro_precision"] = np.mean(dataset_score["summary_scores"]["macro_precision"]) if len(dataset_score["summary_scores"]["macro_precision"]) > 0 else 0.0
dataset_score["summary_scores"]["macro_recall"] = np.mean(dataset_score["summary_scores"]["macro_recall"]) if len(dataset_score["summary_scores"]["macro_recall"]) > 0 else 0.0
dataset_score["summary_scores"]["macro_f1"] = np.mean(dataset_score["summary_scores"]["macro_f1"]) if len(dataset_score["summary_scores"]["macro_f1"]) > 0 else 0.0
dataset_score["summary_scores"]["cm_iou"] = np.mean(dataset_score["summary_scores"]["cm_iou"]) if len(dataset_score["summary_scores"]["cm_iou"]) > 0 else 0.0 

In [34]:
for category_id, score in per_category_score_dict.items():
    print(f"Category ID: {category_id}, Precision: {score['precision']:.4f}, Recall: {score['recall']:.4f}, F1 Score: {score['f1_score']:.4f}, mIoU: {score['m_iou']:.4f}, TP: {score['tp_num']}, FP: {score['fp_num']}, FN: {score['fn_num']}")

for key,score in dataset_score["summary_scores"].items():
    print(f"{key}: {score:.4f}")
    
for key,num in dataset_score["summary_data_num"].items():
    print(f"{key}: {num}")

Category ID: 1, Precision: 0.5772, Recall: 0.5598, F1 Score: 0.5684, mIoU: 0.5059, TP: 5875, FP: 4304, FN: 4619
Category ID: 2, Precision: 0.4106, Recall: 0.3553, F1 Score: 0.3810, mIoU: 0.3204, TP: 108, FP: 155, FN: 196
Category ID: 3, Precision: 0.3833, Recall: 0.3358, F1 Score: 0.3580, mIoU: 0.2971, TP: 644, FP: 1036, FN: 1274
Category ID: 4, Precision: 0.7403, Recall: 0.3976, F1 Score: 0.5174, mIoU: 0.3382, TP: 134, FP: 47, FN: 203
Category ID: 5, Precision: 0.7857, Recall: 0.7021, F1 Score: 0.7416, mIoU: 0.6414, TP: 165, FP: 45, FN: 70
Category ID: 6, Precision: 0.6748, Recall: 0.4029, F1 Score: 0.5046, mIoU: 0.3707, TP: 83, FP: 40, FN: 123
Category ID: 7, Precision: 0.8462, Recall: 0.7516, F1 Score: 0.7961, mIoU: 0.6832, TP: 121, FP: 22, FN: 40
Category ID: 8, Precision: 0.5813, Recall: 0.2980, F1 Score: 0.3940, mIoU: 0.2564, TP: 118, FP: 85, FN: 278
Category ID: 9, Precision: 0.4488, Recall: 0.3539, F1 Score: 0.3958, mIoU: 0.3108, TP: 149, FP: 183, FN: 272
Category ID: 10, Preci

# cross entropy
accuracy: 0.2815
m_iou: 0.3360
oc_cost: 0.2708
macro_precision: 0.5672
macro_recall: 0.3237
macro_f1: 0.3933
cm_iou: 0.2927
tp_num: 13345
fp_num: 11581
fn_num: 22474

# distance loss
accuracy: 0.2675
m_iou: 0.3222
oc_cost: 0.2726
macro_precision: 0.5664
macro_recall: 0.3119
macro_f1: 0.3821
cm_iou: 0.2832
tp_num: 12769
fp_num: 11911
fn_num: 23050

# distance forward kl loss
accuracy: 0.2674
m_iou: 0.3235
oc_cost: 0.2735
macro_precision: 0.5697
macro_recall: 0.3168
macro_f1: 0.3879
cm_iou: 0.2842
tp_num: 12868
fp_num: 12304
fn_num: 22951


# 欲しい情報
* 画像レベルで、クラスごとに分けて、どのpredとどのgtの検出インスタンスが紐づいたか、紐づいた分のiouのリスト、
# クラスごとの評価指標
* cm-accuracy
* cm-IoU
* macro-f1
* macro-recall
* macro-precision
# 全体の評価指標
* accuracy=micro-f1,micro-precision,micro-recall
* m-iou
# 画像ごとの評価指標
* oc-cost


In [None]:
def calculate_per_class_m_iou(per_category_result_dict, iou_threshold=0.5):
    """
    Calculate per-class mIoU from the per-category results.

    Args:
        per_category_result_dict (dict): Dictionary containing per-category results.
        iou_threshold (float): IoU threshold to consider a prediction as true positive.

    Returns:
        dict: Dictionary with category IDs as keys and mIoU values as values.
    """
    per_class_miou = {}
    for category_id, result in per_category_result_dict.items():
        tp_num = result["tp_num"]
        fp_num = result["fp_num"]
        fn_num = result["fn_num"]
        iou_list = result["iou_list"]

        if tp_num + fp_num + fn_num == 0:
            continue

        # Calculate mIoU
        m_iou = sum(iou for iou in iou_list if iou >= iou_threshold) / max(tp_num, 1)
        per_class_miou[category_id] = m_iou

    return per_class_miou