In [16]:
import os
import sys
import numpy as np
import pandas as pd
import json

# trial list path for verification 
trial_list_path = "./file_list.csv"

# automatic annotation root directory
auto_annotation_root = "/standard/storage/EgoExoEMS_CVPR2025/Dataset/Final/"

#  manual annotation file path
manual_annotation_path = "./manual_annotation/project-1-at-2024-11-03-02-33-a188226c.json"

manual_annotations= None
with open(manual_annotation_path, 'r') as f:
    manual_annotations = json.load(f)

trials = pd.read_csv(trial_list_path)


In [17]:
# process the manual annotations

import json
from collections import defaultdict

# Initialize the final dictionary
final_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list))))

# Process each entry in the JSON data
for entry in manual_annotations:
    # Extract the path information from the 'image' field in 'data'
    image_path = entry['data']['image']
    path_parts = image_path.split('/')
    
    # Extract subject, scenario, and trial from the path
    subject = path_parts[3].replace("?d=", "")  # Removes the "?d=" prefix
    scenario = path_parts[4]
    trial = path_parts[5]

    # Extract frame ID
    frame_id = path_parts[-1]  # e.g., '0000.jpg'

    # Collect bbox details for each annotation result
    for annotation in entry['annotations']:
        for result in annotation['result']:
            bbox = {
                'x': result['value']['x'],
                'y': result['value']['y'],
                'width': result['value']['width'],
                'height': result['value']['height'],
                'label': result['value']['rectanglelabels'][0]
            }
            # Append the bbox to the list of boxes for the specific frame within the trial
            final_dict[subject][scenario][trial][frame_id].append(bbox)

# Convert the final dictionary to a regular dict for easier use
manual_annotation_data = {subject: {scenario: {trial: dict(frames) for trial, frames in trials.items()}
                        for scenario, trials in scenarios.items()}
              for subject, scenarios in final_dict.items()}

# Example output
print(json.dumps(manual_annotation_data, indent=4))

{
    "ng5": {
        "cardiac_arrest": {
            "2": {
                "0000.jpg": [
                    {
                        "x": 74.54693308550185,
                        "y": 36.80297397769517,
                        "width": 12.128252788104092,
                        "height": 34.386617100371744,
                        "label": "bvm"
                    }
                ],
                "0001.jpg": [
                    {
                        "x": 62.73234200743494,
                        "y": 47.58364312267658,
                        "width": 11.500929368029745,
                        "height": 32.34200743494425,
                        "label": "bvm"
                    }
                ],
                "0002.jpg": [
                    {
                        "x": 44.64451672862453,
                        "y": 65.24163568773234,
                        "width": 18.506040892193305,
                        "height": 34.75836431226766,
               

In [25]:
import os
import json
import cv2
from shapely.geometry import box

HEIGHT_SCALE = 1080 / 100
WIDTH_SCALE = 1920 / 100

# Remove and recreate the output directory
if os.path.exists("./output"):
    os.system("rm -r ./output")
os.mkdir("./output")

def calculate_iou(bbox1, bbox2):
    box1 = box(bbox1['xmin'], bbox1['ymin'], bbox1['xmax'], bbox1['ymax'])
    box2 = box(bbox2['xmin'], bbox2['ymin'], bbox2['xmax'], bbox2['ymax'])
    intersection = box1.intersection(box2).area
    union = box1.union(box2).area
    return intersection / union if union != 0 else 0

def calculate_bbox_error(manual_bbox, auto_bbox):
    # Calculate center and dimension errors
    manual_center = ((manual_bbox['xmin'] + manual_bbox['xmax']) / 2,
                     (manual_bbox['ymin'] + manual_bbox['ymax']) / 2)
    auto_center = ((auto_bbox['xmin'] + auto_bbox['xmax']) / 2,
                   (auto_bbox['ymin'] + auto_bbox['ymax']) / 2)
    center_error = ((manual_center[0] - auto_center[0]) ** 2 +
                    (manual_center[1] - auto_center[1]) ** 2) ** 0.5
    
    # Dimension error
    manual_width = manual_bbox['xmax'] - manual_bbox['xmin']
    manual_height = manual_bbox['ymax'] - manual_bbox['ymin']
    auto_width = auto_bbox['xmax'] - auto_bbox['xmin']
    auto_height = auto_bbox['ymax'] - auto_bbox['ymin']
    
    width_error = abs(manual_width - auto_width)
    height_error = abs(manual_height - auto_height)
    
    return center_error, width_error, height_error

# Iterate through the trials
iou_results = []
center_errors = []
dimension_errors = []
for index, row in trials.iterrows():
    subject = row['subject']
    trial = row['trial']
    scenario = "cardiac_arrest"
    trial_path = os.path.join(auto_annotation_root, subject, scenario, str(trial))
    original_frame_path = os.path.join(trial_path, "BBOX_MASKS", "original")

    # Check if the trial path exists
    if not os.path.exists(trial_path):
        print(f"Trial path does not exist: {trial_path}")
        continue

    # Get data from the manual annotations
    m_current_trial_data = manual_annotation_data.get(subject, {}).get(scenario, {}).get(str(trial), {})
    # Get automatic annotations for the trial
    a_current_trial_data = {}
    for root, dirs, files in os.walk(trial_path):
        for file in files:
            if file.endswith("bbox_annotations.json"):
                with open(os.path.join(root, file), 'r') as f:
                    a_current_trial_data = json.load(f)
                    break

    # Calculate IoU and Errors for each frame
    frame_ious = []
    for frame, manual_bboxes in m_current_trial_data.items():
        auto_bboxes = [item['bboxes'] for item in a_current_trial_data if item['frame_counter'] == int(frame.split('.')[0])]

        # Flatten nested lists of bounding boxes if needed
        if auto_bboxes:
            auto_bboxes = auto_bboxes[0]  # Access the actual list of bounding boxes directly

        if not auto_bboxes:
            continue

        # Load the frame image for visualization
        frame_path = os.path.join(original_frame_path, frame)
        image = cv2.imread(frame_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        frame_iou_sum = 0
        frame_iou_count = 0

        for m_bbox in manual_bboxes:
            matched_auto_bboxes = [a_bbox for a_bbox in auto_bboxes if a_bbox['class'] == m_bbox['label'] and a_bbox['confidence'] > 0.8]

            for a_bbox in matched_auto_bboxes:
                a_bbox_coords = {
                    'xmin': a_bbox['xmin'], 'ymin': a_bbox['ymin'],
                    'xmax': a_bbox['xmax'], 'ymax': a_bbox['ymax']
                }
                m_bbox_coords = {
                    'xmin': m_bbox['x'] * WIDTH_SCALE, 'ymin': m_bbox['y'] * HEIGHT_SCALE,
                    'xmax': m_bbox['x'] * WIDTH_SCALE + m_bbox['width'] * WIDTH_SCALE,
                    'ymax': m_bbox['y'] * HEIGHT_SCALE + m_bbox['height'] * HEIGHT_SCALE
                }

                # Draw bounding boxes for visualization
                # Manual bounding box in green
                cv2.rectangle(
                    image_rgb,
                    (int(m_bbox_coords['xmin']), int(m_bbox_coords['ymin'])),
                    (int(m_bbox_coords['xmax']), int(m_bbox_coords['ymax'])),
                    (0, 255, 0), 2
                )

                # Automatic bounding box in red
                cv2.rectangle(
                    image_rgb,
                    (int(a_bbox_coords['xmin']), int(a_bbox_coords['ymin'])),
                    (int(a_bbox_coords['xmax']), int(a_bbox_coords['ymax'])),
                    (255, 0, 0), 2
                )

                # Calculate IoU and Errors
                iou = calculate_iou(m_bbox_coords, a_bbox_coords)
                center_error, width_error, height_error = calculate_bbox_error(m_bbox_coords, a_bbox_coords)

                # Append errors for overall statistics
                center_errors.append(center_error)
                dimension_errors.append((width_error, height_error))
                if iou > 0:
                    frame_iou_sum += iou
                    frame_iou_count += 1

        if frame_iou_count > 0:
            frame_ious.append(frame_iou_sum / frame_iou_count)

        # Save the image with bounding boxes
        save_path = f"./output/iou_{subject}_{trial}_{frame.split('.')[0]}.png"
        # cv2.imwrite(save_path, cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR))

    # Average IoU for the trial
    if frame_ious:
        avg_iou = sum(frame_ious) / len(frame_ious)
        iou_results.append(avg_iou)
        print(f"Average IoU for {subject} {trial}: {avg_iou}")

# Overall metrics
overall_avg_iou = sum(iou_results) / len(iou_results) if iou_results else 0
overall_center_error = sum(center_errors) / len(center_errors) if center_errors else 0
avg_width_error = sum(err[0] for err in dimension_errors) / len(dimension_errors) if dimension_errors else 0
avg_height_error = sum(err[1] for err in dimension_errors) / len(dimension_errors) if dimension_errors else 0

overall_center_error_percent = overall_center_error / 1920 * 100
avg_width_error_percent = avg_width_error / 1920 * 100
avg_height_error_percent = avg_height_error / 1080 * 100


print(f"Overall Average IoU: {overall_avg_iou}")
print(f"Overall Center Error: {overall_center_error}")
print(f"Average Width Error: {avg_width_error}")
print(f"Average Height Error: {avg_height_error}")

print(f"Overall Center Error (%): {overall_center_error_percent}")
print(f"Average Width Error (%): {avg_width_error_percent}")
print(f"Average Height Error (%): {avg_height_error_percent}")



Average IoU for ng5 2: 0.7276459138388762
Average IoU for ng5 3: 0.7081314042126214
Average IoU for ng3 4: 0.7206911825432778
Average IoU for ng3 5: 0.7827847019953321
Average IoU for ng8 0: 0.7762400003888819
Average IoU for ng8 1: 0.775337742537784
Average IoU for ng8 2: 0.8167037870003723
Average IoU for ng8 3: 0.7846055333377636
Average IoU for ng8 4: 0.7697035698714969
Average IoU for ng3 0: 0.7189122062186062
Overall Average IoU: 0.7580756041945012
Overall Center Error: 224.1560050991033
Average Width Error: 41.888483324882564
Average Height Error: 56.63527333266488
Overall Center Error (%): 11.674791932244965
Average Width Error (%): 2.1816918398376335
Average Height Error (%): 5.2440067900615634


In [27]:
print(iou_results)
overall_avg_iou = sum(iou_results) / len(iou_results)
print(f"Overall Average IoU: {overall_avg_iou}")

# add the results to the trial df
trials['iou'] = iou_results


[0.7276459138388762, 0.7081314042126214, 0.7206911825432778, 0.7827847019953321, 0.7762400003888819, 0.775337742537784, 0.8167037870003723, 0.7846055333377636, 0.7697035698714969, 0.7189122062186062]
Overall Average IoU: 0.7580756041945012


# mAP

In [22]:
import os
import json
import cv2
import matplotlib.pyplot as plt
from shapely.geometry import box
from collections import defaultdict

HEIGHT_SCALE = 1080 / 100
WIDTH_SCALE = 1920 / 100
IOU_THRESHOLD = 0.5  # Threshold for a positive match

# Remove the output directory if it exists
if os.path.exists("./output"):
    os.system("rm -r ./output")

# Create the output directory
os.mkdir("./output")

def calculate_iou(bbox1, bbox2):
    """
    Calculate Intersection over Union (IoU) between two bounding boxes.
    bbox format: [xmin, ymin, xmax, ymax]
    """
    box1 = box(bbox1['xmin'], bbox1['ymin'], bbox1['xmax'], bbox1['ymax'])
    box2 = box(bbox2['xmin'], bbox2['ymin'], bbox2['xmax'], bbox2['ymax'])
    intersection = box1.intersection(box2).area
    union = box1.union(box2).area
    return intersection / union if union != 0 else 0

# Containers for mAP calculation
all_detections = defaultdict(list)  # Store detections per class with confidence and match status
num_ground_truths = defaultdict(int)  # Store total GT boxes per class

# Iterate through the trials
for index, row in trials.iterrows():
    subject = row['subject']
    trial = row['trial']
    scenario = "cardiac_arrest"
    trial_path = os.path.join(auto_annotation_root, subject, scenario, str(trial))

    original_frame_path = os.path.join(trial_path, "BBOX_MASKS", "original")

    # Check if the trial path exists
    if not os.path.exists(trial_path):
        print(f"Trial path does not exist: {trial_path}")
        continue

    # Get the data from the manual annotations
    m_current_trial_data = manual_annotation_data.get(subject, {}).get(scenario, {}).get(str(trial), {})

    # Get the automatic annotations for the trial
    a_current_trial_data = {}
    for root, dirs, files in os.walk(trial_path):
        for file in files:
            if file.endswith("bbox_annotations.json"):
                with open(os.path.join(root, file), 'r') as f:
                    a_current_trial_data = json.load(f)
                    break

    # Calculate IoU and accumulate TP, FP, FN for each frame
    for frame, manual_bboxes in m_current_trial_data.items():
        auto_bboxes = [item['bboxes'] for item in a_current_trial_data if item['frame_counter'] == int(frame.split('.')[0])]

        # Flatten nested lists of bounding boxes if needed
        if auto_bboxes:
            auto_bboxes = auto_bboxes[0]  # Access the actual list of bounding boxes directly

        if not auto_bboxes:
            continue

        # Load the frame image for visualization
        frame_path = os.path.join(original_frame_path, frame)
        image = cv2.imread(frame_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB for matplotlib

        matched = set()  # Track matched ground truth boxes

        # Process each automatic bounding box
        for a_bbox in auto_bboxes:
            a_bbox_coords = {
                'xmin': a_bbox['xmin'], 'ymin': a_bbox['ymin'],
                'xmax': a_bbox['xmax'], 'ymax': a_bbox['ymax']
            }
            a_class = a_bbox['class']
            a_confidence = a_bbox.get('confidence', 1.0)  # Use confidence if available

            best_iou = 0
            best_gt_index = -1

            # Match auto box to a manual bounding box
            for i, m_bbox in enumerate(manual_bboxes):
                if i in matched:
                    continue  # Skip already matched ground truth

                m_bbox_coords = {
                    'xmin': m_bbox['x'] * WIDTH_SCALE, 'ymin': m_bbox['y'] * HEIGHT_SCALE,
                    'xmax': m_bbox['x'] * WIDTH_SCALE + m_bbox['width'] * WIDTH_SCALE,
                    'ymax': m_bbox['y'] * HEIGHT_SCALE + m_bbox['height'] * HEIGHT_SCALE
                }
                m_class = m_bbox['label']

                # Calculate IoU only for matching classes
                if m_class == a_class:
                    iou = calculate_iou(m_bbox_coords, a_bbox_coords)
                    if iou > best_iou:
                        best_iou = iou
                        best_gt_index = i

            # Determine if detection is TP or FP
            if best_iou >= IOU_THRESHOLD and best_gt_index >= 0:
                all_detections[a_class].append((a_confidence, 1))  # True positive
                matched.add(best_gt_index)
            else:
                all_detections[a_class].append((a_confidence, 0))  # False positive

        # Count FN for unmatched ground truth boxes
        for m_bbox in manual_bboxes:
            m_class = m_bbox['label']
            if m_class not in matched:
                num_ground_truths[m_class] += 1

        # Save image with bounding boxes
        # cv2.imwrite(f"./output/iou_{subject}_{trial}_{frame.split('.')[0]}.png", cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR))

# Calculate mAP
average_precisions = {}
for class_name, detections in all_detections.items():
    # Sort detections by confidence score in descending order
    detections.sort(key=lambda x: x[0], reverse=True)
    
    tp_cumsum = 0
    fp_cumsum = 0
    precisions = []
    recalls = []
    total_gts = num_ground_truths[class_name]

    for confidence, is_tp in detections:
        if is_tp:
            tp_cumsum += 1
        else:
            fp_cumsum += 1

        precision = tp_cumsum / (tp_cumsum + fp_cumsum) if (tp_cumsum + fp_cumsum) > 0 else 0
        recall = tp_cumsum / total_gts if total_gts > 0 else 0
        precisions.append(precision)
        recalls.append(recall)

    # Calculate AP for this class
    if precisions:
        average_precisions[class_name] = sum(precisions) / len(precisions)

# Calculate mean AP across all classes
mAP = sum(average_precisions.values()) / len(average_precisions) if average_precisions else 0
print(f"Mean Average Precision (mAP): {mAP}")


Mean Average Precision (mAP): 0.6441202212706075
