# Example of Output
## Detection
Output of the detection module of a volume:

```
[
  [[zs1, ys1, xs1, ze1, ye1, xe1], obj_score1, class1_score1, class2_score1],
  [[zs2, ys2, xs2, ze2, ye2, xe2], obj_score2, class1_score2, class2_score2],
  ...
]
```

In the format above, zs, ys, and xs are the start coordinates of the bounding box, and ze, ye, and xe are its end coordinates. "obj_score" is the confidence of the bounding box, "class1_score" is the probability that the bounding box belongs to the first class (intracranial aneurysm in this challenge), and "class2_score" is the probability that it belongs to the second class (stenosis). Note that the two class probabilities should sum to 1.

When each detection class is evaluated, the output is first converted with the following code into the format expected by the detection metrics code.

In [4]:
import numpy as np
def convert_output_to_detection(output, wanted_class_num=1):
    """
    Keep, for every image, only the detections whose top-scoring class is the wanted one.

    :param output: list with one entry per image; each entry is a list of records
        shaped like [bbox, obj_score, class1_score, class2_score, ...]
    :param wanted_class_num: 1-based number of the class to keep
    :return: list with one entry per image; each entry is a list of
        (bbox, obj_score) tuples for the records assigned to the wanted class
    """
    # Class numbers are 1-based, whereas argmax over the class scores is 0-based.
    wanted_idx = wanted_class_num - 1

    detections = []
    for image_records in output:
        detections.append([
            (record[0], record[1])
            for record in image_records
            if np.argmax(record[2:]) == wanted_idx
        ])
    return detections

# Toy model output for two images with two candidate boxes each.
# Every record is [bbox, obj_score, class1_score, class2_score].
output = [
    [  # Prediction for image 1
        [[50, 50, 50, 150, 150, 150], 0.6, 0.1, 0.9],
        [[10, 10, 10, 80, 80, 80], 0.5, 0.8, 0.2],
    ],
    [  # Prediction for image 2
        [[20, 20, 20, 70, 70, 70], 0.4, 0.7, 0.3],
        [[60, 60, 60, 120, 120, 120], 0.1, 0.4, 0.6],
    ],
]

# Show the detections kept for class 1, then for class 2.
for class_num in (1, 2):
    print(convert_output_to_detection(output, class_num))

[[([10, 10, 10, 80, 80, 80], 0.5)], [([20, 20, 20, 70, 70, 70], 0.4)]]
[[([50, 50, 50, 150, 150, 150], 0.6)], [([60, 60, 60, 120, 120, 120], 0.1)]]


## Segmentation
The output of the segmentation task should be **an array with the same size as the input image**, in which label 1 marks segmented lesion voxels (regardless of whether the lesion is an intracranial aneurysm or a stenosis). Patches inside successfully detected bounding boxes will be cropped, and the segmentation metrics and clinical metrics will be calculated within each box.

# Detection Part
Definitions of Metrics

In [1]:
import numpy as np

def iou_3d(box1, box2):
    """
    Calculate Intersection over Union (IoU) between two axis-aligned 3D bounding boxes.

    :param box1: six numbers, start corner followed by end corner,
        e.g. [x1, y1, z1, x2, y2, z2]
    :param box2: second box, same format as box1
    :return: IoU of the two boxes; 0.0 when they do not overlap with positive
        volume (this includes zero-volume boxes, which previously caused a
        division by zero)
    """
    inter_volume = 1
    for axis in range(3):
        start = max(box1[axis], box2[axis])
        end = min(box1[axis + 3], box2[axis + 3])
        if start >= end:
            # No overlap along this axis. Returning here also keeps the
            # division below away from a zero union for degenerate boxes.
            return 0.0
        inter_volume *= end - start

    box1_volume = (box1[3] - box1[0]) * (box1[4] - box1[1]) * (box1[5] - box1[2])
    box2_volume = (box2[3] - box2[0]) * (box2[4] - box2[1]) * (box2[5] - box2[2])

    # A positive overlap implies both boxes have positive volume, so the union is > 0.
    return inter_volume / (box1_volume + box2_volume - inter_volume)

def compute_precision_recall(pred_boxes_lst, gt_boxes_lst, iou_threshold=0.5):
    """
    Compute the precision/recall curve of one class over a set of images.

    Within each image, predictions are visited from most to least confident and
    greedily matched to the not-yet-matched ground-truth box with the highest
    IoU. A prediction is a true positive when that IoU reaches iou_threshold,
    otherwise a false positive. All predictions are then pooled and ranked by
    confidence to build the cumulative curve.

    :param pred_boxes_lst: list of instances, each instance is a list of predicted boxes, each box is a tuple of [bbox, confidence].
    e.g. [[([x1, y1, z1, x2, y2, z2], confidence), ...], ...]
    :param gt_boxes_lst: list of instances, each instance is a list of ground truth boxes
    e.g. [[[x1, y1, z1, x2, y2, z2], ...], ...]
    :param iou_threshold: minimum IoU for a prediction to match a ground-truth box
    :return: (precision, recall, probs) arrays, one entry per prediction, ordered
        by decreasing confidence. All three are empty when there is no image.
        Recall is all zeros when there is no ground-truth box at all.
    """
    all_tp = []
    all_fp = []
    all_probs = []
    total_gt_boxes = 0
    for pred_boxes, gt_boxes in zip(pred_boxes_lst, gt_boxes_lst):
        pred_boxes = sorted(pred_boxes, key=lambda x: x[1], reverse=True)
        tp = np.zeros(len(pred_boxes))
        fp = np.zeros(len(pred_boxes))
        probs = np.zeros(len(pred_boxes))
        total_gt_boxes += len(gt_boxes)

        # A set gives O(1) "already matched" checks.
        matched_gt = set()

        for pred_idx, pred in enumerate(pred_boxes):
            best_iou = 0
            best_gt_idx = -1
            for gt_idx, gt in enumerate(gt_boxes):
                if gt_idx in matched_gt:
                    continue
                iou_value = iou_3d(pred[0], gt)
                if iou_value > best_iou:
                    best_iou = iou_value
                    best_gt_idx = gt_idx

            # Require an actual candidate: with iou_threshold <= 0 a prediction
            # that overlaps nothing must not be counted as a true positive.
            if best_gt_idx != -1 and best_iou >= iou_threshold:
                tp[pred_idx] = 1
                matched_gt.add(best_gt_idx)
            else:
                fp[pred_idx] = 1
            probs[pred_idx] = pred[1]

        all_tp.append(tp)
        all_fp.append(fp)
        all_probs.append(probs)

    if not all_tp:
        # np.concatenate rejects an empty list; return empty curves instead.
        return np.zeros(0), np.zeros(0), np.zeros(0)

    tp = np.concatenate(all_tp)
    fp = np.concatenate(all_fp)
    probs = np.concatenate(all_probs)

    # Stable sort keeps tied confidences in a deterministic order.
    sorted_indices = np.argsort(-probs, kind="stable")
    tp = tp[sorted_indices]
    fp = fp[sorted_indices]
    probs = probs[sorted_indices]

    cumulative_tp = np.cumsum(tp)
    cumulative_fp = np.cumsum(fp)
    # Every ranked prediction is either a TP or an FP, so the denominator is >= 1.
    precision = cumulative_tp / (cumulative_tp + cumulative_fp)
    if total_gt_boxes > 0:
        recall = cumulative_tp / total_gt_boxes
    else:
        # Nothing to find: report zero recall instead of NaN from 0 / 0.
        recall = np.zeros_like(cumulative_tp)

    return precision, recall, probs

def compute_ap(precision, recall, eleven_points_avg=False):
    """
    Compute Average Precision (AP) given precision and recall arrays.

    :param precision: precision values ordered by decreasing confidence
    :param recall: recall values in the same order as precision
    :param eleven_points_avg: if True, average the interpolated precision at the
        11 recall levels 0.0, 0.1, ..., 1.0; otherwise sum the area under the
        interpolated curve at every point where recall changes
    :return: the AP value
    """
    # Pad both curves with sentinel end points (recall 0 -> 1, precision 1 -> 0).
    padded_precision = np.concatenate(([1.0], precision, [0.0]))
    padded_recall = np.concatenate(([0.0], recall, [1.0]))

    # Smooth the precision curve: each point becomes the maximum of itself and
    # everything to its right (running maximum taken from the end).
    envelope = np.maximum.accumulate(padded_precision[::-1])[::-1]

    if eleven_points_avg:
        total = 0
        for recall_level in np.linspace(0, 1, 11):
            reached = padded_recall >= recall_level
            total += np.max(envelope[reached]) if reached.any() else 0
        return total / 11

    # Area under the curve, accumulated only where recall actually changes.
    steps = np.where(padded_recall[1:] != padded_recall[:-1])[0]
    widths = padded_recall[steps + 1] - padded_recall[steps]
    return np.sum(widths * envelope[steps + 1])

def calculate_ap(predictions, ground_truths, iou_threshold=0.5, eleven_points_avg=False):
    """
    Calculate the Average Precision of one class over multiple images at a given IoU threshold.

    :param predictions: list of list of predictions per image, each prediction is a list of [bbox, confidence]
    :param ground_truths: list of list of ground truth boxes per image
    :param iou_threshold: IoU needed for a prediction to count as a match (0.5 gives AP50)
    :param eleven_points_avg: forwarded to compute_ap to select 11-point interpolation
    :return: the AP value, or 0 when the precision curve is empty
    """
    precisions, recalls, _ = compute_precision_recall(predictions, ground_truths, iou_threshold)

    # Only build the curve when there is at least one ranked prediction.
    if len(precisions) != 0:
        return compute_ap(precisions, recalls, eleven_points_avg=eleven_points_avg)
    return 0


Example Usage

In [9]:
# Predictions for each image in the ENTIRE test set / validation set / training set
# of ONE class, e.g. aneurysm. Each prediction is (bbox, confidence).
predictions_IA = [
    [  # Prediction for image 1
        ([50, 50, 50, 150, 150, 150], 0.6),
        ([10, 10, 10, 80, 80, 80], 0.5),
    ],
    [  # Prediction for image 2
        ([20, 20, 20, 70, 70, 70], 0.4),
        ([60, 60, 60, 120, 120, 120], 0.1),
    ],
    [  # Prediction for image 3
        ([40, 40, 40, 160, 160, 160], 0.7),
        ([35, 35, 35, 105, 105, 105], 0.2),
    ],
]
# Ground truths for each image in the ENTIRE test set / validation set / training set
# of ONE class, e.g. aneurysm.
ground_truths_IA = [
    [  # Ground truth for image 1
        [40, 40, 40, 160, 160, 160],
        [35, 35, 35, 105, 105, 105],
    ],
    [  # Ground truth for image 2
        [15, 15, 15, 75, 75, 75],
        [65, 65, 65, 130, 130, 130],
    ],
    [  # Ground truth for image 3
        [50, 50, 50, 150, 150, 150],
        [10, 10, 10, 80, 80, 80],
    ],
]

# The reported AP is the mean of the 11-point APs at IoU thresholds 0.15 and 0.25.
ap15, ap25 = (
    calculate_ap(predictions_IA, ground_truths_IA, iou_threshold=threshold, eleven_points_avg=True)
    for threshold in (0.15, 0.25)
)
ap_IA = (ap15 + ap25) / 2
print(f'AP: {ap_IA}')

AP: 0.7803030303030303


# Segmentation Part
Definition of Metrics

In [17]:
import numpy as np
from medpy.metric.binary import hd95

def dice_score(pred, gt):
    """
    Calculate Dice score between two binary masks.

    Both inputs are converted to boolean (any non-zero value is foreground).

    :param pred: predicted mask
    :param gt: ground-truth mask, same shape as pred
    :return: 2 * |pred & gt| / (|pred| + |gt|); 0.0 when both masks are empty
    """
    pred = np.asarray(pred, dtype=bool)
    gt = np.asarray(gt, dtype=bool)

    total_foreground = np.count_nonzero(pred) + np.count_nonzero(gt)
    if total_foreground == 0:
        # Both masks empty: the ratio is 0 / 0, which used to raise ZeroDivisionError.
        # NOTE(review): 0.0 is the conservative choice; confirm the challenge's convention.
        return 0.0

    intersection = np.count_nonzero(pred & gt)
    return 2 * intersection / total_foreground

def hausdorff_distance_unified(pred, gt, baseline, voxel_spacing):
    """
    Calculate the 95th-percentile Hausdorff distance (HD95) between two binary masks,
    then unify the result to (0-1) with a baseline.

    The score is 1 - HD95(pred, gt) / HD95(baseline, gt), clamped at 0, so 1 means a
    perfect boundary and 0 means no better than the baseline.

    :param pred: predicted mask
    :param gt: ground-truth mask
    :param baseline: baseline prediction mask used as the normalisation reference
    :param voxel_spacing: voxel spacing passed to hd95
    :return: unified score; higher is better
    """
    # NOTE(review): hd95 presumably raises when a mask has no foreground -
    # confirm against the medpy documentation and decide how callers should score that.
    pred = np.bool_(pred)
    gt = np.bool_(gt)
    baseline = np.bool_(baseline)
    hd = hd95(pred, gt, voxel_spacing)
    hd_baseline = hd95(baseline, gt, voxel_spacing)

    if hd_baseline == 0:
        # The ratio is undefined for a zero baseline distance. Avoid the division
        # (which gave inf/NaN): a prediction at distance 0 scores 1, otherwise 0.
        return 1.0 if hd == 0 else 0

    hd = 1 - hd / hd_baseline
    if hd < 0:
        hd = 0
    return hd

Example Usage

In [20]:
# Each metric of one type of lesion is calculated for each lesion (i.e. each bounding box),
# then averaged across all lesions.
dice_scores = []
hd_scores = []

# Example: 160*200*200 image, spacing is (0.8, 0.6, 0.6),
# lesion ground-truth bounding box at [x1, y1, z1, x2, y2, z2] = [30, 30, 30, 50, 50, 50]
volume_shape = (160, 200, 200)
spacing = (0.8, 0.6, 0.6)
bbox_gt = [30, 30, 30, 50, 50, 50]

label_img = np.zeros(volume_shape)
label_img[30:50, 30:50, 30:50] = 1
pred_img = np.zeros(volume_shape)
pred_img[32:48, 31:49, 34:45] = 1
baseline_pred_img = np.zeros(volume_shape)  # baseline prediction made by simple thresholding
baseline_pred_img[33:47, 32:48, 36:42] = 1

# Crop every volume to the ground-truth box; the metrics are computed inside it.
bbox_slices = tuple(slice(start, stop) for start, stop in zip(bbox_gt[:3], bbox_gt[3:]))
pred_img_in_bbox = pred_img[bbox_slices]
label_img_in_bbox = label_img[bbox_slices]
baseline_pred_img_in_bbox = baseline_pred_img[bbox_slices]

dice = dice_score(pred_img_in_bbox, label_img_in_bbox)
dice_scores.append(dice)
hd = hausdorff_distance_unified(pred_img_in_bbox, label_img_in_bbox, baseline_pred_img_in_bbox, spacing)
hd_scores.append(hd)

# Average over all lesions (a single one in this example).
total_dice = np.mean(dice_scores)
total_hd = np.mean(hd_scores)

print(f'Dice: {total_dice}, HD: {total_hd}')


Dice: 0.5673352435530086, HD: 0.38350379760491016


# Clinical Part
## Stenosis percentage
Definition of Metrics

In [5]:
import numpy as np
import matplotlib.pyplot as plt
from skimage import io, measure, morphology
from scipy.ndimage import distance_transform_edt

def max_and_min_diameters(segmentation_image, spacing):
    """
    Estimate the largest and smallest vessel diameters along the skeleton of a segmentation.

    :param segmentation_image: Binary segmentation image with the vessel as 1 and background as 0.
    :param spacing: per-axis sampling passed to the Euclidean distance transform
    :return: (max_diameter, min_diameter) over all skeleton points
    """
    foreground = segmentation_image > 0
    centerline = morphology.skeletonize(foreground)

    # Distance from each foreground voxel to the nearest background voxel.
    distance_to_wall = distance_transform_edt(foreground, sampling=spacing)

    # The diameter at a skeleton point is taken as twice its distance to the wall.
    diameters = 2 * distance_to_wall[np.where(centerline)]

    return np.max(diameters), np.min(diameters)

Example Usage

In [8]:
from skimage.morphology import disk

def generate_example_image(disk_radii):
    """
    Build a toy vessel volume: one 50x50 slice per radius, each holding a centred disk.

    :param disk_radii: disk radius (in pixels) for each successive slice
    :return: array of shape (len(disk_radii), 50, 50) with the disks set to 1
    """
    center = 25
    volume = np.zeros([len(disk_radii), 50, 50])
    for slice_idx, radius in enumerate(disk_radii):
        low, high = center - radius, center + radius + 1
        volume[slice_idx, low:high, low:high] = disk(radius)

    return volume

# Per-axis spacing, forwarded to the distance transform inside max_and_min_diameters.
spacing = (0.8, 0.6, 0.6)

# Ground truth: ordinary vessel drawn with radius 5 px (11 px across), narrowing to
# radius 2 px (5 px across) at the stenosis.
gt_label = generate_example_image([5, 5, 5, 4, 3, 2, 3, 4, 5, 5, 5])
# Prediction: widest slice has radius 4 px (9 px across), narrowest radius 1 px (3 px across).
pred_label = generate_example_image([4, 3, 2, 1, 2, 3, 4])
# Note that we have labelled the ordinary vessel beside the stenosis site in our test set GT
# using another label, so we can calculate the diameter of the ordinary vessel.
gt_max, gt_min = max_and_min_diameters(gt_label, spacing)
# Only the minimum diameter of the prediction is used.
_, pred_min = max_and_min_diameters(pred_label, spacing)
# Both percentages are measured against the ground-truth ordinary-vessel diameter.
gt_percentage = (gt_max - gt_min) / gt_max
pred_percentage = (gt_max - pred_min) / gt_max
print(f'Ground truth vessel percentage: {gt_percentage}, Predicted vessel percentage: {pred_percentage}, Difference: {abs(gt_percentage - pred_percentage)}')

Ground truth vessel percentage: 0.45943752238266466, Predicted vessel percentage: 0.6581182706210862, Difference: 0.19868074823842152


## Aneurysm long and short axes length
Definition of Metrics

In [1]:
import os
from tqdm import tqdm
from skimage import measure
from scipy.spatial.distance import pdist, squareform
import numpy as np

def max_diameter_short_radius(arr):
    """
    Measure, in pixel units, the longest chord of a 2D binary mask and a short-axis length.

    :param arr: 2D array; pixels equal to 1 are treated as the object
    :return: (max_diameter, short_radius), where max_diameter is the largest
        distance between any two object pixels, and short_radius is the distance
        between the object pixel farthest from the long axis and the object
        pixel closest to it
    """
    # Coordinates of every pixel equal to 1, in row-major order.
    coordinates = np.column_stack(np.where(arr == 1))

    # Full pairwise distance matrix; its largest entry is the long axis.
    pairwise = squareform(pdist(coordinates))
    i, j = np.unravel_index(np.argmax(pairwise), pairwise.shape)
    max_diameter = pairwise[i, j]

    # Direction perpendicular to the long axis, anchored at its midpoint.
    midpoint = (coordinates[i] + coordinates[j]) / 2
    axis_vector = coordinates[j] - coordinates[i]
    perp_vector = np.array([-axis_vector[1], axis_vector[0]])

    # Absolute offset of every pixel from the long axis along the perpendicular.
    perp_offsets = np.abs(np.dot(coordinates - midpoint, perp_vector))
    farthest = np.argmax(perp_offsets)
    nearest = np.argmin(perp_offsets)

    # NOTE(review): this spans farthest-to-nearest pixel rather than the two
    # opposite extremes across the axis - confirm this is the intended short axis.
    short_radius = np.linalg.norm(coordinates[farthest] - coordinates[nearest])
    return max_diameter, short_radius

def get_2d_diameters(label_arr, nodule_spacing):
    """
    Measure the long and short axes of a lesion on its largest slice.

    The slice along the first axis with the largest label sum is selected and
    measured in pixels, then both lengths are scaled by nodule_spacing[1].

    :param label_arr: 3D label volume
    :param nodule_spacing: per-axis spacing; only element 1 is used
    :return: (max_diameter, short_diameter) after scaling
    """
    # Pick the slice whose label sum is largest.
    slice_sums = np.sum(label_arr, axis=(1, 2))
    largest_slice = label_arr[np.argmax(slice_sums)]

    long_px, short_px = max_diameter_short_radius(largest_slice)

    # NOTE(review): a single scale factor assumes both in-plane axes share the
    # same spacing - confirm for data with anisotropic in-plane spacing.
    in_plane_spacing = nodule_spacing[1]
    return long_px * in_plane_spacing, short_px * in_plane_spacing

Example Usage

In [2]:
from skimage.morphology import ball
import numpy as np

def generate_example_image(disk_radius):
    """
    Build a toy 100x100x100 volume containing one ball whose corner sits at index 50 on every axis.

    :param disk_radius: radius passed to ball()
    :return: the volume, with the ball footprint copied in
    """
    origin = 50
    volume = np.zeros([100, 100, 100])
    sphere = ball(disk_radius)
    depth, height, width = sphere.shape
    volume[origin:origin + depth, origin:origin + height, origin:origin + width] = sphere
    return volume

spacing = (0.8, 0.6, 0.6)

gt_label = generate_example_image(5)
pred_label = generate_example_image(4)
# Box starting at index 50 on every axis (where the example ball is placed)
# and 2 * 5 + 1 voxels wide, sized for the radius-5 ground-truth ball.
bbox = [50, 50, 50, 50 + 2 * 5 + 1, 50 + 2 * 5 + 1, 50 + 2 * 5 + 1]

gt_label_in_bbox = gt_label[bbox[0]:bbox[3], bbox[1]:bbox[4], bbox[2]:bbox[5]]
pred_label_in_bbox = pred_label[bbox[0]:bbox[3], bbox[1]:bbox[4], bbox[2]:bbox[5]]

# Measure inside the bounding box. The crops were previously computed but unused,
# and the full volumes were measured instead.
gt_max_diameter, gt_short_diameter = get_2d_diameters(gt_label_in_bbox, spacing)
pred_max_diameter, pred_short_diameter = get_2d_diameters(pred_label_in_bbox, spacing)
print(f'Ground truth max diameter: {gt_max_diameter}, Predicted max diameter: {pred_max_diameter}, Difference: {abs(gt_max_diameter - pred_max_diameter)}')
print(f'Ground truth short diameter: {gt_short_diameter}, Predicted short diameter: {pred_short_diameter}, Difference: {abs(gt_short_diameter - pred_short_diameter)}')



Ground truth max diameter: 6.0, Predicted max diameter: 4.8, Difference: 1.2000000000000002
Ground truth short diameter: 4.242640687119285, Predicted short diameter: 3.394112549695428, Difference: 0.8485281374238567
