In [1]:
import SimpleITK as sitk
from glob import glob
from pathlib import Path
import json
import pandas as pd
import os
import tqdm
import numpy as np
from skimage import measure

# Display NumPy floats with 4 decimal places and suppress scientific notation
# for small values, for every array printed in this notebook.
np.set_printoptions(precision=4, suppress=True)

In [2]:
# --- Private (hospital) cohort: input locations -----------------------------
# Detector predictions, ground-truth boxes and per-scan metadata.
path_preds_private = Path(
    "/home/ceballosarroyo.a/workspace/medical/cta-det2/outputs/adeform_decoder_only_non_rec_crop_vessel_pe_gpe_PRIV/inference_66k/predict.csv"
)
private_csv = (
    "/home/ceballosarroyo.a/workspace/medical/cta-det2/labels/hospital_0.4.csv"
)
path_private_metadata = "/home/ceballosarroyo.a/workspace/medical/cta-det2/labels/metadata/hospital_meta.json"

# --- Load everything once the paths are known -------------------------------
df_preds_private = pd.read_csv(path_preds_private)
df_private = pd.read_csv(private_csv)

# The evaluation loop reads "spacing" and "origin" from each scan's entry.
with open(path_private_metadata) as f:
    private_metadata = json.load(f)

In [5]:
# sphere volume formula:


def get_aneurysm_diameter(volume):
    """Return the diameter of the sphere whose volume equals ``volume``.

    Inverts V = 4/3 * pi * r^3 for r and returns 2 * r. The result is in the
    linear unit matching the volume's cubic unit.
    """
    radius_cubed = 3 * volume / (4 * np.pi)
    return 2 * radius_cubed ** (1 / 3)


get_aneurysm_diameter(145.8)

6.530097021476057

In [57]:
# Mean ground-truth box size per axis (w, h, d) over all private-cohort rows.
df_private[["w", "h", "d"]].mean()

w    5.213793
h    5.048276
d    3.606897
dtype: float64

In [59]:
# Mean predicted box size per axis, scaled by 0.4 for comparison with the GT means.
# NOTE(review): 0.4 is presumably the voxel spacing in mm (cf. "hospital_0.4.csv") — confirm.
# This averages every prediction; no probability threshold is applied here.
df_preds_private[["w", "h", "d"]].mean() * 0.4

w    8.344863
h    8.275120
d    6.841162
dtype: float64

In [19]:
df_preds_private.head(2)

Unnamed: 0,seriesuid,probability,coordZ,coordY,coordX,d,h,w
0,CA_00000_0000.nii.gz,0.999941,172.54694,318.60513,306.28152,14.45573,16.51391,16.343042
1,CA_00000_0000.nii.gz,0.927154,131.88417,228.06854,140.72795,18.617321,23.665508,23.169699


In [None]:
def fix_bad_origin(pred, spacing, origin):
    """Scale ``pred`` by ``spacing`` and offset it by ``origin``, with the second component sign-flipped.

    The sign of the second origin component is flipped before it is added,
    and the second component of the sum is flipped again on the way out.
    Component-wise this yields:
    ``[p0*s0 + o0, -(p1*s1) + o1, p2*s2 + o2]``.

    All three arguments are combined element-wise with a length-3 sign
    vector, so they are expected to be length-3 array-likes.
    """
    axis_sign = np.array([1, -1, 1])
    shifted = pred * spacing + origin * axis_sign
    return shifted * axis_sign


In [63]:
# Lesion-level evaluation of the detector on the private cohort.
# For every scan in the GT table, each prediction with probability > 0.95 is
# compared against each GT aneurysm; one [tp, fp, fn] row is appended per scan.
cols = ["tp", "fp", "fn"]
rows = []  # one [tp, fp, fn] list per scan
aneurysms_found = []  # per GT aneurysm: how many predictions matched it
volume_in_voxels = []  # reset here but never filled by this cell
for case_name in df_private["seriesuid"].unique():

    df_case_preds = df_preds_private[df_preds_private["seriesuid"] == case_name]
    # Keep only confident predictions.
    df_case_preds = df_case_preds[df_case_preds["probability"] > 0.95]
    df_case_gt = df_private[df_private["seriesuid"] == case_name]

    # Select by column name so both arrays share the order x, y, z, w, h, d.
    df_case_gt = df_case_gt[["coordX", "coordY", "coordZ", "w", "h", "d"]]
    df_case_preds = df_case_preds[["coordX", "coordY", "coordZ", "w", "h", "d"]]

    if len(df_case_gt) == 0:
        # No GT rows: every retained prediction counts as a false positive.
        # NOTE(review): case names come from df_private itself, so this branch
        # looks unreachable in this cell — confirm.
        new_row = [0, len(df_case_preds), 0]
        rows.append(new_row)
    else:
        matches_scan = []  # becomes a (n_gt, n_pred) match matrix
        aneurysms = df_case_gt.values
        # NOTE(review): tps, fps, fns and `found` below are assigned but never read.
        tps = 0
        fps = 0
        fns = len(aneurysms)
        metadata = private_metadata[case_name]
        # Each row of `aneurysms`: coordX, coordY, coordZ, w, h, d
        for aneurysm in aneurysms:
            gt_x, gt_y, gt_z, gt_w, gt_h, gt_d = aneurysm

            center_gt = [gt_x, gt_y, gt_z]
            # GT radius = half of the largest box side.
            radius_gt = max(gt_w / 2, gt_h / 2, gt_d / 2)
            found = False
            matches_aneurysm = []
            pred_aneurysms = df_case_preds.values
            for pred_aneurysm in pred_aneurysms:
                pred_x, pred_y, pred_z, pred_w, pred_h, pred_d = pred_aneurysm
                # Scale the predicted centre by the scan spacing and shift by the origin.
                # The y origin is subtracted and y is negated below, giving
                # -(y * spacing) + origin for that axis.
                pred_x = pred_x * metadata["spacing"][0] + metadata["origin"][0]
                pred_y = pred_y * metadata["spacing"][1] - metadata["origin"][1]
                pred_z = pred_z * metadata["spacing"][2] + metadata["origin"][2]
                # Scale the predicted box sides by the same spacing.
                pred_w = pred_w * metadata["spacing"][0]
                pred_h = pred_h * metadata["spacing"][1]
                pred_d = pred_d * metadata["spacing"][2]
                center_pred = [pred_x, -pred_y, pred_z]
                radius_pred = max(pred_w / 2, pred_h / 2, pred_d / 2)

                distance_gt = np.linalg.norm(
                    np.array(center_gt) - np.array(center_pred)
                )
                # print(distance_gt, radius_gt, radius)
                # print("Center pred: ", center_pred, "\nCenter gt: ", center_gt)
                # Match when the centre distance is below radius_gt + 0.4 * radius_pred.
                # NOTE(review): radius_pred was already scaled by spacing above, so the
                # extra 0.4 is either a deliberate tolerance or a double conversion —
                # confirm before changing, since it alters the reported metrics.
                if distance_gt < radius_gt + radius_pred * 0.4:
                    matches_aneurysm.append(True)
                else:
                    matches_aneurysm.append(False)
            matches_scan.append(matches_aneurysm)
        # Rows = GT aneurysms, columns = predictions, entries 0/1.
        matches_scan = np.array(matches_scan).astype(int)
        # Number of predictions matching each GT aneurysm.
        true_positive_count = np.sum(matches_scan, axis=1)
        detected_aneurysms_scan = list(true_positive_count)
        aneurysms_found += detected_aneurysms_scan
        # TP = GT aneurysms hit by at least one prediction.
        true_positive_count = np.sum(true_positive_count > 0)
        # FP = predictions that matched no GT aneurysm.
        false_positive_count = np.sum(matches_scan, axis=0)
        false_positive_count = np.sum(false_positive_count == 0)
        false_negative_count = len(aneurysms) - true_positive_count
        new_row = [true_positive_count, false_positive_count, false_negative_count]

        rows.append([true_positive_count, false_positive_count, false_negative_count])
    # Per-scan progress output.
    print("\n", cols)
    print(new_row)


 ['tp', 'fp', 'fn']
[1, 0, 1]

 ['tp', 'fp', 'fn']
[1, 2, 0]

 ['tp', 'fp', 'fn']
[1, 0, 1]

 ['tp', 'fp', 'fn']
[1, 1, 0]

 ['tp', 'fp', 'fn']
[0, 1, 1]

 ['tp', 'fp', 'fn']
[3, 0, 1]

 ['tp', 'fp', 'fn']
[0, 0, 2]

 ['tp', 'fp', 'fn']
[2, 0, 0]

 ['tp', 'fp', 'fn']
[1, 0, 0]

 ['tp', 'fp', 'fn']
[1, 0, 0]

 ['tp', 'fp', 'fn']
[1, 10, 0]

 ['tp', 'fp', 'fn']
[1, 1, 0]

 ['tp', 'fp', 'fn']
[1, 1, 0]

 ['tp', 'fp', 'fn']
[1, 1, 0]

 ['tp', 'fp', 'fn']
[0, 2, 2]

 ['tp', 'fp', 'fn']
[1, 2, 0]

 ['tp', 'fp', 'fn']
[2, 0, 0]

 ['tp', 'fp', 'fn']
[1, 1, 0]

 ['tp', 'fp', 'fn']
[1, 4, 0]

 ['tp', 'fp', 'fn']
[2, 0, 0]

 ['tp', 'fp', 'fn']
[1, 0, 0]

 ['tp', 'fp', 'fn']
[1, 0, 1]

 ['tp', 'fp', 'fn']
[2, 1, 0]

 ['tp', 'fp', 'fn']
[1, 1, 0]

 ['tp', 'fp', 'fn']
[1, 1, 0]

 ['tp', 'fp', 'fn']
[1, 0, 1]

 ['tp', 'fp', 'fn']
[1, 1, 0]

 ['tp', 'fp', 'fn']
[4, 0, 1]

 ['tp', 'fp', 'fn']
[0, 0, 1]

 ['tp', 'fp', 'fn']
[1, 2, 0]

 ['tp', 'fp', 'fn']
[2, 4, 0]

 ['tp', 'fp', 'fn']
[2, 1, 0]

 ['tp'

In [71]:
# Per-scan [tp, fp, fn] table for the private cohort.
df_results_private = pd.DataFrame(data=rows, columns=cols)

# Scans with neither a TP nor a FN, i.e. scans with no GT aneurysm.
df_results_healthy = df_results_private.loc[
    lambda res: res["tp"].eq(0) & res["fn"].eq(0)
]

# (healthy scans with zero false positives, number of healthy scans)
(
    len(df_results_healthy.loc[df_results_healthy["fp"].eq(0)]),
    len(df_results_healthy),
)

(0, 0)

In [72]:
# Cohort-wide totals, displayed as (TP, FP, FN).
tuple(df_results_private[metric].sum() for metric in ("tp", "fp", "fn"))

(45, 46, 13)

In [73]:
# Lesion-level sensitivity TP / (TP + FN), using the totals displayed by the
# previous cell (45 TP, 13 FN -> 58 aneurysms). These numbers are hard-coded
# and will not follow a re-run of the evaluation.
45 / 58

0.7758620689655172

In [69]:
# Number of distinct scans (seriesuid values) in the private GT table.
df_private["seriesuid"].nunique()

38

In [70]:
# False positives per scan: 46 total FP (from the totals cell) over 38 scans.
# Hard-coded; update by hand if the evaluation is re-run.
46 / 38

1.2105263157894737

In [49]:
# NOTE(review): presumably the sensitivity of a different run or threshold
# (49 of 58 aneurysms). The cell that produced 49 is not in this notebook —
# confirm the source or remove this cell.
49 / 58

0.8448275862068966

In [None]:
# Each voxel has 0.4 mm spacing. To convert a physical (mm) length or coordinate to voxel units:

# 1. divide by 0.4
# 2. round to the nearest integer

In [202]:
# --- External cohort: input locations ---------------------------------------
path_glia_external = Path(
    "/home/ceballosarroyo.a/workspace/medical/.outputs_to_compare/glia_external/"
)
external_csv = (
    "/home/ceballosarroyo.a/workspace/medical/cta-det2/labels/external_0.4_crop.csv"
)

# One NIfTI output per scan; the file name is used as the scan id in later cells.
files_glia = list(path_glia_external.glob("*.nii.gz"))
# Ground-truth table for the same cohort.
df_gt = pd.read_csv(external_csv)

In [247]:
# Our detector's predictions on the external cohort, kept only above a
# probability cut-off.
path_ours = "/home/ceballosarroyo.a/workspace/medical/cta-det2/outputs/adeform_decoder_only_non_rec_crop_vessel_pe_gpe_EXT/inference_final/predict.csv"
t = 0.9  # probability threshold; predictions at or below it are dropped
df_ours = pd.read_csv(path_ours).loc[lambda preds: preds["probability"] > t]
df_ours

Unnamed: 0,seriesuid,probability,coordZ,coordY,coordX,d,h,w
0,ExtA0001.nii.gz,0.999944,203.10632,217.78381,317.24124,14.051196,19.045586,18.773787
3,ExtA0002.nii.gz,0.995259,143.36404,310.64230,335.04596,18.705542,23.615960,23.119833
4,ExtA0003.nii.gz,0.999890,183.24693,291.37690,362.99872,12.243849,15.275521,15.238411
5,ExtA0003.nii.gz,0.978573,67.84530,237.62146,209.86569,12.718115,13.703934,13.909168
9,ExtA0004.nii.gz,0.999928,118.26263,186.06973,355.70694,23.580835,29.897550,28.380339
...,...,...,...,...,...,...,...,...
639,ExtB0062.nii.gz,0.998699,63.91131,374.89633,266.86548,23.236143,30.307926,28.635010
646,ExtB0063.nii.gz,0.998836,184.72472,199.74622,283.46414,7.124098,9.086171,9.674917
653,ExtB0064.nii.gz,0.999993,202.41454,238.72940,253.55380,20.457235,20.094625,19.976519
654,ExtB0064.nii.gz,0.983821,203.50590,226.62917,306.28503,8.322208,9.674705,10.206977


In [248]:
# Lesion-level evaluation of our detector on the external cohort.
# For each scan in `files_glia`, every retained prediction in `df_ours` is
# compared with every GT aneurysm in `df_gt`; one [tp, fp, fn] row is
# appended to `rows_ours` per scan. (`json` is imported in the first cell.)
cols = ["tp", "fp", "fn"]
rows_ours = []  # one [tp, fp, fn] list per scan
aneurysms_found = []  # per GT aneurysm: how many predictions matched it
volume_in_voxels = []  # reset here but not filled by this cell
metadata_file = "/home/ceballosarroyo.a/workspace/medical/cta-det2/labels/metadata/external_crop_meta.json"
# Use a context manager so the metadata file handle is closed right away.
with open(metadata_file) as f:
    metadata = json.load(f)

for glia_file in files_glia:
    case_name = glia_file.name
    df_case = df_gt[df_gt["seriesuid"] == case_name]
    df_case_pred = df_ours[df_ours["seriesuid"] == case_name]

    # Per-scan geometry used to bring predicted centres into the GT frame.
    origin = np.array(metadata[case_name]["origin"])
    spacing = np.array(metadata[case_name]["spacing"])

    if len(df_case) == 0:
        # No GT rows for this scan: every retained prediction is a false positive.
        new_row = [0, len(df_case_pred), 0]
        rows_ours.append(new_row)
        continue

    aneurysms = df_case.values
    preds = df_case_pred.values

    # Prediction geometry does not depend on the GT aneurysm, so compute it
    # once per scan instead of once per (GT, prediction) pair.
    pred_geometry = []
    for prop in preds:
        # Positional read: the six values after the first two columns. The
        # displayed frame lists them as coordZ, coordY, coordX, d, h, w.
        # Only the maximum of the three sizes is used, so their order is irrelevant.
        pred_z, pred_y, pred_x, pred_d, pred_h, pred_w = prop[2:]
        center_pred = np.array([pred_x, pred_y, pred_z]) * spacing + origin
        # NOTE(review): the size columns are NOT multiplied by spacing here,
        # while the centre distance is computed after scaling by spacing. If the
        # sizes are in voxels this mixes units and loosens the match criterion —
        # confirm before changing, since it alters the reported metrics.
        radius = np.max((pred_d / 2, pred_h / 2, pred_w / 2))
        pred_geometry.append((center_pred, radius))

    matches_scan = []  # becomes a (n_gt, n_pred) match matrix
    # GT rows are read positionally; the expected column layout is:
    # seriesuid,coordX,coordY,coordZ,w,h,d,lesion
    for aneurysm in aneurysms:
        gt_x, gt_y, gt_z, gt_w, gt_h, gt_d = aneurysm[1:-1]
        center_gt = np.array([gt_x, gt_y, gt_z])
        # GT radius = half of the largest box side.
        radius_gt = max(gt_w / 2, gt_h / 2, gt_d / 2)

        matches_aneurysm = []
        for center_pred, radius in pred_geometry:
            distance_gt = np.linalg.norm(center_gt - center_pred)
            # Match when the centres are closer than the sum of the two radii.
            matches_aneurysm.append(bool(distance_gt < radius_gt + radius))
            print(distance_gt, radius_gt, radius)
        matches_scan.append(matches_aneurysm)

    # Rows = GT aneurysms, columns = predictions, entries 0/1.
    matches_scan = np.array(matches_scan).astype(int)
    matches_per_gt = np.sum(matches_scan, axis=1)
    aneurysms_found += list(matches_per_gt)
    # TP = GT aneurysms hit by at least one prediction.
    true_positive_count = np.sum(matches_per_gt > 0)
    # FP = predictions that matched no GT aneurysm.
    false_positive_count = np.sum(np.sum(matches_scan, axis=0) == 0)
    false_negative_count = len(aneurysms) - true_positive_count
    new_row = [true_positive_count, false_positive_count, false_negative_count]
    print(new_row)
    rows_ours.append(new_row)

0.5495174395552817 3.8000000566244125 9.522793
[1, 0, 0]
1.0933959543281522 4.400000065565109 11.80798
[1, 0, 0]
0.8565325268735476 4.000000059604645 7.6377605
79.3951375517996 4.000000059604645 6.954584
[1, 1, 0]
0.7442817806805997 5.40000008046627 14.948775
[1, 0, 0]
0.08046262954285487 3.6000000536441803 7.968532
[1, 0, 0]
57.80518018110415 12.000000178813934 12.099992
1.505590485986148 12.000000178813934 29.56969
106.01107598353327 12.000000178813934 12.20637
[1, 2, 0]
1.5946001480057204 7.000000104308128 14.450423
5.642147658785429 7.000000104308128 6.7680865
18.714297805089576 7.000000104308128 6.6335595
86.01737470410944 7.000000104308128 22.92173
[1, 2, 0]
0.7079046901658472 5.800000086426735 12.2469185
236.7991888181042 5.800000086426735 22.594923
[1, 1, 0]
0.6044411146904811 2.400000035762787 7.0130885
[1, 0, 0]
0.5606860840443189 2.600000038743019 6.464857
83.11194383294975 2.600000038743019 15.8694745
[1, 1, 0]
0.6142773653631769 2.400000035762787 5.8257755
[1, 0, 0]
0.3105

In [249]:
# Per-scan [tp, fp, fn] table built from `rows_ours`.
df_results_internal = pd.DataFrame(data=rows_ours, columns=cols)

# Scans with neither a TP nor a FN, i.e. scans with no GT aneurysm.
df_results_healthy = df_results_internal.loc[
    lambda res: res["tp"].eq(0) & res["fn"].eq(0)
]

# (healthy scans with zero false positives, number of healthy scans)
(
    len(df_results_healthy.loc[df_results_healthy["fp"].eq(0)]),
    len(df_results_healthy),
)

(32, 46)

In [250]:
# GT aneurysms per scan = detected (tp) + missed (fn).
df_results_internal["total_aneurysms"] = df_results_internal["tp"].add(
    df_results_internal["fn"]
)

# (scans where every GT aneurysm was detected, scans with at least one GT aneurysm)
(
    len(df_results_internal.loc[lambda res: res["tp"].gt(0) & res["fn"].eq(0)]),
    len(df_results_internal.loc[lambda res: res["total_aneurysms"].gt(0)]),
)

(88, 92)

In [251]:
# Mean number of false positives per healthy scan (scans with tp == 0 and fn == 0).
df_results_healthy["fp"].sum() / len(df_results_healthy)

0.41304347826086957

In [252]:
# Column-wise totals over all scans.
aggregte = df_results_internal.sum()

# (sensitivity = TP / number of GT rows, number of GT rows,
#  false positives per scan, number of scans)
(
    aggregte["tp"] / len(df_gt),
    len(df_gt),
    aggregte["fp"] / len(files_glia),
    len(files_glia),
)

(0.9603960396039604, 101, 0.8115942028985508, 138)

In [253]:
# Debug check: `matches_scan` is whatever the last scan with GT rows left
# behind in the evaluation loop (rows = GT aneurysms, columns = predictions).
# The sum per row is the number of predictions matching each GT aneurysm.
true_positive_count = np.sum(matches_scan, axis=1)
true_positive_count

array([1])

In [67]:
# NOTE(review): hard-coded ratio. 126 equals the sum(aneurysms_found) value
# shown in the next cell; the origin of 649 is not visible in this notebook —
# confirm what it measures.
649 / 126

5.150793650793651

In [26]:
# Total number of (GT aneurysm, prediction) matches accumulated by the most
# recently run evaluation loop. Each entry of `aneurysms_found` is how many
# predictions matched one GT aneurysm.
sum(aneurysms_found)

126

In [11]:
# Distinct values in the last mask loaded. `label` is only assigned inside the
# mask-loading loop further down the notebook, so this cell depends on that
# cell having been run first.
np.unique(label)

array([0, 1], dtype=uint8)

In [9]:
# Older variant of the per-scan evaluation of `df_ours` against `df_gt`.
# It compares centres without applying any origin offset and divides the
# centre distance by 0.4 before matching.
cols = ["tp", "fp", "fn"]
rows_ours = []  # one [tp, fp, fn] list per scan
aneurysms_found = []  # per GT aneurysm: how many predictions matched it
volume_in_voxels = []  # reset here but not filled by this cell

# Iterate over the list that is actually indexed. The original looped over
# range(len(files_glia)) while reading files_glia_internal[i], which raises
# IndexError or silently drops scans when the two lists differ in length.
# NOTE(review): `files_glia_internal` is not assigned in any cell visible in
# this notebook — make sure it is defined before running this cell.
for glia_file in files_glia_internal:
    case_name = glia_file.name
    df_case = df_gt[df_gt["seriesuid"] == case_name]
    df_case_pred = df_ours[df_ours["seriesuid"] == case_name]

    if len(df_case) == 0:
        # No GT rows for this scan: every prediction is a false positive.
        new_row = [0, len(df_case_pred), 0]
        rows_ours.append([0, len(df_case_pred), 0])
    else:
        aneurysms = df_case.values
        preds = df_case_pred.values
        matches_scan = []  # becomes a (n_gt, n_pred) match matrix
        for aneurysm in aneurysms:
            # Positional read of the six values between the first and last column.
            # NOTE(review): this unpacks them as z, y, x, whereas the other
            # evaluation cells unpack the same slice as x, y, z — confirm the
            # column order of the GT table this cell was written for.
            gt_z, gt_y, gt_x, gt_w, gt_h, gt_d = aneurysm[1:-1]
            center_gt = [gt_x, gt_y, gt_z]
            # GT radius = half of the largest box side.
            radius_gt = max(gt_w / 2, gt_h / 2, gt_d / 2)

            matches_aneurysm = []
            for prop in preds:
                # Positional read of the six values after the first two columns.
                pred_x, pred_y, pred_z, pred_w, pred_h, pred_d = prop[2:]
                center_pred = [pred_x, pred_y, pred_z]
                # Presumably 0.4 is the voxel spacing in mm, converting a mm
                # distance to voxels — TODO confirm.
                distance_gt = (
                    np.linalg.norm(np.array(center_gt) - np.array(center_pred)) / 0.4
                )
                radius = np.max((pred_w / 2, pred_h / 2, pred_d / 2))
                print(distance_gt, radius_gt, radius)
                # Match when the scaled distance is below the sum of the radii.
                matches_aneurysm.append(bool(distance_gt < radius_gt + radius))
            matches_scan.append(matches_aneurysm)

        # Rows = GT aneurysms, columns = predictions, entries 0/1.
        matches_scan = np.array(matches_scan).astype(int)
        true_positive_count = np.sum(matches_scan, axis=1)
        detected_aneurysms_scan = list(true_positive_count)
        aneurysms_found += detected_aneurysms_scan
        # TP = GT aneurysms hit by at least one prediction.
        true_positive_count = np.sum(true_positive_count > 0)
        # FP = predictions that matched no GT aneurysm.
        false_positive_count = np.sum(matches_scan, axis=0)
        false_positive_count = np.sum(false_positive_count == 0)
        false_negative_count = len(aneurysms) - true_positive_count
        new_row = [true_positive_count, false_positive_count, false_negative_count]

        rows_ours.append(
            [true_positive_count, false_positive_count, false_negative_count]
        )
    # Per-scan progress output.
    print("\n", cols)
    print(new_row)

[<skimage.measure._regionprops.RegionProperties at 0x7fe33621e220>,
 <skimage.measure._regionprops.RegionProperties at 0x7fe370a92880>,
 <skimage.measure._regionprops.RegionProperties at 0x7fe370a92370>,
 <skimage.measure._regionprops.RegionProperties at 0x7fe370a92fa0>,
 <skimage.measure._regionprops.RegionProperties at 0x7fe370a921c0>,
 <skimage.measure._regionprops.RegionProperties at 0x7fe370a92a90>,
 <skimage.measure._regionprops.RegionProperties at 0x7fe370a92b80>,
 <skimage.measure._regionprops.RegionProperties at 0x7fe370a92550>]

In [None]:
# --- External cohort inputs (same locations as the earlier loading cell) ----
path_glia_external = Path(
    "/home/ceballosarroyo.a/workspace/medical/.outputs_to_compare/glia_external/"
)
external_csv = (
    "/home/ceballosarroyo.a/workspace/medical/cta-det2/labels/external_0.4_crop.csv"
)
files_glia = list(path_glia_external.glob("*.nii.gz"))
df_gt = pd.read_csv(external_csv)

# case file name -> {"props": list of region properties, one per connected component}
dict_props = {}

for i, glia_file in enumerate(files_glia):
    case_name = glia_file.name
    print(case_name)
    case_entry = dict_props[case_name] = {}

    # Read the volume and cast it to uint8 before labelling.
    header = sitk.ReadImage(str(glia_file))
    label = sitk.GetArrayFromImage(header).astype(np.uint8)

    # Label connected components (0 is background) and measure each one.
    all_labels = measure.label(label, background=0)
    props = measure.regionprops(all_labels)
    case_entry["props"] = props

In [260]:
len(files_glia)

138

In [270]:
# Lesion-level evaluation of the connected components stored in `dict_props`
# against the GT table `df_gt`; one [tp, fp, fn] row is appended per scan.
# (`json` is imported in the first cell.)
cols = ["tp", "fp", "fn"]
rows = []  # one [tp, fp, fn] list per scan
aneurysms_found = []  # per GT aneurysm: how many components matched it
volume_in_voxels = []  # prop.area of every component visited (see note below)
metadata_file = "/home/ceballosarroyo.a/workspace/medical/cta-det2/labels/metadata/external_crop_meta.json"
# Use a context manager so the metadata file handle is closed right away.
with open(metadata_file) as f:
    metadata = json.load(f)

for glia_file in files_glia:
    case_name = glia_file.name
    df_case = df_gt[df_gt["seriesuid"] == case_name]
    spacing = np.array(metadata[case_name]["spacing"])
    origin = np.array(metadata[case_name]["origin"])

    # Connected components computed for this scan in the mask-loading cell.
    props = dict_props[case_name]["props"]

    if len(df_case) == 0:
        # No GT rows for this scan: every component is a false positive.
        new_row = [0, len(props), 0]
        rows.append([0, len(props), 0])
    else:
        matches_scan = []  # becomes a (n_gt, n_component) match matrix
        aneurysms = df_case.values
        # GT rows are read positionally; the expected column layout is:
        # seriesuid,coordX,coordY,coordZ,w,h,d,lesion
        for aneurysm in aneurysms:
            gt_x, gt_y, gt_z, gt_w, gt_h, gt_d = aneurysm[1:-1]
            center_gt = np.array([gt_x, gt_y, gt_z])
            # GT radius = half of the largest box side.
            radius_gt = max(gt_w / 2, gt_h / 2, gt_d / 2)

            matches_aneurysm = []
            for prop in props:
                # NOTE(review): every bbox coordinate is scaled by spacing[0]
                # only; this equals per-axis scaling only for isotropic
                # spacing — confirm.
                min_z, min_y, min_x, max_z, max_y, max_x = (
                    np.array(prop.bbox) * spacing[0]
                )
                z_pred, y_pred, x_pred = prop.centroid
                # NOTE(review): this append sits inside the GT loop, so each
                # component's area is recorded once per GT aneurysm of the scan
                # and never for scans without GT — confirm this is intended.
                volume_in_voxels.append(prop.area)
                # Reorder the (z, y, x) centroid to (x, y, z) before scaling.
                center_pred = np.array([x_pred, y_pred, z_pred]) * spacing + origin
                distance_gt = np.linalg.norm(center_gt - center_pred)
                # Component radius = half of the largest scaled bbox extent.
                radius = max(max_z - min_z, max_y - min_y, max_x - min_x) / 2
                print(distance_gt, radius_gt, radius)
                # Match when the centres are closer than the sum of the radii.
                matches_aneurysm.append(bool(distance_gt < radius_gt + radius))
            matches_scan.append(matches_aneurysm)

        # Rows = GT aneurysms, columns = components, entries 0/1.
        matches_scan = np.array(matches_scan).astype(int)
        true_positive_count = np.sum(matches_scan, axis=1)
        detected_aneurysms_scan = list(true_positive_count)
        aneurysms_found += detected_aneurysms_scan
        # TP = GT aneurysms hit by at least one component.
        true_positive_count = np.sum(true_positive_count > 0)
        # FP = components that matched no GT aneurysm.
        false_positive_count = np.sum(matches_scan, axis=0)
        false_positive_count = np.sum(false_positive_count == 0)
        false_negative_count = len(aneurysms) - true_positive_count
        new_row = [true_positive_count, false_positive_count, false_negative_count]

        rows.append([true_positive_count, false_positive_count, false_negative_count])
    # Per-scan progress output.
    print("\n", cols)
    print(new_row)

30.651083453735197 3.8000000566244125 0.6000000089406967
31.20000046491623 3.8000000566244125 0.4000000059604645
44.04305559765908 3.8000000566244125 3.0000000447034836
0.6583296515800078 3.8000000566244125 3.6000000536441803

 ['tp', 'fp', 'fn']
[1, 3, 0]
12.281539704492285 4.400000065565109 1.0000000149011612

 ['tp', 'fp', 'fn']
[0, 1, 1]
22.04719189571027 4.000000059604645 1.0000000149011612
22.842243365325086 4.000000059604645 1.2000000178813934
2.9484282975417693 4.000000059604645 1.2000000178813934
3.617679324625925 4.000000059604645 1.0000000149011612
2.6030751433526707 4.000000059604645 0.800000011920929
23.93430082300334 4.000000059604645 1.2000000178813934
4.466581287057203 4.000000059604645 1.4000000208616257

 ['tp', 'fp', 'fn']
[1, 3, 0]
61.38594267791256 5.40000008046627 1.0000000149011612
61.05930296406192 5.40000008046627 0.800000011920929
61.49113848911361 5.40000008046627 0.4000000059604645
33.54241591672256 5.40000008046627 1.4000000208616257
32.37875146085598 5.400

In [271]:
# Per-scan [tp, fp, fn] table built from `rows`.
df_results_internal = pd.DataFrame(data=rows, columns=cols)

# Scans with neither a TP nor a FN, i.e. scans with no GT aneurysm.
df_results_healthy = df_results_internal.loc[
    lambda res: res["tp"].eq(0) & res["fn"].eq(0)
]

# (healthy scans with zero false positives, number of healthy scans)
(
    len(df_results_healthy.loc[df_results_healthy["fp"].eq(0)]),
    len(df_results_healthy),
)

(1, 46)

In [272]:
# GT aneurysms per scan = detected (tp) + missed (fn).
df_results_internal["total_aneurysms"] = df_results_internal["tp"].add(
    df_results_internal["fn"]
)

# (scans where every GT aneurysm was detected, scans with at least one GT aneurysm)
(
    len(df_results_internal.loc[lambda res: res["tp"].gt(0) & res["fn"].eq(0)]),
    len(df_results_internal.loc[lambda res: res["total_aneurysms"].gt(0)]),
)

(68, 92)

In [273]:
# Column-wise totals over all scans.
aggregte = df_results_internal.sum()

# (sensitivity = TP / number of GT rows, number of GT rows,
#  false positives per scan, number of scans)
(
    aggregte["tp"] / len(df_gt),
    len(df_gt),
    aggregte["fp"] / len(files_glia),
    len(files_glia),
)

(0.7227722772277227, 101, 5.22463768115942, 138)