In [3]:
import pandas as pd
import numpy as np

def summarize_ci(scores, B=10_000, alpha=0.05, seed=42):
    """Return the sample median together with a percentile-bootstrap interval.

    NaN entries are discarded before anything is computed.

    Parameters
    ----------
    scores : array-like
        Values convertible to float.
    B : int
        Number of bootstrap resamples.
    alpha : float
        Total tail mass left outside the interval (0.05 gives a 95% interval).
    seed : int
        Seed passed to ``np.random.default_rng`` so repeated calls agree.

    Returns
    -------
    tuple of float
        ``(median, lower, upper)``. All three are NaN when no non-NaN value
        remains; with a single remaining value, that value is returned three
        times.
    """
    values = np.asarray(scores, dtype=float)
    values = values[np.logical_not(np.isnan(values))]
    n_obs = values.size

    # Degenerate samples: there is nothing meaningful to resample.
    if n_obs == 0:
        return np.nan, np.nan, np.nan
    if n_obs == 1:
        only = float(values[0])
        return only, only, only

    generator = np.random.default_rng(seed)
    # Each row holds the indices of one resample of size n_obs, drawn with replacement.
    resample_idx = generator.integers(0, n_obs, size=(B, n_obs))
    resampled_medians = np.median(values[resample_idx], axis=1)

    lower_pct = 100 * alpha / 2
    upper_pct = 100 * (1 - alpha / 2)
    lower, upper = np.percentile(resampled_medians, [lower_pct, upper_pct])

    point_estimate = np.median(values)
    return float(point_estimate), float(lower), float(upper)


# Per-row segmentation metrics for the MELD baseline and for one experiment of
# "my model". Rows of the two files are paired by position, so both files must
# contain the same number of rows in the same order.
meld = pd.read_csv("meld_results.csv")
# Other experiment files that can be swapped in below:
#   exp3_dice_lobe_results.csv
#   exp3_dice_hemi_results.csv
#   exp3_dice_full_text_results.csv
#   exp2_dice_results.csv
#   exp1_loss_results.csv
my_model = pd.read_csv("exp3_dice_lobe+hemi_results.csv")

# Positional pairing is only meaningful when both tables have the same length;
# fail loudly instead of raising an opaque IndexError or silently dropping rows.
if len(meld) != len(my_model):
    raise ValueError(
        f"Row count mismatch: meld has {len(meld)} rows, "
        f"my_model has {len(my_model)} rows; rows cannot be paired by position."
    )

# --- collect the non-zero values
# A row is kept only when BOTH models have a Dice score above this threshold.
DICE_THRESHOLD = 0.001
metrics = ["dice", "iou", "ppv"]

# .to_numpy() keeps the comparison positional, independent of the frames' indexes.
both_non_zero = (meld["dice"].to_numpy() > DICE_THRESHOLD) & (
    my_model["dice"].to_numpy() > DICE_THRESHOLD
)
meld_non_zero = {m: meld.loc[both_non_zero, m].tolist() for m in metrics}
my_model_non_zero = {m: my_model.loc[both_non_zero, m].tolist() for m in metrics}


def _print_summary(title, values_by_metric):
    """Print, for every metric, the sample count and the median with its bootstrap CI."""
    print(title)
    for name, values in values_by_metric.items():
        median, low, up = summarize_ci(values)
        print(len(values))
        print(f"{name:>4}: {median:.3f} (95% CI {low:.3f}-{up:.3f})")


# --- compute the confidence intervals
_print_summary("MELD results (median [95% CI])", meld_non_zero)
_print_summary("\nMy Model results (median [95% CI])", my_model_non_zero)

MELD results (median [95% CI])
52
dice: 0.566 (95% CI 0.433-0.625)
52
 iou: 0.395 (95% CI 0.277-0.455)
52
 ppv: 0.507 (95% CI 0.389-0.680)

My Model results (median [95% CI])
52
dice: 0.590 (95% CI 0.548-0.666)
52
 iou: 0.419 (95% CI 0.377-0.499)
52
 ppv: 0.675 (95% CI 0.572-0.790)


In [6]:
import pandas as pd
import numpy as np

# Cluster-level PPV of the MELD baseline, computed from the per-row counts of
# true-positive and false-positive clusters. Only the MELD results are
# evaluated in this cell.
meld = pd.read_csv("meld_results.csv")

FP_COL = "number FP clusters"
TP_COL = "number TP clusters"

fp = meld[FP_COL].to_numpy(dtype=float)
tp = meld[TP_COL].to_numpy(dtype=float)
clusters = tp + fp

# Type 1 (macro average): PPV is computed per row and then averaged over rows.
# A row with neither TP nor FP clusters contributes a PPV of 0.
has_clusters = ~((fp == 0) & (tp == 0))
per_row_ppv = np.divide(
    tp, clusters, out=np.zeros(len(meld), dtype=float), where=has_clusters
)
# An empty table has no defined average; report NaN.
ppv1 = np.sum(per_row_ppv) / len(per_row_ppv) if len(per_row_ppv) else np.nan

# Type 2 (micro average): clusters are pooled over all rows before dividing.
tp_sum = tp.sum()
full_sum = clusters.sum()
# With no clusters at all the pooled PPV is undefined; report NaN instead of
# dividing by zero.
ppv2 = tp_sum / full_sum if full_sum else np.nan

print('PPV cluster Type1: ', ppv1)
print('PPV cluster Type2: ', ppv2)

PPV cluster Type1:  0.6504065040650406
PPV cluster Type2:  0.725
