In [None]:
import os
import pandas as pd
import numpy as np
import plotly.express as px

# Display labels for the reconstruction/kernel combinations, keyed by the
# lower-cased "<reco>_<kernel>" string built further down in the notebook.
_reco_labels = [
    ("aice_body-sharp", "DLR"),
    ("first_body", "MBIR"),
    ("fbp_fc08", "FBP"),
    ("aidr3d_fc08", "HIR"),
    ("asir_standard", "HIR"),
    ("fbp_standard", "FBP"),
]
# For these entries the display label is the key itself with its original capitalisation.
_kernel_labels = [
    "Qr40f-Q3_0.4",
    "Qr40f-Q3_0.6",
    "Qr40f-Q3_1",
    "Br40f-Q3_0.4",
    "Br40f-Q3_0.6",
    "Br40f-Q3_1",
]
newnames_reco = dict(_reco_labels)
newnames_reco.update((label.lower(), label) for label in _kernel_labels)

# Anonymised scanner labels. The opening "<b>" is part of the label and is
# matched literally by the filters used later on.
newnames_ct = dict(
    [
        ("CT4", "<b>Vendor 1"),
        ("GE", "<b>Vendor 2"),
        ("photon", "<b>Vendor 3"),
    ]
)

# Display labels for the segmentation networks.
newnames_network = dict(
    [
        ("3d_fullres_LiTS_151", "<b>3D"),
        ("3d_fullres_LiTS_151_res", "<b>3Dres"),
    ]
)

# Results table that all lesion-level metrics below are derived from.
# NOTE(review): the recorded output of this cell shows a pandas DtypeWarning
# ("Columns (1,23,25,26) have mixed types"). Passing explicit `dtype=` for those
# columns would make the parsed types deterministic -- confirm which columns
# these are before changing the call.
df_lesion_meta = pd.read_csv('results_csv/results_lesion_paper.csv')


# Value of the "date" column (as string) -> measure point label, listed per scanner.
_measure_points_canon = {
    '230915': '0',
    '231205': '0',  # two dates share measure point 0
    '250321': '1',
    '250328': '2',
    '250404': '3',
    '250411': '4',
}
_measure_points_ge = {
    '240306': '0',
    '250515': '1',
    '250618': '4',
}
measure_point_mapping = {**_measure_points_canon, **_measure_points_ge}

# Volume of a sphere with 5 mm diameter, in cubic millimeters; used as the minimum lesion size.
size_filter = 4/3 * 3.14159265359 * (5/2)**3

# Keep a row when it has no predicted volume or when its predicted volume exceeds the threshold.
# NOTE(review): rows are selected on `volume_pred` only; a ground-truth volume above the
# threshold does not keep a row whose prediction is smaller -- confirm this is intended.
predicted_volume = df_lesion_meta["volume_pred"]
df_lesion_meta = df_lesion_meta[predicted_volume.isna() | (predicted_volume > size_filter)]

# Dates without an entry in the mapping get None.
df_lesion_meta["measure_point"] = df_lesion_meta["date"].apply(lambda d: measure_point_mapping.get(str(d)))
df_lesion_meta = df_lesion_meta[df_lesion_meta["CTDIvol"] < 36]

# A lesion counts as detected as soon as there is any overlap (Dice > 0).
df_lesion_meta["detected"] = df_lesion_meta["dice"] > 0

# Keep rows with contrast below -10 HU or without a contrast value, and drop the FC13 kernel.
contrast_hu = df_lesion_meta["C in HU"]
filter_lesion_props = contrast_hu.isna() | (contrast_hu < -10)
df_lesion_meta = df_lesion_meta[filter_lesion_props & (df_lesion_meta["kernel"] != "FC13")]

# AUC calculation is only stable and valid for the range where we have high, equally spaced coverage
dose_threshold = 11

output_dir = "../../plots/3) deep learning performance eval/monitoring"


Columns (1,23,25,26) have mixed types. Specify dtype option on import or set low_memory=False.



In [162]:
def _lesion_prop_label(row):
    """Build the "<diameter>mm/<contrast>HU" label of one row.

    The diameter is twice the integer-truncated "R in mm" value, zero-padded to two
    digits. Rows without a radius get None.
    """
    if np.isnan(row['R in mm']):
        return None
    diameter = str(2 * int(row['R in mm'])).zfill(2)
    return f"{diameter}mm/{int(row['C in HU'])}HU"


df_lesion_meta["lesion_prop"] = df_lesion_meta.apply(_lesion_prop_label, axis=1)

# Lower-cased "<reco>_<kernel>" key, translated to its display label (unknown keys become NaN).
reco_kernel_key = df_lesion_meta["reco"].str.lower() + "_" + df_lesion_meta["kernel"].str.lower()
df_lesion_meta["reco+kernel"] = reco_kernel_key
df_lesion_meta["reco+kernel"] = df_lesion_meta["reco+kernel"].map(newnames_reco)

# Replace raw network / scanner identifiers by their display labels.
for column, labels in (("network", newnames_network), ("ct", newnames_ct)):
    df_lesion_meta[column] = df_lesion_meta[column].map(labels)



In [163]:
lesions_raw = pd.read_csv('results_csv/results_lesions_paper.csv')

# Same display-label translation as applied to df_lesion_meta: the lower-cased
# "<reco>_<kernel>" key plus the raw network and scanner identifiers.
reco_kernel_key = lesions_raw["reco"].str.lower() + "_" + lesions_raw["kernel"].str.lower()
df_lesions = lesions_raw.assign(
    **{
        "reco+kernel": reco_kernel_key.map(newnames_reco),
        "network": lesions_raw["network"].map(newnames_network),
        "ct": lesions_raw["ct"].map(newnames_ct),
    }
)

# Drop the FC13 kernel, then attach the measure point (None for dates missing from the mapping).
df_lesions = df_lesions[df_lesions["kernel"] != "FC13"]
df_lesions["measure_point"] = df_lesions["date"].apply(lambda d: measure_point_mapping.get(str(d)))


In [164]:
# Column sets used as groupby keys; the names say which extra keys each one carries.
_acquisition_keys = ["network", "CTDIvol", "reco+kernel", "ct"]
_lesion_keys = ["lesion_prop", "R in mm", "C in HU"]

groupby_lesion_prop = _acquisition_keys + _lesion_keys + ["measure_point"]
groupby_lesion_prop_rep = _acquisition_keys + _lesion_keys + ["rep", "measure_point"]
groupby_lesion_all = _acquisition_keys + ["measure_point"]
groupby_lesion_all_phantom_rep = groupby_lesion_all + ["phantom", "rep"]
groupby_lesion_all_phantom = groupby_lesion_all + ["phantom"]

In [165]:
# Grouping keys for the per-repetition metrics.
rep_keys = groupby_lesion_all + ["rep"]
detection_aggs = {"detected": ["mean", "sum", "count"]}

# Recall: detection rate over the rows that have a ground-truth file.
has_ground_truth = df_lesion_meta["filename_gt"].notna()
df_lesion_recall_rep = df_lesion_meta[has_ground_truth]
df_lesion_recall_rep = df_lesion_recall_rep.groupby(rep_keys).agg(detection_aggs).reset_index()
df_lesion_recall_rep.columns = rep_keys + ["recall", "num_detected", "num_total"]

# Precision: detection rate over all rows, plus mean/std of the Dice score.
df_lesion_meta_precision_rep = (
    df_lesion_meta
    .groupby(rep_keys)
    .agg({**detection_aggs, "dice": ["mean", "std"]})
    .reset_index()
)
df_lesion_meta_precision_rep.columns = rep_keys + ["precision", "num_detected", "num_total", "dice_mean", "dice_std"]

# Both inputs carry num_detected / num_total, so the merge suffixes them:
# *_x comes from the recall table, *_y from the precision table.
lesion_seg_summary_rep = df_lesion_recall_rep.merge(df_lesion_meta_precision_rep, how="left", on=rep_keys)

precision_rep = lesion_seg_summary_rep["precision"]
recall_rep = lesion_seg_summary_rep["recall"]
f1_rep = 2 * (precision_rep * recall_rep) / (precision_rep + recall_rep)
# 0/0 (precision and recall both zero) gives NaN, which is reported as an F1-score of 0.
lesion_seg_summary_rep["F1-score"] = f1_rep
lesion_seg_summary_rep["F1-score"] = lesion_seg_summary_rep["F1-score"].fillna(0)

In [166]:
# Recall per lesion property and repetition, computed over the rows that have a ground-truth file.
rows_with_ground_truth = df_lesion_meta[df_lesion_meta["filename_gt"].notna()]
df_lesion_prop_recall_rep = (
    rows_with_ground_truth
    .groupby(groupby_lesion_prop_rep)
    .agg({"detected": ["mean", "sum", "count"]})
    .reset_index()
)
# Flatten the two-level column index produced by the multi-function aggregation.
df_lesion_prop_recall_rep.columns = [*groupby_lesion_prop_rep, "recall", "num_detected", "num_total"]


In [167]:
# Mean and standard deviation of the Dice score per acquisition setting and measure point.
dice_by_setting = df_lesions.groupby(groupby_lesion_all)
lesions_seg_summary = dice_by_setting.agg({"dice": ["mean", "std"]}).reset_index()
# Flatten the two-level column index produced by the multi-function aggregation.
lesions_seg_summary.columns = [*groupby_lesion_all, "dice_mean", "dice_std"]

In [168]:
# Per-phantom, per-repetition detection metrics.

# Recall: detection rate over the rows that have a ground-truth file.
df_lesion_meta_recall_phantom_rep = df_lesion_meta[~df_lesion_meta["filename_gt"].isna()]
df_lesion_meta_recall_phantom_rep = (
    df_lesion_meta_recall_phantom_rep
    .groupby(groupby_lesion_all_phantom_rep)
    .agg({"detected": ["mean", "sum", "count"]})
    .reset_index()
)
df_lesion_meta_recall_phantom_rep.columns = groupby_lesion_all_phantom_rep + ["recall", "num_detected", "num_total"]

# Precision: detection rate over all rows, plus mean/std of the Dice score.
df_lesion_meta_precision_phantom_rep = (
    df_lesion_meta
    .groupby(groupby_lesion_all_phantom_rep)
    .agg({"detected": ["mean", "sum", "count"], "dice": ["mean", "std"]})
    .reset_index()
)
df_lesion_meta_precision_phantom_rep.columns = groupby_lesion_all_phantom_rep + ["precision", "num_detected", "num_total", "dice_mean", "dice_std"]

# Both inputs carry num_detected / num_total, so the merge suffixes them (_x recall, _y precision).
lesion_seg_summary_phantom_rep = df_lesion_meta_recall_phantom_rep.merge(
    df_lesion_meta_precision_phantom_rep, on=groupby_lesion_all_phantom_rep, how="left"
)
lesion_seg_summary_phantom_rep["F1-score"] = (
    2 * (lesion_seg_summary_phantom_rep["precision"] * lesion_seg_summary_phantom_rep["recall"])
    / (lesion_seg_summary_phantom_rep["precision"] + lesion_seg_summary_phantom_rep["recall"])
)
# precision + recall == 0 gives 0/0 = NaN; report it as F1 = 0, exactly as the
# per-repetition summary (lesion_seg_summary_rep) does, so both tables agree.
lesion_seg_summary_phantom_rep["F1-score"] = lesion_seg_summary_phantom_rep["F1-score"].fillna(0)


In [169]:
# Normalise CTDIvol in all summary tables: parse as float, round to 5 decimals and
# store as string so the dose values can be matched exactly later on.
for _summary in (lesion_seg_summary_rep, lesions_seg_summary, lesion_seg_summary_phantom_rep):
    _ctdi_rounded = _summary["CTDIvol"].astype(float).round(5)
    _summary["CTDIvol"] = _ctdi_rounded.astype(str)

In [170]:
# filter data for plots
lesion_seg_summary_rep_plot = lesion_seg_summary_rep[(lesion_seg_summary_rep["network"].isin(["<b>3D", "<b>3Dres"]))]
# num_total_x is the row count of the recall table (rows with a ground-truth file);
# only groups with all 72 of them are kept.
lesion_seg_summary_rep_plot = lesion_seg_summary_rep_plot[lesion_seg_summary_rep_plot["num_total_x"] == 72]
lesion_seg_summary_rep_plot = lesion_seg_summary_rep_plot[lesion_seg_summary_rep_plot["measure_point"].isin(["0", "1", "4"])]

# Individual acquisitions excluded by hand: (CTDIvol, measure point, rep, scanner).
# NOTE(review): the reason for excluding these two is not recorded here -- please document it.
excluded_acquisitions = [
    ("7.0", "1", 5, "<b>Vendor 1"),
    ("22.39", "4", 5, "<b>Vendor 2"),
]
for excluded_ctdi, excluded_mp, excluded_rep, excluded_ct in excluded_acquisitions:
    index_to_remove = (lesion_seg_summary_rep_plot["CTDIvol"] == excluded_ctdi) & (lesion_seg_summary_rep_plot["measure_point"] == excluded_mp) & \
                      (lesion_seg_summary_rep_plot["rep"] == excluded_rep) & (lesion_seg_summary_rep_plot["ct"] == excluded_ct)
    lesion_seg_summary_rep_plot = lesion_seg_summary_rep_plot[~index_to_remove]

# Sort numerically by dose (CTDIvol is stored as string), then go back to strings.
lesion_seg_summary_rep_plot["CTDIvol"] = lesion_seg_summary_rep_plot["CTDIvol"].astype(float)
lesion_seg_summary_rep_plot = lesion_seg_summary_rep_plot.sort_values(by=["measure_point", "network", "CTDIvol", "rep"])
lesion_seg_summary_rep_plot["CTDIvol"] = lesion_seg_summary_rep_plot["CTDIvol"].astype(str)

# for each measure point, we want to know the difference in F1-score between this measure point and the first measure point
# NOTE(review): "ct" is not part of the grouping, so scanners sharing the same CTDIvol
# string are averaged together -- confirm that this is intended.
lesion_seg_summary_plot = lesion_seg_summary_rep_plot.groupby(["network", "CTDIvol", "reco+kernel", "measure_point"]).agg({"F1-score": "mean"}).reset_index()

# The aggregation above returns rows sorted by the group keys, so the first row of each
# (network, CTDIvol, reco+kernel) group is its earliest available measure point.
# Subtracting that baseline gives the difference to the FIRST measure point; a plain
# .diff() would give the difference to the PREVIOUS one (e.g. 4 vs 1 instead of 4 vs 0).
baseline_f1 = lesion_seg_summary_plot.groupby(["network", "CTDIvol", "reco+kernel"])["F1-score"].transform("first")
lesion_seg_summary_plot["F1-score_diff"] = lesion_seg_summary_plot["F1-score"] - baseline_f1

# save plot data for phillip jordan
lesion_seg_summary_plot[lesion_seg_summary_plot["reco+kernel"] == "HIR"].to_csv(os.path.join("../../../../../../promotion/daten_philipp/Figure 4", "lesion_detection_monitoring_data.csv"), index=False)

In [171]:
# One point per repetition: F1-score against dose for the HIR reconstruction,
# coloured by measure point and faceted by network (columns) and scanner (rows).
hir_rep_scores = lesion_seg_summary_rep_plot[lesion_seg_summary_rep_plot["reco+kernel"] == "HIR"]
facet_orders = {
    "network": ["<b>3D", "<b>3Dres"],
    "ct": ["<b>Vendor 1", "<b>Vendor 2"],
}
fig_strip = px.strip(
    hir_rep_scores,
    x="CTDIvol",
    y="F1-score",
    color="measure_point",
    facet_col="network",
    facet_row="ct",
    title="Monitoring F1-score (only HIR)",
    range_y=[-0.1, 1],
    template="simple_white",
    hover_data=["precision", "recall", "rep"],
    category_orders=facet_orders,
)

# Axis titles only on the bottom row / first column; tick labels in bold everywhere.
bold_ticks = dict(tickprefix="<b>", ticksuffix="</b>")
fig_strip.update_xaxes(title_text="<b>CTDIvol", row=1)
fig_strip.update_yaxes(title_text="<b>F1-score", col=1)
# matches=None lets every facet keep its own x axis.
fig_strip.update_xaxes(matches=None, **bold_ticks)
fig_strip.update_yaxes(**bold_ticks)

layout_options = dict(
    font_family="Arial",
    font_size=14,
    title_font_family="Arial",
    title_font_color="black",
    height=800,
    width=1100,
    legend_title="<b>Measure Point</b>",
    barmode="group",
)
fig_strip.update_layout(**layout_options)


def _keep_facet_value(annotation):
    """Reduce a facet annotation of the form "name=value" to "value"."""
    annotation.update(text=annotation.text.split("=")[1])


# Update annotations
fig_strip.for_each_annotation(_keep_facet_value)

fig_strip.show()

In [172]:
# calc AUC for each curve F1 vs CTDIvol
from sklearn.metrics import auc

# Restrict to the two networks / two scanners of interest and to the dose range
# below dose_threshold.
lesion_seg_summary_rep_auc = lesion_seg_summary_rep_plot[(lesion_seg_summary_rep_plot["network"].isin(["<b>3D", "<b>3Dres"]))]
lesion_seg_summary_rep_auc = lesion_seg_summary_rep_auc[lesion_seg_summary_rep_auc["ct"].isin(["<b>Vendor 1", "<b>Vendor 2"])]
lesion_seg_summary_rep_auc = lesion_seg_summary_rep_auc[lesion_seg_summary_rep_auc["CTDIvol"].astype(float) < dose_threshold]

auc_group_keys = ["ct", "network", "reco+kernel", "rep", "measure_point"]

aucs = []          # (group key tuple, AUC, CTDIvol_min, CTDIvol_max) per curve
aucs_skipped = []  # group keys of curves that had fewer than two dose points
for group, curve in lesion_seg_summary_rep_auc.groupby(auc_group_keys):
    dose = curve["CTDIvol"].astype(float).to_numpy()
    f1 = curve["F1-score"].to_numpy()
    if len(dose) < 2:
        # A single point spans no area and sklearn's auc() raises on it.
        aucs_skipped.append(group)
        continue

    # auc() needs monotonic x; sort here instead of relying on the row order of the input.
    order = np.argsort(dose, kind="stable")
    dose, f1 = dose[order], f1[order]
    CTDIvol_min, CTDIvol_max = dose[0], dose[-1]

    # NOTE(review): x is divided by its maximum only, so the integration range is
    # [min/max, 1] rather than [0, 1]; AUC values are only comparable between curves
    # that cover the same dose range.
    aucs.append((group, auc(dose / CTDIvol_max, f1), CTDIvol_min, CTDIvol_max))

# One row per curve; the group key tuple is spread over its own columns.
aucs_df = pd.DataFrame(
    [(area, dose_min, dose_max, *group) for group, area, dose_min, dose_max in aucs],
    columns=["AUC", "CTDIvol_min", "CTDIvol_max"] + auc_group_keys,
)

aucs_df[aucs_df["reco+kernel"] == "HIR"].to_csv("../../../../../../promotion/daten_philipp/Figure 4/lesion_detection_monitoring_data_auc.csv", index=False)


# Mean / std of the AUC over repetitions, plus the dose range covered by the curves.
aucs_df_mean = aucs_df.groupby(["ct","network", "reco+kernel", "measure_point"]).agg({"AUC":["mean", "std"], "CTDIvol_min": "min", "CTDIvol_max": "max"}).reset_index()
aucs_df_mean.columns = ["ct", "network", "reco+kernel", "measure_point", "AUC_mean", "AUC_std", "CTDIvol_min", "CTDIvol_max"]
aucs_df_mean["CTDIvol_range"] = aucs_df_mean.apply(lambda x: f"{x['CTDIvol_min']} - {x['CTDIvol_max']}", axis=1)



In [173]:
# check distribution of F1-score differences at CTDIvols
# subtract mean from each group and plot residual distribution
# important info for statistical analysis
lesion_seg_summary_rep["mean_F1_per_CTDI"] = lesion_seg_summary_rep.groupby(["network", "CTDIvol", "ct", "reco+kernel"])["F1-score"].transform("mean")
lesion_seg_summary_rep["F1-score_diff_per_CTDI"] = lesion_seg_summary_rep["F1-score"] - lesion_seg_summary_rep["mean_F1_per_CTDI"]

# The title announces "Vendor 1 + Vendor 2, HIR reconstruction", so plot exactly that
# subset instead of every scanner and reconstruction in the table.
# NOTE(review): if the unfiltered distribution was intended, remove this filter and fix the title.
f1_residuals_hir = lesion_seg_summary_rep[
    (lesion_seg_summary_rep["reco+kernel"] == "HIR")
    & (lesion_seg_summary_rep["ct"].isin(["<b>Vendor 1", "<b>Vendor 2"]))
]

px.histogram(f1_residuals_hir, x="F1-score_diff_per_CTDI", barmode="overlay", nbins=150, range_x=[-0.2, 0.2], 
             title="Lesion segmentation F1-score distribution (Vendor 1 + Vendor 2, HIR reconstruction)").show()

In [174]:
# plot auc values using plotly
# Mean AUC (error bar: std over repetitions) of the HIR reconstruction, grouped by
# measure point and faceted by network (columns) and scanner (rows).
fig = px.bar(aucs_df_mean[aucs_df_mean["reco+kernel"] == "HIR"], 
             x="CTDIvol_range", 
             y="AUC_mean", 
             error_y="AUC_std",
             color="measure_point",
             facet_col="network",
             facet_row="ct",
             title="AUC Values: Dose vs F1-score Performance",
             labels={
                 # keys must be column names of the plotted frame; the y column is "AUC_mean"
                 "AUC_mean": "<b>Normalized AUC",
                 "measure_point": "<b>Measure Point",
                 "network": "<b>Network"
             },
             barmode="group",
             template="simple_white")

# Update layout for better visualization
fig.update_layout(
    font_family="Arial",
    font_size=14,
    title_font_family="Arial",
    title_font_color="black",
    height=700,
    width=1000,
    showlegend=True,
    legend_title="<b>Measure Point</b>"
)

# Update axes
fig.update_xaxes(title_text="")
fig.update_yaxes(title_text="<b>Normalized AUC", range=[0, 1], col=1, row=1)
# matches=None lets every facet keep its own x axis (the dose range differs per scanner)
fig.update_xaxes(tickprefix="<b>", ticksuffix="</b>", matches=None)
fig.update_yaxes(tickprefix="<b>", ticksuffix="</b>")

# Reduce facet annotations of the form "name=value" to "value"
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))


# Show the plot
fig.show()

#fig.write_image(os.path.join(output_dir, "auc_dose_vs_f1score_performance_canon.png"), width=1000, height=500)

In [175]:
from itertools import combinations

from scipy.stats import ttest_rel, wilcoxon
from statsmodels.stats.multitest import multipletests

# Paired t-tests of the per-repetition AUC between measure points, separately for each
# network and scanner, with Holm and Benjamini-Hochberg correction inside each
# (network, scanner) family of comparisons.
measure_point_comparisons = []
networks = []
vendors = []
mp_1_list = []
mp_2_list = []
t_statistic = []
p_values = []
p_values_holm = []
p_values_othercorrection = []

# quick check for FBP, DLR and MBIR showed 1, 2 and again 1 significant differences respectively for vendor 1
# however if reco is included, the number of comparisons increases drastically and the correction for multiple comparisons should be adjusted accordingly (not done in the first check)
# The frame name and the output file name both say HIR, so HIR is what gets tested.
# Change reco_under_test for a quick check of another reconstruction; the file name follows.
reco_under_test = "HIR"
aucs_df_HIR = aucs_df[aucs_df["reco+kernel"] == reco_under_test]

measure_points = ['0', '1', '4']

for network in aucs_df_HIR["network"].unique():
    for ct in aucs_df_HIR["ct"].unique():
        aucs_condition = aucs_df_HIR[(aucs_df_HIR["network"] == network) & (aucs_df_HIR["ct"] == ct)]
        p_values_temp = []
        # every unordered pair of measure points exactly once: (0,1), (0,4), (1,4)
        for measure_point_1, measure_point_2 in combinations(measure_points, 2):
            measure_point_comparison = f"{sorted([measure_point_1, measure_point_2])} for network {network} and ct {ct}"
            measure_point_comparisons.append(measure_point_comparison)

            # Index by repetition so the paired test really compares rep i with rep i,
            # even when a repetition is missing at one of the two measure points.
            aucs_meas_1 = aucs_condition[aucs_condition["measure_point"] == measure_point_1].set_index("rep")["AUC"]
            aucs_meas_2 = aucs_condition[aucs_condition["measure_point"] == measure_point_2].set_index("rep")["AUC"]
            shared_reps = aucs_meas_1.index.intersection(aucs_meas_2.index)
            if len(shared_reps) == 0:
                continue

            t, p = ttest_rel(aucs_meas_1.loc[shared_reps], aucs_meas_2.loc[shared_reps])
            p_values_temp.append(p)
            networks.append(network)
            vendors.append(ct)
            mp_1_list.append(measure_point_1)
            mp_2_list.append(measure_point_2)
            t_statistic.append(t)

        if not p_values_temp:
            # nothing was tested for this network/scanner, so there is nothing to correct
            continue

        # an undefined test result (NaN p-value) is treated as not significant (p = 1.0)
        p_values_temp = [pv if not np.isnan(pv) else 1.0 for pv in p_values_temp]
        p_values.extend(p_values_temp)

        # correct p-values for multiple comparisons
        reject, pvals_corrected, _, _ = multipletests(p_values_temp, method='holm')
        p_values_holm.extend(pvals_corrected)
        # you can add other correction methods if needed
        reject, pvals_corrected_other, _, _ = multipletests(p_values_temp, method='fdr_bh')
        p_values_othercorrection.extend(pvals_corrected_other)

df_stats_aucs_monitoring = pd.DataFrame({
    "network": networks,
    "ct": vendors,
    "measure_point_1": mp_1_list,
    "measure_point_2": mp_2_list,
    "t_statistic": t_statistic,
    "p_value": p_values,
    "p_value_holm": p_values_holm,
    "p_value_fdr_bh": p_values_othercorrection
})

df_stats_aucs_monitoring.to_csv(os.path.join(output_dir, f"lesion_seg_f1_score_auc_stats_dicethreshold_{reco_under_test}_{0}.csv"), index=False)

In [176]:
# Mean AUC (error bar: std over repetitions) per measure point, one bar colour per
# reconstruction, faceted by scanner (rows) and network (columns).
fig = px.bar(aucs_df_mean[aucs_df_mean["measure_point"].isin(['0', '1', '4'])], x="measure_point", y="AUC_mean", error_y="AUC_std", color="reco+kernel", barmode="group",
       title="AUC for F1-score vs CTDIvol", template="simple_white", range_y=[0,1], facet_row="ct", facet_col="network",
       category_orders={"reco+kernel": ["FBP", "HIR", "MBIR", "DLR"],
                        "ct": ["<b>Vendor 1", "<b>Vendor 2"],
                        "network": ["<b>3D", "<b>3Dres"]},
                        color_discrete_sequence=["#2FA97C", "#0021E8", "#FF7F00", "darkred"],)


# The x axis carries the measure point; the reconstruction is encoded by the bar
# colour, so that name belongs on the legend rather than on the axis.
fig.update_xaxes(title_text="<b>Measure Point", row=1) 
fig.update_yaxes(title_text="<b>AUC", col=1)
fig.update_xaxes(tickprefix="<b>",ticksuffix ="</b>")
# Reduce facet annotations of the form "name=value" to "value"
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))

fig.update_layout(
       font_family="Arial",
       font_size=14,
       title_font_family="Arial",
       title_font_color="black",
       height=500,
       width=600,
       legend_title="<b>Reconstruction</b>"
       )
fig.show()
#fig.write_image(os.path.join(output_dir, f"lesion_seg_f1_score_auc_dicethreshold_{dice_detection_threshold}.png"))


In [177]:
from itertools import combinations

from scipy.stats import ttest_rel, wilcoxon
from statsmodels.stats.multitest import multipletests

# Paired t-tests of the per-repetition AUC between reconstructions, separately for each
# network, scanner and measure point, with Holm and Benjamini-Hochberg correction inside
# each (network, scanner, measure point) family of comparisons.
reco_comparisons = []
networks = []
vendors = []
reco_1_list = []
reco_2_list = []
measure_points_list = []
t_statistic = []
p_values = []
p_values_holm = []
p_values_othercorrection = []

# Fixed comparison order; labels not listed here are sorted behind the known ones
# instead of raising a KeyError.
reco_order = {"FBP": 0, "HIR": 1, "MBIR": 2, "DLR": 3}
measure_points = ['0', '1', '4']

for network in aucs_df["network"].unique():
    for ct in aucs_df["ct"].unique():
        for measure_point in measure_points:
            aucs_condition = aucs_df[(aucs_df["network"] == network) & (aucs_df["ct"] == ct) & (aucs_df["measure_point"] == measure_point)]
            recos = aucs_condition["reco+kernel"].unique()
            recos = sorted(recos, key=lambda x: reco_order.get(x, len(reco_order)))
            p_values_temp = []
            # every unordered pair of reconstructions exactly once, in reco_order
            for reco, reco_2 in combinations(recos, 2):
                reco_comparison = f"{sorted([reco, reco_2])} for network {network} and ct {ct} and measure point {measure_point}"
                reco_comparisons.append(reco_comparison)

                # Index by repetition so the paired test really compares rep i with rep i,
                # even when a repetition is missing for one of the two reconstructions.
                aucs_reco = aucs_condition[aucs_condition["reco+kernel"] == reco].set_index("rep")["AUC"]
                aucs_reco_2 = aucs_condition[aucs_condition["reco+kernel"] == reco_2].set_index("rep")["AUC"]
                shared_reps = aucs_reco.index.intersection(aucs_reco_2.index)
                if len(shared_reps) == 0:
                    continue
                print(f"{ct} {network} {reco} vs {reco_2} at measure point {measure_point}")

                t, p = ttest_rel(aucs_reco.loc[shared_reps], aucs_reco_2.loc[shared_reps])
                p_values_temp.append(p)
                networks.append(network)
                vendors.append(ct)
                reco_1_list.append(reco)
                reco_2_list.append(reco_2)
                t_statistic.append(t)
                measure_points_list.append(measure_point)

            # an undefined test result (NaN p-value) is treated as not significant (p = 1.0)
            p_values_temp = [pv if not np.isnan(pv) else 1.0 for pv in p_values_temp]
            print(len(p_values_temp))
            if not p_values_temp:
                # nothing was tested for this condition, so there is nothing to correct
                continue
            p_values.extend(p_values_temp)

            # correct p-values for multiple comparisons
            reject, pvals_corrected, _, _ = multipletests(p_values_temp, method='holm')
            p_values_holm.extend(pvals_corrected)
            # you can add other correction methods if needed
            reject, pvals_corrected_other, _, _ = multipletests(p_values_temp, method='fdr_bh')
            p_values_othercorrection.extend(pvals_corrected_other)

df_stats_aucs_reco = pd.DataFrame({
    "network": networks,
    "ct": vendors,
    "reco_1": reco_1_list,
    "reco_2": reco_2_list,
    "t_statistic": t_statistic,
    "p_value": p_values,
    "p_value_holm": p_values_holm,
    "p_value_fdr_bh": p_values_othercorrection,
    "measure_point": measure_points_list
})

df_stats_aucs_reco.to_csv(os.path.join(output_dir, f"lesion_seg_f1_score_auc_stats_dicethreshold_{0}.csv"), index=False)

<b>Vendor 1 <b>3D FBP vs HIR at measure point 0
<b>Vendor 1 <b>3D FBP vs MBIR at measure point 0
<b>Vendor 1 <b>3D FBP vs DLR at measure point 0
<b>Vendor 1 <b>3D HIR vs MBIR at measure point 0
<b>Vendor 1 <b>3D HIR vs DLR at measure point 0
<b>Vendor 1 <b>3D MBIR vs DLR at measure point 0
6
<b>Vendor 1 <b>3D FBP vs HIR at measure point 1
<b>Vendor 1 <b>3D FBP vs MBIR at measure point 1
<b>Vendor 1 <b>3D FBP vs DLR at measure point 1
<b>Vendor 1 <b>3D HIR vs MBIR at measure point 1
<b>Vendor 1 <b>3D HIR vs DLR at measure point 1
<b>Vendor 1 <b>3D MBIR vs DLR at measure point 1
6
<b>Vendor 1 <b>3D FBP vs HIR at measure point 4
<b>Vendor 1 <b>3D FBP vs DLR at measure point 4
<b>Vendor 1 <b>3D HIR vs DLR at measure point 4
3
<b>Vendor 2 <b>3D FBP vs HIR at measure point 0
1
<b>Vendor 2 <b>3D FBP vs HIR at measure point 1
1
<b>Vendor 2 <b>3D FBP vs HIR at measure point 4
1
<b>Vendor 1 <b>3Dres FBP vs HIR at measure point 0
<b>Vendor 1 <b>3Dres FBP vs MBIR at measure point 0
<b>Vendor 1 <b>