In [None]:
import json
import pandas as pd
import os
from IPython.display import display
from scipy import stats
import numpy as np
import dataframe_image as dfi
import matplotlib.pyplot as plt

# Segmentation target being analysed; it selects the results folder and is the
# prefix of every file name read or written below.
TYPE_OF_SEGMENTATION = "TC"

# Folder holding the input JSON; generated tables and figures are written here too.
RESULTS_FOLDER = f"results/{TYPE_OF_SEGMENTATION.lower()}"

# JSON file with the raw per-fold scores.
DATA_FILE = os.path.join(RESULTS_FOLDER, f"{TYPE_OF_SEGMENTATION}_results.json")

# Create the folder if it is missing (no error when it already exists).
os.makedirs(RESULTS_FOLDER, exist_ok=True)



# Layout as consumed by the table-building loop in this notebook:
# data[sequence_set][checkpoint]["fold-<k>-holdout"]["mean" | "median"]
with open(DATA_FILE, "r") as f:
    data = json.load(f)

    
def compute_mean(samples, dim):
    """Return the arithmetic mean of ``samples``.

    Parameters
    ----------
    samples : sized iterable of numbers
        Values to average; an empty container raises ``ZeroDivisionError``.
    dim : any
        Accepted for call compatibility; the value is not used.
    """
    count = len(samples)

    # Plain left-to-right accumulation starting from integer zero.
    total = 0
    for value in samples:
        total += value

    total /= count
    return total
    
def compute_margin(samples):
    """Return the half-width of the two-sided 95% t confidence interval of the mean.

    The margin is ``t(0.975, n - 1) * s / sqrt(n)`` where ``s`` is the *sample*
    standard deviation (``ddof=1``). The t quantile with ``n - 1`` degrees of
    freedom is only valid together with the Bessel-corrected estimate; using
    the population standard deviation (NumPy's default ``ddof=0``) makes the
    interval too narrow, noticeably so for small ``n``.

    Parameters
    ----------
    samples : iterable of numbers
        Observations the mean was computed from.

    Returns
    -------
    float
        The margin to add to / subtract from the mean. ``nan`` when fewer than
        two samples are given, because the interval is undefined then.
    """
    vals = np.asarray(list(samples), dtype=float)
    n = vals.size

    # With n < 2 there are no degrees of freedom left; return NaN explicitly
    # instead of relying on NumPy/SciPy warnings to produce it.
    if n < 2:
        return float("nan")

    t_val = stats.t.ppf(q=0.975, df=n - 1)

    # Standard error of the mean based on the sample standard deviation.
    std_err = np.std(vals, ddof=1) / np.sqrt(n)

    return t_val * std_err


# Summary tables built below, keyed "<sequence set>_<mean|median>".
dfs = {}

# One workbook receives every table as its own sheet; each table is also
# displayed and exported as a PNG.
with pd.ExcelWriter(os.path.join(RESULTS_FOLDER, f"{TYPE_OF_SEGMENTATION}_results.xlsx")) as writer:  
    # `dim` selects which per-fold statistic is read from the JSON.
    for dim in ["mean", "median"]:
        for seqs, ckpts in data.items():
            # Row label (integer suffix of the checkpoint key) -> row values.
            seq_df = {}
            for ckpt, folds in ckpts.items():
                # Scores of the five hold-out folds, each rounded to 5 decimals.
                l = []
                for i in range(5):
                    l.append(folds['fold-' + str(i + 1) + '-holdout'][dim])
                    l[-1] = round(l[-1], 5)

                l_ext = []

                # Across-fold average and confidence margin of the rounded scores.
                mean = compute_mean(l, dim)

                ci_margin = compute_margin(l)

                # Interval bounds are clipped to the range [0, 1].
                l_ext.extend([mean, ci_margin, max(mean - ci_margin, 0), min(mean + ci_margin, 1)])

                l.extend(l_ext)

                # The text after the last "_" of the checkpoint key becomes the int row label.
                seq_df[int(ckpt.split("_")[-1])] = l

            df = pd.DataFrame.from_dict(seq_df, orient='index', columns=['fold_' + str(i + 1) for i in range(5)] 
                            + ["mean", "ci_margin", "ci_low", "ci_high"])
            name = TYPE_OF_SEGMENTATION + "_" + seqs + "_" + dim

            df.index.name = name

            display(df)

            # Kept in memory for the plotting and testing cells further down.
            dfs[seqs + "_" + dim] = df

            dfi.export(df, os.path.join(RESULTS_FOLDER, name + ".png"), max_cols = -1)

            df.to_excel(writer,
                      sheet_name=f"{TYPE_OF_SEGMENTATION}_{seqs}_{dim}")

In [None]:
# Gather the across-fold "mean" column of every summary table into two wide
# frames: one for tables of the mean statistic, one for the median statistic.
mean_df = pd.DataFrame()
median_df = pd.DataFrame()

seqs = ["t1ce", "flair", "t1ce,flair", "t1ce,flair,t1,t2"]

for seq in seqs:
    for type1 in ("mean", "median"):
        name = f"{seq}_{type1}"
        df = dfs[name]
        # The column label is the part of the key before the first underscore.
        name2 = name.split("_")[0]
        target_df = median_df if "median" in name else mean_df
        target_df[name2] = df["mean"]

In [None]:
# Upper-case the column labels of both frames.
mean_df.columns = mean_df.columns.str.upper()
median_df.columns = median_df.columns.str.upper()

In [None]:
# Display the frame of per-checkpoint values for the median statistic.
median_df



In [None]:
# et_median_t1ce = [0.27277, 0.82618, 0.82547, 0.84702, 0.84294, 0.86216, 0.85647, 0.82016, 0.85407, 0.85598, 0.8597, 0.83765]
# et_median_flair = [0.19334, 0.18001, 0.24695, 0.25511, 0.28967, 0.19856, 0.24291, 0.22325, 0.19818, 0.25898, 0.14257, 0.13137]
# et_median_t1ce_flair = [0.46656, 0.64605, 0.79329, 0.78737, 0.78554, 0.80364, 0.79944, 0.82448, 0.82426, 0.82091, 0.82694, 0.83706]
# et_median_all = [0.44458, 0.64926, 0.61982, 0.73097, 0.76157, 0.75816, 0.67139, 0.6781, 0.58306, 0.79294, 0.76052, 0.77717 ]




# tc_median_t1ce = [0.64344, 0.85561, 0.87871, 0.87562, 0.87589, 0.88327, 0.86023, 0.87791, 0.88189, 0.897, 0.89414, 0.87359]
# tc_median_flair = [0.47314, 0.51952, 0.542, 0.63161, 0.60847, 0.62028, 0.68299, 0.61957, 0.67037, 0.68205, 0.70904, 0.69833]
# tc_median_t1ce_flair = [0.68196, 0.52993, 0.77157, 0.78406, 0.83826, 0.79296, 0.8626, 0.83409, 0.83638, 0.85976, 0.87472, 0.87369 ]
# tc_median_all = [0.42093, 0.62322, 0.7209, 0.79264, 0.77772, 0.82913, 0.82071, 0.81572, 0.81129,  0.83061, 0.84407, 0.8391 ]

In [None]:
# median_df["T1CE"] = et_median_t1ce
# median_df["FLAIR"] = et_median_flair
# median_df["T1CE,FLAIR"] = et_median_t1ce_flair
# median_df["T1CE,FLAIR,T1,T2"] = et_median_all

In [None]:
# median_df

In [None]:
# # mean_df.index.name = f"{TYPE_OF_SEGMENTATION} average of mean Dice Scores over k=5 fold CV"
# title = f"{TYPE_OF_SEGMENTATION} mean of Dice Scores over k=5 fold CV (mean over samples)"
# # mean_df.plot()

# mean_df_plot = mean_df.plot(xlabel="Epoch", ylabel="Dice Score", title=title, ylim=(0,1)).get_figure()
# mean_df_plot.savefig(os.path.join(RESULTS_FOLDER, "mean_df_plot.png"))

In [None]:
# Line plot with one curve per column of median_df, saved into the results folder.
title = f"{TYPE_OF_SEGMENTATION} validation results (median over samples)"

median_axes = median_df.plot(xlabel="Epoch", ylabel="Dice Score", title=title, ylim=(0,1))
median_df_plot = median_axes.get_figure()

median_plot_path = os.path.join(RESULTS_FOLDER, "median_df_plot.png")
median_df_plot.savefig(median_plot_path)

In [None]:
import matplotlib.pyplot as plt
import statistics
from math import sqrt


def plot_confidence_interval(x, values, z=1.96, color='#2187bb', horizontal_line_width=0.25):
    """Draw one confidence-interval error bar with pyplot.

    Parameters
    ----------
    x : number
        Horizontal position of the bar.
    values : sequence
        ``values[0]`` is the centre of the interval and ``values[1]`` its
        margin (half-width); the bar spans ``values[0] - values[1]`` to
        ``values[0] + values[1]``.
    z : float, optional
        Not used; kept so existing calls that pass it keep working.
    color : str, optional
        Colour of the vertical bar and of its two caps.
    horizontal_line_width : float, optional
        Total width of each cap.

    Returns
    -------
    The centre value ``values[0]`` that was plotted.
    """
    center, margin = values[0], values[1]
    lower = center - margin
    upper = center + margin

    left = x - horizontal_line_width / 2
    right = x + horizontal_line_width / 2

    plt.plot([x, x], [lower, upper], color=color)          # vertical bar
    plt.plot([left, right], [lower, lower], color=color)   # lower cap
    plt.plot([left, right], [upper, upper], color=color)   # upper cap
    plt.plot(x, center, 'o', color='#f44336')              # centre marker

    # Return the plotted centre. The previous `return mean` referred to a name
    # that is not defined in this function, so it yielded whatever module-level
    # `mean` happened to exist (or raised NameError when none did).
    return center


#plt.xticks([1, 2, 3, 4], ['T1CE', 'FLAIR', 'T1CE,FLAIR', 'T1CE,FLAIR,T1,T2'])
#plt.title('Confidence Interval (Mean)')
# plot_confidence_interval(1, [10, 11, 42, 45, 44])
# plot_confidence_interval(2, [10, 21, 42, 45, 44])
# plot_confidence_interval(3, [20, 2, 4, 45, 44])
# plot_confidence_interval(4, [30, 31, 42, 45, 44])
# plt.show()

# For every summary table take its last row and store [mean, ci_margin] under
# the sequence-set name, split by whether the table key contains "median".
CI_mean = {}
CI_median = {}

for name, df in dfs.items():
    ckpt_60 = df.iloc[-1]
    name2 = name.split("_")[0]
    ci_target = CI_median if "median" in name else CI_mean
    ci_target[name2] = [ckpt_60["mean"], ckpt_60["ci_margin"]]

print(CI_mean)

In [None]:
# Error-bar plot of the CI_mean entries, one bar per sequence set.
ci_mean_positions = [1, 2, 3, 4]
ci_mean_labels = ['t1ce', 'flair', 't1ce,flair', 't1ce,flair,t1,t2']

plt.xticks(ci_mean_positions, ci_mean_labels)
plt.xlabel("Sequence sets")
plt.ylabel("Dice score")
plt.title(f"{TYPE_OF_SEGMENTATION} Confidence Interval (Mean) 60 epochs")

for position, label in zip(ci_mean_positions, ci_mean_labels):
    plot_confidence_interval(position, CI_mean[label])

plt.savefig(os.path.join(RESULTS_FOLDER, "ci_mean.png"))

In [None]:
# Error-bar plot of the CI_median entries, one bar per sequence set.
ci_median_positions = [1, 2, 3, 4]
ci_median_labels = ['t1ce', 'flair', 't1ce,flair', 't1ce,flair,t1,t2']

plt.xticks(ci_median_positions, ci_median_labels)
plt.xlabel("Sequence sets")
plt.ylabel("Dice score")
plt.title(f"{TYPE_OF_SEGMENTATION} Confidence Interval (Median) 60 epochs")

for position, label in zip(ci_median_positions, ci_median_labels):
    plot_confidence_interval(position, CI_median[label])

plt.savefig(os.path.join(RESULTS_FOLDER, "ci_median.png"))

In [None]:
#https://thedatascientist.com/how-to-do-a-t-test-in-python/
from numpy.random import seed
from numpy.random import randn
from numpy.random import normal
from scipy.stats import ttest_ind

# Sequence sets available in this notebook.
seqs = ["t1ce", "flair", "t1ce,flair", "t1ce,flair,t1,t2"]

# Accumulators filled by do_ttest(), one row per comparison:
# result_df_equal gets the two-sided test, result_df_greater the
# one-sided test run with alternative='less'.
result_df_equal = pd.DataFrame(columns=["comparison", "samples_A", "samples_B", "t-statistic", "p-value", "Reject null hypothesis"])
result_df_greater = pd.DataFrame(columns=["comparison", "samples_A", "samples_B", "t-statistic", "p-value", "Reject null hypothesis"])


def get_data(seq, epoch, scoreType):
    """Return the per-fold scores of one sequence set at one checkpoint.

    Parameters
    ----------
    seq : str
        Sequence-set name, e.g. ``"t1ce,flair"``.
    epoch : int
        Checkpoint to read. It is looked up by label in the index of the
        summary table, so any stored checkpoint can be requested. When the
        label is not present, the previous positional rule is applied for
        compatibility: ``60`` selects the last row, anything else the fifth
        row from the end.
    scoreType : str
        ``"mean"`` or ``"median"``; selects ``dfs[f"{seq}_{scoreType}"]``.

    Returns
    -------
    tuple
        ``(table key, [fold_1 ... fold_5 scores], value of the row's "mean" column)``.

    Raises
    ------
    KeyError
        If ``dfs`` has no table for the requested sequence set / score type.
    """
    seq_name = seq + "_" + scoreType
    seq_df = dfs[seq_name]

    if epoch in seq_df.index:
        # Label-based lookup: asking for e.g. epoch 20 no longer silently
        # returns the row that the positional rule below would pick.
        seq_df_epoch = seq_df.loc[epoch]
    elif epoch == 60:
        seq_df_epoch = seq_df.iloc[-1]
    else:
        seq_df_epoch = seq_df.iloc[-5]

    seq_samples = [seq_df_epoch[f"fold_{i}"] for i in range(1, 6)]

    return seq_name, seq_samples, seq_df_epoch["mean"]


def do_ttest(seqA, seqB, epoch=60, scoreType="median", equal_var=False):
    """Run two independent-samples t-tests of ``seqA`` against ``seqB`` and log them.

    The per-fold scores of both sequence sets are fetched with ``get_data``.
    The two-sided test is appended as one row to the global
    ``result_df_equal``; the one-sided test (``alternative='less'``) is
    appended to the global ``result_df_greater``. Inputs and results of both
    tests are printed as well.

    Parameters
    ----------
    seqA, seqB : str
        Names of the two sequence sets to compare.
    epoch : int, optional
        Checkpoint passed on to ``get_data``.
    scoreType : str, optional
        Score type passed on to ``get_data``.
    equal_var : bool, optional
        Forwarded to ``ttest_ind``.
    """
    global result_df_equal, result_df_greater

    def _one_row_frame(label, t_stat, p_val):
        # Single-row frame in the column layout of the result accumulators.
        return pd.DataFrame([{
            "comparison": label,
            "samples_A": seqA_samples,
            "samples_B": seqB_samples,
            "t-statistic": t_stat,
            "p-value": p_val,
            "Reject null hypothesis": str(p_val <= 0.05),
        }])

    def _print_outcome(t_stat, p_val):
        print(f"T-statistic: {t_stat}, p-value: {p_val}")
        print(f"p-value <= 0.05: {p_val <= 0.05}")

    separator = "######################"

    print(f"Doing t-test for {seqA} vs {seqB} at epoch {epoch} for {TYPE_OF_SEGMENTATION} segmentation and for metric type {scoreType}:")
    print(f"Equal_var is set to {equal_var}")

    seqA_name, seqA_samples, seqA_epoch_mean = get_data(seqA, epoch, scoreType)
    print("Statistics for", scoreType)
    print(seqA_name, seqA_samples, "mean:", seqA_epoch_mean)

    print()

    seqB_name, seqB_samples, seqB_epoch_mean = get_data(seqB, epoch, scoreType)
    print(seqB_name, seqB_samples, "mean:", seqB_epoch_mean)

    print(separator)

    # Two-sided alternative.
    print("Alternative: Two-sided")
    t, p = ttest_ind(seqA_samples, seqB_samples, equal_var=equal_var)
    two_sided_row = _one_row_frame(f"{seqA} vs {seqB}: epoch={epoch} equal_var={equal_var}", t, p)
    result_df_equal = pd.concat([result_df_equal, two_sided_row], axis=0, ignore_index=True)
    _print_outcome(t, p)

    # One-sided alternative 'less'.
    print("Alternative: Less")
    t, p = ttest_ind(seqA_samples, seqB_samples, equal_var=equal_var, alternative='less')
    one_sided_row = _one_row_frame(f"{seqA} vs {seqB}: epoch={epoch} assume equal_var={equal_var}", t, p)
    result_df_greater = pd.concat([result_df_greater, one_sided_row], axis=0, ignore_index=True)
    _print_outcome(t, p)

    print(separator)
    for _ in range(3):
        print()

# Sequence-set pairs compared by the t-tests, in the order they are reported.
comparison_pairs = [
    ("t1ce", "t1ce,flair,t1,t2"),
    ("t1ce,flair", "t1ce,flair,t1,t2"),
    ("t1ce", "t1ce,flair"),
    ("t1ce", "flair"),
]

for eq in [True, False]:
    for first_seq, second_seq in comparison_pairs:
        do_ttest(first_seq, second_seq, equal_var=eq)
    # For TC the same comparisons are repeated at epoch 40.
    if TYPE_OF_SEGMENTATION == "TC":
        for first_seq, second_seq in comparison_pairs:
            do_ttest(first_seq, second_seq, epoch=40, equal_var=eq)


# From here on both names refer to captioned Styler objects, not plain DataFrames.
equal_caption = f"{TYPE_OF_SEGMENTATION} segmentation (median scores) null hypothesis: equal means"
greater_caption = f"{TYPE_OF_SEGMENTATION} segmentation (median scores) null hypothesis: greater means"

result_df_equal = result_df_equal.style.set_caption(equal_caption)
result_df_greater = result_df_greater.style.set_caption(greater_caption)

        
# result_df_equal

# result_df_greater

In [None]:
import dataframe_image as dfi

# Export the captioned table of two-sided t-test results as a PNG in the results folder.
file_name = f"{TYPE_OF_SEGMENTATION}__median-scores_t-test_null_hyp_eq.png"

dfi.export(result_df_equal, os.path.join(RESULTS_FOLDER, file_name))

In [None]:
# Export the captioned table of one-sided (alternative='less') t-test results as a PNG.
file_name = f"{TYPE_OF_SEGMENTATION}__median-scores_t-test_null_hyp_gt.png"

dfi.export(result_df_greater, os.path.join(RESULTS_FOLDER, file_name))

In [None]:
from scipy.stats import f_oneway, kruskal


# Compare three sequence sets (t1ce, t1ce+flair, all four sequences) on their
# per-fold median scores, first with the rank-based Kruskal-Wallis H-test and
# then with the parametric one-way ANOVA.
epoch = 60

# The samples are identical for both tests, so they are fetched once.
t1ce_name, t1ce_samples, t1ce_mean = get_data("t1ce", epoch, "median")
t1ce_flair_name, t1ce_flair_samples, t1ce_flair_mean = get_data("t1ce,flair", epoch, "median")
all_name, all_samples, all_mean = get_data("t1ce,flair,t1,t2", epoch, "median")

# The header and statistic label now name the test that is actually run:
# scipy.stats.kruskal is the Kruskal-Wallis H-test, not a parametric ANOVA,
# and it returns an H statistic, not an F statistic.
print(f"Running Kruskal-Wallis H-test (non-parametric one-way ANOVA on ranks) for {TYPE_OF_SEGMENTATION}")
print("Classes are: t1ce, t1ce + flair, t1ce + flair + t1 + t2")
print(f"Epoch is {epoch} with median scores used")

print("t1ce: ", t1ce_name, t1ce_samples, t1ce_mean)
print("t1ce,flair: ", t1ce_flair_name, t1ce_flair_samples, t1ce_flair_mean)
print("t1ce,flair,t1,t2: ", all_name, all_samples, all_mean)

H, p = kruskal(t1ce_samples, t1ce_flair_samples, all_samples)

print(f"H-statistic is {H}, p-value is {p}")
print(f"p-value <= 0.05 (Reject null hypothesis): {p <= 0.05}")


print()
print()
print()
print()


print(f"Running one-way parametric ANOVA for {TYPE_OF_SEGMENTATION}: assuming variances are equal")
print("Classes are: t1ce, t1ce + flair, t1ce + flair + t1 + t2")
print(f"Epoch is {epoch} with median scores used")

print("t1ce: ", t1ce_name, t1ce_samples, t1ce_mean)
print("t1ce,flair: ", t1ce_flair_name, t1ce_flair_samples, t1ce_flair_mean)
print("t1ce,flair,t1,t2: ", all_name, all_samples, all_mean)

F, p = f_oneway(t1ce_samples, t1ce_flair_samples, all_samples)

print(f"F-statistic is {F}, p-value is {p}")
print(f"p-value <= 0.05 (Reject null hypothesis): {p <= 0.05}")



In [None]:
# Compare all four sequence sets (t1ce, flair, t1ce+flair, all four sequences)
# on their per-fold median scores, first with the rank-based Kruskal-Wallis
# H-test and then with the parametric one-way ANOVA.
epoch = 60

# The samples are identical for both tests, so they are fetched once.
t1ce_name, t1ce_samples, t1ce_mean = get_data("t1ce", epoch, "median")
flair_name, flair_samples, flair_mean = get_data("flair", epoch, "median")
t1ce_flair_name, t1ce_flair_samples, t1ce_flair_mean = get_data("t1ce,flair", epoch, "median")
all_name, all_samples, all_mean = get_data("t1ce,flair,t1,t2", epoch, "median")

# The header and statistic label now name the test that is actually run:
# scipy.stats.kruskal is the Kruskal-Wallis H-test, not a parametric ANOVA,
# and it returns an H statistic, not an F statistic.
print(f"Running Kruskal-Wallis H-test (non-parametric one-way ANOVA on ranks) for {TYPE_OF_SEGMENTATION}")
print("Classes are: t1ce, flair, t1ce + flair, t1ce + flair + t1 + t2")
print(f"Epoch is {epoch} with median scores used")

print("t1ce: ", t1ce_name, t1ce_samples, t1ce_mean)
print("flair: ", flair_name, flair_samples, flair_mean)
print("t1ce,flair: ", t1ce_flair_name, t1ce_flair_samples, t1ce_flair_mean)
print("t1ce,flair,t1,t2: ", all_name, all_samples, all_mean)

H, p = kruskal(t1ce_samples, flair_samples, t1ce_flair_samples, all_samples)

print(f"H-statistic is {H}, p-value is {p}")
print(f"p-value <= 0.05 (Reject null hypothesis): {p <= 0.05}")


print()
print()
print()
print()


print(f"Running one-way parametric ANOVA for {TYPE_OF_SEGMENTATION}: assuming variances are equal")
# The class list now includes flair, which is one of the four groups tested below.
print("Classes are: t1ce, flair, t1ce + flair, t1ce + flair + t1 + t2")
print(f"Epoch is {epoch} with median scores used")

print("t1ce: ", t1ce_name, t1ce_samples, t1ce_mean)
# The flair samples were previously printed under the label "t1ce: ".
print("flair: ", flair_name, flair_samples, flair_mean)
print("t1ce,flair: ", t1ce_flair_name, t1ce_flair_samples, t1ce_flair_mean)
print("t1ce,flair,t1,t2: ", all_name, all_samples, all_mean)

F, p = f_oneway(t1ce_samples, flair_samples, t1ce_flair_samples, all_samples)

print(f"F-statistic is {F}, p-value is {p}")
print(f"p-value <= 0.05 (Reject null hypothesis): {p <= 0.05}")

