In [1]:
from scipy.stats import ranksums, sem
import numpy as np
from statannot import add_stat_annotation
import copy
import os

In [2]:
import matplotlib.pyplot as plt
import matplotlib

In [3]:
# Folder that receives the exported figure (absolute path on the analysis machine).
save_dir = "/analysis/fabiane/documents/publications/patch_individual_filter_layers/MIA_revision"

In [4]:
# Figure styling: ggplot look, rendered through the PGF backend with LaTeX text.
plt.style.use("ggplot")
matplotlib.use("pgf")
plt.rcParams.update(
    {
        "pgf.texsystem": "pdflatex",
        "font.family": "serif",
        "font.size": 8,
        "text.usetex": True,
        "pgf.rcfonts": False,
    }
)

In [5]:
def get_runtime(filename):
    """Extract per-run training times and final iteration counts for one experiment notebook.

    The notebook file is searched with ``grep`` (through IPython's ``!`` shell
    capture, so this only runs inside IPython/Jupyter):

    * every line containing "Total time elapsed:" is converted to a run time
      in seconds;
    * the single line containing "output_dir = " supplies a directory that is
      listed with ``ls -l``; each entry ending in "FINAL.h5" contributes the
      integer found in its second-to-last "_"-separated field.

    Parameters
    ----------
    filename : str
        Path of the notebook file to scan; it is substituted into the shell
        commands as ``$filename``.

    Returns
    -------
    tuple of (list of int, list of int)
        ``(times, iterations)``: run times in seconds and iteration numbers.

    Raises
    ------
    AssertionError
        If the file does not contain exactly one "output_dir = " line.
    """
    # IPython shell capture: each variable holds the matching lines of `filename`.
    all_runtime_lines = ! grep "Total time elapsed:" $filename
    all_iter_lines = ! grep "output_dir = " $filename
    times = []
    iterations = []
    
    for runtime_line in all_runtime_lines:
        time = 0        
        # convert runtime: fields 1-3 of the ":"-split line are read as
        # hours, minutes and seconds, in that order
        for idx, inner_line in enumerate(runtime_line.split(":")[1:4]):
            # keep only the text before the first backslash
            # (presumably the escaped newline of the notebook JSON -- TODO confirm)
            inner_line = inner_line.strip().split("\\")[0]
            if idx == 0:
                # remove time symbol and convert to seconds
                time += int(inner_line.split("h")[0]) * 3600
            elif idx == 1:
                # remove time symbol and convert to seconds
                time += int(inner_line.split("m")[0]) * 60
            elif idx == 2:
                # remove time symbol
                time += int(inner_line.split("s")[0])
        times.append(time)
        
    # UKB baseline full set didn't finish reporting for the last of the 10 runs
    # it finished running but did not print out its running time.
    # Therefore, we ran the model again in a separate file for that run.
    # NOTE(review): this check looks only at the number of parsed times, so ANY
    # file with exactly nine reported runs receives the same hard-coded value.
    if len(times) == 9:
        failed_time = 5 * 3600 + 42 * 60 + 49 # taken from individual run of final iteration
        times.append(failed_time)

    # convert iterations
    assert(len(all_iter_lines) == 1)
    # third double-quote-delimited field of the matched line
    # (presumably the output directory path -- verify against the notebook contents)
    iter_line = all_iter_lines[0].split("\"")[2]
    all_checkpoints = !ls -l $iter_line
    for checkpoint in all_checkpoints:
        # `ls -l` lines end with the file name, so the suffix test applies to the name
        if checkpoint.endswith("FINAL.h5"):
            iterations.append(int(checkpoint.split("_")[-2]))

    #assert(len(times) == 50 or len(times) == 10)
    return times, iterations

In [6]:
# cleanup script for old runs
#!destination="/ritter/share/projects/Methods/Eitel_local_filter/experiments_submission/models/MS/full_set/10xrandom_splits/experiment_r3/backup" find "/ritter/share/projects/Methods/Eitel_local_filter/experiments_submission/models/MS/full_set/10xrandom_splits/experiment_r3/" -type f -newermt "2021-01-01 00:00" -not -newermt "2021-01-22 15:55" -exec bash -c ' dirname=$(dirname {}); mkdir -p "${destination}/${dirname}"; echo ! mv {} ${destination}/${dirname}/' \;;

In [7]:
# Parse a single experiment notebook on its own.
times, iterations = get_runtime(
    "ADNI_experiment-20_percent-10xrandom_sampling-random_search-Copy1.ipynb"
)

In [8]:
# Experiment name -> notebooks holding the training logs of that experiment.
# The position inside each list is significant for the plotting loop:
#   [0] baseline, [1] patch-based (LiuPatches), [2] PIF (experiment).
# The insertion order of the keys determines the left-to-right order of the bar groups.
filename_list = {
    "ADNI_small": [
        "ADNI_baseline-20_percent-10xrandom_sampling-random_search.ipynb",
        "ADNI_LiuPatches-20_percent-10xrandom_sampling_random_search.ipynb",
        "ADNI_experiment-20_percent-10xrandom_sampling-random_search-Copy1.ipynb",
    ],
    "UKB_small": [
        "UKB_sex_baseline-20_percent-10xrandom_sampling_random_search-Copy1.ipynb",
        "UKB_sex_LiuPatches-20_percent-10xrandom_sampling_random_search.ipynb",
        "UKB_sex_experiment-20_percent-10xrandom_sampling-random_search-Copy1.ipynb",
    ],
    "MS_small": [
        "MS_baseline-full_set-10xrandom_splits-random_search-Copy2.ipynb",
        "MS_LiuPatches-full_set-10xrandom_splits.ipynb",
        "MS_experiment-full_set-10xrandom_splits-random_search-Copy1.ipynb",
    ],
    "ADNI_big": [
        "ADNI_baseline-full_set-10xrandom_sampling-random_search.ipynb",
        "ADNI_LiuPatches-full_set-10xrandom_sampling.ipynb",
        "ADNI_experiment-full_set-10xrandom_sampling-random_search.ipynb",
    ],
    "UKB_big": [
        "UKB_sex_baseline-full_set-10xrandom_sampling-random_search-Copy1.ipynb",
        "UKB_sex_LiuPatches-full_set-10xrandom_sampling_random_search.ipynb",
        "UKB_sex_experiment-full_set-10xrandom_sampling-random_search-Copy1.ipynb",
    ],
}

In [9]:
# Disabled earlier version of the comparison plot (two bars per experiment).
# The code is wrapped in a string literal so it is never executed; because the
# string is the last expression of the cell, the notebook echoes it as output.
"""fig = plt.Figure()
axs = []
for i, experiment in enumerate(filename_list):
    print(experiment)
    time_base, iter_base = get_runtime(filename_list[experiment][0])
    time_pif, iter_pif = get_runtime(filename_list[experiment][1])
    # run statistical test
    test_time = ranksums(time_base, time_pif)
    test_iter = ranksums(iter_base, iter_pif)
    print(f"Avg time baseline in seconds: {np.mean(time_base)}")
    print(f"Avg time PIF in seconds: {np.mean(time_pif)}")
    print(test_time)
    print(f"Avg number of iterations baseline: {np.mean(iter_base)}")
    print(f"Avg number of iterations PIF: {np.mean(iter_pif)}")
    print(test_iter)
    
    # plot results
    i *= 3
    ax = plt.bar([i, i+1],
                 [np.mean(time_base), np.mean(time_pif)],
                 color=["tab:blue", "tab:orange"],
                 label=["Baseline", "PIF"])
    axs.append(ax)
    plt.errorbar(x=[i, i+1], 
                 y=[np.mean(time_base), np.mean(time_pif)], 
                 yerr=[sem(time_base), sem(time_pif)], 
                 color="black",
                 ls="none",
                 label="_Errorbar")
    
    x1, x2 = i, i+1
    y, h, col = np.max(time_base) + 70, 2, 'k'
    #plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
    if test_time.pvalue < 0.05:
        plt.text((x1+x2)*.5, y+h, "*", ha='center', va='bottom', color=col)
    #else:
    #    plt.text((x1+x2)*.5, y+h, "ns", ha='center', va='bottom', color=col)

leg = plt.legend(axs, ["Baseline", "PIF"])
leg.legendHandles[0].set_color('tab:blue')
leg.legendHandles[1].set_color('tab:orange')
plt.xticks(np.arange(0.5, 13, step=3), ["ADNI small", "ADNI big", "UKB small", "UKB big", "VIMS"])
plt.ylabel("Seconds")
plt.title("Runtime in seconds")
plt.show()"""

'fig = plt.Figure()\naxs = []\nfor i, experiment in enumerate(filename_list):\n    print(experiment)\n    time_base, iter_base = get_runtime(filename_list[experiment][0])\n    time_pif, iter_pif = get_runtime(filename_list[experiment][1])\n    # run statistical test\n    test_time = ranksums(time_base, time_pif)\n    test_iter = ranksums(iter_base, iter_pif)\n    print(f"Avg time baseline in seconds: {np.mean(time_base)}")\n    print(f"Avg time PIF in seconds: {np.mean(time_pif)}")\n    print(test_time)\n    print(f"Avg number of iterations baseline: {np.mean(iter_base)}")\n    print(f"Avg number of iterations PIF: {np.mean(iter_pif)}")\n    print(test_iter)\n    \n    # plot results\n    i *= 3\n    ax = plt.bar([i, i+1],\n                 [np.mean(time_base), np.mean(time_pif)],\n                 color=["tab:blue", "tab:orange"],\n                 label=["Baseline", "PIF"])\n    axs.append(ax)\n    plt.errorbar(x=[i, i+1], \n                 y=[np.mean(time_base), np.mean(time_pif)],

In [13]:
# Two side-by-side panels (left: run time, right: iterations), created directly
# at the final size in inches instead of being created larger and resized.
fig, axes = plt.subplots(1, 2, figsize=(6.1, 3.1))

def inc_y(y):
    """Return ``y`` moved up by one annotation row.

    The step is 2500 when ``y`` is greater than 1000 and 23 otherwise.
    """
    step = 2500 if y > 1000 else 23
    return y + step

def _significance_label(pvalue, single_star_alpha):
    """Return "**" for p < 0.001, "*" for p < ``single_star_alpha``, otherwise None."""
    if pvalue < 0.001:
        return "**"
    if pvalue < single_star_alpha:
        return "*"
    return None


def sub_plot(i, ax, base_data, patch_data, pif_data, test_base_patch, test_base_pif, test_patch_pif):
    """Draw one experiment's bar group with error bars and significance brackets.

    Three bars (baseline, patch-based, PIF) are placed at x = 4*i, 4*i+1 and
    4*i+2. Bar heights are the means of the data, error bars the standard
    errors (``sem``). For every pairwise test that is significant, a bracket
    with a star label is drawn above the group.

    Parameters
    ----------
    i : int
        Index of the experiment; selects the x slot of the bar group.
    ax : matplotlib Axes
        Axes to draw on.
    base_data, patch_data, pif_data : sequence of numbers
        Per-run values of the baseline, patch-based and PIF models.
    test_base_patch, test_base_pif, test_patch_pif : object with ``.pvalue``
        Results of the three pairwise tests.

    Notes
    -----
    Uses the module-level names ``np``, ``sem`` and ``inc_y``.
    """
    i *= 4
    xs = [i, i + 1, i + 2]
    means = [np.mean(base_data), np.mean(patch_data), np.mean(pif_data)]

    ax.bar(xs, means, color=["tab:gray", "tab:blue", "tab:orange"])
    ax.errorbar(x=xs,
                y=means,
                yerr=[sem(base_data), sem(patch_data), sem(pif_data)],
                color="black",
                ls="none")

    # Brackets start just above the patch-based bar. The offset and bracket
    # height switch on the magnitude of that mean (presumably to separate the
    # seconds panel from the iterations panel -- TODO confirm the 360 cut-off).
    y, col = means[1], 'k'
    if y > 360:
        y += 800
        h = 500
    else:
        y += 5
        h = 2

    # (x positions, test result, single-star threshold, move up one row first?)
    # NOTE(review): the single-star threshold is 0.01 for the two baseline
    # comparisons but 0.05 for patch-based vs PIF; kept as in the original.
    comparisons = (
        ((i, i + 1), test_base_patch, 0.01, False),
        ((i, i + 2), test_base_pif, 0.01, True),
        ((i + 1, i + 2), test_patch_pif, 0.05, True),
    )
    for (x1, x2), test, single_star_alpha, move_up in comparisons:
        label = _significance_label(test.pvalue, single_star_alpha)
        if label is None:
            continue
        if move_up:
            # stack this bracket above the previously used height
            y = inc_y(y)
        ax.text((x1 + x2) * .5, y + h / 2, label, ha='center', va='bottom', color=col)
        ax.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
        

# One bar group per experiment: parse the logs of the three models, compare
# them pairwise with a Wilcoxon rank-sum test and add the bars to both panels.
for i, experiment in enumerate(filename_list):
    print(experiment)
    time_base, iter_base = get_runtime(filename_list[experiment][0])
    time_patch, iter_patch = get_runtime(filename_list[experiment][1])
    time_pif, iter_pif = get_runtime(filename_list[experiment][2])
    # run statistical test
    test_time_base_pif = ranksums(time_base, time_pif)
    test_time_base_patch = ranksums(time_base, time_patch)
    test_time_patch_pif = ranksums(time_patch, time_pif)
    test_iter_base_pif = ranksums(iter_base, iter_pif)
    test_iter_base_patch = ranksums(iter_base, iter_patch)
    test_iter_patch_pif = ranksums(iter_patch, iter_pif)
    print(f"Avg time baseline in seconds: {np.mean(time_base)}")
    print(f"Avg time patch-based in seconds: {np.mean(time_patch)}")
    print(f"Avg time PIF in seconds: {np.mean(time_pif)}")
    print("Test time base vs patch ", test_time_base_patch)
    print("Test time base vs pif ", test_time_base_pif)
    print("Test time patch vs pif ", test_time_patch_pif)
    print(f"Avg number of iterations baseline: {np.mean(iter_base)}")
    print(f"Avg number of iterations patch-based: {np.mean(iter_patch)}")
    print(f"Avg number of iterations PIF: {np.mean(iter_pif)}")
    print("Test iter base vs patch ", test_iter_base_patch)
    print("Test iter base vs pif ", test_iter_base_pif)
    print("Test iter patch vs pif ", test_iter_patch_pif)

    # plot run time results
    sub_plot(i, axes[0], time_base, time_patch, time_pif, test_time_base_patch, test_time_base_pif, test_time_patch_pif)

    # plot training iters results
    sub_plot(i, axes[1], iter_base, iter_patch, iter_pif, test_iter_base_patch, test_iter_base_pif, test_iter_patch_pif)

# Axis decoration does not depend on the experiment, so it is applied once
# after all bars are drawn rather than being re-created in every iteration.
for ax_idx, ax in enumerate(axes):
    # one tick under the middle bar of each group (groups are 4 units apart)
    ax.set_xticks(np.arange(1, 20, step=4))
    ax.set_xticklabels(["ADNI", "UKB", "VIMS", "ADNI",  "UKB"], rotation=45)
    ax.annotate('Small', (0.22,0), (0, -42), color="gray", xycoords='axes fraction', textcoords='offset points', va='top')
    ax.annotate('Big', (0.74,0), (0, -42), color="gray", xycoords='axes fraction', textcoords='offset points', va='top')

    # brackets below the axis: x in data coordinates, y in axes coordinates
    trans = ax.get_xaxis_transform()
    ax.plot([-.4,-.4,10,10],[-.20,-.20-0.03,-.20-0.03,-.20], color="gray", transform=trans, clip_on=False) # line small
    ax.plot([12,12,19,19],[-.20,-.20-0.03,-.20-0.03,-.20], color="gray", transform=trans, clip_on=False) # line big

    if ax_idx == 0:
        ax.set_ylabel("Seconds")
        ax.set_title("Run time in seconds")
        # Build the legend from explicit proxy patches instead of editing the
        # handles of a throw-away legend (Legend.legendHandles is deprecated
        # in newer Matplotlib releases).
        legend_handles = [
            matplotlib.patches.Rectangle(xy=(0, 0), width=20, height=7, color=bar_color)
            for bar_color in ("tab:gray", "tab:blue", "tab:orange")
        ]
        ax.legend(legend_handles, ["Baseline", "Patch-based", "PIF"], loc="upper left")
    else:
        ax.set_ylabel("Iterations")
        ax.set_title("Number of iterations")
    
#leg = plt.legend(axes, ["Baseline", "PIF"])

#plt.show()

# savefig does not create missing folders, so make sure the target exists first.
os.makedirs(save_dir, exist_ok=True)
fig.savefig(os.path.join(save_dir, "Training_speed_comparison.pgf"), bbox_inches='tight', dpi=250)
#fig.show()

ADNI_small
Avg time baseline in seconds: 511.42
Avg time patch-based in seconds: 554.28
Avg time PIF in seconds: 369.16
Test time base vs patch  RanksumsResult(statistic=-1.5304280123514791, pvalue=0.12591081960766878)
Test time base vs pif  RanksumsResult(statistic=4.2879559625343244, pvalue=1.8032483134953376e-05)
Test time patch vs pif  RanksumsResult(statistic=6.8938198754571127, pvalue=5.4313789912691996e-12)
Avg number of iterations baseline: 100.16
Avg number of iterations patch-based: 175.74
Avg number of iterations PIF: 78.76
Test iter base vs patch  RanksumsResult(statistic=-7.8658484778965656, pvalue=3.6660406342101307e-15)
Test iter base vs pif  RanksumsResult(statistic=2.9298734470692729, pvalue=0.003391000757993385)
Test iter patch vs pif  RanksumsResult(statistic=8.1105790834752938, pvalue=5.0379086455673431e-16)
UKB_small
Avg time baseline in seconds: 5397.66
Avg time patch-based in seconds: 8297.1
Avg time PIF in seconds: 4103.74
Test time base vs patch  RanksumsResult