In [1]:
%matplotlib inline
#%run script

import os
os.sys.path.append("..") #script path

import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stat
import defs

In [2]:
def filterdf(df, filtername, filter):
    df = df[~df['Model'].str.contains("_rep")]
    df = df[~df['Checkpoint'].str.contains("_rep")]
    return df[df[filtername] == filter]

file_coarse = "../../results/orig_size_res/coarse_test_100k.csv"
filter_coarse = "hr3d_half_e-3_16-72_dice_1024s"

file_fine = "../../results/orig_size_res/fine_test_50k.csv"
filter_fine = "hr3d_h_e-3_16-72_d_100k__full_e-4_24-24_gdsc_1024s_dil13"

#file_fine = "../../results/orig_size/coarse_train_100k.csv"
#filter_fine = "hr3d_half_e-3_24-24_dice_1024s"

df_coarse = pd.read_csv(file_coarse)
df_fine = pd.read_csv(file_fine)

df_coarse = filterdf(df_coarse, "Model", filter_coarse)
df_fine = filterdf(df_fine, "Model", filter_fine)

df_coarse_newidx = df_coarse.set_index(["File", "Organ"])
df_fine_newidx = df_fine.set_index(["File", "Organ"])

df_join = df_coarse_newidx.join(df_fine_newidx, how="inner", lsuffix="_coarse", rsuffix="_fine")

### p-Wert:
Die Nullhypothese wird verworfen, wenn der p-Wert kleiner als das vom Anwender festgelegte Signifikanzniveau \alpha ist.

### Normalverteilungs-Tests

#### Shapiro-Wilk Test
- https://de.wikipedia.org/wiki/Shapiro-Wilk-Test#Vor-_und_Nachteile
- Die Nullhypothese H0 nimmt an, dass eine Normalverteilung der Grundgesamtheit vorliegt

#### Kolmogorow-Smirnow-Test
- https://de.wikipedia.org/wiki/Kolmogorow-Smirnow-Test
- H0 zwei Zufallsvariablen besitzen die gleiche Verteilung

In [3]:
display(df_join.head(2))
val = "dice"
alpha = 0.05
all_organs = True

#accept H0 returns true we want the test to be normal distributed
def shapiroTest(msg, data):
    test_res = stat.shapiro(data)
    print("Shapiro-Test:", msg, test_res)
    if test_res[1] > alpha:
        return True
    return False
        
def kolmogrowTest(msg, data):
    test_res = stat.kstest(data, 'norm')
    print("Kolmogorw-Test:", msg, test_res)
    if test_res[1] > alpha:
        return True
    return False
        
shapiroTest("all coarse", df_join[val+"_coarse"])
shapiroTest("all fine", df_join[val+"_fine"])

kolmogrowTest("all coarse", df_join[val+"_coarse"])
kolmogrowTest("all fine", df_join[val+"_fine"])
print("\n\n")

if all_organs:

    for organ in defs.LABELS.keys():
        df_filter = df_join.filter(like=organ, axis=0)
        
        test_coarse = shapiroTest(organ+" coarse", df_filter[val+"_coarse"])
        test_fine = shapiroTest(organ+" fine", df_filter[val+"_fine"])
        
        if (test_coarse and test_fine):
            print("Shapiro-Test passed (=Normal dist) for: ", organ)
            
        test_coarse = kolmogrowTest(organ+" coarse", df_filter[val+"_coarse"])
        test_fine = kolmogrowTest(organ+" fine", df_filter[val+"_fine"])

        if (test_coarse and test_fine):
            print("Kolmogorow-Test passed (=Normal dist) for: ", organ)

        print("\n")

Unnamed: 0_level_0,Unnamed: 1_level_0,Model_coarse,Checkpoint_coarse,dice_coarse,95haus_dist_coarse,avghaus_dist_coarse,Model_fine,Checkpoint_fine,dice_fine,95haus_dist_fine,avghaus_dist_fine
File,Organ,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0522c0555_niftynet_out.nii.gz,BrainStem,hr3d_half_e-3_16-72_dice_1024s,100000_test,0.797774,4.482116,1.847789,hr3d_h_e-3_16-72_d_100k__full_e-4_24-24_gdsc_1...,50000_test,0.814544,6.0,5.990296
0522c0555_niftynet_out.nii.gz,Chiasm,hr3d_half_e-3_16-72_dice_1024s,100000_test,0.420202,2.870143,1.090161,hr3d_h_e-3_16-72_d_100k__full_e-4_24-24_gdsc_1...,50000_test,0.538879,2.583598,0.931909


('Shapiro-Test:', 'all coarse', (0.8568063974380493, 2.724259218211955e-08))
('Shapiro-Test:', 'all fine', (0.9000102281570435, 1.7402350067641237e-06))
('Kolmogorw-Test:', 'all coarse', KstestResult(statistic=0.6158423370464622, pvalue=0.0))
('Kolmogorw-Test:', 'all fine', KstestResult(statistic=0.6074506441199103, pvalue=0.0))



('Shapiro-Test:', 'OpticNerve_L coarse', (0.8486568331718445, 0.02129794843494892))
('Shapiro-Test:', 'OpticNerve_L fine', (0.8484792709350586, 0.021180318668484688))
('Kolmogorw-Test:', 'OpticNerve_L coarse', KstestResult(statistic=0.6713373511573103, pvalue=9.343075748624585e-07))
('Kolmogorw-Test:', 'OpticNerve_L fine', KstestResult(statistic=0.6898255760885807, pvalue=3.6364489308660097e-07))


('Shapiro-Test:', 'Chiasm coarse', (0.9600569009780884, 0.724033772945404))
('Shapiro-Test:', 'Chiasm fine', (0.9529434442520142, 0.6072680354118347))
('Shapiro-Test passed (=Normal dist) for: ', 'Chiasm')
('Kolmogorw-Test:', 'Chiasm coarse', KstestResult(statisti

### Signifikanz Tests

Die Nullhypothese wird verworfen, wenn der p-Wert kleiner als das vom Anwender festgelegte Signifikanzniveau \alpha ist.

#### T-test
- Normalverteilte Grundgesamtheit, N möglichst groß, und gleiche Varianz
- H0 zwei Stichproben (paired=abhängig oder unpaired=unabhängig) haben den gleichen (erwarteten) Mittelwert - wir wollen p < alpha um H0 zu verwerfen zu können
- https://en.wikipedia.org/wiki/Student%27s_t-test#Unpaired_and_paired_two-sample_t-tests
- **Unpaired**: 
- https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html
- Welch test: aber Varianzen müssen nicht gleich sein, in scipy gibs nur einen unpaird welch test
- **Paired**: 
- https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html


#### Wilcoxon Test
- H0 zwei Stichproben (paired=abhängig oder unpaired=unabhängig) stammen aus der selben verteilung
- **Unpaired**:
- https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test
- https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ranksums.html
- Mann Whitney test does some continuety correction
- **Paired**:
- https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.wilcoxon.html
- https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test
- Typischerweiße sollte n>20 sein

In [7]:
display(df_join.head(2))
#val = "95haus_dist"
val = "dice"
alpha = 0.05
all_organs = False

########UNPAIRED
#reject H0 returns true we want the mean values to be different
def tTest(msg, coarse_data, fine_data):
    test_res = stat.ttest_ind(coarse_data, fine_data, equal_var=True)
    print("t-Test: ", msg, test_res)
    if test_res[1] < alpha:
        return True
    return False

def welchtTest(msg, coarse_data, fine_data):
    test_res = stat.ttest_ind(coarse_data, fine_data, equal_var=True)
    print("Welch t-Test: ", msg, test_res)
    if test_res[1] < alpha:
        return True
    return False

def rankSumTest(msg, coarse_data, fine_data):
    test_res = stat.ranksums(coarse_data, fine_data)
    print("Wilcoxon Test(unpaired): ", msg, test_res)
    if test_res[1] < alpha:
        return True
    return False

def rankSumMWTest(msg, coarse_data, fine_data):
    test_res = stat.mannwhitneyu(coarse_data, fine_data, use_continuity=True, alternative='two-sided')
    print("Wilcoxon-Mann-Whitneyu Test(unpaired): ", msg, test_res)
    if test_res[1] < alpha:
        return True
    return False


#######PAIRED
def pairedtTest(msg, coarse_data, fine_data):
    test_res = stat.ttest_rel(coarse_data, fine_data)
    print("Paired t-Test: ", msg, test_res)
    if test_res[1] < alpha:
        return True
    return False

def wilcoxonTest(msg, coarse_data, fine_data):
    test_res = stat.wilcoxon(coarse_data, fine_data)
    print("Wilcoxon Test(paired): ", msg, test_res)
    if test_res[1] < alpha:
        return True
    return False

def callAll(msg, coarse_data, fine_data):
    print("\nUnpaired Tests:")
    tTest(msg, coarse_data, fine_data)
    welchtTest(msg, coarse_data, fine_data)
    rankSumTest(msg, coarse_data, fine_data)
    rankSumMWTest(msg, coarse_data, fine_data)

    print("\nPaired Tests:")
    pairedtTest(msg, coarse_data, fine_data)
    wilcoxonTest(msg, coarse_data, fine_data)
    

callAll("", df_join[val+"_coarse"], df_join[val+"_fine"])
    

if all_organs:

    for organ in defs.LABELS.keys():
        df_filter = df_join.filter(like=organ, axis=0)
        
        display(df_filter.head(2))
        
        callAll(organ,  df_filter[val+"_coarse"], df_filter[val+"_fine"])
        print("\n\n")

Unnamed: 0_level_0,Unnamed: 1_level_0,Model_coarse,Checkpoint_coarse,dice_coarse,95haus_dist_coarse,avghaus_dist_coarse,Model_fine,Checkpoint_fine,dice_fine,95haus_dist_fine,avghaus_dist_fine
File,Organ,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0522c0555_niftynet_out.nii.gz,BrainStem,hr3d_half_e-3_16-72_dice_1024s,100000_test,0.797774,4.482116,1.847789,hr3d_h_e-3_16-72_d_100k__full_e-4_24-24_gdsc_1...,50000_test,0.814544,6.0,5.990296
0522c0555_niftynet_out.nii.gz,Chiasm,hr3d_half_e-3_16-72_dice_1024s,100000_test,0.420202,2.870143,1.090161,hr3d_h_e-3_16-72_d_100k__full_e-4_24-24_gdsc_1...,50000_test,0.538879,2.583598,0.931909



Unpaired Tests:
('t-Test: ', '', Ttest_indResult(statistic=-0.32267272147087145, pvalue=0.7472906026691292))
('Welch t-Test: ', '', Ttest_indResult(statistic=-0.32267272147087145, pvalue=0.7472906026691292))
('Wilcoxon Test(unpaired): ', '', RanksumsResult(statistic=-0.07303473068541455, pvalue=0.9417784802074283))
('Wilcoxon-Mann-Whitneyu Test(unpaired): ', '', MannwhitneyuResult(statistic=4773.0, pvalue=0.9427805617820442))

Paired Tests:
('Paired t-Test: ', '', Ttest_relResult(statistic=-0.9963063228695512, pvalue=0.32158024444195804))
('Wilcoxon Test(paired): ', '', WilcoxonResult(statistic=1938.0, pvalue=0.08407902106054022))
