In [47]:
import pickle
import numpy as np
from scipy import stats
## stats.ttest_ind, stats.ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2, equal_var=True, alternative='two-sided')
import pandas as pd

In [48]:
def which_is_better(mean1, std1, nobs1, mean2, std2, nobs2, alpha_one_tail=0.025):
    # Test whether mean(M1) > mean(M2) using Welch's t-test
    # at default alpha = 0.025 significance level for the one tail test
    #
    # M1 - relevant
    # M2 - irrelevant
    #
    # Note that stats.ttest_ind return the two-tailed p-value
    #
    # Null hypothesis: mean(M1) <= mean(M2)
    # Alternative: mean(M1) > mean(M2) 
    #
    # REJECT if pvalue/2 < alpha
    
    # tstat, pvalue = stats.ttest_ind(M1_values, M2_values, equal_var = False)
    tstat, pvalue = stats.ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2, equal_var = False)    
    print('tstat = ', tstat)
    print('pvalue = ', pvalue)
    if pvalue/2 < alpha_one_tail:
        if tstat < 0.0:
            s = 'Null rejected: more confident that mean(M1) < mean(M2)'
            print(s)
            return (tstat, pvalue, s) ## return 0
        else:
            # you want to be here
            s = 'Null rejected: more confident that mean(M1) > mean(M2)'
            print(s)
            return (tstat, pvalue, s) ## return 1-pvalue
        return (tstat, pvalue, '-----------------') ## return True
    else:
        s = 'Cannot reject null: no confidence which one is better'
        print(s)
        return (tstat, pvalue, s) ## return 0

In [49]:
list_dt_st = [('nonlinear', 'mlp')] ## [('nonlinear', 'mlp'), ('linear', 'mlp')]
list_n = [200, 1000] ## [200, 1000]
list_d = [10, 20] ## [10, 20]
list_s0_factor = [1, 4] ## [1, 4]
list_gt = ['ER', 'SF'] ## ['ER', 'SF']
list_should_std = [False, True] ## [False, True]
n_trials = 50 ## 10 or 50
# list_dt_st = [('nonlinear', 'mlp')] ## [('nonlinear', 'mlp'), ('linear', 'mlp')]
# list_n = [200] ## [200, 1000]
# list_d = [10] ## [10, 20]
# list_s0_factor = [1, 4] ## [1, 4]
# list_gt = ['ER'] ## ['ER', 'SF']
# list_should_std = [False] ## [False, True]
# n_trials = 3 ## 10 or 50

In [50]:
d_result = {}

for dt, st in list_dt_st:
    for n in list_n:
        for d in list_d:
            for s0_factor in list_s0_factor:
                for gt in list_gt:
                    for should_std in list_should_std:
                        with open('datasets/d_result_' + str(n) + '_' + str(d) + '_' + str(s0_factor) + '_' + str(gt) + '_' + str(should_std) + '.pickle', 'rb') as handle:
                            d_result_local = pickle.load(handle)
                            d_result.update(d_result_local)

In [51]:
# d_result ## len := 4800 = 32 * 50 * 3 (nCon, nReg, nRegFlat)

In [52]:
list_row = [
    [
        'dim_con',
        'should_std', 'n', 'd', 's0', 'gt',
        '# success (nCon, nReg)',
        'tpr (nCon)', 'tpr (nReg)', 'pval', 'tstat', 'comment',
        'shd (nCon)', 'shd (nReg)', 'pval', 'tstat', 'comment'        
    ]
]
for dt, st in list_dt_st:
    for should_std in list_should_std:    
        for n in list_n:
            for d in list_d:
                for s0_factor in list_s0_factor:
                    for gt in list_gt:
                    
                        
                        fdr, tpr, fpr, shd, nnz = [], [], [], [], []
                        for tn in range(n_trials):
                            key = (n, d, s0_factor, gt, should_std, tn, 'nCon')
                            value = d_result[key]
                            if value[0] != '-':
                                fdr.append(value[0])
                            if value[1] != '-':
                                tpr.append(value[1])
                            if value[2] != '-':
                                fpr.append(value[2])
                            if value[3] != '-':
                                shd.append(value[3])
                            if value[4] != '-':
                                nnz.append(value[4])
                        nvalid = len(tpr)
                        assert nvalid == len(fdr) == len(fpr) == len(shd) == len(nnz)
                            
                        fdr2, tpr2, fpr2, shd2, nnz2 = [], [], [], [], []
                        for tn in range(n_trials):
                            key = (n, d, s0_factor, gt, should_std, tn, 'nReg') ## nReg or nRegFlat
                            value = d_result[key]
                            if value[0] != '-':
                                fdr2.append(value[0])
                            if value[1] != '-':
                                tpr2.append(value[1])
                            if value[2] != '-':
                                fpr2.append(value[2])
                            if value[3] != '-':
                                shd2.append(value[3])
                            if value[4] != '-':
                                nnz2.append(value[4])      
                        nvalid2 = len(tpr2)
                        assert nvalid2 == len(fdr2) == len(fpr2) == len(shd2) == len(nnz2)

                        fdr3, tpr3, fpr3, shd3, nnz3 = [], [], [], [], []
                        for tn in range(n_trials):
                            key = (n, d, s0_factor, gt, should_std, tn, 'nRegFlat') ## nReg or nRegFlat
                            value = d_result[key]
                            if value[0] != '-':
                                fdr3.append(value[0])
                            if value[1] != '-':
                                tpr3.append(value[1])
                            if value[2] != '-':
                                fpr3.append(value[2])
                            if value[3] != '-':
                                shd3.append(value[3])
                            if value[4] != '-':
                                nnz3.append(value[4])      
                        nvalid3 = len(tpr3)
                        assert nvalid3 == len(fdr3) == len(fpr3) == len(shd3) == len(nnz3)
                        

                        # print(n, d, s0_factor, gt, should_std)                        
                        # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(fdr), np.std(fdr), np.mean(fdr2), np.std(fdr2)))
                        # t1, p1, s1 = which_is_better(np.mean(fdr), np.std(fdr), nvalid, np.mean(fdr2), np.std(fdr2), nvalid2)                                                
                        # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(tpr), np.std(tpr), np.mean(tpr2), np.std(tpr2)))
                        # which_is_better(np.mean(tpr), np.std(tpr), nvalid, np.mean(tpr2), np.std(tpr2), nvalid2)
                        # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(fpr), np.std(fpr), np.mean(fpr2), np.std(fpr2)))
                        # which_is_better(np.mean(fpr), np.std(fpr), nvalid, np.mean(fpr2), np.std(fpr2), nvalid2)                        
                        # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(shd), np.std(shd), np.mean(shd2), np.std(shd2)))  
                        # t2, p2, s2 = which_is_better(np.mean(shd), np.std(shd), nvalid, np.mean(shd2), np.std(shd2), nvalid2)                    
                        # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(nnz), np.std(nnz), np.mean(nnz2), np.std(nnz2)))   
                        # which_is_better(np.mean(nnz), np.std(nnz), nvalid, np.mean(nnz2), np.std(nnz2), nvalid2)   
                        # print(key[:-2], nvalid, nvalid2)
                        # print()
                        # print()
                        # print(n, d, s0_factor, gt, should_std)  
                        # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(fdr), np.std(fdr), np.mean(fdr3), np.std(fdr3)))
                        t1, p1, s1 = which_is_better(np.mean(fdr), np.std(fdr), nvalid, np.mean(fdr3), np.std(fdr3), nvalid3)                                                                        
                        # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(tpr), np.std(tpr), np.mean(tpr3), np.std(tpr3)))
                        # which_is_better(np.mean(tpr), np.std(tpr), nvalid, np.mean(tpr3), np.std(tpr3), nvalid3)                        
                        # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(fpr), np.std(fpr), np.mean(fpr3), np.std(fpr3)))
                        # which_is_better(np.mean(fpr), np.std(fpr), nvalid, np.mean(fpr3), np.std(fpr3), nvalid3)                                                
                        # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(shd), np.std(shd), np.mean(shd3), np.std(shd3)))  
                        t2, p2, s2 = which_is_better(np.mean(shd), np.std(shd), nvalid, np.mean(shd3), np.std(shd3), nvalid3)                    
                        # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(nnz), np.std(nnz), np.mean(nnz3), np.std(nnz3)))   
                        # which_is_better(np.mean(nnz), np.std(nnz), nvalid, np.mean(nnz3), np.std(nnz3), nvalid3)   
                        # print(key[:-2], nvalid, nvalid3)
                        # print()tpr
                        # print()   
                        
                        row = [
                            '(1-3)', 
                            str(should_std), n, d, s0_factor*d, gt, 
                            '{0}, {1}'.format(nvalid, nvalid3),
                            "{:0.2f} ± {:0.2f}".format(np.mean(fdr), np.std(fdr)),
                            "{:0.2f} ± {:0.2f}".format(np.mean(fdr3), np.std(fdr3)),
                            p1, t1, s1,
                            "{:0.2f} ± {:0.2f}".format(np.mean(shd), np.std(shd)),
                            "{:0.2f} ± {:0.2f}".format(np.mean(shd3), np.std(shd3)),
                            p2, t2, s2                            
                        ]
                        list_row.append(row)                        

tstat =  2.139897888106929
pvalue =  0.035437725567253345
Null rejected: more confident that mean(M1) > mean(M2)
tstat =  -14.65982321648635
pvalue =  1.4109825640540557e-25
Null rejected: more confident that mean(M1) < mean(M2)
tstat =  2.5086756986794985
pvalue =  0.013963772236659582
Null rejected: more confident that mean(M1) > mean(M2)
tstat =  -17.364163720597183
pvalue =  2.42453436895732e-29
Null rejected: more confident that mean(M1) < mean(M2)
tstat =  -1.1167987547656082
pvalue =  0.26769964787890194
Cannot reject null: no confidence which one is better
tstat =  -14.044275072215068
pvalue =  1.392761756251286e-24
Null rejected: more confident that mean(M1) < mean(M2)
tstat =  0.9923335523041268
pvalue =  0.32447683399935157
Cannot reject null: no confidence which one is better
tstat =  -12.987004305443902
pvalue =  3.069648828561921e-22
Null rejected: more confident that mean(M1) < mean(M2)
tstat =  1.8820581207806353
pvalue =  0.06389536087176907
Cannot reject null: no conf

In [53]:
len(list_row)

33

In [54]:
import pandas as pd

df = pd.DataFrame(list_row)
writer = pd.ExcelWriter('datasets/result_2_nCon_nRegFlat.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='1', index=False)
writer.save()