In [1]:
import pickle
import numpy as np
from scipy import stats
## stats.ttest_ind, stats.ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2, equal_var=True, alternative='two-sided')
import pandas as pd

In [2]:
def which_is_better(mean1, std1, nobs1, mean2, std2, nobs2, alpha_one_tail=0.025):
    # Test whether mean(M1) > mean(M2) using Welch's t-test
    # at default alpha = 0.025 significance level for the one tail test
    #
    # M1 - relevant
    # M2 - irrelevant
    #
    # Note that stats.ttest_ind return the two-tailed p-value
    #
    # Null hypothesis: mean(M1) <= mean(M2)
    # Alternative: mean(M1) > mean(M2) 
    #
    # REJECT if pvalue/2 < alpha
    
    # tstat, pvalue = stats.ttest_ind(M1_values, M2_values, equal_var = False)
    tstat, pvalue = stats.ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2, equal_var = False)    
    print('tstat = ', tstat)
    print('pvalue = ', pvalue)
    if pvalue/2 < alpha_one_tail:
        if tstat < 0.0:
            s = 'Null rejected: more confident that mean(M1) < mean(M2)'
            print(s)
            return (tstat, pvalue, s) ## return 0
        else:
            # you want to be here
            s = 'Null rejected: more confident that mean(M1) > mean(M2)'
            print(s)
            return (tstat, pvalue, s) ## return 1-pvalue
        return (tstat, pvalue, '-----------------') ## return True
    else:
        s = 'Cannot reject null: no confidence which one is better'
        print(s)
        return (tstat, pvalue, s) ## return 0

In [3]:
list_should_std = [False, True] ## [False, True]
n_trials = 50 ## 10 or 50

In [4]:
d_result = {}

for should_std in list_should_std:
    with open('datasets/d_result_' + str(should_std) + '.pickle', 'rb') as handle:
        d_result_local = pickle.load(handle)
        d_result.update(d_result_local)

In [5]:
# d_result ## len := 4800 = 32 * 50 * 3 (nCon, nReg, nRegFlat)
len(d_result)

200

In [6]:
# d_result

In [7]:
list_row = [
    [
        'should_std', '# success (nCon, nReg)',
        'fdr (nCon)', 'fdr (nReg)', 'pval', 'tstat', 'comment',
        'shd (nCon)', 'shd (nReg)', 'pval', 'tstat', 'comment'        
    ]
]
for should_std in list_should_std:    
    fdr, tpr, fpr, shd, nnz = [], [], [], [], []
    for tn in range(n_trials):
        key = (should_std, tn, 'nCon')
        value = d_result[key]
        if value[0] != '-':
            fdr.append(value[0])
        if value[1] != '-':
            tpr.append(value[1])
        if value[2] != '-':
            fpr.append(value[2])
        if value[3] != '-':
            shd.append(value[3])
        if value[4] != '-':
            nnz.append(value[4])
    nvalid = len(tpr)
    assert nvalid == len(fdr) == len(fpr) == len(shd) == len(nnz)

    fdr3, tpr3, fpr3, shd3, nnz3 = [], [], [], [], []
    for tn in range(n_trials):
        key = (should_std, tn, 'nRegFlat') ## nReg or nRegFlat
        value = d_result[key]
        if value[0] != '-':
            fdr3.append(value[0])
        if value[1] != '-':
            tpr3.append(value[1])
        if value[2] != '-':
            fpr3.append(value[2])
        if value[3] != '-':
            shd3.append(value[3])
        if value[4] != '-':
            nnz3.append(value[4])      
    nvalid3 = len(tpr3)
    assert nvalid3 == len(fdr3) == len(fpr3) == len(shd3) == len(nnz3)


    # print(n, d, s0_factor, gt, should_std)                        
    # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(fdr), np.std(fdr), np.mean(fdr2), np.std(fdr2)))
#     t1, p1, s1 = which_is_better(np.mean(fdr), np.std(fdr), nvalid, np.mean(fdr2), np.std(fdr2), nvalid2)                                                
    # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(tpr), np.std(tpr), np.mean(tpr2), np.std(tpr2)))
    # which_is_better(np.mean(tpr), np.std(tpr), nvalid, np.mean(tpr2), np.std(tpr2), nvalid2)
    # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(fpr), np.std(fpr), np.mean(fpr2), np.std(fpr2)))
    # which_is_better(np.mean(fpr), np.std(fpr), nvalid, np.mean(fpr2), np.std(fpr2), nvalid2)                        
    # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(shd), np.std(shd), np.mean(shd2), np.std(shd2)))  
#     t2, p2, s2 = which_is_better(np.mean(shd), np.std(shd), nvalid, np.mean(shd2), np.std(shd2), nvalid2)                    
    # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(nnz), np.std(nnz), np.mean(nnz2), np.std(nnz2)))   
    # which_is_better(np.mean(nnz), np.std(nnz), nvalid, np.mean(nnz2), np.std(nnz2), nvalid2)   
    # print(key[:-2], nvalid, nvalid2)
    # print()
    # print()
    # print(n, d, s0_factor, gt, should_std)  
    # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(fdr), np.std(fdr), np.mean(fdr3), np.std(fdr3)))
    t1, p1, s1 = which_is_better(np.mean(fdr), np.std(fdr), nvalid, np.mean(fdr3), np.std(fdr3), nvalid3)                                                                        
    # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(tpr), np.std(tpr), np.mean(tpr3), np.std(tpr3)))
    # which_is_better(np.mean(tpr), np.std(tpr), nvalid, np.mean(tpr3), np.std(tpr3), nvalid3)                        
    # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(fpr), np.std(fpr), np.mean(fpr3), np.std(fpr3)))
    # which_is_better(np.mean(fpr), np.std(fpr), nvalid, np.mean(fpr3), np.std(fpr3), nvalid3)                                                
    # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(shd), np.std(shd), np.mean(shd3), np.std(shd3)))  
    t2, p2, s2 = which_is_better(np.mean(shd), np.std(shd), nvalid, np.mean(shd3), np.std(shd3), nvalid3)                    
    # print("{} ==> {:0.2f} ± {:0.2f} vs {:0.2f} ± {:0.2f}".format(key[:-2], np.mean(nnz), np.std(nnz), np.mean(nnz3), np.std(nnz3)))   
    # which_is_better(np.mean(nnz), np.std(nnz), nvalid, np.mean(nnz3), np.std(nnz3), nvalid3)   
    # print(key[:-2], nvalid, nvalid3)
    # print()tpr
    # print()   

    row = [
        str(should_std), '{0}, {1}'.format(nvalid, nvalid3),
        "{:0.2f} ± {:0.2f}".format(np.mean(fdr), np.std(fdr)),
        "{:0.2f} ± {:0.2f}".format(np.mean(fdr3), np.std(fdr3)),
        p1, t1, s1,
        "{:0.2f} ± {:0.2f}".format(np.mean(shd), np.std(shd)),
        "{:0.2f} ± {:0.2f}".format(np.mean(shd3), np.std(shd3)),
        p2, t2, s2                            
    ]
    list_row.append(row)                        

tstat =  -4.630365281520934
pvalue =  1.8431391607422654e-05
Null rejected: more confident that mean(M1) < mean(M2)
tstat =  -18.403817483999052
pvalue =  3.8281918245434327e-31
Null rejected: more confident that mean(M1) < mean(M2)
tstat =  -3.1841759973140653
pvalue =  0.002400690594842813
Null rejected: more confident that mean(M1) < mean(M2)
tstat =  -11.906765799334782
pvalue =  6.626362804504458e-20
Null rejected: more confident that mean(M1) < mean(M2)


In [8]:
len(list_row)

3

In [9]:
import pandas as pd

df = pd.DataFrame(list_row)
writer = pd.ExcelWriter('datasets/result_1_nCon_nRegFlat.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name='1', index=False)
writer.save()

In [10]:
pd.read_excel('datasets/result_1_nCon_nRegFlat.xlsx').head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,should_std,"# success (nCon, nReg)",fdr (nCon),fdr (nReg),pval,tstat,comment,shd (nCon),shd (nReg),pval,tstat,comment
1,False,"50, 50",0.61 ± 0.18,0.74 ± 0.07,1.84314e-05,-4.63037,Null rejected: more confident that mean(M1) < ...,40.86 ± 1.72,49.14 ± 2.68,3.82819e-31,-18.4038,Null rejected: more confident that mean(M1) < ...
2,True,"50, 50",0.56 ± 0.29,0.69 ± 0.07,0.00240069,-3.18418,Null rejected: more confident that mean(M1) < ...,41.62 ± 1.74,46.84 ± 2.56,6.62636e-20,-11.9068,Null rejected: more confident that mean(M1) < ...
