In [8]:
import h5py as h5
import numpy as np
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.metrics import auc, roc_curve, matthews_corrcoef
from scipy.stats import norm
from scipy.io import loadmat
import seaborn as sns
import os, sys

In [9]:
def load_and_calc(file_name):
    """Score a single prediction file.

    Parameters
    ----------
    file_name : str
        Path to an HDF5 file containing the datasets ``pred_param`` and
        ``target_param``. Both are flattened to 1-D before scoring.

    Returns
    -------
    tuple
        ``(mcc, false_positives, false_negatives)`` where ``mcc`` is the
        Matthews correlation coefficient between ``target`` and the
        predictions thresholded at 0.5, and the two counts are the number of
        positions with ``pred == 1 & target == 0`` and
        ``pred == 0 & target == 1`` respectively.

    Raises
    ------
    KeyError
        If either dataset is absent from the file.

    Errors raised by h5py while opening the file propagate unchanged.
    """
    with h5.File(file_name, 'r') as f:
        # Index the file directly instead of f.get(): a missing dataset then
        # fails with a KeyError naming it rather than an opaque
        # "'NoneType' object is not subscriptable".
        pred = f['pred_param'][...].reshape(-1)
        target = f['target_param'][...].reshape(-1)

    # NOTE(review): aggregated_performance maps every target > 0 to 1 before
    # scoring, whereas the target is used as stored here. Confirm that the
    # stored targets are already 0/1, otherwise the two scores are not
    # comparable.

    # Turn scores into hard 0/1 labels. The arrays were read into memory and
    # the file is opened read-only, so this never touches the file on disk.
    pred[pred >= 0.5] = 1
    pred[pred < 0.5] = 0
    mcc = matthews_corrcoef(target, pred)

    false_positives = np.sum((pred == 1) & (target == 0))
    false_negatives = np.sum((pred == 0) & (target == 1))
    return mcc, false_positives, false_negatives

In [10]:
def _load_pred_and_binary_target(file_name):
    """Read the flattened ``pred_param`` and ``target_param`` from one HDF5 file, mapping target > 0 to 1."""
    with h5.File(file_name, 'r') as f:
        pred = f['pred_param'][...].reshape(-1)
        target = f['target_param'][...].reshape(-1)
    target[target > 0] = 1
    return pred, target


def aggregated_performance(file_name_list, aggregated_num, num_resamples=1000, seed=0):
    """Score ensembles built by averaging randomly chosen prediction files.

    For each of ``num_resamples`` rounds, ``aggregated_num`` distinct files are
    drawn from ``file_name_list``, their ``pred_param`` vectors are averaged
    element-wise, the average is thresholded at 0.5 and the result is scored
    against the (binarized) target.

    Parameters
    ----------
    file_name_list : sequence of str
        Paths to HDF5 files holding ``pred_param`` and ``target_param``.
    aggregated_num : int
        Number of files averaged per round (sampled without replacement).
    num_resamples : int, optional
        Number of random subsets to evaluate. Defaults to 1000.
    seed : int, optional
        Seed passed to ``np.random.seed``. Defaults to 0. Note that this
        reseeds NumPy's *global* generator, so the same subsets are drawn on
        every call.

    Returns
    -------
    tuple
        ``(mcc_list, fp_list, fn_list, mean_triangular)``. The three lists have
        one entry per round. ``mean_triangular`` is the mean Pearson
        correlation between the thresholded ensemble predictions of every
        distinct pair of rounds (NaN when fewer than two rounds were run).

    Raises
    ------
    ValueError
        If ``num_resamples`` is smaller than 1 or ``aggregated_num`` is not in
        ``1..len(file_name_list)``.
    """
    file_name_list = np.array(file_name_list)
    num_files = len(file_name_list)
    if num_resamples < 1:
        raise ValueError(f'num_resamples must be at least 1, got {num_resamples}')
    if not 1 <= aggregated_num <= num_files:
        raise ValueError(
            f'aggregated_num must be between 1 and the number of files '
            f'({num_files}), got {aggregated_num}'
        )

    mcc_list = list()
    fp_list = list()
    fn_list = list()
    correlation_list = list()

    np.random.seed(seed)
    select_idx_list = [
        np.random.choice(np.arange(num_files), aggregated_num, replace=False)
        for _ in range(num_resamples)
    ]

    # Each file is read from disk at most once, on first use, instead of once
    # per round. Cached arrays are only read afterwards, never modified.
    cache = dict()

    for select_idx in select_idx_list:
        pred_list = list()
        for file_name in file_name_list[select_idx]:
            if file_name not in cache:
                cache[file_name] = _load_pred_and_binary_target(file_name)
            # The target of the last file in the subset is used as ground truth.
            # NOTE(review): this assumes every file stores the same target
            # vector in the same order - confirm.
            pred, target = cache[file_name]
            pred_list.append(pred)

        # np.mean returns a new array, so thresholding in place is safe.
        pred = np.mean(np.array(pred_list), axis=0)
        pred[pred >= 0.5] = 1
        pred[pred < 0.5] = 0

        mcc_list.append(matthews_corrcoef(target, pred))
        fp_list.append(np.sum((pred == 1) & (target == 0)))
        fn_list.append(np.sum((pred == 0) & (target == 1)))
        correlation_list.append(pred)

    # Average only the strict upper triangle (each distinct pair once).
    # Taking np.mean over np.triu(...) would also average the zeroed diagonal
    # and lower triangle and shrink the result by more than half.
    correlation_matrix = np.atleast_2d(np.corrcoef(np.array(correlation_list)))
    pairwise = correlation_matrix[np.triu_indices_from(correlation_matrix, k=1)]
    mean_triangular = np.mean(pairwise) if pairwise.size else np.nan
    return mcc_list, fp_list, fn_list, mean_triangular

## Original

In [11]:
# Range of repetition indices to load: r = reps_st, ..., reps_en - 1.
reps_st = 0
reps_en = 10

# Number of runs averaged together in each aggregated (ensemble) evaluation.
aggregated_num = 5
# Hyper-parameter grid. The trailing slices select which values are actually swept.
t_factor_list = [0.1]
da_factor_list = [0.01, 0.001, 0.0001][2:]
da_thre_list = [0.05, 0.01, 0.001][1:2]
st_thre_list = [0.05, 0.01, 0.001][1:2]
min_thre = 0.01
file_reps_whole = 1
exp_type = 'pos_9_neg_66_20220303'
start_idx= 0

# Per-configuration results. Entry i of each list belongs to the i-th
# configuration visited by the nested loops below, so the report cells can
# walk the same loops with a running counter.
can_list = list()        # MCC of each individual run
can_fp_list = list()     # false positives of each individual run
can_fn_list = list()     # false negatives of each individual run
root_dir = '../'
temp_cnt = 0
total_cnt = 0
aggregated_can_list = list()     # MCC of each aggregated resample
aggregated_can_fp_list = list()  # false positives of each aggregated resample
aggregated_can_fn_list = list()  # false negatives of each aggregated resample
for t_factor in t_factor_list:
    for da_factor in da_factor_list:
        for da_thre in da_thre_list:
            for st_thre in st_thre_list:
                # Only combinations with st_thre <= da_thre are evaluated.
                if st_thre > da_thre:
                    continue
                can_list.append(list())
                can_fp_list.append(list())
                can_fn_list.append(list())
                file_name_list = list()
                for r in range(reps_st, reps_en):
                    result_dir = f'{root_dir}/{exp_type}/dast/min_thre_{min_thre}/t_factor_{t_factor}/{r}/da_factor_{da_factor}/da_thre_{da_thre}/st_thre_{st_thre}/start_idx_{start_idx}'
                    # Build the path once so scoring and aggregation use the same file.
                    pred_path = os.path.join(result_dir, 'pred.h5')
                    try:
                        score = load_and_calc(pred_path)
                        can_list[-1].append(score[0])
                        can_fp_list[-1].append(score[1])
                        can_fn_list[-1].append(score[2])
                        file_name_list.append(pred_path)
                    except FileNotFoundError as e:
                        # A missing run is reported and skipped, not fatal.
                        print(e)
                        print(pred_path)

                if len(file_name_list) < aggregated_num:
                    # Sampling aggregated_num files without replacement is
                    # impossible here and would raise. Store NaN placeholders
                    # so the aggregated lists stay index-aligned with can_list.
                    print(f'Skipping aggregation: found {len(file_name_list)} prediction files, need at least {aggregated_num}')
                    aggregated_can_list.append([np.nan])
                    aggregated_can_fp_list.append([np.nan])
                    aggregated_can_fn_list.append([np.nan])
                else:
                    aggregated_score = aggregated_performance(file_name_list, aggregated_num)
                    aggregated_can_list.append(aggregated_score[0])
                    aggregated_can_fp_list.append(aggregated_score[1])
                    aggregated_can_fn_list.append(aggregated_score[2])

In [12]:
### MCC
# Tab-separated MCC table with one row per evaluated configuration:
# the three hyper-parameters, mean±std over the aggregated resamples,
# mean±std over the individual runs, then each individual run.
cnt = 0  # position of the current configuration in can_list / aggregated_can_list
# The second summary column is computed from can_list (individual runs), so it
# is labelled "per-seed" rather than "aggregated".
print('lambda_dis\tG_DA\tG_ST\taggregated MCC MEAN±std\tper-seed MCC mean±std', end='\t')
for r in range(reps_st, reps_en):
    print(f'seed{r}', end='\t')
print()
for t_factor in t_factor_list:
    for da_factor in da_factor_list:
        for da_thre in da_thre_list:
            for st_thre in st_thre_list:
                # Must mirror the skip rule of the cell that filled the lists,
                # otherwise cnt would point at the wrong configuration.
                if st_thre > da_thre:
                    continue
                aggregated_values = aggregated_can_list[cnt]
                single_values = can_list[cnt]
                print(f'{da_factor}\t{da_thre}\t{st_thre}', end='\t')
                print(f'{np.round(np.mean(aggregated_values), 3)}±{np.round(np.std(aggregated_values), 3)}', end='\t')
                print(f'{np.round(np.mean(single_values), 3)}±{np.round(np.std(single_values), 3)}', end='\t')
                for value in single_values:
                    print(np.round(value, 3), end='\t')
                cnt += 1

                print()

lambda_dis	G_DA	G_ST	aggregated MCC MEAN±std	aggregated MCC mean±std	seed0	seed1	seed2	seed3	seed4	seed5	seed6	seed7	seed8	seed9	
0.0001	0.01	0.01	0.959±0.04	0.907±0.069	0.941	0.771	1.0	0.846	0.941	0.891	0.941	0.891	1.0	0.846	


In [13]:
### FP
# Tab-separated false-positive table with one row per evaluated configuration:
# the three hyper-parameters, mean±std over the aggregated resamples,
# mean±std over the individual runs, then each individual run.
cnt = 0  # position of the current configuration in can_fp_list / aggregated_can_fp_list
# The second summary column is computed from can_fp_list (individual runs), so
# it is labelled "per-seed" rather than repeating "aggregated".
print('lambda_dis\tG_DA\tG_ST\taggregated FP mean±std\tper-seed FP mean±std', end='\t')
for r in range(reps_st, reps_en):
    print(f'seed{r}', end='\t')
print()
for t_factor in t_factor_list:
    for da_factor in da_factor_list:
        for da_thre in da_thre_list:
            for st_thre in st_thre_list:
                # Must mirror the skip rule of the cell that filled the lists,
                # otherwise cnt would point at the wrong configuration.
                if st_thre > da_thre:
                    continue
                aggregated_values = aggregated_can_fp_list[cnt]
                single_values = can_fp_list[cnt]
                print(f'{da_factor}\t{da_thre}\t{st_thre}', end='\t')
                print(f'{np.round(np.mean(aggregated_values), 3)}±{np.round(np.std(aggregated_values), 3)}', end='\t')
                print(f'{np.round(np.mean(single_values), 3)}±{np.round(np.std(single_values), 3)}', end='\t')
                for value in single_values:
                    print(np.round(value, 3), end='\t')
                cnt += 1

                print()

lambda_dis	G_DA	G_ST	aggregated FP mean±std	aggregated FP mean±std	seed0	seed1	seed2	seed3	seed4	seed5	seed6	seed7	seed8	seed9	
0.0001	0.01	0.01	0.723±0.736	1.8±1.47	1	5	0	3	1	2	1	2	0	3	


In [14]:
### FN
# Tab-separated false-negative table with one row per evaluated configuration:
# the three hyper-parameters, mean±std over the aggregated resamples,
# mean±std over the individual runs, then each individual run.
cnt = 0  # position of the current configuration in can_fn_list / aggregated_can_fn_list
# The second summary column is computed from can_fn_list (individual runs), so
# it is labelled "per-seed" rather than repeating "aggregated".
print('lambda_dis\tG_DA\tG_ST\taggregated FN mean±std\tper-seed FN mean±std', end='\t')
for r in range(reps_st, reps_en):
    print(f'seed{r}', end='\t')
print()
for t_factor in t_factor_list:
    for da_factor in da_factor_list:
        for da_thre in da_thre_list:
            for st_thre in st_thre_list:
                # Must mirror the skip rule of the cell that filled the lists,
                # otherwise cnt would point at the wrong configuration.
                if st_thre > da_thre:
                    continue
                aggregated_values = aggregated_can_fn_list[cnt]
                single_values = can_fn_list[cnt]
                print(f'{da_factor}\t{da_thre}\t{st_thre}', end='\t')
                print(f'{np.round(np.mean(aggregated_values), 3)}±{np.round(np.std(aggregated_values), 3)}', end='\t')
                print(f'{np.round(np.mean(single_values), 3)}±{np.round(np.std(single_values), 3)}', end='\t')
                for value in single_values:
                    print(np.round(value, 3), end='\t')
                cnt += 1

                print()

lambda_dis	G_DA	G_ST	aggregated FN mean±std	aggregated FN mean±std	seed0	seed1	seed2	seed3	seed4	seed5	seed6	seed7	seed8	seed9	
0.0001	0.01	0.01	0.0±0.0	0.0±0.0	0	0	0	0	0	0	0	0	0	0	
