In [7]:
import numpy as np
import pandas as pd

In [8]:
def calc_uncertainty_curves(correct_mask, I, n_points=100):
    """Compute certainty/uncertainty ratio curves over a sweep of thresholds.

    A sample counts as "certain" at threshold ``t`` when its (clipped)
    uncertainty is ``<= t``. ``n_points`` thresholds are spaced evenly over
    ``[0, 1]`` (both ends included).

    Parameters
    ----------
    correct_mask : array-like of bool
        True where the prediction is correct. Coerced to a boolean array so
        that ``~`` is always a logical NOT.
    I : array-like
        Per-sample uncertainty values. Values outside ``[0, 1]`` are clipped
        on a copy; the caller's array is never modified.
    n_points : int, optional
        Number of thresholds in the sweep.

    Returns
    -------
    Rcc, Riu, UA : np.ndarray
        Three arrays of length ``n_points``:
        ``Rcc = Ncc / (Ncc + Nic)`` and ``Riu = Niu / (Niu + Nic)``, each set
        to the sentinel ``-1`` where its denominator is zero, and
        ``UA = (Ncc + Niu) / (Ncc + Niu + Ncu + Nic)``.
    """
    thresholds = np.linspace(0, 1, n_points, endpoint=True)
    Rcc = np.zeros(len(thresholds))
    Riu = np.zeros(len(thresholds))
    UA = np.zeros(len(thresholds))

    correct_mask = np.asarray(correct_mask, dtype=bool)
    # np.clip returns a new array, so the caller's data is left untouched
    # (the previous in-place assignments overwrote the input).
    I = np.clip(I, 0, 1)

    for idx, t in enumerate(thresholds):
        certain_mask = I <= t
        Ncc = np.sum(correct_mask & certain_mask)    # correct and certain
        Nic = np.sum(~correct_mask & certain_mask)   # incorrect and certain
        Niu = np.sum(~correct_mask & ~certain_mask)  # incorrect and uncertain
        Ncu = np.sum(correct_mask & ~certain_mask)   # correct and uncertain
        # -1 marks thresholds where the ratio is undefined; callers filter on >= 0.
        Rcc[idx] = Ncc/(Ncc+Nic) if (Ncc+Nic)>0 else -1
        Riu[idx] = Niu/(Niu+Nic) if (Niu+Nic)>0 else -1
        UA[idx] = (Ncc+Niu)/(Ncc+Niu+Ncu+Nic)
    return Rcc, Riu, UA

In [9]:
def create_df(ensemble_names, table_names, epsilons, attack_type, normalize_by_first_set):
    """Build a per-ensemble, per-epsilon summary table of uncertainty metrics.

    For each ``(ensemble_name, table_name)`` pair this loads ``labels.npy``,
    ``predictions.npy`` and ``uncertainties.npy`` from
    ``../adv_exp_new/{ensemble_name}_{attack_type}``, min-max normalises the
    uncertainties, and summarises the curves from ``calc_uncertainty_curves``.

    Parameters
    ----------
    ensemble_names : sequence of str
        Directory name prefixes of the stored experiment results.
    table_names : sequence of str
        Display names, paired positionally with ``ensemble_names``.
    epsilons : sequence
        Attack strengths; entry ``idx`` is matched with row ``idx`` of the
        loaded arrays. Presumably the arrays are laid out as
        (n_epsilons, n_samples) -- confirm against the code that saved them.
    attack_type : str
        Suffix of the experiment directory name.
    normalize_by_first_set : bool
        If True, the min/max of ``uncertainties[0]`` is used to normalise
        every row; otherwise each row is normalised by its own min/max
        (taken along axis 1).

    Returns
    -------
    pd.DataFrame
        One row per (ensemble, epsilon) with columns ``Name``, ``$\\epsilon$``,
        ``Accuracy``, ``$R_{cc}$``, ``$R_{iu}$`` and ``$UA$``. The R/UA values
        are means of the respective curve over the thresholds; for Rcc and Riu
        the ``-1`` sentinel entries are excluded from the mean.
    """
    results = []

    for ensemble_name, table_name in zip(ensemble_names, table_names):
        data_dir = f'../adv_exp_new/{ensemble_name}_{attack_type}'
        labels = np.load(f'{data_dir}/labels.npy')
        predictions = np.load(f'{data_dir}/predictions.npy')
        uncertainties = np.load(f'{data_dir}/uncertainties.npy')

        if normalize_by_first_set:
            # One global scale taken from the first row only.
            I_max, I_min = np.max(uncertainties[0]), np.min(uncertainties[0])
        else:
            # Per-row scale; keep a trailing axis so it broadcasts over axis 1.
            I_max, I_min = np.max(uncertainties, axis=1), np.min(uncertainties, axis=1)
            I_max, I_min = np.expand_dims(I_max, -1), np.expand_dims(I_min, -1)

        I_norm = (uncertainties-I_min) / (I_max-I_min)
        correct_mask = predictions == labels

        for idx, epsilon in enumerate(epsilons):
            Rcc, Riu, UA = calc_uncertainty_curves(correct_mask[idx], I_norm[idx])
            accuracy = np.mean(correct_mask[idx])
            # Skip the -1 "undefined" sentinels when averaging Rcc / Riu.
            Rcc_AUC, Riu_AUC, UA_AUC = Rcc[Rcc>=0].mean(), Riu[Riu>=0].mean(), UA.mean()
            results.append({
                'Name': table_name,
                # Raw string: "\e" is an invalid escape in a normal literal
                # (SyntaxWarning on Python 3.12+); the key text is unchanged.
                r'$\epsilon$': epsilon,
                'Accuracy': accuracy,
                r'$R_{cc}$': Rcc_AUC,
                r'$R_{iu}$': Riu_AUC,
                '$UA$': UA_AUC
            })

    return pd.DataFrame(results)
        

In [15]:
# China 2018
# Attack strengths for the stored China 2018 runs (from yaml files).
epsilons = np.array([0, 1, 2, 4, 6, 7])/40
ensemble_names = [
    'cor', 'dec', 'fcor', 'fdec',
    'dverge', 'cor_adv', 'dec_adv',
]
table_names = [
    'baseline', 'dec', 'part', 'dec+part',
    'dverge', 'adv', 'dec+adv',
]
normalize_by_first_set = True

pgd_df = create_df(
    ensemble_names=ensemble_names,
    table_names=table_names,
    epsilons=epsilons,
    attack_type='china_pgd',
    normalize_by_first_set=normalize_by_first_set,
)
sap_df = create_df(
    ensemble_names=ensemble_names,
    table_names=table_names,
    epsilons=epsilons,
    attack_type='china_sap',
    normalize_by_first_set=normalize_by_first_set,
)

# Rebind `epsilons` to the subset (4/40 dropped) that later cells iterate over.
epsilons = np.array([0, 1, 2, 6, 7])/40

In [16]:
# Display the values currently bound to `epsilons`.
epsilons


array([0.   , 0.025, 0.05 , 0.15 , 0.175])

In [12]:
# Physionet 2017
# Attack strengths for the stored Physionet 2017 runs (from yaml files).
epsilons = np.array([0, 10, 50, 75, 100, 150])
ensemble_names = [
    'cor_test_run', 'dec_test_run', 'fcor_test', 'fdec_test',
    'adversarial_training_test', 'dverge', 'dec_adv',
]
table_names = [
    'baseline', 'dec', 'part', 'dec+part',
    'adv', 'dverge', 'dec+adv',
]
normalize_by_first_set = True

pgd_df = create_df(
    ensemble_names=ensemble_names,
    table_names=table_names,
    epsilons=epsilons,
    attack_type='pgd',
    normalize_by_first_set=normalize_by_first_set,
)
sap_df = create_df(
    ensemble_names=ensemble_names,
    table_names=table_names,
    epsilons=epsilons,
    attack_type='sap',
    normalize_by_first_set=normalize_by_first_set,
)

# Rebind `epsilons` to the subset (150 dropped) that later cells iterate over.
epsilons = np.array([0, 10, 50, 75, 100])

In [17]:
# Write the PGD / SAP results as a LaTeX tabular.
# NOTE(review): the file name and the epsilon header row below are specific to
# the China 2018 configuration, while the Physionet 2017 cell rebinds the same
# `epsilons`, `table_names`, `pgd_df` and `sap_df` names. Make sure the China
# cell was the last configuration cell executed before running this one.
file_name = 'results_table_china.txt'

# Not read anywhere in this cell; kept in case another cell relies on it.
num_columns = 2*len(epsilons)


def _latex_cells(df, metric, name, eps_values):
    """Return the '& value ' cells of `metric` for model `name`, one per epsilon.

    A value is wrapped in \\textbf{} when it equals the maximum of `metric`
    over all rows of `df` at that epsilon. Values are printed as percentages
    with two decimals.
    """
    cells = ""
    for epsilon in eps_values:
        at_eps = df[r'$\epsilon$'] == epsilon
        best = np.max(df.loc[at_eps, metric].values)
        value = df.loc[(df['Name'] == name) & at_eps, metric].values[0]
        if value == best:
            cells += "& \\textbf{" + f"{100*value:.2f}" + "} "
        else:
            cells += f"& {100*value:.2f} "
    return cells


# The SAP half of the table omits the clean (epsilon == 0) column.
sap_epsilons = [epsilon for epsilon in epsilons if epsilon != 0]

with open(file_name, 'w') as f:
    # Table preamble: column spec, top rule, and the two header rows.
    header = "\\begin{tabular}{ c | c | c c c c | c c c c  }\n"
    header += "\\hline\n"
    header += " &  & \\multicolumn{4}{c}{Attack Strength $\\epsilon$ (PGD)} & \\multicolumn{4}{c}{Attack Strength $\\epsilon$ (SAP)} \\\\ \n"
    # Epsilon values row (China 2018). The Physionet 2017 equivalent is:
    #   " & 0 & 10 & 50 & 75 & 100 & 10 & 50 & 75 & 100 \\\\ \n"
    header += " & 0 & .025 & .05 & .15 & .175 & .025 & .05 & .15 & .175 \\\\ \n"
    f.write(header)

    for metric in ['Accuracy', r'$R_{cc}$', r'$R_{iu}$', '$UA$']:
        # Section heading row for this metric, framed by horizontal rules.
        line = "\\hline\n"
        line += f"{metric} (\\%)" if metric == 'Accuracy' else f"{metric} (\\% AUC)"
        line += ' \\\\ \n'
        line += "\\hline\n"
        f.write(line)

        # One row per model: all PGD epsilons, then the non-zero SAP epsilons.
        for name in table_names:
            line = f"{name} "
            line += _latex_cells(pgd_df, metric, name, epsilons)
            line += _latex_cells(sap_df, metric, name, sap_epsilons)
            line += '\\\\ \n'
            f.write(line)

    f.write("\\hline\n")  # Add a final horizontal line at the bottom
    f.write("\\end{tabular} \n")