In [11]:
import numpy as np
import pandas as pd

In [4]:
# Configuration used to locate the per-configuration error CSV files.
DATAPATH = '../output/accuracy_estimation_error'
PSEUDOCOUNT = [0.1, 1, 10]
NUM_RUNS = 10
WEIGHTED = ['weighted', 'unweighted']
DATASET_LIST = ['imagenet', 'dbpedia', 'cifar100', '20newsgroup', 'svhn', 'imagenet2_topimages']


def _get_result_filename(dataset, weighted, pseudo_n, method):
    """Build the CSV path shared by both estimators; only the ``method`` suffix differs."""
    # DATAPATH and NUM_RUNS are looked up at call time, so rebinding them later
    # in the notebook is picked up here as well.
    return "%s/%s_error_%s_PseudoCount%.1f_runs%d_%s.csv" % (
        DATAPATH, weighted, dataset, pseudo_n, NUM_RUNS, method)


def get_bayesian_filename(dataset, weighted, pseudo_n):
    """Return the path of the ``*_bayesian.csv`` file for one configuration.

    Parameters
    ----------
    dataset : str
        Dataset name embedded in the file name (see ``DATASET_LIST``).
    weighted : str
        File-name prefix (see ``WEIGHTED``).
    pseudo_n : float
        Pseudocount; rendered with exactly one decimal (``1`` -> ``1.0``).

    Returns
    -------
    str
        ``<DATAPATH>/<weighted>_error_<dataset>_PseudoCount<pseudo_n>_runs<NUM_RUNS>_bayesian.csv``
    """
    return _get_result_filename(dataset, weighted, pseudo_n, "bayesian")


def get_frequentist_filename(dataset, weighted, pseudo_n):
    """Return the path of the ``*_frequentist.csv`` file for one configuration.

    Takes the same arguments as ``get_bayesian_filename`` and produces the same
    path layout, ending in ``_frequentist.csv`` instead of ``_bayesian.csv``.
    """
    return _get_result_filename(dataset, weighted, pseudo_n, "frequentist")

In [8]:
# Numeric column labels of the summary tables
# (presumably the sample sizes at which the error was recorded -- confirm).
N_list = [10, 20, 50, 100, 200, 500, 1000, 2000, 5000]

# Full column layout: three configuration columns followed by one per entry of N_list.
header = ['dataset', 'weighted', 'pseudo_n']
header.extend(N_list)

# Row accumulators, one list per estimator.
bayesian_calibration_error, frequentist_calibration_error = [], []

In [9]:
# Build one summary row per (dataset, weighting, pseudocount) configuration.
#
# The accumulators are (re)initialised in this cell so that it is idempotent:
# re-running it starts from empty lists instead of appending duplicate rows,
# and it keeps working after a later cell has rebound these names to DataFrames.
bayesian_calibration_error = []
frequentist_calibration_error = []

for dataset in DATASET_LIST:
    for weighted in WEIGHTED:
        for pseudo_n in PSEUDOCOUNT:
            params = [dataset, weighted, pseudo_n]
            # Each CSV is averaged along axis 0
            # (presumably one row per run, one column per entry of N_list -- confirm).
            bayesian_result = np.mean(
                np.genfromtxt(get_bayesian_filename(dataset, weighted, pseudo_n), delimiter=","),
                axis=0).tolist()
            frequentist_result = np.mean(
                np.genfromtxt(get_frequentist_filename(dataset, weighted, pseudo_n), delimiter=","),
                axis=0).tolist()
            # Row layout: configuration columns first, then the averaged values.
            bayesian_calibration_error.append(params + bayesian_result)
            frequentist_calibration_error.append(params + frequentist_result)

In [14]:
# Convert the accumulated rows into tables whose columns follow `header`.
# Note: both names are rebound here from their previous value to a DataFrame.
bayesian_calibration_error = pd.DataFrame(data=bayesian_calibration_error, columns=header)
frequentist_calibration_error = pd.DataFrame(data=frequentist_calibration_error, columns=header)

In [17]:
# Save the Bayesian summary table (row index included), then display it as the cell output.
bayesian_calibration_error.to_csv(
    '../output/bayesian_calibration_error.csv'
)
bayesian_calibration_error

Unnamed: 0,dataset,weighted,pseudo_n,10,20,50,100,200,500,1000,2000,5000
0,imagenet,weighted,0.1,0.139118,0.131491,0.106357,0.083706,0.060351,0.030852,0.023789,0.018729,0.010757
1,imagenet,weighted,1.0,0.103762,0.098418,0.081729,0.069304,0.054738,0.029955,0.02337,0.018532,0.010704
2,imagenet,weighted,10.0,0.047634,0.050489,0.041443,0.036057,0.033748,0.024849,0.020439,0.017136,0.010335
3,imagenet,unweighted,0.1,0.156008,0.182569,0.185636,0.156502,0.122517,0.066984,0.053811,0.045961,0.028883
4,imagenet,unweighted,1.0,0.105449,0.120379,0.130047,0.121075,0.105506,0.0634,0.050318,0.042912,0.027502
5,imagenet,unweighted,10.0,0.052107,0.053972,0.052335,0.052306,0.055824,0.047398,0.039122,0.034281,0.022891
6,dbpedia,weighted,0.1,0.00415,0.004742,0.004968,0.005505,0.004947,0.004567,0.00448,0.003268,0.00185
7,dbpedia,weighted,1.0,0.001834,0.002395,0.003848,0.004792,0.004319,0.003791,0.003574,0.00286,0.001749
8,dbpedia,weighted,10.0,0.022098,0.013821,0.005542,0.002694,0.002801,0.002899,0.002271,0.002111,0.001475
9,dbpedia,unweighted,0.1,0.113702,0.120188,0.121632,0.128752,0.149494,0.171802,0.191713,0.159252,0.118071


In [18]:
# Save the frequentist summary table (row index included), then display it as the cell output.
frequentist_calibration_error.to_csv(
    '../output/frequentist_calibration_error.csv'
)
frequentist_calibration_error

Unnamed: 0,dataset,weighted,pseudo_n,10,20,50,100,200,500,1000,2000,5000
0,imagenet,weighted,0.1,0.240259,0.191722,0.121216,0.087239,0.061372,0.031015,0.023854,0.018761,0.010763
1,imagenet,weighted,1.0,0.240259,0.191722,0.121216,0.087239,0.061372,0.031015,0.023854,0.018761,0.010763
2,imagenet,weighted,10.0,0.240259,0.191722,0.121216,0.087239,0.061372,0.031015,0.023854,0.018761,0.010763
3,imagenet,unweighted,0.1,0.343398,0.304612,0.227645,0.173792,0.130159,0.070978,0.05538,0.046949,0.029062
4,imagenet,unweighted,1.0,0.343398,0.304612,0.227645,0.173792,0.130159,0.070978,0.05538,0.046949,0.029062
5,imagenet,unweighted,10.0,0.343398,0.304612,0.227645,0.173792,0.130159,0.070978,0.05538,0.046949,0.029062
6,dbpedia,weighted,0.1,0.011236,0.011236,0.010929,0.010804,0.00915,0.0063,0.00489,0.003355,0.001827
7,dbpedia,weighted,1.0,0.011236,0.011236,0.010929,0.010804,0.00915,0.0063,0.00489,0.003355,0.001827
8,dbpedia,weighted,10.0,0.011236,0.011236,0.010929,0.010804,0.00915,0.0063,0.00489,0.003355,0.001827
9,dbpedia,unweighted,0.1,0.267618,0.267618,0.261885,0.253121,0.237529,0.191641,0.155715,0.105351,0.069359


In [19]:
# For every configuration, print its parameters and then the element-wise
# relative difference (bayesian - frequentist) / frequentist of the two
# axis-0 means. With positive errors, a negative entry means the Bayesian
# value is the smaller one.
for dataset in DATASET_LIST:
    for weighted in WEIGHTED:
        for pseudo_n in PSEUDOCOUNT:
            params = [dataset, weighted, pseudo_n]
            print(params)
            bayesian_result = np.genfromtxt(
                get_bayesian_filename(dataset, weighted, pseudo_n), delimiter=","
            ).mean(axis=0)
            frequentist_result = np.genfromtxt(
                get_frequentist_filename(dataset, weighted, pseudo_n), delimiter=","
            ).mean(axis=0)
            relative_difference = (bayesian_result - frequentist_result) / frequentist_result
            print(relative_difference)

['imagenet', 'weighted', 0.1]
[-0.42096383 -0.31415543 -0.12258601 -0.04050171 -0.01663869 -0.00524511
 -0.00271989 -0.00166428 -0.00058254]
['imagenet', 'weighted', 1]
[-0.56812355 -0.48666211 -0.3257589  -0.2055897  -0.10809908 -0.0341706
 -0.02032289 -0.01216135 -0.00551734]
['imagenet', 'weighted', 10]
[-0.80174032 -0.73665367 -0.6581028  -0.58668926 -0.4500994  -0.19880622
 -0.14316127 -0.08661919 -0.03977153]
['imagenet', 'unweighted', 0.1]
[-0.54569453 -0.40065268 -0.18453694 -0.09948879 -0.05870879 -0.05626881
 -0.02832648 -0.02104713 -0.00613985]
['imagenet', 'unweighted', 1]
[-0.69292411 -0.60481239 -0.42872936 -0.30333394 -0.18940475 -0.10675827
 -0.09139416 -0.08597243 -0.0536612 ]
['imagenet', 'unweighted', 10]
[-0.84826103 -0.8228178  -0.77010243 -0.69903258 -0.57110824 -0.33220503
 -0.29356261 -0.26981113 -0.21233276]
['dbpedia', 'weighted', 0.1]
[-0.63064873 -0.57793481 -0.54544048 -0.49042264 -0.45934762 -0.2751164
 -0.08383883 -0.02596187  0.01252444]
['dbpedia', 'wei