In [1]:
import argparse
import pathlib
import random
from collections import deque
from typing import List, Dict, Tuple, Union
from data import Dataset
from data_utils import *
from models import BetaBernoulli
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from sampling import *
%matplotlib inline  

import matplotlib;matplotlib.rcParams['font.size'] = 10
import matplotlib;matplotlib.rcParams['font.family'] = 'serif'
LINEWIDTH = 13.97
LOG_FREQ = 10
output_dir = pathlib.Path("../output/difference_random_2_groups")

In [2]:
DATASET_LIST = ['cifar100', 'dbpedia', '20newsgroup', 'svhn'] #'imagenet'
method_list = ['random_arm_symmetric', 'random_data_symmetric', 'random_arm_informed', \
               'random_data_informed', 'ts_uniform', 'ts_informed']
method_format = {#'random_arm_symmetric': ('Random Arm Symmetric', 'g', '.', '--'), 
                 'random_data_symmetric': ('Random Uninformative', 'b', '^', '--'), 
                 #'random_arm_informed': ('Random Arm Informed', 'g', '.', '-'), 
                 'random_data_informed': ('Random Informative', 'b', '^', '-'), 
                 #'ts_uniform': ('TS Symmetric', 'k', '*', '-'), 
                 'ts_informed': ('TS Informative', 'r', '+', '-'),
                }

metric = 'difference' # 'ece', 'confusion_matrix'
group_method = 'predicted_class'
pseudocount = 1
RUNS = 100
rope_width = 0.03

In [3]:
configs = {}
samples = {}
mpe_log = {}
rope_eval = {}

for dataset_name in tqdm(DATASET_LIST): # takes 4 minutes to load results of imagenet
    experiment_name = '%s_groupby_%s_pseudocount%.2f' % (dataset_name, group_method, pseudocount)
    samples[dataset_name], mpe_log[dataset_name], rope_eval[dataset_name] = {}, {}, {}
    
    configs[dataset_name] = np.load(open(output_dir / experiment_name / 'configs.npy', 'rb'))
    for method in method_list:
#         samples[dataset_name][method] = np.load(open(output_dir / experiment_name / \
#                                                      ('samples_%s.npy' % method), 'rb'))
#         mpe_log[dataset_name][method] = np.load(open(output_dir / experiment_name / \
#                                                      ('mpe_log_%s.npy' % method), 'rb'))
        rope_eval[dataset_name][method] = np.load(open(output_dir / experiment_name / \
                                                       ('rope_eval_%s.npy' % method), 'rb'))

100%|██████████| 4/4 [00:00<00:00, 127.79it/s]


In [4]:
def rope(alpha0, alpha1, beta0, beta1):
    num_samples = 10000
    theta_0 = np.random.beta(alpha0, beta0, size=(num_samples))
    theta_1 = np.random.beta(alpha1, beta1, size=(num_samples))
    delta = theta_0 - theta_1
    return [(delta < -rope_width).mean(), (np.abs(delta) <= rope_width).mean(), (delta > rope_width).mean()]

counts = dict()
budgets = dict()
rope_ground_truth_dict = dict()

for i, dataset_name in enumerate(DATASET_LIST):
    counts[dataset_name] = {}
    budgets[dataset_name] = np.zeros((RUNS,))
    rope_ground_truth_dict[dataset_name] = np.zeros((RUNS,))
    for method_name in method_format:  
        counts[dataset_name][method_name] = []

    dataset = Dataset.load_from_text(dataset_name)
    dataset.group(group_method = group_method)
    dataset_len = dataset.__len__()
    dataset_accuracy_k = dataset.accuracy_k
    dataset_weight_k = dataset.weight_k
    del dataset
   
    if dataset_name == 'imagenet':
        RUNS = 10
    for r in tqdm(range(RUNS)):
        group0, group1, budget, delta = configs[dataset_name][r]
        group0, group1, budget = int(group0), int(group1), int(budget)
        budgets[dataset_name][r] = budget
        rope_ground_truth = rope(dataset_len * dataset_weight_k[group0] * (dataset_accuracy_k[group0]+ 1e-6), 
             dataset_len * dataset_weight_k[group1] * (dataset_accuracy_k[group1]+ 1e-6), 
             dataset_len * dataset_weight_k[group0] * (1-dataset_accuracy_k[group0] + 1e-6),
             dataset_len * dataset_weight_k[group1] * (1-dataset_accuracy_k[group1] + 1e-6))

        if delta < -rope_width:
            rope_region = 0
        elif delta > rope_width:
            rope_region = 2
        else:
            rope_region = 1
            
        rope_ground_truth_dict[dataset_name][r] = rope_ground_truth[rope_region]
        
        for method_name in method_format:  
            rope_ = rope_eval[dataset_name][method_name][r,:budget//LOG_FREQ, rope_region]
            error_rate = np.abs(rope_ - rope_ground_truth[rope_region]) / rope_ground_truth[rope_region]
            error = (error_rate < 0.01)
            counts[dataset_name][method_name].append(np.argmax(error)*LOG_FREQ+LOG_FREQ)
            #counts[dataset_name][method_name].append(np.abs(rope_[15] - rope_ground_truth[rope_region]))

100%|██████████| 100/100 [00:00<00:00, 512.66it/s]
100%|██████████| 100/100 [00:00<00:00, 514.07it/s]
100%|██████████| 100/100 [00:00<00:00, 513.59it/s]
100%|██████████| 100/100 [00:00<00:00, 516.52it/s]


In [5]:
val = np.zeros((len(DATASET_LIST), len(method_format)))
for i, dataset_name in enumerate(DATASET_LIST):
    tmp = []
    for method_name in method_format:
        tmp.append(np.mean(counts[dataset_name][method_name]))
    val[i] = np.array(tmp)
df = pd.DataFrame(val.T, 
                  index=[method_format[i][0] for i in method_format], 
                  columns=[DATASET_NAMES[dataset_name] for dataset_name in DATASET_LIST])

In [6]:
df

Unnamed: 0,CIFAR-100,DBpedia,20 Newsgroups,SVHN
Random Uninformative,92.7,236.6,308.8,575.4
Random Informative,80.5,70.7,290.3,446.3
TS Informative,82.0,62.7,269.6,372.4


In [7]:
print(df.to_latex())

\begin{tabular}{lrrrr}
\toprule
{} &  CIFAR-100 &  DBpedia &  20 Newsgroups &   SVHN \\
\midrule
Random Uninformative &       92.7 &    236.6 &          308.8 &  575.4 \\
Random Informative   &       80.5 &     70.7 &          290.3 &  446.3 \\
TS Informative       &       82.0 &     62.7 &          269.6 &  372.4 \\
\bottomrule
\end{tabular}



In [8]:
budget_list = []
rope_list = []
for i, dataset_name in enumerate(DATASET_LIST):
    budget_list.append(budgets[dataset_name].mean())
    rope_list.append(rope_ground_truth_dict[dataset_name].mean())
print(budget_list,rope_list)

[200.94, 9997.0, 738.41, 4941.1] [0.8269920000000001, 0.977835, 0.8943410000000002, 0.993753]
