Licensed under the Apache License, Version 2.0.

In [0]:
import numpy as np
import os
import tensorflow as tf
gfile = tf.compat.v1.gfile
import itertools
import pickle
from collections import defaultdict

In [0]:
# Root directory of the experiment. Later cells read datasets from
# <rootdir>/data and read/write result pickles under <rootdir>/results.
rootdir = '/tmp/wheel_bandit'

In [0]:
def get_trial_results(savefile):
  """Load the saved reward history of a single trial.

  Args:
    savefile: Path of a pickle file holding a mapping with an 'h_rewards'
      entry.

  Returns:
    `saved_state['h_rewards'][:, 0]` when `savefile` exists, otherwise None.
  """
  # Missing result files are expected; signal them with None.
  if not gfile.Exists(savefile):
    return None
  with gfile.Open(savefile, 'rb') as pkl_file:
    state = pickle.load(pkl_file)
  return state['h_rewards'][:, 0]

In [0]:
# Sweep settings: the delta values and the number of trials per delta.
num_trials = 50
deltas = [0.5, 0.7, 0.9, 0.95, 0.99]

# Components of the 'gnp_<model_type>_<weights>' algorithm names.
model_types = ['cnp', 'np', 'anp', 'acnp', 'acns']
weights = ['offline']

# Each entry is a list of name parts; start with the two fixed algorithms and
# then add one 'gnp' entry per (model type, weights) combination.
algos = [['uniform'], ['neurolinear']]
for mt, wt in itertools.product(model_types, weights):
  algos.append(['_'.join(['gnp', mt, wt])])

In [0]:
# For every delta, collect the per-time-step regret of each algorithm over all
# trials and write the aggregate to <rootdir>/results/<delta>_all_results.pkl.
for delta in deltas:
  # Maps algo[0] -> list of per-trial regret arrays.
  results_dict = defaultdict(list)
  print('delta', delta)
  for trial_idx in range(num_trials):
    instance = str(delta) + '_' + str(trial_idx)
    dataset = os.path.join(rootdir, 'data', instance + '.npz')
    # .npz archives are binary: np.load requires file objects opened in
    # binary mode, so open with 'rb' rather than text mode 'r'.
    with gfile.GFile(dataset, 'rb') as f:
      sampled_vals = np.load(f)
      # np.load reads .npz members lazily, so extract the array while the
      # underlying file is still open.
      opt_rewards = sampled_vals['opt_rewards']
    print('trial_idx', trial_idx)
    for algo in algos:
      print('algo', algo)
      all_algo_names = '_'.join(algo)
      filename = instance + '_' + all_algo_names + '.pkl'
      # Results of the 'gnp' algorithms are stored in a 'gnp/' subdirectory.
      if all_algo_names.startswith('gnp'):
        filename = 'gnp/' + filename
      savefile = os.path.join(rootdir, 'results', filename)
      h_rewards = get_trial_results(savefile)
      if h_rewards is None:
        # No saved result for this (instance, algorithm) pair; skip it.
        continue
      per_time_step_regret = np.array(opt_rewards - h_rewards)
      if np.any(per_time_step_regret < 0):
        # Negative regret is treated as an anomaly: stop in the debugger so it
        # can be inspected interactively before the value is recorded.
        import pdb
        pdb.set_trace()
      results_dict[algo[0]].append(per_time_step_regret)
    print()
  aggfile = os.path.join(rootdir, 'results', str(delta) + '_all_results.pkl')
  with gfile.Open(aggfile, 'wb') as outfile:
    pickle.dump(results_dict, outfile)

delta 0.5
trial_idx 0
algo ['uniform']
algo ['neurolinear']
algo ['gnp_cnp_offline']
algo ['gnp_np_offline']
algo ['gnp_anp_offline']
algo ['gnp_acnp_offline']
algo ['gnp_acns_offline']

trial_idx 1
algo ['uniform']
algo ['neurolinear']
algo ['gnp_cnp_offline']
algo ['gnp_np_offline']
algo ['gnp_anp_offline']
algo ['gnp_acnp_offline']
algo ['gnp_acns_offline']

trial_idx 2
algo ['uniform']
algo ['neurolinear']
algo ['gnp_cnp_offline']
algo ['gnp_np_offline']
algo ['gnp_anp_offline']
algo ['gnp_acnp_offline']
algo ['gnp_acns_offline']

trial_idx 3
algo ['uniform']
algo ['neurolinear']
algo ['gnp_cnp_offline']
algo ['gnp_np_offline']
algo ['gnp_anp_offline']
algo ['gnp_acnp_offline']
algo ['gnp_acns_offline']

trial_idx 4
algo ['uniform']
algo ['neurolinear']
algo ['gnp_cnp_offline']
algo ['gnp_np_offline']
algo ['gnp_anp_offline']
algo ['gnp_acnp_offline']
algo ['gnp_acns_offline']

trial_idx 5
algo ['uniform']
algo ['neurolinear']
algo ['gnp_cnp_offline']
algo ['gnp_np_offline']
algo [