Code for generating results for the graph with two instrumental variables (Figure 2(b) and Figure 3(b)).

In [None]:
import numpy
import sympy
import pandas
import numpy as np
import pandas as pd
import datetime
import copy
import attr
import time
import logging
import itertools
import pickle
import sys
import os
import functools

import matplotlib.pyplot as plt
from matplotlib import rcParams
import collections

from scipy.optimize import minimize
import warnings
from IPython.display import clear_output

# Base directory for saved artifacts. In the visible part of this notebook it
# is only referenced by the commented-out pickle/savefig calls.
HOME_DIR = ""
# Suppress every warning of category UserWarning from here on.
warnings.simplefilter(action='ignore', category=UserWarning)

In [None]:
class ModelParams:
  """Coefficients and noise variances of the linear data-generating model.

  As consumed by `generate_data_samples`:
    * `a1`, `a2`, `c` are the coefficients of Z1, Z2 and W in the equation for X.
    * `b`, `d` are the coefficients of X and W in the equation for Y.
    * `z1`, `z2`, `w`, `x`, `y` are the variances of the Gaussian noise terms
      (their square roots are used as standard deviations).
  """

  def __init__(self, b, c, d, a1, a2, z1, z2, w, x, y):
    # Structural coefficients.
    self.b, self.c, self.d = b, c, d
    self.a1, self.a2 = a1, a2
    # Noise variances.
    self.z1, self.z2 = z1, z2
    self.w, self.x, self.y = w, x, y

  def get_true_param(self):
    """Returns `b`, the parameter the estimators are scored against."""
    return self.b

  def __str__(self):
    """Formats all ten parameters as `name=value` pairs with `%f` precision."""
    template = "b=%f, c=%f, d=%f, a1=%f, a2=%f, z1=%f, z2=%f, w=%f, x=%f, y=%f"
    ordered_values = (
        self.b, self.c, self.d,
        self.a1, self.a2,
        self.z1, self.z2,
        self.w, self.x, self.y,
    )
    return template % ordered_values

# Ground-truth model: every coefficient and noise variance is 1, except the
# variance of Z2's noise, which is 4.
truth = ModelParams(b=1, c=1, d=1, a1=1, a2=1, z1=1, z2=4, w=1, x=1, y=1)

In [None]:
def generate_data_samples(num_samples, model, k=0.5):
  """Draws `num_samples` rows from the linear model described by `model`.

  The structural equations are
      Z1 = u_z1,  Z2 = u_z2,  W = u_w,
      X  = a1 * Z1 + a2 * Z2 + c * W + u_x,
      Y  = b * X + d * W + u_y,
  where each `u_*` is zero-mean Gaussian noise whose variance is the matching
  attribute of `model`. `W` is generated but not included in the output.

  Args:
    num_samples: number of rows to generate.
    model: object exposing `b, c, d, a1, a2, z1, z2, w, x, y`.
    k: fraction of rows (taken from the start) whose `SEL` flag is set to 1.

  Returns:
    A pandas DataFrame with columns "Z1", "Z2", "X", "Y" and "SEL" (int32).
  """
  shape = (num_samples,)

  def draw_noise(variance):
    return numpy.random.normal(scale=numpy.sqrt(variance), size=shape)

  # The draw order (Z1, Z2, W, X, Y) is kept fixed so that seeded runs
  # consume the random stream identically.
  noise_z1 = draw_noise(model.z1)
  noise_z2 = draw_noise(model.z2)
  noise_w = draw_noise(model.w)
  noise_x = draw_noise(model.x)
  noise_y = draw_noise(model.y)

  # Exogenous variables are just their noise terms.
  Z1, Z2, W = noise_z1, noise_z2, noise_w
  X = model.a1 * Z1 + model.a2 * Z2 + model.c * W + noise_x
  Y = model.b * X + model.d * W + noise_y

  # Flag the leading fraction `k` of the rows as selected.
  SEL = numpy.zeros_like(Y, numpy.int32)
  num_selected = int(SEL.shape[0] * k)
  SEL[:num_selected] = 1

  columns = {"Z1": Z1, "Z2": Z2, "X": X, "Y": Y, "SEL": SEL}
  return pandas.DataFrame(columns)

In [None]:
# Draw a sample from the ground-truth model and preview its first rows.
num_samples = 1000
df = generate_data_samples(num_samples, truth)
df.head()

Unnamed: 0,Z1,Z2,X,Y,SEL
0,-2.071566,-1.133387,-3.96213,-3.581768,1
1,-0.419037,3.996725,3.317942,5.427126,1
2,-0.099292,-0.049934,-1.036306,-1.210658,1
3,-1.907402,-2.215544,-5.649825,-5.745939,1
4,-1.250415,2.165945,-2.899494,-3.417869,1


In [None]:
class GMM:
  """Generalized-method-of-moments estimator built on two moment conditions.

  Every row contributes to exactly one moment, chosen by its `SEL` flag:
  rows with `SEL == 1` contribute `Z1 * (Y - X * p[0])` and rows with
  `SEL == 0` contribute `Z2 * (Y - X * p[0])`, where `p` is the parameter
  vector (a single entry).
  """

  def __init__(self, df):
    """Caches the columns of `df` as arrays.

    Args:
      df: pandas DataFrame; the moment conditions read the columns
        "Z1", "Z2", "X", "Y" and "SEL".
    """
    self.df_dict = {column: df[column].values for column in df.columns}
    # NOTE: `self.df` is the column dict above, not the DataFrame itself.
    self.df = self.df_dict
    self.num_samples = len(df)
    self.num_moments = 2

  def momconds(self, params):
    r"""Returns the empirical moment vector (equivalent of \hat{g}_n(\theta))."""
    return numpy.sum(self._momconds_arr(params), axis=-1) / self.num_samples

  def _compute_moment_covariance(self, params):
    """Estimates the (uncentered) covariance of the moments at `params`.

    The result is used as the inverse of the optimal weight matrix.
    """
    moms = self._momconds_arr(params)
    return (moms @ moms.T) / self.num_samples

  def _get_objective_fn(self, weight_matrix_inv):
    """Builds the GMM objective `g' W g` for a fixed `W^{-1}`.

    Args:
      weight_matrix_inv: inverse of the weight matrix; it is applied through
        a linear solve rather than an explicit inversion.

    Returns:
      A function mapping a parameter vector to the scalar objective value.
    """

    def objective(params):
      moms = self.momconds(params)
      return moms.T @ numpy.linalg.solve(weight_matrix_inv, moms)

    return objective

  def _momconds_arr(self, params):
    """Returns the per-sample moments as an array of shape (2, num_samples)."""
    df = self.df
    residual = df["Y"] - df["X"] * params[0]
    moment_1_arr = df["SEL"] * df["Z1"] * residual
    moment_2_arr = (1 - df["SEL"]) * df["Z2"] * residual
    return numpy.array([moment_1_arr, moment_2_arr])

  def _get_asymptotic_variance_matrix(self, k, moment_covariance, params):
    """Asymptotic variance of the estimate if a fraction `k` had `SEL == 1`.

    The Jacobian rows and the diagonal of `moment_covariance` are rescaled by
    `k / current_k` and `(1 - k) / (1 - current_k)`, where `current_k` is the
    observed mean of `SEL`. Off-diagonal covariance entries are multiplied by
    zero. `current_k` equal to 0 or 1 leads to a division by zero.

    Args:
      k: hypothetical selection fraction.
      moment_covariance: 2x2 moment covariance, e.g. from `find_parameters`.
      params: unused; kept for interface compatibility.

    Returns:
      The 1x1 matrix `(J' S^{-1} J)^{-1}`.
    """
    df = self.df
    n = self.num_samples
    current_k = numpy.mean(df["SEL"])

    scale_1 = k / current_k
    scale_2 = (1 - k) / (1 - current_k)

    jacobian_1_1 = scale_1 * numpy.sum(-df["SEL"] * df["Z1"] * df["X"]) / n
    jacobian_2_1 = scale_2 * numpy.sum(-(1 - df["SEL"]) * df["Z2"] * df["X"]) / n
    jacobian = numpy.array([[jacobian_1_1], [jacobian_2_1]])

    # Element-wise product: rescales the diagonal, zeroes the off-diagonal.
    moment_covariance_reweight = numpy.array([[scale_1, 0],
                                              [0, scale_2]])
    moment_covariance = moment_covariance * moment_covariance_reweight

    mom_cov_inv_jac = numpy.linalg.solve(moment_covariance, jacobian)
    return numpy.linalg.inv(jacobian.T @ mom_cov_inv_jac)

  def _get_asymptotic_variance(self, k, moment_covariance, params):
    """Scalar version of `_get_asymptotic_variance_matrix`."""
    return self._get_asymptotic_variance_matrix(k, moment_covariance, params)[0][0]

  def _optimize_find_parameters(self, weight_matrix_inv, initial_guess=None):
    """Minimizes the GMM objective and returns the optimizer's solution.

    NOTE(review): when `initial_guess` is None the search starts from
    1.5 * `truth.b`, i.e. it reads the module-level ground truth.
    """
    if initial_guess is None:
      initial_guess = numpy.array([truth.b]) * 1.5
    res = minimize(self._get_objective_fn(weight_matrix_inv), initial_guess)
    return res.x

  def find_parameters(self, num_iters=2, weight_matrix_reg=None):
    """Runs iterated GMM.

    The first iteration uses the identity as `W^{-1}`; every later iteration
    uses the moment covariance at the previous estimate and warm-starts from
    that estimate.

    Args:
      num_iters: number of optimize/re-weight rounds; must be at least 1.
      weight_matrix_reg: optional ridge term added to the diagonal of the
        moment covariance after each round.

    Returns:
      `(params, weight_matrix_inv)`: the final estimate and the (optionally
      regularized) moment covariance evaluated at it.

    Raises:
      ValueError: if `num_iters` is smaller than 1.
    """
    if num_iters < 1:
      raise ValueError("num_iters must be at least 1")

    params = None
    weight_matrix_inv = numpy.eye(self.num_moments)
    for _ in range(num_iters):
      # `params` is None only in the first round, which selects the default start.
      params = self._optimize_find_parameters(weight_matrix_inv, initial_guess=params)
      weight_matrix_inv = self._compute_moment_covariance(params)

      if weight_matrix_reg is not None:
        weight_matrix_inv += weight_matrix_reg * numpy.eye(weight_matrix_inv.shape[0])

    return params, weight_matrix_inv

  def find_optimal_k(self, moment_covariance, params):
    """Picks the corner selection fraction with the lower asymptotic variance.

    The asymptotic variance is minimized over `k` in [0.2, 0.8] starting from
    0.5; the result is then snapped to a corner.

    Returns:
      0 if the bounded minimizer is <= 0.5, otherwise 1.
    """
    initial_guess = numpy.array([0.5])
    lower_bound = 0.2
    upper_bound = 0.8
    res = minimize(lambda x: self._get_asymptotic_variance(x[0], moment_covariance, params),
                   initial_guess, bounds=[(lower_bound, upper_bound)])

    return 0 if res.x[0] <= 0.5 else 1

In [None]:
# Fit the estimator on the DataFrame `df` generated in an earlier cell.
gmm = GMM(df)
# Returns the estimate and the moment covariance evaluated at it.
params, moment_covariance = gmm.find_parameters(num_iters=2)

print("True params:")
print(truth)
print("Estimated params:")
print(params)

# find_optimal_k returns a corner value (0 or 1); it is printed with %f.
optimal_k = gmm.find_optimal_k(moment_covariance, params)
print("Estimated optimal selection ratio k: %f" % optimal_k)

# Remove the temporaries from the notebook namespace.
del gmm, params, moment_covariance, optimal_k

True params:
b=1.000000, c=1.000000, d=1.000000, a1=1.000000, a2=1.000000, z1=1.000000, z2=4.000000, w=1.000000, x=1.000000, y=1.000000
Estimated params:
[0.96418235]
Estimated optimal selection ratio k: 0.000000


In [None]:
class SampleRevealer:
  """Reveals rows of a pre-generated DataFrame batch by batch under a budget.

  Rows are revealed in order; `counter` is the number of rows revealed so
  far. For every revealed batch the "SEL" column is overwritten so that the
  leading part of the batch is flagged 1 and the rest 0.
  """

  def __init__(self, budget, df):
    """
    Args:
      budget: total budget; each revealed row costs `cost_per_reveal` (1).
      df: DataFrame acting as the sample buffer. Its "SEL" column is
        modified in place by `reveal`.
    """
    self.initial_budget = budget
    self.budget = budget
    self.buffer_size = len(df)
    self.df = df
    self.counter = 0
    self.cost_per_reveal = 1

  def reset(self):
    """Restores the full budget and rewinds to the start of the buffer.

    The "SEL" values written by earlier `reveal` calls are left untouched.
    """
    self.counter = 0
    self.budget = self.initial_budget

  def is_budget_left(self, samples_to_reveal=1):
    """Returns True if the budget covers `samples_to_reveal` more rows.

    `samples_to_reveal` defaults to 1 so the method can be called without
    arguments to ask whether anything at all can still be revealed.
    """
    return self.budget >= (self.cost_per_reveal * samples_to_reveal)

  def reveal(self, reveal_k, samples_to_reveal):
    """Reveals the next `samples_to_reveal` rows.

    Args:
      reveal_k: fraction of the batch to flag with SEL = 1; the count is
        truncated towards zero.
      samples_to_reveal: size of the batch.

    Raises:
      ValueError: if the batch would run past the end of the buffer. A batch
        that exactly exhausts the buffer is allowed.
    """
    batch_start = self.counter
    batch_end = batch_start + samples_to_reveal
    if batch_end > self.buffer_size:
      raise ValueError("no buffer")

    observe_count = int(reveal_k * samples_to_reveal)
    split = batch_start + observe_count

    # Positional writes through `.iloc` rather than through `.values`, which
    # may be a read-only array (pandas Copy-on-Write).
    sel_col = self.df.columns.get_loc("SEL")
    self.df.iloc[batch_start:split, sel_col] = 1
    self.df.iloc[split:batch_end, sel_col] = 0

    self.counter = batch_end
    self.budget -= (self.cost_per_reveal * samples_to_reveal)

  def get_dataset(self):
    """Returns the rows revealed so far (the first `counter` rows)."""
    return self.df[:self.counter]

In [None]:
def batch_fractions_to_sizes(horizon, batch_fractions):
  """Turns cumulative batch-end fractions into per-batch sample counts.

  Args:
    horizon: total number of samples.
    batch_fractions: fractions of `horizon` at which each batch but the last
      one ends; each boundary is `int(horizon * fraction)`.

  Returns:
    A list with `len(batch_fractions) + 1` sizes; the last batch runs up to
    `horizon`.
  """
  # Batch boundaries: 0, the truncated interior cut points, then the horizon.
  boundaries = [0]
  boundaries.extend(int(horizon * fraction) for fraction in batch_fractions)
  boundaries.append(horizon)

  return [end - start for start, end in zip(boundaries, boundaries[1:])]

In [None]:
def compute_reveal_k(current_samples, samples_to_reveal, current_k, target_k):
  """Fraction of the next batch to select so the overall fraction hits `target_k`.

  Solves `current_k * current_samples + reveal_k * samples_to_reveal
  == target_k * (current_samples + samples_to_reveal)` for `reveal_k` and
  clamps the answer to the interval [0, 1].
  """
  total_after_batch = current_samples + samples_to_reveal
  selected_needed = target_k * total_after_batch - current_k * current_samples
  unclamped = selected_needed / (samples_to_reveal)

  if unclamped >= 1:
    return 1
  if unclamped > 0:
    return unclamped
  return 0

In [None]:
class StrategyRunResult:
  """Per-batch history of one strategy run, stored as four parallel lists."""

  def __init__(self):
    self.budgets_used, self.squared_errors = [], []
    self.optimal_ks, self.current_ks = [], []

  def append(self, budget_used, squared_error, optimal_k, current_k):
    """Records the state after one batch, one value per history list."""
    histories_and_values = (
        (self.budgets_used, budget_used),
        (self.squared_errors, squared_error),
        (self.optimal_ks, optimal_k),
        (self.current_ks, current_k),
    )
    for history, value in histories_and_values:
      history.append(value)

In [None]:
class OracleStrategy:
  """Strategy that steers the selection fraction towards a given `optimal_k`.

  Unlike the other strategies in this file, the target fraction is supplied
  by the caller instead of being estimated from the data.
  """

  def __init__(self, sample_revealer, optimal_k, horizon, batch_fractions):
    """
    Args:
      sample_revealer: object providing `reveal`, `get_dataset`,
        `is_budget_left`, `budget` and `initial_budget`.
      optimal_k: target fraction of samples with SEL = 1.
      horizon: total number of samples to reveal.
      batch_fractions: cumulative batch-end fractions of `horizon`.
    """
    self.sample_revealer = sample_revealer
    self.name = 'oracle'
    self.batch_sizes = batch_fractions_to_sizes(horizon, batch_fractions)
    self.optimal_k = optimal_k

  def get_current_df_vals(self):
    """Returns `(fraction of revealed rows with SEL = 1, revealed row count)`."""
    dataset = self.sample_revealer.get_dataset()

    len_left_corner = numpy.sum(dataset["SEL"])
    len_right_corner = len(dataset) - len_left_corner
    total = (len_left_corner + len_right_corner)
    return len_left_corner / total, total

  def can_step(self, samples_to_reveal=1):
    """Returns True if the budget covers `samples_to_reveal` more samples.

    The revealer's `is_budget_left` needs a sample count; by default this
    asks whether at least one more sample can be afforded.
    """
    return self.sample_revealer.is_budget_left(samples_to_reveal)

  def get_and_store_params(self):
    """Fits a GMM on the revealed rows; stores and returns the estimate."""
    dataset = self.sample_revealer.get_dataset()
    self.gmm = GMM(dataset)
    self.params, self.moment_covariance = self.gmm.find_parameters(num_iters=2)
    return self.params

  def get_squared_error(self):
    """Squared distance between the stored estimate and the true parameter."""
    error = (truth.get_true_param() - self.params[0])**2
    return error

  def execute_run(self):
    """Reveals every batch in turn and records the state after each one.

    Returns:
      A `StrategyRunResult` with one entry per batch.
    """
    result = StrategyRunResult()
    for i, batch_size in enumerate(self.batch_sizes):
      if i == 0:
        # Nothing has been revealed yet.
        current_k, current_samples = 0, 0
      else:
        current_k, current_samples = self.get_current_df_vals()
      reveal_k = compute_reveal_k(current_samples, batch_size, current_k, self.optimal_k)

      self.sample_revealer.reveal(reveal_k=reveal_k, samples_to_reveal=batch_size)
      _ = self.get_and_store_params()

      current_k, _ = self.get_current_df_vals()

      result.append(
          self.sample_revealer.initial_budget - self.sample_revealer.budget,
          self.get_squared_error(), self.optimal_k, current_k,
      )

    return result

In [None]:
class ETCStrategy:
  """Strategy that explores in the first batch and then fixes its target.

  The first batch is revealed with a 50/50 split; the target fraction
  `optimal_k` is estimated once from that batch and used for all later
  batches. (Presumably "ETC" stands for explore-then-commit.)
  """

  def __init__(self, sample_revealer, horizon, batch_fractions):
    """
    Args:
      sample_revealer: object providing `reveal`, `get_dataset`,
        `is_budget_left`, `budget` and `initial_budget`.
      horizon: total number of samples to reveal.
      batch_fractions: cumulative batch-end fractions of `horizon`.
    """
    self.sample_revealer = sample_revealer
    self.name = 'etc'
    # We assume that the first batch size is exploration.
    self.batch_sizes = batch_fractions_to_sizes(horizon, batch_fractions)
    # Set after the exploration batch in `execute_run`.
    self.optimal_k = None

  def get_current_df_vals(self):
    """Returns `(fraction of revealed rows with SEL = 1, revealed row count)`."""
    dataset = self.sample_revealer.get_dataset()

    len_left_corner = numpy.sum(dataset["SEL"])
    len_right_corner = len(dataset) - len_left_corner
    total = (len_left_corner + len_right_corner)
    return len_left_corner / total, total

  def can_step(self, samples_to_reveal=1):
    """Returns True if the budget covers `samples_to_reveal` more samples.

    The revealer's `is_budget_left` needs a sample count; by default this
    asks whether at least one more sample can be afforded.
    """
    return self.sample_revealer.is_budget_left(samples_to_reveal)

  def get_and_store_params(self):
    """Fits a GMM on the revealed rows; stores and returns the estimate."""
    dataset = self.sample_revealer.get_dataset()
    self.gmm = GMM(dataset)
    self.params, self.moment_covariance = self.gmm.find_parameters(num_iters=2)
    return self.params

  def get_squared_error(self):
    """Squared distance between the stored estimate and the true parameter."""
    error = (truth.get_true_param() - self.params[0])**2
    return error

  def execute_run(self):
    """Reveals every batch in turn and records the state after each one.

    Returns:
      A `StrategyRunResult` with one entry per batch.
    """
    result = StrategyRunResult()
    for i, batch_size in enumerate(self.batch_sizes):
      is_exploration = (i == 0)
      if is_exploration:
        reveal_k = 0.5
      else:
        current_k, current_samples = self.get_current_df_vals()
        reveal_k = compute_reveal_k(current_samples, batch_size, current_k, self.optimal_k)

      self.sample_revealer.reveal(reveal_k=reveal_k, samples_to_reveal=batch_size)
      params = self.get_and_store_params()

      if is_exploration:
        # The target is estimated once and kept for the rest of the run.
        self.optimal_k = self.gmm.find_optimal_k(self.moment_covariance, params)

      current_k, _ = self.get_current_df_vals()

      result.append(
          self.sample_revealer.initial_budget - self.sample_revealer.budget,
          self.get_squared_error(), self.optimal_k, current_k,
      )

    return result

In [None]:
class ETGreedyStrategy:
  """Strategy that re-estimates its target fraction after every batch.

  The first batch is revealed with a 50/50 split. After each batch the GMM
  is refitted and `optimal_k` is recomputed; intermediate fits add a ridge
  term (`weight_matrix_reg`) to the moment covariance, the final fit does not.
  """

  def __init__(self, sample_revealer, horizon, batch_fractions):
    """
    Args:
      sample_revealer: object providing `reveal`, `get_dataset`,
        `is_budget_left`, `budget` and `initial_budget`.
      horizon: total number of samples to reveal.
      batch_fractions: cumulative batch-end fractions of `horizon`.
    """
    self.sample_revealer = sample_revealer
    self.name = 'et_greedy'
    # We assume that the first batch size is exploration.
    self.batch_sizes = batch_fractions_to_sizes(horizon, batch_fractions)
    self.weight_matrix_reg = 0.01
    # Set after the first batch in `execute_run`.
    self.optimal_k = None

  def get_current_df_vals(self):
    """Returns `(fraction of revealed rows with SEL = 1, revealed row count)`."""
    dataset = self.sample_revealer.get_dataset()

    len_left_corner = numpy.sum(dataset["SEL"])
    len_right_corner = len(dataset) - len_left_corner
    total = (len_left_corner + len_right_corner)
    return len_left_corner / total, total

  def can_step(self, samples_to_reveal=1):
    """Returns True if the budget covers `samples_to_reveal` more samples.

    The revealer's `is_budget_left` needs a sample count; by default this
    asks whether at least one more sample can be afforded.
    """
    return self.sample_revealer.is_budget_left(samples_to_reveal)

  def get_and_store_params(self, is_last_step):
    """Fits a GMM on the revealed rows; stores and returns the estimate.

    Args:
      is_last_step: when False, the moment covariance is regularized with
        `self.weight_matrix_reg`; when True, no regularization is applied.
    """
    dataset = self.sample_revealer.get_dataset()
    self.gmm = GMM(dataset)
    if is_last_step:
      self.params, self.moment_covariance = self.gmm.find_parameters(num_iters=2)
    else:
      self.params, self.moment_covariance = (
          self.gmm.find_parameters(num_iters=2, weight_matrix_reg=self.weight_matrix_reg)
      )
    return self.params

  def get_squared_error(self):
    """Squared distance between the stored estimate and the true parameter."""
    error = (truth.get_true_param() - self.params[0])**2
    return error

  def execute_run(self):
    """Reveals every batch in turn and records the state after each one.

    Returns:
      A `StrategyRunResult` with one entry per batch.
    """
    result = StrategyRunResult()
    last_index = len(self.batch_sizes) - 1
    for i, batch_size in enumerate(self.batch_sizes):
      if i == 0:
        reveal_k = 0.5
      else:
        current_k, current_samples = self.get_current_df_vals()
        reveal_k = compute_reveal_k(current_samples, batch_size, current_k, self.optimal_k)

      self.sample_revealer.reveal(reveal_k=reveal_k, samples_to_reveal=batch_size)
      is_last_step = (i == last_index)
      params = self.get_and_store_params(is_last_step)

      # The target is refreshed after every batch.
      self.optimal_k = self.gmm.find_optimal_k(self.moment_covariance, params)

      current_k, _ = self.get_current_df_vals()

      result.append(
          self.sample_revealer.initial_budget - self.sample_revealer.budget,
          self.get_squared_error(), self.optimal_k, current_k,
      )

    return result

In [None]:
def execute_strategy_iteration(strategy_name, iteration_num, horizon):
  """Generates a fresh dataset and runs one strategy on it.

  Args:
    strategy_name: one of "etc_0.1", "etc_0.2", "etc_0.4", "oracle",
      "etg_0.1" or "etg_0.2".
    iteration_num: index of the run; only used by the commented-out seeding.
    horizon: sample budget of the run. The generated buffer holds
      `horizon * 5` rows.

  Returns:
    The `StrategyRunResult` produced by the strategy's `execute_run`.

  Raises:
    ValueError: if `strategy_name` is not recognized. The data has already
      been generated at that point.
  """
  # Uncomment the next two lines to replicate the exact runs used in the paper.
  # random_seed = 232281293 + iteration_num
  # numpy.random.seed(random_seed)
  df = generate_data_samples(num_samples=horizon * 5, model=truth)
  numpy.random.seed(None)

  sample_revealer = SampleRevealer(budget=horizon, df=df)

  # Batch-end fractions for every named configuration.
  etc_fractions = {
      "etc_0.2": [0.2, 0.9],
      "etc_0.1": [0.1, 0.9],
      "etc_0.4": [0.4, 0.9],
  }
  etg_fractions = {
      "etg_0.1": [0.1, 0.2, 0.3, 0.4, 0.6, 0.8],
      "etg_0.2": [0.2, 0.4, 0.6, 0.8],
  }

  if strategy_name in etc_fractions:
    strategy = ETCStrategy(sample_revealer,
                           horizon=horizon,
                           batch_fractions=etc_fractions[strategy_name])
  elif strategy_name == "oracle":
    strategy = OracleStrategy(sample_revealer, optimal_k=0.05,
                              horizon=horizon, batch_fractions=[0.1, 0.9])
  elif strategy_name in etg_fractions:
    strategy = ETGreedyStrategy(sample_revealer,
                                horizon=horizon,
                                batch_fractions=etg_fractions[strategy_name])
  else:
    raise ValueError("invalid strategy_name")

  return strategy.execute_run()

In [None]:
# Test a strategy.
# Runs the "etc_0.4" configuration once in this process and prints the
# squared error recorded after each batch.

result = execute_strategy_iteration("etc_0.4", 1, horizon=1000)
print(result.squared_errors)

[0.00010819794897878681, 0.0005009086364382247, 0.0005204006336571472]


**Executing the runs for each strategy in parallel.**

For the paper, we execute 12,000 runs for each strategy. We execute the runs
in parallel using
[`ipyparallel`](https://ipyparallel.readthedocs.io/en/latest/).

The following command starts the `ipyparallel` engines:
```
ipcluster start -n <num_engines>
```
 

In [None]:
import ipyparallel as ipp

In [None]:
# Verify that ipcluster is running and import the necessary Python packages.

# Connect to the cluster and take a view over all of its engines.
parallel_client = ipp.Client(debug=False)
dview = parallel_client[:]
# Execute an identity map in parallel.
ar = dview.map(lambda x: x, (i for i in range(0, 2000000, 2)))
assert ar.get()[0] == 0

# Import the required Python packages.
# The modules are imported under their full names (no `as` aliases);
# presumably this is why the engine-side code refers to `numpy` and `pandas`
# rather than `np` and `pd`.
with dview.sync_imports():
  from abc import ABC, abstractmethod
  import numpy
  import sympy
  import pandas
  import sympy
  import datetime
  import copy
  import attr
  import time
  import logging
  import itertools
  import pickle
  import os
  import functools
  import ipyparallel

  import collections

  from scipy.optimize import minimize
  import warnings

  # Prefer the Python 2 C pickler when it exists; fall back to `pickle`.
  try:
    from cPickle import dumps, loads, HIGHEST_PROTOCOL as PICKLE_PROTOCOL
  except ImportError:
    from pickle import dumps, loads, HIGHEST_PROTOCOL as PICKLE_PROTOCOL

# Make sure ipyparallel is still able to execute functions.
dview = parallel_client[:]
ar = dview.map(lambda x: x, (i for i in range(0, 2000000, 2)))
assert ar.get()[0] == 0

importing ABC,abstractmethod from abc on engine(s)
importing numpy on engine(s)
importing sympy on engine(s)
importing pandas on engine(s)
importing datetime on engine(s)
importing copy on engine(s)
importing attr on engine(s)
importing time on engine(s)
importing logging on engine(s)
importing itertools on engine(s)
importing pickle on engine(s)
importing os on engine(s)
importing functools on engine(s)
importing ipyparallel on engine(s)
importing collections on engine(s)
importing minimize from scipy.optimize on engine(s)


In [None]:
def combine_parallel_results(async_result):
  """Stacks the per-run histories of a parallel map into arrays.

  Args:
    async_result: object whose `get()` returns a sequence of run results,
      each exposing `budgets_used`, `squared_errors`, `optimal_ks` and
      `current_ks`.

  Returns:
    `(budgets, errors, optimal_ks, current_ks)`: `budgets` is the
    `budgets_used` list of the first run; the other three are arrays with
    one row per run.
  """
  run_results = async_result.get()

  def stack(attribute):
    return np.vstack([getattr(run, attribute) for run in run_results])

  budgets = run_results[0].budgets_used
  errors = stack("squared_errors")
  optimal_ks = stack("optimal_ks")
  current_ks = stack("current_ks")

  return budgets, errors, optimal_ks, current_ks

def execute_strategy_in_parallel(strategy_name, horizon, iterations):
  """Runs `iterations` independent runs of a strategy on the engines.

  Every definition the runs depend on is first pushed into the engines'
  namespaces; the runs are then distributed with a parallel map.

  Args:
    strategy_name: name understood by `execute_strategy_iteration`.
    horizon: sample budget of each run.
    iterations: number of runs; run `i` receives `i` as its iteration number.

  Returns:
    Whatever the view's `map` returns for the submitted runs.
  """
  engine_count = len(parallel_client.ids)
  engine_view = parallel_client[:]

  print("Executing %s over %d iterations across %d engines" % (strategy_name, iterations, engine_count))

  # Name -> object pairs to define on every engine.
  engine_namespace = {
      "batch_fractions_to_sizes": batch_fractions_to_sizes,
      "compute_reveal_k": compute_reveal_k,
      "ModelParams": ModelParams,
      "truth": truth,
      "generate_data_samples": generate_data_samples,
      "StrategyRunResult": StrategyRunResult,
      "GMM": GMM,
      "ETCStrategy": ETCStrategy,
      "OracleStrategy": OracleStrategy,
      "ETGreedyStrategy": ETGreedyStrategy,
      "SampleRevealer": SampleRevealer,
      "execute_strategy_iteration": execute_strategy_iteration,
  }
  for name, obj in engine_namespace.items():
    engine_view[name] = obj

  def execute_iteration(i):
    return execute_strategy_iteration(strategy_name, i, horizon)

  return engine_view.map(execute_iteration, range(iterations))

In [None]:
def get_timeseries_for(strategy_names, horizons, iterations, results_dict):
  """Runs every (strategy, horizon) pair and stores the results in place.

  `results_dict["truth"]` is set to the ground-truth model, and
  `results_dict[strategy_name][horizon]` to a dict with the keys "budgets",
  "errors" (squared errors multiplied by the budgets used), "optimal_ks" and
  "current_ks".

  Args:
    strategy_names: strategy names to run.
    horizons: horizons to run each strategy with.
    iterations: number of runs per (strategy, horizon) pair.
    results_dict: dict that is updated in place; nothing is returned.
  """
  results_dict["truth"] = truth

  for strategy_name in strategy_names:
    for horizon in horizons:
      started_at = datetime.datetime.now()
      print("Timestamp start: %s, Strategy: %s, Horizon: %d, Iters: %d" % (
          started_at, strategy_name, horizon, iterations))

      pending = execute_strategy_in_parallel(strategy_name, horizon, iterations)
      budgets, errors, optimal_ks, current_ks = combine_parallel_results(pending)
      print("Timestamp end: %s" % (datetime.datetime.now()))

      per_horizon = results_dict.setdefault(strategy_name, {})
      per_horizon[horizon] = {
          "budgets": budgets,
          # Squared errors scaled by the number of samples used so far.
          "errors": errors * budgets,
          "optimal_ks": optimal_ks,
          "current_ks": current_ks,
      }

In [None]:
# Run every strategy at every horizon and collect the results in place.
# NOTE: `iterations` is 12 here, whereas the accompanying text reports
# 12,000 runs per strategy for the paper.
results_dict = {}
get_timeseries_for([
                    "oracle",
                    "etc_0.1", "etc_0.2", "etc_0.4",
                    "etg_0.1", "etg_0.2",
                    ],
                   horizons=[200, 300, 400, 500, 600, 800, 1000, 1300],
                   iterations=12,
                   results_dict=results_dict)

In [None]:
# Save results to file.
# pickle.dump(results_dict,
#             open(os.path.join(HOME_DIR,
#                               "%s_two_ivs.pkl" % datetime.datetime.now()),
#                  "wb"))

### Plot the results

In [None]:
# Color palettes taken from:
# https://gist.github.com/AndiH/c957b4d769e628f506bd

# Tableau 20 Colors (RGB, 0-255)
tableau20 = [
    (31, 119, 180), (174, 199, 232),
    (255, 127, 14), (255, 187, 120),
    (44, 160, 44), (152, 223, 138),
    (214, 39, 40), (255, 152, 150),
    (148, 103, 189), (197, 176, 213),
    (140, 86, 75), (196, 156, 148),
    (227, 119, 194), (247, 182, 210),
    (127, 127, 127), (199, 199, 199),
    (188, 189, 34), (219, 219, 141),
    (23, 190, 207), (158, 218, 229),
]

# Tableau Color Blind 10 (RGB, 0-255)
tableau20blind = [
    (0, 107, 164), (255, 128, 14),
    (171, 171, 171), (89, 89, 89),
    (95, 158, 209), (200, 82, 0),
    (137, 137, 137), (163, 200, 236),
    (255, 188, 121), (207, 207, 207),
]

# Rescale both palettes, in place, to values between 0 and 1
for palette in (tableau20, tableau20blind):
    palette[:] = [(r / 255., g / 255., b / 255.) for r, g, b in palette]

In [None]:
def plot_regret_curve(results_dict):
  """Plots the relative regret (in %) of each strategy against the horizon.

  For every horizon the final-batch "errors" of a strategy are compared with
  the mean final-batch "errors" of the "oracle" strategy at the same final
  budget: `regret = (error - oracle_mean) / oracle_mean * 100`. The mean
  regret is drawn as a line with 95% confidence-interval error bars.

  Args:
    results_dict: mapping `strategy_name -> horizon -> timeseries`, where
      each timeseries has a "budgets" list and an "errors" array with one
      row per run. An "oracle" entry is required, and it must cover every
      final budget of the plotted strategies. Styled strategies that are
      missing from `results_dict` are skipped.
  """
  # Mean oracle error at the final budget of each horizon.
  oracle_mses = {}
  for timeseries in results_dict["oracle"].values():
    oracle_mses[timeseries["budgets"][-1]] = np.mean(timeseries["errors"][:, -1])

  def plot(axs, name, info, result, with_var=False):
    """Draws one strategy; `info` is `[linestyle, color, marker]`."""
    x_vals, y_mean, y_std, num_runs = [], [], [], []

    for timeseries in result.values():
      final_budget = timeseries["budgets"][-1]
      final_errors = timeseries["errors"][:, -1]
      oracle_mse = oracle_mses[final_budget]
      y_scaled = (final_errors - oracle_mse) / oracle_mse * 100

      x_vals.append(final_budget)
      y_mean.append(np.mean(y_scaled))
      y_std.append(np.std(y_scaled))
      # Kept per horizon so each error bar uses its own run count.
      num_runs.append(final_errors.shape[0])

    # Sort all series by the horizon so the line is drawn left to right.
    x_sort_idx = np.argsort(x_vals)
    x_vals = np.array(x_vals)[x_sort_idx]
    y_mean = np.array(y_mean)[x_sort_idx]
    y_std = np.array(y_std)[x_sort_idx]
    num_runs = np.array(num_runs)[x_sort_idx]

    linestyle, color, marker = info
    axs.plot(x_vals, y_mean, label=name, color=color, linestyle=linestyle, marker=marker)

    if with_var:
      ci = 1.96 * y_std / np.sqrt(num_runs)
      axs.errorbar(x_vals, y_mean, yerr=ci, ls="none", color=color)

  name_to_linestyle_color = {
      "etc_0.1": ["dashed", tableau20blind[0], "o"],
      "etc_0.2": ["dashdot", tableau20blind[1], "s"],
      "etc_0.4": ["solid", tableau20blind[2], "v"],
      "etg_0.1": ["dotted", tableau20blind[4], "^"],
      "etg_0.2": ["dashed", tableau20blind[6], "D"],
  }
  plt.title("Relative regret vs horizon")
  plt.xlabel("Total samples collected (Horizon)")
  plt.ylabel("Relative regret (%)")
  for name, info in name_to_linestyle_color.items():
    if name not in results_dict:
      # Allow plotting when only a subset of the strategies was run.
      continue
    plot(plt, name, info, results_dict[name], with_var=True)
  plt.legend()

  # plt.savefig(os.path.join(HOME_DIR, "figures/two_ivs_regret_curve.eps"), bbox_inches='tight', pad_inches=0.0)

In [None]:
# Base font sizes for the figures. All three are currently 12; titles and
# axis labels add an offset on top of them below.
SMALL_SIZE = 12
MEDIUM_SIZE = 12
BIGGER_SIZE = 12

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE+4)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE+4)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE+20)  # fontsize of the figure title

In [None]:
# Draw the relative-regret figure from the collected results.
plot_regret_curve(results_dict)