Adds the pe_overwrite_probability parameter to the UCBPEConfig

This parameter configures the probability with which the `VizierGPUCBPEBandit` designer chooses the pure exploration (PE) acquisition function when there are newly completed trials. The default value is 0.0, which leaves the existing behavior unchanged. In the fully sequential use case, where every suggestion is completed before the next one is requested, setting this parameter to a positive value `p` in [0, 1] configures the `VizierGPUCBPEBandit` designer to perform (controlled) exploration on roughly a `p`-fraction of the suggestions.

PiperOrigin-RevId: 627101720
google · Apr 22, 2024 · 41f2bcc · 41f2bcc
1 parent c57c73a
commit 41f2bcc
Show file tree

Hide file tree

Showing 2 changed files with 30 additions and 17 deletions.
diff --git a/vizier/_src/algorithms/designers/gp_ucb_pe.py b/vizier/_src/algorithms/designers/gp_ucb_pe.py
@@ -66,11 +66,18 @@ class UCBPEConfig(eqx.Module):
   cb_violation_penalty_coefficient: jt.Float[jt.Array, ''] = eqx.field(
       default=10.0, converter=jnp.asarray
   )
-  # Probability of using empty pending trials during batched suggestions.
+  # Probability of selecting the UCB acquisition function when there are no new
+  # completed trials.
   ucb_overwrite_probability: jt.Float[jt.Array, ''] = eqx.field(
       default=0.25, converter=jnp.asarray
   )
 
+  # Probability of selecting the PE acquisition function when there are new
+  # completed trials.
+  pe_overwrite_probability: jt.Float[jt.Array, ''] = eqx.field(
+      default=0.0, converter=jnp.asarray
+  )
+
   def __repr__(self):
     return eqx.tree_pformat(self, short_arrays=False)
 
@@ -690,23 +697,26 @@ def suggest(
     active_trials = list(self._all_active_trials)
     for _ in range(count):
       self._rng, rng = jax.random.split(self._rng, 2)
-      ucb_overwrite = jax.random.bernoulli(
-          key=rng, p=self._config.ucb_overwrite_probability
-      )
-      # Optimize the UCB acquisition when there are trials completed after all
-      # active trials were created, or when `ucb_overwrite` is true. The
-      # `ucb_overwrite_probability` config  parameter should be set to a small
-      # positive value so that the UCB acquisition function is optimized for
-      # more than one but not too many suggestions in a batch suggestion
-      # request. This helps compensate for sub-optimality of the acquisition
-      # function optimizer, without compromising the diversity of the
-      # suggestions in the feature space.
-      use_ucb = _has_new_completed_trials(
+      if _has_new_completed_trials(
           completed_trials=self._all_completed_trials,
           active_trials=active_trials,
-      ) or (
-          ucb_overwrite and (len(self._all_completed_trials) > 0)  # pylint:disable=g-explicit-length-test
-      )
+      ):
+        # When there are trials completed after all active trials were created,
+        # we optimize the UCB acquisition function, except that with a small
+        # probability we optimize the PE acquisition function for exploration.
+        use_ucb = not jax.random.bernoulli(
+            key=rng, p=self._config.pe_overwrite_probability
+        )
+      else:
+        has_completed_trials = len(self._all_completed_trials) > 0  # pylint:disable=g-explicit-length-test
+        # When there are no trials completed after all active trials were
+        # created, we optimize the PE acquisition function, except that with a
+        # small probability we optimize the UCB acquisition function instead,
+        # in case the UCB acquisition function is not well optimized.
+        use_ucb = has_completed_trials and jax.random.bernoulli(
+            key=rng, p=self._config.ucb_overwrite_probability
+        )
+
       # TODO: Feed the eagle strategy with completed trials.
       # TODO: Change budget based on requested suggestion count.
       acquisition_optimizer = self._acquisition_optimizer_factory(

diff --git a/vizier/_src/algorithms/designers/gp_ucb_pe_test.py b/vizier/_src/algorithms/designers/gp_ucb_pe_test.py
@@ -55,6 +55,7 @@ class GpUcbPeTest(parameterized.TestCase):
       dict(iters=5, batch_size=1, num_seed_trials=2),
       dict(iters=5, batch_size=3, num_seed_trials=2, ensemble_size=3),
       dict(iters=3, batch_size=5, num_seed_trials=5, applies_padding=True),
+      dict(iters=5, batch_size=1, num_seed_trials=2, pe_overwrite=True),
   )
   def test_on_flat_continuous_space(
       self,
@@ -64,6 +65,7 @@ def test_on_flat_continuous_space(
       ard_optimizer: str = 'default',
       ensemble_size: int = 1,
       applies_padding: bool = False,
+      pe_overwrite: bool = False,
   ):
     # We use string names so that test case names are readable. Convert them
     # to objects.
@@ -92,6 +94,7 @@ def test_on_flat_continuous_space(
             explore_region_ucb_coefficient=0.5,
             cb_violation_penalty_coefficient=10.0,
             ucb_overwrite_probability=0.0,
+            pe_overwrite_probability=1.0 if pe_overwrite else 0.0,
         ),
         ensemble_size=ensemble_size,
         padding_schedule=padding.PaddingSchedule(
@@ -168,7 +171,7 @@ def test_on_flat_continuous_space(
                 'gp_ucb_pe_bandit_test'
             )
         )
-        if jdx == 0 and idx < (iters + 1):
+        if jdx == 0 and idx < (iters + 1) and not pe_overwrite:
           # Except for the last batch of suggestions, the acquisition value of
           # the first suggestion in a batch is expected to be UCB, which
           # combines the predicted mean based only on completed trials and the