adding doc for stopper
Deathn0t committed Mar 2, 2023
1 parent f2ca195 commit 93d8910
Showing 9 changed files with 92 additions and 11 deletions.
2 changes: 1 addition & 1 deletion deephyper/search/hps/_mpi_dbo.py
@@ -194,7 +194,7 @@ def __init__(
        self._init_params = _init_params

        logging.info(
            f"DBO rank {self.rank} has {self._evaluator.num_workers} local worker(s)"
            f"MPIDistributedBO rank {self.rank} has {self._evaluator.num_workers} local worker(s)"
        )

    def check_evaluator(self, evaluator):
68 changes: 67 additions & 1 deletion deephyper/stopper/__init__.py
@@ -1,6 +1,72 @@
"""The ``stopper`` module provides features to observe intermediate performances of a black-box function and allow to stop or continue its evaluation with respect to some budget.
"""The ``stopper`` module provides features to observe intermediate performances of iterative algorithm and decide dynamically if its evaluation should be stopped or continued.
This module was inspired from the Pruner interface and implementation of `Optuna <https://optuna.readthedocs.io/en/stable/reference/pruners.html>`_.
The ``Stopper`` class is the base class for all stoppers. It provides the interface for the ``observe`` and ``stop`` methods that should be implemented by all stoppers. The ``observe`` method is called at each iteration of the iterative algorithm and the ``stop`` method is called at the end of each iteration to decide if the evaluation should be stopped or continued. The stopper object is not used directly but through the ``RunningJob`` received by the ``run``-function. In the following example we demonstrate with a simulation how it can be used:
.. code-block:: python
import time
from deephyper.problem import HpProblem
from deephyper.search.hps import CBO
from deephyper.stopper import SuccessiveHalvingStopper
def run(job):
x = job.parameters["x"]
# Simulation of iteration
cum = 0
for i in range(100):
cum += x
time.sleep(0.01) # each iteration cost 0.1 secondes
# Record the intermediate performance
# Calling stopper.observe(budget, objective) under the hood
job.record(budget=i + 1, objective=cum)
# Check if the evaluation should be stopped
# Calling stopper.stop() under the hood
if job.stopped():
break
# Return objective and metadata to save what is the maximum step reached
return {"objective": cum, "metadata": {"i_stopped": i}}
problem = HpProblem()
problem.add_hyperparameter((0.0, 100.0), "x")
stopper = SuccessiveHalvingStopper(min_steps=1, max_steps=100)
search = CBO(problem, run, stopper=stopper, log_dir="multi-fidelity-exp")
results = search.search(timeout=10)
As it can be observed in the following results many evaluation stopped after the first iteration which saved
a lot of computation time. If evaluated fully, each configuration would take about 1 seconds and we would be able
to compute only a maximum of 10 configurations (because we set a timeout of 10). However, with the stopper we managed
to perform 15 evaluations instead.
.. code-block:: verbatim
p:x objective job_id m:timestamp_submit m:timestamp_gather m:i_stopped
0 79.654299 7965.429869 0 0.016269 1.234227 99
1 74.266072 74.266072 1 1.256349 1.269175 0
2 74.491125 74.491125 2 1.281712 1.294496 0
3 10.245385 10.245385 3 1.305979 1.317513 0
4 4.229917 4.229917 4 1.417226 1.430005 0
5 53.690895 53.690895 5 1.437582 1.450419 0
6 54.902216 54.902216 6 1.458042 1.470806 0
7 22.945529 22.945529 7 1.478365 1.491140 0
8 94.051310 9405.130978 8 1.498538 2.733619 99
9 23.024237 23.024237 9 2.753319 2.766194 0
10 97.121528 9712.152792 10 2.884685 4.114600 99
11 97.192445 9719.244491 11 4.241939 5.467425 99
12 98.844525 9884.452486 12 5.598530 6.833938 99
13 99.722437 9972.243688 13 6.946300 8.172941 99
14 99.988566 9998.856623 14 8.376363 9.615355 99
"""

from deephyper.stopper._stopper import Stopper
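To make the interface concrete, below is a minimal sketch of a custom stopper built on the ``Stopper`` base class from this diff. The ``PlateauStopper`` name and its ``patience`` argument are hypothetical illustrations, not part of deephyper, and the ``max_steps`` constructor argument is assumed to be accepted by the base class as it is by ``MedianStopper`` below.

.. code-block:: python

    from deephyper.stopper import Stopper


    class PlateauStopper(Stopper):
        """Hypothetical stopper: stops once the objective stops improving."""

        def __init__(self, max_steps: int, patience: int = 5):
            # `max_steps` is assumed to be handled by the base class;
            # `patience` is purely illustrative.
            super().__init__(max_steps=max_steps)
            self.patience = patience

        def stop(self) -> bool:
            # Always respect the overall budget (base-class behavior).
            if super().stop():
                return True
            _, objectives = self.observations
            if len(objectives) <= self.patience:
                return False
            # Stop if the best objective is older than `patience` observations.
            best_index = objectives.index(max(objectives))
            return (len(objectives) - 1 - best_index) >= self.patience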
2 changes: 2 additions & 0 deletions deephyper/stopper/_median_stopper.py
@@ -4,6 +4,8 @@


class MedianStopper(Stopper):
    """Stopper based on the median of observed objectives at similar budgets."""

    def __init__(
        self,
        max_steps: int,
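For context, a hedged usage sketch of ``MedianStopper``: only the ``max_steps`` argument is visible in this diff, so every other argument is assumed to keep its default, and ``problem`` and ``run`` are assumed to be defined as in the module example above.

.. code-block:: python

    from deephyper.search.hps import CBO
    from deephyper.stopper import MedianStopper

    # Stops an evaluation when its objective falls below the median of
    # the objectives observed at a similar budget (see docstring above).
    stopper = MedianStopper(max_steps=100)
    search = CBO(problem, run, stopper=stopper, log_dir="median-stopper-exp")
    results = search.search(timeout=10)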
18 changes: 15 additions & 3 deletions deephyper/stopper/_stopper.py
@@ -36,24 +36,36 @@ def transform_objective(self, objective: float):

    @property
    def step(self):
        """Last observed step."""
        return self.observed_budgets[-1]

    def observe(self, budget: float, objective: float):
        self._count_steps += 1
    def observe(self, budget: float, objective: float) -> None:
        """Observe a new objective value.

        Args:
            budget (float): the budget used to obtain the objective (e.g., the number of epochs).
            objective (float): the objective value to observe (e.g., the accuracy).
        """
        objective = self.transform_objective(objective)

        self.observed_budgets.append(budget)
        self.observed_objectives.append(objective)

    def stop(self) -> bool:
        return self._count_steps >= self.max_steps
        """Returns ``True`` if the evaluation should be stopped and ``False`` otherwise.

        Returns:
            bool: ``(step >= max_steps)``.
        """
        return self.step >= self.max_steps

    @property
    def observations(self) -> list:
        """Returns a copy of the observations, where index 0 holds the budgets and index 1 the objectives."""
        obs = [self.observed_budgets, self.observed_objectives]
        return copy.deepcopy(obs)

    @property
    def objective(self):
        """Last observed objective."""
        return self.observations[-1][-1]
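The ``observe``/``stop`` protocol above can be exercised directly. A small sketch, assuming the base ``Stopper`` is instantiable with ``max_steps`` (as its subclasses are) and that the default ``transform_objective`` leaves values unchanged:

.. code-block:: python

    from deephyper.stopper import Stopper

    stopper = Stopper(max_steps=3)
    for step in range(1, 10):
        stopper.observe(budget=step, objective=float(step))
        if stopper.stop():  # True once the last observed budget >= max_steps
            break

    budgets, objectives = stopper.observations
    print(budgets)            # [1, 2, 3]
    print(objectives)         # [1.0, 2.0, 3.0]
    print(stopper.objective)  # 3.0, the last observed objective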
2 changes: 1 addition & 1 deletion docs/api.rst
@@ -11,4 +11,4 @@
deephyper.problem
deephyper.search
deephyper.sklearn
deephyper.stopper
deephyper.stopper
2 changes: 1 addition & 1 deletion docs/faq.rst
@@ -35,7 +35,7 @@ Here we are interested in increasing the number of parallel evaluations performe

* If only a **few evaluations** of the ``run``-function can be afforded (``~ max_evals < 100``) then a good setting is ``CBO(problem, evaluator, surrogate_model="GP")`` with a relatively low number of parallel workers (``num_workers <= 10``). The ``surrogate_model="GP"`` parameter sets the surrogate model to a Gaussian process, which has cubic time complexity w.r.t. the number of collected evaluations.
* If a **large number of evaluations** can be afforded for the ``run``-function (``~ max_evals > 100``) then a good setting is ``CBO(problem, evaluator, multi_point_strategy="qUCB")``, which replaces the default iterative constant-liar strategy with a one-shot strategy. In this case, we tested up to ``num_workers == 4196`` parallel workers with a ``run``-function having a run-time of 60 seconds on average, bounded between 30 and 90 seconds (which is relatively fast compared to a neural network training).
* If **the number of collected evaluations** becomes large (i.e., fitting the surrogate model becomes more expensive which also depends on the number of parameters in the search space) then it is better to use a distributed Bayesian optimization (DBO) scheme to avoid congestion in the master's queue of received jobs. In DBO, each worker has a local Bayesian optimizer attribute which avoids congestion problems. Therefore the search use should be ``DBO(problem, run_function)``.
* If **the number of collected evaluations** becomes large (i.e., fitting the surrogate model becomes more expensive, which also depends on the number of parameters in the search space) then it is better to use a distributed Bayesian optimization (DBO) scheme to avoid congestion in the master's queue of received jobs. In DBO, each worker has a local Bayesian optimizer, which avoids congestion problems. Therefore the search used should be ``MPIDistributedBO(problem, run_function)``; a sketch of these settings follows this list.
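A hedged sketch of the three settings above; ``problem``, ``evaluator``, and ``run_function`` are assumed to be defined elsewhere:

.. code-block:: python

    from deephyper.search.hps import CBO, MPIDistributedBO

    # Few evaluations (~ max_evals < 100): Gaussian-process surrogate.
    search = CBO(problem, evaluator, surrogate_model="GP")

    # Many evaluations (~ max_evals > 100): one-shot qUCB strategy.
    search = CBO(problem, evaluator, multi_point_strategy="qUCB")

    # Large number of collected evaluations: distributed BO.
    search = MPIDistributedBO(problem, run_function)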


.. _why-more-results-than-max-evals:
1 change: 1 addition & 0 deletions docs/index.rst
@@ -104,6 +104,7 @@ Table of Contents
Problem <_autosummary/deephyper.problem>
Search <_autosummary/deephyper.search>
Sklearn <_autosummary/deephyper.sklearn>
Stopper <_autosummary/deephyper.stopper>

.. toctree::
:maxdepth: 2
4 changes: 2 additions & 2 deletions tests/deephyper/search/hps/test_dbo_max_evals.py
@@ -13,7 +13,7 @@ def _test_dbo_max_evals(tmp_path):
    import numpy as np

    from deephyper.problem import HpProblem
    from deephyper.search.hps import DBO
    from deephyper.search.hps import MPIDistributedBO

    d = 10
    domain = (-32.768, 32.768)
@@ -36,7 +36,7 @@ def run(job):
    x = np.asarray_chkfinite(x)  # ValueError if any NaN or Inf
    return -ackley(x)

    search = DBO(
    search = MPIDistributedBO(
        hp_problem,
        run,
        log_dir=tmp_path,
4 changes: 2 additions & 2 deletions tests/deephyper/search/hps/test_dbo_timeout.py
@@ -14,7 +14,7 @@ def _test_dbo_timeout():
    import numpy as np

    from deephyper.problem import HpProblem
    from deephyper.search.hps import DBO
    from deephyper.search.hps import MPIDistributedBO

    d = 10
    domain = (-32.768, 32.768)
@@ -38,7 +38,7 @@ def run(job):
    return -ackley(x)

    log_dir = "log-dbo"
    search = DBO(
    search = MPIDistributedBO(
        hp_problem,
        run,
        log_dir=log_dir,
