feat: Automatic streamlining of configuration space #741

Merged · 4 commits · Aug 3, 2023
2 changes: 1 addition & 1 deletion README.md
@@ -259,7 +259,7 @@ learn more about Syne Tune functionalities.
* [How can I specify dependencies to remote launcher or when using the SageMaker backend?](https://syne-tune.readthedocs.io/en/latest/faq.html#how-can-i-specify-dependencies-to-remote-launcher-or-when-using-the-sagemaker-backend)
* [How can I benchmark different methods?](https://syne-tune.readthedocs.io/en/latest/faq.html#how-can-i-benchmark-different-methods)
* [What different schedulers do you support? What are the main differences between them?](https://syne-tune.readthedocs.io/en/latest/faq.html#what-different-schedulers-do-you-support-what-are-the-main-differences-between-them)
* [How do I define the search space?](https://syne-tune.readthedocs.io/en/latest/faq.html#how-do-i-define-the-search-space)
* [How do I define the configuration space?](https://syne-tune.readthedocs.io/en/latest/faq.html#how-do-i-define-the-configuration-space)
* [How do I set arguments of multi-fidelity schedulers?](https://syne-tune.readthedocs.io/en/latest/faq.html#how-do-i-set-arguments-of-multi-fidelity-schedulers)
* [How can I visualize the progress of my tuning experiment with Tensorboard?](https://syne-tune.readthedocs.io/en/latest/faq.html#how-can-i-visualize-the-progress-of-my-tuning-experiment-with-tensorboard)
* [How can I add a new scheduler?](https://syne-tune.readthedocs.io/en/latest/faq.html#how-can-i-add-a-new-scheduler)
@@ -51,7 +51,7 @@ def finetune_transformer_swag_benchmark(
config_space = {
"learning_rate": loguniform(1e-6, 1e-4),
"warmup_ratio": uniform(0, 0.5),
"weight_decay": uniform(0, 1e-1),
"weight_decay": uniform(0, 0.1),
"adam_beta1": uniform(0.0, 0.9999),
"adam_beta2": uniform(0.0, 0.9999),
"adam_epsilon": loguniform(1e-10, 1e-6),
@@ -35,7 +35,13 @@
)

from syne_tune import Reporter
from syne_tune.config_space import randint, uniform, loguniform, add_to_argparse
from syne_tune.config_space import (
randint,
lograndint,
uniform,
loguniform,
add_to_argparse,
)
from syne_tune.utils import (
resume_from_checkpointed_model,
checkpoint_model_at_rung_level,
@@ -57,8 +63,8 @@


_config_space = {
NUM_UNITS_1: randint(4, 1024),
NUM_UNITS_2: randint(4, 1024),
NUM_UNITS_1: lograndint(4, 1024),
Review comment (Collaborator, PR author):
Made this change, because our own replacement logic maps randint to lograndint when upper >= 100 * lower.
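
As an aside (not part of the PR), the rule quoted above, applied to the domains in this file (string keys used for brevity), would behave roughly as follows:

    from syne_tune.config_space import randint
    from syne_tune.utils import streamline_config_space

    config_space = {
        "n_units_1": randint(4, 1024),  # 1024 >= 100 * 4, so mapped to lograndint(4, 1024)
        "batch_size": randint(8, 128),  # 128 < 100 * 8, so left unchanged
    }
    new_config_space = streamline_config_space(config_space)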

NUM_UNITS_2: lograndint(4, 1024),
"batch_size": randint(8, 128),
"dropout_1": uniform(0, 0.99),
"dropout_2": uniform(0, 0.99),
12 changes: 8 additions & 4 deletions docs/source/faq.rst
@@ -702,14 +702,18 @@ Further schedulers provided by Syne Tune include:
* `Transfer learning schedulers <examples.html#transfer-tuning-on-nasbench-201>`__
* `Wrappers for Ray Tune schedulers <examples.html#launch-hpo-experiment-with-ray-tune-scheduler>`__

How do I define the search space?
=================================
How do I define the configuration space?
========================================

While the training script defines the function to be optimized, some
care needs to be taken to define the search space for the hyperparameter
care needs to be taken to define the configuration space for the hyperparameter
optimization problem. This being a global optimization problem without
gradients easily available, it is most important to reduce the number of
parameters. Some advice is given `here <search_space.html>`__.
parameters. A general recommendation is to use
:func:`~syne_tune.utils.streamline_config_space` on your configuration space,
which does some automatic rewriting to enforce best practices. Details on how
to choose a configuration space, and on automatic rewriting, are given
`here <search_space.html>`__.
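
As an aside (not part of this diff), a minimal usage sketch; the domains below are only illustrative:

.. code-block:: python

   from syne_tune.config_space import randint, uniform
   from syne_tune.utils import streamline_config_space

   config_space = {
       "n_units": randint(4, 1024),
       "learning_rate": uniform(1e-6, 1),
   }
   # Returns a new configuration space over the same values, with domains
   # rewritten for a better internal encoding (here: log scaling)
   config_space = streamline_config_space(config_space)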

A powerful approach is to run experiments in parallel. Namely, split
your hyperparameters into groups A, B, such that HPO over B is
106 changes: 93 additions & 13 deletions docs/source/search_space.rst
@@ -4,7 +4,10 @@ How to Choose a Configuration Space
One important step in applying hyperparameter optimization to your tuning
problem is to define a configuration space (or search space). Doing this
optimally for any given problem is more of an art than a science, but in this
tutorial you will learn about the basics and some gotchas.
tutorial you will learn about the basics and some gotchas. Syne Tune also
provides some logic in :func:`~syne_tune.utils.streamline_config_space` to
automatically transform domains into forms more suitable for Bayesian
optimization; this is explained here as well.

Introduction
------------
@@ -14,11 +17,11 @@ Here is an example for a configuration space:
.. code-block:: python

from syne_tune.config_space import (
randint, uniform, loguniform, choice,
lograndint, uniform, loguniform, choice,
)

config_space = {
'n_units': randint(4, 1024),
'n_units': lograndint(4, 1024),
'dropout': uniform(0, 0.9),
'learning_rate': loguniform(1e-6, 1),
'activation': choice(['relu', 'tanh']),
@@ -50,7 +53,7 @@ currently supported (for full details, see :mod:`syne_tune.config_space`):
``x`` is drawn uniformly in ``[log(lower), log(upper)]``.
* ``randint(lower, upper)``: Integer uniform in ``lower, ..., upper``.
The value range includes both ``lower`` and ``upper`` (difference to
Python range convention).
Python range convention, where ``upper`` would not be included).
* ``lograndint(lower, upper)``: Integer log-uniform in
``lower, ..., upper``. More precisely, the value is
``int(round(exp(x)))``, where ``x`` is drawn uniformly in
@@ -100,9 +103,39 @@ Recommendations

How to choose the domain for a given hyperparameter? Obviously, we want to
avoid illegal values: learning rates should be positive, probabilities lie
in ``[0, 1]``. Apart from this, the choice of domain is not always obvious,
and different choices can affect search performance significantly in some
cases. Here, we provide some recommendations:
in ``[0, 1]``, and numbers of units must be integers. Apart from this, the
choice of domain is not always obvious, and different choices can affect
search performance significantly in some cases.

With :func:`~syne_tune.utils.streamline_config_space`, Syne Tune provides some
logic which transforms domains into others more suitable for Bayesian
optimization. For example:

.. code-block:: python

from syne_tune.config_space import randint, uniform, choice
from syne_tune.utils import streamline_config_space

config_space = {
'n_units': randint(4, 1024),
'dropout': uniform(0, 0.9),
'learning_rate': uniform(1e-6, 1),
'weight_decay': choice([0.001, 0.01, 0.1, 1.0]),
'magic_constant': choice([1, 2, 5, 10, 15, 30]),
}
new_config_space = streamline_config_space(config_space)
# Results in:
# new_config_space = {
# 'n_units': lograndint(4, 1024),
# 'dropout': uniform(0, 0.9),
# 'learning_rate': loguniform(1e-6, 1),
# 'weight_decay': logfinrange(0.001, 1.0, 4),
# 'magic_constant': logordinal([1, 2, 5, 10, 15, 30]),
# }

Here, ``new_config_space`` results in the same set of configurations, but the
internal encoding is more suitable for many of the model-based HPO methods in
Syne Tune. Why?

* **Avoid using choice (categorical) for numerical parameters.**
Many HPO algorithms make very good use of the information that a
Expand All @@ -119,6 +152,11 @@ cases. Here, we provide some recommendations:
distance in this embedding, so that any ordering or distance
information is lost. Bayesian optimization does not perform well in
general in high-dimensional embedding spaces.

It is for this reason that :func:`~syne_tune.utils.streamline_config_space`
converts the domains of ``weight_decay`` and ``magic_constant`` from
``choice`` to ``logfinrange`` and ``logordinal`` respectively.

* **Use infinite ranges.** No competitive HPO algorithm ever enumerates
all possible configurations and iterates over all of them. There is
almost certainly no gain in restricting a learning rate to 5 values
@@ -134,15 +172,57 @@ cases. Here, we provide some recommendations:
respectively. If your value spacing is not regular, you can use ``ordinal``
or ``logordinal``. For example,
``choice([0.0005, 0.001, 0.005, 0.01, 0.05, 0.1])`` can be replaced by
``logordinal([0.0005, 0.001, 0.005, 0.01, 0.05, 0.1])``.
* **Explore ordinal or logordinal as alternative to choice.** What if your
finite set of numerical values is not equi-spaced? Ordinal parameters are
encoded by a single int value (if ``kind="equal"``) or a single float value
(if ``kind in {"nn", "nn-log"}``), which is more economical in Bayesian
optimization.
``logordinal([0.0005, 0.001, 0.005, 0.01, 0.05, 0.1])``, which is what
:func:`~syne_tune.utils.streamline_config_space` would do.
* **Use a log transform** for parameters which may vary over several orders
of magnitude. Examples are learning rates or regularization constants.
In the example above, :func:`~syne_tune.utils.streamline_config_space`
converts ``n_units`` from :code:`randint(4, 1024)` to :code:`lograndint(4, 1024)`
and ``learning_rate`` from :code:`uniform(1e-6, 1)` to
:code:`loguniform(1e-6, 1)`.
* **Use points_to_evaluate**. On top of refining your configuration space, we
strongly recommend that you
`specify initial default configurations <schedulers.html#fifoscheduler>`__
via ``points_to_evaluate``.

As a user, you can memorize all of this, or you can use
:func:`~syne_tune.utils.streamline_config_space` and just do the following:

* Use ``uniform`` for ``float`` values, ``randint`` for ``int`` values, and
leave the decision for log scaling to the logic.
* Use ``choice`` for each finite domain, just make sure that all entries have
the same type (``str``, ``int``, or ``float``).
:func:`~syne_tune.utils.streamline_config_space` will transform your choice
into ``finrange``, ``logfinrange``, ``ordinal``, or ``logordinal`` for value
types ``float`` or ``int``.

You should also use :func:`~syne_tune.utils.streamline_config_space` when
importing configuration spaces from other HPO libraries, which may not support
the finite numerical domains Syne Tune has.

.. note::
The conversion of ``choice`` to ``finrange`` or ``logfinrange`` in
:func:`~syne_tune.utils.streamline_config_space` can be approximate. While
the list has the same size, some entries may be changed. For example,
:code:`choice([1, 2, 5, 10, 20, 50])` is replaced by ``logfinrange`` with
values ``1, 2, 5, 10, 22, 48``. If this is a problem for certain domains, use
the ``exclude_names`` argument.
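
As an aside (not part of this diff), a minimal sketch of using ``exclude_names``;
the hyperparameter names are only illustrative, and the argument is assumed to
accept a list of names, as the text above suggests:

.. code-block:: python

   from syne_tune.config_space import choice, uniform
   from syne_tune.utils import streamline_config_space

   config_space = {
       "learning_rate": uniform(1e-6, 1),
       "magic_constant": choice([1, 2, 5, 10, 20, 50]),
   }
   # "magic_constant" keeps its exact categorical values, while
   # "learning_rate" is still rewritten to loguniform(1e-6, 1)
   new_config_space = streamline_config_space(
       config_space, exclude_names=["magic_constant"]
   )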

Finally, here is what :func:`~syne_tune.utils.streamline_config_space` does:

* For a domain :code:`uniform(lower, upper)` or :code:`randint(lower, upper)`:
If :code:`lower > 0` and :code:`upper >= lower * 100`, replace domain by
:code:`loguniform(lower, upper)` or :code:`lograndint(lower, upper)`.
* For a domain :code:`choice(categories)`, where all entries in ``categories``
are of type ``int`` or ``float``: This domain is replaced by
``finrange``, ``logfinrange``, ``ordinal``, or ``logordinal`` (with the same
value type), depending on best fit. Namely, ``categories`` is sorted to
:math:`x_0 < \dots < x_{n-1}`, and a linear function
:math:`a \cdot j + b,\; j = 0, \dots, n-1` is fit to :math:`[x_j]`, and to
:math:`[\log x_j]` if :math:`x_0 > 0`. The quality of each fit is scored by
:math:`R^2`; this determines logarithmic versus linear encoding, as well as the
choice between ``finrange`` and ``ordinal`` (a sketch of this scoring is given
below). For ``ordinal``, we always use ``kind="nn"``.
* In order to exclude certain hyperparameters from replacements, pass their
names in the ``exclude_names`` argument of
:func:`~syne_tune.utils.streamline_config_space`.
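
As an aside (not part of this diff), here is a rough sketch of the scoring
described above; this illustrates the idea and is not Syne Tune's actual
implementation:

.. code-block:: python

   import numpy as np

   def r2_of_linear_fit(values, log_scale: bool) -> float:
       """R^2 of fitting a * j + b to the sorted values (or to their logs)."""
       x = np.sort(np.asarray(values, dtype=float))
       if log_scale:
           x = np.log(x)  # only sensible if all values are positive
       j = np.arange(len(x))
       a, b = np.polyfit(j, x, deg=1)
       ss_res = np.sum((x - (a * j + b)) ** 2)
       ss_tot = np.sum((x - np.mean(x)) ** 2)
       return 1.0 - ss_res / ss_tot

   # [1, 2, 5, 10, 15, 30] fits a linear function much better on the log
   # scale, consistent with it being mapped to logordinal in the example above
   print(r2_of_linear_fit([1, 2, 5, 10, 15, 30], log_scale=False))
   print(r2_of_linear_fit([1, 2, 5, 10, 15, 30], log_scale=True))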
56 changes: 30 additions & 26 deletions docs/source/tutorials/basics/basics_bayesopt.rst
@@ -124,41 +124,45 @@ categorical type are often used. For example:

.. code-block:: python

from syne_tune.config_space import randint, choice
from syne_tune.config_space import lograndint, choice

config_space = {
'n_units_1': randint(4, 1024),
'n_units_1': lograndint(4, 1024),
# ...
'activation': choice(['ReLU', 'LeakyReLU', 'Softplus']),
}

Here, ``activation`` could determine the type of activation function.
Maybe the most important recommendation for Bayesian optimization and
categorical parameters is not to use them if you do not have to. If your
parameter is numerical, it admits a linear ordering, which is important
information for any optimizer. By turning it into a categorical
parameter, this ordering information is lost. Worse, in Bayesian
optimization, the search space is encoded as multi-dimensional unit
cube. This is a relaxation for ``int`` values, so one parameter maps to
one encoded dimension. For a categorical parameter, in order to make
sure that each value is equidistant from any other, we need to use one-hot
encoding, so the encoding dimension is equal to the number of different
values!

In short, while it is tempting to “simplify” our search space by
replacing the ``n_units_1`` domain ``randint(4, 1024)`` with
``choice([4, 8, 16, 32, 64, 128, 256, 512, 1024])``, reducing 1021 to 9
distinct values, this would not make much of a difference for random
search, while it would likely make Bayesian optimization perform worse.
Both the acquisition function and the ARD parameters of our surrogate
model would have to be optimized over a space with 8 more dimensions,
and valuable ordering information between ``n_units_1`` values would be
lost. If you insist on a sparse “regular grid” value range, you can use
``logfinrange(4, 1024, 9)``, which has the same 9 values, but uses a
latent ``int`` representation, which is encoded with a single number.
More information can be found
It is important to understand that in Bayesian optimization, a
categorical parameter is encoded as a vector in the multi-dimensional
unit cube: the encoding dimension is equal to the number of different
values. This is to make sure there is no ordering information between
the different values: each pair has the same distance in the encoding
space.

This is usually **not** what you want for numerical values, whose
ordering provides important information to the search. For example,
it sounds simpler to search over the finite range
``choice([4, 8, 16, 32, 64, 128, 256, 512, 1024])`` than over the infinite
``lograndint(4, 1024)`` for ``n_units_1``, but **the opposite is the
case**. The former occupies 9 dimensions, the latter 1 dimension in
the encoded space, and ordering information is lost for the former.
A better alternative is ``logfinrange(4, 1024, 9)``.
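
As an aside (not part of this diff), the three alternatives side by side:

.. code-block:: python

   from syne_tune.config_space import choice, lograndint, logfinrange

   # 9 categories: one-hot encoded, 9 dimensions, ordering information lost
   as_choice = choice([4, 8, 16, 32, 64, 128, 256, 512, 1024])
   # Full integer range: a single encoded dimension, ordering preserved
   as_range = lograndint(4, 1024)
   # Same 9 values on a latent log-spaced grid: also a single encoded dimension
   as_grid = logfinrange(4, 1024, 9)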

Syne Tune provides a range of finite numerical domains in order to
avoid suboptimal performance of Bayesian optimization due to the uncritical
use of ``choice``. Since this is somewhat subtle, and you may also want
to import configuration spaces from other HPO libraries which do not
have these types, Syne Tune provides an automatic conversion logic
with :func:`~syne_tune.utils.streamline_config_space`. Details are given
`here <../../search_space.html#recommendations>`__.

.. note::
When using Bayesian optimization or any other model-based HPO method,
we strongly recommend using
:func:`~syne_tune.utils.streamline_config_space` to ensure that
your domains are chosen in a way that works well with the internal encoding.

Speeding up Decision-Making
~~~~~~~~~~~~~~~~~~~~~~~~~~~

6 changes: 4 additions & 2 deletions docs/source/tutorials/benchmarking/bm_simulator.rst
@@ -229,8 +229,10 @@ This call runs a number of experiments sequentially on the local machine:
`not a good choice <../../search_space.html#recommendations>`__. With this
option, the domain can be switched to different variants of ``ordinal``.
The default is ``nn-log``, which is the domain
``logordinal([0.0005, 0.001, 0.005, 0.01, 0.05, 0.1])``. In order to keep
the original categorical domain, use ``--fcnet_ordinal none``.
``logordinal([0.0005, 0.001, 0.005, 0.01, 0.05, 0.1])`` (this is also the
replacement which :func:`~syne_tune.utils.streamline_config_space` would do).
In order to keep the original categorical domain, use
``--fcnet_ordinal none``.
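
As an aside (not part of this diff), the default replacement described above,
written out as domains:

.. code-block:: python

   from syne_tune.config_space import choice, logordinal

   # Original FCNet domain (categorical)
   original = choice([0.0005, 0.001, 0.005, 0.01, 0.05, 0.1])
   # Domain used instead under --fcnet_ordinal nn-log (the default)
   replaced = logordinal([0.0005, 0.001, 0.005, 0.01, 0.05, 0.1])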

If you defined additional arguments via ``extra_args``, you can use them
here as well. For example, ``--num_brackets 3`` would run all
8 changes: 8 additions & 0 deletions syne_tune/util.py
@@ -221,6 +221,14 @@ def is_positive_integer(lst: List[int]) -> bool:
return all(x == int(x) and x >= 1 for x in lst)


def is_integer(lst: list) -> bool:
"""
:param lst: List of entries
:return: Are all entries of ``lst`` integer-valued (i.e., ``x == int(x)``)?
"""
return all(x == int(x) for x in lst)


def dump_json_with_numpy(
x: dict, filename: Optional[Union[str, Path]] = None
) -> Optional[str]:
2 changes: 2 additions & 0 deletions syne_tune/utils/__init__.py
@@ -21,6 +21,7 @@
add_config_json_to_argparse,
load_config_json,
)
from syne_tune.utils.convert_domain import streamline_config_space

__all__ = [
"add_checkpointing_to_argparse",
@@ -30,4 +31,5 @@
"parse_bool",
"add_config_json_to_argparse",
"load_config_json",
"streamline_config_space",
]