Allow get_config to return the same config more than once, and unify duplicate filtering across all schedulers
mseeger committed Jan 21, 2023
1 parent f06d672 commit 1b6b396
Showing 17 changed files with 432 additions and 121 deletions.
3 changes: 3 additions & 0 deletions docs/source/schedulers.rst
@@ -167,6 +167,9 @@ are given in ``search_options``. These are:

* ``debug_log``: If ``True``, a useful log output about the search progress is
printed.
* ``allow_duplicates``: If ``True``, the same configuration may be suggested
more than once. The default is ``False``, in which case sampling is without
replacement (see the usage sketch after this file's diff).

Bayesian Optimization
~~~~~~~~~~~~~~~~~~~~~
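As a quick illustration of the documented option (not part of this commit), the following minimal sketch enables ``allow_duplicates`` through ``search_options``; the config space and metric name are placeholder assumptions.

from syne_tune.config_space import uniform
from syne_tune.optimizer.schedulers import FIFOScheduler

# Placeholder search space and metric; substitute those of your tuning problem
config_space = {"x": uniform(0.0, 1.0)}
scheduler = FIFOScheduler(
    config_space,
    searcher="bayesopt",
    metric="loss",
    mode="min",
    # Let the searcher suggest the same configuration more than once
    search_options={"allow_duplicates": True},
)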
2 changes: 2 additions & 0 deletions syne_tune/optimizer/schedulers/searchers/__init__.py
@@ -19,6 +19,7 @@
SearcherWithRandomSeed,
impute_points_to_evaluate,
extract_random_seed,
SearcherWithRandomSeedAndFilterDuplicates,
)
from syne_tune.optimizer.schedulers.searchers.random_grid_searcher import ( # noqa: F401
RandomSearcher,
@@ -33,6 +34,7 @@
"SearcherWithRandomSeed",
"impute_points_to_evaluate",
"extract_random_seed",
"SearcherWithRandomSeedAndFilterDuplicates",
"RandomSearcher",
"GridSearcher",
"searcher_factory",
19 changes: 10 additions & 9 deletions syne_tune/optimizer/schedulers/searchers/bore/bore.py
@@ -21,12 +21,9 @@
from sklearn.calibration import CalibratedClassifierCV

from syne_tune.optimizer.schedulers.searchers.searcher import (
SearcherWithRandomSeed,
SearcherWithRandomSeedAndFilterDuplicates,
sample_random_configuration,
)
from syne_tune.optimizer.schedulers.searchers.utils.hp_ranges_factory import (
make_hyperparameter_ranges,
)
from syne_tune.optimizer.schedulers.searchers.bayesopt.tuning_algorithms.common import (
ExclusionList,
)
@@ -37,7 +34,7 @@
logger = logging.getLogger(__name__)


class Bore(SearcherWithRandomSeed):
class Bore(SearcherWithRandomSeedAndFilterDuplicates):
"""
Implements "Bayesian optimization by Density Ratio Estimation" as described
in the following paper:
@@ -69,6 +66,8 @@ class Bore(SearcherWithRandomSeed):
until at least ``init_random`` observations have been recorded in
:meth:`update`. After that, the BORE algorithm is used. Defaults to 6
:param classifier_kwargs: Parameters for classifier. Optional
:param allow_duplicates: If ``True``, :meth:`get_config` may return the same
configuration more than once. Defaults to ``False``
"""

def __init__(
@@ -85,12 +84,16 @@ def __init__(
random_prob: Optional[float] = None,
init_random: Optional[int] = None,
classifier_kwargs: Optional[dict] = None,
allow_duplicates: Optional[bool] = None,
**kwargs,
):
if allow_duplicates is None:
allow_duplicates = False
super().__init__(
config_space=config_space,
metric=metric,
points_to_evaluate=points_to_evaluate,
allow_duplicates=allow_duplicates,
**kwargs,
)
if mode is None:
@@ -120,9 +123,6 @@ def __init__(
self.random_prob = random_prob
self.mode = mode

self._hp_ranges = make_hyperparameter_ranges(self.config_space)
self._excl_list = ExclusionList.empty_list(self._hp_ranges)

if classifier_kwargs is None:
classifier_kwargs = dict()
if self.classifier == "xgboost":
@@ -246,7 +246,8 @@ def wrapper(x):
f"config={config}] "
f"optimization time : {opt_time}"
)
self._excl_list.add(config) # Should not be suggested again
if not self.allow_duplicates:
self._excl_list.add(config) # Should not be suggested again

return config

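The exclusion-list handling in ``Bore`` above follows the pattern unified across searchers in this commit; the class below is a simplified, hypothetical stand-in for the library's ``ExclusionList``, shown only to illustrate the logic.

class DuplicateFilter:
    """Simplified sketch of the duplicate-filtering logic used above."""

    def __init__(self, allow_duplicates: bool = False):
        self.allow_duplicates = allow_duplicates
        self._seen = set()

    def register(self, config: dict):
        # Track a suggested config only when duplicates are disallowed
        if not self.allow_duplicates:
            self._seen.add(tuple(sorted(config.items())))

    def is_excluded(self, config: dict) -> bool:
        # With allow_duplicates=True nothing is registered, so nothing is excluded
        return tuple(sorted(config.items())) in self._seen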
@@ -58,6 +58,7 @@ def __init__(
random_prob: Optional[float] = None,
init_random: Optional[int] = None,
classifier_kwargs: Optional[dict] = None,
allow_duplicates: Optional[bool] = None,
resource_attr: str = "epoch",
**kwargs,
):
@@ -76,6 +77,7 @@
random_prob=random_prob,
init_random=init_random,
classifier_kwargs=classifier_kwargs,
allow_duplicates=allow_duplicates,
**kwargs,
)
self.resource_attr = resource_attr
@@ -100,6 +100,11 @@ def __init__(
self.trial_configs = dict()
self.pending_trials = set()
self.trial_observations = dict()
allow_duplicates = kwargs.get("allow_duplicates")
if allow_duplicates is not None and (not allow_duplicates):
logger.warning(
"This class does not support allow_duplicates argument. Sampling is with replacement"
)

def _update(self, trial_id: str, config: dict, result: dict):
trial_id = int(trial_id)
53 changes: 42 additions & 11 deletions syne_tune/optimizer/schedulers/searchers/gp_fifo_searcher.py
@@ -155,6 +155,7 @@ def _create_internal(
cost_attr: Optional[str] = None,
resource_attr: Optional[str] = None,
filter_observed_data: Optional[ConfigurationFilter] = None,
allow_duplicates: bool = False,
):
self.hp_ranges = hp_ranges
self.num_initial_candidates = num_initial_candidates
@@ -190,6 +191,7 @@ def _create_internal(
self._cost_attr = cost_attr
self._resource_attr = resource_attr
self._filter_observed_data = filter_observed_data
self._allow_duplicates = allow_duplicates
self._random_searcher = None
# Tracks the cumulative time spent in ``get_config`` calls
self.cumulative_get_config_time = 0
@@ -202,6 +204,7 @@
num_initial_random_choices
)
deb_msg += "- initial_scoring = {}\n".format(self.initial_scoring)
deb_msg += f"- allow_duplicates = {self._allow_duplicates}\n"
logger.info(deb_msg)

def _copy_kwargs_to_kwargs_int(self, kwargs_int: dict, kwargs: dict):
Expand Down Expand Up @@ -287,7 +290,7 @@ def _get_config_modelbased(
self, exclusion_candidates: ExclusionList, **kwargs
) -> Optional[Configuration]:
"""
Implements ``get_config`` part if the surrogate model is used, instead
Implements :meth:`get_config` part if the surrogate model is used, instead
of initial choices from ``points_to_evaluate`` or initial random
choices.
@@ -298,10 +301,15 @@
"""
raise NotImplementedError

def _get_exclusion_candidates(self, **kwargs) -> ExclusionList:
def _get_exclusion_candidates(self, skip_observed: bool = False) -> ExclusionList:
def skip_all(config: Configuration) -> bool:
return False

return ExclusionList(
self.state_transformer.state,
filter_observed_data=self._filter_observed_data,
filter_observed_data=skip_all
if skip_observed
else self._filter_observed_data,
)

def _should_pick_random_config(self, exclusion_candidates: ExclusionList) -> bool:
@@ -331,7 +339,9 @@ def _get_config_not_modelbased(
model-based search. If False is returned, model-based search must be
called.
:param exclusion_candidates: Configs to be avoided
:param exclusion_candidates: Configs to be avoided, even if
``allow_duplicates == True`` (in this case, we avoid configs of
failed or pending trials)
:return: ``(config, use_get_config_modelbased)``
"""
self._assign_random_searcher()
@@ -376,8 +386,10 @@ def get_config(self, **kwargs) -> Optional[dict]:
}
self.profiler.begin_block(meta)
self.profiler.start("all")
# Initial configs come from ``points_to_evaluate`` or are drawn at random
exclusion_candidates = self._get_exclusion_candidates(**kwargs)
# Initial configs come from ``points_to_evaluate`` or are drawn at random
# We use ``exclusion_candidates`` even if ``allow_duplicates == True``, in order
# to count how many unique configs have been suggested
exclusion_candidates = self._get_exclusion_candidates()
config, pick_random = self._get_config_not_modelbased(exclusion_candidates)
if self.debug_log is not None:
trial_id = kwargs.get("trial_id")
@@ -386,8 +398,18 @@
)
if not pick_random:
# Model-based decision
if not exclusion_candidates.config_space_exhausted():
config = self._get_config_modelbased(exclusion_candidates, **kwargs)
if self._allow_duplicates or (
not exclusion_candidates.config_space_exhausted()
):
# Even if ``allow_duplicates == True``, we exclude configs which are
# pending or failed
if self._allow_duplicates:
excl_cands = self._get_exclusion_candidates(skip_observed=True)
else:
excl_cands = exclusion_candidates
config = self._get_config_modelbased(
exclusion_candidates=excl_cands, **kwargs
)

if config is not None:
if self.debug_log is not None:
@@ -468,6 +490,7 @@ def _assign_random_searcher(self):
points_to_evaluate=[],
random_seed=0,
debug_log=False,
allow_duplicates=self._allow_duplicates,
)
self._random_searcher.set_random_state(self.random_state)

@@ -597,6 +620,9 @@ class GPFIFOSearcher(ModelBasedSearcher):
``opt_skip_init_length``, fitting is done only K-th call, and skipped
otherwise. Defaults to 1 (no skipping)
:type opt_skip_period: int, optional
:param allow_duplicates: If ``True``, :meth:`get_config` may return the same
configuration more than once. Defaults to ``False``
:type allow_duplicates: bool, optional
:param map_reward: In the scheduler, the metric may be minimized or
maximized, but internally, Bayesian optimization is minimizing
the criterion. ``map_reward`` converts from metric to internal
@@ -738,7 +764,7 @@ def _postprocess_config(self, config: dict) -> dict:
def _get_config_modelbased(
self, exclusion_candidates, **kwargs
) -> Optional[Configuration]:
# Obtain current SurrogateModel from state transformer. Based on
# Obtain current :class:`SurrogateModel` from state transformer. Based on
# this, the BO algorithm components can be constructed
if self.do_profile:
self.profiler.push_prefix("getconfig")
@@ -820,14 +846,16 @@ def get_batch_configs(
if config is not None:
configs.append(config)
else:
# ``DebugLogWriter`` does not support batch selection right now,
# :class:`DebugLogWriter` does not support batch selection right now,
# must be switched off
assert self.debug_log is None, (
"``get_batch_configs`` does not support debug_log right now. "
+ "Please set ``debug_log=False`` in search_options argument "
+ "of scheduler, or create your searcher with ``debug_log=False``"
)
exclusion_candidates = self._get_exclusion_candidates(**kwargs)
exclusion_candidates = self._get_exclusion_candidates(
skip_observed=self._allow_duplicates
)
pick_random = True
while pick_random and len(configs) < batch_size:
config, pick_random = self._get_config_not_modelbased(
@@ -836,6 +864,8 @@
if pick_random:
if config is not None:
configs.append(config)
# Even if ``allow_duplicates == True``, we don't want to have
# duplicates in the same batch
exclusion_candidates.add(config)
else:
break # Space exhausted
@@ -921,6 +951,7 @@ def _new_searcher_kwargs_for_clone(self) -> dict:
cost_attr=self._cost_attr,
resource_attr=self._resource_attr,
filter_observed_data=self._filter_observed_data,
allow_duplicates=self._allow_duplicates,
)

def clone_from_state(self, state):
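The in-batch deduplication added to ``get_batch_configs`` above can be pictured with this hypothetical helper (``suggest`` and its ``exclude`` argument are assumptions for the sketch, not Syne Tune APIs): even with ``allow_duplicates == True``, configurations already drawn for the current batch are excluded until the batch is complete.

def draw_batch(suggest, batch_size: int) -> list:
    """Draw a batch of configs, avoiding duplicates within the batch only."""
    batch, seen_in_batch = [], set()
    while len(batch) < batch_size:
        config = suggest(exclude=seen_in_batch)  # hypothetical suggestion callable
        if config is None:
            break  # search space exhausted
        batch.append(config)
        # Exclude for the remainder of this batch only; across batches,
        # allow_duplicates=True still permits repeats
        seen_in_batch.add(tuple(sorted(config.items())))
    return batch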
10 changes: 9 additions & 1 deletion syne_tune/optimizer/schedulers/searchers/gp_searcher_factory.py
@@ -445,7 +445,12 @@ def _create_common_objects(model=None, is_hypertune=False, **kwargs):
)
result["num_initial_candidates"] = kwargs["num_init_candidates"]
result["num_initial_random_choices"] = kwargs["num_init_random"]
for k in ("initial_scoring", "cost_attr", "skip_local_optimization"):
for k in (
"initial_scoring",
"cost_attr",
"skip_local_optimization",
"allow_duplicates",
):
result[k] = kwargs[k]

return result
@@ -797,6 +802,7 @@ def _common_defaults(
"cost_attr": "elapsed_time",
"normalize_targets": True,
"no_fantasizing": False,
"allow_duplicates": False,
}
if is_hyperband:
if is_hypertune:
@@ -834,6 +840,8 @@
"skip_local_optimization": Boolean(),
"debug_log": Boolean(),
"normalize_targets": Boolean(),
"no_fantasizing": Boolean(),
"allow_duplicates": Boolean(),
}

if is_hyperband:
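The defaults and constraints registered above follow a merge-then-validate pattern; the sketch below is a simplified, hypothetical rendition (the helper name and structure are not the module's actual code) of how ``allow_duplicates`` ends up with a ``False`` default and a Boolean check.

def resolve_search_options(user_options: dict) -> dict:
    """Merge user-supplied search options over defaults, then type-check them."""
    defaults = {"allow_duplicates": False, "no_fantasizing": False}
    constraints = {"allow_duplicates": bool, "no_fantasizing": bool}
    options = {**defaults, **user_options}  # user values override defaults
    for key, expected in constraints.items():
        if not isinstance(options[key], expected):
            raise ValueError(f"{key} must be of type {expected.__name__}")
    return options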
18 changes: 9 additions & 9 deletions syne_tune/optimizer/schedulers/searchers/kde/kde_searcher.py
@@ -17,24 +17,21 @@
import scipy.stats as sps

from syne_tune.optimizer.schedulers.searchers.searcher import (
SearcherWithRandomSeed,
SearcherWithRandomSeedAndFilterDuplicates,
sample_random_configuration,
)
import syne_tune.config_space as sp
from syne_tune.optimizer.schedulers.searchers.bayesopt.utils.debug_log import (
DebugLogPrinter,
)
from syne_tune.optimizer.schedulers.searchers.utils.hp_ranges_factory import (
make_hyperparameter_ranges,
)
from syne_tune.optimizer.schedulers.searchers.bayesopt.tuning_algorithms.common import (
ExclusionList,
)

logger = logging.getLogger(__name__)


class KernelDensityEstimator(SearcherWithRandomSeed):
class KernelDensityEstimator(SearcherWithRandomSeedAndFilterDuplicates):
"""
Fits two kernel density estimators (KDE) to model the density of the top N
configurations as well as the density of the configurations that are not
@@ -82,6 +79,8 @@ class KernelDensityEstimator(SearcherWithRandomSeed):
:param random_fraction: Defines the fraction of configurations that are
drawn uniformly at random instead of sampling from the model.
Defaults to 0.33
:param allow_duplicates: If ``True``, :meth:`get_config` may return the same
configuration more than once. Defaults to ``False``
"""

def __init__(
Expand All @@ -96,12 +95,16 @@ def __init__(
num_candidates: Optional[int] = None,
bandwidth_factor: Optional[int] = None,
random_fraction: Optional[float] = None,
allow_duplicates: Optional[bool] = None,
**kwargs,
):
if allow_duplicates is None:
allow_duplicates = False
super().__init__(
config_space=config_space,
metric=metric,
points_to_evaluate=points_to_evaluate,
allow_duplicates=allow_duplicates,
mode="min" if mode is None else mode,
**kwargs,
)
@@ -158,9 +161,6 @@ def __init__(
self.vartypes
), f"num_min_data_points = {num_min_data_points}, must be >= {len(self.vartypes)}"
self._resource_attr = kwargs.get("resource_attr")
# Used for sampling initial random configs, and to avoid duplicates
self._hp_ranges = make_hyperparameter_ranges(self.config_space)
self._excl_list = ExclusionList.empty_list(self._hp_ranges)
# Debug log printing (switched on by default)
debug_log = kwargs.get("debug_log", True)
if isinstance(debug_log, bool):
@@ -363,7 +363,7 @@ def acquisition_function(x):
)
suggestion = self._get_random_config()

if suggestion is not None:
if suggestion is not None and not self.allow_duplicates:
self._excl_list.add(suggestion) # Should not be suggested again
return suggestion

Expand Down
