Allow get_config to return the same config more than once #487

Merged
merged 3 commits on Jan 23, 2023
3 changes: 3 additions & 0 deletions docs/source/schedulers.rst
@@ -167,6 +167,9 @@ are given in ``search_options``. These are:

* ``debug_log``: If ``True``, a useful log output about the search progress is
printed.
* ``allow_duplicates``: If ``True``, the same configuration may be suggested
more than once. The default is ``False``, in which case sampling is without
replacement.

Bayesian Optimization
~~~~~~~~~~~~~~~~~~~~~
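The new flag is forwarded through the scheduler's ``search_options``, like the other options listed in this part of the docs. A minimal usage sketch (the config space, metric name, and searcher choice below are illustrative, not part of this PR):

```python
from syne_tune.config_space import randint, uniform
from syne_tune.optimizer.schedulers import FIFOScheduler

# Illustrative search space and metric name
config_space = {
    "num_layers": randint(1, 4),
    "learning_rate": uniform(1e-4, 1e-1),
}

scheduler = FIFOScheduler(
    config_space,
    searcher="bayesopt",
    # allow_duplicates=True: the searcher may suggest a configuration that
    # was already proposed earlier, i.e. sampling is with replacement
    search_options={"allow_duplicates": True, "debug_log": True},
    metric="validation_loss",
    mode="min",
)
```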
2 changes: 2 additions & 0 deletions syne_tune/optimizer/schedulers/searchers/__init__.py
@@ -19,6 +19,7 @@
SearcherWithRandomSeed,
impute_points_to_evaluate,
extract_random_seed,
SearcherWithRandomSeedAndFilterDuplicates,
)
from syne_tune.optimizer.schedulers.searchers.random_grid_searcher import ( # noqa: F401
RandomSearcher,
@@ -33,6 +34,7 @@
"SearcherWithRandomSeed",
"impute_points_to_evaluate",
"extract_random_seed",
"SearcherWithRandomSeedAndFilterDuplicates",
"RandomSearcher",
"GridSearcher",
"searcher_factory",
32 changes: 6 additions & 26 deletions syne_tune/optimizer/schedulers/searchers/bore/bore.py
@@ -21,14 +21,7 @@
from sklearn.calibration import CalibratedClassifierCV

from syne_tune.optimizer.schedulers.searchers.searcher import (
SearcherWithRandomSeed,
sample_random_configuration,
)
from syne_tune.optimizer.schedulers.searchers.utils.hp_ranges_factory import (
make_hyperparameter_ranges,
)
from syne_tune.optimizer.schedulers.searchers.bayesopt.tuning_algorithms.common import (
ExclusionList,
SearcherWithRandomSeedAndFilterDuplicates,
)
from syne_tune.optimizer.schedulers.searchers.bore.de import (
DifferentialevolutionOptimizer,
@@ -37,7 +30,7 @@
logger = logging.getLogger(__name__)


class Bore(SearcherWithRandomSeed):
class Bore(SearcherWithRandomSeedAndFilterDuplicates):
"""
Implements "Bayesian optimization by Density Ratio Estimation" as described
in the following paper:
@@ -48,7 +41,7 @@ class Bore(SearcherWithRandomSeed):
| https://arxiv.org/abs/2102.09009

Additional arguments on top of parent class
:class:`~syne_tune.optimizer.schedulers.searchers.SearcherWithRandomSeed`:
:class:`~syne_tune.optimizer.schedulers.searchers.SearcherWithRandomSeedAndFilterDuplicates`:

:param mode: Can be "min" (default) or "max".
:param gamma: Defines the percentile, i.e., how many percent of configurations
@@ -76,6 +69,7 @@ def __init__(
config_space: dict,
metric: str,
points_to_evaluate: Optional[List[dict]] = None,
allow_duplicates: Optional[bool] = None,
mode: Optional[str] = None,
gamma: Optional[float] = None,
calibrate: Optional[bool] = None,
@@ -91,6 +85,7 @@
config_space=config_space,
metric=metric,
points_to_evaluate=points_to_evaluate,
allow_duplicates=allow_duplicates,
**kwargs,
)
if mode is None:
@@ -120,9 +115,6 @@ def __init__(
self.random_prob = random_prob
self.mode = mode

self._hp_ranges = make_hyperparameter_ranges(self.config_space)
self._excl_list = ExclusionList.empty_list(self._hp_ranges)

if classifier_kwargs is None:
classifier_kwargs = dict()
if self.classifier == "xgboost":
@@ -167,18 +159,7 @@ def _loss(self, x):
else:
return y[:, 1] # return probability of class 1

def _get_random_config(
self, exclusion_list: Optional[ExclusionList] = None
) -> dict:
if exclusion_list is None:
exclusion_list = self._excl_list
return sample_random_configuration(
hp_ranges=self._hp_ranges,
random_state=self.random_state,
exclusion_list=exclusion_list,
)

def get_config(self, **kwargs):
def _get_config(self, **kwargs):
start_time = time.time()
config = self._next_initial_config()
if config is None:
@@ -246,7 +227,6 @@ def wrapper(x):
f"config={config}] "
f"optimization time : {opt_time}"
)
self._excl_list.add(config) # Should not be suggested again

return config

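In the diff above, ``Bore`` drops its own exclusion-list bookkeeping (``_hp_ranges``, ``_excl_list``, ``_get_random_config``) and renames ``get_config`` to ``_get_config``; duplicate filtering now lives in the new ``SearcherWithRandomSeedAndFilterDuplicates`` base class, whose code is not part of this diff. A rough sketch of the wrapping pattern such a base class presumably implements (the internals below are assumptions; only the import paths and helper calls are taken from the removed lines):

```python
from syne_tune.optimizer.schedulers.searchers.searcher import SearcherWithRandomSeed
from syne_tune.optimizer.schedulers.searchers.utils.hp_ranges_factory import (
    make_hyperparameter_ranges,
)
from syne_tune.optimizer.schedulers.searchers.bayesopt.tuning_algorithms.common import (
    ExclusionList,
)


class SearcherWithRandomSeedAndFilterDuplicates(SearcherWithRandomSeed):
    """Sketch only: subclasses implement ``_get_config``; this wrapper records
    suggested configs in an exclusion list unless ``allow_duplicates`` is set."""

    def __init__(self, config_space, metric, allow_duplicates=None, **kwargs):
        super().__init__(config_space, metric=metric, **kwargs)
        self._allow_duplicates = bool(allow_duplicates)  # None defaults to False
        self._hp_ranges = make_hyperparameter_ranges(config_space)
        self._excl_list = ExclusionList.empty_list(self._hp_ranges)

    def _get_config(self, **kwargs):
        raise NotImplementedError  # implemented by Bore, RandomSearcher, ...

    def get_config(self, **kwargs):
        config = self._get_config(**kwargs)
        if config is not None and not self._allow_duplicates:
            self._excl_list.add(config)  # should not be suggested again
        return config
```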
@@ -49,6 +49,7 @@ def __init__(
config_space: dict,
metric: str,
points_to_evaluate: Optional[List[dict]] = None,
allow_duplicates: Optional[bool] = None,
mode: Optional[str] = None,
gamma: Optional[float] = None,
calibrate: Optional[bool] = None,
@@ -67,6 +68,7 @@
config_space,
metric=metric,
points_to_evaluate=points_to_evaluate,
allow_duplicates=allow_duplicates,
mode=mode,
gamma=gamma,
calibrate=calibrate,
@@ -100,6 +100,11 @@ def __init__(
self.trial_configs = dict()
self.pending_trials = set()
self.trial_observations = dict()
allow_duplicates = kwargs.get("allow_duplicates")
if allow_duplicates is not None and (not allow_duplicates):
logger.warning(
"This class does not support allow_duplicates argument. Sampling is with replacement"
)

def _update(self, trial_id: str, config: dict, result: dict):
trial_id = int(trial_id)
53 changes: 42 additions & 11 deletions syne_tune/optimizer/schedulers/searchers/gp_fifo_searcher.py
@@ -155,6 +155,7 @@ def _create_internal(
cost_attr: Optional[str] = None,
resource_attr: Optional[str] = None,
filter_observed_data: Optional[ConfigurationFilter] = None,
allow_duplicates: bool = False,
):
self.hp_ranges = hp_ranges
self.num_initial_candidates = num_initial_candidates
@@ -190,6 +191,7 @@
self._cost_attr = cost_attr
self._resource_attr = resource_attr
self._filter_observed_data = filter_observed_data
self._allow_duplicates = allow_duplicates
self._random_searcher = None
# Tracks the cumulative time spent in ``get_config`` calls
self.cumulative_get_config_time = 0
@@ -202,6 +204,7 @@
num_initial_random_choices
)
deb_msg += "- initial_scoring = {}\n".format(self.initial_scoring)
deb_msg += f"- allow_duplicates = {self._allow_duplicates}\n"
logger.info(deb_msg)

def _copy_kwargs_to_kwargs_int(self, kwargs_int: dict, kwargs: dict):
@@ -287,7 +290,7 @@ def _get_config_modelbased(
self, exclusion_candidates: ExclusionList, **kwargs
) -> Optional[Configuration]:
"""
Implements ``get_config`` part if the surrogate model is used, instead
Implements :meth:`get_config` part if the surrogate model is used, instead
of initial choices from ``points_to_evaluate`` or initial random
choices.

@@ -298,10 +301,15 @@
"""
raise NotImplementedError

def _get_exclusion_candidates(self, **kwargs) -> ExclusionList:
def _get_exclusion_candidates(self, skip_observed: bool = False) -> ExclusionList:
def skip_all(config: Configuration) -> bool:
return False

return ExclusionList(
self.state_transformer.state,
filter_observed_data=self._filter_observed_data,
filter_observed_data=skip_all
if skip_observed
else self._filter_observed_data,
)

def _should_pick_random_config(self, exclusion_candidates: ExclusionList) -> bool:
@@ -331,7 +339,9 @@ def _get_config_not_modelbased(
model-based search. If False is returned, model-based search must be
called.

:param exclusion_candidates: Configs to be avoided
:param exclusion_candidates: Configs to be avoided, even if
``allow_duplicates == True`` (in this case, we avoid configs of
failed or pending trials)
:return: ``(config, use_get_config_modelbased)``
"""
self._assign_random_searcher()
@@ -376,8 +386,10 @@ def get_config(self, **kwargs) -> Optional[dict]:
}
self.profiler.begin_block(meta)
self.profiler.start("all")
# Initial configs come from ``points_to_evaluate`` or are drawn at random
exclusion_candidates = self._get_exclusion_candidates(**kwargs)
# Initial configs come from ``points_to_evaluate`` or are drawn at random
# We use ``exclusion_candidates`` even if ``allow_duplicates == True``, in order
# to count how many unique configs have been suggested
exclusion_candidates = self._get_exclusion_candidates()
config, pick_random = self._get_config_not_modelbased(exclusion_candidates)
if self.debug_log is not None:
trial_id = kwargs.get("trial_id")
@@ -386,8 +398,18 @@
)
if not pick_random:
# Model-based decision
if not exclusion_candidates.config_space_exhausted():
config = self._get_config_modelbased(exclusion_candidates, **kwargs)
if self._allow_duplicates or (
not exclusion_candidates.config_space_exhausted()
):
# Even if ``allow_duplicates == True``, we exclude configs which are
# pending or failed
if self._allow_duplicates:
excl_cands = self._get_exclusion_candidates(skip_observed=True)
else:
excl_cands = exclusion_candidates
config = self._get_config_modelbased(
exclusion_candidates=excl_cands, **kwargs
)

if config is not None:
if self.debug_log is not None:
@@ -468,6 +490,7 @@ def _assign_random_searcher(self):
points_to_evaluate=[],
random_seed=0,
debug_log=False,
allow_duplicates=self._allow_duplicates,
)
self._random_searcher.set_random_state(self.random_state)

@@ -597,6 +620,9 @@ class GPFIFOSearcher(ModelBasedSearcher):
``opt_skip_init_length``, fitting is done only K-th call, and skipped
otherwise. Defaults to 1 (no skipping)
:type opt_skip_period: int, optional
:param allow_duplicates: If ``True``, :meth:`get_config` may return the same
configuration more than once. Defaults to ``False``
:type allow_duplicates: bool, optional
:param map_reward: In the scheduler, the metric may be minimized or
maximized, but internally, Bayesian optimization is minimizing
the criterion. ``map_reward`` converts from metric to internal
@@ -738,7 +764,7 @@ def _postprocess_config(self, config: dict) -> dict:
def _get_config_modelbased(
self, exclusion_candidates, **kwargs
) -> Optional[Configuration]:
# Obtain current SurrogateModel from state transformer. Based on
# Obtain current :class:`SurrogateModel` from state transformer. Based on
# this, the BO algorithm components can be constructed
if self.do_profile:
self.profiler.push_prefix("getconfig")
@@ -820,14 +846,16 @@ def get_batch_configs(
if config is not None:
configs.append(config)
else:
# ``DebugLogWriter`` does not support batch selection right now,
# :class:`DebugLogWriter` does not support batch selection right now,
# must be switched off
assert self.debug_log is None, (
"``get_batch_configs`` does not support debug_log right now. "
+ "Please set ``debug_log=False`` in search_options argument "
+ "of scheduler, or create your searcher with ``debug_log=False``"
)
exclusion_candidates = self._get_exclusion_candidates(**kwargs)
exclusion_candidates = self._get_exclusion_candidates(
skip_observed=self._allow_duplicates
)
pick_random = True
while pick_random and len(configs) < batch_size:
config, pick_random = self._get_config_not_modelbased(
@@ -836,6 +864,8 @@
if pick_random:
if config is not None:
configs.append(config)
# Even if ``allow_duplicates == True``, we don't want to have
# duplicates in the same batch
exclusion_candidates.add(config)
else:
break # Space exhausted
@@ -921,6 +951,7 @@ def _new_searcher_kwargs_for_clone(self) -> dict:
cost_attr=self._cost_attr,
resource_attr=self._resource_attr,
filter_observed_data=self._filter_observed_data,
allow_duplicates=self._allow_duplicates,
)

def clone_from_state(self, state):
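A practical consequence of the ``get_config`` changes above: with ``allow_duplicates=False`` a finite config space can be exhausted, after which ``get_config`` returns ``None``, while ``allow_duplicates=True`` keeps producing suggestions (only pending and failed trials remain excluded). A small sketch using ``RandomSearcher`` on a tiny finite space (the space, metric name, and trial ids are illustrative):

```python
from syne_tune.config_space import choice
from syne_tune.optimizer.schedulers.searchers import RandomSearcher

# Only 2 * 2 = 4 distinct configurations exist in this space
config_space = {
    "optimizer": choice(["sgd", "adam"]),
    "batch_size": choice([32, 64]),
}

# Without replacement (the default): once all 4 configs have been suggested,
# further calls are expected to return None
searcher = RandomSearcher(config_space, metric="loss", allow_duplicates=False)
print([searcher.get_config(trial_id=str(i)) for i in range(6)])

# With replacement: duplicates may be suggested, the space is never exhausted
searcher = RandomSearcher(config_space, metric="loss", allow_duplicates=True)
print([searcher.get_config(trial_id=str(i)) for i in range(6)])
```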
10 changes: 9 additions & 1 deletion syne_tune/optimizer/schedulers/searchers/gp_searcher_factory.py
@@ -445,7 +445,12 @@ def _create_common_objects(model=None, is_hypertune=False, **kwargs):
)
result["num_initial_candidates"] = kwargs["num_init_candidates"]
result["num_initial_random_choices"] = kwargs["num_init_random"]
for k in ("initial_scoring", "cost_attr", "skip_local_optimization"):
for k in (
"initial_scoring",
"cost_attr",
"skip_local_optimization",
"allow_duplicates",
):
result[k] = kwargs[k]

return result
@@ -797,6 +802,7 @@ def _common_defaults(
"cost_attr": "elapsed_time",
"normalize_targets": True,
"no_fantasizing": False,
"allow_duplicates": False,
}
if is_hyperband:
if is_hypertune:
@@ -834,6 +840,8 @@
"skip_local_optimization": Boolean(),
"debug_log": Boolean(),
"normalize_targets": Boolean(),
"no_fantasizing": Boolean(),
"allow_duplicates": Boolean(),
}

if is_hyperband:
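The factory changes follow the file's existing pattern: ``allow_duplicates`` gets a default in ``_common_defaults`` and a ``Boolean()`` constraint, so the value passed in (for example via ``search_options``) can be validated before being forwarded to the searcher. A simplified, self-contained illustration of that merge-and-validate pattern (hypothetical helpers, not the actual syne-tune code):

```python
from typing import Any, Dict

# Hypothetical stand-ins for the factory's defaults and constraints
DEFAULTS: Dict[str, Any] = {"debug_log": True, "allow_duplicates": False}
CONSTRAINTS: Dict[str, type] = {"debug_log": bool, "allow_duplicates": bool}


def resolve_search_options(search_options: Dict[str, Any]) -> Dict[str, Any]:
    """Merge user options with defaults, then type-check against constraints."""
    resolved = dict(DEFAULTS)
    resolved.update(search_options)
    for key, expected_type in CONSTRAINTS.items():
        if not isinstance(resolved[key], expected_type):
            raise ValueError(f"{key} must be of type {expected_type.__name__}")
    return resolved


# The new flag flows through like any other option
print(resolve_search_options({"allow_duplicates": True}))
# {'debug_log': True, 'allow_duplicates': True}
```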