2 changes: 1 addition & 1 deletion Makefile
@@ -90,7 +90,7 @@ install-no-extras-for-test:
python -m pip install .[test]

install-all-extras-for-test:
- python -m pip install .[all_extras,sktime-integration,test]
+ python -m pip install .[all_extras,test,test_parallel_backends,sktime-integration]

install-editable:
pip install -e .
5 changes: 5 additions & 0 deletions pyproject.toml
@@ -65,6 +65,11 @@ test = [
"torch",
"tf_keras",
]
test_parallel_backends = [
"dask",
"joblib",
'ray >=2.40.0; python_version < "3.13"',
]
all_extras = [
"hyperactive[integrations]",
"optuna<5",
16 changes: 16 additions & 0 deletions src/hyperactive/opt/_common.py
@@ -0,0 +1,16 @@
"""Common functions used by multiple optimizers."""

__all__ = ["_score_params"]


def _score_params(params, meta):
"""Score parameters, used in parallelization."""
meta = meta.copy()
experiment = meta["experiment"]
error_score = meta["error_score"]

try:
return experiment(**params)
except Exception: # noqa: B904
# Catch all exceptions and assign error_score
return error_score
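
For orientation, a minimal usage sketch of _score_params; the toy experiment below is hypothetical and not part of the diff:

    # Any callable returning a scalar score can serve as the "experiment".
    def toy_experiment(x):
        if x < 0:
            raise ValueError("negative input")
        return x**2

    meta = {"experiment": toy_experiment, "error_score": float("nan")}

    _score_params({"x": 3}, meta)   # returns 9
    _score_params({"x": -1}, meta)  # exception is caught, returns nan (the error_score)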
86 changes: 76 additions & 10 deletions src/hyperactive/opt/gridsearch/_sk.py
@@ -7,6 +7,8 @@
from sklearn.model_selection import ParameterGrid

from hyperactive.base import BaseOptimizer
from hyperactive.opt._common import _score_params
from hyperactive.utils.parallel import parallelize


class GridSearchSk(BaseOptimizer):
@@ -17,8 +19,45 @@ class GridSearchSk(BaseOptimizer):
param_grid : dict[str, list]
The search space to explore. A dictionary with parameter
names as keys and lists or numpy arrays of candidate values.

error_score : float, default=np.nan
The score to assign if an error occurs during the evaluation of a parameter set.

backend : {"dask", "loky", "multiprocessing", "threading", "ray"}, default = "None".
Parallelization backend to use in the search process.

- "None": executes loop sequentally, simple list comprehension
- "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops
- "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``
- "dask": uses ``dask``, requires ``dask`` package in environment
- "ray": uses ``ray``, requires ``ray`` package in environment

backend_params : dict, optional
Additional parameters passed to the backend as config.
Directly passed to ``utils.parallel.parallelize``.
Valid keys depend on the value of ``backend``:

- "None": no additional parameters, ``backend_params`` is ignored
- "loky", "multiprocessing" and "threading": default ``joblib`` backends;
any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
with the exception of ``backend``, which is controlled directly by the
``backend`` parameter. If ``n_jobs`` is not passed, it will default to ``-1``;
other parameters will default to ``joblib`` defaults.
- "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``.
Any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``;
``backend`` must be passed as a key of ``backend_params`` in this case.
If ``n_jobs`` is not passed, it will default to ``-1``; other parameters
will default to ``joblib`` defaults.
- "dask": any valid keys for ``dask.compute`` can be passed, e.g., ``scheduler``

- "ray": the following keys can be passed:

- "ray_remote_args": dictionary of valid keys for ``ray.init``
- "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting
down after parallelization.
- "logger_name": str, default="ray"; name of the logger to use.
- "mute_warnings": bool, default=False; if True, suppresses warnings.

experiment : BaseExperiment, optional
The experiment to optimize parameters for.
Optional, can be passed later via ``set_params``.
@@ -53,17 +92,29 @@ class GridSearchSk(BaseOptimizer):

Best parameters can also be accessed via the attributes:
>>> best_params = grid_search.best_params_

To parallelize the search, set the ``backend`` and ``backend_params``:
>>> grid_search = GridSearch(
... param_grid,
... backend="joblib",
... backend_params={"n_jobs": -1},
... experiment=sklearn_exp,
... )
"""

def __init__(
self,
param_grid=None,
error_score=np.nan,
backend="None",
backend_params=None,
experiment=None,
):
self.experiment = experiment
self.param_grid = param_grid
self.error_score = error_score
self.backend = backend
self.backend_params = backend_params

super().__init__()

@@ -91,19 +142,23 @@ def _check_param_grid(self, param_grid):
"to be a non-empty sequence."
)

- def _solve(self, experiment, param_grid, error_score):
+ def _solve(self, experiment, param_grid, error_score, backend, backend_params):
"""Run the optimization search process."""
self._check_param_grid(param_grid)
candidate_params = list(ParameterGrid(param_grid))

- scores = []
- for candidate_param in candidate_params:
-     try:
-         score = experiment(**candidate_param)
-     except Exception:  # noqa: B904
-         # Catch all exceptions and assign error_score
-         score = error_score
-     scores.append(score)
+ meta = {
+     "experiment": experiment,
+     "error_score": error_score,
+ }
+
+ scores = parallelize(
+     fun=_score_params,
+     iter=candidate_params,
+     meta=meta,
+     backend=backend,
+     backend_params=backend_params,
+ )
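# Note, not part of this diff: per the docstring, backend="None" makes the
# parallelize call above equivalent to a plain sequential comprehension:
#     scores = [_score_params(p, meta) for p in candidate_params]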

best_index = np.argmin(scores)
best_params = candidate_params[best_index]
@@ -170,4 +225,15 @@ def get_test_params(cls, parameter_set="default"):
"param_grid": param_grid,
}

- return [params_sklearn, params_ackley]
+ params = [params_sklearn, params_ackley]
+
+ from hyperactive.utils.parallel import _get_parallel_test_fixtures
+
+ parallel_fixtures = _get_parallel_test_fixtures()
+
+ for x in parallel_fixtures:
+     new_ackley = params_ackley.copy()
+     new_ackley.update(x)
+     params.append(new_ackley)
+
+ return params
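
What _get_parallel_test_fixtures returns is not shown in this diff; presumably it yields backend override dicts roughly like the following (hypothetical values):

    # Hypothetical shape of the fixtures merged into params_ackley above
    parallel_fixtures = [
        {"backend": "loky", "backend_params": {"n_jobs": 2}},
        {"backend": "threading", "backend_params": {"n_jobs": 2}},
        {"backend": "dask", "backend_params": {"scheduler": "synchronous"}},
    ]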
81 changes: 72 additions & 9 deletions src/hyperactive/opt/random_search.py
@@ -8,6 +8,8 @@
from sklearn.model_selection import ParameterSampler

from hyperactive.base import BaseOptimizer
from hyperactive.opt._common import _score_params
from hyperactive.utils.parallel import parallelize


class RandomSearchSk(BaseOptimizer):
@@ -18,12 +20,51 @@ class RandomSearchSk(BaseOptimizer):
param_distributions : dict[str, list | scipy.stats.rv_frozen]
Search space specification. Discrete lists are sampled uniformly;
scipy distribution objects are sampled via their ``rvs`` method.

n_iter : int, default=10
Number of parameter sets to evaluate.

random_state : int | np.random.RandomState | None, default=None
Controls the pseudo-random generator for reproducibility.

error_score : float, default=np.nan
Score assigned when the experiment raises an exception.

backend : {"dask", "loky", "multiprocessing", "threading", "ray"}, default = "None".
Parallelization backend to use in the search process.

- "None": executes loop sequentally, simple list comprehension
- "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops
- "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``
- "dask": uses ``dask``, requires ``dask`` package in environment
- "ray": uses ``ray``, requires ``ray`` package in environment

backend_params : dict, optional
Additional parameters passed to the backend as config.
Directly passed to ``utils.parallel.parallelize``.
Valid keys depend on the value of ``backend``:

- "None": no additional parameters, ``backend_params`` is ignored
----- Inline review discussion -----

SimonBlanke (Collaborator, Aug 14, 2025): I read through the backend_params multiple times and I do not understand how they work. How do the key and value of the dict look? We also have two parameters, "backend" and "backend_params", that could easily be done in one parameter.

Author (Collaborator): backend selects which parallelization backend you want to use, e.g., "joblib" or "ray". backend_params is a config dict for the backend; which keys you can pass depends on the chosen backend.

Author (Collaborator): I updated the docstrings, hope that is clearer now?

SimonBlanke (Collaborator, Aug 14, 2025): How do the key and value of the dict look? Does the dict have multiple keys? Is the value a list of the strings shown in the docstring text or just one string? Could you write an example into the docstring?

Author (Collaborator): the params dict can be a dict with multiple keys, I will add an example

Author (Collaborator): added example

----- End of review discussion -----

- "loky", "multiprocessing" and "threading": default ``joblib`` backends
any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
with the exception of ``backend`` which is directly controlled by ``backend``.
If ``n_jobs`` is not passed, it will default to ``-1``, other parameters
will default to ``joblib`` defaults.
- "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``.
any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
``backend`` must be passed as a key of ``backend_params`` in this case.
If ``n_jobs`` is not passed, it will default to ``-1``, other parameters
will default to ``joblib`` defaults.
- "dask": any valid keys for ``dask.compute`` can be passed, e.g., ``scheduler``

- "ray": The following keys can be passed:

- "ray_remote_args": dictionary of valid keys for ``ray.init``
- "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting
down after parallelization.
- "logger_name": str, default="ray"; name of the logger to use.
- "mute_warnings": bool, default=False; if True, suppresses warnings

experiment : BaseExperiment, optional
Callable returning a scalar score when invoked with keyword
arguments matching a parameter set.
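
A hedged usage sketch combining a frozen scipy distribution with a parallel backend; sklearn_exp stands in for an experiment object defined elsewhere:

    from scipy.stats import uniform

    param_distributions = {
        "C": uniform(loc=0.1, scale=9.9),  # sampled via rvs
        "kernel": ["linear", "rbf"],       # sampled uniformly from the list
    }

    random_search = RandomSearchSk(
        param_distributions,
        n_iter=20,
        random_state=0,
        backend="loky",
        backend_params={"n_jobs": -1},
        experiment=sklearn_exp,  # assumed: an experiment defined elsewhere
    )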
@@ -44,13 +85,17 @@ def __init__(
n_iter=10,
random_state=None,
error_score=np.nan,
backend="None",
backend_params=None,
experiment=None,
):
self.experiment = experiment
self.param_distributions = param_distributions
self.n_iter = n_iter
self.random_state = random_state
self.error_score = error_score
self.backend = backend
self.backend_params = backend_params

super().__init__()

@@ -67,7 +112,7 @@ def _check_param_distributions(self, param_distributions):
for p in param_distributions:
for name, v in p.items():
if self._is_distribution(v):
- # Assume scipy frozen distribution - nothing to check
+ # Assume scipy frozen distribution: nothing to check
continue

if isinstance(v, np.ndarray) and v.ndim > 1:
@@ -93,6 +138,8 @@ def _solve(
n_iter,
random_state,
error_score,
backend,
backend_params,
):
"""Sample ``n_iter`` points and return the best parameter set."""
self._check_param_distributions(param_distributions)
@@ -104,13 +151,18 @@
)
candidate_params = list(sampler)

- scores: list[float] = []
- for candidate_param in candidate_params:
-     try:
-         score = experiment(**candidate_param)
-     except Exception:  # noqa: B904
-         score = error_score
-     scores.append(score)
+ meta = {
+     "experiment": experiment,
+     "error_score": error_score,
+ }
+
+ scores = parallelize(
+     fun=_score_params,
+     iter=candidate_params,
+     meta=meta,
+     backend=backend,
+     backend_params=backend_params,
+ )

best_index = int(np.argmin(scores)) # lower-is-better convention
best_params = candidate_params[best_index]
@@ -154,4 +206,15 @@ def get_test_params(cls, parameter_set: str = "default"):
"random_state": 0,
}

- return [params_sklearn, params_ackley]
+ params = [params_sklearn, params_ackley]
+
+ from hyperactive.utils.parallel import _get_parallel_test_fixtures
+
+ parallel_fixtures = _get_parallel_test_fixtures()
+
+ for x in parallel_fixtures:
+     new_ackley = params_ackley.copy()
+     new_ackley.update(x)
+     params.append(new_ackley)
+
+ return params