From b6ee9fb0460e9d67dd8706f19ebdd24d97139e1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?=
Date: Wed, 13 Aug 2025 23:33:15 +0200
Subject: [PATCH 01/15] gridsearch

---
 Makefile                                      |   2 +-
 pyproject.toml                                |   5 +
 src/hyperactive/opt/gridsearch/_sk.py         |  96 +++++-
 src/hyperactive/utils/__init__.py             |   6 -
 src/hyperactive/utils/estimator_checks.py     | 139 ---------
 src/hyperactive/utils/parallel.py             | 289 +++++++++++++++++++
 src/hyperactive/utils/tests/__init__.py       |   1 +
 src/hyperactive/utils/tests/test_parallel.py  |  58 ++++
 8 files changed, 440 insertions(+), 156 deletions(-)
 delete mode 100644 src/hyperactive/utils/estimator_checks.py
 create mode 100644 src/hyperactive/utils/parallel.py
 create mode 100644 src/hyperactive/utils/tests/__init__.py
 create mode 100644 src/hyperactive/utils/tests/test_parallel.py

diff --git a/Makefile b/Makefile
index 746de3f3..dfe7ecb8 100644
--- a/Makefile
+++ b/Makefile
@@ -87,7 +87,7 @@ install-no-extras-for-test:
 	python -m pip install .[test]
 
 install-all-extras-for-test:
-	python -m pip install .[all_extras,test]
+	python -m pip install .[all_extras,test,test_parallel_backends]
 
 install-editable:
 	pip install -e .

diff --git a/pyproject.toml b/pyproject.toml
index c7c62a0a..06d3b8ed 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -60,6 +60,11 @@ test = [
     "pytest-cov",
     "pathos",
 ]
+test_parallel_backends = [
+    "dask",
+    "joblib",
+    "ray",
+]
 all_extras = [
     "hyperactive[integrations]",
 ]

diff --git a/src/hyperactive/opt/gridsearch/_sk.py b/src/hyperactive/opt/gridsearch/_sk.py
index 84e97625..31f111ab 100644
--- a/src/hyperactive/opt/gridsearch/_sk.py
+++ b/src/hyperactive/opt/gridsearch/_sk.py
@@ -8,6 +8,7 @@
 from sklearn.model_selection import ParameterGrid
 
 from hyperactive.base import BaseOptimizer
+from hyperactive.utils.parallel import parallelize
 
 
 class GridSearchSk(BaseOptimizer):
@@ -18,8 +19,50 @@ class GridSearchSk(BaseOptimizer):
     param_grid : dict[str, list]
         The search space to explore. A dictionary with parameter
         names as keys and a numpy array as values.
+
     error_score : float, default=np.nan
         The score to assign if an error occurs during the evaluation
         of a parameter set.
+
+    backend : {"dask", "loky", "multiprocessing", "threading","ray"}, by default "None".
+        Runs parallel evaluate if specified and ``strategy`` is set as "refit".
+
+        - "None": executes loop sequentially, simple list comprehension
+        - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops
+        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``
+        - "dask": uses ``dask``, requires ``dask`` package in environment
+        - "ray": uses ``ray``, requires ``ray`` package in environment
+
+        Recommendation: Use "dask" or "loky" for parallel evaluate.
+        "threading" is unlikely to see speed ups due to the GIL and the serialization
+        backend (``cloudpickle``) for "dask" and "loky" is generally more robust
+        than the standard ``pickle`` library used in "multiprocessing".
+
+    backend_params : dict, optional
+        additional parameters passed to the backend as config.
+        Directly passed to ``utils.parallel.parallelize``.
+        Valid keys depend on the value of ``backend``:
+
+        - "None": no additional parameters, ``backend_params`` is ignored
+        - "loky", "multiprocessing" and "threading": default ``joblib`` backends
+          any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
+          with the exception of ``backend`` which is directly controlled by ``backend``.
+ If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "dask": any valid keys for ``dask.compute`` can be passed, e.g., ``scheduler`` + + - "ray": The following keys can be passed: + + - "ray_remote_args": dictionary of valid keys for ``ray.init`` + - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting + down after parallelization. + - "logger_name": str, default="ray"; name of the logger to use. + - "mute_warnings": bool, default=False; if True, suppresses warnings + experiment : BaseExperiment, optional The experiment to optimize parameters for. Optional, can be passed later via ``set_params``. @@ -60,11 +103,15 @@ def __init__( self, param_grid=None, error_score=np.nan, + backend="None", + backend_params=None, experiment=None, ): self.experiment = experiment self.param_grid = param_grid self.error_score = error_score + self.backend = backend + self.backend_params = backend_params super().__init__() @@ -97,14 +144,18 @@ def _run(self, experiment, param_grid, error_score): self._check_param_grid(param_grid) candidate_params = list(ParameterGrid(param_grid)) - scores = [] - for candidate_param in candidate_params: - try: - score = experiment(**candidate_param) - except Exception: # noqa: B904 - # Catch all exceptions and assign error_score - score = error_score - scores.append(score) + meta = { + "experiment": experiment, + "error_score": error_score, + } + + scores = parallelize( + fun=_score_params, + iter=candidate_params, + meta=meta, + backend=self.backend, + backend_params=self.backend_params, + ) best_index = np.argmin(scores) best_params = candidate_params[best_index] @@ -170,5 +221,30 @@ def get_test_params(cls, parameter_set="default"): "experiment": ackley_exp, "param_grid": param_grid, } - - return [params_sklearn, params_ackley] + + params = [params_sklearn, params_ackley] + + from hyperactive.utils.parallel import _get_parallel_test_fixtures + + parallel_fixtures = _get_parallel_test_fixtures() + + for k, v in parallel_fixtures.items(): + new_ackley = params_ackley.copy() + new_ackley["backend"] = k + new_ackley["backend_params"] = v + params.append(new_ackley) + + return params + + +def _score_params(params, meta): + """Function to score parameters, used in parallelization.""" + meta = meta.copy() + experiment = meta["experiment"] + error_score = meta["error_score"] + + try: + return experiment(**params) + except Exception: # noqa: B904 + # Catch all exceptions and assign error_score + return error_score diff --git a/src/hyperactive/utils/__init__.py b/src/hyperactive/utils/__init__.py index c9c88720..45e7d572 100644 --- a/src/hyperactive/utils/__init__.py +++ b/src/hyperactive/utils/__init__.py @@ -1,7 +1 @@ """Utility functionality.""" - -from hyperactive.utils.estimator_checks import check_estimator - -__all__ = [ - "check_estimator", -] diff --git a/src/hyperactive/utils/estimator_checks.py b/src/hyperactive/utils/estimator_checks.py deleted file mode 100644 index 1bc9f793..00000000 --- a/src/hyperactive/utils/estimator_checks.py +++ /dev/null @@ -1,139 +0,0 @@ -"""Estimator checker for extension.""" - -__author__ = ["fkiraly"] -__all__ = 
["check_estimator"] - -from skbase.utils.dependencies import _check_soft_dependencies - - -def check_estimator( - estimator, - raise_exceptions=False, - tests_to_run=None, - fixtures_to_run=None, - verbose=True, - tests_to_exclude=None, - fixtures_to_exclude=None, -): - """Run all tests on one single estimator. - - Tests that are run on estimator: - - * all tests in test_all_estimators - * all interface compatibility tests from the module of estimator's scitype - - Parameters - ---------- - estimator : estimator class or estimator instance - raise_exceptions : bool, optional, default=False - whether to return exceptions/failures in the results dict, or raise them - - * if False: returns exceptions in returned `results` dict - * if True: raises exceptions as they occur - - tests_to_run : str or list of str, optional. Default = run all tests. - Names (test/function name string) of tests to run. - sub-sets tests that are run to the tests given here. - fixtures_to_run : str or list of str, optional. Default = run all tests. - pytest test-fixture combination codes, which test-fixture combinations to run. - sub-sets tests and fixtures to run to the list given here. - If both tests_to_run and fixtures_to_run are provided, runs the *union*, - i.e., all test-fixture combinations for tests in tests_to_run, - plus all test-fixture combinations in fixtures_to_run. - verbose : str, optional, default=True. - whether to print out informative summary of tests run. - tests_to_exclude : str or list of str, names of tests to exclude. default = None - removes tests that should not be run, after subsetting via tests_to_run. - fixtures_to_exclude : str or list of str, fixtures to exclude. default = None - removes test-fixture combinations that should not be run. - This is done after subsetting via fixtures_to_run. - - Returns - ------- - results : dict of results of the tests in self - keys are test/fixture strings, identical as in pytest, e.g., test[fixture] - entries are the string "PASSED" if the test passed, - or the exception raised if the test did not pass - returned only if all tests pass, or raise_exceptions=False - - Raises - ------ - if raise_exceptions=True, - raises any exception produced by the tests directly - - Examples - -------- - >>> from hyperactive.opt import HillClimbing - >>> from hyperactive.utils import check_estimator - - Running all tests for HillClimbing class, - this uses all instances from get_test_params and compatible scenarios - - >>> results = check_estimator(HillClimbing) - All tests PASSED! - - Running all tests for a specific HillClimbing - this uses the instance that is passed and compatible scenarios - - >>> specific_hill_climbing = HillClimbing.create_test_instance() - >>> results = check_estimator(specific_hill_climbing) - All tests PASSED! - - Running specific test (all fixtures) HillClimbing - - >>> results = check_estimator(HillClimbing, tests_to_run="test_clone") - All tests PASSED! - - {'test_clone[HillClimbing-0]': 'PASSED', - 'test_clone[HillClimbing-1]': 'PASSED'} - - Running one specific test-fixture-combination for ResidualDouble - - >>> check_estimator( - ... HillClimbing, fixtures_to_run="test_clone[HillClimbing-1]" - ... ) - All tests PASSED! - {'test_clone[HillClimbing-1]': 'PASSED'} - """ - msg = ( - "check_estimator is a testing utility for developers, and " - "requires pytest to be present " - "in the python environment, but pytest was not found. " - "pytest is a developer dependency and not included in the base " - "sktime installation. 
Please run: `pip install pytest` to "
-        "install the pytest package. "
-        "To install sktime with all developer dependencies, run:"
-        " `pip install hyperactive[dev]`"
-    )
-    _check_soft_dependencies("pytest", msg=msg)
-
-    from hyperactive.tests.test_class_register import get_test_classes_for_obj
-
-    test_clss_for_est = get_test_classes_for_obj(estimator)
-
-    results = {}
-
-    for test_cls in test_clss_for_est:
-        test_cls_results = test_cls().run_tests(
-            obj=estimator,
-            raise_exceptions=raise_exceptions,
-            tests_to_run=tests_to_run,
-            fixtures_to_run=fixtures_to_run,
-            tests_to_exclude=tests_to_exclude,
-            fixtures_to_exclude=fixtures_to_exclude,
-        )
-        results.update(test_cls_results)
-
-    failed_tests = [key for key in results.keys() if results[key] != "PASSED"]
-    if len(failed_tests) > 0:
-        msg = failed_tests
-        msg = ["FAILED: " + x for x in msg]
-        msg = "\n".join(msg)
-    else:
-        msg = "All tests PASSED!"
-
-    if verbose:
-        # printing is an intended feature, for console usage and interactive debugging
-        print(msg)  # noqa T001
-
-    return results
diff --git a/src/hyperactive/utils/parallel.py b/src/hyperactive/utils/parallel.py
new file mode 100644
index 00000000..3591c2a9
--- /dev/null
+++ b/src/hyperactive/utils/parallel.py
@@ -0,0 +1,289 @@
+# copied from sktime, BSD-3-Clause License (see LICENSE file)
+# to be moved to scikit-base in the future
+"""Common abstraction utilities for parallelization backends.
+
+New parallelization or iteration backends can be added easily as follows:
+
+* Add a new backend name to ``backend_dict``, syntax is
+  backend_name: backend_type, where backend_type collects backend options,
+  e.g., multiple options for a single parallelization backend.
+* Add a new function to ``para_dict``, should have name
+  ``_parallelize_<backend_name>`` and take the same arguments as
+  ``_parallelize_none``. Ensure that ``backend`` and ``backend_params`` are arguments,
+  even if there is only one backend option, or no additional parameters.
+* add the backend string in the docstring of parallelize, and any downstream
+  functions that use ``parallelize`` and expose the backend parameter as an argument
+"""
+
+
+def parallelize(fun, iter, meta=None, backend=None, backend_params=None):
+    """Parallelize loop over iter via backend.
+
+    Executes ``fun(x, meta=meta)`` in parallel for ``x`` in ``iter``,
+    and returns the results as a list in the same order as ``iter``.
+
+    Uses the iteration or parallelization backend specified by ``backend``.
+
+    Parameters
+    ----------
+    fun : callable, must have exactly two arguments, second argument of name "meta"
+        function to be executed in parallel
+
+    iter : iterable
+        iterable over which to parallelize, elements are passed to fun in order,
+        to the first argument
+
+    meta : dict, optional
+        variables to be passed to fun, as the second argument, under the key ``meta``
+
+    backend : str, optional
+        backend to use for parallelization, one of
+
+        - "None": executes loop sequentially, simple list comprehension
+        - "loky", "multiprocessing" and "threading": uses ``joblib`` ``Parallel`` loops
+        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``
+        - "dask": uses ``dask``, requires ``dask`` package in environment
+        - "dask_lazy": same as ``"dask"``, but returns delayed object instead of list
+        - "ray": uses a ray remote to execute jobs in parallel
+
+    backend_params : dict, optional
+        additional parameters passed to the backend as config.
+ Valid keys depend on the value of ``backend``: + + - "None": no additional parameters, ``backend_params`` is ignored + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "dask": any valid keys for ``dask.compute`` can be passed, e.g., ``scheduler`` + + - "ray": The following keys can be passed: + + - "ray_remote_args": dictionary of valid keys for ``ray.init`` + - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting + down after parallelization. + - "logger_name": str, default="ray"; name of the logger to use. + - "mute_warnings": bool, default=False; if True, suppresses warnings + + """ + if meta is None: + meta = {} + if backend is None: + backend = "None" + if backend_params is None: + backend_params = {} + + backend_name = backend_dict[backend] + para_fun = para_dict[backend_name] + + ret = para_fun( + fun=fun, iter=iter, meta=meta, backend=backend, backend_params=backend_params + ) + return ret + + +backend_dict = { + "None": "none", + "loky": "joblib", + "multiprocessing": "joblib", + "threading": "joblib", + "joblib": "joblib", + "dask": "dask", + "dask_lazy": "dask", + "ray": "ray", +} +para_dict = {} + + +def _parallelize_none(fun, iter, meta, backend, backend_params): + """Execute loop via simple sequential list comprehension.""" + ret = [fun(x, meta=meta) for x in iter] + return ret + + +para_dict["none"] = _parallelize_none + + +def _parallelize_joblib(fun, iter, meta, backend, backend_params): + """Parallelize loop via joblib Parallel.""" + from joblib import Parallel, delayed + + par_params = backend_params.copy() + if "backend" not in par_params: + # if user selects custom joblib backend but does not specify backend explicitly, + # raise a ValueError + if backend == "joblib": + raise ValueError( + '"joblib" was selected as first layer parallelization backend, ' + "but no backend string was " + 'passed in the backend parameters dict, e.g., "spark". ' + "Please specify a backend to joblib as a key-value pair " + "in the backend_params arg or the backend:parallel:params config " + 'when using "joblib". ' + 'For clarity, "joblib" should only be used for two-layer ' + "backend dispatch, where the first layer is joblib, " + "and the second layer is a custom backend of joblib, e.g., spark. " + "For first-party joblib backends, please use the backend string " + 'of sktime directly, e.g., by specifying "multiprocessing" or "loky".' 
+ ) + # in all other cases, we ensure the backend parameter is one of + # "loky", "multiprocessing" or "threading", as passed via backend + else: + par_params["backend"] = backend + elif backend != "joblib": + par_params["backend"] = backend + + if "n_jobs" not in par_params: + par_params["n_jobs"] = -1 + + ret = Parallel(**par_params)(delayed(fun)(x, meta=meta) for x in iter) + return ret + + +para_dict["joblib"] = _parallelize_joblib + + +def _parallelize_dask(fun, iter, meta, backend, backend_params): + """Parallelize loop via dask.""" + from dask import compute, delayed + + lazy = [delayed(fun)(x, meta=meta) for x in iter] + if backend == "dask": + return compute(*lazy, **backend_params) + else: + return lazy + + +para_dict["dask"] = _parallelize_dask + + +def _parallelize_ray(fun, iter, meta, backend, backend_params): + """Parallelize loop via ray.""" + import logging + import warnings + + import ray + + par_params = backend_params.copy() + + # read the possible additional keys + logger = logging.getLogger(par_params.get("logger_name", None)) + mute_warnings = par_params.get("mute_warnings", False) + shutdown_ray = par_params.get("shutdown_ray", True) + + if "ray_remote_args" not in par_params.keys(): + par_params["ray_remote_args"] = {} + + @ray.remote # pragma: no cover + def _ray_execute_function( + fun, params: dict, meta: dict, mute_warnings: bool = False + ): + if mute_warnings: + warnings.filterwarnings("ignore") # silence sktime warnings + assert ray.is_initialized() + result = fun(params, meta) + return result + + if not ray.is_initialized(): + logger.info("Starting Ray Parallel") + context = ray.init(**par_params["ray_remote_args"]) + logger.info( + f"Ray initialized. Open dashboard at http://{context.dashboard_url}" + ) + + # this is to keep the order of results while still using wait to optimize runtime + refs = [ + _ray_execute_function.remote(fun, x, meta, mute_warnings=mute_warnings) + for x in iter + ] + res_dict = dict.fromkeys(refs) + + unfinished = refs + while unfinished: + finished, unfinished = ray.wait(unfinished, num_returns=1) + res_dict[finished[0]] = ray.get(finished[0]) + + if shutdown_ray: + ray.shutdown() + + res = [res_dict[ref] for ref in refs] + return res + + +para_dict["ray"] = _parallelize_ray + + +# list of backends where we skip tests during CI +SKIP_FIXTURES = [ + "ray", # unstable, sporadic crashes in CI, see bug 8149 +] + + +def _get_parallel_test_fixtures(naming="estimator"): + """Return fixtures for parallelization tests. + + Returns a list of parameter fixtures, where each fixture + is a dict with keys "backend" and "backend_params". 
+ + Parameters + ---------- + naming : str, optional + naming convention for the parameters, one of + + "estimator": for use in estimator constructors, + ``backend`` and ``backend_params`` + "config": for use in ``set_config``, + ``backend:parallel`` and ``backend:parallel:params`` + + Returns + ------- + fixtures : list of dict + list of backend parameter fixtures + keys depend on ``naming`` parameter, see above + either ``backend`` and ``backend_params`` (``naming="estimator"``), + or ``backend:parallel`` and ``backend:parallel:params`` (``naming="config"``) + values are backend strings and backend parameter dicts + only backends that are available in the environment are included + """ + from skbase.utils.dependencies import _check_soft_dependencies + + fixtures = [] + + # test no parallelization + fixtures.append({"backend": "None", "backend_params": {}}) + + # test joblib backends + for backend in ["loky", "multiprocessing", "threading"]: + fixtures.append({"backend": backend, "backend_params": {}}) + fixtures.append({"backend": backend, "backend_params": {"n_jobs": 2}}) + fixtures.append({"backend": backend, "backend_params": {"n_jobs": -1}}) + + # test dask backends + if _check_soft_dependencies("dask", severity="none"): + fixtures.append({"backend": "dask", "backend_params": {}}) + fixtures.append({"backend": "dask", "backend_params": {"scheduler": "sync"}}) + + # test ray backend + if _check_soft_dependencies("ray", severity="none"): + import os + + fixtures.append( + { + "backend": "ray", + "backend_params": { + "mute_warnings": True, + "ray_remote_args": {"num_cpus": os.cpu_count() - 1}, + }, + } + ) + + fixtures = [x for x in fixtures if x["backend"] not in SKIP_FIXTURES] + # remove backends in SKIP_FIXTURES from fixtures + + return fixtures diff --git a/src/hyperactive/utils/tests/__init__.py b/src/hyperactive/utils/tests/__init__.py new file mode 100644 index 00000000..874535a0 --- /dev/null +++ b/src/hyperactive/utils/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for utilities.""" diff --git a/src/hyperactive/utils/tests/test_parallel.py b/src/hyperactive/utils/tests/test_parallel.py new file mode 100644 index 00000000..e936f459 --- /dev/null +++ b/src/hyperactive/utils/tests/test_parallel.py @@ -0,0 +1,58 @@ +# copied from sktime, BSD-3-Clause License (see LICENSE file) +# to be moved to scikit-base in the future +import copy +import os + +import pytest + +from skbase.utils.dependencies import _check_soft_dependencies + +from hyperactive.utils.parallel import _get_parallel_test_fixtures, parallelize + + +@pytest.mark.skipif( + not _check_soft_dependencies("ray", severity="none"), + reason="Execute tests for iff anything in the module has changed", +) +def test_ray_leaves_params_invariant(): + def trial_function(params, meta): + return params + + backend = "ray" + backend_params = { + "mute_warnings": True, + "ray_remote_args": {"num_cpus": os.cpu_count() - 1}, + } + # copy for later comparison + backup = backend_params.copy() + + params = [1, 2, 3] + meta = {} + + parallelize(trial_function, params, meta, backend, backend_params) + + assert backup == backend_params + + +def square(x, **kwargs): + return x**2 + + +@pytest.mark.parametrize("fixture", _get_parallel_test_fixtures()) +def test_parallelize_simple_loop(fixture): + backend = fixture["backend"] + backend_params = copy.deepcopy(fixture["backend_params"]) + params_before = copy.deepcopy(fixture["backend_params"]) + + nums = range(8) + expected = [x**2 for x in nums] + + result = parallelize( + square, + nums, + 
backend=backend, + backend_params=backend_params, + ) + + assert list(result) == expected + assert backend_params == params_before From 60cc034713adfb2ab81ea7d47d105172f3e477e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 13 Aug 2025 23:36:48 +0200 Subject: [PATCH 02/15] random search --- src/hyperactive/opt/_common.py | 16 ++++++ src/hyperactive/opt/gridsearch/_sk.py | 14 +---- src/hyperactive/opt/random_search.py | 83 ++++++++++++++++++++++++--- 3 files changed, 92 insertions(+), 21 deletions(-) create mode 100644 src/hyperactive/opt/_common.py diff --git a/src/hyperactive/opt/_common.py b/src/hyperactive/opt/_common.py new file mode 100644 index 00000000..bef1f652 --- /dev/null +++ b/src/hyperactive/opt/_common.py @@ -0,0 +1,16 @@ +"""This module contains common functions used by multiple optimizers.""" + +__all__ = ["_score_params"] + + +def _score_params(params, meta): + """Function to score parameters, used in parallelization.""" + meta = meta.copy() + experiment = meta["experiment"] + error_score = meta["error_score"] + + try: + return experiment(**params) + except Exception: # noqa: B904 + # Catch all exceptions and assign error_score + return error_score diff --git a/src/hyperactive/opt/gridsearch/_sk.py b/src/hyperactive/opt/gridsearch/_sk.py index 31f111ab..c0e8caa4 100644 --- a/src/hyperactive/opt/gridsearch/_sk.py +++ b/src/hyperactive/opt/gridsearch/_sk.py @@ -8,6 +8,7 @@ from sklearn.model_selection import ParameterGrid from hyperactive.base import BaseOptimizer +from hyperactive.opt._common import _score_params from hyperactive.utils.parallel import parallelize @@ -235,16 +236,3 @@ def get_test_params(cls, parameter_set="default"): params.append(new_ackley) return params - - -def _score_params(params, meta): - """Function to score parameters, used in parallelization.""" - meta = meta.copy() - experiment = meta["experiment"] - error_score = meta["error_score"] - - try: - return experiment(**params) - except Exception: # noqa: B904 - # Catch all exceptions and assign error_score - return error_score diff --git a/src/hyperactive/opt/random_search.py b/src/hyperactive/opt/random_search.py index 9f07ab3e..fac3fdb6 100644 --- a/src/hyperactive/opt/random_search.py +++ b/src/hyperactive/opt/random_search.py @@ -9,6 +9,8 @@ from sklearn.model_selection import ParameterSampler from hyperactive.base import BaseOptimizer +from hyperactive.opt._common import _score_params +from hyperactive.utils.parallel import parallelize class RandomSearchSk(BaseOptimizer): @@ -19,12 +21,56 @@ class RandomSearchSk(BaseOptimizer): param_distributions : dict[str, list | scipy.stats.rv_frozen] Search space specification. Discrete lists are sampled uniformly; scipy distribution objects are sampled via their ``rvs`` method. + n_iter : int, default=10 Number of parameter sets to evaluate. + random_state : int | np.random.RandomState | None, default=None Controls the pseudo-random generator for reproducibility. + error_score : float, default=np.nan Score assigned when the experiment raises an exception. + + backend : {"dask", "loky", "multiprocessing", "threading","ray"}, by default "None". + Runs parallel evaluate if specified and ``strategy`` is set as "refit". 
+
+        - "None": executes loop sequentially, simple list comprehension
+        - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops
+        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``
+        - "dask": uses ``dask``, requires ``dask`` package in environment
+        - "ray": uses ``ray``, requires ``ray`` package in environment
+
+        Recommendation: Use "dask" or "loky" for parallel evaluate.
+        "threading" is unlikely to see speed ups due to the GIL and the serialization
+        backend (``cloudpickle``) for "dask" and "loky" is generally more robust
+        than the standard ``pickle`` library used in "multiprocessing".
+
+    backend_params : dict, optional
+        additional parameters passed to the backend as config.
+        Directly passed to ``utils.parallel.parallelize``.
+        Valid keys depend on the value of ``backend``:
+
+        - "None": no additional parameters, ``backend_params`` is ignored
+        - "loky", "multiprocessing" and "threading": default ``joblib`` backends
+          any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
+          with the exception of ``backend`` which is directly controlled by ``backend``.
+          If ``n_jobs`` is not passed, it will default to ``-1``, other parameters
+          will default to ``joblib`` defaults.
+        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``.
+          any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
+          ``backend`` must be passed as a key of ``backend_params`` in this case.
+          If ``n_jobs`` is not passed, it will default to ``-1``, other parameters
+          will default to ``joblib`` defaults.
+        - "dask": any valid keys for ``dask.compute`` can be passed, e.g., ``scheduler``
+
+        - "ray": The following keys can be passed:
+
+          - "ray_remote_args": dictionary of valid keys for ``ray.init``
+          - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting
+            down after parallelization.
+          - "logger_name": str, default="ray"; name of the logger to use.
+          - "mute_warnings": bool, default=False; if True, suppresses warnings
+
     experiment : BaseExperiment, optional
         Callable returning a scalar score when invoked with keyword
         arguments matching a parameter set.
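+
+    To parallelize the search, set ``backend`` and ``backend_params``.
+    A sketch of a loky-parallel run, assuming a search space
+    ``param_distributions`` and an experiment instance ``sklearn_exp``
+    (hypothetical names, constructed as in the usage examples) are in scope:
+
+    >>> random_search = RandomSearchSk(
+    ...     param_distributions,
+    ...     n_iter=20,
+    ...     backend="loky",
+    ...     backend_params={"n_jobs": 2},
+    ...     experiment=sklearn_exp,
+    ... )  # doctest: +SKIP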
@@ -45,6 +91,8 @@ def __init__( n_iter=10, random_state=None, error_score=np.nan, + backend="None", + backend_params=None, experiment=None, ): self.experiment = experiment @@ -52,6 +100,8 @@ def __init__( self.n_iter = n_iter self.random_state = random_state self.error_score = error_score + self.backend = backend + self.backend_params = backend_params super().__init__() @@ -104,13 +154,18 @@ def _run( ) candidate_params = list(sampler) - scores: list[float] = [] - for candidate_param in candidate_params: - try: - score = experiment(**candidate_param) - except Exception: # noqa: B904 - score = error_score - scores.append(score) + meta = { + "experiment": experiment, + "error_score": error_score, + } + + scores = parallelize( + fun=_score_params, + iter=candidate_params, + meta=meta, + backend=self.backend, + backend_params=self.backend_params, + ) best_index = int(np.argmin(scores)) # lower-is-better convention best_params = candidate_params[best_index] @@ -154,4 +209,16 @@ def get_test_params(cls, parameter_set: str = "default"): "random_state": 0, } - return [params_sklearn, params_ackley] + params = [params_sklearn, params_ackley] + + from hyperactive.utils.parallel import _get_parallel_test_fixtures + + parallel_fixtures = _get_parallel_test_fixtures() + + for k, v in parallel_fixtures.items(): + new_ackley = params_ackley.copy() + new_ackley["backend"] = k + new_ackley["backend_params"] = v + params.append(new_ackley) + + return params From 4173725fad47ec5b729901fdea6bbb1bf975d9d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 13 Aug 2025 23:41:12 +0200 Subject: [PATCH 03/15] revert accidental deletion --- src/hyperactive/utils/__init__.py | 6 + src/hyperactive/utils/estimator_checks.py | 139 ++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 src/hyperactive/utils/estimator_checks.py diff --git a/src/hyperactive/utils/__init__.py b/src/hyperactive/utils/__init__.py index 45e7d572..c9c88720 100644 --- a/src/hyperactive/utils/__init__.py +++ b/src/hyperactive/utils/__init__.py @@ -1 +1,7 @@ """Utility functionality.""" + +from hyperactive.utils.estimator_checks import check_estimator + +__all__ = [ + "check_estimator", +] diff --git a/src/hyperactive/utils/estimator_checks.py b/src/hyperactive/utils/estimator_checks.py new file mode 100644 index 00000000..1bc9f793 --- /dev/null +++ b/src/hyperactive/utils/estimator_checks.py @@ -0,0 +1,139 @@ +"""Estimator checker for extension.""" + +__author__ = ["fkiraly"] +__all__ = ["check_estimator"] + +from skbase.utils.dependencies import _check_soft_dependencies + + +def check_estimator( + estimator, + raise_exceptions=False, + tests_to_run=None, + fixtures_to_run=None, + verbose=True, + tests_to_exclude=None, + fixtures_to_exclude=None, +): + """Run all tests on one single estimator. + + Tests that are run on estimator: + + * all tests in test_all_estimators + * all interface compatibility tests from the module of estimator's scitype + + Parameters + ---------- + estimator : estimator class or estimator instance + raise_exceptions : bool, optional, default=False + whether to return exceptions/failures in the results dict, or raise them + + * if False: returns exceptions in returned `results` dict + * if True: raises exceptions as they occur + + tests_to_run : str or list of str, optional. Default = run all tests. + Names (test/function name string) of tests to run. + sub-sets tests that are run to the tests given here. + fixtures_to_run : str or list of str, optional. Default = run all tests. 
+ pytest test-fixture combination codes, which test-fixture combinations to run. + sub-sets tests and fixtures to run to the list given here. + If both tests_to_run and fixtures_to_run are provided, runs the *union*, + i.e., all test-fixture combinations for tests in tests_to_run, + plus all test-fixture combinations in fixtures_to_run. + verbose : str, optional, default=True. + whether to print out informative summary of tests run. + tests_to_exclude : str or list of str, names of tests to exclude. default = None + removes tests that should not be run, after subsetting via tests_to_run. + fixtures_to_exclude : str or list of str, fixtures to exclude. default = None + removes test-fixture combinations that should not be run. + This is done after subsetting via fixtures_to_run. + + Returns + ------- + results : dict of results of the tests in self + keys are test/fixture strings, identical as in pytest, e.g., test[fixture] + entries are the string "PASSED" if the test passed, + or the exception raised if the test did not pass + returned only if all tests pass, or raise_exceptions=False + + Raises + ------ + if raise_exceptions=True, + raises any exception produced by the tests directly + + Examples + -------- + >>> from hyperactive.opt import HillClimbing + >>> from hyperactive.utils import check_estimator + + Running all tests for HillClimbing class, + this uses all instances from get_test_params and compatible scenarios + + >>> results = check_estimator(HillClimbing) + All tests PASSED! + + Running all tests for a specific HillClimbing + this uses the instance that is passed and compatible scenarios + + >>> specific_hill_climbing = HillClimbing.create_test_instance() + >>> results = check_estimator(specific_hill_climbing) + All tests PASSED! + + Running specific test (all fixtures) HillClimbing + + >>> results = check_estimator(HillClimbing, tests_to_run="test_clone") + All tests PASSED! + + {'test_clone[HillClimbing-0]': 'PASSED', + 'test_clone[HillClimbing-1]': 'PASSED'} + + Running one specific test-fixture-combination for ResidualDouble + + >>> check_estimator( + ... HillClimbing, fixtures_to_run="test_clone[HillClimbing-1]" + ... ) + All tests PASSED! + {'test_clone[HillClimbing-1]': 'PASSED'} + """ + msg = ( + "check_estimator is a testing utility for developers, and " + "requires pytest to be present " + "in the python environment, but pytest was not found. " + "pytest is a developer dependency and not included in the base " + "sktime installation. Please run: `pip install pytest` to " + "install the pytest package. " + "To install sktime with all developer dependencies, run:" + " `pip install hyperactive[dev]`" + ) + _check_soft_dependencies("pytest", msg=msg) + + from hyperactive.tests.test_class_register import get_test_classes_for_obj + + test_clss_for_est = get_test_classes_for_obj(estimator) + + results = {} + + for test_cls in test_clss_for_est: + test_cls_results = test_cls().run_tests( + obj=estimator, + raise_exceptions=raise_exceptions, + tests_to_run=tests_to_run, + fixtures_to_run=fixtures_to_run, + tests_to_exclude=tests_to_exclude, + fixtures_to_exclude=fixtures_to_exclude, + ) + results.update(test_cls_results) + + failed_tests = [key for key in results.keys() if results[key] != "PASSED"] + if len(failed_tests) > 0: + msg = failed_tests + msg = ["FAILED: " + x for x in msg] + msg = "\n".join(msg) + else: + msg = "All tests PASSED!" 
+ + if verbose: + # printing is an intended feature, for console usage and interactive debugging + print(msg) # noqa T001 + + return results From 015a26f9cfe59a3e8b6b4a14bbcbc2beef747f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 13 Aug 2025 23:44:06 +0200 Subject: [PATCH 04/15] params --- src/hyperactive/opt/gridsearch/_sk.py | 6 +++--- src/hyperactive/opt/random_search.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/hyperactive/opt/gridsearch/_sk.py b/src/hyperactive/opt/gridsearch/_sk.py index c0e8caa4..151c9910 100644 --- a/src/hyperactive/opt/gridsearch/_sk.py +++ b/src/hyperactive/opt/gridsearch/_sk.py @@ -140,7 +140,7 @@ def _check_param_grid(self, param_grid): "to be a non-empty sequence." ) - def _run(self, experiment, param_grid, error_score): + def _run(self, experiment, param_grid, error_score, backend, backend_params): """Run the optimization search process.""" self._check_param_grid(param_grid) candidate_params = list(ParameterGrid(param_grid)) @@ -154,8 +154,8 @@ def _run(self, experiment, param_grid, error_score): fun=_score_params, iter=candidate_params, meta=meta, - backend=self.backend, - backend_params=self.backend_params, + backend=backend, + backend_params=backend_params, ) best_index = np.argmin(scores) diff --git a/src/hyperactive/opt/random_search.py b/src/hyperactive/opt/random_search.py index fac3fdb6..0b67ff80 100644 --- a/src/hyperactive/opt/random_search.py +++ b/src/hyperactive/opt/random_search.py @@ -143,6 +143,8 @@ def _run( n_iter, random_state, error_score, + backend, + backend_params, ): """Sample ``n_iter`` points and return the best parameter set.""" self._check_param_distributions(param_distributions) @@ -163,8 +165,8 @@ def _run( fun=_score_params, iter=candidate_params, meta=meta, - backend=self.backend, - backend_params=self.backend_params, + backend=backend, + backend_params=backend_params, ) best_index = int(np.argmin(scores)) # lower-is-better convention From ff1861048960618489ae58d0a73cb6fa953ccd24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 13 Aug 2025 23:49:41 +0200 Subject: [PATCH 05/15] fixes --- src/hyperactive/opt/gridsearch/_sk.py | 6 +++--- src/hyperactive/opt/random_search.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/hyperactive/opt/gridsearch/_sk.py b/src/hyperactive/opt/gridsearch/_sk.py index 151c9910..71ac714d 100644 --- a/src/hyperactive/opt/gridsearch/_sk.py +++ b/src/hyperactive/opt/gridsearch/_sk.py @@ -229,10 +229,10 @@ def get_test_params(cls, parameter_set="default"): parallel_fixtures = _get_parallel_test_fixtures() - for k, v in parallel_fixtures.items(): + for x in parallel_fixtures: new_ackley = params_ackley.copy() - new_ackley["backend"] = k - new_ackley["backend_params"] = v + new_ackley["backend"] = x["backend"] + new_ackley["backend_params"] = x["backend_params"] params.append(new_ackley) return params diff --git a/src/hyperactive/opt/random_search.py b/src/hyperactive/opt/random_search.py index 0b67ff80..0d665ff1 100644 --- a/src/hyperactive/opt/random_search.py +++ b/src/hyperactive/opt/random_search.py @@ -217,10 +217,10 @@ def get_test_params(cls, parameter_set: str = "default"): parallel_fixtures = _get_parallel_test_fixtures() - for k, v in parallel_fixtures.items(): + for x in parallel_fixtures: new_ackley = params_ackley.copy() - new_ackley["backend"] = k - new_ackley["backend_params"] = v + new_ackley["backend"] = x["backend"] + new_ackley["backend_params"] = x["backend_params"] 
             params.append(new_ackley)
 
         return params

From 31cc87f4d266ffcd1906f8eacf7e22d6f0b1246c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?=
Date: Wed, 13 Aug 2025 23:52:02 +0200
Subject: [PATCH 06/15] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 06d3b8ed..655f367f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,7 +63,7 @@ test = [
 test_parallel_backends = [
     "dask",
     "joblib",
-    "ray",
+    'ray >=2.40.0; python_version < "3.13"',
 ]
 all_extras = [
     "hyperactive[integrations]",

From f565519a5f3bd56bb6d444009027151e2ea21289 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?=
Date: Thu, 14 Aug 2025 00:01:57 +0200
Subject: [PATCH 07/15] new_ackley.update(x)

---
 src/hyperactive/opt/gridsearch/_sk.py | 3 +--
 src/hyperactive/opt/random_search.py  | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/hyperactive/opt/gridsearch/_sk.py b/src/hyperactive/opt/gridsearch/_sk.py
index 71ac714d..9bc20ef2 100644
--- a/src/hyperactive/opt/gridsearch/_sk.py
+++ b/src/hyperactive/opt/gridsearch/_sk.py
@@ -231,8 +231,7 @@ def get_test_params(cls, parameter_set="default"):
 
         for x in parallel_fixtures:
             new_ackley = params_ackley.copy()
-            new_ackley["backend"] = x["backend"]
-            new_ackley["backend_params"] = x["backend_params"]
+            new_ackley.update(x)
             params.append(new_ackley)
 
         return params
diff --git a/src/hyperactive/opt/random_search.py b/src/hyperactive/opt/random_search.py
index 0d665ff1..5a98991e 100644
--- a/src/hyperactive/opt/random_search.py
+++ b/src/hyperactive/opt/random_search.py
@@ -219,8 +219,7 @@ def get_test_params(cls, parameter_set: str = "default"):
 
         for x in parallel_fixtures:
             new_ackley = params_ackley.copy()
-            new_ackley["backend"] = x["backend"]
-            new_ackley["backend_params"] = x["backend_params"]
+            new_ackley.update(x)
             params.append(new_ackley)
 
         return params

From 28d2dbc5b22361c87fc6c16a05e5f3595f399f04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?=
Date: Thu, 14 Aug 2025 09:56:08 +0200
Subject: [PATCH 08/15] better docstrings

---
 src/hyperactive/opt/gridsearch/_sk.py | 9 ++-------
 src/hyperactive/opt/random_search.py  | 9 ++-------
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/src/hyperactive/opt/gridsearch/_sk.py b/src/hyperactive/opt/gridsearch/_sk.py
index 9bc20ef2..1e7538b5 100644
--- a/src/hyperactive/opt/gridsearch/_sk.py
+++ b/src/hyperactive/opt/gridsearch/_sk.py
@@ -24,8 +24,8 @@ class GridSearchSk(BaseOptimizer):
     error_score : float, default=np.nan
         The score to assign if an error occurs during the evaluation
         of a parameter set.
 
-    backend : {"dask", "loky", "multiprocessing", "threading","ray"}, by default "None".
-        Runs parallel evaluate if specified and ``strategy`` is set as "refit".
+    backend : {"dask", "loky", "multiprocessing", "threading", "ray"}, default = "None".
+        Parallelization backend to use in the search process.
 
         - "None": executes loop sequentially, simple list comprehension
         - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops
         - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``
         - "dask": uses ``dask``, requires ``dask`` package in environment
         - "ray": uses ``ray``, requires ``ray`` package in environment
 
-        Recommendation: Use "dask" or "loky" for parallel evaluate.
-        "threading" is unlikely to see speed ups due to the GIL and the serialization
-        backend (``cloudpickle``) for "dask" and "loky" is generally more robust
-        than the standard ``pickle`` library used in "multiprocessing".
-
     backend_params : dict, optional
         additional parameters passed to the backend as config.
diff --git a/src/hyperactive/opt/random_search.py b/src/hyperactive/opt/random_search.py
index 5a98991e..406721ed 100644
--- a/src/hyperactive/opt/random_search.py
+++ b/src/hyperactive/opt/random_search.py
@@ -31,8 +31,8 @@ class RandomSearchSk(BaseOptimizer):
     error_score : float, default=np.nan
         Score assigned when the experiment raises an exception.
 
-    backend : {"dask", "loky", "multiprocessing", "threading","ray"}, by default "None".
-        Runs parallel evaluate if specified and ``strategy`` is set as "refit".
+    backend : {"dask", "loky", "multiprocessing", "threading", "ray"}, default = "None".
+        Parallelization backend to use in the search process.
 
         - "None": executes loop sequentially, simple list comprehension
         - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops
         - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``
         - "dask": uses ``dask``, requires ``dask`` package in environment
         - "ray": uses ``ray``, requires ``ray`` package in environment
 
-        Recommendation: Use "dask" or "loky" for parallel evaluate.
-        "threading" is unlikely to see speed ups due to the GIL and the serialization
-        backend (``cloudpickle``) for "dask" and "loky" is generally more robust
-        than the standard ``pickle`` library used in "multiprocessing".
-
     backend_params : dict, optional
         additional parameters passed to the backend as config.

From bb5141904e9a7c1b62d6deb5cc08c2d42be5329a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?=
Date: Thu, 14 Aug 2025 15:19:39 +0200
Subject: [PATCH 09/15] fixes

---
 src/hyperactive/opt/_common.py               | 4 ++--
 src/hyperactive/opt/random_search.py         | 2 +-
 src/hyperactive/utils/tests/test_parallel.py | 2 ++
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/hyperactive/opt/_common.py b/src/hyperactive/opt/_common.py
index bef1f652..37c01e3a 100644
--- a/src/hyperactive/opt/_common.py
+++ b/src/hyperactive/opt/_common.py
@@ -1,10 +1,10 @@
-"""This module contains common functions used by multiple optimizers."""
+"""Common functions used by multiple optimizers."""
 
 __all__ = ["_score_params"]
 
 
 def _score_params(params, meta):
-    """Function to score parameters, used in parallelization."""
+    """Score parameters, used in parallelization."""
     meta = meta.copy()
     experiment = meta["experiment"]
     error_score = meta["error_score"]
diff --git a/src/hyperactive/opt/random_search.py b/src/hyperactive/opt/random_search.py
index 406721ed..3e28b736 100644
--- a/src/hyperactive/opt/random_search.py
+++ b/src/hyperactive/opt/random_search.py
@@ -113,7 +113,7 @@ def _check_param_distributions(self, param_distributions):
         for p in param_distributions:
             for name, v in p.items():
                 if self._is_distribution(v):
-                    # Assume scipy frozen distribution – nothing to check
+                    # Assume scipy frozen distribution: nothing to check
                     continue
 
                 if isinstance(v, np.ndarray) and v.ndim > 1:
diff --git a/src/hyperactive/utils/tests/test_parallel.py b/src/hyperactive/utils/tests/test_parallel.py
index e936f459..48d5b386 100644
--- a/src/hyperactive/utils/tests/test_parallel.py
+++ b/src/hyperactive/utils/tests/test_parallel.py
@@ -15,6 +15,7 @@
     reason="Execute tests for iff anything in
the module has changed", ) def test_ray_leaves_params_invariant(): + """Test that the parallelize function leaves backend_params invariant.""" def trial_function(params, meta): return params @@ -40,6 +41,7 @@ def square(x, **kwargs): @pytest.mark.parametrize("fixture", _get_parallel_test_fixtures()) def test_parallelize_simple_loop(fixture): + """Test that parallelize works with a simple function and fixture.""" backend = fixture["backend"] backend_params = copy.deepcopy(fixture["backend_params"]) params_before = copy.deepcopy(fixture["backend_params"]) From 5894f2d7812c3465e0abb2e7bf122096d5db00d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Thu, 14 Aug 2025 15:20:27 +0200 Subject: [PATCH 10/15] linting --- Makefile | 2 +- src/hyperactive/opt/gridsearch/_sk.py | 1 - src/hyperactive/utils/tests/test_parallel.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index dfe7ecb8..0516b359 100644 --- a/Makefile +++ b/Makefile @@ -94,4 +94,4 @@ install-editable: reinstall: uninstall install -reinstall-editable: uninstall install-editable \ No newline at end of file +reinstall-editable: uninstall install-editable diff --git a/src/hyperactive/opt/gridsearch/_sk.py b/src/hyperactive/opt/gridsearch/_sk.py index 1e7538b5..b047092b 100644 --- a/src/hyperactive/opt/gridsearch/_sk.py +++ b/src/hyperactive/opt/gridsearch/_sk.py @@ -4,7 +4,6 @@ from collections.abc import Sequence import numpy as np - from sklearn.model_selection import ParameterGrid from hyperactive.base import BaseOptimizer diff --git a/src/hyperactive/utils/tests/test_parallel.py b/src/hyperactive/utils/tests/test_parallel.py index 48d5b386..883ad418 100644 --- a/src/hyperactive/utils/tests/test_parallel.py +++ b/src/hyperactive/utils/tests/test_parallel.py @@ -4,7 +4,6 @@ import os import pytest - from skbase.utils.dependencies import _check_soft_dependencies from hyperactive.utils.parallel import _get_parallel_test_fixtures, parallelize @@ -36,6 +35,7 @@ def trial_function(params, meta): def square(x, **kwargs): + """Simple function to square a number.""" return x**2 From dc8228ca96d43ba0fb47568de570ef8f2d672e9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Thu, 14 Aug 2025 15:21:22 +0200 Subject: [PATCH 11/15] Update _sk.py --- src/hyperactive/opt/gridsearch/_sk.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/hyperactive/opt/gridsearch/_sk.py b/src/hyperactive/opt/gridsearch/_sk.py index b047092b..22cfa58c 100644 --- a/src/hyperactive/opt/gridsearch/_sk.py +++ b/src/hyperactive/opt/gridsearch/_sk.py @@ -92,6 +92,14 @@ class GridSearchSk(BaseOptimizer): Best parameters can also be accessed via the attributes: >>> best_params = grid_search.best_params_ + + To parallelize the search, set the ``backend`` and ``backend_params``: + >>> grid_search = GridSearch( + ... param_grid, + ... backend="joblib", + ... backend_params={"n_jobs": -1}, + ... experiment=sklearn_exp, + ... 
) """ def __init__( From ac75366d9c9520981241f5dad4dffccde31387f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Thu, 14 Aug 2025 15:24:12 +0200 Subject: [PATCH 12/15] linting --- src/hyperactive/opt/random_search.py | 4 ++-- src/hyperactive/utils/tests/test_parallel.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/hyperactive/opt/random_search.py b/src/hyperactive/opt/random_search.py index 3e28b736..c5e3430a 100644 --- a/src/hyperactive/opt/random_search.py +++ b/src/hyperactive/opt/random_search.py @@ -5,7 +5,6 @@ from collections.abc import Sequence import numpy as np - from sklearn.model_selection import ParameterSampler from hyperactive.base import BaseOptimizer @@ -128,7 +127,8 @@ def _check_param_distributions(self, param_distributions): if len(v) == 0: raise ValueError( - f"Parameter values for ({name}) need to be a non-empty sequence." + f"Parameter values for " + f"({name}) need to be a non-empty sequence." ) def _run( diff --git a/src/hyperactive/utils/tests/test_parallel.py b/src/hyperactive/utils/tests/test_parallel.py index 883ad418..587623c8 100644 --- a/src/hyperactive/utils/tests/test_parallel.py +++ b/src/hyperactive/utils/tests/test_parallel.py @@ -15,6 +15,7 @@ ) def test_ray_leaves_params_invariant(): """Test that the parallelize function leaves backend_params invariant.""" + def trial_function(params, meta): return params @@ -35,7 +36,7 @@ def trial_function(params, meta): def square(x, **kwargs): - """Simple function to square a number.""" + """Square function, for testing.""" return x**2 From 76ec7fe5623bcba7695e4e44f6831ccda843a245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Thu, 14 Aug 2025 15:26:05 +0200 Subject: [PATCH 13/15] Update test_parallel.py --- src/hyperactive/utils/tests/test_parallel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hyperactive/utils/tests/test_parallel.py b/src/hyperactive/utils/tests/test_parallel.py index 587623c8..a15be731 100644 --- a/src/hyperactive/utils/tests/test_parallel.py +++ b/src/hyperactive/utils/tests/test_parallel.py @@ -1,5 +1,6 @@ # copied from sktime, BSD-3-Clause License (see LICENSE file) # to be moved to scikit-base in the future +"""Tests for parallelization utilities.""" import copy import os From 40c8dc4ec981115aa3c3f734aa00131344cdadc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Thu, 14 Aug 2025 15:27:42 +0200 Subject: [PATCH 14/15] Update test_parallel.py --- src/hyperactive/utils/tests/test_parallel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hyperactive/utils/tests/test_parallel.py b/src/hyperactive/utils/tests/test_parallel.py index a15be731..618945d8 100644 --- a/src/hyperactive/utils/tests/test_parallel.py +++ b/src/hyperactive/utils/tests/test_parallel.py @@ -1,6 +1,7 @@ # copied from sktime, BSD-3-Clause License (see LICENSE file) # to be moved to scikit-base in the future """Tests for parallelization utilities.""" + import copy import os From 4de7ad48d6d7eed39576212f7487c4ce1ed74935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 17 Aug 2025 11:28:53 +0200 Subject: [PATCH 15/15] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2f10f74d..3801fe58 100644 --- a/Makefile +++ b/Makefile @@ -90,7 +90,7 @@ install-no-extras-for-test: python -m pip install .[test] install-all-extras-for-test: - python -m pip install .[all_extras,test,test_parallel_backends, sktime-integration] + python -m pip 
install .[all_extras,test,test_parallel_backends,sktime-integration] install-editable: pip install -e .