From 00369724abc0c13a30f5e7d275702ad09be88ca9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 10:18:30 +0200 Subject: [PATCH 01/18] classification --- .../experiment/integrations/__init__.py | 9 +- .../experiment/integrations/_skl_metrics.py | 79 ++++ .../experiment/integrations/sklearn_cv.py | 77 +--- .../integrations/sktime_classification.py | 305 +++++++++++++++ .../integrations/sktime/__init__.py | 3 +- .../integrations/sktime/_classification.py | 361 ++++++++++++++++++ 6 files changed, 756 insertions(+), 78 deletions(-) create mode 100644 src/hyperactive/experiment/integrations/_skl_metrics.py create mode 100644 src/hyperactive/experiment/integrations/sktime_classification.py create mode 100644 src/hyperactive/integrations/sktime/_classification.py diff --git a/src/hyperactive/experiment/integrations/__init__.py b/src/hyperactive/experiment/integrations/__init__.py index 1b600df2..4ed0ce52 100644 --- a/src/hyperactive/experiment/integrations/__init__.py +++ b/src/hyperactive/experiment/integrations/__init__.py @@ -2,8 +2,15 @@ # copyright: hyperactive developers, MIT License (see LICENSE file) from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment +from hyperactive.experiment.integrations.sktime_classification import ( + SktimeClassificationExperiment, +) from hyperactive.experiment.integrations.sktime_forecasting import ( SktimeForecastingExperiment, ) -__all__ = ["SklearnCvExperiment", "SktimeForecastingExperiment"] +__all__ = [ + "SklearnCvExperiment", + "SktimeClassificationExperiment", + "SktimeForecastingExperiment", +] diff --git a/src/hyperactive/experiment/integrations/_skl_metrics.py b/src/hyperactive/experiment/integrations/_skl_metrics.py new file mode 100644 index 00000000..dab7edd5 --- /dev/null +++ b/src/hyperactive/experiment/integrations/_skl_metrics.py @@ -0,0 +1,79 @@ +"""Integration utilities for sklearn metrics with Hyperactive.""" + +__all__ = ["_guess_sign_of_sklmetric"] + + +def _guess_sign_of_sklmetric(scorer): + """Guess the sign of a sklearn metric scorer. + + Parameters + ---------- + scorer : callable + The sklearn metric scorer to guess the sign for. + + Returns + ------- + int + 1 if higher scores are better, -1 if lower scores are better. 
+ """ + HIGHER_IS_BETTER = { + # Classification + "accuracy_score": True, + "auc": True, + "average_precision_score": True, + "balanced_accuracy_score": True, + "brier_score_loss": False, + "class_likelihood_ratios": False, + "cohen_kappa_score": True, + "d2_log_loss_score": True, + "dcg_score": True, + "f1_score": True, + "fbeta_score": True, + "hamming_loss": False, + "hinge_loss": False, + "jaccard_score": True, + "log_loss": False, + "matthews_corrcoef": True, + "ndcg_score": True, + "precision_score": True, + "recall_score": True, + "roc_auc_score": True, + "top_k_accuracy_score": True, + "zero_one_loss": False, + # Regression + "d2_absolute_error_score": True, + "d2_pinball_score": True, + "d2_tweedie_score": True, + "explained_variance_score": True, + "max_error": False, + "mean_absolute_error": False, + "mean_absolute_percentage_error": False, + "mean_gamma_deviance": False, + "mean_pinball_loss": False, + "mean_poisson_deviance": False, + "mean_squared_error": False, + "mean_squared_log_error": False, + "mean_tweedie_deviance": False, + "median_absolute_error": False, + "r2_score": True, + "root_mean_squared_error": False, + "root_mean_squared_log_error": False, + } + + scorer_name = getattr(scorer, "__name__", None) + + if hasattr(scorer, "greater_is_better"): + return 1 if scorer.greater_is_better else -1 + elif scorer_name in HIGHER_IS_BETTER: + return 1 if HIGHER_IS_BETTER[scorer_name] else -1 + elif scorer_name.endswith("_score"): + # If the scorer name ends with "_score", we assume higher is better + return 1 + elif scorer_name.endswith("_loss") or scorer_name.endswith("_deviance"): + # If the scorer name ends with "_loss", we assume lower is better + return -1 + elif scorer_name.endswith("_error"): + return -1 + else: + # If we cannot determine the sign, we assume lower is better + return -1 diff --git a/src/hyperactive/experiment/integrations/sklearn_cv.py b/src/hyperactive/experiment/integrations/sklearn_cv.py index 3a649801..39946a9f 100644 --- a/src/hyperactive/experiment/integrations/sklearn_cv.py +++ b/src/hyperactive/experiment/integrations/sklearn_cv.py @@ -8,6 +8,7 @@ from sklearn.utils.validation import _num_samples from hyperactive.base import BaseExperiment +from hyperactive.experiment.integrations._skl_metrics import _guess_sign_of_sklmetric class SklearnCvExperiment(BaseExperiment): @@ -281,79 +282,3 @@ def _get_score_params(self): score_params_defaults, ] return params - - -def _guess_sign_of_sklmetric(scorer): - """Guess the sign of a sklearn metric scorer. - - Parameters - ---------- - scorer : callable - The sklearn metric scorer to guess the sign for. - - Returns - ------- - int - 1 if higher scores are better, -1 if lower scores are better. 
-    """
-    HIGHER_IS_BETTER = {
-        # Classification
-        "accuracy_score": True,
-        "auc": True,
-        "average_precision_score": True,
-        "balanced_accuracy_score": True,
-        "brier_score_loss": False,
-        "class_likelihood_ratios": False,
-        "cohen_kappa_score": True,
-        "d2_log_loss_score": True,
-        "dcg_score": True,
-        "f1_score": True,
-        "fbeta_score": True,
-        "hamming_loss": False,
-        "hinge_loss": False,
-        "jaccard_score": True,
-        "log_loss": False,
-        "matthews_corrcoef": True,
-        "ndcg_score": True,
-        "precision_score": True,
-        "recall_score": True,
-        "roc_auc_score": True,
-        "top_k_accuracy_score": True,
-        "zero_one_loss": False,
-        # Regression
-        "d2_absolute_error_score": True,
-        "d2_pinball_score": True,
-        "d2_tweedie_score": True,
-        "explained_variance_score": True,
-        "max_error": False,
-        "mean_absolute_error": False,
-        "mean_absolute_percentage_error": False,
-        "mean_gamma_deviance": False,
-        "mean_pinball_loss": False,
-        "mean_poisson_deviance": False,
-        "mean_squared_error": False,
-        "mean_squared_log_error": False,
-        "mean_tweedie_deviance": False,
-        "median_absolute_error": False,
-        "r2_score": True,
-        "root_mean_squared_error": False,
-        "root_mean_squared_log_error": False,
-    }
-
-    scorer_name = getattr(scorer, "__name__", None)
-
-    if hasattr(scorer, "greater_is_better"):
-        return 1 if scorer.greater_is_better else -1
-    elif scorer_name in HIGHER_IS_BETTER:
-        return 1 if HIGHER_IS_BETTER[scorer_name] else -1
-    elif scorer_name.endswith("_score"):
-        # If the scorer name ends with "_score", we assume higher is better
-        return 1
-    elif scorer_name.endswith("_loss") or scorer_name.endswith("_deviance"):
-        # If the scorer name ends with "_loss", we assume lower is better
-        return -1
-    elif scorer_name.endswith("_error"):
-        return -1
-    else:
-        # If we cannot determine the sign, we assume lower is better
-        return -1
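
Note: a minimal usage sketch of the sign-guessing helper factored out above
(the metric names come from the lookup table; ``my_custom_score`` is a
hypothetical callable used only for illustration):

    from sklearn.metrics import accuracy_score, mean_squared_error

    from hyperactive.experiment.integrations._skl_metrics import (
        _guess_sign_of_sklmetric,
    )

    # known metrics are resolved via the HIGHER_IS_BETTER lookup table
    assert _guess_sign_of_sklmetric(accuracy_score) == 1  # higher is better
    assert _guess_sign_of_sklmetric(mean_squared_error) == -1  # lower is better

    # unknown callables fall back to the name-suffix heuristic
    def my_custom_score(y_true, y_pred):
        return 0.0

    assert _guess_sign_of_sklmetric(my_custom_score) == 1  # "_score" suffix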
diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py
new file mode 100644
index 00000000..0338c4fb
--- /dev/null
+++ b/src/hyperactive/experiment/integrations/sktime_classification.py
@@ -0,0 +1,305 @@
+"""Experiment adapter for sktime time series classification experiments."""
+# copyright: hyperactive developers, MIT License (see LICENSE file)
+
+import numpy as np
+
+from hyperactive.base import BaseExperiment
+from hyperactive.experiment.integrations._skl_metrics import _guess_sign_of_sklmetric
+
+
+class SktimeClassificationExperiment(BaseExperiment):
+    """Experiment adapter for time series classification experiments.
+
+    This class is used to perform cross-validation experiments using a given
+    sktime classifier. It allows for hyperparameter tuning and evaluation of
+    the model's performance.
+
+    The score returned is the aggregate cross-validation score, obtained by
+    applying ``sktime`` ``evaluate`` to ``estimator`` with the parameters given
+    in ``score`` ``params``.
+
+    The cross-validation scheme is specified by the ``cv`` parameter,
+    and the scoring metric is specified by the ``scoring`` parameter.
+    The ``X`` and ``y`` parameters are the input data and target values,
+    which are used in fit/predict cross-validation.
+
+    Parameters
+    ----------
+    estimator : sktime BaseClassifier descendant (concrete classifier)
+        sktime classifier to benchmark
+
+    cv : int, sklearn cross-validation generator or an iterable, default=3-fold CV
+        Determines the cross-validation splitting strategy.
+        Possible inputs for cv are:
+
+        - None = default = ``KFold(n_splits=3, shuffle=True)``
+        - integer, number of folds in a ``KFold`` splitter, ``shuffle=True``
+        - An iterable yielding (train, test) splits as arrays of indices.
+
+        For integer/None inputs, if the estimator is a classifier and ``y`` is
+        either binary or multiclass, :class:`StratifiedKFold` is used. In all
+        other cases, :class:`KFold` is used. These splitters are instantiated
+        with ``shuffle=False`` so the splits will be the same across calls.
+
+    X : sktime-compatible panel data (Panel scitype)
+        Panel data container. Supported formats include:
+
+        - ``pd.DataFrame`` with MultiIndex [instance, time] and variable columns
+        - 3D ``np.array`` with shape ``[n_instances, n_dimensions, series_length]``
+        - Other formats listed in ``datatypes.SCITYPE_REGISTER``
+
+    y : sktime-compatible tabular data (Table scitype)
+        Target variable, typically a 1D ``np.ndarray`` or ``pd.Series``
+        of shape ``[n_instances]``.
+
+    scoring : str, callable, default=None
+        Strategy to evaluate the performance of the cross-validated model on
+        the test set. Can be:
+
+        - a single string resolvable to an sklearn scorer
+        - a callable that returns a single value;
+        - ``None`` = default = ``accuracy_score``
+
+    error_score : "raise" or numeric, default=np.nan
+        Value to assign to the score if an exception occurs in estimator fitting. If set
+        to "raise", the exception is raised. If a numeric value is given,
+        FitFailedWarning is raised.
+
+    backend : string, by default "None".
+        Parallelization backend to use for runs.
+        Runs parallel evaluate if specified.
+
+        - "None": executes loop sequentially, simple list comprehension
+        - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops
+        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``
+        - "dask": uses ``dask``, requires ``dask`` package in environment
+        - "dask_lazy": same as "dask",
+          but changes the return to (lazy) ``dask.dataframe.DataFrame``.
+        - "ray": uses ``ray``, requires ``ray`` package in environment
+
+        Recommendation: Use "dask" or "loky" for parallel evaluate.
+        "threading" is unlikely to see speed ups due to the GIL and the serialization
+        backend (``cloudpickle``) for "dask" and "loky" is generally more robust
+        than the standard ``pickle`` library used in "multiprocessing".
+
+    backend_params : dict, optional
+        additional parameters passed to the backend as config.
+        Directly passed to ``utils.parallel.parallelize``.
+        Valid keys depend on the value of ``backend``:
+
+        - "None": no additional parameters, ``backend_params`` is ignored
+        - "loky", "multiprocessing" and "threading": default ``joblib`` backends
+          any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
+          with the exception of ``backend`` which is directly controlled by ``backend``.
+          If ``n_jobs`` is not passed, it will default to ``-1``, other parameters
+          will default to ``joblib`` defaults.
+        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``.
+          any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
+          ``backend`` must be passed as a key of ``backend_params`` in this case.
+          If ``n_jobs`` is not passed, it will default to ``-1``, other parameters
+          will default to ``joblib`` defaults.
+ - "dask": any valid keys for ``dask.compute`` can be passed, + e.g., ``scheduler`` + + - "ray": The following keys can be passed: + + - "ray_remote_args": dictionary of valid keys for ``ray.init`` + - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting + down after parallelization. + - "logger_name": str, default="ray"; name of the logger to use. + - "mute_warnings": bool, default=False; if True, suppresses warnings + + Example + ------- + >>> from hyperactive.experiment.integrations import SktimeForecastingExperiment + >>> from sktime.datasets import load_airline + >>> from sktime.forecasting.naive import NaiveForecaster + >>> from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError + >>> from sktime.split import ExpandingWindowSplitter + >>> + >>> y = load_airline() + >>> + >>> sktime_exp = SktimeForecastingExperiment( + ... forecaster=NaiveForecaster(strategy="last"), + ... scoring=MeanAbsolutePercentageError(), + ... cv=ExpandingWindowSplitter(initial_window=36, step_length=12, fh=12), + ... y=y, + ... ) + >>> params = {"strategy": "mean"} + >>> score, add_info = sktime_exp.score(params) + + For default choices of ``scoring``: + >>> sktime_exp = SktimeForecastingExperiment( + ... forecaster=NaiveForecaster(strategy="last"), + ... cv=ExpandingWindowSplitter(initial_window=36, step_length=12, fh=12), + ... y=y, + ... ) + >>> params = {"strategy": "mean"} + >>> score, add_info = sktime_exp.score(params) + + Quick call without metadata return or dictionary: + >>> score = sktime_exp(strategy="mean") + """ + + _tags = { + "authors": "fkiraly", + "maintainers": "fkiraly", + "python_dependencies": "sktime", # python dependencies + } + + def __init__( + self, + forecaster, + cv, + y, + X=None, + strategy="refit", + scoring=None, + error_score=np.nan, + cv_X=None, + backend=None, + backend_params=None, + ): + self.forecaster = forecaster + self.X = X + self.y = y + self.strategy = strategy + self.scoring = scoring + self.cv = cv + self.error_score = error_score + self.cv_X = cv_X + self.backend = backend + self.backend_params = backend_params + + super().__init__() + + from sklearn.dummy import DummyClassifier + from sklearn.metrics import check_scoring + + # use dummy classifier from sklearn to get default coercion behaviour + # for classificatoin metrics + self._scoring = check_scoring(DummyClassifier(), self.scoring) + + # Set the sign of the scoring function + if hasattr(self._scoring, "_score"): + score_func = self._scoring._score_func + _sign = _guess_sign_of_sklmetric(score_func) + _sign_str = "higher" if _sign == 1 else "lower" + self.set_tags(**{"property:higher_or_lower_is_better": _sign_str}) + + def _paramnames(self): + """Return the parameter names of the search. + + Returns + ------- + list of str + The parameter names of the search parameters. + """ + return list(self.estimator.get_params().keys()) + + def _evaluate(self, params): + """Evaluate the parameters. + + Parameters + ---------- + params : dict with string keys + Parameters to evaluate. + + Returns + ------- + float + The value of the parameters as per evaluation. + dict + Additional metadata about the search. 
+ """ + from sktime.classification.model_evaluation import evaluate + + results = evaluate( + self.estimator, + cv=self.cv, + X=self.X, + y=self.y, + scoring=self._scoring, + error_score=self.error_score, + backend=self.backend, + backend_params=self.backend_params, + ) + + result_name = f"test_{self._scoring.name}" + + res_float = results[result_name].mean() + + return res_float, {"results": results} + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the skbase object. + + ``get_test_params`` is a unified interface point to store + parameter settings for testing purposes. This function is also + used in ``create_test_instance`` and ``create_test_instances_and_names`` + to construct test instances. + + ``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``. + + Each ``dict`` is a parameter configuration for testing, + and can be used to construct an "interesting" test instance. + A call to ``cls(**params)`` should + be valid for all dictionaries ``params`` in the return of ``get_test_params``. + + The ``get_test_params`` need not return fixed lists of dictionaries, + it can also return dynamic or stochastic parameter settings. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + from sklearn.metrics import brier_score_loss + from sklearn.model_selection import KFold + + from sktime.datasets import load_unit_test + from sktime.classification.dummy import DummyClassifier + + X, y = load_unit_test(return_X_y=True, return_type="pd-multiindex") + params0 = { + "estimator": DummyClassifier(strategy="most_frequent"), + "X": X, + "y": y, + } + + params1 = { + "forecaster": DummyClassifier(strategy="stratified"), + "cv": KFold(n_splits=2), + "X": X, + "y": y, + "scoring": brier_score_loss, + } + + return [params0, params1] + + @classmethod + def _get_score_params(self): + """Return settings for testing score/evaluate functions. Used in tests only. + + Returns a list, the i-th element should be valid arguments for + self.evaluate and self.score, of an instance constructed with + self.get_test_params()[i]. + + Returns + ------- + list of dict + The parameters to be used for scoring. 
+        """
+        val0 = {}
+        val1 = {"strategy": "most_frequent"}
+        return [val0, val1]
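
Note: a minimal sketch of how the experiment above is consumed by a
hyperactive optimizer (argument names follow the final state of this series,
after the ``estimator`` rename in later commits; the ``set_params``/``solve``
plumbing mirrors the delegate in ``_classification.py`` below):

    from sklearn.model_selection import KFold
    from sktime.classification.dummy import DummyClassifier
    from sktime.datasets import load_unit_test

    from hyperactive.experiment.integrations import SktimeClassificationExperiment
    from hyperactive.opt import GridSearchSk

    X, y = load_unit_test(return_X_y=True, return_type="pd-multiindex")

    # the experiment wraps estimator, data, and CV scheme into one objective
    experiment = SktimeClassificationExperiment(
        estimator=DummyClassifier(),
        cv=KFold(n_splits=2),
        X=X,
        y=y,
    )

    # any hyperactive optimizer accepts the experiment and searches its params
    optimizer = GridSearchSk(param_grid={"strategy": ["most_frequent", "stratified"]})
    optimizer.set_params(experiment=experiment)
    best_params = optimizer.solve()  # e.g. {"strategy": "most_frequent"}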
diff --git a/src/hyperactive/integrations/sktime/__init__.py b/src/hyperactive/integrations/sktime/__init__.py
index a88ca2f0..256d03ea 100644
--- a/src/hyperactive/integrations/sktime/__init__.py
+++ b/src/hyperactive/integrations/sktime/__init__.py
@@ -1,5 +1,6 @@
 """Integrations for sktime with Hyperactive."""
 
+from hyperactive.integrations.sktime._classification import TSCOptCV
 from hyperactive.integrations.sktime._forecasting import ForecastingOptCV
 
-__all__ = ["ForecastingOptCV"]
+__all__ = ["TSCOptCV", "ForecastingOptCV"]
diff --git a/src/hyperactive/integrations/sktime/_classification.py b/src/hyperactive/integrations/sktime/_classification.py
new file mode 100644
index 00000000..c5332dc4
--- /dev/null
+++ b/src/hyperactive/integrations/sktime/_classification.py
@@ -0,0 +1,361 @@
+# copyright: hyperactive developers, MIT License (see LICENSE file)
+
+import numpy as np
+from skbase.utils.dependencies import _check_soft_dependencies
+
+if _check_soft_dependencies("sktime", severity="none"):
+    from sktime.classification._delegate import _DelegatedClassifier
+else:
+    from skbase.base import BaseEstimator as _DelegatedClassifier
+
+from hyperactive.experiment.integrations.sktime_forecasting import (
+    SktimeClassificationExperiment,
+)
+
+
+class TSCOptCV(_DelegatedClassifier):
+    """Tune an sktime classifier via any optimizer in the hyperactive toolbox.
+
+    ``TSCOptCV`` uses any available tuning engine from ``hyperactive``
+    to tune a classifier by cross-validation.
+
+    It passes cross-validation results as scores to the tuning engine,
+    which identifies the best hyperparameters.
+
+    Any available tuning engine from hyperactive can be used, for example:
+
+    * grid search - ``from hyperactive.opt import GridSearchSk as GridSearch``,
+      this results in the same algorithm as ``TSCGridSearchCV``
+    * hill climbing - ``from hyperactive.opt import HillClimbing``
+    * optuna parzen-tree search - ``from hyperactive.opt.optuna import TPEOptimizer``
+
+    Configuration of the tuning engine is as per the respective documentation.
+
+    Formally, ``TSCOptCV`` does the following:
+
+    In ``fit``:
+
+    * wraps the ``estimator``, ``scoring``, and other parameters
+      into a ``SktimeClassificationExperiment`` instance, which is passed to the
+      optimizer ``optimizer`` as the ``experiment`` argument.
+    * Optimal parameters are then obtained from ``optimizer.solve``, and set
+      as ``best_params_`` and ``best_estimator_`` attributes.
+    * If ``refit=True``, ``best_estimator_`` is fitted to the entire ``y`` and ``X``.
+
+    In ``predict`` and ``predict``-like methods, calls the respective method
+    of the ``best_estimator_`` if ``refit=True``.
+
+    Parameters
+    ----------
+    estimator : sktime classifier, BaseClassifier instance or interface compatible
+        The classifier to tune, must implement the sktime classifier interface.
+
+    optimizer : hyperactive BaseOptimizer
+        The optimizer to be used for hyperparameter search.
+
+    cv : int, sklearn cross-validation generator or an iterable, default=3-fold CV
+        Determines the cross-validation splitting strategy.
+        Possible inputs for cv are:
+
+        - None = default = ``KFold(n_splits=3, shuffle=True)``
+        - integer, number of folds in a ``KFold`` splitter, ``shuffle=True``
+        - An iterable yielding (train, test) splits as arrays of indices.
+
+        For integer/None inputs, if the estimator is a classifier and ``y`` is
+        either binary or multiclass, :class:`StratifiedKFold` is used. In all
+        other cases, :class:`KFold` is used. These splitters are instantiated
+        with ``shuffle=False`` so the splits will be the same across calls.
+
+    scoring : str, callable, default=None
+        Strategy to evaluate the performance of the cross-validated model on
+        the test set. Can be:
+
+        - a single string resolvable to an sklearn scorer
+        - a callable that returns a single value;
+        - ``None`` = default = ``accuracy_score``
+
+    refit : bool, optional (default=True)
+        True = refit the classifier with the best parameters on the entire data in fit
+        False = no refitting takes place. The classifier cannot be used to predict.
+        This is to be used to tune the hyperparameters, and then use the estimator
+        as a parameter estimator, e.g., via ``get_fitted_params``.
+
+    error_score : "raise" or numeric, default=np.nan
+        Value to assign to the score if an exception occurs in estimator fitting. If set
+        to "raise", the exception is raised. If a numeric value is given,
+        FitFailedWarning is raised.
+
+    backend : string, by default "None".
+        Parallelization backend to use for runs.
+        Runs parallel evaluate if specified.
+
+        - "None": executes loop sequentially, simple list comprehension
+        - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops
+        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``
+        - "dask": uses ``dask``, requires ``dask`` package in environment
+        - "dask_lazy": same as "dask",
+          but changes the return to (lazy) ``dask.dataframe.DataFrame``.
+        - "ray": uses ``ray``, requires ``ray`` package in environment
+
+        Recommendation: Use "dask" or "loky" for parallel evaluate.
+        "threading" is unlikely to see speed ups due to the GIL and the serialization
+        backend (``cloudpickle``) for "dask" and "loky" is generally more robust
+        than the standard ``pickle`` library used in "multiprocessing".
+
+    backend_params : dict, optional
+        additional parameters passed to the backend as config.
+        Directly passed to ``utils.parallel.parallelize``.
+        Valid keys depend on the value of ``backend``:
+
+        - "None": no additional parameters, ``backend_params`` is ignored
+        - "loky", "multiprocessing" and "threading": default ``joblib`` backends
+          any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
+          with the exception of ``backend`` which is directly controlled by ``backend``.
+          If ``n_jobs`` is not passed, it will default to ``-1``, other parameters
+          will default to ``joblib`` defaults.
+        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``.
+          any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
+          ``backend`` must be passed as a key of ``backend_params`` in this case.
+          If ``n_jobs`` is not passed, it will default to ``-1``, other parameters
+          will default to ``joblib`` defaults.
+        - "dask": any valid keys for ``dask.compute`` can be passed,
+          e.g., ``scheduler``
+
+        - "ray": The following keys can be passed:
+
+            - "ray_remote_args": dictionary of valid keys for ``ray.init``
+            - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting
+              down after parallelization.
+            - "logger_name": str, default="ray"; name of the logger to use.
+ - "mute_warnings": bool, default=False; if True, suppresses warnings + + Example + ------- + Any available tuning engine from hyperactive can be used, for example: + + * grid search - ``from hyperactive.opt import GridSearchSk as GridSearch`` + * hill climbing - ``from hyperactive.opt import HillClimbing`` + * optuna parzen-tree search - ``from hyperactive.opt.optuna import TPEOptimizer`` + + For illustration, we use grid search, this can be replaced by any other optimizer. + + 1. defining the tuned estimator: + >>> from sktime.forecasting.naive import NaiveForecaster + >>> from sktime.split import ExpandingWindowSplitter + >>> from hyperactive.integrations.sktime import ForecastingOptCV + >>> from hyperactive.opt import GridSearchSk as GridSearch + >>> + >>> param_grid = {"strategy": ["mean", "last", "drift"]} + >>> tuned_naive = ForecastingOptCV( + ... NaiveForecaster(), + ... GridSearch(param_grid), + ... cv=ExpandingWindowSplitter( + ... initial_window=12, step_length=3, fh=range(1, 13) + ... ), + ... ) + + 2. fitting the tuned estimator: + >>> from sktime.datasets import load_airline + >>> from sktime.split import temporal_train_test_split + >>> y = load_airline() + >>> y_train, y_test = temporal_train_test_split(y, test_size=12) + >>> + >>> tuned_naive.fit(y_train, fh=range(1, 13)) + ForecastingOptCV(...) + >>> y_pred = tuned_naive.predict() + + 3. obtaining best parameters and best forecaster + >>> best_params = tuned_naive.best_params_ + >>> best_forecaster = tuned_naive.best_forecaster_ + """ + + _tags = { + "authors": "fkiraly", + "maintainers": "fkiraly", + "python_dependencies": "sktime", + } + + # attribute for _DelegatedClassifier, which then delegates + # all non-overridden methods are same as of getattr(self, _delegate_name) + # see further details in _DelegatedClassifier docstring + _delegate_name = "best_estimator_" + + def __init__( + self, + estimator, + optimizer, + cv=None, + scoring=None, + refit=True, + error_score=np.nan, + backend=None, + backend_params=None, + ): + self.estimator = estimator + self.optimizer = optimizer + self.cv = cv + self.scoring = scoring + self.refit = refit + self.error_score = error_score + self.backend = backend + self.backend_params = backend_params + super().__init__() + + # default handling for cv + if isinstance(cv, int): + from sklearn.model_selection import KFold + + self._cv = KFold(n_splits=cv, shuffle=True) + elif cv is None: + from sklearn.model_selection import KFold + + self._cv = KFold(n_splits=3, shuffle=True) + else: + self._cv = cv + + def _fit(self, X, y): + """Fit time series classifier to training data. + + private _fit containing the core logic, called from fit + + Writes to self: + Sets fitted model attributes ending in "_". 
+
+        Parameters
+        ----------
+        X : guaranteed to be of a type in self.get_tag("X_inner_mtype")
+            if self.get_tag("X_inner_mtype") = "numpy3D":
+            3D np.ndarray of shape = [n_instances, n_dimensions, series_length]
+            if self.get_tag("X_inner_mtype") = "pd-multiindex":
+            pd.DataFrame with columns = variables,
+            index = pd.MultiIndex with first level = instance indices,
+            second level = time indices
+            for list of other mtypes, see datatypes.SCITYPE_REGISTER
+            for specifications, see examples/AA_datatypes_and_datasets.ipynb
+        y : guaranteed to be of a type in self.get_tag("y_inner_mtype")
+            1D iterable, of shape [n_instances]
+            or 2D iterable, of shape [n_instances, n_dimensions]
+            class labels for fitting
+            if self.get_tag("capability:multioutput") = False, guaranteed to be 1D
+            if self.get_tag("capability:multioutput") = True, guaranteed to be 2D
+
+        Returns
+        -------
+        self : Reference to self.
+        """
+        from sklearn.dummy import DummyClassifier
+        from sklearn.metrics import check_scoring
+
+        estimator = self.estimator.clone()
+
+        # use dummy classifier from sklearn to get default coercion behaviour
+        # for classification metrics
+        scoring = check_scoring(DummyClassifier(), self.scoring)
+        # scoring_name = f"test_{scoring.name}"
+
+        experiment = SktimeClassificationExperiment(
+            estimator=estimator,
+            scoring=scoring,
+            cv=self._cv,
+            X=X,
+            y=y,
+            error_score=self.error_score,
+            backend=self.backend,
+            backend_params=self.backend_params,
+        )
+
+        optimizer = self.optimizer.clone()
+        optimizer.set_params(experiment=experiment)
+        best_params = optimizer.solve()
+
+        self.best_params_ = best_params
+        self.best_forecaster_ = estimator.set_params(**best_params)
+
+        # Refit model with best parameters.
+        if self.refit:
+            self.best_estimator_.fit(X=X, y=y)
+
+        return self
+
+    def _predict(self, X):
+        """Predict labels for sequences in X.
+
+        private _predict containing the core logic, called from predict
+
+        State required:
+            Requires state to be "fitted".
+
+        Accesses in self:
+            Fitted model attributes ending in "_"
+
+        Parameters
+        ----------
+        X : guaranteed to be of a type in self.get_tag("X_inner_mtype")
+            if self.get_tag("X_inner_mtype") = "numpy3D":
+            3D np.ndarray of shape = [n_instances, n_dimensions, series_length]
+            if self.get_tag("X_inner_mtype") = "nested_univ":
+            pd.DataFrame with each column a dimension, each cell a pd.Series
+            for list of other mtypes, see datatypes.SCITYPE_REGISTER
+            for specifications, see examples/AA_datatypes_and_datasets.ipynb
+
+        Returns
+        -------
+        y : 1D np.array of int, of shape [n_instances] - predicted class labels
+            indices correspond to instance indices in X
+        """
+        if not self.refit:
+            raise RuntimeError(
+                f"In {self.__class__.__name__}, refit must be True to make predictions,"
+                f" but found refit=False. If refit=False, {self.__class__.__name__} can"
+                " be used only to tune hyper-parameters, as a parameter estimator."
+            )
+        return super()._predict(X=X)
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return ``"default"`` set.
+ + Returns + ------- + params : dict or list of dict + """ + from sklearn.metrics import accuracy_score + from sklearn.model_selection import KFold + + from sktime.classification.dummy import DummyClassifier + + from hyperactive.opt.gfo import HillClimbing + from hyperactive.opt.gridsearch import GridSearchSk + from hyperactive.opt.random_search import RandomSearchSk + + params_gridsearch = { + "estimator": DummyClassifier(), + "optimizer": GridSearchSk( + param_grid={"strategy": ["most_frequent", "stratified"]} + ), + } + params_randomsearch = { + "estimator": DummyClassifier(), + "cv": 2, + "optimizer": RandomSearchSk( + param_distributions={"strategy": ["most_frequent", "stratified"]}, + ), + "scoring": accuracy_score, + } + params_hillclimb = { + "estimator": DummyClassifier(strategy="stratified"), + "cv": KFold(n_splits=2, shuffle=False), + "optimizer": HillClimbing( + search_space={"strategy": ["most_frequent", "stratified"]}, + n_iter=10, + n_neighbours=5, + ), + "scoring": "cross-entropy", + } + return [params_gridsearch, params_randomsearch, params_hillclimb] From 3aa2fbdad0e1f76c880cd1ef3f2618cff0298084 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 10:23:04 +0200 Subject: [PATCH 02/18] linting --- .../experiment/integrations/sktime_classification.py | 3 +-- src/hyperactive/integrations/sktime/_classification.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py index 0338c4fb..7ccc2ca7 100644 --- a/src/hyperactive/experiment/integrations/sktime_classification.py +++ b/src/hyperactive/experiment/integrations/sktime_classification.py @@ -266,9 +266,8 @@ def get_test_params(cls, parameter_set="default"): """ from sklearn.metrics import brier_score_loss from sklearn.model_selection import KFold - - from sktime.datasets import load_unit_test from sktime.classification.dummy import DummyClassifier + from sktime.datasets import load_unit_test X, y = load_unit_test(return_X_y=True, return_type="pd-multiindex") params0 = { diff --git a/src/hyperactive/integrations/sktime/_classification.py b/src/hyperactive/integrations/sktime/_classification.py index c5332dc4..9ac1f71f 100644 --- a/src/hyperactive/integrations/sktime/_classification.py +++ b/src/hyperactive/integrations/sktime/_classification.py @@ -327,7 +327,6 @@ def get_test_params(cls, parameter_set="default"): """ from sklearn.metrics import accuracy_score from sklearn.model_selection import KFold - from sktime.classification.dummy import DummyClassifier from hyperactive.opt.gfo import HillClimbing From ac50f9d9221bb5ed0085fbf9084b4010fc9f98e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 10:23:33 +0200 Subject: [PATCH 03/18] Update _classification.py --- src/hyperactive/integrations/sktime/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/integrations/sktime/_classification.py b/src/hyperactive/integrations/sktime/_classification.py index 9ac1f71f..933f191e 100644 --- a/src/hyperactive/integrations/sktime/_classification.py +++ b/src/hyperactive/integrations/sktime/_classification.py @@ -8,7 +8,7 @@ else: from skbase.base import BaseEstimator as _DelegatedClassifier -from hyperactive.experiment.integrations.sktime_forecasting import ( +from hyperactive.experiment.integrations.sktime_classification import ( SktimeClassificationExperiment, ) From 
10c89fd87b85ed78466b7e4ccc80b164716dbc2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 10:47:45 +0200 Subject: [PATCH 04/18] Update sktime_classification.py --- .../integrations/sktime_classification.py | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py index 7ccc2ca7..5629d051 100644 --- a/src/hyperactive/experiment/integrations/sktime_classification.py +++ b/src/hyperactive/experiment/integrations/sktime_classification.py @@ -28,6 +28,17 @@ class SktimeClassificationExperiment(BaseExperiment): estimator : sktime BaseClassifier descendant (concrete classifier) sktime classifier to benchmark + X : sktime-compatible panel data (Panel scitype) + Panel data container. Supported formats include: + + - ``pd.DataFrame`` with MultiIndex [instance, time] and variable columns + - 3D ``np.array`` with shape ``[n_instances, n_dimensions, series_length]`` + - Other formats listed in ``datatypes.SCITYPE_REGISTER`` + + y : sktime-compatible tabular data (Table scitype) + Target variable, typically a 1D ``np.ndarray`` or ``pd.Series`` + of shape ``[n_instances]``. + cv : int, sklearn cross-validation generator or an iterable, default=3-fold CV Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -41,17 +52,6 @@ class SktimeClassificationExperiment(BaseExperiment): other cases, :class:`KFold` is used. These splitters are instantiated with ``shuffle=False`` so the splits will be the same across calls. - X : sktime-compatible panel data (Panel scitype) - Panel data container. Supported formats include: - - - ``pd.DataFrame`` with MultiIndex [instance, time] and variable columns - - 3D ``np.array`` with shape ``[n_instances, n_dimensions, series_length]`` - - Other formats listed in ``datatypes.SCITYPE_REGISTER`` - - y : sktime-compatible tabular data (Table scitype) - Target variable, typically a 1D ``np.ndarray`` or ``pd.Series`` - of shape ``[n_instances]``. - scoring : str, callable, default=None Strategy to evaluate the performance of the cross-validated model on the test set. 
Can be: @@ -149,25 +149,23 @@ class SktimeClassificationExperiment(BaseExperiment): def __init__( self, - forecaster, - cv, + estimator, + X, y, - X=None, + cv=None, strategy="refit", scoring=None, error_score=np.nan, - cv_X=None, backend=None, backend_params=None, ): - self.forecaster = forecaster + self.estimator = estimator self.X = X self.y = y self.strategy = strategy self.scoring = scoring self.cv = cv self.error_score = error_score - self.cv_X = cv_X self.backend = backend self.backend_params = backend_params From eb6a357b4b2164df8ca80f4bd2b27058a6bc87fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 10:57:47 +0200 Subject: [PATCH 05/18] Update sktime_classification.py --- .../experiment/integrations/sktime_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py index 5629d051..4b2e1926 100644 --- a/src/hyperactive/experiment/integrations/sktime_classification.py +++ b/src/hyperactive/experiment/integrations/sktime_classification.py @@ -275,7 +275,7 @@ def get_test_params(cls, parameter_set="default"): } params1 = { - "forecaster": DummyClassifier(strategy="stratified"), + "estimator": DummyClassifier(strategy="stratified"), "cv": KFold(n_splits=2), "X": X, "y": y, From 323768a34f78a31f5c8749f3816a3762c602d760 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 11:08:25 +0200 Subject: [PATCH 06/18] scoring --- .../experiment/integrations/_skl_metrics.py | 24 ++++++++++++++++++- .../experiment/integrations/sklearn_cv.py | 23 ++++-------------- .../integrations/sktime_classification.py | 9 ++++--- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/hyperactive/experiment/integrations/_skl_metrics.py b/src/hyperactive/experiment/integrations/_skl_metrics.py index dab7edd5..164aa5a5 100644 --- a/src/hyperactive/experiment/integrations/_skl_metrics.py +++ b/src/hyperactive/experiment/integrations/_skl_metrics.py @@ -1,6 +1,28 @@ """Integration utilities for sklearn metrics with Hyperactive.""" -__all__ = ["_guess_sign_of_sklmetric"] +__all__ = ["_coerce_to_scorer", "_guess_sign_of_sklmetric"] + + +def _coerce_to_scorer(scoring, estimator): + """Coerce a scoring into a sklearn scorer.""" + from sklearn.metrics import check_scoring + + # check if scoring is a scorer by checking for "estimator" in signature + if scoring is None: + return check_scoring(estimator) + # check using inspect.signature for "estimator" in signature + elif callable(scoring): + from inspect import signature + + if "estimator" in signature(scoring).parameters: + return scoring + else: + from sklearn.metrics import make_scorer + + return make_scorer(scoring) + else: + # scoring is a string (scorer name) + return check_scoring(self.estimator, scoring=scoring) def _guess_sign_of_sklmetric(scorer): diff --git a/src/hyperactive/experiment/integrations/sklearn_cv.py b/src/hyperactive/experiment/integrations/sklearn_cv.py index 39946a9f..14ee0d3b 100644 --- a/src/hyperactive/experiment/integrations/sklearn_cv.py +++ b/src/hyperactive/experiment/integrations/sklearn_cv.py @@ -3,12 +3,14 @@ # copyright: hyperactive developers, MIT License (see LICENSE file) from sklearn import clone -from sklearn.metrics import check_scoring from sklearn.model_selection import cross_validate from sklearn.utils.validation import _num_samples from hyperactive.base import BaseExperiment -from 
hyperactive.experiment.integrations._skl_metrics import _guess_sign_of_sklmetric +from hyperactive.experiment.integrations._skl_metrics import ( + _coerce_to_scorer, + _guess_sign_of_sklmetric, +) class SklearnCvExperiment(BaseExperiment): @@ -98,22 +100,7 @@ def __init__(self, estimator, X, y, scoring=None, cv=None): else: self._cv = cv - # check if scoring is a scorer by checking for "estimator" in signature - if scoring is None: - self._scoring = check_scoring(self.estimator) - # check using inspect.signature for "estimator" in signature - elif callable(scoring): - from inspect import signature - - if "estimator" in signature(scoring).parameters: - self._scoring = scoring - else: - from sklearn.metrics import make_scorer - - self._scoring = make_scorer(scoring) - else: - # scoring is a string (scorer name) - self._scoring = check_scoring(self.estimator, scoring=scoring) + self._scoring = _coerce_to_scorer(scoring, self.estimator) self.scorer_ = self._scoring # Set the sign of the scoring function diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py index 4b2e1926..4ff33d32 100644 --- a/src/hyperactive/experiment/integrations/sktime_classification.py +++ b/src/hyperactive/experiment/integrations/sktime_classification.py @@ -4,7 +4,10 @@ import numpy as np from hyperactive.base import BaseExperiment -from hyperactive.experiment.integrations._skl_metrics import _guess_sign_of_sklmetric +from hyperactive.experiment.integrations._skl_metrics import ( + _coerce_to_scorer, + _guess_sign_of_sklmetric, +) class SktimeClassificationExperiment(BaseExperiment): @@ -175,8 +178,8 @@ def __init__( from sklearn.metrics import check_scoring # use dummy classifier from sklearn to get default coercion behaviour - # for classificatoin metrics - self._scoring = check_scoring(DummyClassifier(), self.scoring) + # for classification metrics + self._scoring = _coerce_to_scorer(scoring, self.estimator) # Set the sign of the scoring function if hasattr(self._scoring, "_score"): From 0348afc805afd2de1a169c7bc4e71b49a73fb1ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 11:08:49 +0200 Subject: [PATCH 07/18] Update sktime_classification.py --- .../experiment/integrations/sktime_classification.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py index 4ff33d32..3fb256a3 100644 --- a/src/hyperactive/experiment/integrations/sktime_classification.py +++ b/src/hyperactive/experiment/integrations/sktime_classification.py @@ -175,11 +175,10 @@ def __init__( super().__init__() from sklearn.dummy import DummyClassifier - from sklearn.metrics import check_scoring # use dummy classifier from sklearn to get default coercion behaviour # for classification metrics - self._scoring = _coerce_to_scorer(scoring, self.estimator) + self._scoring = _coerce_to_scorer(scoring, DummyClassifier()) # Set the sign of the scoring function if hasattr(self._scoring, "_score"): From ab5fa1aef95c1c5061ebb565366d2af794859a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 11:10:39 +0200 Subject: [PATCH 08/18] Update _skl_metrics.py --- src/hyperactive/experiment/integrations/_skl_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/_skl_metrics.py 
b/src/hyperactive/experiment/integrations/_skl_metrics.py index 164aa5a5..47fd605a 100644 --- a/src/hyperactive/experiment/integrations/_skl_metrics.py +++ b/src/hyperactive/experiment/integrations/_skl_metrics.py @@ -22,7 +22,7 @@ def _coerce_to_scorer(scoring, estimator): return make_scorer(scoring) else: # scoring is a string (scorer name) - return check_scoring(self.estimator, scoring=scoring) + return check_scoring(estimator, scoring=scoring) def _guess_sign_of_sklmetric(scorer): From f2dd17a6fecba7d1be2f704c6575bfe58bd6b5eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 11:11:37 +0200 Subject: [PATCH 09/18] Update _skl_metrics.py --- .../experiment/integrations/_skl_metrics.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/_skl_metrics.py b/src/hyperactive/experiment/integrations/_skl_metrics.py index 47fd605a..bd6fa3a2 100644 --- a/src/hyperactive/experiment/integrations/_skl_metrics.py +++ b/src/hyperactive/experiment/integrations/_skl_metrics.py @@ -4,7 +4,23 @@ def _coerce_to_scorer(scoring, estimator): - """Coerce a scoring into a sklearn scorer.""" + """Coerce scoring argument into a sklearn scorer. + + Parameters + ---------- + scoring : str, callable, or None + The scoring strategy to use. + estimator : estimator object + The estimator to use for default scoring if scoring is None. + + Returns + ------- + scorer : callable + A sklearn scorer callable. + Follows the unified sklearn scorer interface: + + scorer(estimator, X, y) -> score + """ from sklearn.metrics import check_scoring # check if scoring is a scorer by checking for "estimator" in signature From e060da015cfccbebe66a4007b4902dc8700d08aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 11:11:49 +0200 Subject: [PATCH 10/18] Update _skl_metrics.py --- src/hyperactive/experiment/integrations/_skl_metrics.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/_skl_metrics.py b/src/hyperactive/experiment/integrations/_skl_metrics.py index bd6fa3a2..7736fdec 100644 --- a/src/hyperactive/experiment/integrations/_skl_metrics.py +++ b/src/hyperactive/experiment/integrations/_skl_metrics.py @@ -18,8 +18,6 @@ def _coerce_to_scorer(scoring, estimator): scorer : callable A sklearn scorer callable. 
         Follows the unified sklearn scorer interface:
-
-        scorer(estimator, X, y) -> score
     """
     from sklearn.metrics import check_scoring
 
From ba8d97959f3bb8406b9ecdd819621b9711a1a4d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?=
Date: Sun, 24 Aug 2025 11:38:45 +0200
Subject: [PATCH 11/18] cv defaults

---
 .../integrations/sktime_classification.py     | 14 +++++++++++++-
 .../integrations/sktime/_classification.py    | 14 +-------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py
index 3fb256a3..9f6dace4 100644
--- a/src/hyperactive/experiment/integrations/sktime_classification.py
+++ b/src/hyperactive/experiment/integrations/sktime_classification.py
@@ -187,6 +187,18 @@ def __init__(
             _sign_str = "higher" if _sign == 1 else "lower"
             self.set_tags(**{"property:higher_or_lower_is_better": _sign_str})
 
+        # default handling for cv
+        if isinstance(cv, int):
+            from sklearn.model_selection import KFold
+
+            self._cv = KFold(n_splits=cv, shuffle=True)
+        elif cv is None:
+            from sklearn.model_selection import KFold
+
+            self._cv = KFold(n_splits=3, shuffle=True)
+        else:
+            self._cv = cv
+
     def _paramnames(self):
         """Return the parameter names of the search.
 
@@ -216,7 +228,7 @@ def _evaluate(self, params):
 
         results = evaluate(
             self.estimator,
-            cv=self.cv,
+            cv=self._cv,
             X=self.X,
             y=self.y,
diff --git a/src/hyperactive/integrations/sktime/_classification.py b/src/hyperactive/integrations/sktime/_classification.py
index 933f191e..dfb051e5 100644
--- a/src/hyperactive/integrations/sktime/_classification.py
+++ b/src/hyperactive/integrations/sktime/_classification.py
@@ -201,18 +201,6 @@ def __init__(
         self.backend_params = backend_params
         super().__init__()
 
-        # default handling for cv
-        if isinstance(cv, int):
-            from sklearn.model_selection import KFold
-
-            self._cv = KFold(n_splits=cv, shuffle=True)
-        elif cv is None:
-            from sklearn.model_selection import KFold
-
-            self._cv = KFold(n_splits=3, shuffle=True)
-        else:
-            self._cv = cv
-
     def _fit(self, X, y):
         """Fit time series classifier to training data.
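
Note: the default handling added above coerces ``cv`` once, inside the
experiment; a minimal sketch of the resulting behaviour (the standalone
``_coerce_cv`` helper is hypothetical and mirrors the branches added to
``__init__``):

    from sklearn.model_selection import KFold

    def _coerce_cv(cv):
        # integer -> shuffled KFold with that many splits
        if isinstance(cv, int):
            return KFold(n_splits=cv, shuffle=True)
        # None -> default 3-fold shuffled KFold
        if cv is None:
            return KFold(n_splits=3, shuffle=True)
        # anything else (e.g. a splitter instance) is passed through
        return cv

    assert _coerce_cv(5).get_n_splits() == 5
    assert _coerce_cv(None).get_n_splits() == 3
    kf = KFold(n_splits=2)
    assert _coerce_cv(kf) is kf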
@@ -256,7 +244,7 @@ class labels for fitting experiment = SktimeClassificationExperiment( estimator=estimator, scoring=scoring, - cv=self._cv, + cv=self.cv, X=X, y=y, error_score=self.error_score, From 4a9b96165ecf3d8a02f006172f12b24722b0ec36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 12:58:23 +0200 Subject: [PATCH 12/18] fixes --- .../experiment/integrations/_skl_metrics.py | 15 +++++++++++++-- .../integrations/sktime_classification.py | 13 ++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/hyperactive/experiment/integrations/_skl_metrics.py b/src/hyperactive/experiment/integrations/_skl_metrics.py index 7736fdec..f80094b5 100644 --- a/src/hyperactive/experiment/integrations/_skl_metrics.py +++ b/src/hyperactive/experiment/integrations/_skl_metrics.py @@ -23,9 +23,20 @@ def _coerce_to_scorer(scoring, estimator): # check if scoring is a scorer by checking for "estimator" in signature if scoring is None: - return check_scoring(estimator) + if isinstance(estimator, str): + if estimator == "classifier": + from sklearn.metrics import accuracy_score + + scoring = accuracy_score + elif estimator == "regressor": + from sklearn.metrics import r2_score + + scoring = r2_score + else: + return check_scoring(estimator) + # check using inspect.signature for "estimator" in signature - elif callable(scoring): + if callable(scoring): from inspect import signature if "estimator" in signature(scoring).parameters: diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py index 9f6dace4..afaae3b3 100644 --- a/src/hyperactive/experiment/integrations/sktime_classification.py +++ b/src/hyperactive/experiment/integrations/sktime_classification.py @@ -156,7 +156,6 @@ def __init__( X, y, cv=None, - strategy="refit", scoring=None, error_score=np.nan, backend=None, @@ -165,7 +164,6 @@ def __init__( self.estimator = estimator self.X = X self.y = y - self.strategy = strategy self.scoring = scoring self.cv = cv self.error_score = error_score @@ -174,11 +172,7 @@ def __init__( super().__init__() - from sklearn.dummy import DummyClassifier - - # use dummy classifier from sklearn to get default coercion behaviour - # for classification metrics - self._scoring = _coerce_to_scorer(scoring, DummyClassifier()) + self._scoring = _coerce_to_scorer(scoring, "classifier") # Set the sign of the scoring function if hasattr(self._scoring, "_score"): @@ -231,13 +225,14 @@ def _evaluate(self, params): cv=self._cv, X=self.X, y=self.y, - scoring=self._scoring, + scoring=self._scoring._score_func, error_score=self.error_score, backend=self.backend, backend_params=self.backend_params, ) - result_name = f"test_{self._scoring.name}" + metric = self._scoring._score_func + result_name = f"test_{metric.__name__}" res_float = results[result_name].mean() From 1d0022e1d6426603510f065d3a030892fa04d695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 12:59:05 +0200 Subject: [PATCH 13/18] Update _skl_metrics.py --- src/hyperactive/experiment/integrations/_skl_metrics.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/_skl_metrics.py b/src/hyperactive/experiment/integrations/_skl_metrics.py index f80094b5..1bd130b1 100644 --- a/src/hyperactive/experiment/integrations/_skl_metrics.py +++ b/src/hyperactive/experiment/integrations/_skl_metrics.py @@ -10,14 +10,16 @@ def _coerce_to_scorer(scoring, 
estimator): ---------- scoring : str, callable, or None The scoring strategy to use. - estimator : estimator object + estimator : estimator object or str The estimator to use for default scoring if scoring is None. + If str, indicates estimator type, should be one of {"classifier", "regressor"}. + Returns ------- scorer : callable A sklearn scorer callable. - Follows the unified sklearn scorer interface: + Follows the unified sklearn scorer interface """ from sklearn.metrics import check_scoring From 18f19787c8fcbc2e36a491e87e6f1a0d518fa4db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Sun, 24 Aug 2025 16:24:38 +0200 Subject: [PATCH 14/18] Update sktime_classification.py --- .../experiment/integrations/sktime_classification.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py index afaae3b3..c1e43caa 100644 --- a/src/hyperactive/experiment/integrations/sktime_classification.py +++ b/src/hyperactive/experiment/integrations/sktime_classification.py @@ -220,8 +220,10 @@ def _evaluate(self, params): """ from sktime.classification.model_evaluation import evaluate + estimator = self.estimator.clone().set_params(**params) + results = evaluate( - self.estimator, + estimator, cv=self._cv, X=self.X, y=self.y, From 0b1592c8364a42758ae0f8ec73cd7243e2f4a134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 29 Aug 2025 09:19:59 +0200 Subject: [PATCH 15/18] Update _classification.py --- .../integrations/sktime/_classification.py | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/hyperactive/integrations/sktime/_classification.py b/src/hyperactive/integrations/sktime/_classification.py index dfb051e5..72674c57 100644 --- a/src/hyperactive/integrations/sktime/_classification.py +++ b/src/hyperactive/integrations/sktime/_classification.py @@ -140,33 +140,34 @@ class TSCOptCV(_DelegatedClassifier): For illustration, we use grid search, this can be replaced by any other optimizer. 1. defining the tuned estimator: - >>> from sktime.forecasting.naive import NaiveForecaster - >>> from sktime.split import ExpandingWindowSplitter - >>> from hyperactive.integrations.sktime import ForecastingOptCV + >>> from sktime.classification.dummy import DummyClassifier + >>> from sklearn.model_selection import KFold + >>> from hyperactive.integrations.sktime import TSCOptCV >>> from hyperactive.opt import GridSearchSk as GridSearch >>> - >>> param_grid = {"strategy": ["mean", "last", "drift"]} - >>> tuned_naive = ForecastingOptCV( - ... NaiveForecaster(), + >>> param_grid = {"strategy": ["most_frequent", "stratified"]} + >>> tuned_naive = TSCOptCV( + ... DummyClassifier(), ... GridSearch(param_grid), - ... cv=ExpandingWindowSplitter( - ... initial_window=12, step_length=3, fh=range(1, 13) - ... ), + ... cv=KFold(n_splits=2, shuffle=False), ... ) 2. fitting the tuned estimator: - >>> from sktime.datasets import load_airline - >>> from sktime.split import temporal_train_test_split - >>> y = load_airline() - >>> y_train, y_test = temporal_train_test_split(y, test_size=12) + >>> from sktime.datasets import load_unit_test + >>> X_train, y_train = load_unit_test( + ... return_X_y=True, split="TRAIN", return_type="pd-multiindex" + ... ) + >>> X_test, _ = load_unit_test( + ... return_X_y=True, split="TEST", return_type="pd-multiindex" + ... 
) >>> - >>> tuned_naive.fit(y_train, fh=range(1, 13)) - ForecastingOptCV(...) - >>> y_pred = tuned_naive.predict() + >>> tuned_naive.fit(X_train, y_train) + TSCOptCV(...) + >>> y_pred = tuned_naive.predict(X_test) - 3. obtaining best parameters and best forecaster + 3. obtaining best parameters and best estimator >>> best_params = tuned_naive.best_params_ - >>> best_forecaster = tuned_naive.best_forecaster_ + >>> best_classifier = tuned_naive.best_estimator_ """ _tags = { @@ -257,7 +258,7 @@ class labels for fitting best_params = optimizer.solve() self.best_params_ = best_params - self.best_forecaster_ = estimator.set_params(**best_params) + self.best_estimator_ = estimator.set_params(**best_params) # Refit model with best parameters. if self.refit: From 6e0e127c5bd926b70ca25204c64f6f54a35943d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 29 Aug 2025 09:32:57 +0200 Subject: [PATCH 16/18] experiment --- .github/workflows/test.yml | 2 +- .../integrations/sktime_classification.py | 33 ++++++++++--------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 17f5b4cc..8956a396 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -99,7 +99,7 @@ jobs: exclude: - os: "windows-latest" python-version: "3.13" - + fail-fast: false runs-on: ${{ matrix.os }} diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py index c1e43caa..74fde017 100644 --- a/src/hyperactive/experiment/integrations/sktime_classification.py +++ b/src/hyperactive/experiment/integrations/sktime_classification.py @@ -114,34 +114,35 @@ class SktimeClassificationExperiment(BaseExperiment): Example ------- - >>> from hyperactive.experiment.integrations import SktimeForecastingExperiment - >>> from sktime.datasets import load_airline - >>> from sktime.forecasting.naive import NaiveForecaster - >>> from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError - >>> from sktime.split import ExpandingWindowSplitter + >>> from hyperactive.experiment.integrations import SktimeClassificationExperiment + >>> from sklearn.model_selection import KFold + >>> from sklearn.metrics import accuracy_score + >>> from sktime.datasets import load_unit_test + >>> from sktime.classification.dummy import DummyClassifier >>> - >>> y = load_airline() + >>> X, y = load_unit_test() >>> - >>> sktime_exp = SktimeForecastingExperiment( - ... forecaster=NaiveForecaster(strategy="last"), - ... scoring=MeanAbsolutePercentageError(), - ... cv=ExpandingWindowSplitter(initial_window=36, step_length=12, fh=12), + >>> sktime_exp = SktimeClassificationExperiment( + ... classifier=DummyClassifier(), + ... scoring=accuracy_score, + ... cv=KFold(n_splits=2), + ... X=X, ... y=y, ... ) - >>> params = {"strategy": "mean"} + >>> params = {"strategy": "most_frequent"} >>> score, add_info = sktime_exp.score(params) - For default choices of ``scoring``: + For default choices of ``scoring`` and ``cv``: >>> sktime_exp = SktimeForecastingExperiment( - ... forecaster=NaiveForecaster(strategy="last"), - ... cv=ExpandingWindowSplitter(initial_window=36, step_length=12, fh=12), + ... classifier=DummyClassifier(), + ... X=X, ... y=y, ... 
) - >>> params = {"strategy": "mean"} + >>> params = {"strategy": "most_frequent"} >>> score, add_info = sktime_exp.score(params) Quick call without metadata return or dictionary: - >>> score = sktime_exp(strategy="mean") + >>> score = sktime_exp({"strategy": "most_frequent"}) """ _tags = { From a9dd08e9e4707b2f62d163d17415f0b4bdc9922c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 29 Aug 2025 09:40:08 +0200 Subject: [PATCH 17/18] Update sktime_classification.py --- .../experiment/integrations/sktime_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py index 74fde017..1eadec1d 100644 --- a/src/hyperactive/experiment/integrations/sktime_classification.py +++ b/src/hyperactive/experiment/integrations/sktime_classification.py @@ -133,7 +133,7 @@ class SktimeClassificationExperiment(BaseExperiment): >>> score, add_info = sktime_exp.score(params) For default choices of ``scoring`` and ``cv``: - >>> sktime_exp = SktimeForecastingExperiment( + >>> sktime_exp = SktimeClassificationExperiment( ... classifier=DummyClassifier(), ... X=X, ... y=y, From 9c04cf9b028888281697a91c77405517ebddc25c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 29 Aug 2025 11:38:05 +0200 Subject: [PATCH 18/18] Update sktime_classification.py --- .../experiment/integrations/sktime_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py index 1eadec1d..ab4622b8 100644 --- a/src/hyperactive/experiment/integrations/sktime_classification.py +++ b/src/hyperactive/experiment/integrations/sktime_classification.py @@ -123,7 +123,7 @@ class SktimeClassificationExperiment(BaseExperiment): >>> X, y = load_unit_test() >>> >>> sktime_exp = SktimeClassificationExperiment( - ... classifier=DummyClassifier(), + ... estimator=DummyClassifier(), ... scoring=accuracy_score, ... cv=KFold(n_splits=2), ... X=X, @@ -134,7 +134,7 @@ class SktimeClassificationExperiment(BaseExperiment): For default choices of ``scoring`` and ``cv``: >>> sktime_exp = SktimeClassificationExperiment( - ... classifier=DummyClassifier(), + ... estimator=DummyClassifier(), ... X=X, ... y=y, ... )
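
Note: a minimal sketch of the ``refit=False`` mode documented for ``TSCOptCV``
above, which tunes without fitting a final model (fixtures as used in the
test parameters of this series; the best parameters found are data-dependent):

    from sklearn.model_selection import KFold
    from sktime.classification.dummy import DummyClassifier
    from sktime.datasets import load_unit_test

    from hyperactive.integrations.sktime import TSCOptCV
    from hyperactive.opt import GridSearchSk as GridSearch

    X, y = load_unit_test(return_X_y=True, return_type="pd-multiindex")

    tuner = TSCOptCV(
        DummyClassifier(),
        GridSearch({"strategy": ["most_frequent", "stratified"]}),
        cv=KFold(n_splits=2),
        refit=False,  # tune only; calling predict would raise RuntimeError
    )
    tuner.fit(X, y)
    best_params = tuner.best_params_  # use as a parameter estimator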