diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 17f5b4cc..8956a396 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -99,7 +99,7 @@ jobs: exclude: - os: "windows-latest" python-version: "3.13" - + fail-fast: false runs-on: ${{ matrix.os }} diff --git a/src/hyperactive/experiment/integrations/__init__.py b/src/hyperactive/experiment/integrations/__init__.py index 1b600df2..4ed0ce52 100644 --- a/src/hyperactive/experiment/integrations/__init__.py +++ b/src/hyperactive/experiment/integrations/__init__.py @@ -2,8 +2,15 @@ # copyright: hyperactive developers, MIT License (see LICENSE file) from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment +from hyperactive.experiment.integrations.sktime_classification import ( + SktimeClassificationExperiment, +) from hyperactive.experiment.integrations.sktime_forecasting import ( SktimeForecastingExperiment, ) -__all__ = ["SklearnCvExperiment", "SktimeForecastingExperiment"] +__all__ = [ + "SklearnCvExperiment", + "SktimeClassificationExperiment", + "SktimeForecastingExperiment", +] diff --git a/src/hyperactive/experiment/integrations/_skl_metrics.py b/src/hyperactive/experiment/integrations/_skl_metrics.py new file mode 100644 index 00000000..1bd130b1 --- /dev/null +++ b/src/hyperactive/experiment/integrations/_skl_metrics.py @@ -0,0 +1,128 @@ +"""Integration utilities for sklearn metrics with Hyperactive.""" + +__all__ = ["_coerce_to_scorer", "_guess_sign_of_sklmetric"] + + +def _coerce_to_scorer(scoring, estimator): + """Coerce scoring argument into a sklearn scorer. + + Parameters + ---------- + scoring : str, callable, or None + The scoring strategy to use. + estimator : estimator object or str + The estimator to use for default scoring if scoring is None. + + If str, indicates estimator type, should be one of {"classifier", "regressor"}. + + Returns + ------- + scorer : callable + A sklearn scorer callable. + Follows the unified sklearn scorer interface + """ + from sklearn.metrics import check_scoring + + # check if scoring is a scorer by checking for "estimator" in signature + if scoring is None: + if isinstance(estimator, str): + if estimator == "classifier": + from sklearn.metrics import accuracy_score + + scoring = accuracy_score + elif estimator == "regressor": + from sklearn.metrics import r2_score + + scoring = r2_score + else: + return check_scoring(estimator) + + # check using inspect.signature for "estimator" in signature + if callable(scoring): + from inspect import signature + + if "estimator" in signature(scoring).parameters: + return scoring + else: + from sklearn.metrics import make_scorer + + return make_scorer(scoring) + else: + # scoring is a string (scorer name) + return check_scoring(estimator, scoring=scoring) + + +def _guess_sign_of_sklmetric(scorer): + """Guess the sign of a sklearn metric scorer. + + Parameters + ---------- + scorer : callable + The sklearn metric scorer to guess the sign for. + + Returns + ------- + int + 1 if higher scores are better, -1 if lower scores are better. 
+ """ + HIGHER_IS_BETTER = { + # Classification + "accuracy_score": True, + "auc": True, + "average_precision_score": True, + "balanced_accuracy_score": True, + "brier_score_loss": False, + "class_likelihood_ratios": False, + "cohen_kappa_score": True, + "d2_log_loss_score": True, + "dcg_score": True, + "f1_score": True, + "fbeta_score": True, + "hamming_loss": False, + "hinge_loss": False, + "jaccard_score": True, + "log_loss": False, + "matthews_corrcoef": True, + "ndcg_score": True, + "precision_score": True, + "recall_score": True, + "roc_auc_score": True, + "top_k_accuracy_score": True, + "zero_one_loss": False, + # Regression + "d2_absolute_error_score": True, + "d2_pinball_score": True, + "d2_tweedie_score": True, + "explained_variance_score": True, + "max_error": False, + "mean_absolute_error": False, + "mean_absolute_percentage_error": False, + "mean_gamma_deviance": False, + "mean_pinball_loss": False, + "mean_poisson_deviance": False, + "mean_squared_error": False, + "mean_squared_log_error": False, + "mean_tweedie_deviance": False, + "median_absolute_error": False, + "r2_score": True, + "root_mean_squared_error": False, + "root_mean_squared_log_error": False, + } + + scorer_name = getattr(scorer, "__name__", None) + + if hasattr(scorer, "greater_is_better"): + return 1 if scorer.greater_is_better else -1 + elif scorer_name in HIGHER_IS_BETTER: + return 1 if HIGHER_IS_BETTER[scorer_name] else -1 + elif scorer_name.endswith("_score"): + # If the scorer name ends with "_score", we assume higher is better + return 1 + elif scorer_name.endswith("_loss") or scorer_name.endswith("_deviance"): + # If the scorer name ends with "_loss", we assume lower is better + return -1 + elif scorer_name.endswith("_error"): + return -1 + else: + # If we cannot determine the sign, we assume lower is better + return -1 diff --git a/src/hyperactive/experiment/integrations/sklearn_cv.py b/src/hyperactive/experiment/integrations/sklearn_cv.py index 483eb6b1..051edfde 100644 --- a/src/hyperactive/experiment/integrations/sklearn_cv.py +++ b/src/hyperactive/experiment/integrations/sklearn_cv.py @@ -3,11 +3,14 @@ # copyright: hyperactive developers, MIT License (see LICENSE file) from sklearn import clone -from sklearn.metrics import check_scoring from sklearn.model_selection import cross_validate from sklearn.utils.validation import _num_samples from hyperactive.base import BaseExperiment +from hyperactive.experiment.integrations._skl_metrics import ( + _coerce_to_scorer, + _guess_sign_of_sklmetric, +) class SklearnCvExperiment(BaseExperiment): @@ -97,22 +100,7 @@ def __init__(self, estimator, X, y, scoring=None, cv=None): else: self._cv = cv - # check if scoring is a scorer by checking for "estimator" in signature - if scoring is None: - self._scoring = check_scoring(self.estimator) - # check using inspect.signature for "estimator" in signature - elif callable(scoring): - from inspect import signature - - if "estimator" in signature(scoring).parameters: - self._scoring = scoring - else: - from sklearn.metrics import make_scorer - - self._scoring = make_scorer(scoring) - else: - # scoring is a string (scorer name) - self._scoring = check_scoring(self.estimator, scoring=scoring) + self._scoring = _coerce_to_scorer(scoring, self.estimator) self.scorer_ = self._scoring # Set the sign of the scoring function @@ -281,79 +269,3 @@ def _get_score_params(self): score_params_defaults, ] return params - - -def _guess_sign_of_sklmetric(scorer): - """Guess the sign of a sklearn metric scorer. 
- - Parameters - ---------- - scorer : callable - The sklearn metric scorer to guess the sign for. - - Returns - ------- - int - 1 if higher scores are better, -1 if lower scores are better. - """ - HIGHER_IS_BETTER = { - # Classification - "accuracy_score": True, - "auc": True, - "average_precision_score": True, - "balanced_accuracy_score": True, - "brier_score_loss": False, - "class_likelihood_ratios": False, - "cohen_kappa_score": True, - "d2_log_loss_score": True, - "dcg_score": True, - "f1_score": True, - "fbeta_score": True, - "hamming_loss": False, - "hinge_loss": False, - "jaccard_score": True, - "log_loss": False, - "matthews_corrcoef": True, - "ndcg_score": True, - "precision_score": True, - "recall_score": True, - "roc_auc_score": True, - "top_k_accuracy_score": True, - "zero_one_loss": False, - # Regression - "d2_absolute_error_score": True, - "d2_pinball_score": True, - "d2_tweedie_score": True, - "explained_variance_score": True, - "max_error": False, - "mean_absolute_error": False, - "mean_absolute_percentage_error": False, - "mean_gamma_deviance": False, - "mean_pinball_loss": False, - "mean_poisson_deviance": False, - "mean_squared_error": False, - "mean_squared_log_error": False, - "mean_tweedie_deviance": False, - "median_absolute_error": False, - "r2_score": True, - "root_mean_squared_error": False, - "root_mean_squared_log_error": False, - } - - scorer_name = getattr(scorer, "__name__", None) - - if hasattr(scorer, "greater_is_better"): - return 1 if scorer.greater_is_better else -1 - elif scorer_name in HIGHER_IS_BETTER: - return 1 if HIGHER_IS_BETTER[scorer_name] else -1 - elif scorer_name.endswith("_score"): - # If the scorer name ends with "_score", we assume higher is better - return 1 - elif scorer_name.endswith("_loss") or scorer_name.endswith("_deviance"): - # If the scorer name ends with "_loss", we assume lower is better - return -1 - elif scorer_name.endswith("_error"): - return -1 - else: - # If we cannot determine the sign, we assume lower is better - return -1 diff --git a/src/hyperactive/experiment/integrations/sktime_classification.py b/src/hyperactive/experiment/integrations/sktime_classification.py new file mode 100644 index 00000000..ab4622b8 --- /dev/null +++ b/src/hyperactive/experiment/integrations/sktime_classification.py @@ -0,0 +1,314 @@ +"""Experiment adapter for sktime backtesting experiments.""" +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import numpy as np + +from hyperactive.base import BaseExperiment +from hyperactive.experiment.integrations._skl_metrics import ( + _coerce_to_scorer, + _guess_sign_of_sklmetric, +) + + +class SktimeClassificationExperiment(BaseExperiment): + """Experiment adapter for time series classification experiments. + + This class is used to perform cross-validation experiments using a given + sktime classifier. It allows for hyperparameter tuning and evaluation of + the model's performance. + + The score returned is the summary backtesting score, + of applying ``sktime`` ``evaluate`` to ``estimator`` with the parameters given in + ``score`` ``params``. + + The backtesting performed is specified by the ``cv`` parameter, + and the scoring metric is specified by the ``scoring`` parameter. + The ``X`` and ``y`` parameters are the input data and target values, + which are used in fit/predict cross-validation. 
+ + Parameters + ---------- + estimator : sktime BaseClassifier descendant (concrete classifier) + sktime classifier to benchmark + + X : sktime-compatible panel data (Panel scitype) + Panel data container. Supported formats include: + + - ``pd.DataFrame`` with MultiIndex [instance, time] and variable columns + - 3D ``np.array`` with shape ``[n_instances, n_dimensions, series_length]`` + - Other formats listed in ``datatypes.SCITYPE_REGISTER`` + + y : sktime-compatible tabular data (Table scitype) + Target variable, typically a 1D ``np.ndarray`` or ``pd.Series`` + of shape ``[n_instances]``. + + cv : int, sklearn cross-validation generator or an iterable, default=3-fold CV + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None = default = ``KFold(n_splits=3, shuffle=True)`` + - integer, number of folds in a ``KFold`` splitter, ``shuffle=True`` + - An iterable yielding (train, test) splits as arrays of indices. + + For integer/None inputs, a ``KFold`` splitter with ``shuffle=True`` is used. + Pass a cross-validation generator directly to fully control the splits. + + scoring : str, callable, default=None + Strategy to evaluate the performance of the cross-validated model on + the test set. Can be: + + - a single string resolvable to an sklearn scorer + - a callable that returns a single value; + - ``None`` = default = ``accuracy_score`` + + error_score : "raise" or numeric, default=np.nan + Value to assign to the score if an exception occurs in estimator fitting. If set + to "raise", the exception is raised. If a numeric value is given, + FitFailedWarning is raised. + + backend : string, by default "None". + Parallelization backend to use for runs. + Runs parallel evaluate if specified and ``strategy="refit"``. + + - "None": executes loop sequentially, simple list comprehension + - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` + - "dask": uses ``dask``, requires ``dask`` package in environment + - "dask_lazy": same as "dask", + but changes the return to (lazy) ``dask.dataframe.DataFrame``. + - "ray": uses ``ray``, requires ``ray`` package in environment + + Recommendation: Use "dask" or "loky" for parallel evaluate. + "threading" is unlikely to see speed ups due to the GIL and the serialization + backend (``cloudpickle``) for "dask" and "loky" is generally more robust + than the standard ``pickle`` library used in "multiprocessing". + + backend_params : dict, optional + additional parameters passed to the backend as config. + Directly passed to ``utils.parallel.parallelize``. + Valid keys depend on the value of ``backend``: + + - "None": no additional parameters, ``backend_params`` is ignored + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case.
+ If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "dask": any valid keys for ``dask.compute`` can be passed, + e.g., ``scheduler`` + + - "ray": The following keys can be passed: + + - "ray_remote_args": dictionary of valid keys for ``ray.init`` + - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting + down after parallelization. + - "logger_name": str, default="ray"; name of the logger to use. + - "mute_warnings": bool, default=False; if True, suppresses warnings + + Example + ------- + >>> from hyperactive.experiment.integrations import SktimeClassificationExperiment + >>> from sklearn.model_selection import KFold + >>> from sklearn.metrics import accuracy_score + >>> from sktime.datasets import load_unit_test + >>> from sktime.classification.dummy import DummyClassifier + >>> + >>> X, y = load_unit_test() + >>> + >>> sktime_exp = SktimeClassificationExperiment( + ... estimator=DummyClassifier(), + ... scoring=accuracy_score, + ... cv=KFold(n_splits=2), + ... X=X, + ... y=y, + ... ) + >>> params = {"strategy": "most_frequent"} + >>> score, add_info = sktime_exp.score(params) + + For default choices of ``scoring`` and ``cv``: + >>> sktime_exp = SktimeClassificationExperiment( + ... estimator=DummyClassifier(), + ... X=X, + ... y=y, + ... ) + >>> params = {"strategy": "most_frequent"} + >>> score, add_info = sktime_exp.score(params) + + Quick call without metadata return or dictionary: + >>> score = sktime_exp({"strategy": "most_frequent"}) + """ + + _tags = { + "authors": "fkiraly", + "maintainers": "fkiraly", + "python_dependencies": "sktime", # python dependencies + } + + def __init__( + self, + estimator, + X, + y, + cv=None, + scoring=None, + error_score=np.nan, + backend=None, + backend_params=None, + ): + self.estimator = estimator + self.X = X + self.y = y + self.scoring = scoring + self.cv = cv + self.error_score = error_score + self.backend = backend + self.backend_params = backend_params + + super().__init__() + + self._scoring = _coerce_to_scorer(scoring, "classifier") + + # Set the sign of the scoring function + if hasattr(self._scoring, "_score"): + score_func = self._scoring._score_func + _sign = _guess_sign_of_sklmetric(score_func) + _sign_str = "higher" if _sign == 1 else "lower" + self.set_tags(**{"property:higher_or_lower_is_better": _sign_str}) + + # default handling for cv + if isinstance(cv, int): + from sklearn.model_selection import KFold + + self._cv = KFold(n_splits=cv, shuffle=True) + elif cv is None: + from sklearn.model_selection import KFold + + self._cv = KFold(n_splits=3, shuffle=True) + else: + self._cv = cv + + def _paramnames(self): + """Return the parameter names of the search. + + Returns + ------- + list of str + The parameter names of the search parameters. + """ + return list(self.estimator.get_params().keys()) + + def _evaluate(self, params): + """Evaluate the parameters. + + Parameters + ---------- + params : dict with string keys + Parameters to evaluate. + + Returns + ------- + float + The value of the parameters as per evaluation. + dict + Additional metadata about the search. 
+ """ + from sktime.classification.model_evaluation import evaluate + + estimator = self.estimator.clone().set_params(**params) + + results = evaluate( + estimator, + cv=self._cv, + X=self.X, + y=self.y, + scoring=self._scoring._score_func, + error_score=self.error_score, + backend=self.backend, + backend_params=self.backend_params, + ) + + metric = self._scoring._score_func + result_name = f"test_{metric.__name__}" + + res_float = results[result_name].mean() + + return res_float, {"results": results} + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the skbase object. + + ``get_test_params`` is a unified interface point to store + parameter settings for testing purposes. This function is also + used in ``create_test_instance`` and ``create_test_instances_and_names`` + to construct test instances. + + ``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``. + + Each ``dict`` is a parameter configuration for testing, + and can be used to construct an "interesting" test instance. + A call to ``cls(**params)`` should + be valid for all dictionaries ``params`` in the return of ``get_test_params``. + + The ``get_test_params`` need not return fixed lists of dictionaries, + it can also return dynamic or stochastic parameter settings. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default = {} + Parameters to create testing instances of the class + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params` + """ + from sklearn.metrics import brier_score_loss + from sklearn.model_selection import KFold + from sktime.classification.dummy import DummyClassifier + from sktime.datasets import load_unit_test + + X, y = load_unit_test(return_X_y=True, return_type="pd-multiindex") + params0 = { + "estimator": DummyClassifier(strategy="most_frequent"), + "X": X, + "y": y, + } + + params1 = { + "estimator": DummyClassifier(strategy="stratified"), + "cv": KFold(n_splits=2), + "X": X, + "y": y, + "scoring": brier_score_loss, + } + + return [params0, params1] + + @classmethod + def _get_score_params(self): + """Return settings for testing score/evaluate functions. Used in tests only. + + Returns a list, the i-th element should be valid arguments for + self.evaluate and self.score, of an instance constructed with + self.get_test_params()[i]. + + Returns + ------- + list of dict + The parameters to be used for scoring. 
+ """ + val0 = {} + val1 = {"strategy": "most_frequent"} + return [val0, val1] diff --git a/src/hyperactive/integrations/sktime/__init__.py b/src/hyperactive/integrations/sktime/__init__.py index a88ca2f0..256d03ea 100644 --- a/src/hyperactive/integrations/sktime/__init__.py +++ b/src/hyperactive/integrations/sktime/__init__.py @@ -1,5 +1,6 @@ """Integrations for sktime with Hyperactive.""" +from hyperactive.integrations.sktime._classification import TSCOptCV from hyperactive.integrations.sktime._forecasting import ForecastingOptCV -__all__ = ["ForecastingOptCV"] +__all__ = ["TSCOptCV", "ForecastingOptCV"] diff --git a/src/hyperactive/integrations/sktime/_classification.py b/src/hyperactive/integrations/sktime/_classification.py new file mode 100644 index 00000000..72674c57 --- /dev/null +++ b/src/hyperactive/integrations/sktime/_classification.py @@ -0,0 +1,349 @@ +# copyright: hyperactive developers, MIT License (see LICENSE file) + +import numpy as np +from skbase.utils.dependencies import _check_soft_dependencies + +if _check_soft_dependencies("sktime", severity="none"): + from sktime.classification._delegate import _DelegatedClassifier +else: + from skbase.base import BaseEstimator as _DelegatedClassifier + +from hyperactive.experiment.integrations.sktime_classification import ( + SktimeClassificationExperiment, +) + + +class TSCOptCV(_DelegatedClassifier): + """Tune an sktime classifier via any optimizer in the hyperactive toolbox. + + ``TSCOptCV`` uses any available tuning engine from ``hyperactive`` + to tune a classifier by cross-validation. + + It passes cross-validation results as scores to the tuning engine, + which identifies the best hyperparameters. + + Any available tuning engine from hyperactive can be used, for example: + + * grid search - ``from hyperactive.opt import GridSearchSk as GridSearch``, + this results in the same algorithm as ``TSCGridSearchCV`` + * hill climbing - ``from hyperactive.opt import HillClimbing`` + * optuna parzen-tree search - ``from hyperactive.opt.optuna import TPEOptimizer`` + + Configuration of the tuning engine is as per the respective documentation. + + Formally, ``TSCOptCV`` does the following: + + In ``fit``: + + * wraps the ``estimator``, ``scoring``, and other parameters + into a ``SktimeClassificationExperiment`` instance, which is passed to the + optimizer ``optimizer`` as the ``experiment`` argument. + * Optimal parameters are then obtained from ``optimizer.solve``, and set + as ``best_params_`` and ``best_estimator_`` attributes. + * If ``refit=True``, ``best_estimator_`` is fitted to the entire ``y`` and ``X``. + + In ``predict`` and ``predict``-like methods, calls the respective method + of the ``best_estimator_`` if ``refit=True``. + + Parameters + ---------- + estimator : sktime classifier, BaseClassifier instance or interface compatible + The classifier to tune, must implement the sktime classifier interface. + + optimizer : hyperactive BaseOptimizer + The optimizer to be used for hyperparameter search. + + cv : int, sklearn cross-validation generator or an iterable, default=3-fold CV + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None = default = ``KFold(n_splits=3, shuffle=True)`` + - integer, number of folds in a ``KFold`` splitter, ``shuffle=True`` + - An iterable yielding (train, test) splits as arrays of indices. + + For integer/None inputs, a ``KFold`` splitter with ``shuffle=True`` is used.
+ Pass a cross-validation generator directly to fully control the splits. + + scoring : str, callable, default=None + Strategy to evaluate the performance of the cross-validated model on + the test set. Can be: + + - a single string resolvable to an sklearn scorer + - a callable that returns a single value; + - ``None`` = default = ``accuracy_score`` + + refit : bool, optional (default=True) + True = refit the classifier with the best parameters on the entire data in fit + False = no refitting takes place. The classifier cannot be used to predict. + This is to be used to tune the hyperparameters, and then use the estimator + as a parameter estimator, e.g., via get_fitted_params. + + error_score : "raise" or numeric, default=np.nan + Value to assign to the score if an exception occurs in estimator fitting. If set + to "raise", the exception is raised. If a numeric value is given, + FitFailedWarning is raised. + + backend : string, by default "None". + Parallelization backend to use for runs. + Runs parallel evaluate if specified and ``strategy="refit"``. + + - "None": executes loop sequentially, simple list comprehension + - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark`` + - "dask": uses ``dask``, requires ``dask`` package in environment + - "dask_lazy": same as "dask", + but changes the return to (lazy) ``dask.dataframe.DataFrame``. + - "ray": uses ``ray``, requires ``ray`` package in environment + + Recommendation: Use "dask" or "loky" for parallel evaluate. + "threading" is unlikely to see speed ups due to the GIL and the serialization + backend (``cloudpickle``) for "dask" and "loky" is generally more robust + than the standard ``pickle`` library used in "multiprocessing". + + backend_params : dict, optional + additional parameters passed to the backend as config. + Directly passed to ``utils.parallel.parallelize``. + Valid keys depend on the value of ``backend``: + + - "None": no additional parameters, ``backend_params`` is ignored + - "loky", "multiprocessing" and "threading": default ``joblib`` backends + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + with the exception of ``backend`` which is directly controlled by ``backend``. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``. + any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``, + ``backend`` must be passed as a key of ``backend_params`` in this case. + If ``n_jobs`` is not passed, it will default to ``-1``, other parameters + will default to ``joblib`` defaults. + - "dask": any valid keys for ``dask.compute`` can be passed, + e.g., ``scheduler`` + + - "ray": The following keys can be passed: + + - "ray_remote_args": dictionary of valid keys for ``ray.init`` + - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting + down after parallelization. + - "logger_name": str, default="ray"; name of the logger to use.
+ - "mute_warnings": bool, default=False; if True, suppresses warnings + + Example + ------- + Any available tuning engine from hyperactive can be used, for example: + + * grid search - ``from hyperactive.opt import GridSearchSk as GridSearch`` + * hill climbing - ``from hyperactive.opt import HillClimbing`` + * optuna parzen-tree search - ``from hyperactive.opt.optuna import TPEOptimizer`` + + For illustration, we use grid search, this can be replaced by any other optimizer. + + 1. defining the tuned estimator: + >>> from sktime.classification.dummy import DummyClassifier + >>> from sklearn.model_selection import KFold + >>> from hyperactive.integrations.sktime import TSCOptCV + >>> from hyperactive.opt import GridSearchSk as GridSearch + >>> + >>> param_grid = {"strategy": ["most_frequent", "stratified"]} + >>> tuned_naive = TSCOptCV( + ... DummyClassifier(), + ... GridSearch(param_grid), + ... cv=KFold(n_splits=2, shuffle=False), + ... ) + + 2. fitting the tuned estimator: + >>> from sktime.datasets import load_unit_test + >>> X_train, y_train = load_unit_test( + ... return_X_y=True, split="TRAIN", return_type="pd-multiindex" + ... ) + >>> X_test, _ = load_unit_test( + ... return_X_y=True, split="TEST", return_type="pd-multiindex" + ... ) + >>> + >>> tuned_naive.fit(X_train, y_train) + TSCOptCV(...) + >>> y_pred = tuned_naive.predict(X_test) + + 3. obtaining best parameters and best estimator: + >>> best_params = tuned_naive.best_params_ + >>> best_classifier = tuned_naive.best_estimator_ + """ + + _tags = { + "authors": "fkiraly", + "maintainers": "fkiraly", + "python_dependencies": "sktime", + } + + # attribute for _DelegatedClassifier, which then delegates + # all non-overridden methods are the same as those of getattr(self, _delegate_name) + # see further details in _DelegatedClassifier docstring + _delegate_name = "best_estimator_" + + def __init__( + self, + estimator, + optimizer, + cv=None, + scoring=None, + refit=True, + error_score=np.nan, + backend=None, + backend_params=None, + ): + self.estimator = estimator + self.optimizer = optimizer + self.cv = cv + self.scoring = scoring + self.refit = refit + self.error_score = error_score + self.backend = backend + self.backend_params = backend_params + super().__init__() + + def _fit(self, X, y): + """Fit time series classifier to training data. + + private _fit containing the core logic, called from fit + + Writes to self: + Sets fitted model attributes ending in "_". + + Parameters + ---------- + X : guaranteed to be of a type in self.get_tag("X_inner_mtype") + if self.get_tag("X_inner_mtype") = "numpy3D": + 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + if self.get_tag("X_inner_mtype") = "pd-multiindex": + pd.DataFrame with columns = variables, + index = pd.MultiIndex with first level = instance indices, + second level = time indices + for list of other mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb + y : guaranteed to be of a type in self.get_tag("y_inner_mtype") + 1D iterable, of shape [n_instances] + or 2D iterable, of shape [n_instances, n_dimensions] + class labels for fitting + if self.get_tag("capability:multioutput") = False, guaranteed to be 1D + if self.get_tag("capability:multioutput") = True, guaranteed to be 2D + + Returns + ------- + self : Reference to self.
+ """ + from sklearn.dummy import DummyClassifier + from sklearn.metrics import check_scoring + + estimator = self.estimator.clone() + + # use dummy classifier from sklearn to get default coercion behaviour + # for classification metrics + scoring = check_scoring(DummyClassifier(), self.scoring) + # scoring_name = f"test_{scoring.name}" + + experiment = SktimeClassificationExperiment( + estimator=estimator, + scoring=scoring, + cv=self.cv, + X=X, + y=y, + error_score=self.error_score, + backend=self.backend, + backend_params=self.backend_params, + ) + + optimizer = self.optimizer.clone() + optimizer.set_params(experiment=experiment) + best_params = optimizer.solve() + + self.best_params_ = best_params + self.best_estimator_ = estimator.set_params(**best_params) + + # Refit model with best parameters. + if self.refit: + self.best_estimator_.fit(X=X, y=y) + + return self + + def _predict(self, X): + """Predict labels for sequences in X. + + private _predict containing the core logic, called from predict + + State required: + Requires state to be "fitted". + + Accesses in self: + Fitted model attributes ending in "_" + + Parameters + ---------- + X : guaranteed to be of a type in self.get_tag("X_inner_mtype") + if self.get_tag("X_inner_mtype") = "numpy3D": + 3D np.ndarray of shape = [n_instances, n_dimensions, series_length] + if self.get_tag("X_inner_mtype") = "nested_univ": + pd.DataFrame with each column a dimension, each cell a pd.Series + for list of other mtypes, see datatypes.SCITYPE_REGISTER + for specifications, see examples/AA_datatypes_and_datasets.ipynb + + Returns + ------- + y : 1D np.array of int, of shape [n_instances] - predicted class labels + indices correspond to instance indices in X + """ + if not self.refit: + raise RuntimeError( + f"In {self.__class__.__name__}, refit must be True to make predictions," + f" but found refit=False. If refit=False, {self.__class__.__name__} can" + " be used only to tune hyper-parameters, as a parameter estimator." + ) + return super()._predict(X=X) + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return ``"default"`` set. + + Returns + ------- + params : dict or list of dict + """ + from sklearn.metrics import accuracy_score + from sklearn.model_selection import KFold + from sktime.classification.dummy import DummyClassifier + + from hyperactive.opt.gfo import HillClimbing + from hyperactive.opt.gridsearch import GridSearchSk + from hyperactive.opt.random_search import RandomSearchSk + + params_gridsearch = { + "estimator": DummyClassifier(), + "optimizer": GridSearchSk( + param_grid={"strategy": ["most_frequent", "stratified"]} + ), + } + params_randomsearch = { + "estimator": DummyClassifier(), + "cv": 2, + "optimizer": RandomSearchSk( + param_distributions={"strategy": ["most_frequent", "stratified"]}, + ), + "scoring": accuracy_score, + } + params_hillclimb = { + "estimator": DummyClassifier(strategy="stratified"), + "cv": KFold(n_splits=2, shuffle=False), + "optimizer": HillClimbing( + search_space={"strategy": ["most_frequent", "stratified"]}, + n_iter=10, + n_neighbours=5, + ), + "scoring": "cross-entropy", + } + return [params_gridsearch, params_randomsearch, params_hillclimb]
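The sketches below illustrate the functionality added in this patch; they are usage notes, not part of the diff itself.

First, the two private helpers in the new ``_skl_metrics.py`` module shown above. Based on that code, a plain metric callable is wrapped via ``make_scorer``, and the sign guess uses the lookup table, falling back to name-suffix heuristics. A minimal sketch:

>>> from sklearn.metrics import accuracy_score, mean_squared_error
>>> from hyperactive.experiment.integrations._skl_metrics import (
...     _coerce_to_scorer,
...     _guess_sign_of_sklmetric,
... )
>>> # a bare metric has no "estimator" argument, so it is wrapped via make_scorer
>>> scorer = _coerce_to_scorer(accuracy_score, "classifier")
>>> # sign guessing via the HIGHER_IS_BETTER lookup table
>>> _guess_sign_of_sklmetric(accuracy_score)
1
>>> _guess_sign_of_sklmetric(mean_squared_error)
-1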
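Second, the sign guess feeds the ``property:higher_or_lower_is_better`` tag set in ``SktimeClassificationExperiment.__init__`` above. A sketch of inspecting it, assuming the tag is readable via ``get_tag`` as for other skbase objects:

>>> from sklearn.metrics import brier_score_loss
>>> from sktime.classification.dummy import DummyClassifier
>>> from sktime.datasets import load_unit_test
>>> from hyperactive.experiment.integrations import SktimeClassificationExperiment
>>>
>>> X, y = load_unit_test()
>>> exp = SktimeClassificationExperiment(
...     estimator=DummyClassifier(), X=X, y=y, scoring=brier_score_loss
... )
>>> # brier_score_loss is a loss, so lower values are better
>>> exp.get_tag("property:higher_or_lower_is_better")
'lower'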
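Third, the ``backend`` and ``backend_params`` arguments documented above are forwarded unchanged to sktime's ``evaluate``. A hedged sketch of a parallel configuration, continuing from the previous snippet; the backend choice and ``n_jobs`` value are illustrative only:

>>> exp_par = SktimeClassificationExperiment(
...     estimator=DummyClassifier(),
...     X=X,
...     y=y,
...     cv=2,
...     backend="loky",
...     backend_params={"n_jobs": 2},
... )
>>> score, add_info = exp_par.score({"strategy": "most_frequent"})
>>> results_frame = add_info["results"]  # evaluate results, as returned by _evaluate above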
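Fourth, the ``refit`` flag on ``TSCOptCV``: per the ``_predict`` guard above, a tuner fitted with ``refit=False`` exposes ``best_params_`` and ``best_estimator_`` but refuses to predict. A sketch of that tune-only pattern:

>>> from sklearn.model_selection import KFold
>>> from sktime.classification.dummy import DummyClassifier
>>> from sktime.datasets import load_unit_test
>>> from hyperactive.integrations.sktime import TSCOptCV
>>> from hyperactive.opt import GridSearchSk as GridSearch
>>>
>>> X_train, y_train = load_unit_test(
...     return_X_y=True, split="TRAIN", return_type="pd-multiindex"
... )
>>> tuner = TSCOptCV(
...     DummyClassifier(),
...     GridSearch({"strategy": ["most_frequent", "stratified"]}),
...     cv=KFold(n_splits=2, shuffle=False),
...     refit=False,
... )
>>> tuner.fit(X_train, y_train)
TSCOptCV(...)
>>> tuned_params = tuner.best_params_  # available without a refitted model
>>> # tuner.predict(X_train) would raise RuntimeError, since refit=False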
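Finally, the class docstring states that any hyperactive tuning engine can be plugged in; the ``params_hillclimb`` test configuration above exercises this with ``HillClimbing``. A sketch along the same lines, continuing from the previous example; the iteration counts are illustrative:

>>> from hyperactive.opt.gfo import HillClimbing
>>>
>>> hill_tuner = TSCOptCV(
...     DummyClassifier(),
...     HillClimbing(
...         search_space={"strategy": ["most_frequent", "stratified"]},
...         n_iter=10,
...         n_neighbours=5,
...     ),
...     cv=KFold(n_splits=2, shuffle=False),
... )
>>> hill_tuner.fit(X_train, y_train)
TSCOptCV(...)
>>> best = hill_tuner.best_params_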