diff --git a/sktime/forecasting/tests/test_all_forecasters.py b/sktime/forecasting/tests/test_all_forecasters.py
index 6b79d547673..e5b6a99b7f6 100644
--- a/sktime/forecasting/tests/test_all_forecasters.py
+++ b/sktime/forecasting/tests/test_all_forecasters.py
@@ -212,12 +212,10 @@ def test_predict_time_index(
         """
         index_type, fh_type, is_relative = index_fh_comb
         if fh_type == "timedelta":
-            return None
-            # todo: ensure check_estimator works with pytest.skip like below
-            # pytest.skip(
-            #     "ForecastingHorizon with timedelta values "
-            #     "is currently experimental and not supported everywhere"
-            # )
+            pytest.skip(
+                "ForecastingHorizon with timedelta values "
+                "is currently experimental and not supported everywhere"
+            )
         y_train = _make_series(
             n_columns=n_columns, index_type=index_type, n_timepoints=50
         )
@@ -268,12 +266,10 @@ def test_predict_time_index_with_X(
         """Check that predicted time index matches forecasting horizon."""
         index_type, fh_type, is_relative = index_fh_comb
         if fh_type == "timedelta":
-            return None
-            # todo: ensure check_estimator works with pytest.skip like below
-            # pytest.skip(
-            #     "ForecastingHorizon with timedelta values "
-            #     "is currently experimental and not supported everywhere"
-            # )
+            pytest.skip(
+                "ForecastingHorizon with timedelta values "
+                "is currently experimental and not supported everywhere"
+            )
         z, X = make_forecasting_problem(index_type=index_type, make_X=True)
 
         # Some estimators may not support all time index types and fh types, hence we
@@ -308,12 +304,10 @@ def test_predict_time_index_in_sample_full(
         """Check that predicted time index equals fh for full in-sample predictions."""
         index_type, fh_type, is_relative = index_fh_comb
         if fh_type == "timedelta":
-            return None
-            # todo: ensure check_estimator works with pytest.skip like below
-            # pytest.skip(
-            #     "ForecastingHorizon with timedelta values "
-            #     "is currently experimental and not supported everywhere"
-            # )
+            pytest.skip(
+                "ForecastingHorizon with timedelta values "
+                "is currently experimental and not supported everywhere"
+            )
         y_train = _make_series(n_columns=n_columns, index_type=index_type)
         cutoff = get_cutoff(y_train, return_index=True)
         steps = -np.arange(len(y_train))
diff --git a/sktime/split/tests/test_temporaltraintest.py b/sktime/split/tests/test_temporaltraintest.py
index f8506d7053a..dbb70897e84 100644
--- a/sktime/split/tests/test_temporaltraintest.py
+++ b/sktime/split/tests/test_temporaltraintest.py
@@ -36,12 +36,10 @@ def _check_train_test_split_y(fh, split):
 def test_split_by_fh(index_type, fh_type, is_relative, values):
     """Test temporal_train_test_split."""
     if fh_type == "timedelta":
-        return None
-        # todo: ensure check_estimator works with pytest.skip like below
-        # pytest.skip(
-        #     "ForecastingHorizon with timedelta values "
-        #     "is currently experimental and not supported everywhere"
-        # )
+        pytest.skip(
+            "ForecastingHorizon with timedelta values "
+            "is currently experimental and not supported everywhere"
+        )
     y = _make_series(20, index_type=index_type)
     cutoff = get_cutoff(y.iloc[:10], return_index=True)
     fh = _make_fh(cutoff, values, fh_type, is_relative)
diff --git a/sktime/tests/test_all_estimators.py b/sktime/tests/test_all_estimators.py
index f7ad4ef4a78..0d71dad9b6d 100644
--- a/sktime/tests/test_all_estimators.py
+++ b/sktime/tests/test_all_estimators.py
@@ -17,6 +17,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+from _pytest.outcomes import Skipped
 
 from sktime.base import BaseEstimator, BaseObject, load
 from sktime.classification.deep_learning.base import BaseDeepClassifier
@@ -599,6 +600,8 @@ def _generate_estimator_instance_cls(test_name, **kwargs):
             try:
                 test_fun(**deepcopy(args))
                 results[key] = "PASSED"
+            except Skipped as err:
+                results[key] = f"SKIPPED: {err.msg}"
             except Exception as err:
                 results[key] = err
             else:
diff --git a/sktime/utils/tests/test_check_estimator.py b/sktime/utils/tests/test_check_estimator.py
index 6d44fb7af50..d35928b9da4 100644
--- a/sktime/utils/tests/test_check_estimator.py
+++ b/sktime/utils/tests/test_check_estimator.py
@@ -14,14 +14,33 @@
 
 @pytest.mark.parametrize("estimator_class", EXAMPLE_CLASSES)
 def test_check_estimator_passed(estimator_class):
-    """Test that check_estimator returns only passed tests for examples we know pass."""
+    """Test that check_estimator returns only passed tests for examples we know pass.
+
+    Tests may be skipped if they are not applicable to the estimator;
+    in this case the test result is marked "SKIPPED", and we check
+    that less than 10% of tests are skipped.
+    """
     estimator_instance = estimator_class.create_test_instance()
 
     result_class = check_estimator(estimator_class, verbose=False)
-    assert all(x == "PASSED" for x in result_class.values())
+
+    # Check there are no failures.
+    assert not any(x == "FAILED" for x in result_class.values())
+
+    # Check that less than 10% of tests are skipped.
+    n_skipped = sum(x[:4] == "SKIP" for x in result_class.values())
+    skip_ratio = n_skipped / len(result_class.values())
+    assert skip_ratio < 0.1
 
     result_instance = check_estimator(estimator_instance, verbose=False)
-    assert all(x == "PASSED" for x in result_instance.values())
+
+    # Check there are no failures.
+    assert not any(x == "FAILED" for x in result_instance.values())
+
+    # Check that less than 10% of tests are skipped.
+    n_skipped = sum(x[:4] == "SKIP" for x in result_instance.values())
+    skip_ratio = n_skipped / len(result_instance.values())
+    assert skip_ratio < 0.1
 
 
 @pytest.mark.parametrize("estimator_class", EXAMPLE_CLASSES)
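Note on the mechanism this patch relies on: `pytest.skip()` raises `_pytest.outcomes.Skipped`, which in current pytest versions derives from `BaseException` rather than `Exception`, so the pre-existing `except Exception` branch in `test_all_estimators.py` would never see it. The sketch below is a minimal, self-contained illustration of the catch-and-record pattern, not sktime code; the `_run_test` helper and the dummy test names are hypothetical.

```python
# Minimal sketch of the skip-recording pattern introduced in
# sktime/tests/test_all_estimators.py above. Only pytest is assumed;
# _run_test and the dummy test names are illustrative, not sktime API.
from copy import deepcopy

import pytest
from _pytest.outcomes import Skipped


def _run_test(test_fun, results, key, args):
    """Call test_fun with args and record the outcome under key."""
    try:
        test_fun(**deepcopy(args))
        results[key] = "PASSED"
    except Skipped as err:
        # pytest.skip raises Skipped, which subclasses BaseException rather
        # than Exception, so without this branch it would bypass the
        # generic handler below and abort the whole run.
        results[key] = f"SKIPPED: {err.msg}"
    except Exception as err:
        results[key] = err


results = {}
_run_test(lambda: pytest.skip("timedelta fh not supported"), results, "test_a", {})
_run_test(lambda: None, results, "test_b", {})
print(results)
# {'test_a': 'SKIPPED: timedelta fh not supported', 'test_b': 'PASSED'}
```

Recording the outcome as a `"SKIPPED: ..."` string, rather than a dedicated status object, is what lets the new assertions in `test_check_estimator.py` detect skips with the simple `x[:4] == "SKIP"` prefix check.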