confluentinc · stan-is-hate · Jul 13, 2022 · Jul 7, 2022 · Jul 8, 2022 · Jul 11, 2022
@@ -186,6 +186,15 @@ def attempt_remove_spec(self, cluster_spec):
         :returns:                               An empty string if we can remove the nodes;
                                                 an error string otherwise.
         """
+        # if cluster_spec is None this means the test cannot be run at all
+        # e.g. users didn't specify `@cluster` annotation on it but the session context has a flag to fail
+        # on such tests or any other state where the test deems its cluster spec incorrect.
+        if cluster_spec is None:
+            return "Invalid or missing cluster spec"
+        # cluster spec may be empty and that's ok, shortcut to returning no error messages
+        elif len(cluster_spec) == 0:
+            return ""
+
         msg = ""
         for os, node_specs in iteritems(cluster_spec.nodes.os_to_nodes):
             num_nodes = len(node_specs)

@@ -79,7 +79,14 @@ def create_ducktape_parser():
     parser.add_argument("--sample", action="store", type=int,
                         help="The size of a random test sample to run")
     parser.add_argument("--fail-bad-cluster-utilization", action="store_true",
-                        help="Fail a test if the cluster node utilization does not match the cluster node usage.")
+                        help="Fail a test if the test declared that it needs more nodes than it actually used. "
+                             "E.g. if the test had `@cluster(num_nodes=10)` annotation, "
+                             "but never used more than 5 nodes during its execution.")
+    parser.add_argument("--fail-greedy-tests", action="store_true",
+                        help="Fail a test if it has no @cluster annotation "
+                             "or if @cluster annotation is empty. "
+                             "You can still specify 0-sized cluster explicitly using either num_nodes=0 "
+                             "or cluster_spec=ClusterSpec.empty()")
     parser.add_argument("--test-runner-timeout", action="store", type=int, default=1800000,
                         help="Amount of time in milliseconds between test communicating between the test runner"
                              " before a timeout error occurs. Default is 30 minutes")

@@ -44,6 +44,10 @@ def apply(self, seed_context, context_list):
 def cluster(**kwargs):
     """Test method decorator used to provide hints about how the test will use the given cluster.
 
+    If this decorator is not provided, the test will either claim all cluster resources or fail immediately,
+    depending on the flags passed to ducktape.
+
+
     :Keywords used by ducktape:
 
         - ``num_nodes`` provide hint about how many nodes the test will consume

@@ -15,8 +15,6 @@
 
 from collections import OrderedDict
 
-from ducktape.cluster.cluster_spec import ClusterSpec
-
 
 class ServiceRegistry(object):
 
@@ -87,16 +85,6 @@ def free_all(self):
         self._services.clear()
         self._nodes.clear()
 
-    def min_cluster_spec(self):
-        """
-        Returns the minimum cluster specification that would be required to run all the currently
-        extant services.
-        """
-        cluster_spec = ClusterSpec()
-        for service in self._services.values():
-            cluster_spec.add(service.cluster_spec)
-        return cluster_spec
-
     def errors(self):
         """
         Gets a printable string containing any errors produced by the services.

@@ -43,12 +43,11 @@ def __init__(self,
         @param data          data returned by the test, e.g. throughput
         """
         self.nodes_allocated = len(test_context.cluster)
+        self.nodes_used = test_context.cluster.max_used_nodes
         if hasattr(test_context, "services"):
             self.services = test_context.services.to_json()
-            self.nodes_used = test_context.services.min_cluster_spec().size()
         else:
             self.services = {}
-            self.nodes_used = 0
 
         self.test_id = test_context.test_id
         self.module_name = test_context.module_name

@@ -19,7 +19,6 @@
 import traceback
 import zmq
 
-from six import iteritems
 from ducktape.services.service import MultiRunServiceIdFactory, service_id_factory
 from ducktape.services.service_registry import ServiceRegistry
 
@@ -178,14 +177,10 @@ def _do_run(self, num_runs):
             mkdir_p(TestContext.results_dir(self.test_context, self.test_index))
             # Instantiate test
             self.test = self.test_context.cls(self.test_context)
-            # Check if there are enough nodes
-            self._check_min_cluster_spec()
-            # Run the test unit
 
+            # Run the test unit
             self.setup_test()
-
             data = self.run_test()
-
             test_status = PASS
 
         except BaseException as e:
@@ -212,23 +207,6 @@ def _do_run(self, num_runs):
                 self._do_safely(self.test.free_nodes, "Error freeing nodes:")
             return test_status, "".join(summary), data
 
-    def _check_min_cluster_spec(self):
-        self.log(logging.DEBUG, "Checking if there are enough nodes...")
-        min_cluster_spec = self.test.min_cluster_spec()
-        os_to_num_nodes = {}
-        for node_spec in min_cluster_spec:
-            if not os_to_num_nodes.get(node_spec.operating_system):
-                os_to_num_nodes[node_spec.operating_system] = 1
-            else:
-                os_to_num_nodes[node_spec.operating_system] = os_to_num_nodes[node_spec.operating_system] + 1
-        for (operating_system, node_count) in iteritems(os_to_num_nodes):
-            num_avail = len(list(self.cluster.all().nodes.elements(operating_system=operating_system)))
-            if node_count > num_avail:
-                raise RuntimeError(
-                    "There are not enough nodes available in the cluster to run this test. "
-                    "Cluster size for %s: %d, Need at least: %d. Services currently registered: %s" %
-                    (operating_system, num_avail, node_count, self.test_context.services))
-
     def _check_cluster_utilization(self, result, summary):
         """Checks if the number of nodes used by a test is less than the number of
         nodes requested by the test. If this is the case and we wish to fail

@@ -55,10 +55,12 @@ def _sort_test_context_list(self):
 
         Sort from largest cluster users to smallest
         """
-        # sort from largest cluster users to smallest
-        self._test_context_list = sorted(self._test_context_list,
-                                         key=lambda tc: tc.expected_num_nodes,
-                                         reverse=True)
+        # sort from the largest cluster users to smallest
+        self._test_context_list = sorted(
+            self._test_context_list,
+            key=lambda tc: tc.expected_num_nodes,
+            reverse=True
+        )
 
     def peek(self):
         """Locate and return the next object to be scheduled, without removing it internally.

@@ -38,6 +38,7 @@ def __init__(self, **kwargs):
         self.max_parallel = kwargs.get("max_parallel", 1)
         self.default_expected_num_nodes = kwargs.get("default_num_nodes", None)
         self.fail_bad_cluster_utilization = kwargs.get("fail_bad_cluster_utilization")
+        self.fail_greedy_tests = kwargs.get("fail_greedy_tests", False)
         self.test_runner_timeout = kwargs.get("test_runner_timeout")
         self._globals = kwargs.get("globals")
 

@@ -23,6 +23,7 @@
 
 from ducktape.cluster.cluster_spec import ClusterSpec
 from ducktape.tests.loggermaker import LoggerMaker, close_logger
+from ducktape.tests.session import SessionContext
 from ducktape.utils.local_filesystem_utils import mkdir_p
 from ducktape.command_line.defaults import ConsoleDefaults
 from ducktape.services.service_registry import ServiceRegistry
@@ -52,35 +53,17 @@ def logger(self):
 
     def min_cluster_spec(self):
         """
-        Returns a specification for the minimal cluster we need to run this test.
-
-        This method replaces the deprecated min_cluster_size.  Unlike min_cluster_size, it can handle
-        non-Linux operating systems.
-
-        In general, most Tests don't need to override this method.  The default implementation
-        seen here works well in most cases.  However, the default implementation only takes into account
-        the services that exist at the time of the call.  You may need to override this method if you add
-        new services during the course of your test.
-
-        :return:            A ClusterSpec object.
+        THIS METHOD IS DEPRECATED AND WILL BE REMOVED IN THE SUBSEQUENT RELEASES.
+        Nothing in the ducktape framework calls it, it is only provided so that subclasses don't break.
+        If you're overriding this method in your subclass, please remove it.
         """
-        try:
-            # If the Test overrode the deprecated min_cluster_size method, we will use that.
-            num_linux_nodes = self.min_cluster_size()
-            return ClusterSpec.simple_linux(num_linux_nodes)
-        except NotImplementedError:
-            # Otherwise, ask the service registry what kind of cluster spec we need for currently
-            # extant services.
-            return self.test_context.services.min_cluster_spec()
+        raise NotImplementedError
 
     def min_cluster_size(self):
         """
-        Returns the number of linux nodes which this test needs.
-
-        THIS METHOD IS DEPRECATED, and provided only for backwards compatibility.
-        Please implement min_cluster_spec instead.
-
-        :return:            An integer.
+        THIS METHOD IS DEPRECATED AND WILL BE REMOVED IN THE SUBSEQUENT RELEASES.
+        Nothing in the ducktape framework calls it, it is only provided so that subclasses don't break.
+        If you're overriding this method in your subclass, please remove it.
         """
         raise NotImplementedError
 
@@ -291,7 +274,7 @@ def __init__(self, **kwargs):
         :param cluster_use_metadata: dict containing information about how this test will use cluster resources
         """
 
-        self.session_context = kwargs.get("session_context")
+        self.session_context: SessionContext = kwargs.get("session_context")
         self.cluster = kwargs.get("cluster")
         self.module = kwargs.get("module")
         self.test_suite_name = kwargs.get("test_suite_name")
@@ -319,10 +302,9 @@ def __init__(self, **kwargs):
 
     def __repr__(self):
         return \
-            "<module=%s, cls=%s, function=%s, injected_args=%s, file=%s, ignore=%s, " \
-            "cluster_size=%s, cluster_spec=%s>" % \
-            (self.module, self.cls_name, self.function_name, str(self.injected_args), str(self.file),
-             str(self.ignore), str(self.expected_num_nodes), str(self.expected_cluster_spec))
+            f"<module={self.module}, cls={self.cls_name}, function={self.function_name}, " \
+            f"injected_args={self.injected_args}, file={self.file}, ignore={self.ignore}, " \
+            f"cluster_spec={self.expected_cluster_spec}>"
 
     def copy(self, **kwargs):
         """Construct a new TestContext object from another TestContext object
@@ -376,26 +358,30 @@ def results_dir(test_context, test_index):
     def expected_num_nodes(self):
         """
         How many nodes of any type we expect this test to consume when run.
+        Note that this will be 0 for both unschedulable tests and the tests that legitimately need 0 nodes.
 
         :return:            an integer number of nodes.
         """
-        return self.expected_cluster_spec.size()
+        return self.expected_cluster_spec.size() if self.expected_cluster_spec else 0
 
     @property
     def expected_cluster_spec(self):
         """
         The cluster spec we expect this test to consume when run.
 
-        :return:            A ClusterSpec object.
+        :return:            A ClusterSpec object or None if the test cannot be run
+                            (e.g. session context settings disallow tests with no cluster metadata attached).
         """
         cluster_spec = self.cluster_use_metadata.get(CLUSTER_SPEC_KEYWORD)
         cluster_size = self.cluster_use_metadata.get(CLUSTER_SIZE_KEYWORD)
         if cluster_spec is not None:
             return cluster_spec
         elif cluster_size is not None:
             return ClusterSpec.simple_linux(cluster_size)
-        elif self.cluster is None:
+        elif not self.cluster:
             return ClusterSpec.empty()
+        elif self.session_context.fail_greedy_tests:
+            return None
         else:
             return self.cluster.all()
 

@@ -0,0 +1,11 @@
+from ducktape.mark.resource import cluster
+from ducktape.tests.test import Test
+
+
+class NoClusterTest(Test):
+    """This test helps validate the behavior for no-cluster tests (ie 0 nodes)"""
+
+    @cluster(num_nodes=0)
+    def test_zero_nodes(self):
+        self.logger.warn('Testing')
+        assert True
@@ -210,3 +210,22 @@ def check_enough_healthy_but_some_bad_nodes_too(self, accounts):
         assert len(actual_linux) == 2
         actual_win = [node for node in good_nodes if node.os == WINDOWS]
         assert len(actual_win) == 2
+
+    def check_empty_cluster_spec(self):
+        accounts = [fake_account('host1'), fake_account('host2'), fake_account('host3')]
+        container = NodeContainer(accounts)
+        spec = ClusterSpec.empty()
+        assert not container.attempt_remove_spec(spec)
+        assert container.can_remove_spec(spec)
+        good, bad = container.remove_spec(spec)
+        assert not good
+        assert not bad
+
+    def check_none_cluster_spec(self):
+        accounts = [fake_account('host1'), fake_account('host2'), fake_account('host3')]
+        container = NodeContainer(accounts)
+        spec = None
+        assert container.attempt_remove_spec(spec)
+        assert not container.can_remove_spec(spec)
+        with pytest.raises(InsufficientResourcesError):
+            container.remove_spec(spec)
@@ -12,14 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+from ducktape.cluster import LocalhostCluster
 from ducktape.mark import parametrize, matrix, ignore, defaults
 from ducktape.mark.mark_expander import MarkedFunctionExpander
 from ducktape.mark.resource import cluster
 from ducktape.cluster.cluster_spec import ClusterSpec
 
 import pytest
 
+from tests import ducktape_mock
+
 
 class CheckClusterUseAnnotation(object):
 
@@ -64,6 +66,51 @@ def function():
         assert len(test_context_list) == 1
         assert test_context_list[0].expected_num_nodes == num_nodes
 
+    @pytest.mark.parametrize('fail_greedy_tests', [True, False])
+    @pytest.mark.parametrize('has_annotation', [True, False])
+    def check_empty_cluster_annotation(self, fail_greedy_tests, has_annotation):
+
+        @cluster()
+        def function_with_annotation():
+            return "hi"
+
+        def function_no_annotation():
+            return "hello"
+
+        assert hasattr(function_with_annotation, "marks")
+        assert not hasattr(function_no_annotation, "marks")
+
+        # no annotation and empty annotation behave identically as far as this functionality is concerned
+        function = function_with_annotation if has_annotation else function_no_annotation
+
+        mock_cluster = LocalhostCluster(num_nodes=1000)
+        session_context = ducktape_mock.session_context(fail_greedy_tests=fail_greedy_tests)
+        tc_list = MarkedFunctionExpander(
+            function=function, cluster=mock_cluster, session_context=session_context).expand()
+
+        assert len(tc_list) == 1
+        if fail_greedy_tests:
+            assert tc_list[0].expected_num_nodes == 0
+            assert tc_list[0].expected_cluster_spec is None
+        else:
+            assert tc_list[0].expected_num_nodes == 1000
+
+    @pytest.mark.parametrize('fail_greedy_tests', [True, False])
+    def check_zero_nodes_annotation(self, fail_greedy_tests):
+        @cluster(num_nodes=0)
+        def function():
+            return "hi"
+
+        assert hasattr(function, "marks")
+        mock_cluster = LocalhostCluster(num_nodes=1000)
+        session_context = ducktape_mock.session_context(fail_greedy_tests=fail_greedy_tests)
+        tc_list = MarkedFunctionExpander(function=function, cluster=mock_cluster,
+                                         session_context=session_context).expand()
+        assert len(tc_list) == 1
+        assert tc_list[0].expected_num_nodes == 0
+        assert tc_list[0].expected_cluster_spec is not None
+        assert len(tc_list[0].expected_cluster_spec) == 0
+
     def check_with_parametrize(self):
         num_nodes = 200