Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fail tests without non-empty @cluster annotation #336

Merged
9 changes: 9 additions & 0 deletions ducktape/cluster/node_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,15 @@ def attempt_remove_spec(self, cluster_spec):
:returns: An empty string if we can remove the nodes;
an error string otherwise.
"""
# if cluster_spec is None this means the test cannot be run at all
# e.g. users didn't specify `@cluster` annotation on it but the session context has a flag to fail
# on such tests or any other state where the test deems its cluster spec incorrect.
if cluster_spec is None:
return "Invalid or missing cluster spec"
# cluster spec may be empty and that's ok, shortcut to returning no error messages
elif len(cluster_spec) == 0:
return ""

msg = ""
for os, node_specs in iteritems(cluster_spec.nodes.os_to_nodes):
num_nodes = len(node_specs)
Expand Down
9 changes: 8 additions & 1 deletion ducktape/command_line/parse_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,14 @@ def create_ducktape_parser():
parser.add_argument("--sample", action="store", type=int,
help="The size of a random test sample to run")
parser.add_argument("--fail-bad-cluster-utilization", action="store_true",
help="Fail a test if the cluster node utilization does not match the cluster node usage.")
help="Fail a test if the test declared that it needs more nodes than it actually used. "
"E.g. if the test had `@cluster(num_nodes=10)` annotation, "
"but never used more than 5 nodes during its execution.")
parser.add_argument("--fail-greedy-tests", action="store_true",
help="Fail a test if it has no @cluster annotation "
"or if @cluster annotation is empty. "
"You can still specify 0-sized cluster explicitly using either num_nodes=0 "
"or cluster_spec=ClusterSpec.empty()")
parser.add_argument("--test-runner-timeout", action="store", type=int, default=1800000,
help="Amount of time in milliseconds between test communicating between the test runner"
" before a timeout error occurs. Default is 30 minutes")
Expand Down
4 changes: 4 additions & 0 deletions ducktape/mark/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ def apply(self, seed_context, context_list):
def cluster(**kwargs):
"""Test method decorator used to provide hints about how the test will use the given cluster.
If this decorator is not provided, the test will either claim all cluster resources or fail immediately,
depending on the flags passed to ducktape.
:Keywords used by ducktape:
- ``num_nodes`` provide hint about how many nodes the test will consume
Expand Down
12 changes: 0 additions & 12 deletions ducktape/services/service_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@

from collections import OrderedDict

from ducktape.cluster.cluster_spec import ClusterSpec


class ServiceRegistry(object):

Expand Down Expand Up @@ -87,16 +85,6 @@ def free_all(self):
self._services.clear()
self._nodes.clear()

def min_cluster_spec(self):
"""
Returns the minimum cluster specification that would be required to run all the currently
extant services.
"""
cluster_spec = ClusterSpec()
for service in self._services.values():
cluster_spec.add(service.cluster_spec)
return cluster_spec

def errors(self):
"""
Gets a printable string containing any errors produced by the services.
Expand Down
3 changes: 1 addition & 2 deletions ducktape/tests/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,11 @@ def __init__(self,
@param data data returned by the test, e.g. throughput
"""
self.nodes_allocated = len(test_context.cluster)
self.nodes_used = test_context.cluster.max_used_nodes
if hasattr(test_context, "services"):
self.services = test_context.services.to_json()
self.nodes_used = test_context.services.min_cluster_spec().size()
else:
self.services = {}
self.nodes_used = 0

self.test_id = test_context.test_id
self.module_name = test_context.module_name
Expand Down
24 changes: 1 addition & 23 deletions ducktape/tests/runner_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import traceback
import zmq

from six import iteritems
from ducktape.services.service import MultiRunServiceIdFactory, service_id_factory
from ducktape.services.service_registry import ServiceRegistry

Expand Down Expand Up @@ -178,14 +177,10 @@ def _do_run(self, num_runs):
mkdir_p(TestContext.results_dir(self.test_context, self.test_index))
# Instantiate test
self.test = self.test_context.cls(self.test_context)
# Check if there are enough nodes
self._check_min_cluster_spec()
# Run the test unit

# Run the test unit
self.setup_test()

data = self.run_test()

test_status = PASS

except BaseException as e:
Expand All @@ -212,23 +207,6 @@ def _do_run(self, num_runs):
self._do_safely(self.test.free_nodes, "Error freeing nodes:")
return test_status, "".join(summary), data

def _check_min_cluster_spec(self):
self.log(logging.DEBUG, "Checking if there are enough nodes...")
min_cluster_spec = self.test.min_cluster_spec()
os_to_num_nodes = {}
for node_spec in min_cluster_spec:
if not os_to_num_nodes.get(node_spec.operating_system):
os_to_num_nodes[node_spec.operating_system] = 1
else:
os_to_num_nodes[node_spec.operating_system] = os_to_num_nodes[node_spec.operating_system] + 1
for (operating_system, node_count) in iteritems(os_to_num_nodes):
num_avail = len(list(self.cluster.all().nodes.elements(operating_system=operating_system)))
if node_count > num_avail:
raise RuntimeError(
"There are not enough nodes available in the cluster to run this test. "
"Cluster size for %s: %d, Need at least: %d. Services currently registered: %s" %
(operating_system, num_avail, node_count, self.test_context.services))

def _check_cluster_utilization(self, result, summary):
"""Checks if the number of nodes used by a test is less than the number of
nodes requested by the test. If this is the case and we wish to fail
Expand Down
10 changes: 6 additions & 4 deletions ducktape/tests/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,12 @@ def _sort_test_context_list(self):
Sort from largest cluster users to smallest
"""
# sort from largest cluster users to smallest
self._test_context_list = sorted(self._test_context_list,
key=lambda tc: tc.expected_num_nodes,
reverse=True)
# sort from the largest cluster users to smallest
self._test_context_list = sorted(
self._test_context_list,
key=lambda tc: tc.expected_num_nodes,
reverse=True
)

def peek(self):
"""Locate and return the next object to be scheduled, without removing it internally.
Expand Down
1 change: 1 addition & 0 deletions ducktape/tests/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def __init__(self, **kwargs):
self.max_parallel = kwargs.get("max_parallel", 1)
self.default_expected_num_nodes = kwargs.get("default_num_nodes", None)
self.fail_bad_cluster_utilization = kwargs.get("fail_bad_cluster_utilization")
self.fail_greedy_tests = kwargs.get("fail_greedy_tests", False)
self.test_runner_timeout = kwargs.get("test_runner_timeout")
self._globals = kwargs.get("globals")

Expand Down
52 changes: 19 additions & 33 deletions ducktape/tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from ducktape.cluster.cluster_spec import ClusterSpec
from ducktape.tests.loggermaker import LoggerMaker, close_logger
from ducktape.tests.session import SessionContext
from ducktape.utils.local_filesystem_utils import mkdir_p
from ducktape.command_line.defaults import ConsoleDefaults
from ducktape.services.service_registry import ServiceRegistry
Expand Down Expand Up @@ -52,35 +53,17 @@ def logger(self):

def min_cluster_spec(self):
"""
Returns a specification for the minimal cluster we need to run this test.
This method replaces the deprecated min_cluster_size. Unlike min_cluster_size, it can handle
non-Linux operating systems.
In general, most Tests don't need to override this method. The default implementation
seen here works well in most cases. However, the default implementation only takes into account
the services that exist at the time of the call. You may need to override this method if you add
new services during the course of your test.
:return: A ClusterSpec object.
THIS METHOD IS DEPRECATED AND WILL BE REMOVED IN THE SUBSEQUENT RELEASES.
Nothing in the ducktape framework calls it, it is only provided so that subclasses don't break.
If you're overriding this method in your subclass, please remove it.
"""
try:
# If the Test overrode the deprecated min_cluster_size method, we will use that.
num_linux_nodes = self.min_cluster_size()
return ClusterSpec.simple_linux(num_linux_nodes)
except NotImplementedError:
# Otherwise, ask the service registry what kind of cluster spec we need for currently
# extant services.
return self.test_context.services.min_cluster_spec()
raise NotImplementedError

def min_cluster_size(self):
"""
Returns the number of linux nodes which this test needs.
THIS METHOD IS DEPRECATED, and provided only for backwards compatibility.
Please implement min_cluster_spec instead.
:return: An integer.
THIS METHOD IS DEPRECATED AND WILL BE REMOVED IN THE SUBSEQUENT RELEASES.
Nothing in the ducktape framework calls it, it is only provided so that subclasses don't break.
If you're overriding this method in your subclass, please remove it.
"""
raise NotImplementedError

Expand Down Expand Up @@ -291,7 +274,7 @@ def __init__(self, **kwargs):
:param cluster_use_metadata: dict containing information about how this test will use cluster resources
"""

self.session_context = kwargs.get("session_context")
self.session_context: SessionContext = kwargs.get("session_context")
self.cluster = kwargs.get("cluster")
self.module = kwargs.get("module")
self.test_suite_name = kwargs.get("test_suite_name")
Expand Down Expand Up @@ -319,10 +302,9 @@ def __init__(self, **kwargs):

def __repr__(self):
return \
"<module=%s, cls=%s, function=%s, injected_args=%s, file=%s, ignore=%s, " \
"cluster_size=%s, cluster_spec=%s>" % \
(self.module, self.cls_name, self.function_name, str(self.injected_args), str(self.file),
str(self.ignore), str(self.expected_num_nodes), str(self.expected_cluster_spec))
f"<module={self.module}, cls={self.cls_name}, function={self.function_name}, " \
f"injected_args={self.injected_args}, file={self.file}, ignore={self.ignore}, " \
f"cluster_spec={self.expected_cluster_spec}>"

def copy(self, **kwargs):
"""Construct a new TestContext object from another TestContext object
Expand Down Expand Up @@ -376,26 +358,30 @@ def results_dir(test_context, test_index):
def expected_num_nodes(self):
"""
How many nodes of any type we expect this test to consume when run.
Note that this will be 0 for both unschedulable tests and the tests that legitimately need 0 nodes.
:return: an integer number of nodes.
"""
return self.expected_cluster_spec.size()
return self.expected_cluster_spec.size() if self.expected_cluster_spec else 0

@property
def expected_cluster_spec(self):
"""
The cluster spec we expect this test to consume when run.
:return: A ClusterSpec object.
:return: A ClusterSpec object or None if the test cannot be run
(e.g. session context settings disallow tests with no cluster metadata attached).
"""
cluster_spec = self.cluster_use_metadata.get(CLUSTER_SPEC_KEYWORD)
cluster_size = self.cluster_use_metadata.get(CLUSTER_SIZE_KEYWORD)
if cluster_spec is not None:
return cluster_spec
elif cluster_size is not None:
return ClusterSpec.simple_linux(cluster_size)
elif self.cluster is None:
elif not self.cluster:
return ClusterSpec.empty()
elif self.session_context.fail_greedy_tests:
return None
else:
return self.cluster.all()

Expand Down
11 changes: 11 additions & 0 deletions systests/cluster/test_no_cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from ducktape.mark.resource import cluster
from ducktape.tests.test import Test


class NoClusterTest(Test):
"""This test helps validate the behavior for no-cluster tests (ie 0 nodes)"""

@cluster(num_nodes=0)
def test_zero_nodes(self):
self.logger.warn('Testing')
assert True
19 changes: 19 additions & 0 deletions tests/cluster/check_node_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,22 @@ def check_enough_healthy_but_some_bad_nodes_too(self, accounts):
assert len(actual_linux) == 2
actual_win = [node for node in good_nodes if node.os == WINDOWS]
assert len(actual_win) == 2

def check_empty_cluster_spec(self):
accounts = [fake_account('host1'), fake_account('host2'), fake_account('host3')]
container = NodeContainer(accounts)
spec = ClusterSpec.empty()
assert not container.attempt_remove_spec(spec)
assert container.can_remove_spec(spec)
good, bad = container.remove_spec(spec)
assert not good
assert not bad

def check_none_cluster_spec(self):
accounts = [fake_account('host1'), fake_account('host2'), fake_account('host3')]
container = NodeContainer(accounts)
spec = None
assert container.attempt_remove_spec(spec)
assert not container.can_remove_spec(spec)
with pytest.raises(InsufficientResourcesError):
container.remove_spec(spec)
49 changes: 48 additions & 1 deletion tests/mark/check_cluster_use_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.


from ducktape.cluster import LocalhostCluster
from ducktape.mark import parametrize, matrix, ignore, defaults
from ducktape.mark.mark_expander import MarkedFunctionExpander
from ducktape.mark.resource import cluster
from ducktape.cluster.cluster_spec import ClusterSpec

import pytest

from tests import ducktape_mock


class CheckClusterUseAnnotation(object):

Expand Down Expand Up @@ -64,6 +66,51 @@ def function():
assert len(test_context_list) == 1
assert test_context_list[0].expected_num_nodes == num_nodes

@pytest.mark.parametrize('fail_greedy_tests', [True, False])
@pytest.mark.parametrize('has_annotation', [True, False])
def check_empty_cluster_annotation(self, fail_greedy_tests, has_annotation):

@cluster()
def function_with_annotation():
return "hi"

def function_no_annotation():
return "hello"

assert hasattr(function_with_annotation, "marks")
assert not hasattr(function_no_annotation, "marks")

# no annotation and empty annotation behave identically as far as this functionality is concerned
function = function_with_annotation if has_annotation else function_no_annotation

mock_cluster = LocalhostCluster(num_nodes=1000)
session_context = ducktape_mock.session_context(fail_greedy_tests=fail_greedy_tests)
tc_list = MarkedFunctionExpander(
function=function, cluster=mock_cluster, session_context=session_context).expand()

assert len(tc_list) == 1
if fail_greedy_tests:
assert tc_list[0].expected_num_nodes == 0
assert tc_list[0].expected_cluster_spec is None
else:
assert tc_list[0].expected_num_nodes == 1000

@pytest.mark.parametrize('fail_greedy_tests', [True, False])
def check_zero_nodes_annotation(self, fail_greedy_tests):
@cluster(num_nodes=0)
def function():
return "hi"

assert hasattr(function, "marks")
mock_cluster = LocalhostCluster(num_nodes=1000)
session_context = ducktape_mock.session_context(fail_greedy_tests=fail_greedy_tests)
tc_list = MarkedFunctionExpander(function=function, cluster=mock_cluster,
session_context=session_context).expand()
assert len(tc_list) == 1
assert tc_list[0].expected_num_nodes == 0
assert tc_list[0].expected_cluster_spec is not None
assert len(tc_list[0].expected_cluster_spec) == 0

def check_with_parametrize(self):
num_nodes = 200

Expand Down