confluentinc · imcdo · Jun 16, 2022 · Jun 8, 2022 · Jun 8, 2022 · Jun 8, 2022
diff --git a/ducktape/tests/runner.py b/ducktape/tests/runner.py
@@ -210,6 +210,9 @@ def run_all_tests(self):
                         self._log(logging.ERROR, err_str)
 
                         # All processes are on the same machine, so treat communication failure as a fatal error
+                        for proc in self._client_procs.values():
+                            proc.terminate()
+                        self._client_procs = {}
                         raise
             except KeyboardInterrupt:
                 # If SIGINT is received, stop triggering new tests, and let the currently running tests finish

diff --git a/systests/cluster/test_runner_operations.py b/systests/cluster/test_runner_operations.py
@@ -0,0 +1,54 @@
+# Copyright 2022 Confluent Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ducktape.cluster.cluster_spec import ClusterSpec, WINDOWS, LINUX, NodeSpec
+from ducktape.services.service import Service
+from ducktape.tests.test import Test
+from ducktape.errors import TimeoutError
+from ducktape.mark.resource import cluster
+import time
+
+
+
+
+class SimpleEchoService(Service):
+    """Simple service that allocates one node for performing tests of RemoteAccount functionality"""
+    logs = {
+        "my_log": {
+            "path": "/tmp/log",
+            "collect_default": True
+        },
+    }
+
+    def __init__(self, context):
+        super(SimpleEchoService, self).__init__(context, num_nodes=1)
+        self.count = 0
+
+    def echo(self):
+        self.nodes[0].account.ssh("echo {} >> /tmp/log".format(self.count))
+        self.count += 1
+
+class SimpleRunnerTest(Test):
+    def setup(self):
+        self.service = SimpleEchoService(self.test_context)
+
+    @cluster(num_nodes=1)
+    def timeout_test(self):
+        """
+        a simple longer running test to test special run flags agaisnt.
+        """
+        self.service.start()
+
+        while self.service.count < 100000000:
+            self.service.echo()
+            time.sleep(.2)
diff --git a/tests/runner/check_runner.py b/tests/runner/check_runner.py
@@ -16,6 +16,7 @@
 except ImportError:
     from mock import patch, MagicMock  # noqa: F401
 
+import pytest
 from ducktape.tests.runner_client import RunnerClient
 from ducktape.tests.test import TestContext
 from ducktape.tests.runner import TestRunner
@@ -29,7 +30,7 @@
 from .resources.test_thingy import ClusterTestThingy, TestThingy
 from .resources.test_failing_tests import FailingTest
 from ducktape.tests.reporter import JUnitReporter
-
+from ducktape.errors import TimeoutError
 
 from mock import Mock
 import os
@@ -227,3 +228,18 @@ def check_run_failure_with_bad_cluster_allocation(self):
         assert results.num_failed == 1
         assert results.num_passed == 1
         assert results.num_ignored == 0
+
+    def check_runner_timeout(self):
+        """Check process cleanup and error handling in a parallel runner client run."""
+        mock_cluster = LocalhostCluster(num_nodes=1000)
+        session_context = tests.ducktape_mock.session_context(max_parallel=1000, test_runner_timeout=1    )
+
+        test_methods = [TestThingy.test_delayed, TestThingy.test_failure]
+        ctx_list = self._do_expand(test_file=TEST_THINGY_FILE, test_class=TestThingy, test_methods=test_methods,
+                                   cluster=mock_cluster, session_context=session_context)
+        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list, 1)
+
+        with pytest.raises(TimeoutError):
+            results = runner.run_all_tests()
+
+        assert not runner._client_procs
diff --git a/tests/runner/resources/test_thingy.py b/tests/runner/resources/test_thingy.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from time import time
 from ducktape.cluster.cluster_spec import ClusterSpec
 from ducktape.tests.test import Test
 from ducktape.mark import ignore, parametrize
@@ -30,6 +31,9 @@ def min_cluster_spec(self):
 
     def test_pi(self):
         return {"data": 3.14159}
+
+    def test_delayed(self):
+        time.sleep(1)
 
     @ignore
     def test_ignore1(self):