Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KAFKA-13234; Transaction system test should clear URPs after broker restarts #11267

Merged
merged 8 commits into from
Sep 1, 2021
2 changes: 1 addition & 1 deletion tests/docker/ducker-ak
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ ducker_test() {
(test -f ./gradlew || gradle) && ./gradlew systemTestLibs
must_popd
if [[ "${debug}" -eq 1 ]]; then
local ducktape_cmd="python3.7 -m debugpy --listen 0.0.0.0:${debugpy_port} --wait-for-client /usr/local/bin/ducktape"
local ducktape_cmd="python3 -m debugpy --listen 0.0.0.0:${debugpy_port} --wait-for-client /usr/local/bin/ducktape"
else
local ducktape_cmd="ducktape"
fi
Expand Down
43 changes: 43 additions & 0 deletions tests/kafkatest/services/kafka/kafka.py
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,49 @@ def delete_topic(self, topic, node=None):
self.logger.info("Running topic delete command...\n%s" % cmd)
node.account.ssh(cmd)

def has_under_replicated_partitions(self):
    """
    Check whether the cluster currently has any under-replicated partitions.

    :return True if at least one partition is under-replicated, False otherwise.
    """
    # An empty list is falsy, so bool() gives the same answer as len(...) > 0.
    return bool(self.describe_under_replicated_partitions())

def await_no_under_replicated_partitions(self, timeout_sec=30):
    """
    Block until the cluster reports no under-replicated partitions.

    :param timeout_sec: the maximum time in seconds to wait
    """
    def fully_replicated():
        # Success condition: the topic tool reports zero under-replicated partitions.
        return not self.has_under_replicated_partitions()

    wait_until(fully_replicated,
               timeout_sec=timeout_sec,
               err_msg="Timed out waiting for under-replicated-partitions to clear")

def describe_under_replicated_partitions(self):
    """
    Use the topic tool to find the under-replicated partitions in the cluster.

    :return the under-replicated partitions as a list of dictionaries
            (e.g. [{"topic": "foo", "partition": 1}, {"topic": "bar", "partition": 0}, ... ])
    """

    node = self.nodes[0]
    # Older broker versions do not support --bootstrap-server for the topic
    # command, so fall back to a ZooKeeper connection where necessary.
    force_use_zk_connection = not node.version.topic_command_supports_bootstrap_server()

    cmd = fix_opts_for_new_jvm(node)
    cmd += "%s --describe --under-replicated-partitions" % \
        self.kafka_topics_cmd_with_optional_security_settings(node, force_use_zk_connection)

    self.logger.debug("Running topic command to describe under-replicated partitions\n%s" % cmd)
    # str.join drains the remote output stream in one pass and avoids the
    # quadratic cost of repeated string concatenation.
    output = "".join(node.account.ssh_capture(cmd))

    under_replicated_partitions = self.parse_describe_topic(output)["partitions"]
    self.logger.debug("Found %d under-replicated-partitions" % len(under_replicated_partitions))

    return under_replicated_partitions

def describe_topic(self, topic, node=None):
if node is None:
node = self.nodes[0]
Expand Down
9 changes: 2 additions & 7 deletions tests/kafkatest/tests/core/downgrade_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def upgrade_from(self, kafka_version):
node.config[config_property.INTER_BROKER_PROTOCOL_VERSION] = str(kafka_version)
node.config[config_property.MESSAGE_FORMAT_VERSION] = str(kafka_version)
self.kafka.start_node(node)
self.wait_until_rejoin()
self.kafka.await_no_under_replicated_partitions(timeout_sec=60)

def downgrade_to(self, kafka_version):
for node in self.kafka.nodes:
Expand All @@ -50,7 +50,7 @@ def downgrade_to(self, kafka_version):
del node.config[config_property.INTER_BROKER_PROTOCOL_VERSION]
del node.config[config_property.MESSAGE_FORMAT_VERSION]
self.kafka.start_node(node)
self.wait_until_rejoin()
self.kafka.await_no_under_replicated_partitions(timeout_sec=60)

def setup_services(self, kafka_version, compression_types, security_protocol, static_membership):
self.create_zookeeper_if_necessary()
Expand All @@ -73,11 +73,6 @@ def setup_services(self, kafka_version, compression_types, security_protocol, st

self.consumer.start()

def wait_until_rejoin(self):
for partition in range(0, self.PARTITIONS):
wait_until(lambda: len(self.kafka.isr_idx_list(self.topic, partition)) == self.REPLICATION_FACTOR,
timeout_sec=60, backoff_sec=1, err_msg="Replicas did not rejoin the ISR in a reasonable amount of time")

@cluster(num_nodes=7)
@parametrize(version=str(LATEST_2_8), compression_types=["snappy"])
@parametrize(version=str(LATEST_2_8), compression_types=["zstd"], security_protocol="SASL_SSL")
Expand Down
2 changes: 2 additions & 0 deletions tests/kafkatest/tests/core/group_mode_transactions_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ def bounce_brokers(self, clean_shutdown):
time.sleep(brokerSessionTimeoutSecs + gracePeriodSecs)
self.kafka.start_node(node)

self.kafka.await_no_under_replicated_partitions()

def create_and_start_message_copier(self, input_topic, output_topic, transactional_id):
message_copier = TransactionalMessageCopier(
context=self.test_context,
Expand Down
2 changes: 2 additions & 0 deletions tests/kafkatest/tests/core/transactions_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ def bounce_brokers(self, clean_shutdown):
time.sleep(brokerSessionTimeoutSecs + gracePeriodSecs)
self.kafka.start_node(node)

self.kafka.await_no_under_replicated_partitions()

def create_and_start_message_copier(self, input_topic, input_partition, output_topic, transactional_id, use_group_metadata):
message_copier = TransactionalMessageCopier(
context=self.test_context,
Expand Down
9 changes: 2 additions & 7 deletions tests/kafkatest/tests/core/upgrade_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@ def setUp(self):
self.num_producers = 1
self.num_consumers = 1

def wait_until_rejoin(self):
for partition in range(0, self.partitions):
wait_until(lambda: len(self.kafka.isr_idx_list(self.topic, partition)) == self.replication_factor, timeout_sec=60,
backoff_sec=1, err_msg="Replicas did not rejoin the ISR in a reasonable amount of time")

def perform_upgrade(self, from_kafka_version, to_message_format_version=None):
self.logger.info("Upgrade ZooKeeper from %s to %s" % (str(self.zk.nodes[0].version), str(DEV_BRANCH)))
self.zk.set_version(DEV_BRANCH)
Expand Down Expand Up @@ -74,7 +69,7 @@ def perform_upgrade(self, from_kafka_version, to_message_format_version=None):
node.config[config_property.INTER_BROKER_PROTOCOL_VERSION] = from_kafka_version
node.config[config_property.MESSAGE_FORMAT_VERSION] = from_kafka_version
self.kafka.start_node(node)
self.wait_until_rejoin()
self.kafka.await_no_under_replicated_partitions(timeout_sec=60)

self.logger.info("Third pass bounce - remove inter.broker.protocol.version config")
for node in self.kafka.nodes:
Expand All @@ -88,7 +83,7 @@ def perform_upgrade(self, from_kafka_version, to_message_format_version=None):
node.config[config_property.INTER_BROKER_PROTOCOL_VERSION] = str(V_2_8_0)
node.config[config_property.MESSAGE_FORMAT_VERSION] = to_message_format_version
self.kafka.start_node(node)
self.wait_until_rejoin()
self.kafka.await_no_under_replicated_partitions(timeout_sec=60)

@cluster(num_nodes=6)
@parametrize(from_kafka_version=str(LATEST_2_8), to_message_format_version=None, compression_types=["none"])
Expand Down