Skip to content

Commit

Permalink
Merge bitcoin/bitcoin#30118: test: improve robustness of connect_nodes()
Browse files Browse the repository at this point in the history
6629d1d test: improve robustness of connect_nodes() (furszy)

Pull request description:

  Decoupled from #27837 because this can help other too, found it investigating a CI failure https://cirrus-ci.com/task/5805115213348864?logs=ci#L3200.

  The `connect_nodes` function in the test framework relies on a stable number of peer
  connections to verify that the new connection between the nodes is successfully established.
  This approach is fragile, as any of the peers involved in the process can drop, lose, or
  create a connection at any step, causing subsequent `wait_until` checks to stall indefinitely
  even when the peers in question were connected successfully.

  This commit improves the situation by using the nodes' subversion and the connection
  direction (inbound/outbound) to identify the exact peer connection and perform the
  checks exclusively on it.

ACKs for top commit:
  stratospher:
    reACK 6629d1d.
  achow101:
    ACK 6629d1d
  maflcko:
    utACK 6629d1d
  AngusP:
    re-ACK 6629d1d

Tree-SHA512: 5f345c0ce49ea81b643e97c5cffd133e182838752c27592fcdeac14ad10919fb4b7ff38e289e42a7c3c638a170bd0d0b7a9cd493898997a2082a7b7ceef4aeeb
  • Loading branch information
achow101 committed May 23, 2024
2 parents f157785 + 6629d1d commit e163d86
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
27 changes: 19 additions & 8 deletions test/functional/test_framework/test_framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,8 +610,6 @@ def connect_nodes(self, a, b, *, peer_advertises_v2=None, wait_for_connect: bool
"""
from_connection = self.nodes[a]
to_connection = self.nodes[b]
from_num_peers = 1 + len(from_connection.getpeerinfo())
to_num_peers = 1 + len(to_connection.getpeerinfo())
ip_port = "127.0.0.1:" + str(p2p_port(b))

if peer_advertises_v2 is None:
Expand All @@ -627,19 +625,32 @@ def connect_nodes(self, a, b, *, peer_advertises_v2=None, wait_for_connect: bool
if not wait_for_connect:
return

# Use subversion as peer id. Test nodes have their node number appended to the user agent string
from_connection_subver = from_connection.getnetworkinfo()['subversion']
to_connection_subver = to_connection.getnetworkinfo()['subversion']

def find_conn(node, peer_subversion, inbound):
return next(filter(lambda peer: peer['subver'] == peer_subversion and peer['inbound'] == inbound, node.getpeerinfo()), None)

# poll until version handshake complete to avoid race conditions
# with transaction relaying
# See comments in net_processing:
# * Must have a version message before anything else
# * Must have a verack message before anything else
self.wait_until(lambda: sum(peer['version'] != 0 for peer in from_connection.getpeerinfo()) == from_num_peers)
self.wait_until(lambda: sum(peer['version'] != 0 for peer in to_connection.getpeerinfo()) == to_num_peers)
self.wait_until(lambda: sum(peer['bytesrecv_per_msg'].pop('verack', 0) >= 21 for peer in from_connection.getpeerinfo()) == from_num_peers)
self.wait_until(lambda: sum(peer['bytesrecv_per_msg'].pop('verack', 0) >= 21 for peer in to_connection.getpeerinfo()) == to_num_peers)
self.wait_until(lambda: find_conn(from_connection, to_connection_subver, inbound=False) is not None)
self.wait_until(lambda: find_conn(to_connection, from_connection_subver, inbound=True) is not None)

def check_bytesrecv(peer, msg_type, min_bytes_recv):
assert peer is not None, "Error: peer disconnected"
return peer['bytesrecv_per_msg'].pop(msg_type, 0) >= min_bytes_recv

self.wait_until(lambda: check_bytesrecv(find_conn(from_connection, to_connection_subver, inbound=False), 'verack', 21))
self.wait_until(lambda: check_bytesrecv(find_conn(to_connection, from_connection_subver, inbound=True), 'verack', 21))

# The message bytes are counted before processing the message, so make
# sure it was fully processed by waiting for a ping.
self.wait_until(lambda: sum(peer["bytesrecv_per_msg"].pop("pong", 0) >= 29 for peer in from_connection.getpeerinfo()) == from_num_peers)
self.wait_until(lambda: sum(peer["bytesrecv_per_msg"].pop("pong", 0) >= 29 for peer in to_connection.getpeerinfo()) == to_num_peers)
self.wait_until(lambda: check_bytesrecv(find_conn(from_connection, to_connection_subver, inbound=False), 'pong', 29))
self.wait_until(lambda: check_bytesrecv(find_conn(to_connection, from_connection_subver, inbound=True), 'pong', 29))

def disconnect_nodes(self, a, b):
def disconnect_nodes_helper(node_a, node_b):
Expand Down
2 changes: 1 addition & 1 deletion test/functional/test_framework/test_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def __init__(self, i, datadir_path, *, chain, rpchost, timewait, timeout_factor,
"-debugexclude=libevent",
"-debugexclude=leveldb",
"-debugexclude=rand",
"-uacomment=testnode%d" % i,
"-uacomment=testnode%d" % i, # required for subversion uniqueness across peers
]
if self.descriptors is None:
self.args.append("-disablewallet")
Expand Down

0 comments on commit e163d86

Please sign in to comment.