Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 15 additions & 20 deletions playbooks/dev/two-way-network-split.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: "Two-Way Network Split Finality Test"
description: |
Splits a Kurtosis-launched devnet into two halves through the disruptoor
HTTP API, verifies that finality stops for two epochs, heals the split,
waits two more epochs, and verifies finality recovers.
then polls for finality recovery for up to recoveryEpochs (default 3).

The participant groups are computed dynamically from the assertoor client
pool: nodes 1..floor(N/2) form the left half, nodes floor(N/2)+1..N form
Expand All @@ -14,14 +14,14 @@ description: |
(default `http://disruptoor:7700`).
version: 1.0.0
tags: [disruptoor, kurtosis, finality, network-split, consensus]
timeout: 45m
timeout: 120m
config:
disruptoorUrl: "http://disruptoor:7700"
minClientCount: 2
partitionClientTypes: ["execution", "beacon"]
splitObservationEpochs: 2
recoveryEpochs: 2
recoveredMaxUnfinalizedEpochs: 3
recoveryEpochs: 3
recoveredMaxUnfinalizedEpochs: 6
tasks:
- name: get_consensus_specs
id: get_specs
Expand All @@ -46,7 +46,7 @@ tasks:
- name: check_clients_are_healthy
id: client_check
title: "Wait for all devnet clients to be healthy"
timeout: 10m
timeout: 20m
configVars:
minClientCount: "minClientCount"
config:
Expand All @@ -55,7 +55,7 @@ tasks:
- name: check_consensus_finality
id: initial_finality
title: "Wait for initial finality"
timeout: 20m
timeout: 40m
config:
minFinalizedEpochs: 2
maxUnfinalizedEpochs: 3
Expand Down Expand Up @@ -116,6 +116,7 @@ tasks:

- name: check_consensus_slot_range
title: "Wait split observation epochs with the split active"
timeout: 15m
configVars:
minSlotNumber: "| (.tasks.split_start.outputs.currentSlot | tonumber) + ((.splitObservationEpochs | tonumber) * (.tasks.get_specs.outputs.specs.SLOTS_PER_EPOCH | tonumber))"

Expand All @@ -139,21 +140,13 @@ tasks:
curl -fsS -X POST "${disruptoor_url}/v1/state/clear"
curl -fsS "${disruptoor_url}/v1/state" | jq -e '(.partitions | length) == 0 and (.shaping | length) == 0' >/dev/null

- name: check_consensus_slot_range
id: recovery_start
title: "Capture recovery start slot"
timeout: 1m
config: {}

- name: check_consensus_slot_range
title: "Wait recovery epochs after clearing the split"
configVars:
minSlotNumber: "| (.tasks.recovery_start.outputs.currentSlot | tonumber) + ((.recoveryEpochs | tonumber) * (.tasks.get_specs.outputs.specs.SLOTS_PER_EPOCH | tonumber))"

- name: check_consensus_finality
title: "Check finality recovered"
timeout: 4m
title: "Poll for finality recovery (succeeds as soon as unfinalized epochs drop)"
timeout: 25m
configVars:
# Require a fresh finalized checkpoint after the split. maxUnfinalizedEpochs
# alone can pass on stale pre-split finality when the current epoch is close.
minFinalizedEpochs: "| (.tasks.initial_finality.outputs.finalizedEpoch | tonumber) + 1"
maxUnfinalizedEpochs: "recoveredMaxUnfinalizedEpochs"

cleanupTasks:
Expand All @@ -166,4 +159,6 @@ cleanupTasks:
command: |
set -euo pipefail
disruptoor_url=$(echo "$DISRUPTOOR_URL" | jq -r .)
curl -fsS -X POST "${disruptoor_url}/v1/state/clear" || true
if ! curl -fsS -X POST "${disruptoor_url}/v1/state/clear"; then
echo "WARNING: failed to clear disruptoor state during cleanup; devnet may still be partitioned" >&2
fi