diff --git a/internal/controller/ensure_tiebreaker_evict_bug_385_test.go b/internal/controller/ensure_tiebreaker_evict_bug_385_test.go new file mode 100644 index 00000000..4f408257 --- /dev/null +++ b/internal/controller/ensure_tiebreaker_evict_bug_385_test.go @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: Apache-2.0 + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller_test + +import ( + "context" + "slices" + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + blockstoriov1alpha1 "github.com/cozystack/blockstor/api/v1alpha1" + controllerpkg "github.com/cozystack/blockstor/internal/controller" + apiv1 "github.com/cozystack/blockstor/pkg/api/v1" + "github.com/cozystack/blockstor/pkg/store" +) + +// Bug 385 (P1) — `linstor n e ` (node evict) misbehaves when the +// evicted node hosts the free TieBreaker replica. Operator stand repro +// (dev cluster): +// +// Before evict: worker-1 UpToDate, worker-2 UpToDate, worker-3 TieBreaker. +// After `linstor n e dev-worker-3`: +// worker-2 wrongly demoted UpToDate → TieBreaker, +// worker-3 (EVICTED) still holds a diskful resource. +// +// Operator summary: "Если евиктить ноду, при свободной TieBreaker +// реплике, то евикт не произойдёт" — the evict does not actually take +// effect. +// +// Root cause: ensureTiebreaker counted ALL replicas of the RD, including +// the TIE_BREAKER witness sitting on the just-EVICTED node. The witness +// invariant therefore read as already-satisfied (diskful=2 + witness=1) +// and the reconciler never relocated the witness off the drained node — +// leaving it pinned to the evicted satellite. Worse, downstream the +// stranded witness blocked a fresh witness from landing on a healthy +// spare, which is how the operator observed a healthy diskful drift into +// the tiebreaker role. +// +// Fix (Bug 385): replicas on EVICTED / LOST nodes are draining +// placements, not live ones — the witness / quorum decision must ignore +// them (mirroring the placer's documented `disabledNodes` semantic), and +// a TIE_BREAKER stranded on a disabled node must be reaped so a fresh one +// can land on a healthy spare or quorum falls back to off. A healthy +// diskful must NEVER be demoted to TIE_BREAKER. + +// TestBug385EvictTiebreakerNodeReapsStrandedWitness pins the exact stand +// repro: 2 diskful (n1, n2) + 1 TIE_BREAKER witness (n3), then evict n3 +// (the witness node). After the reconcile: +// +// 1. The stranded TIE_BREAKER on the EVICTED n3 MUST be gone (the evict +// takes effect — the witness no longer pins the drained node). +// 2. Both diskful replicas (n1, n2) MUST stay diskful — a healthy +// replica is NEVER demoted to TIE_BREAKER. +// 3. No witness lands back on n3, and no spare exists (n1/n2 are +// diskful) so no new witness is created. +func TestBug385EvictTiebreakerNodeReapsStrandedWitness(t *testing.T) { + t.Parallel() + + scheme := newScheme(t) + st := store.NewInMemory() + ctx := context.Background() + + // n1, n2 healthy; n3 EVICTED (the operator just ran `n e n3`). + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: "n1", Type: apiv1.NodeTypeSatellite, + }); err != nil { + t.Fatalf("seed n1: %v", err) + } + + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: "n2", Type: apiv1.NodeTypeSatellite, + }); err != nil { + t.Fatalf("seed n2: %v", err) + } + + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: "n3", Type: apiv1.NodeTypeSatellite, + Flags: []string{apiv1.NodeFlagEvicted}, + }); err != nil { + t.Fatalf("seed n3 (evicted): %v", err) + } + + // Steady state before evict: n1 + n2 diskful, n3 TIE_BREAKER. + for _, n := range []string{"n1", "n2"} { + if err := st.Resources().Create(ctx, &apiv1.Resource{ + Name: "pvc-bug385", NodeName: n, + }); err != nil { + t.Fatalf("seed diskful %s: %v", n, err) + } + } + + if err := st.Resources().Create(ctx, &apiv1.Resource{ + Name: "pvc-bug385", NodeName: "n3", + Flags: []string{apiv1.ResourceFlagDiskless, apiv1.ResourceFlagTieBreaker}, + }); err != nil { + t.Fatalf("seed n3 witness: %v", err) + } + + rd := &blockstoriov1alpha1.ResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{Name: "pvc-bug385"}, + } + + cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(rd).Build() + + rec := &controllerpkg.ResourceDefinitionReconciler{ + Client: cli, + Scheme: scheme, + Store: st, + } + + if err := rec.EnsureTiebreaker(ctx, rd); err != nil { + t.Fatalf("EnsureTiebreaker: %v", err) + } + + all, err := st.Resources().ListByDefinition(ctx, "pvc-bug385") + if err != nil { + t.Fatalf("list: %v", err) + } + + // Invariant 1: the stranded TIE_BREAKER on the EVICTED n3 is gone. + if _, err := st.Resources().Get(ctx, "pvc-bug385", "n3"); err == nil { + t.Errorf("Bug 385: TIE_BREAKER witness still pinned to EVICTED n3; entries=%v", all) + } + + // Invariant 2: both diskful replicas survive unchanged — NEVER demoted. + for _, n := range []string{"n1", "n2"} { + got, getErr := st.Resources().Get(ctx, "pvc-bug385", n) + if getErr != nil { + t.Fatalf("Bug 385: diskful replica on %s vanished: %v", n, getErr) + } + + if slices.Contains(got.Flags, apiv1.ResourceFlagTieBreaker) { + t.Errorf("Bug 385: healthy diskful on %s was demoted to TIE_BREAKER; flags=%v", n, got.Flags) + } + + if slices.Contains(got.Flags, apiv1.ResourceFlagDiskless) { + t.Errorf("Bug 385: healthy diskful on %s gained DISKLESS; flags=%v", n, got.Flags) + } + } + + // Invariant 3: exactly the two diskful replicas remain (no spare to + // host a fresh witness — n1/n2 are diskful, n3 is evicted). + if len(all) != 2 { + t.Fatalf("Bug 385: replica count=%d, want 2 (both diskful, no stranded/relocated witness); entries=%v", + len(all), all) + } + + diskful, diskless, witness := classifyReplicas(all) + if diskful != 2 || diskless != 0 || witness != 0 { + t.Errorf("Bug 385: composition diskful=%d diskless=%d witness=%d, want 2/0/0; entries=%v", + diskful, diskless, witness, all) + } +} + +// TestBug385EvictTiebreakerNodeRelocatesWitnessToSpare extends the repro +// with a spare healthy node (n4). Evicting the witness node (n3) must +// relocate the witness onto the spare — not leave it stranded on the +// drained node and not demote a healthy diskful. +// +// Expected convergence: n1 + n2 diskful, witness lands on n4, nothing on +// the EVICTED n3. +func TestBug385EvictTiebreakerNodeRelocatesWitnessToSpare(t *testing.T) { + t.Parallel() + + scheme := newScheme(t) + st := store.NewInMemory() + ctx := context.Background() + + for _, n := range []string{"n1", "n2", "n4"} { + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: n, Type: apiv1.NodeTypeSatellite, + }); err != nil { + t.Fatalf("seed node %s: %v", n, err) + } + } + + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: "n3", Type: apiv1.NodeTypeSatellite, + Flags: []string{apiv1.NodeFlagEvicted}, + }); err != nil { + t.Fatalf("seed n3 (evicted): %v", err) + } + + for _, n := range []string{"n1", "n2"} { + if err := st.Resources().Create(ctx, &apiv1.Resource{ + Name: "pvc-bug385b", NodeName: n, + }); err != nil { + t.Fatalf("seed diskful %s: %v", n, err) + } + } + + // Witness stranded on the evicted n3. + if err := st.Resources().Create(ctx, &apiv1.Resource{ + Name: "pvc-bug385b", NodeName: "n3", + Flags: []string{apiv1.ResourceFlagDiskless, apiv1.ResourceFlagTieBreaker}, + }); err != nil { + t.Fatalf("seed n3 witness: %v", err) + } + + rd := &blockstoriov1alpha1.ResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{Name: "pvc-bug385b"}, + } + + cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(rd).Build() + + rec := &controllerpkg.ResourceDefinitionReconciler{ + Client: cli, + Scheme: scheme, + Store: st, + } + + if err := rec.EnsureTiebreaker(ctx, rd); err != nil { + t.Fatalf("EnsureTiebreaker: %v", err) + } + + all, err := st.Resources().ListByDefinition(ctx, "pvc-bug385b") + if err != nil { + t.Fatalf("list: %v", err) + } + + // The stranded witness on the EVICTED n3 must be gone. + if _, err := st.Resources().Get(ctx, "pvc-bug385b", "n3"); err == nil { + t.Errorf("Bug 385: witness still on EVICTED n3; entries=%v", all) + } + + // The witness must have relocated to the healthy spare n4. + n4, err := st.Resources().Get(ctx, "pvc-bug385b", "n4") + if err != nil { + t.Fatalf("Bug 385: witness did not relocate to healthy spare n4: %v; entries=%v", err, all) + } + + if !slices.Contains(n4.Flags, apiv1.ResourceFlagTieBreaker) { + t.Errorf("Bug 385: relocated replica on n4 lacks TIE_BREAKER flag; flags=%v", n4.Flags) + } + + // Diskful replicas untouched. + for _, n := range []string{"n1", "n2"} { + got, getErr := st.Resources().Get(ctx, "pvc-bug385b", n) + if getErr != nil { + t.Fatalf("Bug 385: diskful on %s vanished: %v", n, getErr) + } + + if slices.Contains(got.Flags, apiv1.ResourceFlagTieBreaker) || + slices.Contains(got.Flags, apiv1.ResourceFlagDiskless) { + t.Errorf("Bug 385: healthy diskful on %s changed class; flags=%v", n, got.Flags) + } + } + + diskful, diskless, witness := classifyReplicas(all) + if diskful != 2 || diskless != 0 || witness != 1 { + t.Errorf("Bug 385: composition diskful=%d diskless=%d witness=%d, want 2/0/1; entries=%v", + diskful, diskless, witness, all) + } +} + +// TestBug385EvictDiskfulNodeDoesNotCountStrandedDiskful pins the other +// half of the bug: evicting a DISKFUL node must not let its stranded +// diskful replica keep the witness invariant satisfied. Topology: n1 + n2 +// diskful + n3 witness, then evict n2 (a diskful node). With n2's diskful +// no longer counted, the live diskful count drops to 1 and the orphaned +// witness on n3 must collapse (Bug-338 carve-out, now reached via the +// disabled-node filter) — the lone diskful runs quorum=off. Crucially, +// the witness on n3 is NOT promoted/demoted onto a healthy diskful. +func TestBug385EvictDiskfulNodeDoesNotCountStrandedDiskful(t *testing.T) { + t.Parallel() + + scheme := newScheme(t) + st := store.NewInMemory() + ctx := context.Background() + + for _, n := range []string{"n1", "n3"} { + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: n, Type: apiv1.NodeTypeSatellite, + }); err != nil { + t.Fatalf("seed node %s: %v", n, err) + } + } + + // n2 is the evicted DISKFUL node. + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: "n2", Type: apiv1.NodeTypeSatellite, + Flags: []string{apiv1.NodeFlagEvicted}, + }); err != nil { + t.Fatalf("seed n2 (evicted): %v", err) + } + + for _, n := range []string{"n1", "n2"} { + if err := st.Resources().Create(ctx, &apiv1.Resource{ + Name: "pvc-bug385c", NodeName: n, + }); err != nil { + t.Fatalf("seed diskful %s: %v", n, err) + } + } + + if err := st.Resources().Create(ctx, &apiv1.Resource{ + Name: "pvc-bug385c", NodeName: "n3", + Flags: []string{apiv1.ResourceFlagDiskless, apiv1.ResourceFlagTieBreaker}, + }); err != nil { + t.Fatalf("seed n3 witness: %v", err) + } + + rd := &blockstoriov1alpha1.ResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{Name: "pvc-bug385c"}, + } + + cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(rd).Build() + + rec := &controllerpkg.ResourceDefinitionReconciler{ + Client: cli, + Scheme: scheme, + Store: st, + } + + if err := rec.EnsureTiebreaker(ctx, rd); err != nil { + t.Fatalf("EnsureTiebreaker: %v", err) + } + + // The lone live diskful on n1 must survive, still diskful. + n1, err := st.Resources().Get(ctx, "pvc-bug385c", "n1") + if err != nil { + t.Fatalf("Bug 385: live diskful on n1 vanished: %v", err) + } + + if slices.Contains(n1.Flags, apiv1.ResourceFlagTieBreaker) || + slices.Contains(n1.Flags, apiv1.ResourceFlagDiskless) { + t.Errorf("Bug 385: healthy diskful on n1 changed class; flags=%v", n1.Flags) + } + + // The orphaned witness on n3 must collapse — live diskful dropped to + // 1, no user-diskless, so the witness is meaningless. + if _, err := st.Resources().Get(ctx, "pvc-bug385c", "n3"); err == nil { + t.Errorf("Bug 385: orphaned witness on n3 survived after evicting the second diskful") + } + + // The stranded diskful on the EVICTED n2 is the NodeReconciler's + // migration responsibility — the witness path must leave it alone. + if _, err := st.Resources().Get(ctx, "pvc-bug385c", "n2"); err != nil { + t.Errorf("Bug 385: witness path must not delete the stranded diskful on EVICTED n2: %v", err) + } +} diff --git a/internal/controller/resourcedefinition_controller.go b/internal/controller/resourcedefinition_controller.go index d585929e..abd0d852 100644 --- a/internal/controller/resourcedefinition_controller.go +++ b/internal/controller/resourcedefinition_controller.go @@ -223,7 +223,30 @@ func (r *ResourceDefinitionReconciler) ensureTiebreaker(ctx context.Context, rd return err } - diskful, diskless := splitByDiskless(replicas) + // Bug 385: a replica hosted on an EVICTED / LOST node is a draining + // placement, not a live one. Counting it would (a) let a stranded + // diskful keep the witness invariant "satisfied" so the reconciler + // never relocates the witness off the drained node, and (b) leave a + // TIE_BREAKER pinned to the very node the operator just evicted — + // the exact "evict doesn't take effect" symptom. Mirror the placer's + // documented semantic ("replicas on EVICTED/LOST nodes are NOT + // counted") so the witness / quorum decision runs over live replicas + // only. Stranded witnesses are reaped explicitly below so a fresh one + // can land on a healthy spare (or quorum falls to off when none + // remains) — never by demoting a healthy diskful to TIE_BREAKER. + disabled, err := r.disabledNodeSet(ctx) + if err != nil { + return err + } + + live, stranded := splitByDisabledNode(replicas, disabled) + + err = r.removeStrandedWitnesses(ctx, rd.Name, stranded) + if err != nil { + return err + } + + diskful, diskless := splitByDiskless(live) witness := filterTieBreaker(diskless) wantWitness := shouldTieBreakerExist(rd, diskful, diskless, witness) @@ -1156,6 +1179,74 @@ func isDisabledNode(node *apiv1.Node) bool { return false } +// disabledNodeSet returns the set of node names flagged EVICTED / LOST. +// Bug 385: the RD-level witness / quorum decision must treat replicas on +// these nodes as draining placements, not live ones — mirroring the +// placer's `disabledNodes` semantic. Re-probed from the store on every +// reconcile so a freshly-stamped EVICTED flag takes effect on the next +// witness pass. +func (r *ResourceDefinitionReconciler) disabledNodeSet(ctx context.Context) (map[string]struct{}, error) { + nodes, err := r.Store.Nodes().List(ctx) + if err != nil { + return nil, err + } + + disabled := make(map[string]struct{}) + + for i := range nodes { + if isDisabledNode(&nodes[i]) { + disabled[nodes[i].Name] = struct{}{} + } + } + + return disabled, nil +} + +// splitByDisabledNode partitions replicas into (live, stranded) by the +// disabled-node set. `live` is hosted on healthy (non-EVICTED/-LOST) +// nodes and drives the witness / quorum decision; `stranded` sits on a +// draining node and is handled separately (Bug 385). +func splitByDisabledNode(replicas []apiv1.Resource, disabled map[string]struct{}) ([]apiv1.Resource, []apiv1.Resource) { + var live, stranded []apiv1.Resource + + for i := range replicas { + if _, off := disabled[replicas[i].NodeName]; off { + stranded = append(stranded, replicas[i]) + + continue + } + + live = append(live, replicas[i]) + } + + return live, stranded +} + +// removeStrandedWitnesses deletes every TIE_BREAKER witness that sits on +// an EVICTED / LOST node (Bug 385). A diskless witness has no business +// remaining on a drained node — leaving it there pins the witness to the +// very node the operator evicted and blocks a fresh witness from landing +// on a healthy spare. Diskful replicas on disabled nodes are intentionally +// left alone here: relocating / deleting those is the NodeReconciler's job +// (placer gap-fill for EVICTED, source-delete for LOST), and dropping a +// diskful's only data copy from this path would be a data-availability +// hazard. Per-row Delete tolerates NotFound so a concurrent +// node-migration cascade can't fail the reconcile. +func (r *ResourceDefinitionReconciler) removeStrandedWitnesses(ctx context.Context, rdName string, stranded []apiv1.Resource) error { + for i := range stranded { + if !slices.Contains(stranded[i].Flags, apiv1.ResourceFlagTieBreaker) { + continue + } + + err := r.Store.Resources().Delete(ctx, rdName, stranded[i].NodeName) + if err != nil && !stderrors.Is(err, store.ErrNotFound) { + return err + } + } + + return nil +} + // alreadyExists is a string-based check for the wrapped errors the // k8s store returns. The k8s store wraps errAlreadyExists from // kube-apiserver in a cockroachdb/errors.Wrap — Is() doesn't tunnel diff --git a/tests/e2e/cli-matrix/n-evict-tiebreaker-no-shuffle.sh b/tests/e2e/cli-matrix/n-evict-tiebreaker-no-shuffle.sh new file mode 100755 index 00000000..9167a7a3 --- /dev/null +++ b/tests/e2e/cli-matrix/n-evict-tiebreaker-no-shuffle.sh @@ -0,0 +1,193 @@ +#!/usr/bin/env bash +# +# usage: n-evict-tiebreaker-no-shuffle.sh WORK_DIR +# +# L6 cli-matrix cell — Bug 385 (P1, user-reported, stand-observable). +# +# Reproduction from the dev stand: +# +# $ linstor r l +# test worker-1 … UpToDate +# test worker-2 … UpToDate +# test worker-3 … TieBreaker +# +# $ linstor n e dev-worker-3 +# SUCCESS +# +# $ linstor n l +# dev-worker-1 SATELLITE Online +# dev-worker-2 SATELLITE Online +# dev-worker-3 SATELLITE EVICTED +# $ linstor r l +# test worker-1 … UpToDate +# test worker-2 … TieBreaker ← WRONG: was UpToDate, demoted to TB +# test worker-3 … UpToDate ← EVICTED node still holds a diskful +# +# Operator summary: "Если евиктить ноду, при свободной TieBreaker +# реплике, то евикт не произойдёт" — the evict does not actually take +# effect. +# +# Root cause: ensureTiebreaker counted the TIE_BREAKER witness sitting on +# the just-EVICTED node as a live witness, so the witness invariant read +# as satisfied (diskful=2 + witness=1) and the reconciler never relocated +# the witness off the drained node. A healthy diskful must NEVER be +# demoted to TIE_BREAKER. +# +# Fix: replicas on EVICTED / LOST nodes are draining placements, not live +# ones — the witness / quorum decision ignores them and a TIE_BREAKER +# stranded on a disabled node is reaped so a fresh one lands on a healthy +# spare (or quorum falls to off when none remains). +# +# Unit pin: internal/controller/ensure_tiebreaker_evict_bug_385_test.go +# (TestBug385EvictTiebreakerNodeReapsStrandedWitness + +# TestBug385EvictTiebreakerNodeRelocatesWitnessToSpare). +# +# This L6 cell is the stand-side companion: it builds the 2r+tb shape on +# 3 workers, evicts the tiebreaker node via the real `linstor n e` CLI, +# and asserts that within 30s (a) the EVICTED node no longer hosts a +# witness, and (b) BOTH original diskful replicas are still diskful — no +# healthy UpToDate replica was demoted to TIE_BREAKER. + +set -euo pipefail + +WORK_DIR=${1:?work_dir required} +export KUBECONFIG="$WORK_DIR/kubeconfig" + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=lib.sh +source "$SCRIPT_DIR/lib.sh" + +require_workers 3 + +linstor_cli_setup + +RD=cli-matrix-385 +N1=$WORKER_1 +N2=$WORKER_2 +N3=$WORKER_3 + +EVICTED_NODE="" +cleanup() { + # Restore any node we evicted so the shared stand is left healthy + # for the next cell, BEFORE deleting the RD. + if [[ -n "$EVICTED_NODE" ]]; then + "${LCTL[@]}" node restore "$EVICTED_NODE" >/dev/null 2>&1 || true + fi + delete_rd "$RD" + assert_no_orphans "$RD" + linstor_cli_teardown +} +trap cleanup EXIT + +echo ">> [Bug 385] shape-2r-tb: 2-replica RD + auto-tiebreaker" +"${LCTL[@]}" resource-definition create "$RD" >/dev/null +"${LCTL[@]}" volume-definition create "$RD" 256M >/dev/null +"${LCTL[@]}" resource create --auto-place=2 --storage-pool=stand "$RD" >/dev/null + +echo ">> wait for steady state: 2 diskful UpToDate + 1 TIE_BREAKER witness" +deadline=$(( $(date +%s) + 180 )) +uptodate_pair="" +tb_node="" +while (( $(date +%s) < deadline )); do + pair=() + tb="" + for n in "$N1" "$N2" "$N3"; do + flags=$(kubectl get "resources.blockstor.cozystack.io/${RD}.${n}" \ + -o jsonpath='{.spec.flags}' 2>/dev/null || echo "") + if [[ "$flags" == *"TIE_BREAKER"* ]]; then + tb=$n + continue + fi + d=$(status_disk_state "$RD" "$n" 0) + if [[ "$d" == "UpToDate" ]]; then + pair+=("$n") + fi + done + if (( ${#pair[@]} >= 2 )) && [[ -n "$tb" ]]; then + uptodate_pair="${pair[0]} ${pair[1]}" + tb_node=$tb + break + fi + sleep 3 +done +if [[ -z "$uptodate_pair" ]] || [[ -z "$tb_node" ]]; then + echo "FAIL: never reached steady state (2 diskful UpToDate + TIE_BREAKER) within 180s" >&2 + "${LCTL[@]}" resource list --resources "$RD" 2>&1 | tail -20 >&2 + exit 1 +fi +echo " diskful pair: $uptodate_pair tiebreaker: $tb_node" + +# Capture the two diskful nodes so we can assert neither is demoted. +DISKFUL_A=$(echo "$uptodate_pair" | awk '{print $1}') +DISKFUL_B=$(echo "$uptodate_pair" | awk '{print $2}') + +# Evict the tiebreaker node — the exact operator action from the repro. +echo ">> linstor n e $tb_node (evict the tiebreaker node)" +EVICTED_NODE=$tb_node +"${LCTL[@]}" node evict "$tb_node" >/dev/null 2>&1 || { + echo "FAIL: n e (node evict) exited non-zero" >&2 + exit 1 +} + +# The node must show EVICTED in `linstor n l` (evict took effect at the +# node level). +echo ">> confirm $tb_node is flagged EVICTED" +deadline=$(( $(date +%s) + 30 )) +node_evicted=false +while (( $(date +%s) < deadline )); do + nflags=$(kubectl get "nodes.blockstor.cozystack.io/${tb_node}" \ + -o jsonpath='{.spec.flags}' 2>/dev/null || echo "") + if [[ "$nflags" == *"EVICTED"* ]]; then + node_evicted=true + break + fi + sleep 2 +done +if [[ "$node_evicted" != "true" ]]; then + echo "FAIL: node $tb_node never reached EVICTED flag within 30s" >&2 + exit 1 +fi + +echo ">> wait up to 30s for the tiebreaker on EVICTED $tb_node to be relocated/reaped" +deadline=$(( $(date +%s) + 30 )) +reaped=false +while (( $(date +%s) < deadline )); do + # The witness must no longer sit on the evicted node. + flags=$(kubectl get "resources.blockstor.cozystack.io/${RD}.${tb_node}" \ + -o jsonpath='{.spec.flags}' 2>/dev/null || echo "__absent__") + if [[ "$flags" == "__absent__" ]] || [[ "$flags" != *"TIE_BREAKER"* ]]; then + reaped=true + break + fi + sleep 2 +done + +if [[ "$reaped" != "true" ]]; then + echo "FAIL (Bug 385 regression): TIE_BREAKER still pinned to EVICTED $tb_node within 30s" >&2 + "${LCTL[@]}" resource list --resources "$RD" 2>&1 | tail -20 >&2 + exit 1 +fi + +# The load-bearing invariant: NEITHER healthy diskful was demoted to +# TIE_BREAKER (the exact wrong behaviour from the repro where worker-2 +# went UpToDate → TieBreaker). +echo ">> assert neither healthy diskful ($DISKFUL_A, $DISKFUL_B) was demoted to TIE_BREAKER" +for n in "$DISKFUL_A" "$DISKFUL_B"; do + flags=$(kubectl get "resources.blockstor.cozystack.io/${RD}.${n}" \ + -o jsonpath='{.spec.flags}' 2>/dev/null || echo "__absent__") + if [[ "$flags" == "__absent__" ]]; then + echo "FAIL (Bug 385): healthy diskful on $n disappeared after evict; flags=$flags" >&2 + exit 1 + fi + if [[ "$flags" == *"TIE_BREAKER"* ]]; then + echo "FAIL (Bug 385 regression): healthy diskful on $n was demoted to TIE_BREAKER; flags=$flags" >&2 + "${LCTL[@]}" resource list --resources "$RD" 2>&1 | tail -20 >&2 + exit 1 + fi + if [[ "$flags" == *"DISKLESS"* ]]; then + echo "FAIL (Bug 385): healthy diskful on $n gained DISKLESS after evict; flags=$flags" >&2 + exit 1 + fi +done + +echo ">> n-evict-tiebreaker-no-shuffle OK (Bug 385 pinned: evict relocates the witness, no healthy diskful demoted)" diff --git a/tests/operator-harness/replay/n-evict-tiebreaker-no-shuffle.yaml b/tests/operator-harness/replay/n-evict-tiebreaker-no-shuffle.yaml new file mode 100644 index 00000000..aaba8b1c --- /dev/null +++ b/tests/operator-harness/replay/n-evict-tiebreaker-no-shuffle.yaml @@ -0,0 +1,95 @@ +name: n-evict-tiebreaker-no-shuffle +description: | + Bug-385 catcher (P1, user-reported). Cluster shape: 2 diskful replicas + (node1, node2) + 1 TieBreaker witness on node3. Operator evicts the + tiebreaker node (`linstor n e node3`). + + Pre-fix the witness was counted as a live tiebreaker even though it sat + on the just-EVICTED node, so the witness invariant read as satisfied + and the evict never took effect — the TieBreaker stayed pinned to the + drained node and a healthy diskful drifted into the tiebreaker role. + + Expected convergence: + - the witness on the EVICTED node3 is reaped (resource_absent); with + only 3 nodes there is no healthy spare, so it does not relocate. + - BOTH original diskful replicas (node1, node2) stay UpToDate — a + healthy diskful is NEVER demoted to TieBreaker. + +prerequisites: + min_nodes: 3 + storage_pool: stand + +vars: + sp: stand + +steps: + - name: create-rd + cmd: ["resource-definition", "create", "{{rd}}"] + expect_exit: 0 + - name: create-vd + cmd: ["volume-definition", "create", "{{rd}}", "32M"] + expect_exit: 0 + - name: r-c-node1-diskful + cmd: ["resource", "create", "{{node1}}", "{{rd}}", "--storage-pool", "{{sp}}"] + expect_exit: 0 + - name: r-c-node2-diskful + cmd: ["resource", "create", "{{node2}}", "{{rd}}", "--storage-pool", "{{sp}}"] + expect_exit: 0 + - name: wait-2-uptodate + cmd: ["resource", "list", "--resources", "{{rd}}"] + expect_exit: 0 + await: + kind: all_uptodate + rd: "{{rd}}" + timeout_s: 240 + - name: tiebreaker-on-node3 + # Hand-create the witness on node3 so the scenario is deterministic + # regardless of the auto-tiebreaker config / which node auto-tb picks. + cmd: ["resource", "create", "{{node3}}", "{{rd}}", "--diskless"] + expect_exit: 0 + await: + kind: replica_diskless + rd: "{{rd}}" + node: "{{node3}}" + timeout_s: 60 + - name: n-evict-node3 + # The exact operator action from the repro: evict the tiebreaker node. + cmd: ["node", "evict", "{{node3}}"] + expect_exit: 0 + await: + # Evict must take effect: the witness no longer sits on the drained + # node3. No healthy spare exists (3 nodes), so it is reaped, not + # relocated. + kind: resource_absent + rd: "{{rd}}" + node: "{{node3}}" + timeout_s: 90 + - name: node1-still-uptodate + # Load-bearing invariant: the first healthy diskful was NOT demoted + # to TieBreaker by the evict (the wrong behaviour from the repro). + cmd: ["resource", "list", "--resources", "{{rd}}"] + expect_exit: 0 + await: + kind: disk_state + rd: "{{rd}}" + node: "{{node1}}" + expected: UpToDate + timeout_s: 90 + - name: node2-still-uptodate + cmd: ["resource", "list", "--resources", "{{rd}}"] + expect_exit: 0 + await: + kind: disk_state + rd: "{{rd}}" + node: "{{node2}}" + expected: UpToDate + timeout_s: 90 + +teardown: + # Restore the evicted node so the shared stand is left healthy for the + # next workflow, BEFORE deleting the RD. + - cmd: ["node", "restore", "{{node3}}"] + - cmd: ["resource-definition", "delete", "{{rd}}"] + +invariants: + - no_orphans