diff --git a/pkg/rest/autoplace.go b/pkg/rest/autoplace.go index 025026c6..8480f98c 100644 --- a/pkg/rest/autoplace.go +++ b/pkg/rest/autoplace.go @@ -2052,7 +2052,29 @@ func (s *Server) resolveTakeoverStorPool(ctx context.Context, rdName, takeoverNo return "", errors.Wrapf(rgErr, "lookup RG %q for takeover pool resolution", rd.ResourceGroupName) } - return rg.SelectFilter.StoragePool, nil + if pool := rg.SelectFilter.StoragePool; pool != "" { + return pool, nil + } + + // Bug 364 (P1): also walk the RG's StoragePoolList default. The + // linstor-csi driver posts no body to + // `POST /v1/resource-definitions/{rd}/resources/{node}` and + // relies on RG-side propagation for the pool name; when the + // SC sets `linstor.csi.linbit.com/storagePool:

`, + // linstor-csi's RGCreate path lands the value under + // SelectFilter.StoragePoolList[0] (not .StoragePool). Pre-fix the + // fresh-create resolution only checked SelectFilter.StoragePool, + // so an `r c ` against such an RG persisted a Resource + // with empty Props["StorPoolName"] — the satellite reconciler + // then had no pool to bind to and the replica wedged at + // `Provisioning` forever. Matches the existing + // `resolveGatePoolName` walk shape (the per-pool capacity gate + // already tolerates StoragePoolList tier-4 for the same reason). + if len(rg.SelectFilter.StoragePoolList) > 0 { + return rg.SelectFilter.StoragePoolList[0], nil + } + + return "", nil } // resolveStorPoolForFreshCreate implements the Bug 327 fix for the diff --git a/pkg/rest/bug_364_resource_create_pool_resolve_test.go b/pkg/rest/bug_364_resource_create_pool_resolve_test.go new file mode 100644 index 00000000..572159b8 --- /dev/null +++ b/pkg/rest/bug_364_resource_create_pool_resolve_test.go @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: Apache-2.0 + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rest + +import ( + "net/http" + "testing" + + apiv1 "github.com/cozystack/blockstor/pkg/api/v1" + "github.com/cozystack/blockstor/pkg/store" +) + +// Bug 364 (P1, hunt-caught 2026-06-02): `linstor r c ` +// without `--storage-pool` against an RG that pins its default via +// `select_filter.storage_pool_list` (not `select_filter.storage_pool`) +// created a Resource with empty `Props["StorPoolName"]`. The satellite +// reconciler then had no pool to bind to and the replica wedged at +// "Provisioning" — visible to the operator only as a phantom replica +// that never reached UpToDate. +// +// Bug-hunt v7 (2026-06-02) reproduced on a live dev stand: +// +// $ curl -sS -X PUT .../v1/resource-groups/testrg \ +// -d '{"select_filter":{"storage_pool":"","storage_pool_list":["lvm-thin"]}}' +// 200 +// $ linstor r c dev-worker-1 testlist +// SUCCESS: resource(s) created on resource-definition: testlist +// $ curl -sS .../v1/resource-definitions/testlist/resources \ +// | jq '.[].props' +// null # <-- empty: StorPoolName missing +// +// linstor-csi posts no body to the per-node resource-create endpoint +// and relies on RG-side propagation for the pool name. When the +// StorageClass sets `linstor.csi.linbit.com/storagePool:

`, +// linstor-csi's RGCreate path lands the value under +// SelectFilter.StoragePoolList[0] (not .StoragePool), so this is the +// canonical CSI shape — every Cozystack volume hits this path. +// +// The fix extends `resolveTakeoverStorPool` to also walk the RG's +// `StoragePoolList[0]` (mirroring the existing `resolveGatePoolName` +// fallback chain — the per-pool capacity gate already tolerates that +// tier). 1 line of intent, 3 lines of fence. + +// TestBug364ResourceCreatePicksUpStoragePoolList pins the canonical +// reproducer: an RG with only `storage_pool_list` (no +// `storage_pool`) must drive `resolveStorPoolForFreshCreate` to stamp +// the first entry onto `res.Props["StorPoolName"]`. +func TestBug364ResourceCreatePicksUpStoragePoolList(t *testing.T) { + t.Parallel() + + st := store.NewInMemory() + ctx := t.Context() + + if err := st.ResourceGroups().Create(ctx, &apiv1.ResourceGroup{ + Name: "csirg", + SelectFilter: apiv1.AutoSelectFilter{ + PlaceCount: 1, + StoragePoolList: []string{"lvm-thin"}, + }, + }); err != nil { + t.Fatalf("seed RG: %v", err) + } + + if err := st.ResourceDefinitions().Create(ctx, &apiv1.ResourceDefinition{ + Name: "rd1", + ResourceGroupName: "csirg", + }); err != nil { + t.Fatalf("seed RD: %v", err) + } + + if err := st.Nodes().Create(ctx, &apiv1.Node{ + Name: "n1", + Type: apiv1.NodeTypeSatellite, + NetInterfaces: []apiv1.NetInterface{ + {Name: DefaultNetInterfaceName, Address: "10.0.0.1"}, + }, + }); err != nil { + t.Fatalf("seed Node: %v", err) + } + + if err := st.StoragePools().Create(ctx, &apiv1.StoragePool{ + NodeName: "n1", + StoragePoolName: "lvm-thin", + ProviderKind: apiv1.StoragePoolKindLVMThin, + }); err != nil { + t.Fatalf("seed SP: %v", err) + } + + base, stop := startServerWithStore(t, st) + defer stop() + + // linstor-csi shape: empty body, path-only intent. + resp := httpPost(t, base+"/v1/resource-definitions/rd1/resources/n1", + []byte("")) + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusCreated { + got, _ := readAllBody(resp) + t.Fatalf("status: got %d, want 201. Body: %s", resp.StatusCode, got) + } + + res, err := st.Resources().Get(ctx, "rd1", "n1") + if err != nil { + t.Fatalf("re-fetch resource: %v", err) + } + + if got, want := res.Props["StorPoolName"], "lvm-thin"; got != want { + t.Errorf("StorPoolName: got %q, want %q (Bug 364: storage_pool_list[0] must seed fresh-create)", + got, want) + } +} + +// TestBug364ResolveTakeoverStorPoolFromList exercises the helper +// directly so a future refactor of the create-pipeline doesn't lose +// the tier-4 fallback semantics. +func TestBug364ResolveTakeoverStorPoolFromList(t *testing.T) { + t.Parallel() + + st := store.NewInMemory() + ctx := t.Context() + + if err := st.ResourceGroups().Create(ctx, &apiv1.ResourceGroup{ + Name: "rg", + SelectFilter: apiv1.AutoSelectFilter{ + StoragePoolList: []string{"pool-a", "pool-b"}, + }, + }); err != nil { + t.Fatalf("seed RG: %v", err) + } + + if err := st.ResourceDefinitions().Create(ctx, &apiv1.ResourceDefinition{ + Name: "rd", + ResourceGroupName: "rg", + }); err != nil { + t.Fatalf("seed RD: %v", err) + } + + s := &Server{Store: st} + + got, err := s.resolveTakeoverStorPool(ctx, "rd", "n1") + if err != nil { + t.Fatalf("resolveTakeoverStorPool: %v", err) + } + + if want := "pool-a"; got != want { + t.Errorf("resolveTakeoverStorPool: got %q, want %q (must return StoragePoolList[0])", + got, want) + } +} + +// TestBug364StoragePoolWinsOverList pins the precedence: when both +// SelectFilter.StoragePool and SelectFilter.StoragePoolList are set, +// the single .StoragePool takes priority (matches +// `resolveGatePoolName`'s tier ordering and upstream LINSTOR's +// CtrlRscCrtApiHelper). +func TestBug364StoragePoolWinsOverList(t *testing.T) { + t.Parallel() + + st := store.NewInMemory() + ctx := t.Context() + + if err := st.ResourceGroups().Create(ctx, &apiv1.ResourceGroup{ + Name: "rg", + SelectFilter: apiv1.AutoSelectFilter{ + StoragePool: "wins", + StoragePoolList: []string{"loses"}, + }, + }); err != nil { + t.Fatalf("seed RG: %v", err) + } + + if err := st.ResourceDefinitions().Create(ctx, &apiv1.ResourceDefinition{ + Name: "rd", + ResourceGroupName: "rg", + }); err != nil { + t.Fatalf("seed RD: %v", err) + } + + s := &Server{Store: st} + + got, err := s.resolveTakeoverStorPool(ctx, "rd", "n1") + if err != nil { + t.Fatalf("resolveTakeoverStorPool: %v", err) + } + + if want := "wins"; got != want { + t.Errorf("resolveTakeoverStorPool: got %q, want %q (single .StoragePool must beat list)", + got, want) + } +} + +// TestBug364NoListNoStoragePoolReturnsEmpty pins the "no pool +// anywhere" case — empty string with no error, so the downstream +// flow falls through to diskless / 404 paths as before. +func TestBug364NoListNoStoragePoolReturnsEmpty(t *testing.T) { + t.Parallel() + + st := store.NewInMemory() + ctx := t.Context() + + if err := st.ResourceGroups().Create(ctx, &apiv1.ResourceGroup{ + Name: "rg", + SelectFilter: apiv1.AutoSelectFilter{}, + }); err != nil { + t.Fatalf("seed RG: %v", err) + } + + if err := st.ResourceDefinitions().Create(ctx, &apiv1.ResourceDefinition{ + Name: "rd", + ResourceGroupName: "rg", + }); err != nil { + t.Fatalf("seed RD: %v", err) + } + + s := &Server{Store: st} + + got, err := s.resolveTakeoverStorPool(ctx, "rd", "n1") + if err != nil { + t.Fatalf("resolveTakeoverStorPool: %v", err) + } + + if got != "" { + t.Errorf("resolveTakeoverStorPool: got %q, want %q (no pool config → empty)", + got, "") + } +} diff --git a/tests/e2e/resource-create-pool-resolve.sh b/tests/e2e/resource-create-pool-resolve.sh new file mode 100755 index 00000000..b4c9d993 --- /dev/null +++ b/tests/e2e/resource-create-pool-resolve.sh @@ -0,0 +1,121 @@ +#!/usr/bin/env bash +# +# usage: resource-create-pool-resolve.sh WORK_DIR +# +# Bug 364 (P1, hunt-caught 2026-06-02) — `linstor r c ` +# without `--storage-pool` against an RG that pins its default via +# `select_filter.storage_pool_list` (not `select_filter.storage_pool`) +# created a Resource with empty `Props["StorPoolName"]`. The satellite +# reconciler then had no pool to bind to and the replica wedged at +# "Provisioning" — visible to the operator only as a phantom replica +# that never reached UpToDate. +# +# linstor-csi is the canonical caller for this path: it posts no body +# to the per-node resource-create endpoint and relies on RG-side +# propagation for the pool name. When the StorageClass sets +# `linstor.csi.linbit.com/storagePool:

`, linstor-csi's RGCreate +# path lands the value under SelectFilter.StoragePoolList[0] (not +# .StoragePool), so every Cozystack volume hits this path. +# +# This e2e pins the fix on a live cluster: an RG with only +# `storage_pool_list` must drive the resource-create's fresh-create +# pool resolution to stamp Props["StorPoolName"] from the list's +# first entry, and the satellite must actually reach UpToDate. + +set -euo pipefail + +WORK_DIR=${1:?work_dir required} +export KUBECONFIG="$WORK_DIR/kubeconfig" + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=lib.sh +source "$SCRIPT_DIR/lib.sh" + +require_workers 1 + +PF_PORT=$(python3 -c 'import socket; s=socket.socket(); s.bind(("127.0.0.1", 0)); print(s.getsockname()[1]); s.close()') +kubectl -n "$NS" port-forward deploy/blockstor-apiserver "$PF_PORT":3370 \ + >/tmp/bug364-pf.log 2>&1 & +PF_PID=$! + +cleanup() { + kill "$PF_PID" 2>/dev/null || true + wait "$PF_PID" 2>/dev/null || true + curl -s -X DELETE "http://localhost:$PF_PORT/v1/resource-definitions/bug364-rd" >/dev/null 2>&1 || true + curl -s -X DELETE "http://localhost:$PF_PORT/v1/resource-groups/bug364-rg" >/dev/null 2>&1 || true +} +trap cleanup EXIT + +for _ in $(seq 1 30); do + if curl -sf -m1 "http://localhost:$PF_PORT/v1/nodes" >/dev/null 2>&1; then + break + fi + sleep 0.5 +done + +B="http://localhost:$PF_PORT" + +# --- Setup: RG with only storage_pool_list --- +echo ">> seed bug364-rg with storage_pool_list (no storage_pool)" +curl -sf -X POST "$B/v1/resource-groups" -H "Content-Type: application/json" \ + -d '{"name":"bug364-rg","select_filter":{"place_count":1,"storage_pool_list":["lvm-thin"]}}' >/dev/null + +echo ">> seed bug364-rd in bug364-rg" +curl -sf -X POST "$B/v1/resource-definitions" -H "Content-Type: application/json" \ + -d '{"resource_definition":{"name":"bug364-rd","resource_group_name":"bug364-rg"}}' >/dev/null + +curl -sf -X POST "$B/v1/resource-definitions/bug364-rd/volume-definitions" \ + -H "Content-Type: application/json" \ + -d '{"volume_definition":{"size_kib":65536}}' >/dev/null + +# --- Bug 364: r c bug364-rd without --storage-pool --- +echo ">> POST resource-create on $WORKER_1 with empty body (linstor-csi shape)" +CODE=$(curl -s -o /tmp/bug364-resp.txt -w "%{http_code}" -X POST \ + "$B/v1/resource-definitions/bug364-rd/resources/$WORKER_1" \ + -H "Content-Type: application/json" \ + -d '') +if [[ "$CODE" != "201" ]]; then + echo "FAIL: r c returned $CODE, expected 201" + cat /tmp/bug364-resp.txt + exit 1 +fi +echo " 201 OK" + +# --- Persistence: Props["StorPoolName"] must be stamped --- +POOL=$(curl -sf "$B/v1/resource-definitions/bug364-rd/resources" | python3 -c ' +import sys, json +for r in json.load(sys.stdin): + props = r.get("props") or {} + print(props.get("StorPoolName", "")) + break +') +if [[ "$POOL" != "lvm-thin" ]]; then + echo "FAIL: Props[StorPoolName] = $POOL, expected lvm-thin (Bug 364)" + curl -sf "$B/v1/resource-definitions/bug364-rd/resources" | python3 -m json.tool + exit 1 +fi +echo ">> Props[StorPoolName] = lvm-thin (Bug 364 fix OK)" + +# --- Live convergence: the replica must reach UpToDate --- +echo ">> wait for replica UpToDate on $WORKER_1" +for _ in $(seq 1 30); do + STATE=$(curl -sf "$B/v1/resource-definitions/bug364-rd/resources" | python3 -c ' +import sys, json +for r in json.load(sys.stdin): + state = r.get("state") or {} + print(state.get("drbd_state", "")) + break +') + if [[ "$STATE" == "UpToDate" ]]; then + echo " replica UpToDate after wait" + break + fi + sleep 2 +done + +if [[ "$STATE" != "UpToDate" ]]; then + echo "FAIL: replica never reached UpToDate (last state: $STATE)" + exit 1 +fi + +echo ">> PASS: Bug 364 — r c without --storage-pool resolves through RG StoragePoolList[0]"