diff --git a/pkg/rest/input_validation.go b/pkg/rest/input_validation.go index 34d85535..7e72b028 100644 --- a/pkg/rest/input_validation.go +++ b/pkg/rest/input_validation.go @@ -48,13 +48,14 @@ import ( // input and the rule it violated. // // The chosen ruleset is RFC 1123 subdomain (lowercase alphanumerics + -// hyphen, can't start/end with hyphen, max 253 chars). Upstream -// LINSTOR's wire-side regex is `^[A-Za-z][A-Za-z0-9_-]{1,47}$` — -// stricter on length (48) but permissive on case + underscore. We -// pick RFC 1123 because the k8s store is authoritative for storage: -// any name that would later fail the metadata.Name regex is rejected -// up front, so the operator sees ONE consistent failure mode rather -// than "linstor said OK, but kubectl says no". +// hyphen, can't start/end with hyphen) capped at 48 chars (Bug 360). +// Upstream LINSTOR's wire-side regex is `^[A-Za-z][A-Za-z0-9_-]{1,47}$` +// — permissive on case + underscore, but the same 48-char length cap +// we mirror here. We pick RFC 1123 (lowercase-only, no underscore) +// because the k8s store is authoritative for storage: any name that +// would later fail the metadata.Name regex is rejected up front, so +// the operator sees ONE consistent failure mode rather than "linstor +// said OK, but kubectl says no". // rfc1123SubdomainName is the wire-level identifier regex applied to // every user-supplied LINSTOR name (Bug 97). Mirrors @@ -71,12 +72,27 @@ import ( // pkg/rest/autoplace.go). var rfc1123SubdomainName = regexp.MustCompile(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`) -// maxLinstorName caps the wire-side identifier length. RFC 1123 -// subdomain allows up to 253 chars; we mirror the same limit here so -// the REST gate doesn't accept anything the k8s store would later -// refuse. (`.` Resource CRD names would still fit comfortably -// since each side is independently bounded.) -const maxLinstorName = 253 +// maxLinstorName caps the wire-side identifier length. Upstream +// LINSTOR's wire-level regex tops out at 48 characters +// (DRBD_RES_NAME_MAX), but the k8s store has a second hard ceiling that +// matters more in practice: every user-supplied name flows into a +// Kubernetes `metadata.labels` value on the Resource CRD +// (`LabelResourceDefinition`, `LabelNodeName` in pkg/store/k8s/ +// resources.go). k8s label VALUES are bounded to 63 characters by the +// apimachinery validator — anything longer slips past `rd c` (which +// only writes a metadata.name, the 253-char regime) and explodes on +// the next `r c` with `metadata.labels: Invalid value: …: must be no +// more than 63 characters`. The RD is now a zombie: it lives in the +// catalogue and accepts `vd c`, but the first replica create fails +// inside the store layer, leaving a partial state the operator can +// only clean up manually. +// +// Bug 360 (hunt-v4): cap the wire-side limit at 48 so the rd-create +// rejection happens up front, BEFORE the partial-create lands and +// BEFORE the k8s label cap can ever fire. 48 also matches upstream +// LINSTOR's documented identifier length for forward-compat with any +// caller that relies on the upstream limit. +const maxLinstorName = 48 // ErrInvalidLinstorName is the static sentinel for Bug-97 rejections. // Callers wrap it via fmt.Errorf("%w: …") with object-kind + name + diff --git a/pkg/rest/input_validation_bug_360_test.go b/pkg/rest/input_validation_bug_360_test.go new file mode 100644 index 00000000..1061a723 --- /dev/null +++ b/pkg/rest/input_validation_bug_360_test.go @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: Apache-2.0 + +/* +Copyright 2026 Cozystack contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rest + +import ( + "encoding/json" + "errors" + "net/http" + "strings" + "testing" + + apiv1 "github.com/cozystack/blockstor/pkg/api/v1" + "github.com/cozystack/blockstor/pkg/store" +) + +// Bug 360: long RD names (>63 chars) bypassed the wire-side gate +// (which capped at 253 — the k8s metadata.name regime) and then +// exploded on the FIRST `r c` because the k8s store writes the +// RD-name into `metadata.labels` (LabelResourceDefinition in +// pkg/store/k8s/resources.go). k8s label VALUES are bounded to 63 +// chars — anything longer leaks the raw apimachinery error to the +// operator and leaves a zombie RD that accepts `vd c` but never +// accepts a replica. +// +// Fix: cap maxLinstorName at 48 (matches upstream LINSTOR's +// DRBD_RES_NAME_MAX). The wire gate now fires at `rd c` before any +// partial state lands. +// +// Reproduction on dev stand (HEAD 6f69c5678, 2026-06-01): +// +// $ linstor rd c $(printf 'a%.0s' {1..150}) +// SUCCESS: resource definition created +// $ linstor vd c $LONG_NAME 64M +// SUCCESS: volume definition created +// $ linstor r c dev-worker-1 $LONG_NAME --storage-pool stand +// ERROR: store error: create Resource …/dev-worker-1: +// "<150-a-name>.dev-worker-1" is invalid: +// metadata.labels: Invalid value: "aaa…aaa": must be no more +// than 63 characters + +// TestBug360RDCreateRefusedOnNameOver48 pins the upper bound: an +// RD name strictly over 48 chars must be refused at `rd c` time with +// a LINSTOR envelope naming the rule, NOT a k8s apimachinery error +// surfacing through later. We exercise sizes that previously slipped +// past the 253-char cap (64, 150, 250) plus the boundary (49). +func TestBug360RDCreateRefusedOnNameOver48(t *testing.T) { + t.Parallel() + + cases := []int{49, 64, 150, 250} + + for _, length := range cases { + t.Run(makeLenLabel(length), func(t *testing.T) { + t.Parallel() + + st := store.NewInMemory() + + base, stop := startServerWithStore(t, st) + defer stop() + + name := strings.Repeat("a", length) + body, _ := json.Marshal(apiv1.ResourceDefinitionCreate{ + ResourceDefinition: apiv1.ResourceDefinition{Name: name}, + }) + + resp := httpPost(t, base+"/v1/resource-definitions", body) + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode < 400 || resp.StatusCode >= 500 { + t.Fatalf("status: got %d, want 4xx (Bug 360: name >%d chars must be refused)", + resp.StatusCode, maxLinstorName) + } + + got, _ := readAllBody(resp) + gotStr := string(got) + // Envelope must name the offending length, not leak a k8s error. + if !strings.Contains(gotStr, "exceeds") { + t.Errorf("envelope should name the violated rule (exceeds %d chars); got %s", + maxLinstorName, gotStr) + } + + if strings.Contains(gotStr, "metadata.labels") { + t.Errorf("envelope must NOT leak k8s apimachinery error; got %s", gotStr) + } + + // Store must remain clean: zombie RD is the exact symptom + // Bug 360 fixes. + if _, err := st.ResourceDefinitions().Get(t.Context(), name); !errors.Is(err, store.ErrNotFound) { + t.Errorf("RD %q must NOT be persisted on refusal; got err=%v", name, err) + } + }) + } +} + +// TestBug360RDCreateAcceptedAtBoundary pins the lower bound: 48-char +// names (the new cap) must still be accepted. Guards against the cap +// being one-off in the wrong direction. +func TestBug360RDCreateAcceptedAtBoundary(t *testing.T) { + t.Parallel() + + st := store.NewInMemory() + base, stop := startServerWithStore(t, st) + defer stop() + + name := strings.Repeat("a", 48) + body, _ := json.Marshal(apiv1.ResourceDefinitionCreate{ + ResourceDefinition: apiv1.ResourceDefinition{Name: name}, + }) + + resp := httpPost(t, base+"/v1/resource-definitions", body) + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusCreated { + got, _ := readAllBody(resp) + t.Fatalf("status: got %d, want 201 for 48-char name. Body: %s", + resp.StatusCode, got) + } + + if _, err := st.ResourceDefinitions().Get(t.Context(), name); err != nil { + t.Errorf("RD %q not persisted at boundary: %v", name, err) + } +} + +// TestBug360RGSpawnRefusedOnLongRDName covers the second entry point: +// `linstor rg spawn rg-foo $LONG_NAME` should also be refused, sharing +// the same gate as `rd c` (the rd-name-validation-bulk fix in PR #56 +// added validateLinstorName to spawn handler). +func TestBug360RGSpawnRefusedOnLongRDName(t *testing.T) { + t.Parallel() + + st := store.NewInMemory() + ctx := t.Context() + + // Seed an RG so the spawn at least reaches the name-validation + // gate (without the RG, the handler short-circuits earlier). + rgName := "rg-bug360" + if err := st.ResourceGroups().Create(ctx, &apiv1.ResourceGroup{Name: rgName}); err != nil { + t.Fatalf("seed RG: %v", err) + } + + base, stop := startServerWithStore(t, st) + defer stop() + + longName := strings.Repeat("b", 64) + body, _ := json.Marshal(map[string]string{ + "resource_definition_name": longName, + }) + + resp := httpPost(t, base+"/v1/resource-groups/"+rgName+"/spawn", body) + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode < 400 || resp.StatusCode >= 500 { + t.Fatalf("status: got %d, want 4xx (Bug 360 via spawn)", resp.StatusCode) + } + + got, _ := readAllBody(resp) + if !strings.Contains(string(got), "exceeds") { + t.Errorf("envelope should name the violated rule; got %s", got) + } + + if _, err := st.ResourceDefinitions().Get(ctx, longName); !errors.Is(err, store.ErrNotFound) { + t.Errorf("RD %q must NOT be persisted via spawn refusal; err=%v", longName, err) + } +} + +// makeLenLabel formats a subtest label like "len-49" so the +// parallel run report stays readable for the matrix of lengths. +func makeLenLabel(n int) string { + switch n { + case 49: + return "len-49" + case 64: + return "len-64" + case 150: + return "len-150" + case 250: + return "len-250" + default: + return "len-other" + } +} diff --git a/tests/e2e/rd-name-length-48.sh b/tests/e2e/rd-name-length-48.sh new file mode 100755 index 00000000..f9580878 --- /dev/null +++ b/tests/e2e/rd-name-length-48.sh @@ -0,0 +1,212 @@ +#!/usr/bin/env bash +# +# usage: rd-name-length-48.sh WORK_DIR +# +# Bug 360 (hunt-v4) regression catcher: long RD names (>63 chars) +# slipped past the wire-side gate (which capped at 253 — the k8s +# metadata.name regime) and then exploded inside the store layer +# because the k8s store writes the RD-name into `metadata.labels` +# (LabelResourceDefinition in pkg/store/k8s/resources.go). k8s label +# VALUES are bounded to 63 chars — anything longer leaks the raw +# apimachinery error through the next `r c` and leaves a zombie RD +# behind that accepts `vd c` but never accepts a replica. +# +# Reproduction on dev stand (HEAD 6f69c5678, 2026-06-01): +# +# $ linstor rd c $(printf 'a%.0s' {1..150}) +# SUCCESS: resource definition created +# $ linstor vd c $LONG_NAME 64M +# SUCCESS: volume definition created +# $ linstor r c dev-worker-1 $LONG_NAME --storage-pool stand +# ERROR: store error: "<150-a-name>.dev-worker-1" is +# invalid: metadata.labels: Invalid value: "aaa…aaa": must be no +# more than 63 characters +# +# Fix: cap maxLinstorName at 48 (matches upstream LINSTOR's +# DRBD_RES_NAME_MAX). The wire gate now refuses at rd-create time +# before any partial state lands. +# +# This e2e mirrors the unit test +# TestBug360RDCreateRefusedOnNameOver48 but exercises the live +# apiserver port (the REST layer drives the same gate the CLI does, +# so we hit the wire path the operator hits). + +set -euo pipefail + +WORK_DIR=${1:?work_dir required} +export KUBECONFIG="$WORK_DIR/kubeconfig" + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=lib.sh +source "$SCRIPT_DIR/lib.sh" + +NS=${NS:-blockstor-system} + +require_workers 1 + +# ---- port-forward the apiserver ---- + +LPORT=$(python3 -c 'import socket; s=socket.socket(); s.bind(("127.0.0.1", 0)); print(s.getsockname()[1]); s.close()') +kubectl -n "$NS" port-forward deploy/blockstor-apiserver "${LPORT}:3370" >/dev/null 2>&1 & +PF_PID=$! +_wait_port_forward "$LPORT" "$PF_PID" || { + echo "FAIL: could not port-forward apiserver" + exit 1 +} + +cleanup() { + set +e + kill "$PF_PID" 2>/dev/null || true + # Best-effort cleanup if a borderline RD slipped through. + for n in 48 49; do + local name + # shellcheck disable=SC2155 + name=$(printf 'a%.0s' $(seq 1 "$n")) + kubectl delete resourcedefinition.blockstor.cozystack.io "$name" \ + --ignore-not-found --timeout=10s 2>/dev/null || true + done + set -e +} +trap cleanup EXIT + +API="http://127.0.0.1:${LPORT}" + +# ---- helpers ---- + +# assert_reject_long NAME_LEN +# POST rd-create with an `a` * NAME_LEN name; expect a 4xx envelope +# that mentions the cap and does NOT leak `metadata.labels`. Then +# assert no RD landed in the store under that name. +assert_reject_long() { + local n=$1 + local name body_file code body + name=$(printf 'a%.0s' $(seq 1 "$n")) + body_file=$(mktemp) + + code=$(curl -sS -o "$body_file" -w "%{http_code}" \ + -X POST -H 'Content-Type: application/json' \ + "${API}/v1/resource-definitions" \ + -d "{\"resource_definition\":{\"name\":\"${name}\"}}" || echo "000") + + body=$(cat "$body_file") + rm -f "$body_file" + + if [[ "$code" -lt 400 || "$code" -ge 500 ]]; then + echo "FAIL: len=$n — expected 4xx, got HTTP $code" + echo "$body" + return 1 + fi + + if ! grep -qE 'exceeds|valid identifier' <<<"$body"; then + echo "FAIL: len=$n — envelope must name the rule (exceeds N chars); got:" + echo "$body" + return 1 + fi + + if grep -q 'metadata\.labels' <<<"$body"; then + echo "FAIL: len=$n — envelope leaks k8s metadata.labels error:" + echo "$body" + return 1 + fi + + # Zombie RD assert: the store must NOT hold a row for this name + # after the refusal. + if kubectl get resourcedefinition.blockstor.cozystack.io "$name" >/dev/null 2>&1; then + echo "FAIL: len=$n — zombie RD CRD survived a refused rd-create" + return 1 + fi + + echo " ok: len=$n refused at wire gate (HTTP $code)" +} + +# assert_accept_at_boundary NAME_LEN +# POST rd-create with an `a` * NAME_LEN name; expect 201 + the RD +# visible via `kubectl get`. +assert_accept_at_boundary() { + local n=$1 + local name body_file code body + name=$(printf 'a%.0s' $(seq 1 "$n")) + body_file=$(mktemp) + + code=$(curl -sS -o "$body_file" -w "%{http_code}" \ + -X POST -H 'Content-Type: application/json' \ + "${API}/v1/resource-definitions" \ + -d "{\"resource_definition\":{\"name\":\"${name}\"}}" || echo "000") + + body=$(cat "$body_file") + rm -f "$body_file" + + if [[ "$code" -ne 201 ]]; then + echo "FAIL: len=$n — boundary name was refused; HTTP $code" + echo "$body" + return 1 + fi + + # Best-effort visibility check; informer cache settles in <1s on the + # dev stand. + deadline=$(( $(date +%s) + 10 )) + while (( $(date +%s) < deadline )); do + if kubectl get resourcedefinition.blockstor.cozystack.io "$name" >/dev/null 2>&1; then + echo " ok: len=$n accepted at boundary (HTTP $code)" + kubectl delete resourcedefinition.blockstor.cozystack.io "$name" \ + --ignore-not-found --timeout=10s >/dev/null 2>&1 || true + return 0 + fi + sleep 1 + done + + echo "FAIL: len=$n — boundary RD not visible after accept" + return 1 +} + +# ---- run the matrix ---- + +echo ">> Bug 360: refuse names strictly over 48 chars" +for n in 49 64 150 250; do + assert_reject_long "$n" +done + +echo ">> Bug 360: accept names at the 48-char boundary" +assert_accept_at_boundary 48 + +echo ">> Bug 360: rg-spawn entry point also refuses long names" +# Seed a tiny RG so the spawn handler reaches the name-validation gate. +RG=e2e-bug360-rg +kubectl apply -f - >/dev/null </dev/null 2>&1 || true; kill "$PF_PID" 2>/dev/null || true' EXIT + +long_spawn_name=$(printf 'b%.0s' $(seq 1 64)) +body_file=$(mktemp) +code=$(curl -sS -o "$body_file" -w "%{http_code}" \ + -X POST -H 'Content-Type: application/json' \ + "${API}/v1/resource-groups/${RG}/spawn" \ + -d "{\"resource_definition_name\":\"${long_spawn_name}\"}" || echo "000") +body=$(cat "$body_file") +rm -f "$body_file" + +if [[ "$code" -lt 400 || "$code" -ge 500 ]]; then + echo "FAIL: rg-spawn — expected 4xx for long RD name, got HTTP $code" + echo "$body" + exit 1 +fi + +if ! grep -qE 'exceeds|valid identifier' <<<"$body"; then + echo "FAIL: rg-spawn envelope must name the rule; got:" + echo "$body" + exit 1 +fi + +if kubectl get resourcedefinition.blockstor.cozystack.io "$long_spawn_name" >/dev/null 2>&1; then + echo "FAIL: rg-spawn left a zombie RD CRD behind" + exit 1 +fi +echo " ok: rg-spawn refused (HTTP $code)" + +echo ">> BUG-360 OK: long RD names are now refused at the wire gate"