Skip to content
Merged
1 change: 1 addition & 0 deletions docs/cli-parity-known-deltas.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Row IDs match the command-catalogue indexes used by `cli-parity-refresh.sh` (see
| 66 | `vd drbd-options --<net-option>` (option-class enforcement) | VALIDATION | 2026-12-31 | Corner-case C6: BS does not reject a DRBD option set at the wrong object level (e.g. a `net{}`-class `--protocol` on a volume-definition, which accepts only disk/peer-device classes). BS merges the prop permissively; upstream rejects with a typed FAIL_INVLD_PROP. Pinned permissive behavior with a regression test pending the per-object-class validator. |
| 67 | `r c <node> <rd>` (StorPoolName resolution chain) | BEHAVIOR | permanent | Corner D5: upstream resolves the pool via VD → Resource → RD → Node → literal `DfltStorPool`. BS resolves via sibling-diskful-replica → RG `SelectFilter.StoragePool` → RG `StoragePoolList[0]` (`resolveStorPoolForFreshCreate`). BS does NOT honor a per-VD `StorPoolName` prop (no two-volumes-in-two-pools layout) and has no literal `DfltStorPool` terminal fallback — an unresolvable pool yields an empty `StorPoolName` (a named-but-missing pool 404s via `refuseResourceCreateOnUnknownPool`). CSI always supplies the pool through the RG path, so the divergent source-chain is operator-invisible for Cozystack. |
| 68 | `controller set-property Autoplacer/Weights/*` (default weights) | BEHAVIOR | permanent | Corner D9: upstream default weights are MaxFreeSpace=1, MinReservedSpace/MinRscCount/MaxThroughput=0. BS defaults ALL FOUR to 1.0 (`parseWeight` empty→1.0) — a richer equal-weight scorer. Per-strategy semantics match upstream (negative/zero ranks a pool lower but never excludes it; eligibility is gated separately in `matchesPoolFilter`). Only the unset-default composition differs; operators who set the weights explicitly get upstream-identical ranking. |
| 69 | `snapshot rollback` | MISSING_FEATURE | permanent | DELIBERATE (corner G1/G2): BS does NOT expose in-place rollback. `zfs rollback` / `lvconvert --merge` destroy every snapshot newer than the target, and upstream >=1.31.2 additionally RESURRECTS replicas on nodes whose resource was deleted — both silent footguns. BS returns 501 + actionable text pointing at `snapshot-restore-resource` (safe, non-destructive). InUse/Primary still 409s first, unknown snap still 404s first. Pinned by `TestSnapshotRollback*` (pkg/rest/snapshots_test.go). Oracle 1.33.2 confirmed: in-place rollback fails on a snapshot-less node WITHOUT resurrecting the deleted replica (the resurrection footgun is version-specific); BS's blanket 501 is the safe superset. |

## Open (block merge until addressed)

Expand Down
28 changes: 17 additions & 11 deletions internal/controller/autosnapshot_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ const (
PropAutoSnapshotRunEvery = "AutoSnapshot/RunEvery"

// PropAutoSnapshotKeep bounds the retained auto-snapshot count.
// Default 10 (DFLT_AUTO_SNAPSHOT_KEEP). A value <=0 disables the
// prune step, in which case every auto-snapshot is kept.
// Default 10 (DFLT_AUTO_SNAPSHOT_KEEP). Per upstream's documented
// semantic, a value <= 0 (or the prop being absent) falls back to
// that default of 10 — it does NOT disable the prune (G4).
PropAutoSnapshotKeep = "AutoSnapshot/Keep"

// PropAutoSnapshotNextID is the monotonically-incremented
Expand Down Expand Up @@ -371,10 +372,11 @@ func (r *AutoSnapshotRunnable) stampRDAfterCreate(
// "Manually-created snapshots NOT counted against the keep budget"
// invariant.
//
// A Keep value of 0 or negative disables the prune (every
// auto-snapshot is retained) — matches the upstream OpenAPI
// "Removing this property or having a value <= 0 disables
// auto-cleanup" wording.
// G4 (corner-case): a Keep value of 0 or negative falls back to the
// DEFAULT Keep (10) — it does NOT disable cleanup. This matches the
// upstream administration guide: "If AutoSnapshot/Keep is omitted (or
// <= 0), LINSTOR will keep the last 10 snapshots by default"
// (linstor-administration.adoc ~2467).
func (r *AutoSnapshotRunnable) pruneOldAutoSnapshots(
ctx context.Context,
rd *blockstoriov1alpha1.ResourceDefinition,
Expand All @@ -387,12 +389,16 @@ func (r *AutoSnapshotRunnable) pruneOldAutoSnapshots(
return errors.Wrapf(err, "parse %s=%q", PropAutoSnapshotKeep, rawKeep)
}

if parsed <= 0 {
// Disable cleanup — every auto-snapshot is kept.
return nil
// G4 (corner-case): upstream LINSTOR's documented semantic is
// "if AutoSnapshot/Keep is omitted (or <= 0), LINSTOR will keep
// the last 10 snapshots by default" (linstor-administration.adoc
// ~2467). A <= 0 value therefore falls back to the default Keep
// (10) — it does NOT disable cleanup. Pre-fix blockstor treated
// <= 0 as "keep everything", which let an unbounded number of
// auto-snapshots accumulate and silently diverged from upstream.
if parsed > 0 {
keep = int(parsed)
}

keep = int(parsed)
}

var snapList blockstoriov1alpha1.SnapshotList
Expand Down
49 changes: 42 additions & 7 deletions internal/controller/autosnapshot_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,11 +390,14 @@ func TestAutoSnapshotRunEveryDisabledSkipsRD(t *testing.T) {
}
}

// TestAutoSnapshotKeepZeroDisablesPrune: the OpenAPI doc says
// "Removing this property or having a value <= 0 disables
// auto-cleanup, all auto-snapshots will be kept". Pin that
// invariant — Keep=0 means never prune.
func TestAutoSnapshotKeepZeroDisablesPrune(t *testing.T) {
// TestAutoSnapshotKeepZeroFallsBackToDefault10 (G4): the upstream
// administration guide is explicit — "If AutoSnapshot/Keep is omitted
// (or <= 0), LINSTOR will keep the last 10 snapshots by default"
// (linstor-administration.adoc ~2467). A Keep value of 0 therefore
// falls back to the default budget of 10, it does NOT disable cleanup.
// (Pre-fix blockstor diverged here, treating <= 0 as "keep
// everything" — see git history for TestAutoSnapshotKeepZeroDisablesPrune.)
func TestAutoSnapshotKeepZeroFallsBackToDefault10(t *testing.T) {
t.Parallel()

scheme := newScheme(t)
Expand All @@ -417,8 +420,40 @@ func TestAutoSnapshotKeepZeroDisablesPrune(t *testing.T) {
}

snaps := listAutoSnapshotsByRD(t, cli, "pvc-w05-keep0")
if len(snaps) != 15 {
t.Errorf("expected 15 auto-snapshots (cleanup disabled), got %d", len(snaps))
if len(snaps) != controllerpkg.DefaultAutoSnapshotKeep {
t.Errorf("expected %d auto-snapshots (Keep=0 falls back to default 10), got %d",
controllerpkg.DefaultAutoSnapshotKeep, len(snaps))
}
}

// TestAutoSnapshotKeepNegativeFallsBackToDefault10 (G4): same contract
// for an explicitly negative Keep — `<= 0` is the documented trigger,
// so a negative value must also resolve to the default budget of 10.
func TestAutoSnapshotKeepNegativeFallsBackToDefault10(t *testing.T) {
t.Parallel()

scheme := newScheme(t)
rd := makeRDWithAutoSnapshot(t, "pvc-w05-keepneg", 15, "-3")

cli := fake.NewClientBuilder().WithScheme(scheme).
WithObjects(rd).
Build()

clk := &stubClock{t: time.Date(2026, 5, 14, 12, 0, 0, 0, time.UTC)}
r := &controllerpkg.AutoSnapshotRunnable{Client: cli, Clock: clk}

for i := range 14 {
if err := r.Tick(context.Background()); err != nil {
t.Fatalf("Tick #%d: %v", i, err)
}

clk.advance(15*time.Minute + time.Second)
}

snaps := listAutoSnapshotsByRD(t, cli, "pvc-w05-keepneg")
if len(snaps) != controllerpkg.DefaultAutoSnapshotKeep {
t.Errorf("expected %d auto-snapshots (Keep<0 falls back to default 10), got %d",
controllerpkg.DefaultAutoSnapshotKeep, len(snaps))
}
}

Expand Down
15 changes: 15 additions & 0 deletions pkg/rest/api_call_rc.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,21 @@ const warnNodeAlreadyGone = maskWarn | int64(2060)
// envelope wrapper at the call site.
const apiCallRcFailSnapshotFinalizerStuck int64 = 998

// apiCallRcFailSnapshotsNotSupported mirrors upstream LINSTOR's
// `ApiConsts.FAIL_SNAPSHOTS_NOT_SUPPORTED` (`994 | MASK_ERROR`).
// Emitted by `POST /v1/resource-definitions/{rd}/snapshots` (G5) when
// a targeted diskful replica lives in a storage pool whose provider
// cannot take copy-on-write snapshots (thick `LVM`, plain thick
// `FILE`, `DISKLESS`). The upstream controller refuses the snapshot
// at the API edge — thin LVM and ZFS (and `FILE_THIN` downstream) are
// the only snapshot-capable providers. blockstor's thick-LVM provider
// CAN technically `lvcreate --snapshot` (a 25%ORIGIN COW overlay that
// silently invalidates on overflow), so without this gate the
// satellite would happily stage a footgun snapshot the operator never
// asked to risk. The MASK_ERROR bit is OR'd in by the envelope wrapper
// at the call site.
const apiCallRcFailSnapshotsNotSupported int64 = 994

// apiCallRcFailExistsSnapshotDfn mirrors upstream LINSTOR's
// `ApiConsts.FAIL_EXISTS_SNAPSHOT_DFN` (`514 | MASK_ERROR`). Emitted by
// `DELETE /v1/resource-definitions/{rd}` when the RD still has at
Expand Down
68 changes: 68 additions & 0 deletions pkg/rest/snapshot_restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"maps"
"net/http"
"slices"
"strconv"
"strings"

apiv1 "github.com/cozystack/blockstor/pkg/api/v1"
Expand Down Expand Up @@ -114,6 +115,19 @@ func (s *Server) handleSnapshotRestoreVolumeDefinition(w http.ResponseWriter, r
return
}

// G3b (corner-case): the VD-restore variant hydrates the snapshot's
// recorded volume layout onto a (typically pre-existing, empty)
// target RD. If the target RD ALREADY carries a volume-definition
// whose number collides with one of the snapshot's, refuse up front
// with a typed FAIL_EXISTS_VLM_DFN envelope naming the offending
// volume number — rather than letting hydrateVolumesFromSnapshot's
// per-VD Create surface a bare 409 only AFTER it has already
// partially mutated the target (an earlier non-colliding VD would
// land before the colliding one errored, leaving a half-restored RD).
if !s.refuseRestoreOnVolumeConflict(w, r, req.ToResource, &snap) {
return
}

err = hydrateVolumesFromSnapshot(r.Context(), s, req.ToResource, &snap)
if err != nil {
writeStoreError(w, err)
Expand All @@ -128,6 +142,60 @@ func (s *Server) handleSnapshotRestoreVolumeDefinition(w http.ResponseWriter, r
}})
}

// refuseRestoreOnVolumeConflict is the G3b pre-mutation guard for the
// VD-restore handler. It compares the snapshot's recorded volume
// numbers against the target RD's existing VolumeDefinitions and
// refuses with a typed FAIL_EXISTS_VLM_DFN (502) envelope when any
// number collides. Returns true (caller may proceed) when the target
// has no conflicting VD or its VD list could not be read (best-effort:
// the downstream hydrate Create still guards). Returns false (and
// writes the 409 envelope) on a collision.
func (s *Server) refuseRestoreOnVolumeConflict(w http.ResponseWriter, r *http.Request, toResource string, snap *apiv1.Snapshot) bool {
existing, err := s.Store.VolumeDefinitions().List(r.Context(), toResource)
if err != nil || len(existing) == 0 {
return true
}

have := make(map[int32]struct{}, len(existing))
for i := range existing {
have[existing[i].VolumeNumber] = struct{}{}
}

var clashes []int32

for i := range snap.VolumeDefinitions {
if _, ok := have[snap.VolumeDefinitions[i].VolumeNumber]; ok {
clashes = append(clashes, snap.VolumeDefinitions[i].VolumeNumber)
}
}

if len(clashes) == 0 {
return true
}

slices.Sort(clashes)

nums := make([]string, 0, len(clashes))
for _, n := range clashes {
nums = append(nums, strconv.Itoa(int(n)))
}

writeJSON(w, http.StatusConflict, []apiv1.APICallRc{{
RetCode: apiCallRcError | apiCallRcFailExistsVlmDfn,
Message: "cannot restore snapshot volume definitions onto '" +
toResource + "': volume number(s) " + strings.Join(nums, ", ") +
" already exist on the target",
Cause: "the target resource definition already carries a volume " +
"definition with the same number as the snapshot's; restoring " +
"would collide on the volume-number key",
Correc: "restore into a resource definition with no conflicting " +
"volume definitions (e.g. a freshly-created empty RD), or remove " +
"the clashing volume definition(s) from '" + toResource + "' first",
}})

return false
}

// validateSnapshotRestoreRequest runs every pre-store wire-boundary
// gate the new-RD-spawning restore handler needs: ToResource is set
// (required field), and ToResource is a valid LINSTOR identifier
Expand Down
Loading