diff --git a/.github/workflows/tla-check.yml b/.github/workflows/tla-check.yml new file mode 100644 index 000000000..51d05dfb8 --- /dev/null +++ b/.github/workflows/tla-check.yml @@ -0,0 +1,86 @@ +name: TLA+ model check + +# Runs `make tla-check` on PRs (and pushes to main) that touch the +# subsystems modelled by the spec or the spec / tooling themselves. +# Per docs/design/2026_05_28_partial_tla_safety_spec.md §7.3. +# +# The job runs both MCHLC.cfg (correct design — expected PASS) and +# MCHLC_gap.cfg (preconditions disabled — expected FAIL with the HLC-4 +# counterexample); the Makefile target greps for the specific HLC-4 +# violation string so the gap evidence is validated, not just the exit +# code. + +on: + pull_request: + types: [opened, synchronize, reopened] + paths: + - 'tla/**' + - 'Makefile' + - 'kv/hlc*.go' + - 'kv/coordinator.go' + - 'kv/sharded_coordinator.go' + - 'kv/transaction.go' + - 'kv/fsm.go' + - 'kv/lock_resolver.go' + - 'store/mvcc_store.go' + - 'distribution/**' + - '.github/workflows/tla-check.yml' + push: + branches: [main] + paths: + - 'tla/**' + - 'Makefile' + - 'kv/hlc*.go' + - 'kv/coordinator.go' + - 'kv/sharded_coordinator.go' + - 'kv/transaction.go' + - 'kv/fsm.go' + - 'kv/lock_resolver.go' + - 'store/mvcc_store.go' + - 'distribution/**' + - '.github/workflows/tla-check.yml' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-tla-check + cancel-in-progress: false + +permissions: + contents: read + +jobs: + tla-check: + name: tla-check + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - name: Check out + uses: actions/checkout@v6 + + - name: Set up JDK 21 (Temurin) + uses: actions/setup-java@v5 + with: + distribution: temurin + java-version: '21' + + # Cache the checksum-pinned tla2tools.jar so we don't re-download + # GitHub release artefacts on every run. Key includes the Makefile + # hash so a version-pin change invalidates the cache automatically + # (TLA_VERSION + TLA_SHA256 both live in the Makefile). + # + # NO restore-keys: a prefix-match restore would bring back an older + # jar after a TLA_VERSION/TLA_SHA256 bump. Because the Makefile's + # $(TLA_JAR) recipe is gated on file existence, the restored stale + # jar would skip checksum verification entirely — defeating the + # whole point of the pin (codex P2 on PR #856 round 4). A full + # miss-and-redownload is correct here. + - name: Cache tla2tools.jar + uses: actions/cache@v5 + with: + path: .cache/tla + key: ${{ runner.os }}-tla-tools-${{ hashFiles('Makefile') }} + + - name: Pre-fetch tla2tools.jar (logs separately from tla-check) + run: make tla-tools + + - name: Run make tla-check + run: make tla-check diff --git a/.gitignore b/.gitignore index b8a06d526..b311a018d 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,11 @@ jepsen/.ssh/ .golangci-cache/ server +# TLA+ TLC artifacts (counterexample traces, state directories) +tla/**/MCHLC_TTrace_*.tla +tla/**/MCHLC_TTrace_*.bin +tla/**/states/ + # Admin SPA build outputs. The placeholder internal/admin/dist/index.html # is committed so `go build` succeeds in a fresh clone (the //go:embed # directive needs at least one matching file). Everything else under diff --git a/Makefile b/Makefile index 035a0db5b..2af213987 100644 --- a/Makefile +++ b/Makefile @@ -12,3 +12,93 @@ lint: gen: @$(MAKE) -C proto gen + +# === TLA+ model-check support (M1 deliverable) === +# Per docs/design/2026_05_28_partial_tla_safety_spec.md §7.2. +# Downloads a checksum-pinned tla2tools.jar on first use, then runs TLC on +# both MCHLC.cfg (correct design — expected PASS) and MCHLC_gap.cfg +# (preconditions disabled — expected FAIL on HLC-4 with a counterexample +# that motivates the implementation gaps). +TLA_VERSION := v1.8.0 +TLA_JAR := .cache/tla/tla2tools.jar +TLA_SHA256 := 237332bdcc79a35c7d26efa7b82c77c85c2744591c5598673a8a45085ff2a4fb +TLA_URL := https://github.com/tlaplus/tlaplus/releases/download/$(TLA_VERSION)/tla2tools.jar +TLA_LIB := ../lib + +.PHONY: tla-check tla-tools + +tla-tools: $(TLA_JAR) + +$(TLA_JAR): + @mkdir -p $(dir $(TLA_JAR)) + @echo "Downloading tla2tools.jar $(TLA_VERSION)..." + @curl -fsSL -o $(TLA_JAR).tmp $(TLA_URL) + @# Prefer sha256sum (GNU coreutils, universal on Linux); fall back to + @# shasum -a 256 (default on macOS). Either yields the same hex + @# digest in the first whitespace-delimited field. + @if command -v sha256sum >/dev/null 2>&1; then \ + actual=$$(sha256sum $(TLA_JAR).tmp | awk '{print $$1}'); \ + elif command -v shasum >/dev/null 2>&1; then \ + actual=$$(shasum -a 256 $(TLA_JAR).tmp | awk '{print $$1}'); \ + else \ + echo "ERROR: neither sha256sum nor shasum is available."; \ + rm -f $(TLA_JAR).tmp; \ + exit 1; \ + fi; \ + if [ "$$actual" != "$(TLA_SHA256)" ]; then \ + echo "ERROR: tla2tools.jar SHA-256 mismatch."; \ + echo " expected: $(TLA_SHA256)"; \ + echo " actual: $$actual"; \ + rm -f $(TLA_JAR).tmp; \ + exit 1; \ + fi + @mv $(TLA_JAR).tmp $(TLA_JAR) + @echo "tla2tools.jar ready at $(TLA_JAR) (SHA-256 $(TLA_SHA256))" + +tla-check: $(TLA_JAR) + @echo "================================================================" + @echo " TLC: tla/hlc/MCHLC.cfg (correct design, expected PASS)" + @echo "================================================================" + @cd tla/hlc && java -XX:+UseParallelGC \ + -cp ../../$(TLA_JAR) -DTLA-Library=$(TLA_LIB) \ + tlc2.TLC -nowarning -config MCHLC.cfg MCHLC.tla + @echo + @echo "================================================================" + @echo " TLC: tla/hlc/MCHLC_gap.cfg (no preconditions, expected FAIL)" + @echo "================================================================" + @# Capture the gap run output so we can validate not just that TLC + @# exited non-zero but that the failure reason is exactly the HLC-4 + @# invariant violation we expect. Without this check a parse error, + @# deadlock, JVM crash, or different invariant violation would silently + @# count as "expected gap counterexample" (codex P2 on PR #856 round 2). + @cd tla/hlc && \ + out=$$(java -XX:+UseParallelGC \ + -cp ../../$(TLA_JAR) -DTLA-Library=$(TLA_LIB) \ + tlc2.TLC -nowarning -config MCHLC_gap.cfg MCHLC.tla 2>&1) ; \ + rc=$$? ; \ + printf '%s\n' "$$out" ; \ + if [ "$$rc" -eq 0 ]; then \ + echo ; \ + echo "ERROR: MCHLC_gap.cfg unexpectedly passed."; \ + echo " The gap configuration disables HLC-4 preconditions (ii)+(iii);"; \ + echo " TLC was supposed to surface a counterexample showing why those"; \ + echo " preconditions are necessary. A clean pass means either the spec"; \ + echo " no longer encodes the gap correctly or the safety guards leaked"; \ + echo " past the EnableSafety toggle."; \ + exit 1; \ + fi ; \ + if printf '%s\n' "$$out" | grep -qF "Invariant HLC4_NoRegressionAcrossTerms is violated"; then \ + echo ; \ + echo "OK: MCHLC_gap.cfg failed as designed (HLC-4 counterexample)."; \ + else \ + echo ; \ + echo "ERROR: MCHLC_gap.cfg failed, but the reason is NOT a HLC-4 violation."; \ + echo " Expected substring in TLC output:"; \ + echo " 'Invariant HLC4_NoRegressionAcrossTerms is violated'"; \ + echo " The non-zero exit may indicate a parse error, deadlock, JVM"; \ + echo " crash, or a different invariant breaking — review the output"; \ + echo " above before treating this as a regression in the gap evidence."; \ + exit 1; \ + fi + @echo + @echo "tla-check: all model-check outcomes match the design contract." diff --git a/docs/design/2026_05_28_proposed_tla_safety_spec.md b/docs/design/2026_05_28_partial_tla_safety_spec.md similarity index 96% rename from docs/design/2026_05_28_proposed_tla_safety_spec.md rename to docs/design/2026_05_28_partial_tla_safety_spec.md index 62b5489fb..5b29432c6 100644 --- a/docs/design/2026_05_28_proposed_tla_safety_spec.md +++ b/docs/design/2026_05_28_partial_tla_safety_spec.md @@ -1,9 +1,17 @@ # TLA+ Safety Specification for elastickv -Status: Proposed +Status: Partial Author: bootjp Date: 2026-05-28 +> **Implementation status.** M1 landed in PR #TBD (the PR opening this +> document's rename to `partial`). The HLC TLA+ module + `make tla-check` +> + the gap-config counterexample evidence are now part of the build. +> M2–M5 (OCC, MVCC, Routes, Composed) remain open per §8. The follow-up +> Go code changes that strategy (c) and the ceiling fence require (per +> §5.1 HLC-4 (ii) and (iii)) are tracked as separate issues against +> `kv/hlc.go`, `kv/sharded_coordinator.go`, and `kv/fsm.go`. + --- ## 1. Background and Motivation @@ -536,10 +544,12 @@ Both targets shell out to `tla2tools.jar` which is downloaded on first use into `.cache/tla/` (matching the existing `.cache/` pattern from CLAUDE.md). The downloaded jar is checksum-pinned. No system-wide install required. -### 7.3 CI integration (deferred) +### 7.3 CI integration -`tla-check` is intended to run in CI on PRs that touch: +`tla-check` runs in CI via `.github/workflows/tla-check.yml` on every PR +(and push to `main`) that touches: +- `tla/**` and `Makefile` (the spec and the harness themselves). - `kv/hlc*.go`, `kv/coordinator.go`, `kv/sharded_coordinator.go`, `kv/transaction.go`, `kv/fsm.go`, `kv/lock_resolver.go` — the HLC renewal scheduler, the timestamp-issuance dispatch path, and the @@ -547,12 +557,17 @@ The downloaded jar is checksum-pinned. No system-wide install required. anchors); a change here can violate HLC/OCC invariants and must re-run `tla-check`. - `store/mvcc_store.go` -- `distribution/` -- the spec files themselves - -The exact CI wiring (workflow file, path filter) is deferred to the M1 PR -so it can be reviewed alongside a concrete spec rather than in the -abstract. +- `distribution/**` +- `.github/workflows/tla-check.yml` (the workflow file itself). + +The workflow uses Temurin JDK 21, caches `.cache/tla/tla2tools.jar` +keyed on the Makefile hash so version-pin changes invalidate the cache +automatically, and inherits the Makefile's gap-config validation: a +non-zero TLC exit on `MCHLC_gap.cfg` only counts as success if the +output contains the exact string +`Invariant HLC4_NoRegressionAcrossTerms is violated`. Any other failure +mode (parse error, deadlock, JVM crash, different invariant) fails the +job. The full M1 run completes in well under a minute. --- diff --git a/tla/README.md b/tla/README.md new file mode 100644 index 000000000..06bef48a2 --- /dev/null +++ b/tla/README.md @@ -0,0 +1,173 @@ +# elastickv TLA+ specs + +Machine-checkable safety models for the four cross-cutting subsystems +(HLC, OCC, MVCC, route catalog) per +[`docs/design/2026_05_28_partial_tla_safety_spec.md`](../docs/design/2026_05_28_partial_tla_safety_spec.md). + +This directory is **independent of the Go module**; you can navigate and +run it without touching anything under the Go source tree. + +## Status + +| Milestone | Scope | Status | +|---|---|---| +| M1 | `lib/Raft.tla`, `lib/Env.tla`, `hlc/HLC.tla`, `make tla-check` | Landed | +| M2 | `occ/OCC.tla` (safety invariants OCC-1..OCC-5) | Not started | +| M3 | `mvcc/MVCC.tla` (MVCC-1..MVCC-4) | Not started | +| M4 | `routes/Routes.tla` (Routes-1..Routes-4) | Not started | +| M5 | `composed/Composed.tla` + CI integration | Not started | +| M6 | Liveness checking (`tla-check-deep`) | Not started | + +## Install + +The only runtime dependency is a Java VM (TLC ships as `tla2tools.jar`). + +```sh +# JDK 17+ is required. Verify with: +java -version +``` + +`make tla-check` automatically downloads `tla2tools.jar` to +`.cache/tla/tla2tools.jar` on first use and verifies its SHA-256 against +a pinned value defined in the top-level `Makefile` +(`TLA_VERSION`, `TLA_JAR`, `TLA_SHA256`). The file is gitignored. + +To pre-download without running TLC: + +```sh +make tla-tools +``` + +## Run + +From the repository root: + +```sh +make tla-check +``` + +This runs TLC against both configurations and prints whether the outcome +matches the design contract: + +1. `tla/hlc/MCHLC.cfg` — the **correct design**, with HLC-4 + preconditions encoded as ASSUMEs / action guards. TLC must finish + with no invariant violation. +2. `tla/hlc/MCHLC_gap.cfg` — the **gap configuration**, with the + preconditions disabled (`EnableSafety = FALSE`). TLC must produce a + counterexample on `HLC4_NoRegressionAcrossTerms` — this is the + motivating evidence that strategy (c) handoff + the ceiling fence + are necessary. The `tla-check` target inverts the exit code for + this run so a TLC FAILURE here counts as PASS for CI. + +To run a single module by hand (skipping the Makefile): + +```sh +JAR=.cache/tla/tla2tools.jar + +# Safe config (expected PASS): +cd tla/hlc +java -XX:+UseParallelGC -cp ../../$JAR -DTLA-Library=../lib \ + tlc2.TLC -config MCHLC.cfg MCHLC.tla + +# Gap config (expected FAIL on HLC-4): +java -XX:+UseParallelGC -cp ../../$JAR -DTLA-Library=../lib \ + tlc2.TLC -config MCHLC_gap.cfg MCHLC.tla +``` + +State-space bounds are set in the `.cfg` files (M1 defaults: 3 nodes, +2 terms, ≤ 3 IssueTimestamp ops, wall clock ≤ 3 ms, `LogicalMax = 1`). +Raise them locally to deepen exploration; the M1 PR keeps them small so +the full `make tla-check` runs in well under a second. + +## What each module proves + +### `lib/Env.tla` +Shared constants (`Nodes`, `MaxClockSkewMs`, `HlcPhysicalWindowMs`, +`MaxTerms`, `MaxOps`, `MaxWallTime`, `LogicalMax`) and the single +normative `ASSUME MaxClockSkewMs < HlcPhysicalWindowMs` — HLC-4 +precondition (i). All downstream modules `EXTENDS Env`. + +### `lib/Raft.tla` +Abstract Raft interface modelled per NG4 of the design doc: log matching +and leader completeness are assumed (etcd/raft is a verified black box), +and only the `currentTerm` / `leaderOf` / `BecomeLeader` / `IsLeader` +operators that other modules actually need are exposed. `Propose`, +`Apply`, and `InstallSnapshot` are placeholders for later milestones. + +### `hlc/HLC.tla` +The HLC layer. Encodes all three HLC-4 preconditions: + +- **(i) bounded skew** — inherited via the Env ASSUME. +- **(ii) logical-counter handoff** — `BecomeLeader_HLC` calls + `Observe(maxAppliedHLC)` on the elected node when + `EnableSafety = TRUE`. This is strategy (c) from the design doc. +- **(iii) ceiling fence** — `IssueTimestamp` has + `wallNow[n] < physicalCeiling[n]` as an enabling condition when + `EnableSafety = TRUE`. + +Invariants asserted: + +| Invariant | Statement | +|---|---| +| `TypeOK` | Variable types are well-formed | +| `HLC1_PerNodeMonotonic` | Per-node committed ts are strictly ordered by issuance `seq` | +| `HLC1_Action` (PROPERTY) | Transition form: `hlcLast[n]` weakly increases on every step | +| `HLC2_NonNeg` | Residual `physicalCeiling \in Nat` content; harmless type sanity | +| `HLC2_Action` (PROPERTY) | Transition form: `physicalCeiling[n]` weakly increases on every step | +| `HLC3_LeaderOnly` | Every committed ts was issued by the leader of its term | +| `HLC4_NoRegressionAcrossTerms` | Earlier-term commits are strictly less than later-term commits | + +### `hlc/MCHLC.tla` + `MCHLC.cfg` / `MCHLC_gap.cfg` +TLC model-check instance. Two configurations, one module. See [Run](#run). + +## How to interpret a TLC failure + +When TLC finds a counterexample on an invariant it prints: + +``` +Error: Invariant is violated. +``` + +followed by a state trace (one TLA+ record per state). Each state shows +the values of every spec variable after the named action fired. For a +counterexample-of-HLC-4 you can read off which action sequence violated +strict-greater commits across terms — this is exactly the schedule the +follow-up Go code fix must rule out. + +To map a state back to the implementation: + +1. The `State N: ` header names the + spec action. Look it up in the corresponding `.tla` file. +2. Each action documents its corresponding implementation anchor in a + comment at the action definition. Example: `BecomeLeader_HLC` in + `HLC.tla` corresponds to `ShardedCoordinator.RunHLCLeaseRenewal` + detecting a new term and calling `hlc.Observe(fsm.MaxAppliedHLC())`. +3. The state's variable values map to the corresponding Go fields: + `hlcLast[n]` ↔ `kv/hlc.go HLC.last`, `physicalCeiling[n]` ↔ + `kv/hlc.go HLC.physicalCeiling`, etc. + +If the failure is from `tla-check`'s gap-config branch announcing +"unexpectedly passed", the spec or the toggle wiring has drifted — the +preconditions are no longer doing what the design doc says they do. + +## Reading list + +For reviewers new to TLA+: + +- Leslie Lamport, *Specifying Systems* (PDF, free) — the canonical + book; chapters 1–6 are enough to read the modules here. +- Leslie Lamport's TLA+ video lecture series on YouTube. +- Diego Ongaro's Raft TLA+ specification (the abstraction in + `lib/Raft.tla` is intentionally consistent with the style there). +- Hillel Wayne, *Practical TLA+* — friendlier introduction to the + syntax and to TLC model checking. + +For the elastickv-specific design: + +- [`docs/design/2026_05_28_partial_tla_safety_spec.md`](../docs/design/2026_05_28_partial_tla_safety_spec.md) + — the proposal that this directory implements. +- [`docs/architecture_overview.md`](../docs/architecture_overview.md) + — high-level subsystem diagrams that the modules abstract. +- [`CLAUDE.md`](../CLAUDE.md) — coding conventions; the "HLC" section + in particular describes the leader-only issuance invariant the spec + encodes as HLC-3. diff --git a/tla/hlc/HLC.tla b/tla/hlc/HLC.tla new file mode 100644 index 000000000..9be9635d6 --- /dev/null +++ b/tla/hlc/HLC.tla @@ -0,0 +1,246 @@ +--------------------------------- MODULE HLC --------------------------------- +(***************************************************************************) +(* TLA+ specification of the elastickv Hybrid Logical Clock. *) +(* Per docs/design/2026_05_28_partial_tla_safety_spec.md §5.1. *) +(* *) +(* This module encodes the HLC layer that sits on top of an abstract Raft *) +(* (lib/Raft.tla). All three HLC-4 preconditions are first-class: *) +(* *) +(* (i) bounded clock skew (Env.tla ASSUME) *) +(* (ii) strategy (c) logical-counter handoff — Observe(MaxAppliedHLC) *) +(* inside the BecomeLeader action *) +(* (iii) commit-time ceiling fencing — wallNow[n] < physicalCeiling[n] *) +(* as an enabling guard on IssueTimestamp *) +(* *) +(* Both (ii) and (iii) are gated by the CONSTANT EnableSafety so the same *) +(* spec drives the safe model-check (MCHLC.cfg, EnableSafety = TRUE) and *) +(* the gap model-check (MCHLC_gap.cfg, EnableSafety = FALSE) that *) +(* demonstrates the HLC-4 counterexample the design doc anticipates. *) +(***************************************************************************) + +EXTENDS Naturals, FiniteSets, Env, Raft + +CONSTANT EnableSafety \* TRUE: encode preconditions (ii) and (iii); FALSE: gap config + +VARIABLES + hlcLast, \* hlcLast[n] = [wall |-> Nat, logical |-> 0..LogicalMax] + physicalCeiling, \* physicalCeiling[n] : Nat (Raft-applied) + maxAppliedHLC, \* [wall, logical] : maximum committed HLC across all terms (FSM-observable) + wallNow, \* wallNow[n] : Nat (per-node wall clock) + committedTS, \* set of [ts, term] records — every persistence Next() result + opCount \* state-space bound: number of IssueTimestamp calls so far + +hlcVars == <> +vars == <> + +\* === DATA TYPES === +HLCVal(w, l) == [wall |-> w, logical |-> l] + +HLCLE(x, y) == + \/ x.wall < y.wall + \/ (x.wall = y.wall /\ x.logical <= y.logical) + +HLCLT(x, y) == + \/ x.wall < y.wall + \/ (x.wall = y.wall /\ x.logical < y.logical) + +\* === HLC.Next() COMPUTATION (mirrors kv/hlc.go) === +\* nowMs = max(wallNow[n], physicalCeiling[n]) +FlooredNow(n) == + IF wallNow[n] < physicalCeiling[n] THEN physicalCeiling[n] ELSE wallNow[n] + +\* The HLC algorithm proper. prev is the previously issued ts on this node. +\* See kv/hlc.go Next() — the two branches mirror the (nowMs > prevWall) and +\* (nowMs <= prevWall) cases, including the logical-overflow physical bump. +ComputeNextHLC(prev, flooredNow) == + IF flooredNow > prev.wall THEN + HLCVal(flooredNow, 0) + ELSE IF prev.logical < LogicalMax THEN + HLCVal(prev.wall, prev.logical + 1) + ELSE + HLCVal(prev.wall + 1, 0) \* logical overflow bumps wall by 1 + +\* === INIT === +HLCInit == + /\ hlcLast = [n \in Nodes |-> HLCVal(0, 0)] + /\ physicalCeiling = [n \in Nodes |-> 0] + /\ maxAppliedHLC = HLCVal(0, 0) + /\ wallNow = [n \in Nodes |-> 0] + /\ committedTS = {} + /\ opCount = 0 + +Init == RaftInit /\ HLCInit + +\* === ACTIONS === + +(***************************************************************************) +(* BecomeLeader_HLC(n) — Raft elects n as the new term's leader, then *) +(* (under EnableSafety) applies strategy (c): hlcLast[n] is raised to *) +(* max(hlcLast[n], maxAppliedHLC) by an in-memory Observe. This is the *) +(* M1 default per the design doc §5.1 HLC-4 (ii). *) +(***************************************************************************) +BecomeLeader_HLC(n) == + /\ BecomeLeader(n) + /\ hlcLast' = IF EnableSafety /\ HLCLT(hlcLast[n], maxAppliedHLC) + THEN [hlcLast EXCEPT ![n] = maxAppliedHLC] + ELSE hlcLast + /\ UNCHANGED <> + +(***************************************************************************) +(* TickWall(n) — n's wall clock advances by 1 ms. Bounded skew is enforced *) +(* against every other node (HLC-4 precondition (i) governs how big the *) +(* allowed gap is). MaxWallTime caps state-space exploration. *) +(***************************************************************************) +TickWall(n) == + /\ wallNow[n] < MaxWallTime + \* Bounded inter-node skew (HLC-4 precondition (i)). Stated with addition + \* rather than subtraction so the inequality stays inside Nat (per gemini + \* PR #856 review: subtraction in Naturals is undefined when the LHS is + \* smaller and would halt TLC). + \* Self is excluded from the skew check: the bounded-skew assumption + \* is about *inter*-node disagreement, not the ticking node's own + \* clock relative to its post-tick value. Without this exclusion + \* the constraint includes `wallNow[n] + 1 <= wallNow[n] + MaxClockSkewMs` + \* which is unsatisfiable whenever MaxClockSkewMs = 0 — disabling + \* TickWall entirely and preventing the model from ever exercising + \* wall-clock advancement (codex P2 on PR #856 round 2). + /\ \A m \in Nodes \ {n} : + /\ wallNow[n] + 1 <= wallNow[m] + MaxClockSkewMs + /\ wallNow[m] <= wallNow[n] + 1 + MaxClockSkewMs + /\ wallNow' = [wallNow EXCEPT ![n] = @ + 1] + /\ UNCHANGED <> + +(***************************************************************************) +(* ApplyCeiling(n) — the leader proposes a fresh ceiling and Raft applies *) +(* it on every node. Modelled atomically: every node's physicalCeiling *) +(* is raised to max(current, wallNow[leader] + HlcPhysicalWindowMs). *) +(* HLC-2 (ceiling monotonicity) is preserved by the max(). *) +(***************************************************************************) +ApplyCeiling(n) == + /\ IsLeader(n) + /\ LET newC == wallNow[n] + HlcPhysicalWindowMs IN + /\ physicalCeiling' = [m \in Nodes |-> IF physicalCeiling[m] < newC + THEN newC + ELSE physicalCeiling[m]] + /\ UNCHANGED <> + +(***************************************************************************) +(* IssueTimestamp(n) — issue a persistence HLC ts. Two normative gates: *) +(* HLC-3: only the active leader may call this. *) +(* HLC-4 (iii) under EnableSafety: wallNow[n] < physicalCeiling[n] *) +(* (the ceiling fence; if the ceiling has expired the leader *) +(* fails closed and ApplyCeiling must run first). *) +(* The committedTS log records (ts, term) for later HLC-4 checking; the *) +(* FSM-side maxAppliedHLC is bumped so a future BecomeLeader_HLC under *) +(* EnableSafety inherits the highest committed ts. *) +(***************************************************************************) +IssueTimestamp(n) == + /\ IsLeader(n) + /\ opCount < MaxOps \* state-space bound + /\ \/ ~EnableSafety \* gap config: no fence + \/ wallNow[n] < physicalCeiling[n] \* precondition (iii) guard + /\ LET prev == hlcLast[n] + ts == ComputeNextHLC(prev, FlooredNow(n)) + IN /\ hlcLast' = [hlcLast EXCEPT ![n] = ts] + \* Each commit gets the global opCount as a strict issuance + \* ordinal so HLC1_PerNodeMonotonic can talk about "the prev + \* and next commit by the same node" — without it the state + \* invariant would just be a tautology on a totally ordered set. + /\ committedTS' = committedTS \cup + {[ts |-> ts, term |-> activeTerm, node |-> n, seq |-> opCount]} + /\ maxAppliedHLC' = IF HLCLT(maxAppliedHLC, ts) + THEN ts + ELSE maxAppliedHLC + /\ opCount' = opCount + 1 + /\ UNCHANGED <> + +\* === NEXT === +Next == + \/ \E n \in Nodes : BecomeLeader_HLC(n) + \/ \E n \in Nodes : ApplyCeiling(n) + \/ \E n \in Nodes : IssueTimestamp(n) + \/ \E n \in Nodes : TickWall(n) + +Spec == Init /\ [][Next]_vars + +\* === STATE CONSTRAINT (TLC pruning) === +StateConstraint == + /\ opCount <= MaxOps + /\ activeTerm <= MaxTerms + /\ \A n \in Nodes : wallNow[n] <= MaxWallTime + /\ \A n \in Nodes : hlcLast[n].wall <= MaxWallTime + 2 + /\ \A n \in Nodes : physicalCeiling[n] <= MaxWallTime + HlcPhysicalWindowMs + +\* === TYPE INVARIANT === +HLCType == [wall : Nat, logical : 0..LogicalMax] + +TypeOK == + /\ hlcLast \in [Nodes -> HLCType] + /\ physicalCeiling \in [Nodes -> Nat] + /\ maxAppliedHLC \in HLCType + /\ wallNow \in [Nodes -> Nat] + /\ opCount \in 0..MaxOps + /\ \A r \in committedTS : + /\ r.ts \in HLCType + /\ r.term \in 1..MaxTerms + /\ r.node \in Nodes + /\ r.seq \in 0..MaxOps + +\* === SAFETY INVARIANTS === + +\* HLC-1 — per-node monotonicity of hlcLast across IssueTimestamp / +\* BecomeLeader_HLC / Observe. The earlier formulation +\* `HLCLE(r1, r2) \/ HLCLE(r2, r1)` was vacuously true under the total +\* lexicographic order on HLCVal (codex PR #856 P2): it would have admitted +\* a regression (5,0) -> (4,0) as "ordered". We instead carry an explicit +\* issuance ordinal on every committedTS record (`seq` is the global opCount +\* at the moment IssueTimestamp fired) and assert strict-greater on the same +\* node — every later issuance is strictly above the previous one. Combined +\* with the per-action invariant HLC1_Action below this gives the same +\* monotonicity the implementation provides via h.last's CAS. +HLC1_PerNodeMonotonic == + \A r1, r2 \in committedTS : + (r1.node = r2.node /\ r1.seq < r2.seq) => HLCLT(r1.ts, r2.ts) + +\* HLC-2 — physical ceiling monotonicity is fundamentally an *action* +\* invariant: physicalCeiling[n] must not decrease across any step. We +\* state it as the safety property `HLC2_Action` below and reference it +\* via PROPERTIES in MCHLC.cfg. No state-level form is meaningful here +\* (the earlier `>= 0` was vacuously implied by TypeOK — codex PR #856 +\* P2). +HLC2_NonNeg == \A n \in Nodes : physicalCeiling[n] \in Nat + +\* === ACTION-LEVEL INVARIANTS === +(***************************************************************************) +(* Action-level safety properties. In TLA+ idiom these are written as *) +(* [][P]_vars and registered under PROPERTIES (not INVARIANTS) in the .cfg *) +(* — TLC checks them as transition properties without requiring fairness. *) +(***************************************************************************) + +\* HLC-1 (transition form): hlcLast on every node weakly increases across +\* every step. Built into ComputeNextHLC and into BecomeLeader_HLC's +\* Observe; this property pins the contract. +HLC1_Action == + [][\A n \in Nodes : HLCLE(hlcLast[n], hlcLast'[n])]_vars + +\* HLC-2 (transition form): physicalCeiling on every node weakly increases +\* across every step. ApplyCeiling raises with max(); no action lowers it. +HLC2_Action == + [][\A n \in Nodes : physicalCeiling[n] <= physicalCeiling'[n]]_vars + +\* HLC-3 — only the leader of a term issues commits for that term. Built +\* into IssueTimestamp via the IsLeader(n) guard; asserted explicitly so +\* TLC reports a violation if anyone bypasses the guard. +HLC3_LeaderOnly == + \A r \in committedTS : + r.term > 0 /\ leaderOf[r.term] = r.node + +\* HLC-4 — every commit in a later term is strictly greater than every +\* commit in any earlier term. This is the load-bearing invariant: when +\* EnableSafety = TRUE strategy (c) + the ceiling fence enforce it; when +\* EnableSafety = FALSE TLC is expected to surface a counterexample. +HLC4_NoRegressionAcrossTerms == + \A r1, r2 \in committedTS : + r1.term < r2.term => HLCLT(r1.ts, r2.ts) + +============================================================================= diff --git a/tla/hlc/MCHLC.cfg b/tla/hlc/MCHLC.cfg new file mode 100644 index 000000000..b088a885b --- /dev/null +++ b/tla/hlc/MCHLC.cfg @@ -0,0 +1,31 @@ +\* TLC config: HLC safety model — encodes all three HLC-4 preconditions. +\* Expected outcome: TLC explores the bounded state space and finds no +\* invariant violation, i.e. proves HLC-1..HLC-4 hold in the correct +\* design. + +SPECIFICATION Spec + +CONSTANTS + Nodes = {n1, n2, n3} + MaxClockSkewMs = 1 + HlcPhysicalWindowMs = 2 + MaxTerms = 2 + MaxOps = 3 + MaxWallTime = 3 + LogicalMax = 1 + EnableSafety = TRUE + +SYMMETRY NodeSymmetry + +INVARIANTS + TypeOK + HLC1_PerNodeMonotonic + HLC2_NonNeg + HLC3_LeaderOnly + HLC4_NoRegressionAcrossTerms + +PROPERTIES + HLC1_Action + HLC2_Action + +CONSTRAINT StateConstraint diff --git a/tla/hlc/MCHLC.tla b/tla/hlc/MCHLC.tla new file mode 100644 index 000000000..e3debdd99 --- /dev/null +++ b/tla/hlc/MCHLC.tla @@ -0,0 +1,21 @@ +-------------------------------- MODULE MCHLC -------------------------------- +(***************************************************************************) +(* Model-check instance for HLC.tla. *) +(* *) +(* Two .cfg files point at this module: *) +(* MCHLC.cfg — EnableSafety = TRUE (correct design; TLC passes) *) +(* MCHLC_gap.cfg — EnableSafety = FALSE (no preconditions; TLC fails *) +(* on HLC-4 with a counterexample, demonstrating the *) +(* design gap the spec proposal exists to surface) *) +(* *) +(* Run from the repository root: *) +(* make tla-check *) +(***************************************************************************) + +EXTENDS HLC, TLC + +\* TLC symmetry hint: nodes are interchangeable in HLC.tla (per the spec +\* §6.3 per-module symmetry — HLC alone is node-symmetric). +NodeSymmetry == Permutations(Nodes) + +============================================================================= diff --git a/tla/hlc/MCHLC_gap.cfg b/tla/hlc/MCHLC_gap.cfg new file mode 100644 index 000000000..790ffbf39 --- /dev/null +++ b/tla/hlc/MCHLC_gap.cfg @@ -0,0 +1,36 @@ +\* TLC config: HLC gap model — preconditions (ii) and (iii) are disabled +\* (EnableSafety = FALSE). Expected outcome: TLC FAILS the +\* HLC4_NoRegressionAcrossTerms invariant and emits a counterexample +\* trace. This is not a bug in the spec; it is the motivating evidence +\* that strategy (c) handoff + the IssueTimestamp ceiling fence are +\* necessary for HLC-4 to hold. +\* +\* The make tla-check target inverts the exit code for this config so a +\* TLC FAILURE here counts as PASS for CI. + +SPECIFICATION Spec + +CONSTANTS + Nodes = {n1, n2, n3} + MaxClockSkewMs = 1 + HlcPhysicalWindowMs = 2 + MaxTerms = 2 + MaxOps = 3 + MaxWallTime = 3 + LogicalMax = 1 + EnableSafety = FALSE + +SYMMETRY NodeSymmetry + +INVARIANTS + TypeOK + HLC1_PerNodeMonotonic + HLC2_NonNeg + HLC3_LeaderOnly + HLC4_NoRegressionAcrossTerms + +PROPERTIES + HLC1_Action + HLC2_Action + +CONSTRAINT StateConstraint diff --git a/tla/lib/Env.tla b/tla/lib/Env.tla new file mode 100644 index 000000000..add03db74 --- /dev/null +++ b/tla/lib/Env.tla @@ -0,0 +1,32 @@ +-------------------------------- MODULE Env ---------------------------------- +(***************************************************************************) +(* Shared environment constants used by every module in the elastickv *) +(* TLA+ suite. Per docs/design/2026_05_28_partial_tla_safety_spec.md *) +(* §6.1. *) +(* *) +(* The single normative ASSUME below encodes HLC-4 precondition (i) — the *) +(* bounded-skew assumption. Modules that follow (Raft, HLC, …) EXTEND *) +(* this module so the assumption is in scope wherever it is needed. *) +(***************************************************************************) + +EXTENDS Naturals + +CONSTANTS + Nodes, \* finite set of node identities + MaxClockSkewMs, \* upper bound on inter-node wall clock skew (ms) + HlcPhysicalWindowMs, \* hlcPhysicalWindowMs in kv/sharded_coordinator.go + MaxTerms, \* state-space bound: number of leadership terms + MaxOps, \* state-space bound: number of persistence Next() calls + MaxWallTime, \* state-space bound: largest wall clock value + LogicalMax \* logical-counter wrap bound (real impl: 65535) + +\* HLC-4 precondition (i): the renewed ceiling outlives any inter-node skew. +\* See HLC.tla and the design doc §5.1 (i) for the hazard this rules out. +ASSUME MaxClockSkewMs < HlcPhysicalWindowMs + +\* Useful sets +Terms == 1..MaxTerms + +\* Liveness sets (placeholders for future modules; trivial in M1) +LiveNodes == Nodes +============================================================================= diff --git a/tla/lib/Raft.tla b/tla/lib/Raft.tla new file mode 100644 index 000000000..dc48fbe97 --- /dev/null +++ b/tla/lib/Raft.tla @@ -0,0 +1,82 @@ +-------------------------------- MODULE Raft --------------------------------- +(***************************************************************************) +(* Abstract Raft interface for the elastickv TLA+ suite. *) +(* Per docs/design/2026_05_28_partial_tla_safety_spec.md §3 / §6.1, Raft *) +(* is modelled as a black box that delivers the standard log-matching + *) +(* leader-completeness guarantees. This module exposes the **interface** *) +(* (leader / term / BecomeLeader / Apply / InstallSnapshot) — the internal *) +(* election / replication state machine is NOT modelled. Modules that *) +(* need Raft EXTEND or INSTANCE this module. *) +(* *) +(* For M1 (HLC alone) only the leader / term part is exercised. Apply / *) +(* InstallSnapshot are stubs that later milestones (M2..M5) fill in. *) +(***************************************************************************) + +EXTENDS Naturals, FiniteSets, Env + +VARIABLES + currentTerm, \* currentTerm[n] \in 0..MaxTerms : node n's view of the active term + leaderOf, \* leaderOf[t] \in Nodes \cup {NoLeader} : leader elected for term t + activeTerm \* highest term that has had a leader elected (0 = pre-election) + +raftVars == <> + +NoLeader == "no_leader" + +\* All term indices reachable in the bounded model. +TermsExplored == 0..MaxTerms + +\* === INIT === +RaftInit == + /\ currentTerm = [n \in Nodes |-> 0] + /\ leaderOf = [t \in TermsExplored |-> NoLeader] + /\ activeTerm = 0 + +\* === ACTIONS === + +(***************************************************************************) +(* BecomeLeader(n): node n is elected leader of a fresh term. *) +(* Models a successful Raft election as an atomic step — the underlying *) +(* vote-counting protocol is abstracted away (NG4 in the spec doc). *) +(***************************************************************************) +BecomeLeader(n) == + /\ activeTerm < MaxTerms + /\ LET t == activeTerm + 1 IN + /\ activeTerm' = t + /\ leaderOf' = [leaderOf EXCEPT ![t] = n] + /\ currentTerm' = [currentTerm EXCEPT ![n] = t] + +(***************************************************************************) +(* IsLeader(n): predicate. TRUE iff n is the leader of the currently *) +(* active term. Used as a guard wherever the spec restricts an action *) +(* to the leader (e.g. HLC.IssueTimestamp). *) +(***************************************************************************) +IsLeader(n) == + /\ activeTerm > 0 + /\ leaderOf[activeTerm] = n + +\* Convenience: the node leading the current term, or NoLeader. +CurrentLeader == + IF activeTerm = 0 THEN NoLeader ELSE leaderOf[activeTerm] + +\* === STUBS FOR LATER MILESTONES === +(***************************************************************************) +(* Propose / Apply / InstallSnapshot are part of the Raft interface that *) +(* later milestones (M2 OCC, M3 MVCC, M5 Composed) plug into. For M1 they *) +(* are left as comments rather than no-op actions to avoid expanding the *) +(* HLC-only state space. *) +(***************************************************************************) + +\* === ASSUMED PROPERTIES === +(***************************************************************************) +(* Per NG4: the abstract Raft layer assumes log matching and leader *) +(* completeness without proving them — those are properties of etcd/raft *) +(* which we treat as a verified black box. In M1 we exercise only the *) +(* leader-uniqueness consequence, which is structural here: BecomeLeader *) +(* sets exactly one entry in leaderOf per term. *) +(***************************************************************************) +LeaderUniquenessPerTerm == + \A t \in TermsExplored : t > 0 /\ leaderOf[t] # NoLeader => + leaderOf[t] \in Nodes + +=============================================================================