Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ jepsen/.ssh/
server

# TLA+ TLC artifacts (counterexample traces, state directories)
tla/**/MCHLC_TTrace_*.tla
tla/**/MCHLC_TTrace_*.bin
tla/**/MC*_TTrace_*.tla
tla/**/MC*_TTrace_*.bin
tla/**/states/

# Admin SPA build outputs. The placeholder internal/admin/dist/index.html
Expand Down
52 changes: 6 additions & 46 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,49 +56,9 @@ $(TLA_JAR):
@echo "tla2tools.jar ready at $(TLA_JAR) (SHA-256 $(TLA_SHA256))"

tla-check: $(TLA_JAR)
@echo "================================================================"
@echo " TLC: tla/hlc/MCHLC.cfg (correct design, expected PASS)"
@echo "================================================================"
@cd tla/hlc && java -XX:+UseParallelGC \
-cp ../../$(TLA_JAR) -DTLA-Library=$(TLA_LIB) \
tlc2.TLC -nowarning -config MCHLC.cfg MCHLC.tla
@echo
@echo "================================================================"
@echo " TLC: tla/hlc/MCHLC_gap.cfg (no preconditions, expected FAIL)"
@echo "================================================================"
@# Capture the gap run output so we can validate not just that TLC
@# exited non-zero but that the failure reason is exactly the HLC-4
@# invariant violation we expect. Without this check a parse error,
@# deadlock, JVM crash, or different invariant violation would silently
@# count as "expected gap counterexample" (codex P2 on PR #856 round 2).
@cd tla/hlc && \
out=$$(java -XX:+UseParallelGC \
-cp ../../$(TLA_JAR) -DTLA-Library=$(TLA_LIB) \
tlc2.TLC -nowarning -config MCHLC_gap.cfg MCHLC.tla 2>&1) ; \
rc=$$? ; \
printf '%s\n' "$$out" ; \
if [ "$$rc" -eq 0 ]; then \
echo ; \
echo "ERROR: MCHLC_gap.cfg unexpectedly passed."; \
echo " The gap configuration disables HLC-4 preconditions (ii)+(iii);"; \
echo " TLC was supposed to surface a counterexample showing why those"; \
echo " preconditions are necessary. A clean pass means either the spec"; \
echo " no longer encodes the gap correctly or the safety guards leaked"; \
echo " past the EnableSafety toggle."; \
exit 1; \
fi ; \
if printf '%s\n' "$$out" | grep -qF "Invariant HLC4_NoRegressionAcrossTerms is violated"; then \
echo ; \
echo "OK: MCHLC_gap.cfg failed as designed (HLC-4 counterexample)."; \
else \
echo ; \
echo "ERROR: MCHLC_gap.cfg failed, but the reason is NOT a HLC-4 violation."; \
echo " Expected substring in TLC output:"; \
echo " 'Invariant HLC4_NoRegressionAcrossTerms is violated'"; \
echo " The non-zero exit may indicate a parse error, deadlock, JVM"; \
echo " crash, or a different invariant breaking — review the output"; \
echo " above before treating this as a regression in the gap evidence."; \
exit 1; \
fi
@echo
@echo "tla-check: all model-check outcomes match the design contract."
@# Per-module orchestration lives in scripts/tla-check.sh so adding
@# M3..M5 only needs an entry in that script's TLA_MODULES array
@# and a `case` line for the gap-invariant string. The script does
@# the safe-config / gap-config pair per module, validates the gap
@# failure reason via string match, and aggregates exit codes.
@bash scripts/tla-check.sh
130 changes: 130 additions & 0 deletions scripts/tla-check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/usr/bin/env bash
# tla-check.sh — run TLC on every module in the elastickv TLA+ suite.
#
# Per docs/design/2026_05_28_partial_tla_safety_spec.md §7.2. Each
# module under tla/<module>/ ships:
# MC<MODULE>.tla — the TLC model module
# MC<MODULE>.cfg — the correct-design config (expected PASS)
# MC<MODULE>_gap.cfg — the preconditions-disabled config (expected
# FAIL with a SPECIFIC invariant counterexample,
# checked by string match against
# TLA_<MODULE>_GAP_INVARIANT below)
#
# Adding a new module (M3..M5): append to TLA_MODULES, drop the per-
# module gap-invariant string into the case statement, and `make
# tla-check` picks it up automatically.

set -uo pipefail

REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
TLA_JAR="${REPO_ROOT}/.cache/tla/tla2tools.jar"

if [ ! -f "$TLA_JAR" ]; then
echo "ERROR: ${TLA_JAR} not found — run 'make tla-tools' first." >&2
exit 1
fi

# Modules to check, in dependency order (libs before consumers).
TLA_MODULES=( "hlc" "occ" )

# The invariant the gap config is expected to break, per module.
# These strings are matched against TLC stdout via grep -F (literal).
gap_invariant_for() {
case "$1" in
hlc) echo "Invariant HLC4_NoRegressionAcrossTerms is violated" ;;
occ) echo "Invariant OCC1_CommitTsAboveStart is violated" ;;
*)
echo "ERROR: no gap-invariant string registered for module '$1'." \
"Add a case to scripts/tla-check.sh." >&2
exit 64
;;
esac
}

mc_basename() {
# tla/<dir>/MC<UPPER>.tla — module name in UPPERCASE
printf 'MC%s' "$(printf '%s' "$1" | tr '[:lower:]' '[:upper:]')"
}

run_tlc() {
local module="$1"
local cfg="$2"
local mc
mc="$(mc_basename "$module")"
( cd "${REPO_ROOT}/tla/${module}" && \
java -XX:+UseParallelGC \
-cp "${TLA_JAR}" -DTLA-Library=../lib \
tlc2.TLC -nowarning -config "${cfg}" "${mc}.tla" )
}

overall_rc=0

for module in "${TLA_MODULES[@]}"; do
mc="$(mc_basename "$module")"
safe_cfg="${mc}.cfg"
gap_cfg="${mc}_gap.cfg"
# `set -e` is not in effect (the script uses `set -uo pipefail`).
# Without explicit checking, a non-zero exit from gap_invariant_for
# inside the command substitution would only terminate the
# subshell — the parent loop would continue with gap_inv="" and the
# later `grep -qF "$gap_inv"` would always match (empty pattern),
# silently passing the gap check. Check explicitly (gemini HIGH
# on PR #858).
if ! gap_inv="$(gap_invariant_for "$module")"; then
echo "ERROR: gap_invariant_for failed for module '${module}' — see error above." >&2
overall_rc=1
continue
fi

echo "================================================================"
echo " TLC: tla/${module}/${safe_cfg} (correct design, expected PASS)"
echo "================================================================"
if ! run_tlc "$module" "$safe_cfg"; then
echo
echo "ERROR: ${safe_cfg} did not pass — see TLC output above." >&2
overall_rc=1
continue
fi
echo

echo "================================================================"
echo " TLC: tla/${module}/${gap_cfg} (no preconditions, expected FAIL on ${gap_inv})"
echo "================================================================"
# Capture stdout+stderr so we can validate both the exit code AND the
# specific invariant string. Without the string match, a parse
# error / deadlock / JVM crash / different invariant would silently
# count as the expected counterexample (codex P2 on PR #856 round 2).
gap_out=$(run_tlc "$module" "$gap_cfg" 2>&1)
gap_rc=$?
printf '%s\n' "$gap_out"
if [ "$gap_rc" -eq 0 ]; then
echo
echo "ERROR: ${gap_cfg} unexpectedly passed." >&2
echo " The gap configuration disables the safety guard; TLC was" >&2
echo " supposed to surface a counterexample. A clean pass means" >&2
echo " either the spec no longer encodes the gap correctly or the" >&2
echo " safety guards leaked past the EnableSafety toggle." >&2
overall_rc=1
continue
fi
if printf '%s\n' "$gap_out" | grep -qF "$gap_inv"; then
echo
echo "OK: ${gap_cfg} failed as designed (${gap_inv})."
else
echo
echo "ERROR: ${gap_cfg} failed, but the reason is NOT \"${gap_inv}\"." >&2
echo " The non-zero exit may indicate a parse error, deadlock, JVM" >&2
echo " crash, or a different invariant breaking — review the output" >&2
echo " above before treating this as a regression in the gap evidence." >&2
overall_rc=1
continue
fi
echo
done

if [ "$overall_rc" -eq 0 ]; then
echo "tla-check: all model-check outcomes match the design contract."
else
echo "tla-check: at least one module did not match the design contract." >&2
fi
exit "$overall_rc"
75 changes: 57 additions & 18 deletions tla/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ run it without touching anything under the Go source tree.
| Milestone | Scope | Status |
|---|---|---|
| M1 | `lib/Raft.tla`, `lib/Env.tla`, `hlc/HLC.tla`, `make tla-check` | Landed |
| M2 | `occ/OCC.tla` (safety invariants OCC-1..OCC-5) | Not started |
| M2 | `occ/OCC.tla` (safety invariants OCC-1..OCC-5) | Landed |
| M3 | `mvcc/MVCC.tla` (MVCC-1..MVCC-4) | Not started |
| M4 | `routes/Routes.tla` (Routes-1..Routes-4) | Not started |
| M5 | `composed/Composed.tla` + CI integration | Not started |
Expand Down Expand Up @@ -46,38 +46,47 @@ From the repository root:
make tla-check
```

This runs TLC against both configurations and prints whether the outcome
This runs TLC against the safe + gap configuration pair for every
module declared in `scripts/tla-check.sh` (currently HLC and OCC; M3 /
M4 / M5 will be added as they land), and prints whether the outcome
matches the design contract:

1. `tla/hlc/MCHLC.cfg` — the **correct design**, with HLC-4
preconditions encoded as ASSUMEs / action guards. TLC must finish
with no invariant violation.
2. `tla/hlc/MCHLC_gap.cfg` — the **gap configuration**, with the
preconditions disabled (`EnableSafety = FALSE`). TLC must produce a
counterexample on `HLC4_NoRegressionAcrossTerms` — this is the
motivating evidence that strategy (c) handoff + the ceiling fence
are necessary. The `tla-check` target inverts the exit code for
this run so a TLC FAILURE here counts as PASS for CI.

To run a single module by hand (skipping the Makefile):
- **Safe config (`tla/<module>/MC<MODULE>.cfg`)** — the correct
design, with each module's preconditions or commit guards enabled.
TLC must finish with no invariant violation.
- **Gap config (`tla/<module>/MC<MODULE>_gap.cfg`)** — the
preconditions or guards disabled (`EnableSafety = FALSE`). TLC must
produce a counterexample on the module's load-bearing invariant
(`HLC4_NoRegressionAcrossTerms` for HLC; `OCC1_CommitTsAboveStart`
for OCC). The harness inverts the exit code for the gap run AND
greps for the specific invariant-violation string, so any other
failure mode (parse error, deadlock, JVM crash, different
invariant) fails the job rather than silently being treated as the
expected gap evidence.

To run a single module by hand (skipping the harness):

```sh
JAR=.cache/tla/tla2tools.jar

# Safe config (expected PASS):
# HLC safe (expected PASS):
cd tla/hlc
java -XX:+UseParallelGC -cp ../../$JAR -DTLA-Library=../lib \
tlc2.TLC -config MCHLC.cfg MCHLC.tla

# Gap config (expected FAIL on HLC-4):
# HLC gap (expected FAIL on HLC4_NoRegressionAcrossTerms):
java -XX:+UseParallelGC -cp ../../$JAR -DTLA-Library=../lib \
tlc2.TLC -config MCHLC_gap.cfg MCHLC.tla
```

The OCC module follows the same pattern under `tla/occ/` with
`MCOCC.cfg` / `MCOCC_gap.cfg`; future M3 / M4 / M5 modules will too.

State-space bounds are set in the `.cfg` files (M1 defaults: 3 nodes,
2 terms, ≤ 3 IssueTimestamp ops, wall clock ≤ 3 ms, `LogicalMax = 1`).
Raise them locally to deepen exploration; the M1 PR keeps them small so
the full `make tla-check` runs in well under a second.
2 terms, ≤ 3 IssueTimestamp ops, wall clock ≤ 3 ms, `LogicalMax = 1`;
M2 defaults: 2 keys, 2 txns, `MaxTs = 6`, `MaxOps = 3`). Raise them
locally to deepen exploration; the bounded configs keep `make
tla-check` end-to-end in well under a second.

## What each module proves

Expand Down Expand Up @@ -120,6 +129,36 @@ Invariants asserted:
### `hlc/MCHLC.tla` + `MCHLC.cfg` / `MCHLC_gap.cfg`
TLC model-check instance. Two configurations, one module. See [Run](#run).

### `occ/OCC.tla`
The OCC layer. Models the Percolator-style 2PC transaction lifecycle
`Idle → Active → Prepared → Committed / Aborted` and the lock map
`(key, lock_ts) → start_ts`. The lock-resolver
(`kv/lock_resolver.go`) is abstracted into the atomic lock drain
inside `Commit`/`Abort` for M2; the async-resolver case is deferred
to M5 (composed) — see the block comment in `OCC.tla` where
`LockResolve` would have lived. HLC is abstracted to a single global
monotonic counter for M2; M5 will INSTANCE `HLC.tla` for the real
48/16 layout. The `EnableSafety` CONSTANT gates the OCC-1 commit
guard so the same module drives the safe and gap configurations.

Invariants asserted:

| Invariant | Statement |
|---|---|
| `TypeOK` | Variable types are well-formed |
| `OCC1_CommitTsAboveStart` | Every committed txn has `commit_ts > start_ts` |
| `OCC2_NoWriteWriteConflict` | Two committed txns sharing a write key have distinct commit_ts and one started after the other committed (`commit_ts[earlier] <= start_ts[later]`) |
| `OCC3_ReadSnapshotStability` | Every read observation equals `LatestVisible(k, start_ts(t))` (the snapshot value remains stable under monotonic-ts allocation in BeginTxn / Commit) |
| `OCC4_NoStrandedLockAtQuiescence` | When all txns are in a terminal state, no lock remains |
| `OCC5_StartTsConsistency` | Every read observation is bounded by the txn's `start_ts` (= `read_ts` by OCC-5) |
| `OCC5_Action` (PROPERTY) | Transition form: `start_ts[t]` is assigned once at `BeginTxn` and never updated |
| `CommitTsAssignedOnce` (PROPERTY) | Transition form: `commit_ts[t]` is assigned once at `Commit` and never updated |

### `occ/MCOCC.tla` + `MCOCC.cfg` / `MCOCC_gap.cfg`
TLC model-check instance for OCC. Same one-module / two-config layout
as MCHLC. The gap config disables the OCC-1 commit guard; TLC
produces an `OCC1_CommitTsAboveStart` counterexample at depth ≈ 5.

## How to interpret a TLC failure

When TLC finds a counterexample on an invariant it prints:
Expand Down
36 changes: 36 additions & 0 deletions tla/occ/MCOCC.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
\* TLC config: OCC safety model — encodes the OCC-1 commit guard.
\* Expected outcome: TLC explores the bounded state space and finds no
\* invariant violation, i.e. proves OCC-1..OCC-5 hold in the correct
\* design.

SPECIFICATION Spec

CONSTANTS
Keys = {k1, k2}
TxnIds = {t1, t2}
MaxTs = 6
MaxOps = 3
EnableSafety = TRUE

SYMMETRY Symmetry

INVARIANTS
TypeOK
OCC1_CommitTsAboveStart
OCC2_NoWriteWriteConflict
OCC3_ReadSnapshotStability
OCC4_NoStrandedLockAtQuiescence
OCC5_StartTsConsistency

PROPERTIES
OCC5_Action
CommitTsAssignedOnce

CONSTRAINT StateConstraint

\* Quiescence is a valid OCC end state (OCC-4 specifically checks it);
\* a state with no enabled action is "all txns terminal and no locks",
\* which is the bounded-safety form we want to verify, not a model bug.
CHECK_DEADLOCK
FALSE

23 changes: 23 additions & 0 deletions tla/occ/MCOCC.tla
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
-------------------------------- MODULE MCOCC --------------------------------
(***************************************************************************)
(* Model-check instance for OCC.tla. *)
(* *)
(* Two .cfg files point at this module: *)
(* MCOCC.cfg — EnableSafety = TRUE (correct design; TLC passes) *)
(* MCOCC_gap.cfg — EnableSafety = FALSE (Commit's OCC-1 guard removed; *)
(* TLC fails on OCC1_CommitTsAboveStart and emits a *)
(* counterexample, motivating the spec's existence) *)
(* *)
(* Run via `make tla-check` from the repository root. *)
(***************************************************************************)

EXTENDS OCC, TLC

\* TLC symmetry hint: TxnIds and Keys are interchangeable in OCC.tla
\* per the spec doc §6.3 (OCC alone is both txn- and key-symmetric —
\* keys participate only as indices, not in an order).
TxnSymmetry == Permutations(TxnIds)
KeySymmetry == Permutations(Keys)
Symmetry == TxnSymmetry \cup KeySymmetry

=============================================================================
Loading
Loading