diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c7b0380..c4ba0d5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -74,11 +74,11 @@ jobs:
       # grammar that is a tree-sitter target, so a conflict introduced by a grammar
       # change is caught even for the dialects whose wasm is not built below (tsx/js/jsx)
       # — exactly the gap that let an unresolved `type`/`class_heritage` conflict ship.
-      # yaml is excluded: its indentation tokens are not yet wired as tree-sitter
-      # externals, so its generated grammar.js is not loadable (separate open issue).
+      # yaml is now included (issue #3): its indent/scalar tokens are wired as tree-sitter
+      # externals and the C indentation scanner is implemented, so its grammar generates + builds.
       - name: Generate every derived tree-sitter grammar (conflict gate, no wasm)
         run: |
-          for g in typescript typescriptreact javascript javascriptreact html; do
+          for g in typescript typescriptreact javascript javascriptreact html yaml; do
             echo "── tree-sitter generate: $g"
             ( cd "tree-sitter/$g" && npx tree-sitter generate )
           done
@@ -97,3 +97,11 @@ jobs:
           npx tree-sitter build --wasm .
           cd ../..
           node test/html-treesitter.ts
+      # The derived YAML tree-sitter (issue #3) — build the wasm (its C indentation scanner must
+      # compile + link). The accuracy bench (test/treesitter-yaml-bench.ts) needs the yaml-test-suite
+      # checkout, so it runs in the readme-bench workflow where the suite is already cloned.
+      - name: Build the derived YAML tree-sitter grammar to wasm
+        run: |
+          cd tree-sitter/yaml
+          npx tree-sitter generate
+          npx tree-sitter build --wasm .
diff --git a/src/gen-treesitter.ts b/src/gen-treesitter.ts
index dda7dea..307cb3f 100644
--- a/src/gen-treesitter.ts
+++ b/src/gen-treesitter.ts
@@ -1,6 +1,6 @@
-import type { CstGrammar, RuleExpr, RuleDecl } from './types.ts';
+import type { CstGrammar, RuleExpr, RuleDecl, TokenPattern } from './types.ts';
 import { collectLiterals, isKeywordLiteral } from './grammar-utils.ts';
-import { tokenPatternIsNever, tokenPatternSource, tokenPatternStartsWithDecimal, tokenPatternStringDelimiters, tokenPatternTrailingCharClass } from './token-pattern.ts';
+import { tokenPatternIsNever, tokenPatternLiteralPrefix, tokenPatternSource, tokenPatternStartsWithDecimal, tokenPatternStringDelimiters, tokenPatternTrailingCharClass } from './token-pattern.ts';
 
 // ════════════════════════════════════════════════════════════════════════════
 // gen-treesitter — derive a tree-sitter parser package from one CstGrammar.
@@ -143,6 +143,14 @@ interface GrammarJsContext {
   externalSnake: Set<string>;
   /** original token name → external scanner token name (snake) if scanner-provided */
   scannerTokenFor: Map<string, string>;
+  /** flow-delimiter LITERAL char (`[` `]` `{` `}`) → synthetic external scanner token (snake). These
+   *  bare literals in the flow rules are swapped for refs to the scanner token (renderExpr). Empty for
+   *  non-flow grammars. See flowSyntheticTokens. */
+  flowLiteralTokens: Map<string, string>;
+  /** Non-start rules whose body can derive the empty string. tree-sitter rejects these, so their
+   *  bodies are made non-empty and every reference to them is wrapped in optional() (ε-elimination,
+   *  see makeNonEmpty / wrapNullableRefs). Empty for grammars with no nullable non-start rules. */
+  nullableNonStart: Set<string>;
   /**
    * If the grammar declares an interpolated-template token, the plan for turning it
    * into a `template` RULE (delimiters + the `${ … }` hole) backed by an external
@@ -177,8 +185,13 @@ function hasMarker(expr: RuleExpr): boolean {
  */
 function renderExpr(expr: RuleExpr, ctx: GrammarJsContext): string {
   switch (expr.type) {
-    case 'literal':
+    case 'literal': {
+      // A flow-collection delimiter literal (`[` `]` `{` `}`) is emitted by the external scanner (so
+      // flow_depth persists), so reference its synthetic scanner token instead of the bare string.
+      const flowSym = ctx.flowLiteralTokens.get(expr.value);
+      if (flowSym) return `$.${flowSym}`;
       return jsString(expr.value);
+    }
     case 'ref': {
       // A token provided by the external scanner is referenced by its scanner
       // symbol name (e.g. `regex` → `regex_literal`), not its plain token snake.
@@ -342,9 +355,116 @@ function buildPrattRule(rule: RuleDecl, ctx: GrammarJsContext): string {
   return `choice(\n      ${branches.join(',\n      ')}\n    )`;
 }
 
+// ── Nullable-rule elimination (ε-elimination) ────────────────────────────────
+// tree-sitter rejects a NON-START rule that can match the empty string. An indentation grammar like
+// YAML has several (a YAML node/entry may be NULL: `key:` with no value, `{a: }`, an empty doc), so
+// `node`/`flow_node`/`flow_map_entry`/… are nullable. We push the emptiness to the CALL SITES: make
+// each such rule's body NON-EMPTY (`makeNonEmpty`) and wrap every reference to it in `optional(...)`
+// (`wrapNullableRefs`). The accepted language is identical (rule-or-empty at each use), and ONLY the
+// tree-sitter target is touched — the parser and the other generators never see this. Computed once
+// and gated on the grammar actually having nullable non-start rules, so every grammar that already
+// `generate`s (no such rules) is byte-identical.
+
+/** Non-start rules whose body can derive ε. `isTerminal` flags tokens / external symbols (never nullable). */
+function computeNullableNonStart(grammar: CstGrammar, startName: string, isTerminal: (name: string) => boolean): Set<string> {
+  const ruleNames = new Set(grammar.rules.map(r => r.name));
+  const nullable = new Set<string>();
+  const exprNullable = (e: RuleExpr): boolean => {
+    switch (e.type) {
+      case 'literal': return e.value === '';
+      case 'ref': return ruleNames.has(e.name) && !isTerminal(e.name) && nullable.has(e.name);
+      case 'seq': return e.items.every(exprNullable);
+      case 'alt': return e.items.some(exprNullable);
+      case 'quantifier': return e.kind === '+' ? exprNullable(e.body) : true; // ?,* match empty
+      case 'group': return exprNullable(e.body);
+      case 'sep': return true;  // renders to optional(seq(...))
+      default: return true;     // not/sameLine/noCommentBefore/noMultilineFlowBefore/op/prefix/postfix → blank()
+    }
+  };
+  let changed = true;
+  while (changed) { changed = false; for (const r of grammar.rules) if (!nullable.has(r.name) && exprNullable(r.body)) { nullable.add(r.name); changed = true; } }
+  nullable.delete(startName); // the start rule MAY be nullable in tree-sitter
+  return nullable;
+}
+
+/** Wrap every reference to a made-non-empty (`nn`) rule in optional() — the may-be-empty form. */
+function wrapNullableRefs(e: RuleExpr, nn: Set<string>): RuleExpr {
+  switch (e.type) {
+    case 'ref': return nn.has(e.name) ? { type: 'quantifier', kind: '?', body: e } : e;
+    case 'seq': return { type: 'seq', items: e.items.map(i => wrapNullableRefs(i, nn)) };
+    case 'alt': return { type: 'alt', items: e.items.map(i => wrapNullableRefs(i, nn)) };
+    case 'quantifier': return { ...e, body: wrapNullableRefs(e.body, nn) };
+    case 'group': return { ...e, body: wrapNullableRefs(e.body, nn) };
+    case 'sep': return { ...e, element: wrapNullableRefs(e.element, nn) };
+    default: return e;
+  }
+}
+
+/** Whether e is nullable AFTER the transform (a ref to an `nn` rule is now wrapped optional → nullable;
+ *  every other ref is non-nullable, since `nn` is exactly the made-non-empty set). */
+function exprNullableAfter(e: RuleExpr, nn: Set<string>): boolean {
+  switch (e.type) {
+    case 'literal': return e.value === '';
+    case 'ref': return nn.has(e.name);
+    case 'seq': return e.items.every(i => exprNullableAfter(i, nn));
+    case 'alt': return e.items.some(i => exprNullableAfter(i, nn));
+    case 'quantifier': return e.kind === '+' ? exprNullableAfter(e.body, nn) : true;
+    case 'group': return exprNullableAfter(e.body, nn);
+    case 'sep': return true;
+    default: return true;
+  }
+}
+
+/** The non-empty form of a (nullable) expr — its language minus ε. `null` if that language is empty
+ *  (a purely zero-width expr). The chosen non-empty position is rendered UNWRAPPED; the rest get the
+ *  may-be-empty form `wrapNullableRefs`. */
+function makeNonEmpty(e: RuleExpr, nn: Set<string>): RuleExpr | null {
+  const T = (x: RuleExpr) => wrapNullableRefs(x, nn);
+  const NE = (x: RuleExpr) => makeNonEmpty(x, nn);
+  const nul = (x: RuleExpr) => exprNullableAfter(x, nn);
+  switch (e.type) {
+    case 'literal': return e.value === '' ? null : e;
+    case 'ref': return e;                                   // an nn rule (now non-empty) or a non-nullable rule/terminal
+    case 'group': { const b = NE(e.body); return b ? { ...e, body: b } : null; }
+    case 'alt': {
+      const parts: RuleExpr[] = [];
+      for (const m of e.items) { const r = nul(m) ? NE(m) : T(m); if (r) parts.push(r); }
+      return parts.length === 0 ? null : parts.length === 1 ? parts[0] : { type: 'alt', items: parts };
+    }
+    case 'seq': {
+      if (e.items.some(i => !nul(i))) return T(e);          // a non-nullable element already forces non-empty
+      const branches: RuleExpr[] = [];                       // all nullable → "first non-empty element is at i"
+      for (let i = 0; i < e.items.length; i++) {
+        const head = NE(e.items[i]);
+        if (!head) continue;
+        const tail = e.items.slice(i + 1).map(T);
+        branches.push(tail.length ? { type: 'seq', items: [head, ...tail] } : head);
+      }
+      return branches.length === 0 ? null : branches.length === 1 ? branches[0] : { type: 'alt', items: branches };
+    }
+    case 'quantifier': {
+      if (e.kind === '?') return nul(e.body) ? NE(e.body) : T(e.body);   // optional(x) non-empty = x non-empty
+      const head = nul(e.body) ? NE(e.body) : T(e.body);                 // *,+ non-empty = one non-empty iter, then repeat
+      return head ? { type: 'seq', items: [head, { type: 'quantifier', kind: '*', body: T(e.body) }] } : null;
+    }
+    case 'sep': {
+      const head = nul(e.element) ? NE(e.element) : T(e.element);
+      if (!head) return null;
+      const d: RuleExpr = { type: 'literal', value: e.delimiter };
+      return { type: 'seq', items: [head, { type: 'quantifier', kind: '*', body: { type: 'seq', items: [d, T(e.element)] } }, { type: 'quantifier', kind: '?', body: d }] };
+    }
+    default: return null;                                   // not/sameLine/…: zero-width, no non-empty form
+  }
+}
+
 /** Build a single rule's body string (Pratt or plain). */
 function buildRuleBody(rule: RuleDecl, ctx: GrammarJsContext): string {
   if (ctx.prattRules.has(rule.name)) return buildPrattRule(rule, ctx);
+  const nn = ctx.nullableNonStart;
+  if (nn.size > 0) {
+    const body = nn.has(rule.name) ? (makeNonEmpty(rule.body, nn) ?? rule.body) : wrapNullableRefs(rule.body, nn);
+    return renderExpr(body, ctx);
+  }
   return renderExpr(rule.body, ctx);
 }
 
@@ -367,7 +487,13 @@ function buildTokenBody(name: string, ctx: GrammarJsContext): string | null {
   // rule reference — but we still emit them so highlights can capture comments.
   // tree-sitter's token() DFA rejects zero-width assertions, so strip them first.
   if (tokenPatternIsNever(tok)) return 'token(/[^\\s\\S]/)';
-  return `token(${jsRegexLiteral(sanitizeTreeSitterRegex(tokenPatternSource(tok)))})`;
+  // A token with a BLOCK-context variant (YAML's scalar tokens: a block plain/key stops at a `: `
+  // separator and a value end, where the flow variant runs through them) — emit the block pattern.
+  // The tree-sitter grammar is block-context at the top level; flow collections are their own rules.
+  // Block-only (no `pattern`) and dual tokens both resolve here; YAML is the only grammar with a
+  // blockPattern, so every other language is unaffected (byte-identical).
+  const src = tok.blockPattern ? tokenPatternSource({ pattern: tok.blockPattern }) : tokenPatternSource(tok);
+  return `token(${jsRegexLiteral(sanitizeTreeSitterRegex(src))})`;
 }
 
 // ── conflicts ────────────────────────────────────────────────────────────────
@@ -423,6 +549,22 @@ const LR_CONFLICT_CLOSURE: string[][] = [
   // while completing the closure (CI builds only the typescript + html tree-sitters, so
   // tsx/jsx generate was never exercised). Each is inert for languages lacking the rule.
   ['type', 'class_heritage'], ['type_param', 'jsxtag_name'], ['expr', 'jsxcontainer'],
+  // YAML (issue #3): an indentation grammar is massively ambiguous — a newline may continue a node or
+  // start the next document, a `:` may open a value or be an empty-key map, a scalar may be a key or a
+  // leaf, a flow collection may be a value or an implicit block key. tree-sitter's GLR absorbs all of
+  // this once the states are declared. These 37 tuples are the fixpoint of its own analysis (collected
+  // via test/collect-conflicts.ts); every name is YAML-specific, so each is inert for the other
+  // languages (verified: zero rule-name overlap with the TS/JS/TSX/JSX grammars).
+  ['stream', 'node'], ['empty_key_mapping'], ['explicit_entry'], ['next_doc'], ['stream', 'next_doc'],
+  ['node'], ['key', 'plain'], ['scalar', 'doc_fold'], ['explicit_mapping'], ['block_sequence'],
+  ['map_value_scalar', 'map_value_node_scalar'], ['scalar', 'block_key_scalar'],
+  ['map_value', 'map_value_node'], ['flow_explicit'], ['flow_mapping'], ['flow_sequence'],
+  ['explicit_doc_body'], ['inline_doc_node'], ['alias_or_keyed'], ['doc_fold'], ['mapping_from_flow'],
+  ['mapping_or_scalar'], ['property', 'node'], ['seq_item'], ['property'], ['flow_node'],
+  ['node', 'explicit_doc_body'], ['node', 'after_doc_end'], ['after_doc_end'], ['map_entry'],
+  ['stream', 'explicit_doc_body'], ['map_entry_no_empty'], ['seq_value_node'],
+  ['mapping_or_scalar', 'doc_fold'], ['map_value_scalar', 'map_inline_scalar'],
+  ['content_node', 'mapping_from_flow'], ['mapping_or_scalar', 'map_value'],
 ];
 
 /**
@@ -475,9 +617,12 @@ function deriveConflicts(ctx: GrammarJsContext): string[][] {
   }
 
   // 3. The LR(1) closure tree-sitter's own analysis reports for this grammar.
-  //    Applied only for tuples whose rules all exist here (inert otherwise).
+  //    Applied only for tuples whose symbols ALL exist here (inert otherwise). A conflict symbol may
+  //    be a RULE or a TOKEN (e.g. YAML's `key`/`plain` are tokens that conflict on a trailing `:`), so
+  //    both name sets count — `$.key` is a valid conflict symbol whether key is a rule or a token.
+  const tokenSnakes = new Set(ctx.tokenSnake.values());
   for (const tuple of LR_CONFLICT_CLOSURE) {
-    if (tuple.every(r => ruleSnakes.has(r))) push(tuple);
+    if (tuple.every(r => ruleSnakes.has(r) || tokenSnakes.has(r))) push(tuple);
   }
 
   return conflicts;
@@ -580,6 +725,69 @@ function planInterpolations(grammar: CstGrammar): InterpolationPlan[] {
   return plans;
 }
 
+/** The block-context SCALAR tokens of an indentation grammar (those carrying a `blockPattern`), split
+ *  by their scope leaf: PLAIN `string.unquoted…`, KEY `entity.name.tag…`, NUM `constant.numeric…`,
+ *  bool/null `constant.language…`. All are scanned in C (see scan_scalar) — a YAML plain/key boundary
+ *  (`:(?=\S)`, `#`-after-space) is a look-around a tree-sitter token DFA can't honour, and a typed
+ *  value emitted by the regex lexer would not carry the key-vs-value decision the GLR parser needs to
+ *  chain top-level mapping entries. Any field may be absent. */
+function planPlainScalarTokens(grammar: CstGrammar): { plain?: string; key?: string; num?: string; boolnull?: string } {
+  if (!grammar.indent) return {};
+  // Every token carrying a `blockPattern` is a block-context scalar; emitting num/bool-null from the
+  // scanner too (classified by shape) — not via a regex token + decline — keeps every scalar an
+  // external token, so the key-vs-value decision is carried and `x: 1\ny: 2` chains correctly.
+  // Split by the scope leaf (the convention is data in the grammar): plain `string.unquoted`, key
+  // `entity.name.tag`, num `constant.numeric`, bool/null `constant.language`.
+  const fam = grammar.tokens.filter(t => t.blockPattern !== undefined && typeof t.blockPattern !== 'string');
+  const num = fam.find(t => (t.scope ?? '').includes('constant.numeric'))?.name;
+  const boolnull = fam.find(t => (t.scope ?? '').includes('constant.language'))?.name;
+  const key = fam.find(t => (t.scope ?? '').startsWith('entity.name.tag'))?.name;
+  const plain = fam.find(t => (t.scope ?? '').startsWith('string.unquoted') && !(t.scope ?? '').includes('constant.'))?.name;
+  return { plain, key, num, boolnull };
+}
+
+/**
+ * Synthetic external tokens for the flow-collection delimiters (`[` `]` `{` `}`). YAML's flow brackets
+ * suspend indentation and turn `,`/brackets into structural separators; a tree-sitter external scanner
+ * can only KEEP that state (flow_depth) across a token if it RETURNS that token (mutations during a
+ * `false` return are discarded — the pre-scan state is restored before the internal bracket is lexed).
+ * So the brackets are emitted by the scanner as external tokens. They have no token name in the source
+ * grammar (they are bare literals in the flow rules), so we synthesize a stable name per delimiter char
+ * and (a) register them as externals here and (b) substitute the matching literal in the rendered rules
+ * (renderExpr). Returns [] for non-flow grammars. Order: every opener (in flowOpen order) then every
+ * closer (flowClose order) — the enum / grammar.js externals follow this order.
+ */
+const FLOW_CHAR_NAMES: Record<string, string> = {
+  '[': 'lbracket', ']': 'rbracket', '{': 'lbrace', '}': 'rbrace', '(': 'lparen', ')': 'rparen',
+};
+function flowSyntheticTokens(grammar: CstGrammar): { sym: string; char: string; open: boolean }[] {
+  const ind = grammar.indent;
+  if (!ind || !(ind.flowOpen?.length || ind.flowClose?.length)) return [];
+  const name = (c: string) => `_flow_${FLOW_CHAR_NAMES[c] ?? `u${c.charCodeAt(0)}`}`;
+  return [
+    ...(ind.flowOpen ?? []).map(c => ({ sym: name(c), char: c, open: true })),
+    ...(ind.flowClose ?? []).map(c => ({ sym: name(c), char: c, open: false })),
+  ];
+}
+
+/**
+ * The document-marker glyphs (`---` / `...`) of an indentation grammar, matched to
+ * `indent.blockScalar.documentMarkers` by token literal prefix — used by the external scanner's
+ * scan_scalar to claim a non-marker glyph as plain and decline a true marker (the markers stay
+ * INTERNAL tokens; see planScannerTokens). Longest glyph first (so a 3-char glyph beats a prefix of
+ * it). Empty unless the grammar declares documentMarkers.
+ */
+function documentMarkerGlyphs(grammar: CstGrammar): string[] {
+  const markers = grammar.indent?.blockScalar?.documentMarkers;
+  if (!markers || markers.length === 0) return [];
+  const out = new Set<string>();
+  for (const tok of grammar.tokens) {
+    const lit = tokenPatternLiteralPrefix(tok);
+    if (lit && markers.includes(lit)) out.add(lit);
+  }
+  return [...out].sort((a, b) => b.length - a.length);
+}
+
 /** Determine which tokens the external scanner must provide. */
 function planScannerTokens(grammar: CstGrammar): Map<string, string> {
   const map = new Map<string, string>();
@@ -587,6 +795,41 @@ function planScannerTokens(grammar: CstGrammar): Map<string, string> {
   // stateless external token (the scanner emits it at each significant line boundary). Listed
   // FIRST so it heads the enum / externals order.
   if (grammar.newline) map.set(grammar.newline.token, toSnake(grammar.newline.token));
+  // An indentation-sensitive grammar (YAML): INDENT / DEDENT / NEWLINE and the block-scalar body are
+  // engine-emitted — the lexer's indent stack (src/gen-lexer.ts) decides them, not a regex — so their
+  // token IR is `never()`. In tree-sitter they become EXTERNAL tokens the C scanner (src/scanner.c)
+  // provides; without this they would serialize as never-match token rules (`token(/[^\s\S]/)`) that
+  // the parser can never match (and the block-scalar body would orphan the scalar). Ordered
+  // indent/dedent/newline/body so grammar.js's `externals` and scanner.c's enum agree positionally.
+  if (grammar.indent) {
+    const ind = grammar.indent;
+    map.set(ind.indentToken, toSnake(ind.indentToken));
+    map.set(ind.dedentToken, toSnake(ind.dedentToken));
+    map.set(ind.newlineToken, toSnake(ind.newlineToken));
+    if (ind.blockScalar) map.set(ind.blockScalar.token, toSnake(ind.blockScalar.token));
+    // The PLAIN and KEY scalars (a `:` is content unless followed by space/EOL; a `#` starts a
+    // comment only after a space) need look-ahead at their boundary, which a tree-sitter `token()`
+    // DFA lacks — so they too become external tokens, scanned by `scan_scalar` in C. Appended AFTER
+    // the block scalar so the enum stays INDENT,DEDENT,NEWLINE,BLOCK_SCALAR,PLAIN,KEY. (Num/BoolNull
+    // are NOT plain-family — their boundary is DFA-expressible — so they stay regex token rules.)
+    const { plain, key, num, boolnull } = planPlainScalarTokens(grammar);
+    if (plain) map.set(plain, toSnake(plain));
+    if (key) map.set(key, toSnake(key));
+    if (num) map.set(num, toSnake(num));
+    if (boolnull) map.set(boolnull, toSnake(boolnull));
+    // The flow-collection delimiter tokens (`[ ] { }`) — emitted by the scanner so flow_depth persists
+    // (a TRUE return). The synthetic name IS the snake symbol; the matching literal in the flow rules is
+    // swapped for a ref to it in renderExpr. Appended last so the scalar-token positions are unchanged.
+    for (const { sym } of flowSyntheticTokens(grammar)) map.set(sym, sym);
+    // Document markers (`---` / `...`) stay INTERNAL tokens (NOT added here). Their IR is
+    // `literal + a sep look-ahead`; tree-sitter's token() DFA drops the look-ahead, leaving a bare
+    // `---`/`...`. That is fine: the external scalar scanner CLAIMS a non-marker glyph (`---foo`) as a
+    // plain scalar (so it never reaches the internal token) and DECLINES a true sep-bounded marker (so
+    // the internal token lexes it — see scan_scalar's document-marker probe). Making them external
+    // instead perturbs the GLR parse tables — a marker token's valid-symbol set then shifts the lexer's
+    // scalar/indent decisions at unrelated boundaries (a same-column block sequence after a key
+    // mis-lexes) — so keeping them internal leaves the tables byte-identical to a no-marker build.
+  }
   // The regex token: '/' is context-sensitive (regex vs division). The scanner
   // resolves it.
   const regexTok = grammar.tokens.find(t => t.flags.includes('regex'));
@@ -778,8 +1021,18 @@ export function generateTreeSitter(grammar: CstGrammar, langName?: string): Tree
   // queries for them. Same shape rule gen-tm.ts uses (inferIdentScope).
   const nameFields = collectNameFields(grammar);
 
+  // ε-elimination set (see makeNonEmpty): the start rule is the entry rule, emitted FIRST below.
+  const entryName = grammar.rules[grammar.rules.length - 1].name;
+  const isTerminalName = (n: string) => tokenNames.has(n) || scannerTokenFor.has(n);
+  const nullableNonStart = computeNullableNonStart(grammar, entryName, isTerminalName);
+
+  const flowLiteralTokens = new Map<string, string>();
+  for (const { sym, char } of flowSyntheticTokens(grammar)) flowLiteralTokens.set(char, sym);
+
   const ctx: GrammarJsContext = {
     grammar, tokenNames, ruleSnake, tokenSnake, prattRules, externalSnake, scannerTokenFor,
+    flowLiteralTokens,
+    nullableNonStart,
     templatePlan,
     interpolationPlans,
     nameFieldNodes: nameFields.nodes,
@@ -1627,11 +1880,657 @@ function cCharList(s: string): string {
   return [...s].map(c => `'${c === '\\' || c === "'" ? '\\' + c : c}'`).join(', ');
 }
 
+// ── Indentation external scanner (YAML) ──────────────────────────────────────
+// An indentation-sensitive grammar emits INDENT / DEDENT / NEWLINE from a line-leading-column state
+// machine that a regex lexer cannot express, so they become external tokens scanned here. This C
+// scanner mirrors the indent-stack logic of src/gen-lexer.ts: at each line boundary it measures the
+// next content line's column and emits INDENT (deeper → push), DEDENT (shallower → pop, one per call
+// until the stack top is reached), or NEWLINE (same column → a sibling separator). Flow context needs
+// no special handling: inside `[`/`{` the grammar never references these tokens, so valid_symbols is
+// false there and the line break falls through to `extras`. The indent stack lives in the Scanner
+// struct and is (de)serialized for incremental re-parsing. Block-scalar bodies are scanned verbatim
+// up to the first line at or below the parent indentation. All language data (the comment introducer,
+// the block-scalar introducer chars, the document markers) is DERIVED from `grammar.indent`.
+function buildIndentScannerC(grammar: CstGrammar, ctx: GrammarJsContext, grammarName: string): { scannerC: string; externalTokens: string[] } {
+  const ind = grammar.indent!;
+  const externalTokens = externalSymbols(ctx); // order MUST match grammar.js externals
+  const sym = (tokenName: string) => ctx.scannerTokenFor.get(tokenName)!.toUpperCase();
+  const INDENT = sym(ind.indentToken), DEDENT = sym(ind.dedentToken), NEWLINE = sym(ind.newlineToken);
+  const BLOCK = ind.blockScalar ? sym(ind.blockScalar.token) : null;
+  // The block-context SCALAR externals — plain, key, and the typed num / bool-null — all scanned by
+  // scan_scalar (a `:` is a separator only before space/EOL/flow-indicator; a `#` a comment only after
+  // a space; a typed run is classified by shape). Emitting num/bool-null from the scanner (not via a
+  // regex token + decline) makes EVERY scalar an external token that carries the key-vs-value decision,
+  // which the GLR parser needs to chain top-level mapping entries.
+  const { plain: plainTok, key: keyTok, num: numTok, boolnull: boolnullTok } = planPlainScalarTokens(grammar);
+  const PLAIN = plainTok ? sym(plainTok) : null;
+  const KEY = keyTok ? sym(keyTok) : null;
+  const NUM = numTok ? sym(numTok) : null;
+  const BOOLNULL = boolnullTok ? sym(boolnullTok) : null;
+  const SCALAR = PLAIN || KEY || NUM || BOOLNULL; // scan_scalar is emitted when at least one exists
+  const scalarGate = [PLAIN, KEY, NUM, BOOLNULL].filter(Boolean).map(s => `valid_symbols[${s}]`).join(' || ') || '0';
+  const want = (s: string | null) => (s ? `valid_symbols[${s}] != 0` : 'false');
+  const cmt = ind.comment ?? '#';
+  const cmtC = cmt.length === 1 ? (cmt === '\\' || cmt === "'" ? `'\\${cmt}'` : `'${cmt}'`) : null;
+  const introCond = (ind.blockScalar?.introducers ?? []).map(c => `lexer->lookahead == '${c}'`).join(' || ') || '0';
+  const enumBody = externalTokens.map(s => `  ${s.toUpperCase()},`).join('\n');
+  const G = grammarName;
+  // Compact-notation entry indicators (YAML `-` / `?`) — DERIVED from grammar.indent.compactIndicators
+  // (nothing hardcoded). A `lexer->lookahead == 'c'` disjunction reused by the scanner's compact logic.
+  // (The other inline-content leads — node-property `&`/`!`, flow `[`/`{`, alias `*` — are mirrored as
+  //  literals from gen-lexer's startsBlockStructuralNode, which itself treats them as fixed YAML
+  //  syntax; only the entry indicators are config-driven, matching IndentConfig.compactIndicators.)
+  const compactIndicators = ind.compactIndicators ?? [];
+  const compactIndicatorCond = (v: string) => compactIndicators.map(c => `${v} == '${c}'`).join(' || ') || '0';
+  const hasCompact = compactIndicators.length > 0 && SCALAR;
+  // Flow-collection delimiters (`[ ] { }`) — DERIVED from grammar.indent.flowOpen / flowClose. Inside a
+  // flow collection (flow_depth > 0) indentation is SUSPENDED and `,`/`[`/`]`/`{`/`}` are item/collection
+  // boundaries; in block context (flow_depth == 0) those same chars are ordinary plain-scalar content
+  // (mirrors the flowDepth counter in src/gen-lexer.ts, the parser's lexer). tree-sitter discards an
+  // external scanner's struct mutations on a `false` return (it restores the pre-scan serialized state
+  // before lexing the internal bracket), so a counter cannot be maintained by peeking-then-returning-
+  // false at the bracket; instead the flow OPEN/CLOSE brackets are emitted as EXTERNAL tokens by the
+  // scanner (a TRUE return DOES persist), and flow_depth is bumped there. The brackets are synthesized
+  // external tokens (no token name in yaml.ts) — see flowSyntheticTokens / the literal substitution in
+  // renderExpr. flow_depth is then carried in the Scanner struct (serialize/deserialize).
+  const flowOpen = ind.flowOpen ?? [];
+  const flowClose = ind.flowClose ?? [];
+  const charLit = (c: string) => (c === '\\' || c === "'" ? `'\\${c}'` : `'${c}'`);
+  // Run-boundary chars inside a flow collection: the closers/openers + the entry separator `,`. A plain
+  // scalar still cannot START with one of these (only contain them in block context).
+  const hasFlow = flowOpen.length > 0 || flowClose.length > 0;
+  const flowBreakCond = (v: string) => [...flowOpen, ...flowClose].map(c => `${v} == ${charLit(c)}`).concat(`${v} == ','`).join(' || ');
+  // The synthetic external token name + char for each flow delimiter (open then close), in the SAME
+  // order they were registered in ctx.scannerTokenFor (so the enum / grammar.js externals positions
+  // agree). Built by flowSyntheticTokens(grammar) and shared with the grammar.js side.
+  const flowTokens = flowSyntheticTokens(grammar); // [{ sym, char, open }]
+
+  // DOCUMENT MARKERS (`---` / `...`) — INTERNAL tokens; the external scalar scanner only CLAIMS a non-
+  // marker glyph as plain and DECLINES a true (sep-bounded) marker so the internal token lexes it.
+  const markers = documentMarkerGlyphs(grammar);
+  const hasMarkers = markers.length > 0 && SCALAR;
+  const cChar = (ch: string) => (ch === '\\' || ch === "'" ? `'\\${ch}'` : `'${ch}'`);
+  // Advance over one glyph char (counting `matched`) — DON'T push to the run or mark the token end yet.
+  // The probe commits (mark_end) only on the plain-content path; a true-marker decline marks nothing, so
+  // the probed chars roll back cleanly and tree-sitter then lexes the internal marker token.
+  const eatGlyphChar = (ch: string) => `if (lexer->lookahead == ${cChar(ch)}) { lexer->advance(lexer, false); matched++; }`;
+  // Replay the k matched glyph chars into the run as scalar content (a non-marker glyph: `---foo`,
+  // `--x`). The lexer is already positioned past them; the main loop continues the run from here.
+  const replayGlyph = (glyph: string) => [...glyph].map((ch, k) =>
+    `if (matched > ${k}) { if (blen < sizeof(buf)) buf[blen++] = ${cChar(ch)}; }`).join(' ');
+  const markerProbe = !hasMarkers ? '' : `
+  // DOCUMENT-MARKER probe (column 0). A \`---\`/\`...\` glyph that is sep-bounded (ws / EOL / EOF) is a
+  // document marker — an INTERNAL token (its IR's sep look-ahead is beyond a tree-sitter token() DFA,
+  // but this external scanner decides the boundary). The glyph is matched WITHOUT marking the token end:
+  //   • a FULL glyph + separator      → a TRUE marker: set s->marker_decline and return false; nothing
+  //     was marked, so the probed chars roll back and the internal \`---\`/\`...\` token lexes it (a non-
+  //     marker glyph never reaches that token, so its dropped look-ahead is moot).
+  //   • a LONE indicator char + sep   → a block indicator (\`- \`/\`? \`); decline so the internal \`-\`/\`?\`
+  //     token takes it.
+  //   • anything else (\`---foo\`, \`-1\`) → plain content: replay the matched glyph chars and fall through
+  //     to the scalar loop, which continues the run (so the marker glyph is CLAIMED as a plain scalar).
+  // Markers (and which lead chars are block indicators) are DERIVED from grammar.indent.
+  if (${hasCompact ? 'compact_col < 0 && ' : ''}lexer->get_column(lexer) == 0) {${markers.map(glyph => `
+    if (lexer->lookahead == ${cChar(glyph[0])}) {
+      unsigned matched = 0;
+      ${[...glyph].map(eatGlyphChar).join('\n      ')}
+      int32_t mn = lexer->lookahead;
+      bool msep = (mn == 0 || mn == ' ' || mn == '\\t' || mn == '\\n' || mn == '\\r');
+      if (matched == ${glyph.length} && msep) { s->marker_decline = true; return false; }${compactIndicators.includes(glyph[0]) ? `
+      if (matched == 1 && msep) return false; // lone \`${glyph[0]}\` + separator → block indicator, not content` : ''}
+      ${replayGlyph(glyph)} if (matched > 0) { has_content = true; lexer->mark_end(lexer); }
+    }`).join('')}
+  }`;
+
+  const scannerC = `// Tree-sitter external scanner generated by monogram (indentation path).
+//
+// Mirrors the indent-stack state machine of src/gen-lexer.ts: INDENT / DEDENT / NEWLINE are emitted
+// from the line-leading column relative to an indent stack; a block-scalar body is scanned verbatim.
+// All language data (comment introducer, block-scalar introducers, document markers) is DERIVED from
+// the grammar's \`indent\` config — nothing below is hand-tuned for a specific language.
+
+#include "tree_sitter/parser.h"
+#include "tree_sitter/alloc.h"
+#include <string.h>
+#include <stdint.h>
+
+enum TokenType {
+${enumBody}
+};
+
+typedef struct {
+  uint32_t len;          // indent-stack depth (>= 1; stack[0] == 0, the document level)
+  uint32_t cap;
+  int16_t *stack;        // indentation columns
+  int16_t pending_col;   // column of the line boundary mid-processing (-1 = none)
+  bool pending_newline;  // a NEWLINE is still owed once dedents reach pending_col
+  bool started;          // any content lexed yet (suppresses a leading NEWLINE)${hasCompact ? `
+  bool at_line_lead;     // the next real content token is its line's first (compact-indicator probe)
+  bool property_lead;    // the line's FIRST token is a node property (\`&\`/\`!\`) — its inline content sits
+                         // at the SAME node level, so it must NOT take the compact mapping-key push (\`&a
+                         // a: b\` is the key \`a\` carrying anchor \`&a\`, not \`&a\`-then-INDENTED-\`a: b\`).
+                         // gen-lexer clears atLineLead on the property token (it sees every token); the
+                         // scanner does not lex the property, so it LATCHES this at the line lead and reads
+                         // it at the push. It must survive the property's INTERNAL lex (which the scanner
+                         // declines via a FALSE return) — tree-sitter deserializes the serialized fields on
+                         // a false return, so a SERIALIZED flag would be rolled back; this one is therefore
+                         // NOT serialized and NOT reset in deserialize (it keeps its in-memory value across
+                         // the decline). It is RE-DERIVED from the lead char at every line boundary, so it
+                         // is always correct at the only points it is read (a line lead).` : ''}${hasFlow ? `
+  uint16_t flow_depth;   // > 0 inside flow collections ([ ] { }) → indentation suspended, ,/[/]/{/}` : ''}${hasMarkers ? `
+  bool marker_decline;   // transient: scan_scalar saw a true \`---\`/\`...\` → external declines so the
+                         // internal marker token lexes it. Set+consumed within one scan; not serialized.` : ''}
+} Scanner;
+
+static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
+static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
+
+static void push_indent(Scanner *s, int16_t col) {
+  if (s->len == s->cap) { s->cap *= 2; s->stack = ts_realloc(s->stack, s->cap * sizeof(int16_t)); }
+  s->stack[s->len++] = col;
+}
+
+void *tree_sitter_${G}_external_scanner_create(void) {
+  Scanner *s = ts_malloc(sizeof(Scanner));
+  s->cap = 16; s->len = 1;
+  s->stack = ts_malloc(s->cap * sizeof(int16_t));
+  s->stack[0] = 0;
+  s->pending_col = -1; s->pending_newline = false; s->started = false;${hasCompact ? `
+  s->at_line_lead = true; s->property_lead = false;` : ''}${hasFlow ? `
+  s->flow_depth = 0;` : ''}${hasMarkers ? `
+  s->marker_decline = false;` : ''}
+  return s;
+}
+
+void tree_sitter_${G}_external_scanner_destroy(void *payload) {
+  Scanner *s = (Scanner *)payload;
+  ts_free(s->stack);
+  ts_free(s);
+}
+
+unsigned tree_sitter_${G}_external_scanner_serialize(void *payload, char *buffer) {
+  Scanner *s = (Scanner *)payload;
+  unsigned n = 0;
+  buffer[n++] = s->started ? 1 : 0;
+  buffer[n++] = s->pending_newline ? 1 : 0;${hasCompact ? `
+  buffer[n++] = s->at_line_lead ? 1 : 0;` : ''}
+  memcpy(&buffer[n], &s->pending_col, sizeof(int16_t)); n += sizeof(int16_t);${hasFlow ? `
+  memcpy(&buffer[n], &s->flow_depth, sizeof(uint16_t)); n += sizeof(uint16_t);` : ''}
+  uint32_t count = s->len;
+  while (n + sizeof(uint32_t) + count * sizeof(int16_t) > TREE_SITTER_SERIALIZATION_BUFFER_SIZE && count > 0) count--;
+  memcpy(&buffer[n], &count, sizeof(uint32_t)); n += sizeof(uint32_t);
+  memcpy(&buffer[n], s->stack, count * sizeof(int16_t)); n += count * sizeof(int16_t);
+  return n;
+}
+
+void tree_sitter_${G}_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
+  Scanner *s = (Scanner *)payload;
+  s->len = 1; s->stack[0] = 0; s->pending_col = -1; s->pending_newline = false; s->started = false;${hasCompact ? `
+  s->at_line_lead = true;` : ''}${hasFlow ? `
+  s->flow_depth = 0;` : ''}${hasMarkers ? `
+  s->marker_decline = false;` : ''}
+  if (length < ${hasCompact ? 3 : 2} + sizeof(int16_t)${hasFlow ? ' + sizeof(uint16_t)' : ''} + sizeof(uint32_t)) return;
+  unsigned n = 0;
+  s->started = buffer[n++] != 0;
+  s->pending_newline = buffer[n++] != 0;${hasCompact ? `
+  s->at_line_lead = buffer[n++] != 0;` : ''}
+  memcpy(&s->pending_col, &buffer[n], sizeof(int16_t)); n += sizeof(int16_t);${hasFlow ? `
+  memcpy(&s->flow_depth, &buffer[n], sizeof(uint16_t)); n += sizeof(uint16_t);` : ''}
+  uint32_t count; memcpy(&count, &buffer[n], sizeof(uint32_t)); n += sizeof(uint32_t);
+  if (count == 0) return; // keep stack[0] = 0
+  while (s->cap < count) { s->cap *= 2; s->stack = ts_realloc(s->stack, s->cap * sizeof(int16_t)); }
+  memcpy(s->stack, &buffer[n], count * sizeof(int16_t));
+  s->len = count;
+}
+
+${BLOCK ? `// A block scalar (\`|\` / \`>\`): the introducer + indicators + the verbatim more-indented body, as
+// one token. The body runs while a line is blank or indented MORE than the parent block level (the
+// stack top); it ends at the first non-blank line at or below the parent, or a column-0 document
+// marker, or EOF. mark_end is advanced only over lines that belong to the scalar, so the next node's
+// indentation is left for the normal boundary logic.
+//
+// A ROOT block scalar (the document's own node — stack depth 1) has an effective parent indentation of
+// -1, not 0: its body may sit at column 0 (\`--- >\\nline1\`, yaml-test-suite DK3J / FP8R). So at root,
+// only a column-0 DOCUMENT MARKER (\`---\` / \`...\`) — never plain column-0 text — ends it. The marker
+// is matched without committing the line (no mark_end), so a non-marker column-0 line stays body.${markers.length > 0 ? `` : ''}
+static bool scan_block_scalar(Scanner *s, TSLexer *lexer) {
+  bool root = s->len == 1;           // a document-root block scalar: body may reach column 0
+  int parent = root ? -1 : s->stack[s->len - 1];
+  advance(lexer); // the introducer (| or >)
+  while (lexer->lookahead == '+' || lexer->lookahead == '-' || (lexer->lookahead >= '0' && lexer->lookahead <= '9')) advance(lexer);
+  while (lexer->lookahead != 0 && lexer->lookahead != '\\n' && lexer->lookahead != '\\r') advance(lexer);
+  lexer->mark_end(lexer); // the header line belongs to the scalar
+  for (;;) {
+    if (lexer->lookahead == '\\r') { advance(lexer); if (lexer->lookahead == '\\n') advance(lexer); }
+    else if (lexer->lookahead == '\\n') advance(lexer);
+    else break; // EOF
+    int col = 0;
+    while (lexer->lookahead == ' ') { advance(lexer); col++; }
+    int32_t c = lexer->lookahead;
+    if (c == 0 || c == '\\n' || c == '\\r') { lexer->mark_end(lexer); continue; } // blank line → body${markers.length > 0 ? `
+    if (root && col == 0) {                 // a column-0 document marker ends a root block scalar
+      bool is_marker = false;${markers.map(glyph => `
+      if (!is_marker && c == ${cChar(glyph[0])}) {
+        unsigned m = 0; ${[...glyph].map(ch => `if (lexer->lookahead == ${cChar(ch)}) { advance(lexer); m++; }`).join(' ')}
+        int32_t a = lexer->lookahead;
+        if (m == ${glyph.length} && (a == 0 || a == ' ' || a == '\\t' || a == '\\n' || a == '\\r')) is_marker = true;
+      }`).join('')}
+      if (is_marker) break;                 // leave the marker line for the next token (no mark_end)
+      // not a marker: the chars probed above are body; fall through to consume the rest of the line.
+    }` : ''}
+    if (col <= parent) break; // dedent to/below parent ends the scalar (the line is the next node)
+    while (lexer->lookahead != 0 && lexer->lookahead != '\\n' && lexer->lookahead != '\\r') advance(lexer);
+    lexer->mark_end(lexer);
+  }
+  lexer->result_symbol = ${BLOCK};
+  return true;
+}
+` : ''}${hasCompact ? `
+// Compact block notation (\`- a: 1\` / \`- - x\` / \`- &a k: v\` / \`? key\`): a sequence/explicit-key
+// indicator whose inline content itself begins a block node nests at the content's column, not the
+// indicator's. The indicator chars are DERIVED from grammar.indent.compactIndicators. Mirrors
+// compactNestsHere / startsBlockStructuralNode in src/gen-lexer.ts: the inline content is
+// block-structural when, after an optional node-property prefix (\`&anchor\` / \`!tag\`, 0-2
+// space-separated), it is a further indicator, or a mapping KEY (an unquoted \`:\` then ws/EOL/
+// flow-indicator before a \` #\` comment / EOL, scanned quote-aware). A bare scalar / flow / alias does
+// NOT nest. The property / flow / alias glyphs are fixed YAML syntax (as in gen-lexer); only the entry
+// indicators are config-driven.
+static inline bool compact_is_indicator(int32_t c) { return ${compactIndicatorCond('c')}; }
+static inline bool compact_sep_after(int32_t c) {
+  return c == 0 || c == ' ' || c == '\\t' || c == '\\n' || c == '\\r';
+}
+// The inline content (lookahead is positioned at it) begins a block-structural node. Advances; the
+// caller has frozen a zero-width token end before it and discards the advances (returning the INDENT
+// zero-width on a hit, or rewinding on a miss).
+static bool compact_content_is_structural(TSLexer *lexer) {
+  for (int n = 0; n < 2; n++) {                       // skip 0-2 node-property prefixes (\`&anchor\` / \`!tag\`)
+    int32_t c = lexer->lookahead;
+    if (c == '&' || c == '!') {
+      advance(lexer);
+      while (lexer->lookahead != 0 && !compact_sep_after(lexer->lookahead) && lexer->lookahead != ',') advance(lexer);
+      while (lexer->lookahead == ' ' || lexer->lookahead == '\\t') advance(lexer);
+    } else break;
+  }
+  int32_t c0 = lexer->lookahead;
+  if (c0 == 0 || c0 == '\\n' || c0 == '\\r') return false;            // property alone on the line → no nest
+  if (compact_is_indicator(c0)) { advance(lexer); return compact_sep_after(lexer->lookahead); } // nested indicator
+  if (c0 == '[' || c0 == '{' || c0 == '*') return false;            // flow collection / alias → not a key
+  for (;;) {                                          // scalar KEY sniff (quote-aware), like startsBlockStructuralNode
+    int32_t ch = lexer->lookahead;
+    if (ch == 0 || ch == '\\n' || ch == '\\r') break;
+    if (ch == '"') {
+      advance(lexer);
+      while (lexer->lookahead != 0 && lexer->lookahead != '"' && lexer->lookahead != '\\n') { if (lexer->lookahead == '\\\\') advance(lexer); advance(lexer); }
+      if (lexer->lookahead == '"') advance(lexer);
+      continue;
+    }
+    if (ch == '\\'') {
+      advance(lexer);
+      while (lexer->lookahead != 0 && lexer->lookahead != '\\n') { if (lexer->lookahead == '\\'') { advance(lexer); if (lexer->lookahead != '\\'') break; } advance(lexer); }
+      continue;
+    }
+    if (ch == ' ' || ch == '\\t') { advance(lexer); if (lexer->lookahead == '#') break; continue; } // trailing comment
+    if (ch == ':') {
+      advance(lexer);
+      int32_t n = lexer->lookahead;
+      if (n == 0 || n == ' ' || n == '\\t' || n == '\\n' || n == '\\r' || n == ',' || n == '[' || n == ']' || n == '{' || n == '}') return true;
+      continue;
+    }
+    advance(lexer);
+  }
+  return false;
+}
+` : ''}${SCALAR ? `
+// A PLAIN / KEY scalar. A tree-sitter token() DFA can't decide a plain scalar's boundary (a \`:\` is
+// content unless followed by space/EOL/flow-indicator; a \`#\` starts a comment only after a space),
+// so we scan it here where look-ahead IS available. The run starts at the current column and ends
+// BEFORE the first key/value \`:\`-separator, comment, flow indicator, newline, or EOF; trailing
+// whitespace is trimmed. KEY vs PLAIN is decided by whether a \`:\`-separator immediately follows.
+//
+// A number- or bool/null-SHAPED run is left to the regex \`num\`/\`bool_null\` tokens (return false →
+// tree-sitter rolls back our advances and the typed token matches) — but ONLY where such a token is
+// valid. A multi-line plain fold's continuation line is plain-ONLY (its KEY symbol is not valid), so
+// a numeric-looking continuation ("123" under a plain scalar) must stay PLAIN, not be handed to num.
+//${hasCompact ? `
+// COMPACT mapping-KEY support: when \`compact_col >= 0\` (a line-lead indicator's scalar-led inline
+// content, deeper than the stack top — see the caller), the run is scanned WITHOUT marking the token
+// end (the caller pre-marked a zero-width end at the content start). A KEY run pushes \`compact_col\`
+// and emits a zero-width INDENT — the nested mapping's real indent — and the key is re-lexed on the
+// next call (then compact_col is no longer deeper, so this path is skipped). A leaf scalar (no \`:\`)
+// is emitted as usual, its end marked at the run end. Mirrors compactNestsHere's mapping-key arm.` : ''}
+static bool scan_scalar(TSLexer *lexer, bool want_key, bool want_plain, bool want_num, bool want_boolnull${hasFlow ? `,
+                        int flow_depth` : ''}${(hasCompact || hasMarkers) ? `,
+                        Scanner *s` : ''}${hasCompact ? `,
+                        int16_t compact_col, int indent_sym` : ''}) {${hasCompact ? `
+  bool cm = compact_col >= 0;   // compact-eligible: suppress per-char mark_end (zero-width INDENT on KEY)` : ''}
+  char buf[64];
+  unsigned blen = 0;        // run text (capped) — for the number/bool-null shape test
+  bool has_content = false;
+  bool stopped_at_kv = false; // ended at a \`:\`-separator → this scalar is a mapping KEY
+${markerProbe}
+  for (;;) {
+    int32_t c = lexer->lookahead;
+    if (c == 0) break;                                             // EOF
+    if (c == '\\n' || c == '\\r') {${hasFlow ? `
+      // Inside a flow collection a plain scalar FOLDS across a line break (\`{ multi\\n line: v}\` → the
+      // key is \`multi line\`): the break + surrounding whitespace (and blank/comment-only lines) collapse
+      // to one space and the run continues on the next line. Peek past that trivia run WITHOUT committing
+      // (mark_end stays at the last content char, so a decline trims it): if the next significant char
+      // ENDS the scalar — EOF, a flow indicator/terminator (\`, [ ] { }\`), or a line-leading \`#\` comment —
+      // the break is trailing trivia and the scalar stops here; otherwise fold to a space and continue.
+      if (flow_depth > 0 && has_content) {
+        while (lexer->lookahead == ' ' || lexer->lookahead == '\\t' ||
+               lexer->lookahead == '\\n' || lexer->lookahead == '\\r') lexer->advance(lexer, false);
+        int32_t nx = lexer->lookahead;
+        if (nx == 0 || nx == '#' || (${flowBreakCond('nx')})) break; // scalar ends — the trivia is trailing
+        if (blen < sizeof(buf)) buf[blen++] = ' ';                  // the folded break becomes one space
+        continue;                                                   // next content char marks the new token end
+      }` : ''}
+      break;                                                       // block context (or no content yet): the line break ends the scalar
+    }
+    ${hasFlow ? `if (flow_depth > 0 && (${flowBreakCond('c')})) break; // flow indicators end a scalar — ONLY inside a flow collection` : `if (c == ',' || c == '[' || c == ']' || c == '{' || c == '}') break; // flow indicators end a scalar`}
+    if (!has_content && (c == '-' || c == '?')) {
+      // A leading \`-\`/\`?\` is a block indicator (seq entry / explicit key) when followed by space/EOL/
+      // flow-indicator, and scalar content otherwise (\`-1\`, \`?x\`). Peek the next char to decide.
+      lexer->advance(lexer, false);
+      int32_t n = lexer->lookahead;
+      if (n == 0 || n == ' ' || n == '\\t' || n == '\\n' || n == '\\r'${hasFlow ? ` ||
+          (flow_depth > 0 && (${flowBreakCond('n')}))` : ` ||
+          n == ',' || n == '[' || n == ']' || n == '{' || n == '}'`}) return false; // indicator, not a scalar
+      if (blen < sizeof(buf)) buf[blen++] = (char)c;               // \`-\`/\`?\` glued to non-space is content
+      has_content = true;
+      ${hasCompact ? 'if (!cm) ' : ''}lexer->mark_end(lexer);
+      continue;
+    }
+    if (c == ':') {
+      lexer->advance(lexer, false);                               // past the ':' to peek the next char
+      int32_t n = lexer->lookahead;
+      if (n == 0 || n == ' ' || n == '\\t' || n == '\\n' || n == '\\r'${hasFlow ? ` ||
+          (flow_depth > 0 && (${flowBreakCond('n')}))` : ` ||
+          n == ',' || n == '[' || n == ']' || n == '{' || n == '}'`}) {
+        stopped_at_kv = true; break;                              // ':' is a key/value separator → end before it
+      }
+      if (blen < sizeof(buf)) buf[blen++] = ':';                  // ':' glued to non-space is content
+      has_content = true;
+      ${hasCompact ? 'if (!cm) ' : ''}lexer->mark_end(lexer);
+      continue;
+    }
+    if (c == ' ' || c == '\\t') {
+      lexer->advance(lexer, false);                               // past the space to peek the next char
+      if (lexer->lookahead == '#') break;                        // " #" begins a comment → end before the space
+      if (blen < sizeof(buf)) buf[blen++] = ' ';                  // interior space (e.g. "hello world")
+      continue;                                                   // do NOT mark_end → trailing spaces are trimmed
+    }
+    if (blen < sizeof(buf)) buf[blen++] = (char)c;
+    has_content = true;
+    lexer->advance(lexer, false);
+    ${hasCompact ? 'if (!cm) ' : ''}lexer->mark_end(lexer);                                       // token end follows the last content char
+  }
+  if (!has_content) return false;
+${hasCompact ? `
+  // COMPACT mapping KEY: the inline content after a line-lead indicator is a mapping key → its column
+  // is the nested mapping's indent. Push it and emit the zero-width INDENT (the caller pre-marked the
+  // end at the content start); the key is re-lexed on the next call. A leaf falls through to normal
+  // classification, with its end marked here (run end) since per-char marking was suppressed.
+  if (cm) {
+    if (stopped_at_kv) {
+      push_indent(s, compact_col);
+      s->at_line_lead = true;     // the key is itself this line's fresh lead (re-lexed next call)
+      lexer->result_symbol = indent_sym;
+      return true;                // zero-width INDENT at the content start (advances discarded)
+    }
+    lexer->mark_end(lexer);       // leaf: take the whole run (trailing-space trim is skipped in compact mode)
+  }
+` : ''}
+  // Number / bool-null SHAPE test (so the typed regex tokens still classify \`1\`/\`true\`). Decide KEY
+  // vs PLAIN first, because a typed-looking run is only deferred where a typed token is valid: a KEY
+  // position admits num/bool_null (block_key_scalar), and a non-KEY value position likewise — but a
+  // plain-ONLY fold continuation (neither KEY here, and the run is not a key) must stay PLAIN.
+  // numeric / bool-null SHAPE test — a loose superset is fine for classification (only a typed-shaped
+  // run is emitted as NUM/BOOL_NULL; a run with any other char is PLAIN), at the cost of mis-typing a
+  // rare plain like \`1abc\` as numeric (the documented imprecise edge).
+  bool numeric = blen > 0;
+  for (unsigned i = 0; i < blen; i++) {
+    char ch = buf[i];
+    bool ok = (ch >= '0' && ch <= '9') || ch == '.' || ch == '+' || ch == '-' || ch == 'e' || ch == 'E' ||
+              ch == 'x' || ch == 'o' || ch == 'n' /* inf/nan */ || ch == 'a' || ch == 'f' || ch == 'i' ||
+              ch == 'I' || ch == 'N' || ch == 'F' || (ch >= 'A' && ch <= 'F');
+    if (!ok) { numeric = false; break; }
+  }
+  // also require at least one digit OR a .inf/.nan/~ shape so a bare "e"/"a" word isn't called numeric
+  if (numeric) {
+    bool any_digit = false;
+    for (unsigned i = 0; i < blen; i++) if (buf[i] >= '0' && buf[i] <= '9') { any_digit = true; break; }
+    if (!any_digit) numeric = false;
+  }
+  bool boolnull = false;
+  {
+    static const char *WORDS[] = { "true","True","TRUE","false","False","FALSE","null","Null","NULL","~" };
+    for (unsigned w = 0; w < sizeof(WORDS)/sizeof(WORDS[0]); w++) {
+      const char *p = WORDS[w]; unsigned i = 0;
+      while (i < blen && p[i] && buf[i] == p[i]) i++;
+      if (i == blen && p[i] == 0) { boolnull = true; break; }
+    }
+  }
+  // Classify + emit. The external scalar token CARRIES the key-vs-value decision (a trailing \`: \`
+  // means KEY), which the GLR parser needs to chain mapping entries — so a typed value is emitted as
+  // NUM/BOOL_NULL here, NOT deferred to a regex token (deferring drops the disambiguation and
+  // mis-parses a top-level \`x: 1\\ny: 2\`). A key wins first; then the typed shapes; then PLAIN. Each
+  // is gated on its token being admissible here (valid_symbols), falling through otherwise.
+  if (stopped_at_kv && want_key) { lexer->result_symbol = ${KEY ?? PLAIN}; return true; }
+  if (numeric && want_num) { lexer->result_symbol = ${NUM ?? PLAIN}; return true; }
+  if (boolnull && want_boolnull) { lexer->result_symbol = ${BOOLNULL ?? PLAIN}; return true; }
+  if (want_plain) { lexer->result_symbol = ${PLAIN ?? KEY}; return true; }
+  if (want_key) { lexer->result_symbol = ${KEY ?? PLAIN}; return true; }
+  return false;
+}
+` : ''}
+bool tree_sitter_${G}_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
+  Scanner *s = (Scanner *)payload;
+  bool want_indent = valid_symbols[${INDENT}];
+  bool want_dedent = valid_symbols[${DEDENT}];
+  bool want_newline = valid_symbols[${NEWLINE}];${BLOCK ? `\n  bool want_block = valid_symbols[${BLOCK}];` : ''}${hasMarkers ? `
+  // Content lies to our left whenever we are not at column 0 — including right after an INTERNAL token
+  // (e.g. a \`---\`/\`...\` document marker, whose match the scanner never sees). Mark started so the line
+  // boundary that follows emits its NEWLINE (the leading-NEWLINE suppression is only for blank lines at
+  // the very start of input, which are always at column 0). Without this, the NEWLINE after a leading
+  // \`---\` would be dropped and the document body could not attach.
+  if (lexer->get_column(lexer) > 0) s->started = true;` : ''}
+
+  // Finish a line boundary already in progress: emit the remaining DEDENTs (one per call), then the
+  // owed NEWLINE when the stack top reaches the boundary column. No input is consumed here.
+  if (s->pending_col >= 0) {
+    int top = s->stack[s->len - 1];
+    if (s->pending_col < top) {
+      if (want_dedent) { s->len--; lexer->result_symbol = ${DEDENT}; return true; }
+      s->pending_col = -1; s->pending_newline = false; return false;
+    }
+    bool owed = s->pending_newline;
+    int16_t col = s->pending_col;
+    s->pending_col = -1; s->pending_newline = false;
+    if (col == top && owed && want_newline && s->started) { lexer->result_symbol = ${NEWLINE}; return true; }
+    return false;
+  }
+${hasFlow ? `
+  // Inside a flow collection (flow_depth > 0) a line break is INSIGNIFICANT — indentation is suspended,
+  // so a flow scalar / nested bracket may sit on a following line (\`[a,\\n b]\`). tree-sitter's \`/\\s/\`
+  // extra cannot skip the newline here: the external scanner is consulted first, and a \`false\` return
+  // (the only way to "decline") rolls back any advance, so the newline is never consumed and the parser
+  // stalls into error recovery. So when in flow, the scanner itself eats the flow-insignificant run
+  // (spaces, tabs, newlines, comments) as the LEADING trivia of the next token it returns — the bracket
+  // emission and scalar scan below both return TRUE, which makes the skip stick. (In block context this
+  // is skipped: a newline IS significant and drives the INDENT/DEDENT/NEWLINE boundary logic.)
+  if (s->flow_depth > 0) {
+    for (;;) {
+      int32_t c = lexer->lookahead;
+      if (c == ' ' || c == '\\t' || c == '\\n' || c == '\\r') skip(lexer);
+      ${cmtC ? `else if (c == ${cmtC}) { while (lexer->lookahead != 0 && lexer->lookahead != '\\n' && lexer->lookahead != '\\r') skip(lexer); }` : ''}
+      else break;
+    }
+  }
+  // Flow-collection delimiters ([ ] { }). These are emitted as EXTERNAL tokens (not the internal DFA) so
+  // flow_depth — the open-bracket nesting that suspends indentation — PERSISTS: tree-sitter keeps an
+  // external scanner's struct mutations only across a token it RETURNS (on a \`false\` return it restores
+  // the pre-scan serialized state, so a peek-then-false counter is silently rolled back before the
+  // internal bracket is lexed). Each is gated on its own valid_symbols, so a \`[\`/\`{\` that is plain
+  // content (a scalar contains but never STARTS with one — handled in scan_scalar) is NOT consumed here:
+  // at those positions the flow token isn't valid and we fall through. Skip inline space/tab first (the
+  // flow newline skip above already ran when in flow; in block a newline still drives the indent logic).
+  {
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\\t') skip(lexer);
+    int32_t fc = lexer->lookahead;
+${flowTokens.map(t => `    if (fc == ${charLit(t.char)} && valid_symbols[${t.sym.toUpperCase()}]) {
+      advance(lexer); lexer->mark_end(lexer); lexer->result_symbol = ${t.sym.toUpperCase()};
+      ${t.open ? 'if (s->flow_depth < UINT16_MAX) s->flow_depth++;' : 'if (s->flow_depth > 0) s->flow_depth--;'}
+      s->started = true;${hasCompact ? ' s->at_line_lead = false;' : ''} return true;
+    }`).join('\n')}
+  }
+` : ''}${hasCompact ? `
+  // Compact block notation (\`- a: 1\` / \`- - x\` / \`- &a k: v\` / \`? key\`). The line-lead indicator was
+  // just lexed by tree-sitter's internal DFA, so at_line_lead is still set (the scanner emits no
+  // indicator token to clear it) and tree-sitter now wants the nested node's INDENT on the SAME line.
+  // When the inline content begins a block node, its column — not the indicator's — is the node's
+  // indentation: emit a zero-width INDENT there and push it (the DEDENT logic closes it when a
+  // shallower line arrives). The work splits by what leads the inline content, because the sniff
+  // ADVANCES (irrecoverably) and external-scanner state changes are reverted on a false return:
+  //   • a node-property / flow / alias / nested-indicator lead — sniff it here; a structural hit pushes
+  //     INDENT, a miss returns false so tree-sitter rewinds and the leading char (all INTERNAL-lexable,
+  //     or the scalar handled on the next call) is re-lexed.
+  //   • a plain/quoted SCALAR lead — fall through to scan_scalar (below), which pushes the compact
+  //     INDENT itself when the run is a mapping KEY. (A bare scalar must NOT be sniffed-then-rewound: a
+  //     plain scalar is external-only, so a false return here would loop.)
+  if (want_indent && s->at_line_lead${hasFlow ? ' && s->flow_depth == 0' : ''}) {
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\\t') skip(lexer);
+    int32_t c = lexer->lookahead;
+    // GENUINE line lead: the line's first token is not yet lexed, so its column == the stack top (a
+    // line boundary set top to the lead column; at stream start both are the document level). Once a
+    // token has been lexed on this line the next content is DEEPER than top. Record whether that first
+    // token is a node PROPERTY (\`&\`/\`!\`): a property leads a node, so its inline content is at the
+    // SAME level (no compact push), whereas a compact indicator (\`-\`/\`?\`) DOES open a nested level
+    // for its content. This is the one fact lost when the property is lexed internally (the scanner
+    // never sees it), so it is latched here and checked at the two compact-push sites below.
+    if ((int16_t)lexer->get_column(lexer) == s->stack[s->len - 1]) s->property_lead = (c == '&' || c == '!');
+    bool nonscalar_lead = c == '&' || c == '!' || c == '[' || c == '{' || c == '*' || compact_is_indicator(c);
+    // A property that LEADS the line (property_lead) does not nest — skip the compact push so its key
+    // stays at the node's level (\`&a a: b\` / \`!!str &a1 "foo":\`). A property that follows a compact
+    // indicator (\`- &a k: v\`) is NOT a line lead (property_lead was set false at the \`-\`), so it still
+    // pushes via compact_content_is_structural's property-skip.
+    if (nonscalar_lead && !s->property_lead && (int16_t)lexer->get_column(lexer) > s->stack[s->len - 1]) {
+      int16_t col = (int16_t)lexer->get_column(lexer);
+      lexer->mark_end(lexer); // freeze the zero-width INDENT end at the content column before the sniff advances
+      if (compact_content_is_structural(lexer)) {
+        push_indent(s, col);
+        // A NESTED indicator's content is itself a fresh line-lead (so \`- - x\` nests once more); but a
+        // node-property prefix (\`- &a k: v\`) is followed INLINE by its own mapping KEY at the SAME level
+        // — that key must NOT push again, so clear the lead for the property / direct-key case.
+        s->at_line_lead = compact_is_indicator(c);
+        lexer->result_symbol = ${INDENT};
+        return true;
+      }
+      return false; // not block-structural → rewind; the internal-lexable lead (or next-call scalar) re-lexes
+    }
+  }
+` : ''}${BLOCK ? `
+  // A block scalar value (\`key: |\`): scan its body before the indent logic — its more-indented
+  // lines are content, not nested structure. Skip the inline space after the \`:\`/\`-\` first. Block
+  // scalars are a block-context construct — inside a flow collection \`|\`/\`>\` are plain content.
+  if (want_block${hasFlow ? ' && s->flow_depth == 0' : ''}) {
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\\t') skip(lexer);
+    if (${introCond}) { if (scan_block_scalar(s, lexer)) {${hasCompact ? ' s->at_line_lead = false;' : ''} return true; } }
+  }
+` : ''}${SCALAR ? `
+  // A SCALAR (plain / key / num / bool-null) on the CURRENT line — NOT at a line boundary (a leading
+  // newline falls through to the indent logic so INDENT/DEDENT/NEWLINE are emitted first). Skip inline
+  // spaces/tabs, then if the next char could begin a plain scalar (not a newline/EOF and not a YAML
+  // indicator — a leading \`-\`/\`?\`/\`:\` is resolved inside scan_scalar), scan it where look-ahead is
+  // available. scan_scalar classifies the run and emits the admissible token.
+  if (${scalarGate}) {
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\\t') skip(lexer);
+    int32_t h = lexer->lookahead;
+    bool indicator = h == 0 || h == '\\n' || h == '\\r' || h == ',' || h == '[' || h == ']' || h == '{' ||
+                     h == '}' || h == '#' || h == '&' || h == '*' || h == '!' || h == '|' || h == '>' ||
+                     h == '\\'' || h == '"' || h == '%' || h == '@' || h == '\`';
+    if (!indicator) {${hasCompact ? `
+      // Compact mapping-KEY (part B): a line-lead indicator's scalar-led inline content, deeper than the
+      // stack top. scan_scalar then pushes the nested mapping's INDENT (zero-width, pre-marked here) if
+      // the run is a KEY, or emits the leaf scalar otherwise (so \`- x\` stays a plain item, no push).
+      // \`!s->property_lead\`: a key after a LINE-LEAD node property (\`&a a: b\`) sits at the node's level,
+      // not a compact-nested one — so do NOT pre-mark a compact INDENT; scan_scalar then emits the key as
+      // an ordinary value-position scalar (the enclosing Node carries the property). A key after a compact
+      // indicator (\`- a: 1\`) has property_lead == false and still nests.
+      int16_t compact_col = -1;
+      if (want_indent && s->at_line_lead && !s->property_lead && (int16_t)lexer->get_column(lexer) > s->stack[s->len - 1]) {
+        compact_col = (int16_t)lexer->get_column(lexer);
+        lexer->mark_end(lexer); // zero-width INDENT end at the content start (used iff the run is a KEY)
+      }
+      if (scan_scalar(lexer, ${want(KEY)}, ${want(PLAIN)}, ${want(NUM)}, ${want(BOOLNULL)}${hasFlow ? ', s->flow_depth' : ''}, s, compact_col, ${INDENT})) { s->started = true; s->at_line_lead = (lexer->result_symbol == ${INDENT}); return true; }${hasMarkers ? `
+      if (s->marker_decline) { s->marker_decline = false; return false; } // a true \`---\`/\`...\` here → let the internal marker token lex it` : ''}` : `
+      if (scan_scalar(lexer, ${want(KEY)}, ${want(PLAIN)}, ${want(NUM)}, ${want(BOOLNULL)}${hasFlow ? ', s->flow_depth' : ''}${hasMarkers ? ', s' : ''})) { s->started = true; return true; }${hasMarkers ? `
+      if (s->marker_decline) { s->marker_decline = false; return false; }` : ''}`}
+    }
+  }
+` : ''}
+  if (!want_indent && !want_dedent && !want_newline) return false; // no indent tokens valid${hasFlow ? `
+  // Inside a flow collection a newline is INSIGNIFICANT (indentation suspended): emit NO INDENT/DEDENT/
+  // NEWLINE so the line break is consumed by tree-sitter's \`/\\s/\` extra and the flow spans the line.
+  if (s->flow_depth > 0) return false;` : ''}
+
+  // Skip blank lines, comment-only lines, and leading whitespace, noting whether a line break was
+  // crossed (only a real boundary drives the indent logic).
+  bool found_eol = false;
+  for (;;) {
+    int32_t c = lexer->lookahead;
+    if (c == '\\n') { skip(lexer); found_eol = true; }
+    else if (c == '\\r') { skip(lexer); if (lexer->lookahead == '\\n') skip(lexer); found_eol = true; }
+    else if (c == ' ' || c == '\\t') { skip(lexer); }
+    ${cmtC ? `else if (c == ${cmtC}) { while (lexer->lookahead != 0 && lexer->lookahead != '\\n' && lexer->lookahead != '\\r') skip(lexer); }` : ''}
+    else break; // content or EOF
+  }
+
+  if (lexer->eof(lexer)) {
+    if (s->len > 1 && want_dedent) { s->len--; lexer->result_symbol = ${DEDENT}; return true; } // close open blocks at EOF
+    return false;
+  }
+  bool was_started = s->started;
+  s->started = true; // content lies ahead — mark started even on a mid-line return
+  if (!found_eol) return false; // not at a line boundary
+
+  int16_t col = (int16_t)lexer->get_column(lexer);
+  lexer->mark_end(lexer); // INDENT/DEDENT/NEWLINE are zero-width at the content column
+  int top = s->stack[s->len - 1];${hasCompact ? `
+  s->at_line_lead = true; // a real line boundary — the next real token leads its line
+  // Latch whether THIS new line is led by a node property (\`&\`/\`!\`) — the lookahead is the line's
+  // first content char (blanks/comments already skipped). A property leads a node, so its inline content
+  // is at the same level and must NOT take a compact push (the gates below check property_lead). This is
+  // a TRUE-return site so the latch persists through the property's internal lex; it is also re-derived
+  // for the FIRST line (which reaches no boundary) by the compact block's genuine-line-lead detection.
+  s->property_lead = (lexer->lookahead == '&' || lexer->lookahead == '!');` : ''}
+
+  if (col > top) {
+    if (want_indent) { push_indent(s, col); lexer->result_symbol = ${INDENT}; return true; }
+    return false;
+  }
+  if (col == top) {
+    if (want_newline && was_started) { lexer->result_symbol = ${NEWLINE}; return true; }
+    return false;
+  }
+  // col < top: a dedent. Emit one DEDENT now; queue the rest + the trailing NEWLINE for re-entry.
+  if (want_dedent) {
+    s->pending_col = col; s->pending_newline = true;
+    s->len--; lexer->result_symbol = ${DEDENT}; return true;
+  }
+  return false;
+}
+`;
+  return { scannerC, externalTokens };
+}
+
 function buildScannerC(
   grammar: CstGrammar,
   ctx: GrammarJsContext,
   grammarName: string,
 ): { scannerC: string; externalTokens: string[] } {
+  if (grammar.indent) return buildIndentScannerC(grammar, ctx, grammarName);
   const regexTok = grammar.tokens.find(t => t.flags.includes('regex'));
   const tp = ctx.templatePlan;
 
diff --git a/test/treesitter-yaml-bench.ts b/test/treesitter-yaml-bench.ts
new file mode 100644
index 0000000..db6190d
--- /dev/null
+++ b/test/treesitter-yaml-bench.ts
@@ -0,0 +1,45 @@
+// YAML tree-sitter accuracy bench (issue #3): how many VALID yaml-test-suite inputs the DERIVED
+// YAML tree-sitter parses with no ERROR/MISSING node. "Valid" = the `yaml` package accepts the input
+// (so a failure is the tree-sitter grammar's, not a malformed sample). The corpus is extracted from
+// the yaml-test-suite src meta-files exactly like test/src-coverage-yaml.ts.
+//
+//   git clone --depth 1 https://github.com/yaml/yaml-test-suite /tmp/yaml-test-suite
+//   cd tree-sitter/yaml && npx tree-sitter generate && npx tree-sitter build --wasm .
+//   node test/treesitter-yaml-bench.ts
+import { readdirSync, readFileSync, existsSync } from 'node:fs';
+import { parse as yamlParse, parseAllDocuments } from 'yaml';
+
+const WASM = 'tree-sitter/yaml/tree-sitter-yaml.wasm';
+const SUITE = '/tmp/yaml-test-suite/src';
+if (!existsSync(WASM)) { console.error(`missing ${WASM} — run: (cd tree-sitter/yaml && npx tree-sitter build --wasm .)`); process.exit(1); }
+if (!existsSync(SUITE)) { console.error(`missing ${SUITE} — git clone --depth 1 https://github.com/yaml/yaml-test-suite /tmp/yaml-test-suite`); process.exit(1); }
+
+const { Parser, Language } = await import('web-tree-sitter');
+await Parser.init();
+const lang = await Language.load(WASM);
+const parser = new Parser();
+parser.setLanguage(lang);
+
+// Decode the suite's visible-whitespace markers to real bytes (same as src-coverage-yaml).
+const decode = (s: string) => s.replace(/␣/g, ' ').replace(/—*»/g, '\t').replace(/[↵∎]/g, '');
+const corpus: string[] = [];
+for (const f of readdirSync(SUITE).filter((n) => n.endsWith('.yaml'))) {
+  try {
+    const meta = yamlParse(readFileSync(`${SUITE}/${f}`, 'utf8'));
+    for (const t of (Array.isArray(meta) ? meta : [meta])) if (t && typeof t.yaml === 'string') corpus.push(decode(t.yaml));
+  } catch { /* skip meta-files that don't round-trip */ }
+}
+const valid = corpus.filter((c) => { try { return parseAllDocuments(c).every((d: any) => d.errors.length === 0); } catch { return false; } });
+
+function hasError(node: any): boolean {
+  if (node.type === 'ERROR' || node.isError === true || node.isMissing === true) return true;
+  for (let i = 0; i < node.childCount; i++) { const c = node.child(i); if (c && hasError(c)) return true; }
+  return false;
+}
+
+let ok = 0;
+for (const c of valid) { const tree = parser.parse(c); if (tree && !hasError(tree.rootNode)) ok++; }
+const pct = ((100 * ok) / valid.length).toFixed(1);
+console.log(`YAML corpus: ${corpus.length} inputs (${valid.length} valid per the yaml package).`);
+console.log(`YAML tree-sitter accuracy: ${ok}/${valid.length} valid inputs parse ERROR-free (${pct}%).`);
+console.log(`##TSYAML## ${JSON.stringify({ name: 'YAML', engine: 'tree-sitter (derived)', valid: valid.length, errorFree: ok, pct: Number(pct) })}`);
diff --git a/tree-sitter/yaml/grammar.js b/tree-sitter/yaml/grammar.js
index 1e852fc..d5233e9 100644
--- a/tree-sitter/yaml/grammar.js
+++ b/tree-sitter/yaml/grammar.js
@@ -16,14 +16,69 @@ module.exports = grammar({
     $.comment
   ],
 
+  externals: $ => [
+    $.indent,
+    $.dedent,
+    $.newline,
+    $.block_scalar,
+    $.plain,
+    $.key,
+    $.num,
+    $.bool_null,
+    $._flow_lbracket,
+    $._flow_lbrace,
+    $._flow_rbracket,
+    $._flow_rbrace
+  ],
+
+  conflicts: $ => [
+    [$.stream, $.node],
+    [$.empty_key_mapping],
+    [$.explicit_entry],
+    [$.next_doc],
+    [$.stream, $.next_doc],
+    [$.node],
+    [$.key, $.plain],
+    [$.scalar, $.doc_fold],
+    [$.explicit_mapping],
+    [$.block_sequence],
+    [$.map_value_scalar, $.map_value_node_scalar],
+    [$.scalar, $.block_key_scalar],
+    [$.map_value, $.map_value_node],
+    [$.flow_explicit],
+    [$.flow_mapping],
+    [$.flow_sequence],
+    [$.explicit_doc_body],
+    [$.inline_doc_node],
+    [$.alias_or_keyed],
+    [$.doc_fold],
+    [$.mapping_from_flow],
+    [$.mapping_or_scalar],
+    [$.property, $.node],
+    [$.seq_item],
+    [$.property],
+    [$.flow_node],
+    [$.node, $.explicit_doc_body],
+    [$.node, $.after_doc_end],
+    [$.after_doc_end],
+    [$.map_entry],
+    [$.stream, $.explicit_doc_body],
+    [$.map_entry_no_empty],
+    [$.seq_value_node],
+    [$.mapping_or_scalar, $.doc_fold],
+    [$.map_value_scalar, $.map_inline_scalar],
+    [$.content_node, $.mapping_from_flow],
+    [$.mapping_or_scalar, $.map_value],
+  ],
+
   rules: {
-    stream: $ => choice(seq(repeat1(seq(choice($.yaml_directive, $.directive), optional($.newline))), optional(seq($.doc_start, optional($.explicit_doc_body), repeat(seq(optional($.newline), $.next_doc)), optional($.newline), optional($.doc_end), optional($.newline)))), seq(optional($.indent), optional(choice($.doc_fold, $.node)), optional($.dedent), repeat(seq(optional($.newline), $.next_doc)), optional($.newline), optional($.doc_end), optional($.newline))),
+    stream: $ => choice(seq(repeat1(seq(choice($.yaml_directive, $.directive), optional($.newline))), optional(seq($.doc_start, optional($.explicit_doc_body), repeat(seq(optional($.newline), $.next_doc)), optional($.newline), optional($.doc_end), optional($.newline)))), seq(optional($.indent), optional(choice($.doc_fold, optional($.node))), optional($.dedent), repeat(seq(optional($.newline), $.next_doc)), optional($.newline), optional($.doc_end), optional($.newline))),
 
     property: $ => choice(seq($.anchor, optional($.tag)), seq($.tag, optional($.anchor))),
 
     content_node: $ => choice($.block_sequence, $.explicit_mapping, $.empty_key_mapping, $.flow_mapping, $.flow_sequence, $.mapping_from_flow, $.alias_or_keyed, $.mapping_or_scalar),
 
-    node: $ => choice(seq(optional($.anchor), optional($.tag), optional(choice(seq($.indent, $.node, $.dedent), seq($.newline, $.node), $.block_sequence, $.explicit_mapping, $.empty_key_mapping, $.mapping_from_flow, $.flow_mapping, $.flow_sequence, $.alias_or_keyed, $.mapping_or_scalar))), seq($.tag, $.anchor, optional(choice(seq($.indent, $.node, $.dedent), seq($.newline, $.node), $.block_sequence, $.explicit_mapping, $.empty_key_mapping, $.mapping_from_flow, $.flow_mapping, $.flow_sequence, $.alias_or_keyed, $.mapping_or_scalar))), $.block_sequence),
+    node: $ => choice(choice(seq($.anchor, optional($.tag), optional(choice(seq($.indent, optional($.node), $.dedent), seq($.newline, optional($.node)), $.block_sequence, $.explicit_mapping, $.empty_key_mapping, $.mapping_from_flow, $.flow_mapping, $.flow_sequence, $.alias_or_keyed, $.mapping_or_scalar))), seq($.tag, optional(choice(seq($.indent, optional($.node), $.dedent), seq($.newline, optional($.node)), $.block_sequence, $.explicit_mapping, $.empty_key_mapping, $.mapping_from_flow, $.flow_mapping, $.flow_sequence, $.alias_or_keyed, $.mapping_or_scalar))), choice(seq($.indent, optional($.node), $.dedent), seq($.newline, optional($.node)), $.block_sequence, $.explicit_mapping, $.empty_key_mapping, $.mapping_from_flow, $.flow_mapping, $.flow_sequence, $.alias_or_keyed, $.mapping_or_scalar)), seq($.tag, $.anchor, optional(choice(seq($.indent, optional($.node), $.dedent), seq($.newline, optional($.node)), $.block_sequence, $.explicit_mapping, $.empty_key_mapping, $.mapping_from_flow, $.flow_mapping, $.flow_sequence, $.alias_or_keyed, $.mapping_or_scalar))), $.block_sequence),
 
     mapping_or_scalar: $ => choice(seq(choice($.num, $.bool_null, $.plain), $.indent, choice($.num, $.bool_null, $.plain), repeat(seq($.newline, choice($.num, $.bool_null, $.plain))), $.dedent), seq($.block_key_scalar, ":", optional($.map_value_scalar), repeat(seq($.newline, $.map_entry))), $.scalar),
 
@@ -41,11 +96,11 @@ module.exports = grammar({
 
     empty_key_mapping: $ => seq(":", optional($.map_value_scalar), repeat(seq($.newline, $.map_entry_no_empty))),
 
-    value: $ => choice(seq($.indent, choice($.num, $.bool_null, $.plain), repeat1(seq($.newline, $.plain)), $.dedent), seq($.indent, $.node, $.dedent), $.seq_value_node),
+    value: $ => choice(seq($.indent, choice($.num, $.bool_null, $.plain), repeat1(seq($.newline, $.plain)), $.dedent), seq($.indent, optional($.node), $.dedent), $.seq_value_node),
 
-    map_value: $ => choice(seq(choice($.num, $.bool_null, $.plain), $.indent, choice($.num, $.bool_null, $.plain), repeat(seq($.newline, choice($.num, $.bool_null, $.plain))), $.dedent), seq($.indent, choice($.num, $.bool_null, $.plain), repeat1(seq($.newline, $.plain)), $.dedent), seq($.indent, $.node, $.dedent), seq($.indent, $.property, $.dedent, $.content_node), seq($.indent, $.property, $.dedent, $.newline, $.block_sequence), seq($.property, $.newline, $.block_sequence), seq($.newline, $.block_sequence), $.map_value_node),
+    map_value: $ => choice(seq(choice($.num, $.bool_null, $.plain), $.indent, choice($.num, $.bool_null, $.plain), repeat(seq($.newline, choice($.num, $.bool_null, $.plain))), $.dedent), seq($.indent, choice($.num, $.bool_null, $.plain), repeat1(seq($.newline, $.plain)), $.dedent), seq($.indent, optional($.node), $.dedent), seq($.indent, $.property, $.dedent, $.content_node), seq($.indent, $.property, $.dedent, $.newline, $.block_sequence), seq($.property, $.newline, $.block_sequence), seq($.newline, $.block_sequence), $.map_value_node),
 
-    map_value_scalar: $ => choice(seq(choice($.num, $.bool_null, $.plain), $.indent, choice($.num, $.bool_null, $.plain), repeat(seq($.newline, choice($.num, $.bool_null, $.plain))), $.dedent), seq($.indent, choice($.num, $.bool_null, $.plain), repeat1(seq($.newline, $.plain)), $.dedent), seq($.indent, $.node, $.dedent), seq($.indent, $.property, $.dedent, $.content_node), seq($.indent, $.property, $.dedent, $.newline, $.block_sequence), seq($.property, $.newline, $.block_sequence), seq($.newline, $.block_sequence), $.map_value_node_scalar),
+    map_value_scalar: $ => choice(seq(choice($.num, $.bool_null, $.plain), $.indent, choice($.num, $.bool_null, $.plain), repeat(seq($.newline, choice($.num, $.bool_null, $.plain))), $.dedent), seq($.indent, choice($.num, $.bool_null, $.plain), repeat1(seq($.newline, $.plain)), $.dedent), seq($.indent, optional($.node), $.dedent), seq($.indent, $.property, $.dedent, $.content_node), seq($.indent, $.property, $.dedent, $.newline, $.block_sequence), seq($.property, $.newline, $.block_sequence), seq($.newline, $.block_sequence), $.map_value_node_scalar),
 
     indented_value_node: $ => choice(seq($.property, choice(seq($.indent, $.indented_value_node, $.dedent), $.collection_content)), $.content_node),
 
@@ -71,19 +126,19 @@ module.exports = grammar({
 
     seq_item: $ => seq("-", optional($.value)),
 
-    flow_node: $ => seq(optional($.property), optional(choice($.flow_mapping, $.flow_sequence, $.alias, $.scalar))),
+    flow_node: $ => choice(seq($.property, optional(choice($.flow_mapping, $.flow_sequence, $.alias, $.scalar))), choice($.flow_mapping, $.flow_sequence, $.alias, $.scalar)),
 
-    flow_explicit: $ => seq("?", optional($.flow_node)),
+    flow_explicit: $ => seq("?", optional(optional($.flow_node))),
 
-    flow_map_entry: $ => seq(optional($.flow_explicit), optional($.flow_node), optional(seq(":", optional($.flow_node)))),
+    flow_map_entry: $ => choice(seq($.flow_explicit, optional(optional($.flow_node)), optional(seq(":", optional(optional($.flow_node))))), seq($.flow_node, optional(seq(":", optional(optional($.flow_node))))), seq(":", optional(optional($.flow_node)))),
 
-    flow_mapping: $ => seq("{", optional(seq($.flow_map_entry, repeat(seq(",", $.flow_map_entry)))), optional(","), "}"),
+    flow_mapping: $ => seq($._flow_lbrace, optional(seq(optional($.flow_map_entry), repeat(seq(",", optional($.flow_map_entry))))), optional(","), $._flow_rbrace),
 
-    flow_seq_entry: $ => choice(seq($.flow_seq_key, ":", optional($.flow_node)), seq("?", optional($.flow_node), optional(seq(":", optional($.flow_node)))), seq(":", optional($.flow_node)), $.flow_node),
+    flow_seq_entry: $ => choice(seq($.flow_seq_key, ":", optional(optional($.flow_node))), seq("?", optional(optional($.flow_node)), optional(seq(":", optional(optional($.flow_node))))), seq(":", optional(optional($.flow_node))), $.flow_node),
 
     flow_seq_key: $ => choice(seq(optional($.property), choice($.flow_mapping, $.flow_sequence, $.dquote_key, $.squote_key, $.key)), $.alias),
 
-    flow_sequence: $ => seq("[", optional(seq($.flow_seq_entry, repeat(seq(",", $.flow_seq_entry)))), optional(","), "]"),
+    flow_sequence: $ => seq($._flow_lbracket, optional(seq(optional($.flow_seq_entry), repeat(seq(",", optional($.flow_seq_entry))))), optional(","), $._flow_rbracket),
 
     scalar: $ => choice($.dquote_key, $.squote_key, $.dquote, $.squote, $.block_scalar, $.key, $.num, $.bool_null, $.plain),
 
@@ -91,13 +146,13 @@ module.exports = grammar({
 
     doc_fold: $ => seq(choice($.num, $.bool_null, $.plain), repeat1(choice(seq($.newline, choice($.plain, $.yaml_directive, $.directive)), seq($.indent, choice($.plain, $.yaml_directive, $.directive), repeat(seq($.newline, choice($.plain, $.yaml_directive, $.directive))), $.dedent)))),
 
-    inline_doc_node: $ => choice(seq($.property, optional(choice(seq($.indent, $.doc_fold, $.dedent), seq($.indent, $.node, $.dedent), seq($.newline, $.doc_fold), seq($.newline, $.node), $.flow_mapping, $.flow_sequence, $.alias, $.scalar))), $.doc_fold, choice($.flow_mapping, $.flow_sequence, $.alias, $.scalar)),
+    inline_doc_node: $ => choice(seq($.property, optional(choice(seq($.indent, $.doc_fold, $.dedent), seq($.indent, optional($.node), $.dedent), seq($.newline, $.doc_fold), seq($.newline, optional($.node)), $.flow_mapping, $.flow_sequence, $.alias, $.scalar))), $.doc_fold, choice($.flow_mapping, $.flow_sequence, $.alias, $.scalar)),
 
-    explicit_doc_body: $ => choice(seq($.newline, optional($.indent), optional(choice($.doc_fold, $.node)), optional($.dedent)), $.inline_doc_node),
+    explicit_doc_body: $ => choice(seq($.newline, optional($.indent), optional(choice($.doc_fold, optional($.node))), optional($.dedent)), $.inline_doc_node),
 
-    after_doc_end: $ => choice(seq(repeat(seq(choice($.yaml_directive, $.directive), optional($.newline))), $.doc_start, optional($.explicit_doc_body)), seq(optional($.indent), choice($.doc_fold, $.node), optional($.dedent))),
+    after_doc_end: $ => choice(seq(repeat(seq(choice($.yaml_directive, $.directive), optional($.newline))), $.doc_start, optional($.explicit_doc_body)), choice(seq($.indent, choice($.doc_fold, optional($.node)), optional($.dedent)), seq(choice($.doc_fold, $.node), optional($.dedent)), $.dedent)),
 
-    next_doc: $ => choice(seq($.doc_start, optional($.explicit_doc_body)), seq($.doc_end, optional(seq($.newline, optional($.after_doc_end))))),
+    next_doc: $ => choice(seq($.doc_start, optional($.explicit_doc_body)), seq($.doc_end, optional(seq($.newline, optional(optional($.after_doc_end)))))),
 
     doc_start: $ => token(/---/),
 
@@ -121,23 +176,7 @@ module.exports = grammar({
 
     alias: $ => token(/\*[^\t\n\f\r \[\]{},]+/),
 
-    tag: $ => token(/!(?:<[^>]*>|[^\t\n\f\r \[\]{},]*)/),
-
-    block_scalar: $ => token(/[^\s\S]/),
-
-    key: $ => token(/(?:[^\t\n\f\r \-?:,\[\]{}#&*!|>'"%@`]|[\-?:])(?:[^:#\n,\[\]{}]|:|#)*/),
-
-    num: $ => token(/(?:[+\-]?\.(?:inf|Inf|INF)|\.(?:nan|NaN|NAN)|0x[0-9A-Fa-f]+|0o[0-7]+|[+\-]?(?:\.[0-9]+|[0-9]+(?:\.[0-9]*)?)(?:[eE][+\-]?[0-9]+)?)/),
-
-    bool_null: $ => token(/(?:true|True|TRUE|false|False|FALSE|null|Null|NULL|~)/),
-
-    plain: $ => token(/(?:[^\t\n\f\r \-?:,\[\]{}#&*!|>'"%@`]|[\-?:])(?:[^:#\n,\[\]{}]|:|#)*/),
-
-    indent: $ => token(/[^\s\S]/),
-
-    dedent: $ => token(/[^\s\S]/),
-
-    newline: $ => token(/[^\s\S]/)
+    tag: $ => token(/!(?:<[^>]*>|[^\t\n\f\r \[\]{},]*)/)
   }
 });
 
diff --git a/tree-sitter/yaml/src/scanner.c b/tree-sitter/yaml/src/scanner.c
index 7653989..f9ec5d8 100644
--- a/tree-sitter/yaml/src/scanner.c
+++ b/tree-sitter/yaml/src/scanner.c
@@ -1,50 +1,601 @@
-// Tree-sitter external scanner generated by monogram.
+// Tree-sitter external scanner generated by monogram (indentation path).
 //
-// COMPLETE — the regex-literal scan and the template-literal scan are both
-// wired from the grammar's token hints (`regexContext` and `template`).
-//
-// All language-specific data below is DERIVED from the CstGrammar, not
-// hardcoded: the regex flag chars and the template delimiters all come from
-// the grammar's token hints.
+// Mirrors the indent-stack state machine of src/gen-lexer.ts: INDENT / DEDENT / NEWLINE are emitted
+// from the line-leading column relative to an indent stack; a block-scalar body is scanned verbatim.
+// All language data (comment introducer, block-scalar introducers, document markers) is DERIVED from
+// the grammar's `indent` config — nothing below is hand-tuned for a specific language.
 
 #include "tree_sitter/parser.h"
 #include "tree_sitter/alloc.h"
 #include <string.h>
-#include <wctype.h>
+#include <stdint.h>
 
 enum TokenType {
-  NO_EXTERNAL_TOKENS,
+  INDENT,
+  DEDENT,
+  NEWLINE,
+  BLOCK_SCALAR,
+  PLAIN,
+  KEY,
+  NUM,
+  BOOL_NULL,
+  _FLOW_LBRACKET,
+  _FLOW_LBRACE,
+  _FLOW_RBRACKET,
+  _FLOW_RBRACE,
 };
 
-// The scanner is stateless — tree-sitter's `valid_symbols` already encodes
-// the parse context (inside a regex slot? inside a template span?), and the
-// `${ … }` brace nesting is handled by the template_substitution rule in the
-// CFG, so there is nothing to (de)serialize.
-typedef struct { char unused; } Scanner;
+typedef struct {
+  uint32_t len;          // indent-stack depth (>= 1; stack[0] == 0, the document level)
+  uint32_t cap;
+  int16_t *stack;        // indentation columns
+  int16_t pending_col;   // column of the line boundary mid-processing (-1 = none)
+  bool pending_newline;  // a NEWLINE is still owed once dedents reach pending_col
+  bool started;          // any content lexed yet (suppresses a leading NEWLINE)
+  bool at_line_lead;     // the next real content token is its line's first (compact-indicator probe)
+  bool property_lead;    // the line's FIRST token is a node property (`&`/`!`) — its inline content sits
+                         // at the SAME node level, so it must NOT take the compact mapping-key push (`&a
+                         // a: b` is the key `a` carrying anchor `&a`, not `&a`-then-INDENTED-`a: b`).
+                         // gen-lexer clears atLineLead on the property token (it sees every token); the
+                         // scanner does not lex the property, so it LATCHES this at the line lead and reads
+                         // it at the push. It must survive the property's INTERNAL lex (which the scanner
+                         // declines via a FALSE return) — tree-sitter deserializes the serialized fields on
+                         // a false return, so a SERIALIZED flag would be rolled back; this one is therefore
+                         // NOT serialized and NOT reset in deserialize (it keeps its in-memory value across
+                         // the decline). It is RE-DERIVED from the lead char at every line boundary, so it
+                         // is always correct at the only points it is read (a line lead).
+  uint16_t flow_depth;   // > 0 inside flow collections ([ ] { }) → indentation suspended, ,/[/]/{/}
+  bool marker_decline;   // transient: scan_scalar saw a true `---`/`...` → external declines so the
+                         // internal marker token lexes it. Set+consumed within one scan; not serialized.
+} Scanner;
+
+static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
+static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
+
+static void push_indent(Scanner *s, int16_t col) {
+  if (s->len == s->cap) { s->cap *= 2; s->stack = ts_realloc(s->stack, s->cap * sizeof(int16_t)); }
+  s->stack[s->len++] = col;
+}
 
 void *tree_sitter_yaml_external_scanner_create(void) {
-  return ts_calloc(1, sizeof(Scanner));
+  Scanner *s = ts_malloc(sizeof(Scanner));
+  s->cap = 16; s->len = 1;
+  s->stack = ts_malloc(s->cap * sizeof(int16_t));
+  s->stack[0] = 0;
+  s->pending_col = -1; s->pending_newline = false; s->started = false;
+  s->at_line_lead = true; s->property_lead = false;
+  s->flow_depth = 0;
+  s->marker_decline = false;
+  return s;
 }
 
 void tree_sitter_yaml_external_scanner_destroy(void *payload) {
-  ts_free(payload);
+  Scanner *s = (Scanner *)payload;
+  ts_free(s->stack);
+  ts_free(s);
 }
 
 unsigned tree_sitter_yaml_external_scanner_serialize(void *payload, char *buffer) {
-  (void)payload; (void)buffer;
-  return 0;
+  Scanner *s = (Scanner *)payload;
+  unsigned n = 0;
+  buffer[n++] = s->started ? 1 : 0;
+  buffer[n++] = s->pending_newline ? 1 : 0;
+  buffer[n++] = s->at_line_lead ? 1 : 0;
+  memcpy(&buffer[n], &s->pending_col, sizeof(int16_t)); n += sizeof(int16_t);
+  memcpy(&buffer[n], &s->flow_depth, sizeof(uint16_t)); n += sizeof(uint16_t);
+  uint32_t count = s->len;
+  while (n + sizeof(uint32_t) + count * sizeof(int16_t) > TREE_SITTER_SERIALIZATION_BUFFER_SIZE && count > 0) count--;
+  memcpy(&buffer[n], &count, sizeof(uint32_t)); n += sizeof(uint32_t);
+  memcpy(&buffer[n], s->stack, count * sizeof(int16_t)); n += count * sizeof(int16_t);
+  return n;
 }
 
 void tree_sitter_yaml_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
-  (void)payload; (void)buffer; (void)length;
+  Scanner *s = (Scanner *)payload;
+  s->len = 1; s->stack[0] = 0; s->pending_col = -1; s->pending_newline = false; s->started = false;
+  s->at_line_lead = true;
+  s->flow_depth = 0;
+  s->marker_decline = false;
+  if (length < 3 + sizeof(int16_t) + sizeof(uint16_t) + sizeof(uint32_t)) return;
+  unsigned n = 0;
+  s->started = buffer[n++] != 0;
+  s->pending_newline = buffer[n++] != 0;
+  s->at_line_lead = buffer[n++] != 0;
+  memcpy(&s->pending_col, &buffer[n], sizeof(int16_t)); n += sizeof(int16_t);
+  memcpy(&s->flow_depth, &buffer[n], sizeof(uint16_t)); n += sizeof(uint16_t);
+  uint32_t count; memcpy(&count, &buffer[n], sizeof(uint32_t)); n += sizeof(uint32_t);
+  if (count == 0) return; // keep stack[0] = 0
+  while (s->cap < count) { s->cap *= 2; s->stack = ts_realloc(s->stack, s->cap * sizeof(int16_t)); }
+  memcpy(s->stack, &buffer[n], count * sizeof(int16_t));
+  s->len = count;
 }
 
-static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
-static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
+// A block scalar (`|` / `>`): the introducer + indicators + the verbatim more-indented body, as
+// one token. The body runs while a line is blank or indented MORE than the parent block level (the
+// stack top); it ends at the first non-blank line at or below the parent, or a column-0 document
+// marker, or EOF. mark_end is advanced only over lines that belong to the scalar, so the next node's
+// indentation is left for the normal boundary logic.
+//
+// A ROOT block scalar (the document's own node — stack depth 1) has an effective parent indentation of
+// -1, not 0: its body may sit at column 0 (`--- >\nline1`, yaml-test-suite DK3J / FP8R). So at root,
+// only a column-0 DOCUMENT MARKER (`---` / `...`) — never plain column-0 text — ends it. The marker
+// is matched without committing the line (no mark_end), so a non-marker column-0 line stays body.
+static bool scan_block_scalar(Scanner *s, TSLexer *lexer) {
+  bool root = s->len == 1;           // a document-root block scalar: body may reach column 0
+  int parent = root ? -1 : s->stack[s->len - 1];
+  advance(lexer); // the introducer (| or >)
+  while (lexer->lookahead == '+' || lexer->lookahead == '-' || (lexer->lookahead >= '0' && lexer->lookahead <= '9')) advance(lexer);
+  while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != '\r') advance(lexer);
+  lexer->mark_end(lexer); // the header line belongs to the scalar
+  for (;;) {
+    if (lexer->lookahead == '\r') { advance(lexer); if (lexer->lookahead == '\n') advance(lexer); }
+    else if (lexer->lookahead == '\n') advance(lexer);
+    else break; // EOF
+    int col = 0;
+    while (lexer->lookahead == ' ') { advance(lexer); col++; }
+    int32_t c = lexer->lookahead;
+    if (c == 0 || c == '\n' || c == '\r') { lexer->mark_end(lexer); continue; } // blank line → body
+    if (root && col == 0) {                 // a column-0 document marker ends a root block scalar
+      bool is_marker = false;
+      if (!is_marker && c == '-') {
+        unsigned m = 0; if (lexer->lookahead == '-') { advance(lexer); m++; } if (lexer->lookahead == '-') { advance(lexer); m++; } if (lexer->lookahead == '-') { advance(lexer); m++; }
+        int32_t a = lexer->lookahead;
+        if (m == 3 && (a == 0 || a == ' ' || a == '\t' || a == '\n' || a == '\r')) is_marker = true;
+      }
+      if (!is_marker && c == '.') {
+        unsigned m = 0; if (lexer->lookahead == '.') { advance(lexer); m++; } if (lexer->lookahead == '.') { advance(lexer); m++; } if (lexer->lookahead == '.') { advance(lexer); m++; }
+        int32_t a = lexer->lookahead;
+        if (m == 3 && (a == 0 || a == ' ' || a == '\t' || a == '\n' || a == '\r')) is_marker = true;
+      }
+      if (is_marker) break;                 // leave the marker line for the next token (no mark_end)
+      // not a marker: the chars probed above are body; fall through to consume the rest of the line.
+    }
+    if (col <= parent) break; // dedent to/below parent ends the scalar (the line is the next node)
+    while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != '\r') advance(lexer);
+    lexer->mark_end(lexer);
+  }
+  lexer->result_symbol = BLOCK_SCALAR;
+  return true;
+}
+
+// Compact block notation (`- a: 1` / `- - x` / `- &a k: v` / `? key`): a sequence/explicit-key
+// indicator whose inline content itself begins a block node nests at the content's column, not the
+// indicator's. The indicator chars are DERIVED from grammar.indent.compactIndicators. Mirrors
+// compactNestsHere / startsBlockStructuralNode in src/gen-lexer.ts: the inline content is
+// block-structural when, after an optional node-property prefix (`&anchor` / `!tag`, 0-2
+// space-separated), it is a further indicator, or a mapping KEY (an unquoted `:` then ws/EOL/
+// flow-indicator before a ` #` comment / EOL, scanned quote-aware). A bare scalar / flow / alias does
+// NOT nest. The property / flow / alias glyphs are fixed YAML syntax (as in gen-lexer); only the entry
+// indicators are config-driven.
+static inline bool compact_is_indicator(int32_t c) { return c == '-' || c == '?'; }
+static inline bool compact_sep_after(int32_t c) {
+  return c == 0 || c == ' ' || c == '\t' || c == '\n' || c == '\r';
+}
+// The inline content (lookahead is positioned at it) begins a block-structural node. Advances; the
+// caller has frozen a zero-width token end before it and discards the advances (returning the INDENT
+// zero-width on a hit, or rewinding on a miss).
+static bool compact_content_is_structural(TSLexer *lexer) {
+  for (int n = 0; n < 2; n++) {                       // skip 0-2 node-property prefixes (`&anchor` / `!tag`)
+    int32_t c = lexer->lookahead;
+    if (c == '&' || c == '!') {
+      advance(lexer);
+      while (lexer->lookahead != 0 && !compact_sep_after(lexer->lookahead) && lexer->lookahead != ',') advance(lexer);
+      while (lexer->lookahead == ' ' || lexer->lookahead == '\t') advance(lexer);
+    } else break;
+  }
+  int32_t c0 = lexer->lookahead;
+  if (c0 == 0 || c0 == '\n' || c0 == '\r') return false;            // property alone on the line → no nest
+  if (compact_is_indicator(c0)) { advance(lexer); return compact_sep_after(lexer->lookahead); } // nested indicator
+  if (c0 == '[' || c0 == '{' || c0 == '*') return false;            // flow collection / alias → not a key
+  for (;;) {                                          // scalar KEY sniff (quote-aware), like startsBlockStructuralNode
+    int32_t ch = lexer->lookahead;
+    if (ch == 0 || ch == '\n' || ch == '\r') break;
+    if (ch == '"') {
+      advance(lexer);
+      while (lexer->lookahead != 0 && lexer->lookahead != '"' && lexer->lookahead != '\n') { if (lexer->lookahead == '\\') advance(lexer); advance(lexer); }
+      if (lexer->lookahead == '"') advance(lexer);
+      continue;
+    }
+    if (ch == '\'') {
+      advance(lexer);
+      while (lexer->lookahead != 0 && lexer->lookahead != '\n') { if (lexer->lookahead == '\'') { advance(lexer); if (lexer->lookahead != '\'') break; } advance(lexer); }
+      continue;
+    }
+    if (ch == ' ' || ch == '\t') { advance(lexer); if (lexer->lookahead == '#') break; continue; } // trailing comment
+    if (ch == ':') {
+      advance(lexer);
+      int32_t n = lexer->lookahead;
+      if (n == 0 || n == ' ' || n == '\t' || n == '\n' || n == '\r' || n == ',' || n == '[' || n == ']' || n == '{' || n == '}') return true;
+      continue;
+    }
+    advance(lexer);
+  }
+  return false;
+}
+
+// A PLAIN / KEY scalar. A tree-sitter token() DFA can't decide a plain scalar's boundary (a `:` is
+// content unless followed by space/EOL/flow-indicator; a `#` starts a comment only after a space),
+// so we scan it here where look-ahead IS available. The run starts at the current column and ends
+// BEFORE the first key/value `:`-separator, comment, flow indicator, newline, or EOF; trailing
+// whitespace is trimmed. KEY vs PLAIN is decided by whether a `:`-separator immediately follows.
+//
+// A number- or bool/null-SHAPED run is left to the regex `num`/`bool_null` tokens (return false →
+// tree-sitter rolls back our advances and the typed token matches) — but ONLY where such a token is
+// valid. A multi-line plain fold's continuation line is plain-ONLY (its KEY symbol is not valid), so
+// a numeric-looking continuation ("123" under a plain scalar) must stay PLAIN, not be handed to num.
+//
+// COMPACT mapping-KEY support: when `compact_col >= 0` (a line-lead indicator's scalar-led inline
+// content, deeper than the stack top — see the caller), the run is scanned WITHOUT marking the token
+// end (the caller pre-marked a zero-width end at the content start). A KEY run pushes `compact_col`
+// and emits a zero-width INDENT — the nested mapping's real indent — and the key is re-lexed on the
+// next call (then compact_col is no longer deeper, so this path is skipped). A leaf scalar (no `:`)
+// is emitted as usual, its end marked at the run end. Mirrors compactNestsHere's mapping-key arm.
+static bool scan_scalar(TSLexer *lexer, bool want_key, bool want_plain, bool want_num, bool want_boolnull,
+                        int flow_depth,
+                        Scanner *s,
+                        int16_t compact_col, int indent_sym) {
+  bool cm = compact_col >= 0;   // compact-eligible: suppress per-char mark_end (zero-width INDENT on KEY)
+  char buf[64];
+  unsigned blen = 0;        // run text (capped) — for the number/bool-null shape test
+  bool has_content = false;
+  bool stopped_at_kv = false; // ended at a `:`-separator → this scalar is a mapping KEY
+
+  // DOCUMENT-MARKER probe (column 0). A `---`/`...` glyph that is sep-bounded (ws / EOL / EOF) is a
+  // document marker — an INTERNAL token (its IR's sep look-ahead is beyond a tree-sitter token() DFA,
+  // but this external scanner decides the boundary). The glyph is matched WITHOUT marking the token end:
+  //   • a FULL glyph + separator      → a TRUE marker: set s->marker_decline and return false; nothing
+  //     was marked, so the probed chars roll back and the internal `---`/`...` token lexes it (a non-
+  //     marker glyph never reaches that token, so its dropped look-ahead is moot).
+  //   • a LONE indicator char + sep   → a block indicator (`- `/`? `); decline so the internal `-`/`?`
+  //     token takes it.
+  //   • anything else (`---foo`, `-1`) → plain content: replay the matched glyph chars and fall through
+  //     to the scalar loop, which continues the run (so the marker glyph is CLAIMED as a plain scalar).
+  // Markers (and which lead chars are block indicators) are DERIVED from grammar.indent.
+  if (compact_col < 0 && lexer->get_column(lexer) == 0) {
+    if (lexer->lookahead == '-') {
+      unsigned matched = 0;
+      if (lexer->lookahead == '-') { lexer->advance(lexer, false); matched++; }
+      if (lexer->lookahead == '-') { lexer->advance(lexer, false); matched++; }
+      if (lexer->lookahead == '-') { lexer->advance(lexer, false); matched++; }
+      int32_t mn = lexer->lookahead;
+      bool msep = (mn == 0 || mn == ' ' || mn == '\t' || mn == '\n' || mn == '\r');
+      if (matched == 3 && msep) { s->marker_decline = true; return false; }
+      if (matched == 1 && msep) return false; // lone `-` + separator → block indicator, not content
+      if (matched > 0) { if (blen < sizeof(buf)) buf[blen++] = '-'; } if (matched > 1) { if (blen < sizeof(buf)) buf[blen++] = '-'; } if (matched > 2) { if (blen < sizeof(buf)) buf[blen++] = '-'; } if (matched > 0) { has_content = true; lexer->mark_end(lexer); }
+    }
+    if (lexer->lookahead == '.') {
+      unsigned matched = 0;
+      if (lexer->lookahead == '.') { lexer->advance(lexer, false); matched++; }
+      if (lexer->lookahead == '.') { lexer->advance(lexer, false); matched++; }
+      if (lexer->lookahead == '.') { lexer->advance(lexer, false); matched++; }
+      int32_t mn = lexer->lookahead;
+      bool msep = (mn == 0 || mn == ' ' || mn == '\t' || mn == '\n' || mn == '\r');
+      if (matched == 3 && msep) { s->marker_decline = true; return false; }
+      if (matched > 0) { if (blen < sizeof(buf)) buf[blen++] = '.'; } if (matched > 1) { if (blen < sizeof(buf)) buf[blen++] = '.'; } if (matched > 2) { if (blen < sizeof(buf)) buf[blen++] = '.'; } if (matched > 0) { has_content = true; lexer->mark_end(lexer); }
+    }
+  }
+  for (;;) {
+    int32_t c = lexer->lookahead;
+    if (c == 0) break;                                             // EOF
+    if (c == '\n' || c == '\r') {
+      // Inside a flow collection a plain scalar FOLDS across a line break (`{ multi\n line: v}` → the
+      // key is `multi line`): the break + surrounding whitespace (and blank/comment-only lines) collapse
+      // to one space and the run continues on the next line. Peek past that trivia run WITHOUT committing
+      // (mark_end stays at the last content char, so a decline trims it): if the next significant char
+      // ENDS the scalar — EOF, a flow indicator/terminator (`, [ ] { }`), or a line-leading `#` comment —
+      // the break is trailing trivia and the scalar stops here; otherwise fold to a space and continue.
+      if (flow_depth > 0 && has_content) {
+        while (lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
+               lexer->lookahead == '\n' || lexer->lookahead == '\r') lexer->advance(lexer, false);
+        int32_t nx = lexer->lookahead;
+        if (nx == 0 || nx == '#' || (nx == '[' || nx == '{' || nx == ']' || nx == '}' || nx == ',')) break; // scalar ends — the trivia is trailing
+        if (blen < sizeof(buf)) buf[blen++] = ' ';                  // the folded break becomes one space
+        continue;                                                   // next content char marks the new token end
+      }
+      break;                                                       // block context (or no content yet): the line break ends the scalar
+    }
+    if (flow_depth > 0 && (c == '[' || c == '{' || c == ']' || c == '}' || c == ',')) break; // flow indicators end a scalar — ONLY inside a flow collection
+    if (!has_content && (c == '-' || c == '?')) {
+      // A leading `-`/`?` is a block indicator (seq entry / explicit key) when followed by space/EOL/
+      // flow-indicator, and scalar content otherwise (`-1`, `?x`). Peek the next char to decide.
+      lexer->advance(lexer, false);
+      int32_t n = lexer->lookahead;
+      if (n == 0 || n == ' ' || n == '\t' || n == '\n' || n == '\r' ||
+          (flow_depth > 0 && (n == '[' || n == '{' || n == ']' || n == '}' || n == ','))) return false; // indicator, not a scalar
+      if (blen < sizeof(buf)) buf[blen++] = (char)c;               // `-`/`?` glued to non-space is content
+      has_content = true;
+      if (!cm) lexer->mark_end(lexer);
+      continue;
+    }
+    if (c == ':') {
+      lexer->advance(lexer, false);                               // past the ':' to peek the next char
+      int32_t n = lexer->lookahead;
+      if (n == 0 || n == ' ' || n == '\t' || n == '\n' || n == '\r' ||
+          (flow_depth > 0 && (n == '[' || n == '{' || n == ']' || n == '}' || n == ','))) {
+        stopped_at_kv = true; break;                              // ':' is a key/value separator → end before it
+      }
+      if (blen < sizeof(buf)) buf[blen++] = ':';                  // ':' glued to non-space is content
+      has_content = true;
+      if (!cm) lexer->mark_end(lexer);
+      continue;
+    }
+    if (c == ' ' || c == '\t') {
+      lexer->advance(lexer, false);                               // past the space to peek the next char
+      if (lexer->lookahead == '#') break;                        // " #" begins a comment → end before the space
+      if (blen < sizeof(buf)) buf[blen++] = ' ';                  // interior space (e.g. "hello world")
+      continue;                                                   // do NOT mark_end → trailing spaces are trimmed
+    }
+    if (blen < sizeof(buf)) buf[blen++] = (char)c;
+    has_content = true;
+    lexer->advance(lexer, false);
+    if (!cm) lexer->mark_end(lexer);                                       // token end follows the last content char
+  }
+  if (!has_content) return false;
+
+  // COMPACT mapping KEY: the inline content after a line-lead indicator is a mapping key → its column
+  // is the nested mapping's indent. Push it and emit the zero-width INDENT (the caller pre-marked the
+  // end at the content start); the key is re-lexed on the next call. A leaf falls through to normal
+  // classification, with its end marked here (run end) since per-char marking was suppressed.
+  if (cm) {
+    if (stopped_at_kv) {
+      push_indent(s, compact_col);
+      s->at_line_lead = true;     // the key is itself this line's fresh lead (re-lexed next call)
+      lexer->result_symbol = indent_sym;
+      return true;                // zero-width INDENT at the content start (advances discarded)
+    }
+    lexer->mark_end(lexer);       // leaf: take the whole run (trailing-space trim is skipped in compact mode)
+  }
+
+  // Number / bool-null SHAPE test (so the typed regex tokens still classify `1`/`true`). Decide KEY
+  // vs PLAIN first, because a typed-looking run is only deferred where a typed token is valid: a KEY
+  // position admits num/bool_null (block_key_scalar), and a non-KEY value position likewise — but a
+  // plain-ONLY fold continuation (neither KEY here, and the run is not a key) must stay PLAIN.
+  // numeric / bool-null SHAPE test — a loose superset is fine for classification (only a typed-shaped
+  // run is emitted as NUM/BOOL_NULL; a run with any other char is PLAIN), at the cost of mis-typing a
+  // rare plain like `1abc` as numeric (the documented imprecise edge).
+  bool numeric = blen > 0;
+  for (unsigned i = 0; i < blen; i++) {
+    char ch = buf[i];
+    bool ok = (ch >= '0' && ch <= '9') || ch == '.' || ch == '+' || ch == '-' || ch == 'e' || ch == 'E' ||
+              ch == 'x' || ch == 'o' || ch == 'n' /* inf/nan */ || ch == 'a' || ch == 'f' || ch == 'i' ||
+              ch == 'I' || ch == 'N' || ch == 'F' || (ch >= 'A' && ch <= 'F');
+    if (!ok) { numeric = false; break; }
+  }
+  // also require at least one digit OR a .inf/.nan/~ shape so a bare "e"/"a" word isn't called numeric
+  if (numeric) {
+    bool any_digit = false;
+    for (unsigned i = 0; i < blen; i++) if (buf[i] >= '0' && buf[i] <= '9') { any_digit = true; break; }
+    if (!any_digit) numeric = false;
+  }
+  bool boolnull = false;
+  {
+    static const char *WORDS[] = { "true","True","TRUE","false","False","FALSE","null","Null","NULL","~" };
+    for (unsigned w = 0; w < sizeof(WORDS)/sizeof(WORDS[0]); w++) {
+      const char *p = WORDS[w]; unsigned i = 0;
+      while (i < blen && p[i] && buf[i] == p[i]) i++;
+      if (i == blen && p[i] == 0) { boolnull = true; break; }
+    }
+  }
+  // Classify + emit. The external scalar token CARRIES the key-vs-value decision (a trailing `: `
+  // means KEY), which the GLR parser needs to chain mapping entries — so a typed value is emitted as
+  // NUM/BOOL_NULL here, NOT deferred to a regex token (deferring drops the disambiguation and
+  // mis-parses a top-level `x: 1\ny: 2`). A key wins first; then the typed shapes; then PLAIN. Each
+  // is gated on its token being admissible here (valid_symbols), falling through otherwise.
+  if (stopped_at_kv && want_key) { lexer->result_symbol = KEY; return true; }
+  if (numeric && want_num) { lexer->result_symbol = NUM; return true; }
+  if (boolnull && want_boolnull) { lexer->result_symbol = BOOL_NULL; return true; }
+  if (want_plain) { lexer->result_symbol = PLAIN; return true; }
+  if (want_key) { lexer->result_symbol = KEY; return true; }
+  return false;
+}
+
+bool tree_sitter_yaml_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
+  Scanner *s = (Scanner *)payload;
+  bool want_indent = valid_symbols[INDENT];
+  bool want_dedent = valid_symbols[DEDENT];
+  bool want_newline = valid_symbols[NEWLINE];
+  bool want_block = valid_symbols[BLOCK_SCALAR];
+  // Content lies to our left whenever we are not at column 0 — including right after an INTERNAL token
+  // (e.g. a `---`/`...` document marker, whose match the scanner never sees). Mark started so the line
+  // boundary that follows emits its NEWLINE (the leading-NEWLINE suppression is only for blank lines at
+  // the very start of input, which are always at column 0). Without this, the NEWLINE after a leading
+  // `---` would be dropped and the document body could not attach.
+  if (lexer->get_column(lexer) > 0) s->started = true;
+
+  // Finish a line boundary already in progress: emit the remaining DEDENTs (one per call), then the
+  // owed NEWLINE when the stack top reaches the boundary column. No input is consumed here.
+  if (s->pending_col >= 0) {
+    int top = s->stack[s->len - 1];
+    if (s->pending_col < top) {
+      if (want_dedent) { s->len--; lexer->result_symbol = DEDENT; return true; }
+      s->pending_col = -1; s->pending_newline = false; return false;
+    }
+    bool owed = s->pending_newline;
+    int16_t col = s->pending_col;
+    s->pending_col = -1; s->pending_newline = false;
+    if (col == top && owed && want_newline && s->started) { lexer->result_symbol = NEWLINE; return true; }
+    return false;
+  }
+
+  // Inside a flow collection (flow_depth > 0) a line break is INSIGNIFICANT — indentation is suspended,
+  // so a flow scalar / nested bracket may sit on a following line (`[a,\n b]`). tree-sitter's `/\s/`
+  // extra cannot skip the newline here: the external scanner is consulted first, and a `false` return
+  // (the only way to "decline") rolls back any advance, so the newline is never consumed and the parser
+  // stalls into error recovery. So when in flow, the scanner itself eats the flow-insignificant run
+  // (spaces, tabs, newlines, comments) as the LEADING trivia of the next token it returns — the bracket
+  // emission and scalar scan below both return TRUE, which makes the skip stick. (In block context this
+  // is skipped: a newline IS significant and drives the INDENT/DEDENT/NEWLINE boundary logic.)
+  if (s->flow_depth > 0) {
+    for (;;) {
+      int32_t c = lexer->lookahead;
+      if (c == ' ' || c == '\t' || c == '\n' || c == '\r') skip(lexer);
+      else if (c == '#') { while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != '\r') skip(lexer); }
+      else break;
+    }
+  }
+  // Flow-collection delimiters ([ ] { }). These are emitted as EXTERNAL tokens (not the internal DFA) so
+  // flow_depth — the open-bracket nesting that suspends indentation — PERSISTS: tree-sitter keeps an
+  // external scanner's struct mutations only across a token it RETURNS (on a `false` return it restores
+  // the pre-scan serialized state, so a peek-then-false counter is silently rolled back before the
+  // internal bracket is lexed). Each is gated on its own valid_symbols, so a `[`/`{` that is plain
+  // content (a scalar contains but never STARTS with one — handled in scan_scalar) is NOT consumed here:
+  // at those positions the flow token isn't valid and we fall through. Skip inline space/tab first (the
+  // flow newline skip above already ran when in flow; in block a newline still drives the indent logic).
+  {
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\t') skip(lexer);
+    int32_t fc = lexer->lookahead;
+    if (fc == '[' && valid_symbols[_FLOW_LBRACKET]) {
+      advance(lexer); lexer->mark_end(lexer); lexer->result_symbol = _FLOW_LBRACKET;
+      if (s->flow_depth < UINT16_MAX) s->flow_depth++;
+      s->started = true; s->at_line_lead = false; return true;
+    }
+    if (fc == '{' && valid_symbols[_FLOW_LBRACE]) {
+      advance(lexer); lexer->mark_end(lexer); lexer->result_symbol = _FLOW_LBRACE;
+      if (s->flow_depth < UINT16_MAX) s->flow_depth++;
+      s->started = true; s->at_line_lead = false; return true;
+    }
+    if (fc == ']' && valid_symbols[_FLOW_RBRACKET]) {
+      advance(lexer); lexer->mark_end(lexer); lexer->result_symbol = _FLOW_RBRACKET;
+      if (s->flow_depth > 0) s->flow_depth--;
+      s->started = true; s->at_line_lead = false; return true;
+    }
+    if (fc == '}' && valid_symbols[_FLOW_RBRACE]) {
+      advance(lexer); lexer->mark_end(lexer); lexer->result_symbol = _FLOW_RBRACE;
+      if (s->flow_depth > 0) s->flow_depth--;
+      s->started = true; s->at_line_lead = false; return true;
+    }
+  }
+
+  // Compact block notation (`- a: 1` / `- - x` / `- &a k: v` / `? key`). The line-lead indicator was
+  // just lexed by tree-sitter's internal DFA, so at_line_lead is still set (the scanner emits no
+  // indicator token to clear it) and tree-sitter now wants the nested node's INDENT on the SAME line.
+  // When the inline content begins a block node, its column — not the indicator's — is the node's
+  // indentation: emit a zero-width INDENT there and push it (the DEDENT logic closes it when a
+  // shallower line arrives). The work splits by what leads the inline content, because the sniff
+  // ADVANCES (irrecoverably) and external-scanner state changes are reverted on a false return:
+  //   • a node-property / flow / alias / nested-indicator lead — sniff it here; a structural hit pushes
+  //     INDENT, a miss returns false so tree-sitter rewinds and the leading char (all INTERNAL-lexable,
+  //     or the scalar handled on the next call) is re-lexed.
+  //   • a plain/quoted SCALAR lead — fall through to scan_scalar (below), which pushes the compact
+  //     INDENT itself when the run is a mapping KEY. (A bare scalar must NOT be sniffed-then-rewound: a
+  //     plain scalar is external-only, so a false return here would loop.)
+  if (want_indent && s->at_line_lead && s->flow_depth == 0) {
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\t') skip(lexer);
+    int32_t c = lexer->lookahead;
+    // GENUINE line lead: the line's first token is not yet lexed, so its column == the stack top (a
+    // line boundary set top to the lead column; at stream start both are the document level). Once a
+    // token has been lexed on this line the next content is DEEPER than top. Record whether that first
+    // token is a node PROPERTY (`&`/`!`): a property leads a node, so its inline content is at the
+    // SAME level (no compact push), whereas a compact indicator (`-`/`?`) DOES open a nested level
+    // for its content. This is the one fact lost when the property is lexed internally (the scanner
+    // never sees it), so it is latched here and checked at the two compact-push sites below.
+    if ((int16_t)lexer->get_column(lexer) == s->stack[s->len - 1]) s->property_lead = (c == '&' || c == '!');
+    bool nonscalar_lead = c == '&' || c == '!' || c == '[' || c == '{' || c == '*' || compact_is_indicator(c);
+    // A property that LEADS the line (property_lead) does not nest — skip the compact push so its key
+    // stays at the node's level (`&a a: b` / `!!str &a1 "foo":`). A property that follows a compact
+    // indicator (`- &a k: v`) is NOT a line lead (property_lead was set false at the `-`), so it still
+    // pushes via compact_content_is_structural's property-skip.
+    if (nonscalar_lead && !s->property_lead && (int16_t)lexer->get_column(lexer) > s->stack[s->len - 1]) {
+      int16_t col = (int16_t)lexer->get_column(lexer);
+      lexer->mark_end(lexer); // freeze the zero-width INDENT end at the content column before the sniff advances
+      if (compact_content_is_structural(lexer)) {
+        push_indent(s, col);
+        // A NESTED indicator's content is itself a fresh line-lead (so `- - x` nests once more); but a
+        // node-property prefix (`- &a k: v`) is followed INLINE by its own mapping KEY at the SAME level
+        // — that key must NOT push again, so clear the lead for the property / direct-key case.
+        s->at_line_lead = compact_is_indicator(c);
+        lexer->result_symbol = INDENT;
+        return true;
+      }
+      return false; // not block-structural → rewind; the internal-lexable lead (or next-call scalar) re-lexes
+    }
+  }
+
+  // A block scalar value (`key: |`): scan its body before the indent logic — its more-indented
+  // lines are content, not nested structure. Skip the inline space after the `:`/`-` first. Block
+  // scalars are a block-context construct — inside a flow collection `|`/`>` are plain content.
+  if (want_block && s->flow_depth == 0) {
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\t') skip(lexer);
+    if (lexer->lookahead == '|' || lexer->lookahead == '>') { if (scan_block_scalar(s, lexer)) { s->at_line_lead = false; return true; } }
+  }
+
+  // A SCALAR (plain / key / num / bool-null) on the CURRENT line — NOT at a line boundary (a leading
+  // newline falls through to the indent logic so INDENT/DEDENT/NEWLINE are emitted first). Skip inline
+  // spaces/tabs, then if the next char could begin a plain scalar (not a newline/EOF and not a YAML
+  // indicator — a leading `-`/`?`/`:` is resolved inside scan_scalar), scan it where look-ahead is
+  // available. scan_scalar classifies the run and emits the admissible token.
+  if (valid_symbols[PLAIN] || valid_symbols[KEY] || valid_symbols[NUM] || valid_symbols[BOOL_NULL]) {
+    while (lexer->lookahead == ' ' || lexer->lookahead == '\t') skip(lexer);
+    int32_t h = lexer->lookahead;
+    bool indicator = h == 0 || h == '\n' || h == '\r' || h == ',' || h == '[' || h == ']' || h == '{' ||
+                     h == '}' || h == '#' || h == '&' || h == '*' || h == '!' || h == '|' || h == '>' ||
+                     h == '\'' || h == '"' || h == '%' || h == '@' || h == '`';
+    if (!indicator) {
+      // Compact mapping-KEY (part B): a line-lead indicator's scalar-led inline content, deeper than the
+      // stack top. scan_scalar then pushes the nested mapping's INDENT (zero-width, pre-marked here) if
+      // the run is a KEY, or emits the leaf scalar otherwise (so `- x` stays a plain item, no push).
+      // `!s->property_lead`: a key after a LINE-LEAD node property (`&a a: b`) sits at the node's level,
+      // not a compact-nested one — so do NOT pre-mark a compact INDENT; scan_scalar then emits the key as
+      // an ordinary value-position scalar (the enclosing Node carries the property). A key after a compact
+      // indicator (`- a: 1`) has property_lead == false and still nests.
+      int16_t compact_col = -1;
+      if (want_indent && s->at_line_lead && !s->property_lead && (int16_t)lexer->get_column(lexer) > s->stack[s->len - 1]) {
+        compact_col = (int16_t)lexer->get_column(lexer);
+        lexer->mark_end(lexer); // zero-width INDENT end at the content start (used iff the run is a KEY)
+      }
+      if (scan_scalar(lexer, valid_symbols[KEY] != 0, valid_symbols[PLAIN] != 0, valid_symbols[NUM] != 0, valid_symbols[BOOL_NULL] != 0, s->flow_depth, s, compact_col, INDENT)) { s->started = true; s->at_line_lead = (lexer->result_symbol == INDENT); return true; }
+      if (s->marker_decline) { s->marker_decline = false; return false; } // a true `---`/`...` here → let the internal marker token lex it
+    }
+  }
+
+  if (!want_indent && !want_dedent && !want_newline) return false; // no indent tokens valid
+  // Inside a flow collection a newline is INSIGNIFICANT (indentation suspended): emit NO INDENT/DEDENT/
+  // NEWLINE so the line break is consumed by tree-sitter's `/\s/` extra and the flow spans the line.
+  if (s->flow_depth > 0) return false;
+
+  // Skip blank lines, comment-only lines, and leading whitespace, noting whether a line break was
+  // crossed (only a real boundary drives the indent logic).
+  bool found_eol = false;
+  for (;;) {
+    int32_t c = lexer->lookahead;
+    if (c == '\n') { skip(lexer); found_eol = true; }
+    else if (c == '\r') { skip(lexer); if (lexer->lookahead == '\n') skip(lexer); found_eol = true; }
+    else if (c == ' ' || c == '\t') { skip(lexer); }
+    else if (c == '#') { while (lexer->lookahead != 0 && lexer->lookahead != '\n' && lexer->lookahead != '\r') skip(lexer); }
+    else break; // content or EOF
+  }
+
+  if (lexer->eof(lexer)) {
+    if (s->len > 1 && want_dedent) { s->len--; lexer->result_symbol = DEDENT; return true; } // close open blocks at EOF
+    return false;
+  }
+  bool was_started = s->started;
+  s->started = true; // content lies ahead — mark started even on a mid-line return
+  if (!found_eol) return false; // not at a line boundary
 
-bool tree_sitter_yaml_external_scanner_scan(void *payload, TSLexer *lexer,
-                                                          const bool *valid_symbols) {
-  (void)payload;
+  int16_t col = (int16_t)lexer->get_column(lexer);
+  lexer->mark_end(lexer); // INDENT/DEDENT/NEWLINE are zero-width at the content column
+  int top = s->stack[s->len - 1];
+  s->at_line_lead = true; // a real line boundary — the next real token leads its line
+  // Latch whether THIS new line is led by a node property (`&`/`!`) — the lookahead is the line's
+  // first content char (blanks/comments already skipped). A property leads a node, so its inline content
+  // is at the same level and must NOT take a compact push (the gates below check property_lead). This is
+  // a TRUE-return site so the latch persists through the property's internal lex; it is also re-derived
+  // for the FIRST line (which reaches no boundary) by the compact block's genuine-line-lead detection.
+  s->property_lead = (lexer->lookahead == '&' || lexer->lookahead == '!');
 
+  if (col > top) {
+    if (want_indent) { push_indent(s, col); lexer->result_symbol = INDENT; return true; }
+    return false;
+  }
+  if (col == top) {
+    if (want_newline && was_started) { lexer->result_symbol = NEWLINE; return true; }
+    return false;
+  }
+  // col < top: a dedent. Emit one DEDENT now; queue the rest + the trailing NEWLINE for re-entry.
+  if (want_dedent) {
+    s->pending_col = col; s->pending_newline = true;
+    s->len--; lexer->result_symbol = DEDENT; return true;
+  }
   return false;
 }