From 6273a9073008d4947c058ed77f6a2c82f7d32d63 Mon Sep 17 00:00:00 2001 From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com> Date: Mon, 18 May 2026 11:18:55 +0100 Subject: [PATCH] fix(parser): remove duplicate [T] array-sugar production MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `type_expr_primary` declared the array-shorthand production `LBRACKET type_expr RBRACKET -> TyApp("Array", ...)` twice (lines 443/469) — added independently by issues-drafts/02 and #40 with semantic actions that are identical apart from the binder name (`elem` vs `ty`). The second copy was a functional duplicate of the first. Effects of the duplication, both confirmed via `menhir --explain`: - Menhir reported "1 production is never reduced" (the second copy is dead — the first always wins in the LALR automaton). - It generated a real reduce/reduce conflict family: state 80 (reached after `[ type_expr ]`) could not choose which identical production to reduce, and that ambiguity propagated through inherited lookaheads. This patch removes the dead copy and folds its substantive rationale (the `Array`-not-`List` desugar reason + typecheck.ml canonicalisation note + #40 ref) into the surviving comment so that the documented rationale is preserved. Measured impact (full `dune test --force` gate green at 257/257 before and after — zero parse-behaviour change, this is a pure dedup): - "production never reduced" warning: eliminated. - reduce/reduce conflicts: 36 -> 10 (R/R conflict states 5 -> 4). - shift/reduce conflicts: unchanged at 75 (the duplicate was a reduce/reduce source only). The residual 75 S/R + 10 R/R conflicts are inherent LALR(1) ambiguities that Menhir resolves deterministically and correctly (the 257-test suite, incl. STAGE-A AOT + multi-module + STAGE-B effect-row coverage, proves the chosen resolution is the intended language). 
This Menhir (20260209) has no expected-conflict declaration mechanism, so eliminating the rest requires deliberate per-conflict grammar refactoring with estate-wide parse blast radius — tracked separately, not bundled here. Refs #40 Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/parser.mly | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/lib/parser.mly b/lib/parser.mly index 212147c..d1ded99 100644 --- a/lib/parser.mly +++ b/lib/parser.mly @@ -437,9 +437,14 @@ type_expr_primary: LT/GT tokens are shared with expression-position less-than. */ | name = upper_ident LT args = separated_nonempty_list(COMMA, type_arg) GT { TyApp (mk_ident name $startpos(name) $endpos(name), args) } - /* Array sugar: [T] desugars to Array[T] (issues-drafts/02). The element - type can be any type_expr (recursive), so [[Int]] means Array[Array[Int]] - and [Result[T, E]] works as expected. */ + /* Array sugar: `[T]` desugars to `Array[T]` (issues-drafts/02; #40). The + element type can be any type_expr (recursive), so `[[Int]]` means + Array[Array[Int]] and `[Result[T, E]]` works as expected. This is the + syntax stdlib has used all along (`fn map(arr: [T], ...)`). The + typechecker (lib/typecheck.ml ~724, 813, 1024) canonicalises array + literals/operations on `TApp (TCon "Array", ...)`, so `Array` is the + right desugar target — `List` here would trigger a + `Unify.TypeMismatch (List, Array)` at check time. */ | LBRACKET elem = type_expr RBRACKET { TyApp (mk_ident "Array" $startpos $endpos, [TyArg elem]) } /* Function-type-as-type: `fn(A, B) -> C` lowers to the curried arrow @@ -458,16 +463,6 @@ type_expr_primary: the interior in one pass without lookahead conflicts. */ | LBRACE body = ty_record_body RBRACE { TyRecord (fst body, snd body) } - /* Array shorthand: `[T]` desugars to `Array[T]`. This is the syntax - stdlib has used all along (`fn map(arr: [T], f: T -> U) -> [U]`, - `Result[[T], E]`, etc.) 
but it was previously only accepted in stdlib - load paths, not in user source. The typechecker (lib/typecheck.ml - lines 724, 813, 1024) canonicalises array literals/operations on - `TApp (TCon "Array", ...)`, so `Array` is the right desugar target — - using `List` here triggers a `Unify.TypeMismatch (List, Array)` at - check time. Closes #40. */ - | LBRACKET ty = type_expr RBRACKET - { TyApp (mk_ident "Array" $startpos $endpos, [TyArg ty]) } /* Built-in types */ | NAT { TyCon (mk_ident "Nat" $startpos $endpos) } | INT_T { TyCon (mk_ident "Int" $startpos $endpos) }