From bdd611930a6b2bedc58270eb4d5c2cede4f77d84 Mon Sep 17 00:00:00 2001 From: "Jonathan D.A. Jewell" <6759885+hyperpolymath@users.noreply.github.com> Date: Fri, 15 May 2026 00:34:14 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20Phases=20F=20+=20G=20+=20H=20=E2=80=94?= =?UTF-8?q?=20implicit-in,=20abstract=20extern=20types,=20Unit/Bytes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three more v2-grammar pieces for hyperpolymath/ephapax#43, on top of the phase E PR. After this commit, hypatia's bridge.eph parses end-to-end and desugars cleanly through every `extern "gossamer"` signature; the remaining gate is cross-module type resolution for `Model`/`Msg`/etc. imported from `hypatia/ui/gui` (#43 follow-up). Phase F — implicit `in` between sequential lets ----------------------------------------------- New grammar rule: block_expr = { sequential_let+ ~ expression } sequential_let = { ("let!" | "let") ~ let_binder ~ (":" ~ ty)? ~ "=" ~ block_rhs } block_rhs = { lambda_expr | if_expr | region_expr | match_expr | case_expr | handle_expr | or_expr } added at the top of the `single_expr` choice so it's tried before the existing `let_expr` / `let_lin_expr`. PEG ordering means the legacy `let x = e in body` form still parses — if the parser sees `in` after the rhs, `block_expr` rolls back and `let_expr` matches. Folded at parse time into nested `Let` / `LetLin` AST nodes (`Let{a, .., body: Let{b, .., body: <trailing expr>}}`). Tuple binders reuse the Phase E `match_arm_from_tuple_binder` lowering, so let! (ch2, msg) = ipc_recv(ch) let new_model = update(msg, model) run(ch2, new_model) works with no `in` keywords and a destructured first binding — exactly the shape bridge.eph uses on its TEA loop. Phase G — abstract extern types ------------------------------- `ExternItem::Type` items in `extern "abi" { type Foo }` blocks now register in the `DataRegistry`'s new `extern_types` map. 
The `desugar_named_type` path checks that map first; opaque extern types resolve to `Ty::Base(BaseTy::I32)` (host handle / pointer representation, matching the existing wasm import convention). Type arguments on opaque types are rejected (`Window(T)` is an error) since extern types are monomorphic by construction. Phase H — Unit / Bytes built-in type aliases -------------------------------------------- `Unit` resolves to `Ty::Base(BaseTy::Unit)` (the type-position spelling of the literal `()`); `Bytes` resolves to `Ty::Base(BaseTy::I32)` as the conventional host-managed buffer handle. These should eventually migrate to a stdlib prelude — for now they're hard-coded fast-paths in `desugar_named_type`, sitting before the data registry / extern-type lookups. Tests ----- - `tests/v2-grammar/fixtures/implicit-in.eph` - `tests/v2-grammar/fixtures/implicit-in-tuple.eph` - `tests/v2-grammar/fixtures/extern-abstract-types.eph` - `src/ephapax-cli/tests/v2_grammar_phase_f.rs` — 4 tests: - `implicit_in_chain_compiles` - `implicit_in_with_tuple_binders_compiles` - `legacy_explicit_in_still_compiles` (regression for the `in` form) - `implicit_in_let_lin_chain_parses` - `src/ephapax-cli/tests/v2_grammar_phase_gh.rs` — 3 tests: - `extern_abstract_types_desugar_to_i32_handles` (parse → desugar → typecheck → wasm validate on a fixture using `Window`, `Channel`, `Bytes`, `Unit` opaquely) - `unit_alias_resolves_to_base_unit` - `bytes_alias_resolves_to_i32` - `cargo test --workspace` clean — no regressions in existing match/pair/let/desugar/typing tests. bridge.eph status ----------------- After this commit, `cargo run -- compile-eph bridge.eph` fails with Desugar error: unknown type `Model` — i.e. the parser, surface AST, and `extern "gossamer"` signature desugar are all working. `Model` / `Msg` / `Department` etc. come from `import hypatia/ui/gui`, which requires cross-module type resolution. That's a separate piece of work tracked on #43. 
Co-Authored-By: Claude Opus 4.7 --- src/ephapax-cli/tests/v2_grammar_phase_f.rs | 78 +++++++++++++ src/ephapax-cli/tests/v2_grammar_phase_gh.rs | 46 ++++++++ src/ephapax-desugar/src/lib.rs | 75 ++++++++++++- src/ephapax-parser/src/ephapax.pest | 39 ++++++- src/ephapax-parser/src/surface.rs | 103 ++++++++++++++++++ .../fixtures/extern-abstract-types.eph | 17 +++ .../v2-grammar/fixtures/implicit-in-tuple.eph | 13 +++ tests/v2-grammar/fixtures/implicit-in.eph | 12 ++ 8 files changed, 379 insertions(+), 4 deletions(-) create mode 100644 src/ephapax-cli/tests/v2_grammar_phase_f.rs create mode 100644 src/ephapax-cli/tests/v2_grammar_phase_gh.rs create mode 100644 tests/v2-grammar/fixtures/extern-abstract-types.eph create mode 100644 tests/v2-grammar/fixtures/implicit-in-tuple.eph create mode 100644 tests/v2-grammar/fixtures/implicit-in.eph diff --git a/src/ephapax-cli/tests/v2_grammar_phase_f.rs b/src/ephapax-cli/tests/v2_grammar_phase_f.rs new file mode 100644 index 0000000..2cb31f3 --- /dev/null +++ b/src/ephapax-cli/tests/v2_grammar_phase_f.rs @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// +// Phase F regressions for hyperpolymath/ephapax#43. Implicit-`in` between +// sequential `let` bindings — the deepest grammar gap bridge.eph relied +// on. +// +// The grammar adds a new `block_expr` form, tried *before* the legacy +// `let_expr` in the `single_expr` choice. A `block_expr` is +// `sequential_let+ ~ expression`, where each `sequential_let` has the +// shape `("let" | "let!") ~ let_binder ~ (":" ~ ty)? ~ "=" ~ block_rhs` +// without a trailing `in` keyword. 
The parser folds them at parse time: +// +// let a = 10 +// let b = 20 +// a + b +// +// becomes +// +// Let { name: a, value: 10, body: +// Let { name: b, value: 20, body: +// a + b } } + +use ephapax_desugar::desugar; +use ephapax_parser::parse_surface_module; +use ephapax_typing::type_check_module; + +const IMPLICIT_IN: &str = include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/v2-grammar/fixtures/implicit-in.eph" +)); + +const IMPLICIT_IN_TUPLE: &str = include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/v2-grammar/fixtures/implicit-in-tuple.eph" +)); + +fn compile_to_wasm(source: &str, name: &str) -> Vec { + let surface = parse_surface_module(source, name).expect("must parse"); + let core = desugar(&surface).expect("must desugar"); + type_check_module(&core).expect("must type-check"); + ephapax_wasm::compile_module(&core).expect("must codegen") +} + +#[test] +fn implicit_in_chain_compiles() { + let wasm = compile_to_wasm(IMPLICIT_IN, "implicit-in"); + wasmparser::validate(&wasm).expect("wasm validates"); +} + +#[test] +fn implicit_in_with_tuple_binders_compiles() { + let wasm = compile_to_wasm(IMPLICIT_IN_TUPLE, "implicit-in-tuple"); + wasmparser::validate(&wasm).expect("wasm validates"); +} + +#[test] +fn legacy_explicit_in_still_compiles() { + // Regression: don't break the legacy form. The grammar tries + // `block_expr` first, fails on the `in` keyword after the rhs, rolls + // back, then `let_expr` matches. + let source = "module test\n\ + fn entry(): I32 = let x = 1 in let y = 2 in x\n"; + let wasm = compile_to_wasm(source, "legacy-in"); + wasmparser::validate(&wasm).expect("wasm validates"); +} + +#[test] +fn implicit_in_let_lin_chain_parses() { + // `let!` (linear) form bridge.eph uses — `let! (ch2, msg) = ipc_recv(ch)` + // followed by more lets. We only assert PARSE here; typing the linear + // form requires extern fns this fixture doesn't have. 
+ let source = "module test\n\ + fn use_pair(p: (I32, I32)): I32 =\n\ + let! (a, b) = p\n\ + let c = a\n\ + c\n"; + let _ = parse_surface_module(source, "let-lin-block").expect("must parse"); +} diff --git a/src/ephapax-cli/tests/v2_grammar_phase_gh.rs b/src/ephapax-cli/tests/v2_grammar_phase_gh.rs new file mode 100644 index 0000000..91bf998 --- /dev/null +++ b/src/ephapax-cli/tests/v2_grammar_phase_gh.rs @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// +// Phase G + H regressions for hyperpolymath/ephapax#43: +// +// G — `extern "abi" { type Foo }` items register as opaque types in the +// desugar registry; `SurfaceTy::Named { name: "Foo" }` resolves to +// `Ty::Base(I32)` (host handle representation). +// +// H — `Unit` and `Bytes` are built-in type-name aliases. `Unit` is the +// type-position spelling of the literal `()`; `Bytes` resolves to +// `I32` until a stdlib `Bytes` ADT lands. + +use ephapax_desugar::desugar; +use ephapax_parser::parse_surface_module; +use ephapax_typing::type_check_module; + +const EXTERN_ABSTRACT: &str = include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/../../tests/v2-grammar/fixtures/extern-abstract-types.eph" +)); + +#[test] +fn extern_abstract_types_desugar_to_i32_handles() { + let surface = parse_surface_module(EXTERN_ABSTRACT, "extern-abstract").expect("must parse"); + let core = desugar(&surface).expect("must desugar"); + type_check_module(&core).expect("must type-check"); + let wasm = ephapax_wasm::compile_module(&core).expect("must codegen"); + wasmparser::validate(&wasm).expect("wasm validates"); +} + +#[test] +fn unit_alias_resolves_to_base_unit() { + let source = "module test\n\ + fn noop(): Unit = ()\n"; + let surface = parse_surface_module(source, "unit-alias").expect("must parse"); + let _ = desugar(&surface).expect("Unit must alias to ()"); +} + +#[test] +fn bytes_alias_resolves_to_i32() { + let source = "module test\n\ + fn id(b: Bytes): Bytes = b\n"; + let surface = 
parse_surface_module(source, "bytes-alias").expect("must parse"); + let core = desugar(&surface).expect("Bytes must alias to I32"); + type_check_module(&core).expect("must type-check"); +} diff --git a/src/ephapax-desugar/src/lib.rs b/src/ephapax-desugar/src/lib.rs index 3bebe17..7b724dd 100644 --- a/src/ephapax-desugar/src/lib.rs +++ b/src/ephapax-desugar/src/lib.rs @@ -122,6 +122,12 @@ pub struct DataRegistry { constructors: HashMap, /// Data type name → (params, constructors) types: HashMap, Vec)>, + /// Extern-declared opaque types — `extern "abi" { type Foo }`. They + /// have no constructors visible to the surface language; their core + /// representation is `I32` (the host runtime treats them as handles + /// or pointers). Listed separately from `types` because they don't + /// participate in `Construct`/`Match` desugaring. + extern_types: HashMap, } impl DataRegistry { @@ -151,6 +157,13 @@ impl DataRegistry { ); } + /// Register an opaque type from an `extern "abi" { type Foo }` block. + /// Subsequent `SurfaceTy::Named { name: "Foo" }` references resolve + /// to `Ty::Base(BaseTy::I32)` (host handle representation). + pub fn register_extern_type(&mut self, name: SmolStr) { + self.extern_types.insert(name, ()); + } + /// Look up a constructor by name. fn get_ctor(&self, name: &str) -> Option<&ConstructorInfo> { self.constructors.get(name) @@ -160,6 +173,11 @@ impl DataRegistry { fn get_type_ctors(&self, name: &str) -> Option<&(Vec, Vec)> { self.types.get(name) } + + /// `true` if `name` was declared as an extern opaque type. + fn is_extern_type(&self, name: &str) -> bool { + self.extern_types.contains_key(name) + } } // ========================================================================= @@ -193,10 +211,20 @@ impl Desugarer { /// First pass: collect all data declarations into the registry. /// Second pass: desugar all declarations. 
pub fn desugar_module(&mut self, module: &SurfaceModule) -> Result { - // First pass: register all data types + // First pass: register all data types AND extern opaque types so + // subsequent `desugar_ty` calls (against fn signatures, etc.) can + // resolve `Window` / `IpcChannel` / etc. as `I32` handles. for decl in &module.decls { - if let SurfaceDecl::Data(data) = decl { - self.registry.register(data); + match decl { + SurfaceDecl::Data(data) => self.registry.register(data), + SurfaceDecl::Extern(block) => { + for item in &block.items { + if let ephapax_surface::ExternItem::Type(name) = item { + self.registry.register_extern_type(name.clone()); + } + } + } + _ => {} } } @@ -498,6 +526,47 @@ impl Desugarer { /// `Option(I32)` → `() + I32` /// `Result(I32, Bool)` → `I32 + Bool` fn desugar_named_type(&self, name: &SmolStr, args: &[SurfaceTy]) -> Result { + // Built-in type aliases that aren't (yet) keywords in the lexer. + // `Unit` is the type-position spelling of the literal `()`; bridge.eph + // and other ML-adjacent corpora write it freely. + if name.as_str() == "Unit" { + if !args.is_empty() { + return Err(DesugarError::TypeArityMismatch { + name: name.to_string(), + expected: 0, + got: args.len(), + }); + } + return Ok(Ty::Base(BaseTy::Unit)); + } + // `Bytes` is the conventional name for a host-managed buffer. Until + // the stdlib publishes a real `Bytes` ADT, treat it as an I32 handle + // (the wasm host passes pointer/length pairs across `__ffi` calls; + // for direct extern-fn signatures the handle alone is enough). + if name.as_str() == "Bytes" { + if !args.is_empty() { + return Err(DesugarError::TypeArityMismatch { + name: name.to_string(), + expected: 0, + got: args.len(), + }); + } + return Ok(Ty::Base(BaseTy::I32)); + } + + // Opaque extern types resolve to `I32` (host handle / pointer). + // They take no type arguments — extern types are monomorphic. 
+ if self.registry.is_extern_type(name.as_str()) { + if !args.is_empty() { + return Err(DesugarError::TypeArityMismatch { + name: name.to_string(), + expected: 0, + got: args.len(), + }); + } + return Ok(Ty::Base(BaseTy::I32)); + } + let (params, ctors) = self.registry.get_type_ctors(name.as_str()).ok_or_else(|| { DesugarError::UnknownType { name: name.to_string(), diff --git a/src/ephapax-parser/src/ephapax.pest b/src/ephapax-parser/src/ephapax.pest index 24c54a4..da9a329 100644 --- a/src/ephapax-parser/src/ephapax.pest +++ b/src/ephapax-parser/src/ephapax.pest @@ -167,7 +167,20 @@ expression = { seq_expr = { single_expr ~ (";" ~ single_expr)* } single_expr = { - let_lin_expr + // Implicit-`in` block: a chain of `let`/`let!` bindings without `in` + // keywords, followed by a final result expression. Each binding's body + // is "everything that comes after it inside this block." Tried first so + // that bridge-eph-style sequences like + // + // let! (ch2, msg_bytes) = ipc_recv(ch) + // let msg: Msg = decode_msg(msg_bytes) + // run(ch2, msg) + // + // parse without explicit `in` keywords. PEG ordering means the legacy + // `let x = e in body` form still works — if the parser sees `in` after + // the rhs, this rule fails and `let_expr` (next in the list) succeeds. + block_expr + | let_lin_expr | let_expr | lambda_expr | if_expr @@ -178,6 +191,30 @@ single_expr = { | or_expr } +// One or more `sequential_let`s followed by a trailing result expression. +// The trailing expression can itself be any `expression` (which folds back +// through `seq_expr` / `single_expr`), so nested block forms and explicit +// `let ... in ...` both compose normally. +block_expr = { sequential_let+ ~ expression } + +sequential_let = { + ("let!" | "let") ~ let_binder ~ (":" ~ ty)? ~ "=" ~ block_rhs +} + +// A `sequential_let`'s rhs may be any expression form *except* a top-level +// `let`/`let!` — those participate in the block via the next iteration of +// `sequential_let`. 
Parenthesised lets and lets inside lambdas still parse +// because their parens / lambda contexts route through `expression` again. +block_rhs = { + lambda_expr + | if_expr + | region_expr + | match_expr + | case_expr + | handle_expr + | or_expr +} + // Pattern matching: match x of | None => 0 | Some(v) => v end match_expr = { "match" ~ expression ~ "of" diff --git a/src/ephapax-parser/src/surface.rs b/src/ephapax-parser/src/surface.rs index c9ab5e5..6a79f93 100644 --- a/src/ephapax-parser/src/surface.rs +++ b/src/ephapax-parser/src/surface.rs @@ -356,6 +356,7 @@ fn parse_expression(pair: pest::iterators::Pair) -> Result parse_seq_expr(inner), // Legacy: if expression directly contains a single_expr child Rule::single_expr => parse_single_expr(inner), + Rule::block_expr => parse_block_expr(inner), Rule::let_expr => parse_let_expr(inner), Rule::let_lin_expr => parse_let_lin_expr(inner), Rule::lambda_expr => parse_lambda_expr(inner), @@ -425,6 +426,7 @@ fn parse_single_expr(pair: pest::iterators::Pair) -> Result parse_block_expr(inner), Rule::let_expr => parse_let_expr(inner), Rule::let_lin_expr => parse_let_lin_expr(inner), Rule::lambda_expr => parse_lambda_expr(inner), @@ -440,6 +442,107 @@ fn parse_single_expr(pair: pest::iterators::Pair) -> Result) -> Result { + let span = span_from_pair(&pair); + let children: Vec<_> = pair.into_inner().collect(); + + // Children: zero or more `sequential_let` pairs, then exactly one final + // `expression`. The grammar guarantees `sequential_let+`, so we can + // unwrap the trailing expression. 
+ let (lets, trailing): (Vec<_>, Vec<_>) = children + .into_iter() + .partition(|p| p.as_rule() == Rule::sequential_let); + let trailing_expr = trailing + .into_iter() + .find(|p| p.as_rule() == Rule::expression) + .ok_or_else(|| ParseError::missing("block trailing expression"))?; + let mut body = parse_expression(trailing_expr)?; + + // Fold from the right: the last let wraps the trailing expression as + // its body, the previous let wraps that, etc. + for stmt in lets.into_iter().rev() { + body = parse_sequential_let(stmt, body, span)?; + } + Ok(body) +} + +/// Parse one `sequential_let` (a `let` / `let!` without trailing `in`) +/// against an already-parsed body expression. Reuses the tuple-binder +/// lowering from [`match_arm_from_tuple_binder`]. +fn parse_sequential_let( + pair: pest::iterators::Pair, + body: SurfaceExpr, + span: Span, +) -> Result { + // The first token (`let` or `let!`) is consumed by the grammar but + // doesn't appear as a Pair — we detect it from the source slice. 
+ let src = pair.as_str(); + let is_linear = src.trim_start().starts_with("let!"); + + let mut inner = pair.into_inner(); + let binder = parse_let_binder( + inner + .next() + .ok_or_else(|| ParseError::missing("sequential let binder"))?, + )?; + + let mut ty = None; + let mut value = None; + for item in inner { + match item.as_rule() { + Rule::ty if ty.is_none() => ty = Some(parse_type(item)?), + Rule::block_rhs if value.is_none() => value = Some(parse_block_rhs(item)?), + _ => {} + } + } + let value = value.ok_or_else(|| ParseError::missing("sequential let value"))?; + + Ok(match (binder, is_linear) { + (LetBinder::Single(name), false) => SurfaceExpr::new( + SurfaceExprKind::Let { + name, + ty, + value: Box::new(value), + body: Box::new(body), + }, + span, + ), + (LetBinder::Single(name), true) => SurfaceExpr::new( + SurfaceExprKind::LetLin { + name, + ty, + value: Box::new(value), + body: Box::new(body), + }, + span, + ), + (LetBinder::Tuple(names), _) => match_arm_from_tuple_binder(names, value, body, span), + }) +} + +fn parse_block_rhs(pair: pest::iterators::Pair) -> Result { + let span = span_from_pair(&pair); + let inner = pair + .into_inner() + .next() + .ok_or_else(|| ParseError::unexpected_end("block rhs"))?; + match inner.as_rule() { + Rule::lambda_expr => parse_lambda_expr(inner), + Rule::if_expr => parse_if_expr(inner), + Rule::region_expr => parse_region_expr(inner), + Rule::match_expr => parse_match_expr(inner), + Rule::case_expr => parse_case_expr(inner), + Rule::or_expr => parse_or_expr(inner), + other => Err(ParseError::Syntax { + message: format!("Unexpected block rhs rule: {:?}", other), + span, + }), + } +} + /// A parsed `let_binder` — either a single identifier or a list of /// identifiers from a `tuple_binder`. Tuple binders are lowered to a /// 1-arm match at the parse site. 
diff --git a/tests/v2-grammar/fixtures/extern-abstract-types.eph b/tests/v2-grammar/fixtures/extern-abstract-types.eph new file mode 100644 index 0000000..a0650e5 --- /dev/null +++ b/tests/v2-grammar/fixtures/extern-abstract-types.eph @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// `extern "abi" { type T }` opaque types are now usable in fn signatures. +// Desugar maps them to `I32` (host handle representation). Combined with +// the `Unit` and `Bytes` builtin aliases, this is enough for the +// `extern "gossamer" { ... }` block in hypatia's bridge.eph to desugar. + +module hyperpolymath/ephapax/test + +extern "host" { + type Window + type Channel + fn open(title: String): Window + fn poll(ch: Channel): Bytes + fn close(w: Window): Unit +} + +fn entry(w: Window): Window = w diff --git a/tests/v2-grammar/fixtures/implicit-in-tuple.eph b/tests/v2-grammar/fixtures/implicit-in-tuple.eph new file mode 100644 index 0000000..5372f2c --- /dev/null +++ b/tests/v2-grammar/fixtures/implicit-in-tuple.eph @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Implicit-`in` with tuple binders. Two `let` statements destructuring +// pairs, followed by a final expression. This is the shape bridge.eph +// uses on its TEA loop. + +module hyperpolymath/ephapax/test + +fn entry(): I32 = + let p: (I32, I32) = (1, 2) + let (a, b) = p + let q: (I32, I32) = (a, b) + let (c, d) = q + c diff --git a/tests/v2-grammar/fixtures/implicit-in.eph b/tests/v2-grammar/fixtures/implicit-in.eph new file mode 100644 index 0000000..6c6a6dc --- /dev/null +++ b/tests/v2-grammar/fixtures/implicit-in.eph @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Implicit-`in` between sequential `let` bindings. Each `let` / `let!` +// is followed by another let or by the final expression — no `in` +// keyword needed. Equivalent to the bridge.eph style. + +module hyperpolymath/ephapax/test + +fn entry(): I32 = + let a = 1 + let b = 2 + let c = 3 + a