From 2ec9bf4d2da0defc51fa24578d49d56a4503f45c Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Tue, 26 May 2026 00:16:48 -0600 Subject: [PATCH 1/2] Add lazy default package resolution --- compiler/src/main/exports.rs | 17 ++++- compiler/src/modules/packages/mod.rs | 82 ++++++++++++++++++--- documentation/getting-started/quickstart.md | 2 +- documentation/reference/imports.md | 4 +- documentation/reference/packages.md | 39 +++++----- documentation/reference/writing-modules.md | 2 +- runtime/README.md | 17 +++-- runtime/src/defaults.js | 18 +++++ runtime/src/element.js | 11 ++- runtime/src/index.js | 38 +++++++++- runtime/src/prefetch.js | 80 ++++++++++++++++---- runtime/src/specs.js | 7 +- runtime/tests/runtime.json | 5 +- runtime/tests/runtime.test.js | 28 ++++++- runtime/worker/engine.js | 56 ++++++++++---- runtime/worker/worker.js | 20 ++++- 16 files changed, 333 insertions(+), 93 deletions(-) create mode 100644 runtime/src/defaults.js diff --git a/compiler/src/main/exports.rs b/compiler/src/main/exports.rs index 54f61a2..82b7375 100644 --- a/compiler/src/main/exports.rs +++ b/compiler/src/main/exports.rs @@ -87,15 +87,26 @@ fn read_src(len: usize) -> Result { }) } +/* Pre-fetch feed: one import per line, written as a kind tag, a tab, then the spec: `b\tname` (bare, resolve via manifest) or `q\tspec` (quoted URL/path). Lines are joined with `\n` (no trailing newline); an unreadable source yields an empty output.
*/ #[unsafe(no_mangle)] pub unsafe extern "C" fn extract_imports(len: usize) -> usize { + use crate::modules::packages::{scan_imports, ImportSpec}; let src = match read_src(len) { Ok(s) => s, Err(_) => return unsafe { write_out("") }, }; - let specs = crate::modules::packages::scan_string_imports(&src); - let joined = specs.join("\n"); - unsafe { write_out(&joined) } + let mut buf = alloc::string::String::new(); + for spec in scan_imports(&src) { + if !buf.is_empty() { buf.push('\n'); } + let (kind, name) = match &spec { + ImportSpec::Bare(n) => ('b', n), + ImportSpec::Quoted(u) => ('q', u), + }; + buf.push(kind); + buf.push('\t'); + buf.push_str(name); + } + unsafe { write_out(&buf) } } /* Drive one segment of execution; on `Pending*` re-stash the VM into the recycled `PausedRun` box. */ diff --git a/compiler/src/modules/packages/mod.rs b/compiler/src/modules/packages/mod.rs index 22fdd64..c03ed56 100644 --- a/compiler/src/modules/packages/mod.rs +++ b/compiler/src/modules/packages/mod.rs @@ -6,6 +6,7 @@ use alloc::{boxed::Box, string::{String, ToString}, sync::Arc, vec::Vec}; use crate::s; use crate::modules::vm::types::{HeapPool, Val, VmErr}; +use crate::modules::lexer::{lex, Token, TokenType}; pub mod manifest; pub use manifest::{Manifest, parse_manifest, walk_up_dirs, dir_of, join_relative}; @@ -101,19 +102,76 @@ pub(crate) fn binding_to_extern(b: &NativeBinding) -> crate::modules::vm::types: } } -/* Scans source for quoted from-import specs; WASM host uses results to pre-fetch URLs before compile. */ -pub fn scan_string_imports(src: &str) -> Vec { +/* A scanned import: Quoted is a direct URL/path; Bare is a name resolved against the manifest chain. */ +#[derive(Debug, Clone, PartialEq)] +pub enum ImportSpec { + Quoted(String), + Bare(String), +} + +/* Content between the first quote and its matching close; tolerates string prefixes (r, b, f). Specs carry no escapes, so a raw slice suffices.
*/ +fn unquote(raw: &str) -> String { + let bytes = raw.as_bytes(); + let Some(open) = bytes.iter().position(|&c| c == b'"' || c == b'\'') else { return raw.to_string() }; + let quote = bytes[open] as char; + match raw[open + 1..].rfind(quote) { + Some(rel) => raw[open + 1..open + 1 + rel].to_string(), + None => raw.to_string(), + } +} + +/* Reads the module spec at token `j`: a quoted string or a dotted bare name. Returns (spec, index past it). */ +fn read_spec(src: &str, tokens: &[Token], j: usize) -> Option<(ImportSpec, usize)> { + let t = tokens.get(j)?; + match t.kind { + TokenType::String => Some((ImportSpec::Quoted(unquote(&src[t.start..t.end])), j + 1)), + TokenType::Name => { + let mut name = src[t.start..t.end].to_string(); + let mut k = j + 1; + // Dotted segments: a.b.c. + while tokens.get(k).map(|x| x.kind) == Some(TokenType::Dot) { + let Some(seg) = tokens.get(k + 1).filter(|s| s.kind == TokenType::Name) else { break }; + name.push('.'); + name.push_str(&src[seg.start..seg.end]); + k += 2; + } + Some((ImportSpec::Bare(name), k)) + } + _ => None, + } +} + +/* Every import spec, classified Bare vs Quoted, via the lexer so a `from`/`import` inside a comment or string is never a false hit. A `from` counts only when its spec is followed by `import`, so `raise X from e` and `yield from it` are skipped. Specs are returned in source order. */ +pub fn scan_imports(src: &str) -> Vec<ImportSpec> { + let (tokens, _errs) = lex(src); let mut out = Vec::new(); - for line in src.lines() { - let t = line.trim_start(); - if !t.starts_with("from ") { continue; } - let rest = &t[5..].trim_start(); - let bytes = rest.as_bytes(); - if bytes.is_empty() || bytes[0] != b'"' { continue; } - let mut end = 1; - while end < bytes.len() && bytes[end] != b'"' { end += 1; } - if end < bytes.len() { - out.push(rest[1..end].to_string()); + let mut i = 0; + while i < tokens.len() { + match tokens[i].kind { + TokenType::From => match read_spec(src, &tokens, i + 1) { + // The token after the spec must be `import`; any other `from` is not an import statement. + Some((spec, next)) if tokens.get(next).map(|x| x.kind) == Some(TokenType::Import) => { + out.push(spec); + // Step past the `import` of this from-statement so it isn't read as a fresh statement.
+ i = next + 1; + } + _ => i += 1, + }, + TokenType::Import => { + // `import a, b as c`: comma-separated specs, each with an optional `as` alias. + let mut j = i + 1; + while let Some((spec, next)) = read_spec(src, &tokens, j) { + out.push(spec); + j = next; + if tokens.get(j).map(|x| x.kind) == Some(TokenType::As) { + j += if tokens.get(j + 1).map(|x| x.kind) == Some(TokenType::Name) { 2 } else { 1 }; + } + if tokens.get(j).map(|x| x.kind) != Some(TokenType::Comma) { break; } + j += 1; + } + i = j.max(i + 1); + } + _ => i += 1, } } out diff --git a/documentation/getting-started/quickstart.md b/documentation/getting-started/quickstart.md index dc70ae3..2680973 100644 --- a/documentation/getting-started/quickstart.md +++ b/documentation/getting-started/quickstart.md @@ -48,7 +48,7 @@ from dom import query, set_text set_text(query("#app"), "Hello from Python") ``` -`dom` is one of the official [host libraries](/reference/packages#host-libraries-edge-python-host) (`dom`, `network`, `storage` and more), served as JS sources alongside your app; standard `.wasm` packages like [`json`](/reference/packages#json) ship alongside too. See [Official packages](/reference/packages) for the full catalog, and the [runtime README](https://github.com/dylan-sutton-chavez/edge-python/tree/main/runtime) for all `` attributes and the `imports` field for `.py` / `.wasm` modules. +`dom` is one of the official [host libraries](/reference/packages#host-libraries-edge-python-host) (`dom`, `network`, `storage` and more); standard `.wasm` packages like [`json`](/reference/packages#json) sit alongside them. The `packages.json` above declares `dom` explicitly, but the browser runtime also resolves the official packages by bare name with no manifest at all (see [Defaults](/reference/packages#defaults)), fetching each lazily on first import.
See [Official packages](/reference/packages) for the full catalog, and the [runtime README](https://github.com/dylan-sutton-chavez/edge-python/tree/main/runtime) for all `` attributes and the `imports` field for `.py` / `.wasm` modules. ## Your first program diff --git a/documentation/reference/imports.md b/documentation/reference/imports.md index d90b983..5eea718 100644 --- a/documentation/reference/imports.md +++ b/documentation/reference/imports.md @@ -48,7 +48,7 @@ from utils import * print(slugify("Hello world")) ``` -The names above (`json`, `utils`, `math`) are illustrative, none are built-in. `json` is an [official standard package](/reference/packages#json); the rest stand in for your own modules. Every bare name must be declared in `packages.json` or supplied as a quoted path/URL. +The names above (`json`, `utils`, `math`) are illustrative. `json` is an [official standard package](/reference/packages#json) that the browser runtime resolves by [default](/reference/packages#defaults); `utils` and `math` stand in for your own modules. Apart from the official defaults, every bare name must be declared in `packages.json` or supplied as a quoted path/URL. ## How resolution works @@ -88,7 +88,7 @@ Schema: `from utils import x` resolves to `./lib/utils.py` relative to the entry script; `from math import add` loads `.wasm` per the [wire format](/reference/wasm-abi). -`packages.json` is optional, scripts can use string-form paths directly without project config. +`packages.json` is optional, scripts can use string-form paths directly without project config, and the browser runtime resolves the [official packages](/reference/packages#defaults) by bare name without it. 
### Walk-up resolution diff --git a/documentation/reference/packages.md b/documentation/reference/packages.md index 148ef55..1a8ad38 100644 --- a/documentation/reference/packages.md +++ b/documentation/reference/packages.md @@ -16,55 +16,41 @@ Standard packages are host-agnostic (they run wherever WASM runs). Host librarie ## Standard packages (`edge-python-std`) -Language-agnostic `.wasm` plugins over the [WASM module ABI](/reference/wasm-abi). Import by URL or via a `packages.json` alias; the host fetches the `.wasm` and treats its exports as native bindings. +Language-agnostic `.wasm` plugins over the [WASM module ABI](/reference/wasm-abi). Import by bare name (the browser runtime resolves the official ones by default, see [Defaults](#defaults)), by URL, or via a `packages.json` alias; the host fetches the `.wasm` and treats its exports as native bindings. ### `json` JSON serialization and deserialization, full CPython `json.loads` / `json.dumps` kwargs parity (`object_hook`, `parse_float`, `indent`, `sort_keys`, `ensure_ascii`, `default`, and more). ```python -from "https://std.edgepython.com/json.wasm" import dumps, loads +from json import dumps, loads data = loads('{"name":"ada","tags":["math","cs"]}') print(data["name"]) # ada print(dumps({"k": [1, 2, 3], "ok": True})) # {"k":[1,2,3],"ok":true} ``` -Or with a `packages.json` alias so scripts can write the bare name: - -```json -{ - "imports": { - "json": "https://std.edgepython.com/json.wasm" - } -} -``` - -```python -from json import dumps, loads -``` - Pre-built `.wasm` is published on the [`edge-python-std` releases](https://github.com/dylan-sutton-chavez/edge-python-std). Full API: [`json/README.md`](https://github.com/dylan-sutton-chavez/edge-python-std/tree/main/json). -> **`json` is not built-in.** Examples elsewhere in these docs write `from json import ...` for brevity, but `json` is this external package, you must declare it (alias or URL) like any other module. 
+> **`json` is an external package, but the browser runtime resolves it by default.** It isn't compiled into `compiler_lib.wasm`, it's this `.wasm` package. In the browser runtime you can write `from json import ...` with no `packages.json` (a built-in [default](#defaults), fetched lazily on first import). Other hosts, or `defaults: false`, need it declared (alias or URL) like any other module. ### `re` Regular expressions, a CPython `re` subset on a compact backtracking engine. Unicode aware `\d` `\w` `\s` and `(?i)` without shipping Unicode tables, plus capture groups, backreferences, lookahead, and fixed width lookbehind. A step budget raises `RuntimeError` on catastrophic backtracking instead of hanging, so a degrading pattern is reported rather than freezing the worker. ```python -from "https://std.edgepython.com/re.wasm" import search, sub, findall +from re import search, sub, findall print(search(r'(\d+)-(\d+)', 'order 12-34')) # 12-34 print(sub(r'\s+', '_', 'a b c')) # a_b_c print(findall(r'\w+', 'one two three')) # ['one', 'two', 'three'] ``` -Functions: `match`, `search`, `fullmatch`, `findall`, `groups`, `span`, `sub`; flags go inline (`(?i)`, `(?s)`, `(?m)`). The same `packages.json` alias trick lets scripts write the bare `from re import ...`. Pre-built `.wasm` is published on the [`edge-python-std` releases](https://github.com/dylan-sutton-chavez/edge-python-std). Full API: [`re/README.md`](https://github.com/dylan-sutton-chavez/edge-python-std/tree/main/re). +Functions: `match`, `search`, `fullmatch`, `findall`, `groups`, `span`, `sub`; flags go inline (`(?i)`, `(?s)`, `(?m)`). Pre-built `.wasm` is published on the [`edge-python-std` releases](https://github.com/dylan-sutton-chavez/edge-python-std). Full API: [`re/README.md`](https://github.com/dylan-sutton-chavez/edge-python-std/tree/main/re). 
## Host libraries (`edge-python-host`) -Plain-JS capabilities that run on the browser's main thread, registered declaratively via the `host` field of [`packages.json`](/reference/imports#packages-json) (with the `` element) or programmatically via `createWorker({ mainThreadModules })`. No `.wasm`, no Rust, no build step. Each call defers to the main thread over `postMessage` (around 0.1 to 0.4 ms); Python sees a synchronous call. +Plain-JS capabilities that run on the browser's main thread, registered declaratively via the `host` field of [`packages.json`](/reference/imports#packages-json) (with the `` element), programmatically via `createWorker({ hostModules })`, or resolved by default with no config at all (see [Defaults](#defaults)). No `.wasm`, no Rust, no build step. Each call defers to the main thread over `postMessage` (around 0.1 to 0.4 ms); Python sees a synchronous call. The ESM loads lazily, the first time a run imports it. ### `dom` @@ -126,9 +112,10 @@ Handlers: `time`, `time_ns`, `monotonic`, `monotonic_ns`, `perf_counter`, `perf_ | You have... | Do | |---|---| +| Any official package, browser runtime | Just `from <name> import ...`; the runtime resolves the official std/host packages by default.
Declare it only to pin a different version, or opt out with `defaults: false` | | A standard `.wasm` package (e.g., `json`) | Quoted URL `from "https://.../json.wasm" import ...`, or a `packages.json` `imports` alias | | A host library (e.g., `dom`, `network`, `storage`), `` element | Add it to the `host` field of `packages.json` | -| A host library, programmatic `createWorker` | Pass it in `mainThreadModules` | +| A host library, programmatic `createWorker` | Pass its URL in `hostModules` (lazy) or an in-memory factory in `mainThreadModules` (eager) | ```json { @@ -139,6 +126,16 @@ Handlers: `time`, `time_ns`, `monotonic`, `monotonic_ns`, `perf_counter`, `perf_ One manifest drives both directions: `imports` for worker-side `.py` / `.wasm` modules, `host` for main-thread libraries. See [Imports](/reference/imports) for resolution semantics and the full `packages.json` schema, and the [runtime README](https://github.com/dylan-sutton-chavez/edge-python/tree/main/runtime) for `` attributes and `createWorker` options. +### Defaults + +The browser runtime ships a built-in base manifest, so the official packages resolve by bare name with **no `packages.json` at all**: the std `.wasm` packages (`json`, `re`) and the host libraries (`dom`, `network`, `storage`, `time`). Three rules: + +- **Lazy.** A default is fetched only when a run actually imports it. Unused defaults never hit the network. +- **Overridable.** Your `packages.json` (or `imports` / `hostModules`) wins for the same name, so you can pin a specific version or URL. +- **Opt-out.** Pass `defaults: false` to `createWorker` to disable the base manifest entirely (e.g. offline or non-browser embedders). + +Defaults are a convenience of the browser runtime, not the compiler: `compiler_lib.wasm` stays hermetic and resolves bare names only through the manifest the host provides. Non-browser hosts decide their own defaults, if any. 
+ ## See also - [Imports](/reference/imports), import syntax, `packages.json`, integrity verification. diff --git a/documentation/reference/writing-modules.md b/documentation/reference/writing-modules.md index f4d9d24..2739df1 100644 --- a/documentation/reference/writing-modules.md +++ b/documentation/reference/writing-modules.md @@ -71,7 +71,7 @@ Browsers run the engine in a Web Worker (no `document`, no `window`). Path C bri Async handlers (returning a `Promise`) run concurrently when several coroutines call them under `gather`: each result is routed back to the coroutine that issued it, and a rejected handler raises a catchable exception in that one coroutine without disturbing its peers. -No `.wasm`, no Rust, no build step. +Three ways to register: pass the imported object to `mainThreadModules` (eager, shown below); give a URL to `hostModules` or the `packages.json` `host` field, imported lazily the first time a run uses it; or, for the official libraries, rely on the runtime [defaults](/reference/packages#defaults) with no config. No `.wasm`, no Rust, no build step. ### Sketch diff --git a/runtime/README.md b/runtime/README.md index 25d0b79..cc5f2e0 100644 --- a/runtime/README.md +++ b/runtime/README.md @@ -41,7 +41,7 @@ Declarative alternative to `createWorker`: include the script, drop a tag, and a ``` -Importing `element.js` auto-registers the tag. On connect, the element reads its attributes, loads the modules declared in `packages.json` (below), spawns the worker, runs `entry` if present, then fires a `ready` event. `compiler_lib.wasm` loads from the CDN automatically. +Importing `element.js` auto-registers the tag. On connect, the element reads its attributes and `packages.json`, spawns the worker, runs `entry` if present, then fires a `ready` event. `compiler_lib.wasm` loads from the CDN automatically. Modules load lazily: only what a run actually imports is fetched, host libraries included. 
| Attribute | Description | |---|---| @@ -80,7 +80,7 @@ Host libraries (DOM, etc.) are plain-JS modules whose handlers run on the **page } ``` -For each entry the element dynamically `import()`s the URL (resolved against the `packages.json` location) and registers its named exports, every export except `default`, as [main-thread modules](#main-thread-modules). The export name is the module name Python imports, so `export const dom` becomes `from dom import ...`: +Each `host` entry maps a name to an ESM URL (resolved against the `packages.json` location). The element passes these to `createWorker` as `hostModules`; the module is `import()`ed lazily the first time a run imports that name, never at connect, so an unused host library is never fetched. The ESM exports its handler factory under the host name (or as `default`), so `export const dom` answers `from dom import ...`: ```python # app/main.py @@ -102,7 +102,9 @@ Spawns a Web Worker, loads `compiler_lib.wasm` inside it, returns a proxy. | `integrity` | `boolean` | `true` | When `true`, use IDB + lockfile to cache and verify fetched module bytes. Falls back to in-memory cache (with `console.warn`) if IDB is unavailable. | | `imports` | `Record` | `null` | Bare-name shortcut: maps Python bare names (`from import ...`) to URLs of `.py` / `.wasm` modules. Replaces the need for a physical `packages.json` for simple projects. | | `loaders` | `string[]` | `[]` | URLs of module loader plugins. Each loader is a `.js` file with a default export `{ match, load }`. See [Writing a loader](#writing-a-loader). | -| `mainThreadModules` | `Record` | `{}` | Synthetic native modules whose handlers run on the main thread. Each entry registers `from import ...` for Python. See [Main-thread modules](#main-thread-modules). | +| `mainThreadModules` | `Record` | `{}` | Main-thread modules supplied as in-memory factories/objects, registered eagerly. Use `hostModules` instead when you have URLs and want lazy loading. 
See [Main-thread modules](#main-thread-modules). | +| `hostModules` | `Record` | `{}` | Main-thread host libraries by URL (`name -> ESM url`), `import()`ed lazily the first time a run imports the name. The `` element fills this from the `host` field. | +| `defaults` | `boolean` | `true` | Seed the resolution table with the official packages so they resolve by bare name without a `packages.json`: std `json` / `re` (worker `.wasm`) and host `dom` / `network` / `storage` / `time` (main-thread ESM). Lazy, an unused default is never fetched. Set `false` to opt out. URLs live in `src/defaults.js`. | | `version` | `string` | `null` | Optional lockfile version key. When present, mismatches with the stored version invalidate the cache before run. Useful to pin cache to a deploy/commit. | ### `Worker` @@ -205,7 +207,7 @@ When the runtime is cross-origin (page on `demo.edgepython.com`, runtime on `run ## Module fetch lifecycle -`load` runs once per Worker; `run` can be called many times. `compiler_lib.wasm` is compiled once at `load`; a fresh instance is created per `run` so VM state cannot leak. Module bytes (`.py` / `.wasm` / `packages.json`) are cached across runs in the same Worker, BFS prefetch skips fetched specs, 404'd manifests are remembered. Use `clearCache()` to drop both caches. +`load` runs once per Worker; `run` can be called many times. `compiler_lib.wasm` is compiled once at `load`; a fresh instance is created per `run` so VM state cannot leak. Resolution is lazy: the compiler classifies each import and only the modules a run actually uses get fetched. Bare names resolve against the manifest chain (built-in defaults < user `packages.json`); manifests are resolution tables, not download lists, so a declared-but-unused package is never downloaded. Module bytes (`.py` / `.wasm` / `packages.json`) are cached across runs in the same Worker, prefetch skips fetched specs, 404'd manifests are remembered. Use `clearCache()` to drop both caches. 
A spec the prefetch can't fetch or register (wrong scheme, a `.wasm` served as HTML, a malformed binary) aborts the run before it starts with a clear error, with an `https://` hint for `http://` or schemeless URL specs, instead of letting the VM fail later with `not registered`. @@ -235,11 +237,12 @@ A spec the prefetch can't fetch or register (wrong scheme, a `.wasm` served as H | Path | Purpose | |---|---| | `src/index.js` | Public API. `createWorker` factory (main-thread). | -| `src/element.js` | Public `` custom element. Wraps `createWorker`, loads modules from `packages.json` (`host` and `imports`). | -| `worker/engine.js` | Internal orchestrator (Worker only). `load`, `run`, `pushEvent`, `reset`, `clearCache`, `dispose`, `setHostCallDelegate`. | +| `src/element.js` | Public `` custom element. Wraps `createWorker`; reads `host` / `imports` from `packages.json` (host libraries load lazily on first import). | +| `worker/engine.js` | Internal orchestrator (Worker only). `load`, `run`, `pushEvent`, `reset`, `clearCache`, `dispose`, `setHostCallDelegate`, `setLoadHostDelegate`. | | `src/env.js` | The 4 `env.*` imports `compiler_lib` declares: `host_print`, `host_call_native`, `host_fetch_bytes`, `host_now_ns`. | | `src/native.js` | Native module loader extension point + built-in Path A (wasm-pdk) loader + `nativeTable`. | -| `src/prefetch.js` | BFS over the dependency graph; pre-fetches and registers all `.py` / `.wasm` / `packages.json`. | +| `src/prefetch.js` | Lazy BFS over the dependency graph; resolves each imported name and registers only the `.py` / `.wasm` / host modules a run uses. | +| `src/defaults.js` | Built-in base manifest: official std (`json`, `re`) and host (`dom`, ...) packages, resolvable by bare name without `packages.json`. | | `src/fetch.js` | CAS-backed fetch with lockfile integrity check. | | `src/specs.js` | URL/spec helpers mirroring `compiler_lib::modules::packages::manifest`. 
| | `src/rt.js` | Handle codec wrappers (`decodeStr`, `encodeInt`, ...) for loaders. | diff --git a/runtime/src/defaults.js b/runtime/src/defaults.js new file mode 100644 index 0000000..f875667 --- /dev/null +++ b/runtime/src/defaults.js @@ -0,0 +1,18 @@ +/* +Built-in base manifest. Official std packages and host libraries resolvable by bare name with no user packages.json. +Lowest precedence (user `imports` / `host` entries win) and lazy: an unused default is never fetched. +*/ + +/* Worker-side std packages (.wasm), keyed by the bare import name. Pinned for reproducibility; the lockfile verifies the bytes when integrity is on. */ +export const DEFAULT_IMPORTS = { + json: 'https://std.edgepython.com/json.wasm', + re: 'https://std.edgepython.com/re.wasm', +}; + +/* Main-thread host libraries (ESM), keyed by the bare import name. Pages flattens each `<name>/src/` to `host.edgepython.com/<name>/`. Same lazy + opt-out rules; merged under any user `host` entries. */ +export const DEFAULT_HOST = { + dom: 'https://host.edgepython.com/dom/index.js', + network: 'https://host.edgepython.com/network/index.js', + storage: 'https://host.edgepython.com/storage/index.js', + time: 'https://host.edgepython.com/time/index.js', +}; diff --git a/runtime/src/element.js b/runtime/src/element.js index 319a1e6..95bb675 100644 --- a/runtime/src/element.js +++ b/runtime/src/element.js @@ -10,15 +10,14 @@ export class EdgePythonElement extends HTMLElement { const file = this.getAttribute('entry'); const pkg = this.getAttribute('packages'); - // host -> main-thread modules, imports -> worker .py/.wasm modules - const mainThreadModules = {}; + // host -> main-thread modules (lazy: name -> url, imported on first use), imports -> worker .py/.wasm modules + const hostModules = {}; let imports; if (pkg) { const base = new URL(pkg, location.href); const manifest = await fetch(base).then(r => r.json()); - for (const url of Object.values(manifest.host ??
{})) { - const { default: _, ...mods } = await import(new URL(url, base).href); - Object.assign(mainThreadModules, mods); + for (const [name, url] of Object.entries(manifest.host ?? {})) { + hostModules[name] = new URL(url, base).href; } if (manifest.imports) { imports = {}; @@ -29,7 +28,7 @@ export class EdgePythonElement extends HTMLElement { // Kept on the element so callers can drive the same worker after the declarative run. this.worker = await createWorker({ wasmUrl: "https://runtime.edgepython.com/js/compiler_lib.wasm", - mainThreadModules, + hostModules, imports, }); // `entry` is optional: omit it to just spin up the worker and drive it via run(). diff --git a/runtime/src/index.js b/runtime/src/index.js index 63b19fa..6632c31 100644 --- a/runtime/src/index.js +++ b/runtime/src/index.js @@ -2,6 +2,8 @@ Public entry. `createWorker(opts)` spawns a Web Worker around `engine.js` and returns a proxy whose methods round-trip via postMessage. See README for options. */ +import { DEFAULT_HOST } from './defaults.js'; + export async function createWorker(opts) { // Chromium blocks `new Worker(crossOriginUrl)` even with `type:'module'`; cross-origin runtimes need the Blob bootstrap below. const workerUrl = new URL('../worker/worker.js', import.meta.url); @@ -28,6 +30,24 @@ export async function createWorker(opts) { } } + /* Lazy host modules: name -> ESM url, imported only when the worker reports the bare name is used. Base defaults sit under user entries; `defaults:false` opts out. */ + const hostUrls = { ...(opts?.defaults !== false ? DEFAULT_HOST : {}), ...(opts?.hostModules || {}) }; + const loadedHosts = new Map(); // name -> export names, memoized across runs + const loadHostModule = async (name) => { + if (loadedHosts.has(name)) return loadedHosts.get(name); + const url = hostUrls[name]; + if (!url) throw new Error(`no host module registered for '${name}'`); + const mod = await import(url); + const factory = mod[name] ?? 
mod.default; + const handlers = typeof factory === 'function' ? factory({ pushEvent }) : factory; + for (const [fnName, handler] of Object.entries(handlers)) { + mainThreadHandlers[`${name}:${fnName}`] = handler; + } + const exports = Object.keys(handlers); + loadedHosts.set(name, exports); + return exports; + }; + worker.onmessage = async ({ data }) => { if (data.type === 'line') { if (outputHandler) outputHandler(data.text); @@ -47,6 +67,15 @@ export async function createWorker(opts) { } return; } + if (data.type === 'load-host') { + try { + const exports = await loadHostModule(data.name); + worker.postMessage({ type: 'load-host-response', reqId: data.reqId, exports }); + } catch (e) { + worker.postMessage({ type: 'load-host-response', reqId: data.reqId, error: e?.message ?? String(e) }); + } + return; + } const cb = pending.get(data.reqId); if (!cb) return; pending.delete(data.reqId); @@ -66,9 +95,12 @@ export async function createWorker(opts) { worker.postMessage({ type, reqId, ...payload }); }); - /* Strip mainThreadModules before crossing postMessage: factories/handlers aren't structured-cloneable. Send only the manifests. */ - const { mainThreadModules: _drop, ...workerOpts } = opts || {}; - const ready = await send('load', { opts: workerOpts, mainThreadManifests: manifests }); + /* Strip mainThreadModules/hostModules before crossing postMessage: not structured-cloneable / loaded on the page. The worker only needs eager manifests and the lazy host names. */ + const { mainThreadModules: _drop, hostModules: _dropHosts, ...workerOpts } = opts || {}; + const ready = await send('load', { + opts: { ...workerOpts, availableHosts: Object.keys(hostUrls) }, + mainThreadManifests: manifests, + }); /* Browser bridges fire `CustomEvent("edge-python-event")` on the global; route the detail to the Worker. Gated on `document` to skip Workers / Deno where this listener has no meaning. 
*/ if (typeof document !== 'undefined') { diff --git a/runtime/src/prefetch.js b/runtime/src/prefetch.js index de40827..e7556e5 100644 --- a/runtime/src/prefetch.js +++ b/runtime/src/prefetch.js @@ -1,10 +1,11 @@ /* -BFS over dependency graph: visited specs feed `ctx.fetchedSources`, register code/native, plus sibling `packages.json`. `importsMap` synthesizes a root manifest for bare names. +Lazy prefetch over the dependency graph. The compiler classifies each import (bare vs quoted); +bare names resolve against the manifest chain (defaults < user packages.json), and only the imports a module actually uses get fetched. Manifests are resolution tables, not download lists. */ import { fetchWithLockfile } from './fetch.js'; import { loadNativeModule, nativeTable } from './native.js'; -import { dirOf, joinRel, scanStringImports } from './specs.js'; +import { dirOf, joinRel, SOURCE_LIMIT } from './specs.js'; const TD = new TextDecoder(); const TE = new TextEncoder(); @@ -24,12 +25,33 @@ function schemeHint(spec) { return null; } +/* Imports of `src`, classified, via the compiler (single source of truth). Only the first SOURCE_LIMIT encoded bytes of `src` are handed to the compiler, so imports beyond that point are not seen. Each output line is a kind tag (`b` = bare, anything else = quoted), a tab, then the spec. Throws when the wasm build lacks `extract_imports`. Returns [{ bare, spec }].
*/ +function scanImports(src, exports) { + if (typeof exports.extract_imports !== 'function') { + throw new Error('compiler_lib is missing extract_imports; runtime and wasm are out of sync'); + } + const bytes = TE.encode(src); + const len = Math.min(bytes.length, SOURCE_LIMIT); + new Uint8Array(exports.memory.buffer, exports.src_ptr(), len).set(bytes.subarray(0, len)); + const outLen = exports.extract_imports(len); + if (!outLen) return []; + const text = TD.decode(new Uint8Array(exports.memory.buffer, exports.out_ptr(), outLen)); + return text.split('\n').filter(Boolean).map((line) => ({ + bare: line[0] === 'b', + spec: line.slice(line.indexOf('\t') + 1), + })); +} + export async function bfsPrefetch(rootSrc, exports, lockfile, ctx) { const { fetchedSources, knownMissing, importsMap, mainThreadSpecs } = ctx; const visited = new Set(); const queue = []; - // Module specs that never registered; thrown together at the end so the user sees a clear cause, not the VM's later "not registered". + // Module specs that never registered; thrown together at the end so the user sees a clear cause. const failures = []; + // Bare-name -> target spec. Seeded from importsMap (defaults + user); physical packages.json merge in as discovered. + const table = { ...(importsMap || {}) }; + // Bare names scanned before a manifest declared them; retried after each manifest merge. + const pendingBare = new Map(); // name -> importer dir, for relative targets const writeBytes = (bytes) => { const ptr = exports.wasm_alloc(Math.max(1, bytes.length)); @@ -41,16 +63,27 @@ export async function bfsPrefetch(rootSrc, exports, lockfile, ctx) { if (!knownMissing.has(m)) queue.push(m); }; - // Seed a virtual root packages.json from `importsMap` so bare-name imports resolve without a physical manifest.
- if (importsMap && Object.keys(importsMap).length > 0) { - const synthetic = JSON.stringify({ imports: importsMap }); - fetchedSources.set('packages.json', TE.encode(synthetic)); + /* A scanned import contributes at most one fetch target: quoted is direct, bare resolves via the table. */ + const enqueueImport = (imp, dir) => { + if (!imp.bare) { queue.push(joinRel(dir, imp.spec)); return; } + const target = table[imp.spec]; + if (target !== undefined) queue.push(joinRel(dir, target)); + else pendingBare.set(imp.spec, dir); // a later manifest may declare it + }; + const retryPending = () => { + for (const [name, dir] of [...pendingBare]) { + const target = table[name]; + if (target !== undefined) { queue.push(joinRel(dir, target)); pendingBare.delete(name); } + } + }; + + // Synthetic root packages.json so the COMPILER resolves bare names at parse time the same way. + if (Object.keys(table).length > 0) { + fetchedSources.set('packages.json', TE.encode(JSON.stringify({ imports: table }))); knownMissing.delete('packages.json'); - // Enqueue the targets so BFS fetches them. - for (const target of Object.values(importsMap)) queue.push(joinRel('', target)); } - for (const q of scanStringImports(rootSrc)) queue.push(q); + for (const imp of scanImports(rootSrc, exports)) enqueueImport(imp, ''); if (!knownMissing.has('packages.json')) queue.push('packages.json'); while (queue.length) { @@ -58,10 +91,20 @@ export async function bfsPrefetch(rootSrc, exports, lockfile, ctx) { if (visited.has(spec)) continue; visited.add(spec); - // Main-thread modules are pre-registered by engine.run; the synthetic `mt:` spec has no bytes to fetch. + // Eager host (programmatic object) already registered before prefetch; nothing to fetch. if (mainThreadSpecs && mainThreadSpecs.has(spec)) continue; - // Reuse worker-lifetime cache (synthetic root packages.json from importsMap, or anything previously fetched in an earlier run) instead of re-fetching every Run. 
+ // Lazy host: ask the page to load the ESM, then register its exports as `mt:` stubs. + if (spec.startsWith('mt:')) { + const name = spec.slice(3); + let exportNames; + try { exportNames = await ctx.loadHost(name); } + catch (e) { failures.push(`host '${name}' failed to load: ${e?.message ?? e}`); continue; } + ctx.registerHost(name, exportNames); + mainThreadSpecs.add(spec); + continue; + } + let bytes; if (fetchedSources.has(spec)) { bytes = fetchedSources.get(spec); @@ -80,7 +123,11 @@ export async function bfsPrefetch(rootSrc, exports, lockfile, ctx) { try { parsed = JSON.parse(TD.decode(bytes)); } catch { continue; } const dir = dirOf(spec); - for (const target of Object.values(parsed.imports || {})) queue.push(joinRel(dir, target)); + // Merge as a resolution table (nearer manifests already in `table` win), then resolve any deferred names. + for (const [name, target] of Object.entries(parsed.imports || {})) { + if (!(name in table)) table[name] = joinRel(dir, target); + } + retryPending(); if (parsed.extends) { const extDir = joinRel(dir, parsed.extends); queue.push((extDir.endsWith('/') ? extDir : extDir + '/') + 'packages.json'); @@ -93,7 +140,7 @@ export async function bfsPrefetch(rootSrc, exports, lockfile, ctx) { try { ({ names, fns } = await loadNativeModule(spec, bytes, ctx)); } catch (e) { - // Bytes fetched but the module won't load (bad ABI / corrupt wasm); a scheme issue would have failed earlier at fetch, so surface the real error. + // Bytes fetched but the module won't load (bad ABI / corrupt wasm); a scheme issue would have failed at fetch. failures.push(`'${spec}' failed to load as a wasm module: ${e?.message ?? e}`); continue; } @@ -111,16 +158,17 @@ export async function bfsPrefetch(rootSrc, exports, lockfile, ctx) { continue; } - // .py module + // .py module: register, then scan ITS imports (bare + quoted) so transitive deps stay lazy too. 
const specBytes = TE.encode(spec); exports.register_code_module(writeBytes(specBytes), specBytes.length, writeBytes(bytes), bytes.length); const dir = dirOf(spec); - for (const q of scanStringImports(TD.decode(bytes))) queue.push(joinRel(dir, q)); + for (const imp of scanImports(TD.decode(bytes), exports)) enqueueImport(imp, dir); enqueueManifestSibling(spec); } if (failures.length) { throw new Error(`could not pre-fetch every imported module:\n ${failures.join('\n ')}`); } + // Unresolved bare names are left to the compiler's parse-time resolver, which emits the precise error. } diff --git a/runtime/src/specs.js b/runtime/src/specs.js index aac69ef..545ff50 100644 --- a/runtime/src/specs.js +++ b/runtime/src/specs.js @@ -2,6 +2,9 @@ Spec/URL helpers. Mirror `compiler_lib::modules::packages::manifest` so transitive imports canonicalize identically on both sides. */ +/* Byte cap on any source handed to the compiler; mirrors the wasm SRC buffer (compiler SZ). */ +export const SOURCE_LIMIT = 1 << 20; + export const sha256Hex = async (bytes) => { const digest = await crypto.subtle.digest('SHA-256', bytes); return [...new Uint8Array(digest)].map(b => b.toString(16).padStart(2, '0')).join(''); @@ -37,7 +40,3 @@ export const joinRel = (base, target) => { } return b + t; }; - -/* Quoted-form imports (`from "" import ...`); bare names route through packages.json walk-up. 
*/ -export const scanStringImports = (src) => - [...src.matchAll(/^[ \t]*from [ \t]*"([^"]+)"/gm)].map(m => m[1]); diff --git a/runtime/tests/runtime.json b/runtime/tests/runtime.json index 897fbc9..c2f76aa 100644 --- a/runtime/tests/runtime.json +++ b/runtime/tests/runtime.json @@ -2,5 +2,8 @@ { "script": "render(str(add(2, 3) + sub(10, 4) + mul(2, 5)))", "expect": "21" }, { "script": "render(upper('hola'))", "expect": "HOLA" }, { "script": "render(f'{2 ** 10}')", "expect": "1024" }, - { "script": "render(str(10 // 0))", "error": "ZeroDivisionError" } + { "script": "render(str(10 // 0))", "error": "ZeroDivisionError" }, + { "script": "import calc\nrender(str(calc.add(2, 3)))", "expect": "5" }, + { "script": "from json import dumps\nrender(dumps(True))", "expect": "true" }, + { "script": "from time import strftime, gmtime\nrender(strftime('%Y-%m-%d', gmtime(0)))", "expect": "1970-01-01" } ] diff --git a/runtime/tests/runtime.test.js b/runtime/tests/runtime.test.js index 74a91ff..4d41c8a 100644 --- a/runtime/tests/runtime.test.js +++ b/runtime/tests/runtime.test.js @@ -24,10 +24,25 @@ Deno.test("runtime: runs the corpus through index.html", async () const errors = []; page.on("pageerror", (e) => errors.push(e.message)); page.on("console", (m) => { if (m.type() === "error") errors.push(m.text()); }); + const requested = []; + page.on("request", (q) => requested.push(q.url())); + const LOCAL_WASM = REPO + "target/wasm32-unknown-unknown/release/compiler_lib.wasm"; + const STD_JSON = new URL("../../../edge-python-std/json/target/wasm32-unknown-unknown/release/json.wasm", import.meta.url).pathname; + const HOST_REPO = new URL("../../../edge-python-host", import.meta.url).pathname; await page.route("**/*", (r) => { const u = new URL(r.request().url()); - if (u.host !== "localhost") return r.continue(); // CDN wasm passes through + // Serve this branch's compiler (it carries the export the runtime now needs), not the published CDN one. 
+ if (u.href.includes("compiler_lib.wasm")) return r.fulfill({ contentType: "application/wasm", body: readFileSync(LOCAL_WASM) }); + // Point the std/host defaults at the existing artifacts in the sibling repos (no new fixtures). + if (u.href.includes("std.edgepython.com/json.wasm")) return r.fulfill({ contentType: "application/wasm", body: readFileSync(STD_JSON) }); + if (u.host === "host.edgepython.com") { + // Production (Pages) flattens /src/* to /*; map back to the repo layout. + const repoPath = u.pathname.replace(/^\/([^/]+)\//, "/$1/src/"); + try { return r.fulfill({ contentType: "text/javascript", body: readFileSync(HOST_REPO + repoPath) }); } + catch { return r.fulfill({ status: 404 }); } + } + if (u.host !== "localhost") return r.continue(); // any other CDN asset passes through const ext = u.pathname.slice(u.pathname.lastIndexOf(".")); try { return r.fulfill({ contentType: TYPES[ext] ?? "application/octet-stream", body: readFileSync(REPO + u.pathname.slice(1)) }); } catch { return r.fulfill({ status: 404 }); } @@ -45,6 +60,10 @@ Deno.test("runtime: runs the corpus through index.html", async () globalThis.el = el; }); + const reqd = (frag) => requested.some((u) => u.includes(frag)); + // Lazy host: a host ESM must not load at boot, only when a run first imports it. + if (reqd("/app/ui.js")) throw new Error("host ui.js loaded at boot; host modules must be lazy"); + for (const c of cases) { errors.length = 0; const got = await page.evaluate(async (src) => { @@ -62,6 +81,13 @@ Deno.test("runtime: runs the corpus through index.html", async () throw new Error(`script:\n${c.script}\n got: ${JSON.stringify(got.app)}\n want: ${JSON.stringify(c.expect)}\n errors: ${errors.join(" | ") || "(none)"}`); } } + + // Laziness: only what the corpus imports gets fetched; declared-but-unused stays untouched. 
+ if (!reqd("/app/ui.js")) throw new Error("host ui was used but ui.js never loaded"); + if (!reqd("json.wasm")) throw new Error("json default imported but json.wasm never fetched"); + if (!reqd("host.edgepython.com/time")) throw new Error("time host default imported but never loaded"); + if (reqd("re.wasm")) throw new Error("re default never imported yet re.wasm was fetched (not lazy)"); + if (reqd("host.edgepython.com/network")) throw new Error("network host default never imported yet fetched (not lazy)"); } finally { await browser.close(); } diff --git a/runtime/worker/engine.js b/runtime/worker/engine.js index bffce17..f4fdcc1 100644 --- a/runtime/worker/engine.js +++ b/runtime/worker/engine.js @@ -8,8 +8,9 @@ import { bfsPrefetch } from '../src/prefetch.js'; import { makeCompilerEnv } from '../src/env.js'; import { makeRt } from '../src/rt.js'; import { nativeTable, resetNativeTable } from '../src/native.js'; +import { DEFAULT_IMPORTS } from '../src/defaults.js'; +import { SOURCE_LIMIT } from '../src/specs.js'; -const SOURCE_LIMIT = 1 << 20; // 1 MiB const TE = new TextEncoder(); const TD = new TextDecoder(); @@ -31,6 +32,8 @@ let cache = null; let integrityActive = false; let loaders = []; let importsMap = null; +// Seed the resolution table with the official base packages unless the embedder opts out. +let useDefaults = true; // Resolves run()'s current `await` when a `PendingEvent` wake-up arrives via `pushEvent`. let eventWaiter = null; // Events `pushEvent`'d before the VM was ready (no `compilerExports`, or no paused run yet). Drained at the next `PENDING_EVENT` yield. @@ -39,15 +42,20 @@ const pendingEvents = []; const pendingHostCalls = new Map(); /* (name, args) => Promise. Set by worker.js (postMessage round-trip) or by a main-thread embedder. */ let hostCallDelegate = null; +/* Host modules resolvable by bare name but loaded on demand; (name) => Promise. 
*/ +let loadHostDelegate = null; +let lazyHostNames = []; // Source/missing caches persist across runs so the BFS skips refetching modules and re-probing 404'd `packages.json` paths on every Run press. Wiped by `clearCache()`. const fetchedSources = new Map(); const knownMissing = new Set(); /* Synthetic native modules (handlers live on main thread). Re-applied at every `run` since `resetNativeTable` clears them. */ let mainThreadManifests = []; -export async function load({ wasmUrl, integrity = true, loaders: loaderUrls = [], imports = null, version = null }, manifests = []) { +export async function load({ wasmUrl, integrity = true, loaders: loaderUrls = [], imports = null, version = null, defaults = true, availableHosts = [] }, manifests = []) { const t0 = performance.now(); importsMap = imports; + useDefaults = defaults; + lazyHostNames = availableHosts; cache = await openCache(integrity); integrityActive = cache instanceof MemoryCache ? false : Boolean(integrity); @@ -110,29 +118,36 @@ export async function run({ src, entryDir = '', baseUrl = null, onLine }) { return ptr; }; - /* Synthetic main-thread modules: register at `mt:` specs and graft ` -> mt:` into importsMap so the bare name resolves via the synthesized packages.json. */ - const mainThreadSpecs = new Set(); - const augmentedImports = { ...(importsMap || {}) }; - for (const m of mainThreadManifests) { + /* Register a main-thread module at `mt:`: push a stub per export (the real call defers to the page) and tell the compiler its export names. 
*/ + const registerHost = (name, exportNames) => { const baseId = nativeTable.length; - for (const fnName of m.exports) { + for (const fnName of exportNames) { const stub = () => {}; stub.__edge_kind = 'capability'; stub.__edge_main_thread = true; stub.__edge_name = fnName; - stub.__edge_module = m.name; + stub.__edge_module = name; nativeTable.push(stub); } - const spec = `mt:${m.name}`; - mainThreadSpecs.add(spec); - augmentedImports[m.name] = spec; - const specBytes = TE.encode(spec); - const namesBytes = TE.encode(m.exports.join('\n')); + const specBytes = TE.encode(`mt:${name}`); + const namesBytes = TE.encode(exportNames.join('\n')); exports.register_native_module( writeBytes(specBytes), specBytes.length, writeBytes(namesBytes), namesBytes.length, baseId, ); + }; + + /* Both kinds graft ` -> mt:` so the bare name resolves; eager ones (programmatic objects) register now, lazy ones (urls) load on first import during prefetch. */ + const mainThreadSpecs = new Set(); + const augmentedImports = { ...(useDefaults ? DEFAULT_IMPORTS : {}), ...(importsMap || {}) }; + for (const m of mainThreadManifests) { + registerHost(m.name, m.exports); + mainThreadSpecs.add(`mt:${m.name}`); + augmentedImports[m.name] = `mt:${m.name}`; + } + for (const name of lazyHostNames) { + if (!mainThreadSpecs.has(`mt:${name}`)) augmentedImports[name] = `mt:${name}`; } const writeSrc = () => new Uint8Array(exports.memory.buffer).set(srcBytes, exports.src_ptr()); @@ -143,13 +158,19 @@ export async function run({ src, entryDir = '', baseUrl = null, onLine }) { baseUrl, entryDir, knownMissing, - importsMap: mainThreadManifests.length ? augmentedImports : importsMap, + importsMap: augmentedImports, mainThreadSpecs, integrityActive, fetchedSources, compilerExports: exports, rt, loaders, + // Lazy host: fetch export names from the page, then register the mt: stubs here. 
+ loadHost: (name) => { + if (!loadHostDelegate) throw new Error(`host '${name}' imported but no main-thread loader is wired`); + return loadHostDelegate(name); + }, + registerHost, }); // `wasm_alloc` during prefetch may have grown memory and detached our src view. @@ -249,6 +270,11 @@ export function setHostCallDelegate(fn) { hostCallDelegate = fn; } +/* Register the lazy host loader: (name) => Promise. worker.js wires the postMessage round-trip. */ +export function setLoadHostDelegate(fn) { + loadHostDelegate = fn; +} + export function reset() { if (compilerExports) compilerExports.reset_modules(); resetNativeTable(); @@ -272,6 +298,8 @@ export function dispose() { resetNativeTable(); pendingHostCalls.clear(); hostCallDelegate = null; + loadHostDelegate = null; + lazyHostNames = []; mainThreadManifests = []; } diff --git a/runtime/worker/worker.js b/runtime/worker/worker.js index f368f29..69501ea 100644 --- a/runtime/worker/worker.js +++ b/runtime/worker/worker.js @@ -16,6 +16,16 @@ engine.setHostCallDelegate((module, name, args) => new Promise((resolve, reject) self.postMessage({ type: 'host-call', reqId, module, name, args }); })); +/* Lazy host load: post `{type:'load-host', reqId, name}` to main and await `{type:'load-host-response'}` with export names. */ +let nextLoadHostReqId = 0; +const pendingLoadHost = new Map(); + +engine.setLoadHostDelegate((name) => new Promise((resolve, reject) => { + const reqId = ++nextLoadHostReqId; + pendingLoadHost.set(reqId, { resolve, reject }); + self.postMessage({ type: 'load-host', reqId, name }); +})); + const handlers = { load: (data) => engine.load(data.opts, data.mainThreadManifests), run: (data) => engine.run({ ...data, onLine }), @@ -32,6 +42,14 @@ const handlers = { if (data.error) cb.reject(new Error(data.error)); else cb.resolve(data.value); }, + /* Main thread loaded a lazy host module; resolve with its export names. 
*/ + 'load-host-response': (data) => { + const cb = pendingLoadHost.get(data.reqId); + if (!cb) return; + pendingLoadHost.delete(data.reqId); + if (data.error) cb.reject(new Error(data.error)); + else cb.resolve(data.exports); + }, }; self.onmessage = async ({ data }) => { @@ -43,7 +61,7 @@ self.onmessage = async ({ data }) => { try { const result = await handler(data); /* Fire-and-forget message types skip the response post; only reply when an outer reqId was attached. */ - if (data.reqId != null && data.type !== 'host-call-response' && data.type !== 'push-event') { + if (data.reqId != null && data.type !== 'host-call-response' && data.type !== 'load-host-response' && data.type !== 'push-event') { self.postMessage({ type: 'response', reqId: data.reqId, result }); } } catch (e) { From 8adf3c164acc72a1ba44c859c8fc4ea8bfcc5fc9 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Tue, 26 May 2026 06:18:57 -0600 Subject: [PATCH 2/2] fix(pendingBare, DEFAULT_IMPORTS): Cleaning that two issues. --- runtime/src/index.js | 6 ++++-- runtime/src/prefetch.js | 8 ++++---- runtime/tests/runtime.test.js | 3 --- runtime/worker/engine.js | 8 ++------ 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/runtime/src/index.js b/runtime/src/index.js index 6632c31..c49394b 100644 --- a/runtime/src/index.js +++ b/runtime/src/index.js @@ -2,7 +2,7 @@ Public entry. `createWorker(opts)` spawns a Web Worker around `engine.js` and returns a proxy whose methods round-trip via postMessage. See README for options. */ -import { DEFAULT_HOST } from './defaults.js'; +import { DEFAULT_HOST, DEFAULT_IMPORTS } from './defaults.js'; export async function createWorker(opts) { // Chromium blocks `new Worker(crossOriginUrl)` even with `type:'module'`; cross-origin runtimes need the Blob bootstrap below. @@ -97,8 +97,10 @@ export async function createWorker(opts) { /* Strip mainThreadModules/hostModules before crossing postMessage: not structured-cloneable / loaded on the page. 
The worker only needs eager manifests and the lazy host names. */ const { mainThreadModules: _drop, hostModules: _dropHosts, ...workerOpts } = opts || {}; + /* Fold the std .wasm defaults into imports here so the worker engine stays embedder-neutral; `defaults:false` opts out. */ + const imports = { ...(opts?.defaults !== false ? DEFAULT_IMPORTS : {}), ...(opts?.imports || {}) }; const ready = await send('load', { - opts: { ...workerOpts, availableHosts: Object.keys(hostUrls) }, + opts: { ...workerOpts, imports, availableHosts: Object.keys(hostUrls) }, mainThreadManifests: manifests, }); diff --git a/runtime/src/prefetch.js b/runtime/src/prefetch.js index e7556e5..2db466d 100644 --- a/runtime/src/prefetch.js +++ b/runtime/src/prefetch.js @@ -51,7 +51,7 @@ export async function bfsPrefetch(rootSrc, exports, lockfile, ctx) { // Bare-name -> target spec. Seeded from importsMap (defaults + user); physical packages.json merge in as discovered. const table = { ...(importsMap || {}) }; // Bare names scanned before a manifest declared them; retried after each manifest merge. - const pendingBare = new Map(); // name -> importer dir, for relative targets + const pendingBare = new Map(); // name -> importer dirs, for relative targets const writeBytes = (bytes) => { const ptr = exports.wasm_alloc(Math.max(1, bytes.length)); @@ -68,12 +68,12 @@ export async function bfsPrefetch(rootSrc, exports, lockfile, ctx) { if (!imp.bare) { queue.push(joinRel(dir, imp.spec)); return; } const target = table[imp.spec]; if (target !== undefined) queue.push(joinRel(dir, target)); - else pendingBare.set(imp.spec, dir); // a later manifest may declare it + else { const ds = pendingBare.get(imp.spec); ds ? 
ds.push(dir) : pendingBare.set(imp.spec, [dir]); } // a later manifest may declare it }; const retryPending = () => { - for (const [name, dir] of [...pendingBare]) { + for (const [name, dirs] of [...pendingBare]) { const target = table[name]; - if (target !== undefined) { queue.push(joinRel(dir, target)); pendingBare.delete(name); } + if (target !== undefined) { for (const dir of dirs) queue.push(joinRel(dir, target)); pendingBare.delete(name); } } }; diff --git a/runtime/tests/runtime.test.js b/runtime/tests/runtime.test.js index 4d41c8a..4f57240 100644 --- a/runtime/tests/runtime.test.js +++ b/runtime/tests/runtime.test.js @@ -27,13 +27,10 @@ Deno.test("runtime: runs the corpus through index.html", async () const requested = []; page.on("request", (q) => requested.push(q.url())); - const LOCAL_WASM = REPO + "target/wasm32-unknown-unknown/release/compiler_lib.wasm"; const STD_JSON = new URL("../../../edge-python-std/json/target/wasm32-unknown-unknown/release/json.wasm", import.meta.url).pathname; const HOST_REPO = new URL("../../../edge-python-host", import.meta.url).pathname; await page.route("**/*", (r) => { const u = new URL(r.request().url()); - // Serve this branch's compiler (it carries the export the runtime now needs), not the published CDN one. - if (u.href.includes("compiler_lib.wasm")) return r.fulfill({ contentType: "application/wasm", body: readFileSync(LOCAL_WASM) }); // Point the std/host defaults at the existing artifacts in the sibling repos (no new fixtures). 
if (u.href.includes("std.edgepython.com/json.wasm")) return r.fulfill({ contentType: "application/wasm", body: readFileSync(STD_JSON) }); if (u.host === "host.edgepython.com") { diff --git a/runtime/worker/engine.js b/runtime/worker/engine.js index f4fdcc1..0557d6c 100644 --- a/runtime/worker/engine.js +++ b/runtime/worker/engine.js @@ -8,7 +8,6 @@ import { bfsPrefetch } from '../src/prefetch.js'; import { makeCompilerEnv } from '../src/env.js'; import { makeRt } from '../src/rt.js'; import { nativeTable, resetNativeTable } from '../src/native.js'; -import { DEFAULT_IMPORTS } from '../src/defaults.js'; import { SOURCE_LIMIT } from '../src/specs.js'; const TE = new TextEncoder(); @@ -32,8 +31,6 @@ let cache = null; let integrityActive = false; let loaders = []; let importsMap = null; -// Seed the resolution table with the official base packages unless the embedder opts out. -let useDefaults = true; // Resolves run()'s current `await` when a `PendingEvent` wake-up arrives via `pushEvent`. let eventWaiter = null; // Events `pushEvent`'d before the VM was ready (no `compilerExports`, or no paused run yet). Drained at the next `PENDING_EVENT` yield. @@ -51,10 +48,9 @@ const knownMissing = new Set(); /* Synthetic native modules (handlers live on main thread). Re-applied at every `run` since `resetNativeTable` clears them. 
*/ let mainThreadManifests = []; -export async function load({ wasmUrl, integrity = true, loaders: loaderUrls = [], imports = null, version = null, defaults = true, availableHosts = [] }, manifests = []) { +export async function load({ wasmUrl, integrity = true, loaders: loaderUrls = [], imports = null, version = null, availableHosts = [] }, manifests = []) { const t0 = performance.now(); importsMap = imports; - useDefaults = defaults; lazyHostNames = availableHosts; cache = await openCache(integrity); @@ -140,7 +136,7 @@ export async function run({ src, entryDir = '', baseUrl = null, onLine }) { /* Both kinds graft ` -> mt:` so the bare name resolves; eager ones (programmatic objects) register now, lazy ones (urls) load on first import during prefetch. */ const mainThreadSpecs = new Set(); - const augmentedImports = { ...(useDefaults ? DEFAULT_IMPORTS : {}), ...(importsMap || {}) }; + const augmentedImports = { ...(importsMap || {}) }; // defaults already folded in by the embedder (index.js) for (const m of mainThreadManifests) { registerHost(m.name, m.exports); mainThreadSpecs.add(`mt:${m.name}`);