diff --git a/packages/engine/src/services/frameCapture-pollImagesReady.test.ts b/packages/engine/src/services/frameCapture-pollImagesReady.test.ts new file mode 100644 index 000000000..4c70dbda4 --- /dev/null +++ b/packages/engine/src/services/frameCapture-pollImagesReady.test.ts @@ -0,0 +1,153 @@ +/** + * Tests for `pollImagesReady` — the image-side analog of `pollVideosReady`. + * + * Critical contract: + * - Successfully loaded image (complete=true, naturalWidth>0) → settled. + * - Broken / 404 image (complete=true, naturalWidth=0) → settled. + * This mirrors `pollVideosReady`'s `ve.error` early-exit. Without it, + * the htmlCompiler 404-fallback path (where a remote URL failed + * to download and the original URL is preserved) would silently spin + * the full `pageReadyTimeout` budget waiting for an image that will + * never load — a 45 s regression vs the pre-PR behavior. + * - In-flight image (complete=false) → still waiting. + * - data: URI src → settled (no network fetch). + * - Empty src → settled (nothing to load). + */ + +import { describe, expect, it } from "vitest"; +import type { Page } from "puppeteer-core"; +import { pollImagesReady } from "./frameCapture.js"; + +interface ImageSpec { + src: string; + complete: boolean; + naturalWidth: number; +} + +// Mock `page` whose `evaluate(fn)` invokes `fn` with a Node-side `document` +// mock that returns synthetic image objects matching the spec. Snapshots the +// image state at evaluate-time, so callers can mutate `imgs` between polls +// to simulate progressive load completion. +function makeMockPage(imgs: () => ImageSpec[]): Page { + return { + evaluate: async (fn: () => unknown) => { + const snapshot = imgs(); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const prevDoc = (globalThis as any).document; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).document = { + querySelectorAll: () => + snapshot.map((spec) => ({ + getAttribute: (attr: string) => (attr === "src" ? spec.src : null), + complete: spec.complete, + naturalWidth: spec.naturalWidth, + })), + }; + try { + return await fn(); + } finally { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).document = prevDoc; + } + }, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } as any as Page; +} + +describe("pollImagesReady", () => { + it("resolves immediately when there are no elements", async () => { + const page = makeMockPage(() => []); + const result = await pollImagesReady(page, 1000, 10); + expect(result).toBe(true); + }); + + it("resolves immediately when every image has loaded successfully", async () => { + const page = makeMockPage(() => [ + { src: "/a.png", complete: true, naturalWidth: 100 }, + { src: "/b.png", complete: true, naturalWidth: 200 }, + ]); + const result = await pollImagesReady(page, 1000, 10); + expect(result).toBe(true); + }); + + it("treats a broken image (complete=true, naturalWidth=0) as settled — does NOT wait for timeout", async () => { + // This is the bug Magi flagged. Without the broken-image escape, this + // test would block the full 1000ms timeout and return false. + const page = makeMockPage(() => [ + { src: "/a.png", complete: true, naturalWidth: 100 }, + { src: "https://broken.example.com/404.png", complete: true, naturalWidth: 0 }, + ]); + const t0 = Date.now(); + const result = await pollImagesReady(page, 1000, 10); + const elapsed = Date.now() - t0; + expect(result).toBe(true); + // Must resolve fast — well under the 1000ms timeout. + expect(elapsed).toBeLessThan(500); + }); + + it("treats a data: URI src as settled regardless of complete/naturalWidth", async () => { + const page = makeMockPage(() => [ + { src: "data:image/svg+xml,%3Csvg/%3E", complete: false, naturalWidth: 0 }, + ]); + const result = await pollImagesReady(page, 1000, 10); + expect(result).toBe(true); + }); + + it("treats an empty src as settled (nothing to load)", async () => { + const page = makeMockPage(() => [{ src: "", complete: false, naturalWidth: 0 }]); + const result = await pollImagesReady(page, 1000, 10); + expect(result).toBe(true); + }); + + it("waits for an in-flight image and resolves once it completes", async () => { + // Image starts in-flight, then completes after ~50ms. + let started = false; + const startTime = { value: 0 }; + const page = makeMockPage(() => { + if (!started) { + started = true; + startTime.value = Date.now(); + } + const elapsed = Date.now() - startTime.value; + const loaded = elapsed >= 50; + return [{ src: "/slow.png", complete: loaded, naturalWidth: loaded ? 100 : 0 }]; + }); + const result = await pollImagesReady(page, 1000, 10); + expect(result).toBe(true); + }); + + it("times out and returns false when an in-flight image never resolves", async () => { + // Image stays in-flight (complete=false) for the full timeout. + const page = makeMockPage(() => [ + { src: "/never-loads.png", complete: false, naturalWidth: 0 }, + ]); + const result = await pollImagesReady(page, 100, 10); + expect(result).toBe(false); + }); + + it("mixed batch: loaded + broken + data: + in-flight → waits only on the in-flight image", async () => { + let resolved = false; + const start = Date.now(); + const page = makeMockPage(() => { + const elapsed = Date.now() - start; + if (elapsed >= 30) resolved = true; + return [ + { src: "/loaded.png", complete: true, naturalWidth: 800 }, + { src: "https://broken.example.com/404.jpg", complete: true, naturalWidth: 0 }, + { src: "data:image/svg+xml,abc", complete: false, naturalWidth: 0 }, + { + src: "/in-flight.png", + complete: resolved, + naturalWidth: resolved ? 200 : 0, + }, + ]; + }); + const t0 = Date.now(); + const result = await pollImagesReady(page, 1000, 10); + const elapsed = Date.now() - t0; + expect(result).toBe(true); + // Should wait roughly for the in-flight image to settle (~30ms) — not the + // full timeout. Allow generous slack for CI scheduler jitter. + expect(elapsed).toBeLessThan(500); + }); +}); diff --git a/packages/engine/src/services/frameCapture.ts b/packages/engine/src/services/frameCapture.ts index 78613c3dc..be3facdf6 100644 --- a/packages/engine/src/services/frameCapture.ts +++ b/packages/engine/src/services/frameCapture.ts @@ -550,6 +550,89 @@ async function pollVideosReady( return check(); } +// Wait for every `` with a non-`data:` src to have settled — either +// successfully loaded (`complete && naturalWidth > 0`) or failed with a +// broken-image marker (`complete && naturalWidth === 0`, the HTMLImageElement +// equivalent of HTMLMediaElement.error). htmlCompiler localises remote `` +// URLs to the local file server before this point, so in practice this polls +// for the local fetch to land — but the guard is a defensive net so that any +// future composition path that leaves a remote URL in place won't capture +// frames before the pixels arrive. Mirrors `pollVideosReady` for parity with +// the video-side readiness contract (videos exit-early on `ve.error`; images +// exit-early on `complete && naturalWidth === 0`). +/** @internal exported for unit testing only */ +export async function pollImagesReady( + page: Page, + timeoutMs: number, + intervalMs: number = 100, +): Promise { + const check = async (): Promise => { + return Boolean( + await page.evaluate(() => { + const imgs = Array.from(document.querySelectorAll("img")); + return ( + imgs.length === 0 || + imgs.every((img) => { + const ie = img as HTMLImageElement; + const src = ie.getAttribute("src") || ""; + if (!src || src.startsWith("data:")) return true; + // A `complete` image with zero naturalWidth has settled with an + // error (404 / decode failure / CORS rejection / blocked). Treat + // as done — waiting won't make it load — and let the render + // continue with the broken-image marker visible. Mirrors how + // pollVideosReady treats `ve.error`. + if (ie.complete && ie.naturalWidth === 0) return true; + if (ie.complete && ie.naturalWidth > 0) return true; + return false; + }) + ); + }), + ); + }; + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + if (await check()) return true; + await new Promise((resolve) => setTimeout(resolve, intervalMs)); + } + return check(); +} + +// Force every successfully-loaded `` to be GPU-uploaded before the first +// frame capture. `naturalWidth > 0` means the bitmap has been decoded into +// CPU memory, but compositor-side GPU upload can still happen lazily on first +// paint. Calling `img.decode()` returns a Promise that resolves once the image +// is ready for synchronous painting — eliminating the small first-frame race +// between "image is technically loaded" and "the rasterized texture is on the +// GPU and ready to composite". +// +// Note this is purely an init-time guard; it doesn't prevent Chrome from +// evicting decoded pixels mid-render. The producer-side `localizeRemoteImageSources` +// is what bounds the eviction risk (a re-fetch hits the local file server's +// disk-backed paging, not S3 over the network). +// +// Critical: `decode()` on an in-flight image waits for the fetch to resolve. +// If `pollImagesReady` timed out with some images still loading (`!complete`), +// calling `decode()` on them would block here until the network finally +// completes — or until puppeteer's evaluate timeout fires and throws an +// uncaught error that aborts the render. Skip in-flight and broken images; +// only force GPU upload for images that successfully loaded. +async function decodeAllImages(page: Page): Promise { + await page.evaluate(async () => { + const imgs = Array.from(document.querySelectorAll("img")); + await Promise.all( + imgs.map((img) => { + const ie = img as HTMLImageElement; + if (typeof ie.decode !== "function") return Promise.resolve(); + // Skip still-loading images (in-flight decode() would hang) and + // broken images (decode() rejects, but pre-filtering is clearer + // than relying on the .catch). + if (!ie.complete || ie.naturalWidth === 0) return Promise.resolve(); + return ie.decode().catch(() => undefined); + }), + ); + }); +} + async function applyVideoMetadataHints( page: Page, hints: readonly CaptureVideoMetadataHint[] | undefined, @@ -707,6 +790,26 @@ export async function initializeSession(session: CaptureSession): Promise ); } + const imagesReady = await pollImagesReady(page, pageReadyTimeout); + if (!imagesReady) { + const failedImages = await page.evaluate(() => { + return Array.from(document.querySelectorAll("img")) + .filter((img) => { + const ie = img as HTMLImageElement; + const src = ie.getAttribute("src") || ""; + if (!src || src.startsWith("data:")) return false; + return !(ie.complete && ie.naturalWidth > 0); + }) + .map((img) => (img as HTMLImageElement).src || img.getAttribute("src") || "(no src)") + .join(", "); + }); + console.warn( + `[FrameCapture] Some image elements did not load within ${pageReadyTimeout}ms: ${failedImages}. ` + + `Continuing render — affected images may appear blank/missing in early frames.`, + ); + } + await decodeAllImages(page); + await page.evaluate(`document.fonts?.ready`); await waitForOptionalTailwindReady(page, pageReadyTimeout); @@ -812,6 +915,28 @@ export async function initializeSession(session: CaptureSession): Promise ); } + // Image readiness — parity with pollVideosReady. Defense against remote + // URLs that bypass the htmlCompiler localize step. + const bfImagesReady = await pollImagesReady(page, pageReadyTimeout); + if (!bfImagesReady) { + const failedImages = await page.evaluate(() => { + return Array.from(document.querySelectorAll("img")) + .filter((img) => { + const ie = img as HTMLImageElement; + const src = ie.getAttribute("src") || ""; + if (!src || src.startsWith("data:")) return false; + return !(ie.complete && ie.naturalWidth > 0); + }) + .map((img) => (img as HTMLImageElement).src || img.getAttribute("src") || "(no src)") + .join(", "); + }); + console.warn( + `[FrameCapture] Some image elements did not load within ${pageReadyTimeout}ms: ${failedImages}. ` + + `Continuing render — affected images may appear blank/missing in early frames.`, + ); + } + await decodeAllImages(page); + // Font check (no rAF dependency — uses fonts.ready API directly) await page.evaluate(`document.fonts?.ready`); await waitForOptionalTailwindReady(page, pageReadyTimeout); diff --git a/packages/producer/src/services/htmlCompiler.test.ts b/packages/producer/src/services/htmlCompiler.test.ts index 158102663..cb8c9bda8 100644 --- a/packages/producer/src/services/htmlCompiler.test.ts +++ b/packages/producer/src/services/htmlCompiler.test.ts @@ -11,6 +11,7 @@ import { discoverAudioVolumeAutomationFromTimeline, inlineExternalScripts, localizeRemoteMediaSources, + localizeRemoteImageSources, localizeRemoteFontFaces, recompileWithResolutions, } from "./htmlCompiler.js"; @@ -950,6 +951,146 @@ describe("localizeRemoteMediaSources", () => { }); }); +// ── localizeRemoteImageSources ─────────────────────────────────────────────── +// +// Regression coverage for the agent-pipeline `` flicker bug: producer's +// frame-capture has no `pollImagesReady` analog of `pollVideosReady`, so a +// composition with raw S3 `` URLs (astral / daphne / +// hyperion multi-v2 outputs) reaches Chrome with a network dependency that +// races the readiness gate AND can be evicted mid-render. Localising before +// render is the architectural fix; `pollImagesReady` in frameCapture is the +// defense-in-depth layer. +// +// Mirrors the localizeRemoteMediaSources test shape; fetch is patched in +// for success cases and a real 404 covers the fallback path. + +describe("localizeRemoteImageSources", () => { + it("rewrites remote src to _remote_media path when download succeeds", async () => { + const orig = globalThis.fetch; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = async () => new Response(new Uint8Array(100), { status: 200 }); + try { + const dl = mkdtempSync(join(tmpdir(), "hf-img-ok-")); + const html = ``; + const { html: result, remoteMediaAssets } = await localizeRemoteImageSources(html, dl); + expect(result).not.toContain("https://img-ok.example.com/"); + expect(result).toContain("_remote_media/"); + expect(remoteMediaAssets.size).toBe(1); + } finally { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = orig; + } + }); + + it("preserves original URL on download failure without throwing", async () => { + const dl = mkdtempSync(join(tmpdir(), "hf-img-fail-")); + const url = "https://example.com/will-404-image-localize-test.png"; + const html = ``; + const { html: result, remoteMediaAssets } = await localizeRemoteImageSources(html, dl); + expect(result).toContain(url); + expect(remoteMediaAssets.size).toBe(0); + }); + + it("deduplicates: two tags with the same src URL → one download", async () => { + const orig = globalThis.fetch; + let fetchCount = 0; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = async () => { + fetchCount++; + return new Response(new Uint8Array(100), { status: 200 }); + }; + try { + const dl = mkdtempSync(join(tmpdir(), "hf-img-dedup-")); + const html = ` +`; + await localizeRemoteImageSources(html, dl); + expect(fetchCount).toBe(1); + } finally { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = orig; + } + }); + + it("does not rewrite local (non-HTTP) src paths", async () => { + const dl = mkdtempSync(join(tmpdir(), "hf-img-local-")); + const html = ``; + const { html: result, remoteMediaAssets } = await localizeRemoteImageSources(html, dl); + expect(result).toContain("assets/hero.png"); + expect(result).not.toContain("_remote_media/"); + expect(remoteMediaAssets.size).toBe(0); + }); + + it("does not rewrite data: URI src", async () => { + const dl = mkdtempSync(join(tmpdir(), "hf-img-data-")); + const html = ``; + const { html: result, remoteMediaAssets } = await localizeRemoteImageSources(html, dl); + expect(result).toContain("data:image/svg+xml"); + expect(remoteMediaAssets.size).toBe(0); + }); + + it("rewrites both double-quoted and single-quoted src attributes", async () => { + const orig = globalThis.fetch; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = async () => new Response(new Uint8Array(100), { status: 200 }); + try { + const dl = mkdtempSync(join(tmpdir(), "hf-img-quotes-")); + const html = ` +`; + const { html: result } = await localizeRemoteImageSources(html, dl); + expect(result).not.toContain("https://q-img.example.com/"); + expect(result.match(/_remote_media\//g)?.length).toBe(2); + } finally { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = orig; + } + }); + + it("does not match data-src (lazy-loader placeholder), only the real src attribute", async () => { + // A lazy-loader emits the real asset in `data-src` and a placeholder in + // `src`. We must localise what Chrome actually paints (the real `src`), + // not the `data-src` URL — matching `data-src` would download an asset the + // render never shows and could break the loader's runtime swap. + const orig = globalThis.fetch; + let fetchCount = 0; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = async () => { + fetchCount++; + return new Response(new Uint8Array(100), { status: 200 }); + }; + try { + const dl = mkdtempSync(join(tmpdir(), "hf-img-datasrc-")); + const html = ``; + const { html: result } = await localizeRemoteImageSources(html, dl); + // The real src is localised; the data-src URL is left untouched. + expect(result).toContain("https://lazy.example.com/real.png"); + expect(result).not.toContain("https://cdn.example.com/placeholder.png"); + expect(fetchCount).toBe(1); + } finally { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = orig; + } + }); + + it("handles src attribute not as the first attribute (agent-pipeline shape)", async () => { + // The 02_kobe astral-pipeline composition that surfaced this bug emits + // tags with `class` before `src`. Regex must not assume src position. + const orig = globalThis.fetch; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = async () => new Response(new Uint8Array(100), { status: 200 }); + try { + const dl = mkdtempSync(join(tmpdir(), "hf-img-attr-order-")); + const html = `kobe`; + const { html: result, remoteMediaAssets } = await localizeRemoteImageSources(html, dl); + expect(result).not.toContain("https://astral.example.com/"); + expect(result).toContain("_remote_media/"); + expect(remoteMediaAssets.size).toBe(1); + } finally { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (globalThis as any).fetch = orig; + } + }); +}); + // ── localizeRemoteFontFaces ────────────────────────────────────────────────── describe("localizeRemoteFontFaces", () => { diff --git a/packages/producer/src/services/htmlCompiler.ts b/packages/producer/src/services/htmlCompiler.ts index 6ae75a282..26309ff34 100644 --- a/packages/producer/src/services/htmlCompiler.ts +++ b/packages/producer/src/services/htmlCompiler.ts @@ -877,6 +877,14 @@ const REMOTE_MEDIA_SUBDIR = "_remote_media"; // have `>` inside quoted attribute values (data-title etc.). const REMOTE_MEDIA_TAG_RE = /<(?:video|audio)\b[^>]*?\bsrc\s*=\s*["'](https?:\/\/[^"']+)["'][^>]*>/gi; +// Match tags (including agent-pipeline-emitted variants where `src` is +// not the first attribute). Producer-side localisation is the primary fix for +// the remote- flicker; frameCapture's `pollImagesReady`/`decodeAllImages` +// are the defense-in-depth layer for any remote URL that bypasses this step. +// The `(?]*?(?]*>/gi; /** * Download a set of remote URLs in parallel into `remoteDir`, build the @@ -971,6 +979,49 @@ export async function localizeRemoteMediaSources( ); } +/** + * Download any remote `src` URLs on `` elements into a local subdirectory + * of `downloadDir`, rewrite the HTML src attributes to relative paths, and + * return a `{ relativePath → absoluteLocalPath }` map for the orchestrator. + * + * Why: a composition with remote S3 `` URLs reaches Chrome unchanged; + * the readiness check can pass before the image is fully decoded, *and* Chrome + * may evict decoded pixels mid-render under memory pressure and re-fetch from + * the remote origin. Either path produces blank-frame flicker. Localising the + * sources before render eliminates both races — once the file is local, + * Chrome's image cache is bounded by fast disk reads, not S3 latency, so a + * mid-render re-fetch lands within a frame instead of flickering. This is the + * primary fix; frameCapture's `pollImagesReady` is the defense-in-depth layer. + * + * Scope: only `` is localised here. Remote `srcset`, + * ``, SVG ``, and CSS `background-image: url()` + * outside `@font-face` are NOT covered — agent-pipeline compositions emit + * plain ``, but those are open follow-ups if other shapes appear. + * + * This bites agent-pipeline-generated compositions (astral / daphne / + * hyperion `multi-v2` outputs) which render directly without going through + * `hyperframes publish`'s archive-time localize step. + */ +/** @internal exported for unit testing only */ +export async function localizeRemoteImageSources( + html: string, + downloadDir: string, +): Promise<{ html: string; remoteMediaAssets: Map }> { + const urlSet = new Set(); + const re = new RegExp(REMOTE_IMG_TAG_RE.source, REMOTE_IMG_TAG_RE.flags); + let m: RegExpExecArray | null; + while ((m = re.exec(html)) !== null) { + if (m[1]) urlSet.add(m[1]); + } + return downloadAndRewriteUrls( + urlSet, + html, + join(downloadDir, REMOTE_MEDIA_SUBDIR), + "Remote image download failed for", + "Localized remote image source(s)", + ); +} + // Match url("https://...") or url('https://...') inside @font-face blocks. // We scan the full HTML (which includes