+
+
+
+ '@ | Set-Content -Path index.html -Encoding utf8
+
+ - name: Render issue #574 reused-video regression
+ shell: pwsh
+ env:
+ PRODUCER_PLAYER_READY_TIMEOUT_MS: "15000"
+ run: |
+ cd "$env:RUNNER_TEMP\issue-574-reused-video"
+ node "$env:GITHUB_WORKSPACE\packages\cli\dist\cli.js" render `
+ --fps 30 `
+ --quality standard `
+ --workers 1 `
+ --output renders\issue-574.mp4
+
+ - name: Verify issue #574 rendered MP4
+ shell: pwsh
+ run: |
+ $mp4 = "$env:RUNNER_TEMP\issue-574-reused-video\renders\issue-574.mp4"
+ if (-not (Test-Path $mp4)) { throw "issue-574.mp4 not produced" }
+
+ $probe = ffprobe -v error -select_streams v:0 `
+ -show_entries stream=width,height,r_frame_rate -show_entries format=duration `
+ -of default=noprint_wrappers=1 $mp4
+ Write-Host $probe
+
+ $width = ($probe | Select-String '^width=(.+)$').Matches.Groups[1].Value
+ $height = ($probe | Select-String '^height=(.+)$').Matches.Groups[1].Value
+ $fps = ($probe | Select-String '^r_frame_rate=(.+)$').Matches.Groups[1].Value
+ $duration = [double]($probe | Select-String '^duration=(.+)$').Matches.Groups[1].Value
+
+ if ([int]$width -ne 1920) { throw "expected 1920 width, got $width" }
+ if ([int]$height -ne 1080) { throw "expected 1080 height, got $height" }
+ if ($fps -ne "30/1") { throw "expected 30fps, got $fps" }
+ if ($duration -lt 11.5 -or $duration -gt 12.5) { throw "expected ~12s duration, got $duration" }
+
+ Write-Host "issue-574.mp4 ok: ${width}x${height} @ $fps, ${duration}s"
+
- name: Upload rendered MP4 artifact
if: always()
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
with:
name: windows-render-${{ github.run_id }}
- path: ${{ runner.temp }}/windows-canary/canary/renders/canary.mp4
+ path: |
+ ${{ runner.temp }}/windows-canary/canary/renders/canary.mp4
+ ${{ runner.temp }}/issue-574-reused-video/renders/issue-574.mp4
if-no-files-found: error
retention-days: 7
@@ -226,7 +335,7 @@ jobs:
steps:
- name: Checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
ref: ${{ github.event.inputs.ref }}
lfs: true
@@ -242,10 +351,10 @@ jobs:
uses: ./.github/actions/install-ffmpeg-windows
- name: Install Bun
- uses: oven-sh/setup-bun@v2
+ uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2
- name: Install Node
- uses: actions/setup-node@v4
+ uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
with:
node-version: 22
diff --git a/.gitignore b/.gitignore
index 55b6babf0..24a7d2c79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@ coverage/
# Producer regression test failures (generated debugging artifacts)
packages/producer/tests/*/failures/
+packages/producer/tests/parity/fixtures/hyperframe.runtime.iife.js
# Player perf test results (generated each run, attached as CI artifact)
packages/player/tests/perf/results/
diff --git a/ADOPTERS.md b/ADOPTERS.md
new file mode 100644
index 000000000..04064559a
--- /dev/null
+++ b/ADOPTERS.md
@@ -0,0 +1,25 @@
+# Adopters
+
+This page lists organizations using HyperFrames in production or actively evaluating it. If your team is shipping with HyperFrames — whether you're rendering hundreds of videos a day, building agent-driven composition tooling, or experimenting with HTML-as-video for the first time — we'd love to hear about it.
+
+Adding your organization helps the community understand how HyperFrames is being used in the wild and makes it easier for new users to find peers solving similar problems.
+
+## How to add your organization
+
+Open a pull request that adds a row to the table below. Keep entries short:
+
+- **Organization** — your company or project name, linked to your website.
+- **Contact** — a GitHub handle or contact person who can answer questions about your usage.
+- **How HyperFrames is used** — one sentence on the use case (e.g., "Personalized video at scale," "Agent-authored marketing assets," "Slides-to-video pipeline").
+
+If you'd rather not be listed publicly, that's fine — drop a note in [our Discord](https://discord.gg/EbK98HBPdk) instead. We always like hearing about how the project is being used.
+
+## Production
+
+| Organization | Contact | How HyperFrames is used |
+| -------------------------------- | -------------------------------------------- | ------------------------------------------------------------------------------------------ |
+| [HeyGen](https://www.heygen.com) | [@jrusso1020](https://github.com/jrusso1020) | Powers AI-generated video composition and rendering across HeyGen's video product surface. |
+
+## Evaluating
+
+_Open a PR to add your organization here if you're trying HyperFrames in a non-production context._
diff --git a/CLAUDE.md b/CLAUDE.md
index e9fc57bc4..4c8b6990a 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -67,4 +67,4 @@ will not match CI. Use it only for local-only experimentation.
## Skills
-Composition authoring (not repo development) is guided by skills installed via `npx skills add heygen-com/hyperframes`. See `skills/` for source. Invoke `/hyperframes`, `/hyperframes-cli`, `/hyperframes-registry`, or `/gsap` when authoring compositions. When a user provides a website URL and wants a video, invoke `/website-to-hyperframes` — it runs the full 7-step capture-to-video pipeline.
+Composition authoring (not repo development) is guided by skills installed via `npx skills add heygen-com/hyperframes`. See `skills/` for source. Invoke `/hyperframes`, `/hyperframes-cli`, `/hyperframes-registry`, `/tailwind`, or `/gsap` when authoring compositions. Use `/tailwind` for projects created with `hyperframes init --tailwind` so agents follow the pinned Tailwind v4 browser-runtime contract instead of Studio's Tailwind v3 setup. Use `/animejs`, `/css-animations`, `/lottie`, `/three`, or `/waapi` when a composition uses those first-party runtime adapters. Invoke `/hyperframes-media` for asset preprocessing (TTS narration, audio/video transcription, background removal for transparent overlays) — these commands have their own skill so the CLI skill stays focused on the dev loop. When a user provides a website URL and wants a video, invoke `/website-to-hyperframes` — it runs the full 7-step capture-to-video pipeline.
diff --git a/README.md b/README.md
index e35c1e8c1..37ea6f165 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@
+
Write HTML. Render video. Built for agents.
@@ -31,7 +32,7 @@ Install the HyperFrames skills, then describe the video you want:
npx skills add heygen-com/hyperframes
```
-This teaches your agent (Claude Code, Cursor, Gemini CLI, Codex) how to write correct compositions and GSAP animations. In Claude Code, the skills register as slash commands — invoke `/hyperframes` to author compositions, `/hyperframes-cli` for CLI commands, and `/gsap` for animation help.
+This teaches your agent (Claude Code, Cursor, Gemini CLI, Codex) how to write correct compositions, GSAP timelines, Tailwind v4 browser-runtime styles, and first-party adapter animations. In Claude Code, the skills register as slash commands — invoke `/hyperframes` to author compositions, `/hyperframes-cli` for the dev-loop commands (init, lint, preview, render), `/hyperframes-media` for asset preprocessing (TTS, transcription, background removal), `/tailwind` for `init --tailwind` projects, `/gsap` for timeline animation help, or the adapter skills (`/animejs`, `/css-animations`, `/lottie`, `/three`, `/waapi`) when a composition uses those runtimes.
For Claude Design, open [`docs/guides/claude-design-hyperframes.md`](https://github.com/heygen-com/hyperframes/blob/main/docs/guides/claude-design-hyperframes.md) on GitHub and click the download button (↓) to save it, then attach the file to your Claude Design chat. It produces a valid first draft; refine in any AI coding agent. See the [Claude Design guide](https://hyperframes.heygen.com/guides/claude-design).
@@ -184,13 +185,20 @@ HyperFrames ships [skills](https://github.com/vercel-labs/skills) that teach AI
npx skills add heygen-com/hyperframes
```
-| Skill | What it teaches |
-| ------------------------ | -------------------------------------------------------------------------------------------- |
-| `hyperframes` | HTML composition authoring, captions, TTS, audio-reactive animation, transitions |
-| `hyperframes-cli` | CLI commands: init, lint, preview, render, transcribe, tts, doctor |
-| `hyperframes-registry` | Block and component installation via `hyperframes add` |
-| `website-to-hyperframes` | Capture a URL and turn it into a video — full website-to-video pipeline |
-| `gsap` | GSAP animation API, timelines, easing, ScrollTrigger, plugins, React/Vue/Svelte, performance |
+| Skill | What it teaches |
+| ------------------------- | ------------------------------------------------------------------------------------------------------------------- |
+| `hyperframes` | HTML composition authoring, captions, TTS, audio-reactive animation, transitions |
+| `hyperframes-cli` | Dev-loop CLI: init, lint, inspect, preview, render, doctor |
+| `hyperframes-media` | Asset preprocessing: tts (Kokoro), transcribe (Whisper), remove-background (u2net) — voice/model/codec selection |
+| `hyperframes-registry` | Block and component installation via `hyperframes add` |
+| `website-to-hyperframes` | Capture a URL and turn it into a video — full website-to-video pipeline |
+| `remotion-to-hyperframes` | Translate a Remotion (React) composition into a HyperFrames HTML composition |
+| `gsap` | GSAP timelines for HyperFrames: paused registration, deterministic seeking, easing, sequencing, performance |
+| `animejs` | Anime.js animations and timelines registered on `window.__hfAnime` for deterministic HyperFrames seeking |
+| `css-animations` | CSS keyframe animation patterns that HyperFrames can discover, pause, and seek |
+| `lottie` | `lottie-web` and dotLottie players registered on `window.__hfLottie` with local assets and paused playback |
+| `three` | Three.js scenes that render from HyperFrames `hf-seek` events and `window.__hfThreeTime` instead of wall-clock time |
+| `waapi` | Web Animations API `element.animate()` patterns seeked through `document.getAnimations()` |
## Contributing
diff --git a/bun.lock b/bun.lock
index 51eaf3a8d..cba993eed 100644
--- a/bun.lock
+++ b/bun.lock
@@ -21,7 +21,7 @@
},
"packages/cli": {
"name": "@hyperframes/cli",
- "version": "0.4.27",
+ "version": "0.4.45",
"bin": {
"hyperframes": "./dist/cli.js",
},
@@ -35,6 +35,7 @@
"giget": "^3.2.0",
"hono": "^4.0.0",
"mime-types": "^3.0.2",
+ "onnxruntime-node": "^1.20.0",
"open": "^10.0.0",
"postcss": "^8.5.8",
"prettier": "^3.8.1",
@@ -58,14 +59,15 @@
"vitest": "^3.2.4",
},
"optionalDependencies": {
- "@google/genai": "^1.50.0",
+ "@google/genai": "^1.50.1",
},
},
"packages/core": {
"name": "@hyperframes/core",
- "version": "0.4.27",
+ "version": "0.4.45",
"dependencies": {
"@chenglou/pretext": "^0.0.5",
+ "postcss": "^8.5.8",
"sharp": "^0.34.5",
},
"devDependencies": {
@@ -90,7 +92,7 @@
},
"packages/engine": {
"name": "@hyperframes/engine",
- "version": "0.4.27",
+ "version": "0.4.45",
"dependencies": {
"@hono/node-server": "^1.13.0",
"@hyperframes/core": "workspace:^",
@@ -108,7 +110,7 @@
},
"packages/player": {
"name": "@hyperframes/player",
- "version": "0.4.27",
+ "version": "0.4.45",
"devDependencies": {
"@types/bun": "^1.1.0",
"gsap": "^3.12.5",
@@ -120,7 +122,7 @@
},
"packages/producer": {
"name": "@hyperframes/producer",
- "version": "0.4.27",
+ "version": "0.4.45",
"dependencies": {
"@fontsource/archivo-black": "^5.2.8",
"@fontsource/eb-garamond": "^5.2.7",
@@ -160,7 +162,7 @@
},
"packages/shader-transitions": {
"name": "@hyperframes/shader-transitions",
- "version": "0.4.27",
+ "version": "0.4.45",
"dependencies": {
"html2canvas": "^1.4.1",
},
@@ -172,7 +174,7 @@
},
"packages/studio": {
"name": "@hyperframes/studio",
- "version": "0.4.27",
+ "version": "0.4.45",
"dependencies": {
"@codemirror/autocomplete": "^6.20.1",
"@codemirror/commands": "^6.10.3",
@@ -1008,8 +1010,12 @@
"default-browser-id": ["default-browser-id@5.0.1", "", {}, "sha512-x1VCxdX4t+8wVfd1so/9w+vQ4vx7lKd2Qp5tDRutErwmR85OgmfX7RlLRMWafRMY7hbEiXIbudNrjOAPa/hL8Q=="],
+ "define-data-property": ["define-data-property@1.1.4", "", { "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", "gopd": "^1.0.1" } }, "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A=="],
+
"define-lazy-prop": ["define-lazy-prop@3.0.0", "", {}, "sha512-N+MeXYoqr3pOgn8xfyRPREN7gHakLYjhsHhWGT3fWAiL4IkAt0iDw14QiiEm2bE30c5XX5q0FtAA3CK5f9/BUg=="],
+ "define-properties": ["define-properties@1.2.1", "", { "dependencies": { "define-data-property": "^1.0.1", "has-property-descriptors": "^1.0.0", "object-keys": "^1.1.1" } }, "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg=="],
+
"degenerator": ["degenerator@5.0.1", "", { "dependencies": { "ast-types": "0.13.4", "escodegen": "2.1.0", "esprima": "4.0.1" } }, "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ=="],
"detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="],
@@ -1046,12 +1052,18 @@
"error-ex": ["error-ex@1.3.4", "", { "dependencies": { "is-arrayish": "0.2.1" } }, "sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ=="],
+ "es-define-property": ["es-define-property@1.0.1", "", {}, "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g=="],
+
+ "es-errors": ["es-errors@1.3.0", "", {}, "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw=="],
+
"es-module-lexer": ["es-module-lexer@1.7.0", "", {}, "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA=="],
"esbuild": ["esbuild@0.25.12", "", { "optionalDependencies": { "@esbuild/aix-ppc64": "0.25.12", "@esbuild/android-arm": "0.25.12", "@esbuild/android-arm64": "0.25.12", "@esbuild/android-x64": "0.25.12", "@esbuild/darwin-arm64": "0.25.12", "@esbuild/darwin-x64": "0.25.12", "@esbuild/freebsd-arm64": "0.25.12", "@esbuild/freebsd-x64": "0.25.12", "@esbuild/linux-arm": "0.25.12", "@esbuild/linux-arm64": "0.25.12", "@esbuild/linux-ia32": "0.25.12", "@esbuild/linux-loong64": "0.25.12", "@esbuild/linux-mips64el": "0.25.12", "@esbuild/linux-ppc64": "0.25.12", "@esbuild/linux-riscv64": "0.25.12", "@esbuild/linux-s390x": "0.25.12", "@esbuild/linux-x64": "0.25.12", "@esbuild/netbsd-arm64": "0.25.12", "@esbuild/netbsd-x64": "0.25.12", "@esbuild/openbsd-arm64": "0.25.12", "@esbuild/openbsd-x64": "0.25.12", "@esbuild/openharmony-arm64": "0.25.12", "@esbuild/sunos-x64": "0.25.12", "@esbuild/win32-arm64": "0.25.12", "@esbuild/win32-ia32": "0.25.12", "@esbuild/win32-x64": "0.25.12" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-bbPBYYrtZbkt6Os6FiTLCTFxvq4tt3JKall1vRwshA3fdVztsLAatFaZobhkBC8/BrPetoa0oksYoKXoG4ryJg=="],
"escalade": ["escalade@3.2.0", "", {}, "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA=="],
+ "escape-string-regexp": ["escape-string-regexp@4.0.0", "", {}, "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA=="],
+
"escodegen": ["escodegen@2.1.0", "", { "dependencies": { "esprima": "4.0.1", "estraverse": "5.3.0", "esutils": "2.0.3" }, "optionalDependencies": { "source-map": "0.6.1" }, "bin": { "esgenerate": "bin/esgenerate.js", "escodegen": "bin/escodegen.js" } }, "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w=="],
"esprima": ["esprima@4.0.1", "", { "bin": { "esparse": "./bin/esparse.js", "esvalidate": "./bin/esvalidate.js" } }, "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A=="],
@@ -1128,18 +1140,26 @@
"glob-parent": ["glob-parent@6.0.2", "", { "dependencies": { "is-glob": "4.0.3" } }, "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A=="],
+ "global-agent": ["global-agent@4.1.3", "", { "dependencies": { "globalthis": "^1.0.2", "matcher": "^4.0.0", "semver": "^7.3.5", "serialize-error": "^8.1.0" } }, "sha512-KUJEViiuFT3I97t+GYMikLPJS2Lfo/S2F+DQuBWzuzaMPnvt5yyZePzArx36fBzpGTxZjIpDbXLeySLgh+k76g=="],
+
"global-directory": ["global-directory@4.0.1", "", { "dependencies": { "ini": "4.1.1" } }, "sha512-wHTUcDUoZ1H5/0iVqEudYW4/kAlN5cZ3j/bXn0Dpbizl9iaUVeWSHqiOjsgk6OW2bkLclbBjzewBz6weQ1zA2Q=="],
+ "globalthis": ["globalthis@1.0.4", "", { "dependencies": { "define-properties": "^1.2.1", "gopd": "^1.0.1" } }, "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ=="],
+
"google-auth-library": ["google-auth-library@10.6.2", "", { "dependencies": { "base64-js": "^1.3.0", "ecdsa-sig-formatter": "^1.0.11", "gaxios": "^7.1.4", "gcp-metadata": "8.1.2", "google-logging-utils": "1.1.3", "jws": "^4.0.0" } }, "sha512-e27Z6EThmVNNvtYASwQxose/G57rkRuaRbQyxM2bvYLLX/GqWZ5chWq2EBoUchJbCc57eC9ArzO5wMsEmWftCw=="],
"google-logging-utils": ["google-logging-utils@1.1.3", "", {}, "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA=="],
+ "gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="],
+
"gsap": ["gsap@3.15.0", "", {}, "sha512-dMW4CWBTUK1AEEDeZc1g4xpPGIrSf9fJF960qbTZmN/QwZIWY5wgliS6JWl9/25fpTGJrMRtSjGtOmPnfjZB+A=="],
"happy-dom": ["happy-dom@20.9.0", "", { "dependencies": { "@types/node": ">=20.0.0", "@types/whatwg-mimetype": "^3.0.2", "@types/ws": "^8.18.1", "entities": "^7.0.1", "whatwg-mimetype": "^3.0.0", "ws": "^8.18.3" } }, "sha512-GZZ9mKe8r646NUAf/zemnGbjYh4Bt8/MqASJY+pSm5ZDtc3YQox+4gsLI7yi1hba6o+eCsGxpHn5+iEVn31/FQ=="],
"has-flag": ["has-flag@4.0.0", "", {}, "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ=="],
+ "has-property-descriptors": ["has-property-descriptors@1.0.2", "", { "dependencies": { "es-define-property": "^1.0.0" } }, "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg=="],
+
"hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="],
"hono": ["hono@4.12.8", "", {}, "sha512-VJCEvtrezO1IAR+kqEYnxUOoStaQPGrCmX3j4wDTNOcD1uRPFpGlwQUIW8niPuvHXaTUxeOUl5MMDGrl+tmO9A=="],
@@ -1286,6 +1306,8 @@
"make-dir": ["make-dir@4.0.0", "", { "dependencies": { "semver": "7.7.4" } }, "sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw=="],
+ "matcher": ["matcher@4.0.0", "", { "dependencies": { "escape-string-regexp": "^4.0.0" } }, "sha512-S6x5wmcDmsDRRU/c2dkccDwQPXoFczc5+HpQ2lON8pnvHlnvHAHj5WlLVvw6n6vNyHuVugYrFohYxbS+pvFpKQ=="],
+
"mdn-data": ["mdn-data@2.27.1", "", {}, "sha512-9Yubnt3e8A0OKwxYSXyhLymGW4sCufcLG6VdiDdUGVkPhpqLxlvP5vl1983gQjJl3tqbrM731mjaZaP68AgosQ=="],
"meow": ["meow@13.2.0", "", {}, "sha512-pxQJQzB6djGPXh08dacEloMFopsOqGVRKFPYvPOt9XDZ1HasbgDZA74CJGreSU4G3Ak7EFJGoiH2auq+yXISgA=="],
@@ -1336,8 +1358,14 @@
"object-hash": ["object-hash@3.0.0", "", {}, "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw=="],
+ "object-keys": ["object-keys@1.1.1", "", {}, "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA=="],
+
"once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1.0.2" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="],
+ "onnxruntime-common": ["onnxruntime-common@1.25.1", "", {}, "sha512-kKvYQFdos4LWJqhZ+nmKu3NT8NXzw8I5x9fNUKe1rNKcPfNKnYXUtW7JBpcKFsvLtrJashRgVYSbFap4cHxvNg=="],
+
+ "onnxruntime-node": ["onnxruntime-node@1.25.1", "", { "dependencies": { "adm-zip": "^0.5.16", "global-agent": "^4.1.3", "onnxruntime-common": "1.25.1" }, "os": [ "linux", "win32", "darwin", ] }, "sha512-N0M58CGTiTsLkPpx9bxmRFi24GT6r67Qei/GrBEIiDyntcYdXU5vQZp112ypydG9vEKRFgbgUYQJnEi+jll8dg=="],
+
"open": ["open@10.2.0", "", { "dependencies": { "default-browser": "5.5.0", "define-lazy-prop": "3.0.0", "is-inside-container": "1.0.0", "wsl-utils": "0.1.0" } }, "sha512-YgBpdJHPyQ2UE5x+hlSXcnejzAvD0b22U2OuAP+8OnlJT+PjWPxtgmGqKKc+RgTM63U9gN0YzrYc71R2WT/hTA=="],
"oxc-parser": ["oxc-parser@0.120.0", "", { "dependencies": { "@oxc-project/types": "0.120.0" }, "optionalDependencies": { "@oxc-parser/binding-android-arm-eabi": "0.120.0", "@oxc-parser/binding-android-arm64": "0.120.0", "@oxc-parser/binding-darwin-arm64": "0.120.0", "@oxc-parser/binding-darwin-x64": "0.120.0", "@oxc-parser/binding-freebsd-x64": "0.120.0", "@oxc-parser/binding-linux-arm-gnueabihf": "0.120.0", "@oxc-parser/binding-linux-arm-musleabihf": "0.120.0", "@oxc-parser/binding-linux-arm64-gnu": "0.120.0", "@oxc-parser/binding-linux-arm64-musl": "0.120.0", "@oxc-parser/binding-linux-ppc64-gnu": "0.120.0", "@oxc-parser/binding-linux-riscv64-gnu": "0.120.0", "@oxc-parser/binding-linux-riscv64-musl": "0.120.0", "@oxc-parser/binding-linux-s390x-gnu": "0.120.0", "@oxc-parser/binding-linux-x64-gnu": "0.120.0", "@oxc-parser/binding-linux-x64-musl": "0.120.0", "@oxc-parser/binding-openharmony-arm64": "0.120.0", "@oxc-parser/binding-wasm32-wasi": "0.120.0", "@oxc-parser/binding-win32-arm64-msvc": "0.120.0", "@oxc-parser/binding-win32-ia32-msvc": "0.120.0", "@oxc-parser/binding-win32-x64-msvc": "0.120.0" } }, "sha512-WyPWZlcIm+Fkte63FGfgFB8mAAk33aH9h5N9lphXVOHSXEBFFsmYdOBedVKly363aWABjZdaj/m9lBfEY4wt+w=="],
@@ -1458,6 +1486,8 @@
"semver": ["semver@7.7.4", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA=="],
+ "serialize-error": ["serialize-error@8.1.0", "", { "dependencies": { "type-fest": "^0.20.2" } }, "sha512-3NnuWfM6vBYoy5gZFvHiYsVbafvI9vZv/+jlIigFn4oP4zjNPK3LhcY0xSCgeb1a5L8jO71Mit9LlNoi2UfDDQ=="],
+
"sharp": ["sharp@0.34.5", "", { "dependencies": { "@img/colour": "^1.0.0", "detect-libc": "^2.1.2", "semver": "^7.7.3" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "0.34.5", "@img/sharp-darwin-x64": "0.34.5", "@img/sharp-libvips-darwin-arm64": "1.2.4", "@img/sharp-libvips-darwin-x64": "1.2.4", "@img/sharp-libvips-linux-arm": "1.2.4", "@img/sharp-libvips-linux-arm64": "1.2.4", "@img/sharp-libvips-linux-ppc64": "1.2.4", "@img/sharp-libvips-linux-riscv64": "1.2.4", "@img/sharp-libvips-linux-s390x": "1.2.4", "@img/sharp-libvips-linux-x64": "1.2.4", "@img/sharp-libvips-linuxmusl-arm64": "1.2.4", "@img/sharp-libvips-linuxmusl-x64": "1.2.4", "@img/sharp-linux-arm": "0.34.5", "@img/sharp-linux-arm64": "0.34.5", "@img/sharp-linux-ppc64": "0.34.5", "@img/sharp-linux-riscv64": "0.34.5", "@img/sharp-linux-s390x": "0.34.5", "@img/sharp-linux-x64": "0.34.5", "@img/sharp-linuxmusl-arm64": "0.34.5", "@img/sharp-linuxmusl-x64": "0.34.5", "@img/sharp-wasm32": "0.34.5", "@img/sharp-win32-arm64": "0.34.5", "@img/sharp-win32-ia32": "0.34.5", "@img/sharp-win32-x64": "0.34.5" } }, "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg=="],
"shebang-command": ["shebang-command@2.0.0", "", { "dependencies": { "shebang-regex": "3.0.0" } }, "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA=="],
@@ -1564,6 +1594,8 @@
"tsx": ["tsx@4.21.0", "", { "dependencies": { "esbuild": "0.27.4", "get-tsconfig": "4.13.6" }, "optionalDependencies": { "fsevents": "2.3.3" }, "bin": { "tsx": "dist/cli.mjs" } }, "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw=="],
+ "type-fest": ["type-fest@0.20.2", "", {}, "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ=="],
+
"typed-query-selector": ["typed-query-selector@2.12.1", "", {}, "sha512-uzR+FzI8qrUEIu96oaeBJmd9E7CFEiQ3goA5qCVgc4s5llSubcfGHq9yUstZx/k4s9dXHVKsE35YWoFyvEqEHA=="],
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
diff --git a/docs/catalog/blocks/apple-money-count.mdx b/docs/catalog/blocks/apple-money-count.mdx
new file mode 100644
index 000000000..34056c079
--- /dev/null
+++ b/docs/catalog/blocks/apple-money-count.mdx
@@ -0,0 +1,53 @@
+---
+title: "Apple Money Count"
+description: "Apple-style finance counter that counts from $0 to $10,000, flashes green, and bursts money icons with sound."
+---
+
+# Apple Money Count
+
+Apple-style finance counter that counts from $0 to $10,000, flashes green, and bursts money icons with sound.
+
+`showcase` `finance` `kinetic` `youtube` `sfx`
+
+Created by [Stronkter](https://x.com/Stronkter).
+
+## Source Prompt
+
+```text
+📷HyperFrames by HeyGen Make me a five-second video of, on a white background, of a Apple-style bold font counting from $0 to $10,000. Once it counts to $10,000, it changes to a green color and the screen also flashes green for a second, and then money icons come out of the $10,000 amount all over the screen and then disappear.
+```
+
+
+
+## Install
+
+
+
+```bash Terminal
+npx hyperframes add apple-money-count
+```
+
+
+
+## Details
+
+| Property | Value |
+| --- | --- |
+| Type | Block |
+| Dimensions | 1920×1080 |
+| Duration | 5s |
+
+## Files
+
+| File | Target | Type |
+| --- | --- | --- |
+| `apple-money-count.html` | `compositions/apple-money-count.html` | hyperframes:composition |
+| `assets/sfx-production.wav` | `assets/sfx-production.wav` | hyperframes:asset |
+
+## Usage
+
+After installing, add the block to your host composition:
+
+```html
+
+```
diff --git a/docs/catalog/blocks/blue-sweater-intro-video.mdx b/docs/catalog/blocks/blue-sweater-intro-video.mdx
new file mode 100644
index 000000000..2c3417dff
--- /dev/null
+++ b/docs/catalog/blocks/blue-sweater-intro-video.mdx
@@ -0,0 +1,48 @@
+---
+title: "Blue Sweater Intro Video"
+description: "Warm AI creator intro sequence that resolves into an X follow card for @_blue_sweater_."
+---
+
+# Blue Sweater Intro Video
+
+Warm AI creator intro sequence that resolves into an X follow card for @_blue_sweater_.
+
+`showcase` `ai` `creator` `sfx`
+
+Created by [Joe Sai](https://x.com/_blue_sweater_).
+
+
+
+## Install
+
+
+
+```bash Terminal
+npx hyperframes add blue-sweater-intro-video
+```
+
+
+
+## Details
+
+| Property | Value |
+| --- | --- |
+| Type | Block |
+| Dimensions | 1920×1080 |
+| Duration | 12s |
+
+## Files
+
+| File | Target | Type |
+| --- | --- | --- |
+| `blue-sweater-intro-video.html` | `compositions/blue-sweater-intro-video.html` | hyperframes:composition |
+| `assets/joe-sai-avatar.png` | `assets/joe-sai-avatar.png` | hyperframes:asset |
+| `assets/sfx/integrated-melodic-tech-mix.wav` | `assets/sfx/integrated-melodic-tech-mix.wav` | hyperframes:asset |
+
+## Usage
+
+After installing, add the block to your host composition:
+
+```html
+
+```
diff --git a/docs/catalog/blocks/north-korea-locked-down.mdx b/docs/catalog/blocks/north-korea-locked-down.mdx
new file mode 100644
index 000000000..55262c7e6
--- /dev/null
+++ b/docs/catalog/blocks/north-korea-locked-down.mdx
@@ -0,0 +1,53 @@
+---
+title: "North Korea Locked Down"
+description: "Realistic map zoom into North Korea with a red scribble circle, locked-down pop-up label, and reddish editorial wash."
+---
+
+# North Korea Locked Down
+
+Realistic map zoom into North Korea with a red scribble circle, locked-down pop-up label, and reddish editorial wash.
+
+`showcase` `map` `annotation` `youtube` `kinetic`
+
+Created by [Stronkter](https://x.com/Stronkter).
+
+## Source Prompt
+
+```text
+use 📷HyperFrames by HeyGen and Image Gen if you need it for assets or like png images of assets without backround to make a youtube style camera moving in out and other things that are in youtube videos, to make a video of a map zooms in on north korea and a scribble style circle circles the country and a text pops up above it saying locked down when the text apears the video turns a bit redish make the video 7 seconds long id like the map to look realistic and accurate to real lfe
+```
+
+
+
+## Install
+
+
+
+```bash Terminal
+npx hyperframes add north-korea-locked-down
+```
+
+
+
+## Details
+
+| Property | Value |
+| --- | --- |
+| Type | Block |
+| Dimensions | 1920×1080 |
+| Duration | 7s |
+
+## Files
+
+| File | Target | Type |
+| --- | --- | --- |
+| `north-korea-locked-down.html` | `compositions/north-korea-locked-down.html` | hyperframes:composition |
+| `assets/korea-map.png` | `assets/korea-map.png` | hyperframes:asset |
+
+## Usage
+
+After installing, add the block to your host composition:
+
+```html
+
+```
diff --git a/docs/catalog/blocks/nyc-paris-flight.mdx b/docs/catalog/blocks/nyc-paris-flight.mdx
new file mode 100644
index 000000000..f384e7623
--- /dev/null
+++ b/docs/catalog/blocks/nyc-paris-flight.mdx
@@ -0,0 +1,54 @@
+---
+title: "NYC Paris Flight"
+description: "Apple-style realistic map animation with a plane flying from New York to Paris, marker circle, landing pop, and sound effects."
+---
+
+# NYC Paris Flight
+
+Apple-style realistic map animation with a plane flying from New York to Paris, marker circle, landing pop, and sound effects.
+
+`showcase` `travel` `map` `youtube` `sfx`
+
+Created by [Stronkter](https://x.com/Stronkter).
+
+## Source Prompt
+
+```text
+📷HyperFrames by HeyGen Make a six-second Apple-style font bold video of a plane going from New York to Paris. A map animation, and then it shows the plane going from New York to Paris. Make the fonts Apple-style bold and make the map actual realistic, actually realistic. Before it lands in Paris, do a doodle circle in red around Paris, and then it lands in the doodle circle, and the video basically cuts to a white screen at the end. Also add sound effects for everything as well. Every nice motion, every the plane for pop-ups, bubble, pop-up effect, everything you want.
+```
+
+
+
+## Install
+
+
+
+```bash Terminal
+npx hyperframes add nyc-paris-flight
+```
+
+
+
+## Details
+
+| Property | Value |
+| --- | --- |
+| Type | Block |
+| Dimensions | 1920×1080 |
+| Duration | 6s |
+
+## Files
+
+| File | Target | Type |
+| --- | --- | --- |
+| `nyc-paris-flight.html` | `compositions/nyc-paris-flight.html` | hyperframes:composition |
+| `assets/map-nyc-paris.png` | `assets/map-nyc-paris.png` | hyperframes:asset |
+| `assets/sfx-mix.wav` | `assets/sfx-mix.wav` | hyperframes:asset |
+
+## Usage
+
+After installing, add the block to your host composition:
+
+```html
+
+```
diff --git a/docs/catalog/blocks/vpn-youtube-spot.mdx b/docs/catalog/blocks/vpn-youtube-spot.mdx
new file mode 100644
index 000000000..d2d044b9e
--- /dev/null
+++ b/docs/catalog/blocks/vpn-youtube-spot.mdx
@@ -0,0 +1,53 @@
+---
+title: "VPN YouTube Spot"
+description: "Snappy Apple-style YouTube insert showing a phone finding and installing a friendly VPN app with sound effects."
+---
+
+# VPN YouTube Spot
+
+Snappy Apple-style YouTube insert showing a phone finding and installing a friendly VPN app with sound effects.
+
+`app` `showcase` `youtube` `sfx`
+
+Created by [Stronkter](https://x.com/Stronkter).
+
+## Source Prompt
+
+```text
+HyperFrames by HeyGen make me a 7s video with Apple-style bold font and styling: a phone scrolling in an app store, clicking on a friendly VPN app called VPN, installing it, then snapping down and fading to a white background. Make it snappy and polished for a YouTube insert, with sound effects, 60fps, and 1920x1080.
+```
+
+
+
+## Install
+
+
+
+```bash Terminal
+npx hyperframes add vpn-youtube-spot
+```
+
+
+
+## Details
+
+| Property | Value |
+| --- | --- |
+| Type | Block |
+| Dimensions | 1920×1080 |
+| Duration | 7s |
+
+## Files
+
+| File | Target | Type |
+| --- | --- | --- |
+| `vpn-youtube-spot.html` | `compositions/vpn-youtube-spot.html` | hyperframes:composition |
+| `assets/vpn-sfx.wav` | `assets/vpn-sfx.wav` | hyperframes:asset |
+
+## Usage
+
+After installing, add the block to your host composition:
+
+```html
+
+```
diff --git a/docs/community/adopters.mdx b/docs/community/adopters.mdx
new file mode 100644
index 000000000..4d7e42caa
--- /dev/null
+++ b/docs/community/adopters.mdx
@@ -0,0 +1,33 @@
+---
+title: Adopters
+description: Organizations using HyperFrames in production or actively evaluating it.
+---
+
+The teams below are shipping with HyperFrames. If your organization uses HyperFrames — in production, in evaluation, or in a side project — we'd love to add you.
+
+## How to add your organization
+
+Open a pull request that adds your team to [`ADOPTERS.md`](https://github.com/heygen-com/hyperframes/blob/main/ADOPTERS.md) at the repository root. The format is intentionally lightweight:
+
+- **Organization** — your company or project name, linked to your website.
+- **Contact** — a GitHub handle so other adopters can reach out.
+- **How HyperFrames is used** — one sentence on the use case.
+- **Logo** _(optional)_ — a square logo or icon to show on this page. If you skip this, your entry still appears in the table below.
+
+If you'd rather not be listed publicly, we'd still love to hear about your usage — drop a note in [our Discord](https://discord.gg/EbK98HBPdk).
+
+## Production
+
+
+
+ Powers AI-generated video composition and rendering across HeyGen's video product surface.
+
+
+
+| Organization | Contact | How HyperFrames is used |
+| -------------------------------- | -------------------------------------------- | ------------------------------------------------------------------------------------------ |
+| [HeyGen](https://www.heygen.com) | [@jrusso1020](https://github.com/jrusso1020) | Powers AI-generated video composition and rendering across HeyGen's video product surface. |
+
+## Evaluating
+
+_Open a PR to add your organization here if you're trying HyperFrames in a non-production context._
diff --git a/docs/concepts/compositions.mdx b/docs/concepts/compositions.mdx
index 5c9dbfa26..b9e698a40 100644
--- a/docs/concepts/compositions.mdx
+++ b/docs/concepts/compositions.mdx
@@ -129,52 +129,62 @@ Every composition has two layers:
HyperFrames does not automatically bind `data-var-*` attributes into your composition DOM or CSS.
-Today, the supported pattern is:
+The supported pattern is:
-1. Pass per-instance values on the composition host with `data-variable-values`
-2. Read those values inside the composition and apply them in your own script
+1. Declare the variables once on the sub-composition's root element with `data-composition-variables` (id + type + default).
+2. Pass per-instance values on each composition host with `data-variable-values`.
+3. Read the resolved values inside the composition with `window.__hyperframes.getVariables()`. The runtime layers the host's `data-variable-values` over the declared defaults on a per-instance basis, so the same source can be embedded multiple times with different values.
```html index.html
+
```
```html compositions/card.html
-
-
-
Fallback
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
```
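+
+A minimal sketch of the pattern above (illustrative only: the `#title` selector and the host timing values are assumptions, and the sub-comp's `data-composition-variables` declaration is omitted for brevity):
+
+```html
+<!-- Host: the same source embedded twice with different values -->
+<div data-composition-src="./compositions/card.html" data-start="0" data-duration="3"
+     data-variable-values='{"title":"Hello"}'></div>
+<div data-composition-src="./compositions/card.html" data-start="3" data-duration="3"
+     data-variable-values='{"title":"World"}'></div>
+
+<!-- Inside card.html: read the merged values (declared defaults + host overrides) -->
+<script>
+  const vars = window.__hyperframes.getVariables();
+  document.querySelector("#title").textContent = vars.title;
+</script>
+```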
-If you are building tooling on top of `@hyperframes/core`, you can also declare variable metadata separately with `data-composition-variables` and read it via `extractCompositionMetadata()`. That metadata is descriptive only; you still apply the actual values manually inside the composition.
+If you are building tooling on top of `@hyperframes/core`, the same `data-composition-variables` array is readable via `extractCompositionMetadata()` for Studio editing UI and analysis pipelines.
## Listing Compositions
diff --git a/docs/concepts/data-attributes.mdx b/docs/concepts/data-attributes.mdx
index dc1c3fc1f..ed1caaff1 100644
--- a/docs/concepts/data-attributes.mdx
+++ b/docs/concepts/data-attributes.mdx
@@ -29,7 +29,8 @@ Hyperframes uses HTML data attributes to control timing, media playback, and [co
| `data-width` | `"1920"` | Composition width in pixels |
| `data-height` | `"1080"` | Composition height in pixels |
| `data-composition-src` | `"./intro.html"` | Path to external [composition](/concepts/compositions) HTML file |
-| `data-variable-values` | `'{"title":"Hello"}'` | JSON object of values passed to a nested composition. HyperFrames carries these values through, but your composition script must read and apply them manually. |
+| `data-variable-values` | `'{"title":"Hello"}'` | JSON object of values passed to a nested composition. Inside the sub-composition, read them via `window.__hyperframes.getVariables()` — the runtime layers these over the sub-comp's own `data-composition-variables` defaults and exposes the merged result on a per-instance basis (the same source can be embedded multiple times with different values). |
+| `data-composition-variables` | `'[{"id":"title","type":"string","label":"Title","default":"Hello"}]'` | JSON array of declared variables (`id`, `type`, `label`, `default`). Drives Studio editing UI and provides defaults read by `window.__hyperframes.getVariables()`. The CLI flag `hyperframes render --variables ''` overrides these defaults at top-level render time; host elements override them per-instance via `data-variable-values`. |
## Element Visibility
diff --git a/docs/concepts/frame-adapters.mdx b/docs/concepts/frame-adapters.mdx
index 3a1d9c3c1..63388431a 100644
--- a/docs/concepts/frame-adapters.mdx
+++ b/docs/concepts/frame-adapters.mdx
@@ -104,15 +104,16 @@ These rules are non-negotiable for any adapter. They are the foundation of Hyper
## Supported Runtimes
-First-party adapters:
-
-| Runtime | Seek Method | Status |
-|---------|------------|--------|
-| [GSAP](/guides/gsap-animation) | `timeline.seek(frame / fps)` | Available |
-| CSS/WAAPI | `animation.currentTime` | Planned |
-| Lottie | Set animation frame/progress | Planned |
-| Three.js/WebGL | Compute deterministic scene state | Planned |
-| SVG/Anime | Implement seek + duration contract | Planned |
+First-party runtime adapters:
+
+| Runtime | Seek Method | Skill |
+|---------|-------------|-------|
+| [GSAP](/guides/gsap-animation) | `timeline.totalTime(timeSeconds)` or `timeline.seek(timeSeconds)` | `/gsap` |
+| Anime.js | `instance.seek(timeMs)` for animations registered on `window.__hfAnime` | `/animejs` |
+| CSS keyframes | Browser `Animation.currentTime`, with paused negative-delay fallback | `/css-animations` |
+| Lottie / dotLottie | `goToAndStop(timeMs, false)`, raw-frame setters, or player seek APIs | `/lottie` |
+| Three.js / WebGL | `hf-seek` events plus `window.__hfThreeTime` for deterministic scene rendering | `/three` |
+| Web Animations API | `document.getAnimations()` and `animation.currentTime` | `/waapi` |
Community adapters are welcome -- if it can seek by frame, it belongs in Hyperframes.
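+
+As a rough illustration of the seek contract (not the adapter's actual source; the fps value is whatever your composition declares), the WAAPI row boils down to:
+
+```js
+// Sketch: pause and seek every Web Animations API animation to a given frame.
+function seekWaapiAnimations(frame, fps) {
+  const timeMs = (frame / fps) * 1000; // Animation.currentTime is in milliseconds
+  for (const animation of document.getAnimations()) {
+    animation.pause();              // never let wall-clock playback advance
+    animation.currentTime = timeMs; // deterministic state for this frame
+  }
+}
+```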
diff --git a/docs/contributing.mdx b/docs/contributing.mdx
index 17d1e19fc..8029fa01b 100644
--- a/docs/contributing.mdx
+++ b/docs/contributing.mdx
@@ -123,6 +123,8 @@ All of the following must pass before your PR can be merged:
- PRs require at least 1 approval from a maintainer
- Keep PRs focused — one feature or fix per PR
+- Target alpha-only PRs at `next` instead of `main`; see
+ [Release channels](/contributing/release-channels) for branch policy details
- Include a clear description of what changed and why
- Add tests for new features and bug fixes
diff --git a/docs/docs.json b/docs/docs.json
index 8548fd0b1..7867ffc88 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -71,13 +71,16 @@
"pages": [
"guides/website-to-video",
"guides/claude-design",
+ "guides/open-design",
"guides/prompting",
"guides/hyperframes-vs-remotion",
"guides/gsap-animation",
"guides/rendering",
+ "guides/remove-background",
"guides/hdr",
"guides/performance",
"guides/timeline-editing",
+ "guides/video-editor-cheatsheet",
"guides/common-mistakes",
"guides/troubleshooting"
]
@@ -140,7 +143,12 @@
"group": "Showcases",
"pages": [
"catalog/blocks/app-showcase",
- "catalog/blocks/ui-3d-reveal"
+ "catalog/blocks/apple-money-count",
+ "catalog/blocks/blue-sweater-intro-video",
+ "catalog/blocks/north-korea-locked-down",
+ "catalog/blocks/nyc-paris-flight",
+ "catalog/blocks/ui-3d-reveal",
+ "catalog/blocks/vpn-youtube-spot"
]
},
{
@@ -198,6 +206,12 @@
"contributing/release-channels",
"contributing/testing-local-changes"
]
+ },
+ {
+ "group": "Community",
+ "pages": [
+ "community/adopters"
+ ]
}
]
}
diff --git a/docs/guides/common-mistakes.mdx b/docs/guides/common-mistakes.mdx
index 4872c295e..015c3e619 100644
--- a/docs/guides/common-mistakes.mdx
+++ b/docs/guides/common-mistakes.mdx
@@ -190,11 +190,11 @@ These are mistakes that cannot be caught by the linter. For automated checks, ru
- **Symptom:** Rendered with `--hdr`, but the output looks the same as SDR or `ffprobe` reports `color_transfer=bt709`.
+ **Symptom:** Expected an HDR render, but the output looks the same as SDR or `ffprobe` reports `color_transfer=bt709`.
- **Cause:** `--hdr` is a *detection* flag, not a *force* flag. Hyperframes only switches to HDR encoding when a source `
diff --git a/docs/guides/hdr.mdx b/docs/guides/hdr.mdx
index 5774cfae6..bd079461a 100644
--- a/docs/guides/hdr.mdx
+++ b/docs/guides/hdr.mdx
@@ -3,10 +3,10 @@ title: HDR Rendering
description: "Render compositions to HDR10 MP4 (BT.2020 PQ or HLG, 10-bit H.265) when sources contain HDR video or images."
---
-Hyperframes can render to HDR10 MP4 (H.265 10-bit, BT.2020) when your composition references HDR video or HDR still images. HDR is opt-in via the `--hdr` flag — it auto-detects HDR sources and falls back to SDR when none are present.
+Hyperframes can render to HDR10 MP4 (H.265 10-bit, BT.2020) when your composition references HDR video or HDR still images. HDR is auto-detected by default from your media sources and falls back to SDR when none are present.
- The `--hdr` flag does not *force* HDR. It enables HDR detection. If your composition contains only SDR media, the flag is a no-op and you get a normal SDR render.
+ By default, Hyperframes probes your media and enables HDR only when HDR sources are present. Use `--hdr` to force HDR even without HDR sources, or `--sdr` to force SDR even when HDR sources are present.
## Quickstart
@@ -20,12 +20,12 @@ Hyperframes can render to HDR10 MP4 (H.265 10-bit, BT.2020) when your compositio
See [Source Media](#source-media-requirements) for full details.
-
+
```bash Terminal
- npx hyperframes render --hdr --output output.mp4
+ npx hyperframes render --output output.mp4
```
- HDR output requires `--format mp4`. If you also pass `--format mov` or `--format webm`, Hyperframes logs a warning and falls back to SDR.
+ HDR output requires `--format mp4`. If Hyperframes detects HDR sources, it renders HDR automatically. If you also pass `--format mov` or `--format webm`, Hyperframes logs a warning and falls back to SDR.
Use `ffprobe` to confirm the encoded stream carries HDR color tagging and HDR10 metadata:
@@ -40,14 +40,14 @@ Hyperframes can render to HDR10 MP4 (H.265 10-bit, BT.2020) when your compositio
## How HDR Mode Works
-When `--hdr` is set, the producer:
+During render, the producer:
- Runs `ffprobe` on each `` and `` source to read its color space (primaries, transfer function, matrix). Probing is gated on `--hdr` to avoid `ffprobe` overhead on SDR-only renders.
+ Runs `ffprobe` on each `` and `` source to read its color space (primaries, transfer function, matrix). This probe drives the default auto-detect behavior and is skipped only when you explicitly force SDR with `--sdr`.
- If any source uses PQ (`smpte2084`), the output uses **PQ**. Otherwise, if any source uses HLG (`arib-std-b67`), the output uses **HLG**. If no HDR sources are found, the flag is a no-op and you get an SDR render.
+ If any source uses PQ (`smpte2084`), the output uses **PQ**. Otherwise, if any source uses HLG (`arib-std-b67`), the output uses **HLG**. If no HDR sources are found, the render stays SDR.
The video encoder switches to `libx265` with `-pix_fmt yuv420p10le`, color tagging `colorprim=bt2020:transfer=:colormatrix=bt2020nc`, and HDR10 static metadata (`master-display` and `max-cll`). Without that metadata, players (QuickTime, YouTube, HDR TVs) tone-map the stream as if it were SDR BT.2020 — which looks wrong.
@@ -89,7 +89,7 @@ Hyperframes supports HDR still images delivered as **16-bit PNGs** tagged with B
src="./assets/hdr-photo.png" />
```
-When `--hdr` is set, the image is decoded once to 16-bit linear-light RGB and composited natively into the HDR output.
+When HDR is enabled, the image is decoded once to 16-bit linear-light RGB and composited natively into the HDR output.
HDR `` decoding is limited to **16-bit PNG**. JPEG, WebP, AVIF, and APNG are not recognized as HDR sources — they load through the normal SDR DOM path. For HDR motion, use a `` element.
@@ -113,7 +113,7 @@ This is the same pipeline that handles compositions where, for example, an HDR d
| `mov` | No — falls back to SDR |
| `webm` | No — falls back to SDR |
-If you set `--hdr` together with `--format mov` or `--format webm`, Hyperframes logs a message and produces the equivalent SDR render. There is no error — the render still completes — so check the logs (or your verification step) to confirm you got HDR.
+If HDR is enabled and you also pass `--format mov` or `--format webm`, Hyperframes logs a message and produces the equivalent SDR render. There is no error — the render still completes — so check the logs (or your verification step) to confirm you got HDR.
## Verifying HDR Output
@@ -147,10 +147,10 @@ For HLG renders the only difference is `color_transfer=arib-std-b67` — the res
## Docker Rendering
-`--hdr` is forwarded into the Docker render pipeline, so you can produce HDR10 MP4 output from the containerized renderer:
+Docker uses the same auto-detect logic as local rendering, so you can produce HDR10 MP4 output from the containerized renderer without extra flags:
```bash Terminal
-npx hyperframes render --hdr --docker --output output.mp4
+npx hyperframes render --docker --output output.mp4
```
The container runs the same probe → composite → encode pipeline as the local renderer. Verify the output with the same `ffprobe` checks described in [Verifying HDR output](#verifying-hdr-output).
@@ -161,7 +161,7 @@ The container runs the same probe → composite → encode pipeline as the local
## Limitations
-- **MP4 only** — `--hdr` with `--format mov` or `--format webm` falls back to SDR
+- **MP4 only** — HDR output with `--format mov` or `--format webm` falls back to SDR
- **HDR images: 16-bit PNG only** — other formats (JPEG, WebP, AVIF, APNG) are not decoded as HDR and fall through the SDR DOM path
- **H.265 only — H.264 is stripped** — calling the encoder with `codec: "h264"` and `hdr: { transfer }` is rejected; the encoder logs a warning, drops `hdr`, and tags the output as SDR/BT.709. `libx264` cannot encode HDR, so the alternative would be a "half-HDR" file (BT.2020 container tags but a BT.709 VUI block in the bitstream) which confuses HDR-aware players.
- **GPU H.265 emits color tags but no static mastering metadata** — `useGpu: true` with HDR (nvenc, videotoolbox, qsv, vaapi) tags the stream with BT.2020 + the correct transfer (smpte2084 / arib-std-b67) but does **not** embed `master-display` or `max-cll` SEI. ffmpeg does not let those flags pass through hardware encoders. The output is suitable for previews and authoring but not for HDR10-aware delivery (Apple TV, YouTube, Netflix). For spec-compliant HDR10 production output, leave `useGpu: false` so the SW `libx265` path embeds the mastering metadata.
@@ -172,7 +172,7 @@ The container runs the same probe → composite → encode pipeline as the local
| Symptom | Likely cause |
|---------|--------------|
-| Output looks identical to SDR | Source media is SDR — `--hdr` is a no-op without an HDR source. Run `ffprobe` on your inputs |
+| Output looks identical to SDR | Source media is SDR, or SDR was forced with `--sdr`. Run `ffprobe` on your inputs and check the render logs |
| Output is "kind of HDR" but tone-mapped wrong on YouTube/QuickTime | Missing HDR10 static metadata on the encoded stream. Verify with the ffprobe snippet above |
| Docker render is much slower than local | Expected — the container falls back to software WebGL for SDR DOM capture. Pixel output is the same |
| Used `--format webm` and got SDR | Expected — HDR output is MP4 only |
@@ -185,7 +185,7 @@ The container runs the same probe → composite → encode pipeline as the local
Local vs Docker, quality presets, workers
- Full `render` command reference including `--hdr`
+ Full `render` command reference including HDR auto-detect, `--hdr`, and `--sdr`
Public HDR utilities exported from `@hyperframes/engine`
diff --git a/docs/guides/open-design-hyperframes.md b/docs/guides/open-design-hyperframes.md
new file mode 100644
index 000000000..a3899a6fb
--- /dev/null
+++ b/docs/guides/open-design-hyperframes.md
@@ -0,0 +1,421 @@
+---
+name: hyperframes-handoff
+description: |
+ Produce a HyperFrames-valid HTML composition — paused GSAP timeline, data
+ attributes, scene structure — that any AI coding agent can immediately
+ refine with `npx hyperframes lint` and `npx hyperframes preview`. Use when
+ the brief mentions "video", "reel", "motion graphic", "title card",
+ "animated explainer", or pairs Open Design with HyperFrames for export.
+triggers:
+ - "hyperframes"
+ - "video"
+ - "reel"
+ - "motion graphic"
+ - "animated explainer"
+ - "title card"
+ - "kinetic typography"
+ - "动效视频"
+ - "视频海报"
+od:
+ mode: prototype
+ platform: desktop
+ scenario: marketing
+ preview:
+ type: html
+ entry: index.html
+ design_system:
+ requires: true
+ sections: [color, typography, layout, motion]
+ example_prompt: "Design a 15-second Instagram reel announcing dark mode for Taskflow (#6C5CE7). Output as a HyperFrames composition I can render locally."
+---
+
+# HyperFrames Handoff — for Open Design
+
+> **Drop this file at `skills/hyperframes-handoff/SKILL.md` inside your local
+> [Open Design](https://github.com/nexu-io/open-design) checkout, restart the
+> daemon, and the skill appears in the picker. Or attach it to a fresh chat
+> as a one-shot.**
+
+This skill teaches Open Design to emit a **valid first draft** of a
+[HyperFrames](https://github.com/heygen-com/hyperframes) composition — plain
+HTML + CSS + a paused GSAP timeline. The CLI (`npx hyperframes render
+index.html`) turns the HTML into an MP4. You author the HTML; the user runs
+the render locally.
+
+**HyperFrames replaces the default video-artifact workflow.** Do NOT emit a
+React/Babel composition, do NOT call other prototype skills, do NOT use the
+sandboxed iframe's wall-clock playback for timing decisions. Plain HTML +
+GSAP only. Treat the [`claude-design-hyperframes.md`](https://github.com/heygen-com/hyperframes/blob/main/docs/guides/claude-design-hyperframes.md)
+companion document as the **upstream spec for HyperFrames structural rules** —
+the rules below condense it to what Open Design needs at emission time, but
+that file is the source of truth for shader catalogs, skeleton variants, and
+edge cases.
+
+---
+
+## Your role
+
+**You produce a valid first draft — not a final render.** Open Design's
+strengths are visual identity (driven by the active `DESIGN.md`), layout, and
+brand-accurate content decisions. The user (or their coding agent) handles
+animation polish, timing micro-adjustments, and production QA after handoff.
+
+The user's workflow:
+
+1. **Open Design** (you) — pick palette + typography from the active
+ `DESIGN.md`, fill scene content, lay down first-pass GSAP entrances and
+ mid-scene activity, pick shader transitions for 2–3 key moments
+2. **Save to disk** — Open Design writes the project into
+ `.od/projects//` (real `cwd`, agent-ready)
+3. **Any AI coding agent** (Claude Code, Codex, Cursor, …) — `npx hyperframes
+ lint`, `npx hyperframes preview`, then iterate timing, eases, shader
+ choices, pacing
+
+Your output must be a **valid starting point a coding agent can open and
+refine immediately** — no structural fixes needed.
+
+### What you optimize for
+
+- The active `DESIGN.md` palette + typography bound onto `:root` (never
+ freestyle a palette when one is active)
+- Strong visual layout per scene (hierarchy, spacing, readability at video
+ size — 60px+ headlines, 20px+ body)
+- Scene content that tells the story (headlines, stats, copy, imagery)
+- Structural validity (passes `npx hyperframes lint` with zero errors)
+- Appropriate shader choices for the mood (use the catalog at
+ [hyperframes.heygen.com/catalog](https://hyperframes.heygen.com/catalog))
+- Reasonable scene count and durations for the video type
+
+### What the coding agent polishes after you
+
+You ship every scene with entrance tweens, breathing motion, and shader
+transitions. The video plays with full motion from your first draft. The
+agent does the **edit-bay refinement**: ease curve tweaks, stagger timing,
+scene-duration micro-adjustments, richer mid-scene activity, shader swaps,
+production QA.
+
+---
+
+## Hard rules (must-pass before emitting ``)
+
+These are HyperFrames-structural and non-negotiable. Open Design's
+five-dimensional self-critique gate must verify all of them before emission.
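+A minimal skeleton that combines rules 2–4 appears right after this list.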
+
+1. **Single HTML file.** `` through ``, all CSS inline,
+ GSAP loaded from CDN. No build step.
+2. **Root composition element.** A single `<div id="stage">` with:
+ - `data-composition-id="<id>"`
+ - `data-start="0"`
+ - `data-width` / `data-height` (e.g. `1080` × `1920` for 9:16, `1920` ×
+ `1080` for 16:9, `1080` × `1080` for square)
+ - `data-duration="<total seconds>"` matching the sum of scene durations
+3. **Scenes are children of `#stage`.** Each scene is `<div class="scene">` with:
+ - `data-start="<seconds>"`
+ - `data-duration="<seconds>"`
+ - `data-track-index="0"` (HyperFrames uses tracks for layering; visual
+ scenes share track 0 unless you intentionally overlap)
+ - A `.scene-content` wrapper inside it that holds the readable content
+ (headlines, stats, imagery). Decoratives (glows, grain, vignette) live
+ directly inside `.scene` but **outside** `.scene-content`.
+4. **GSAP timeline registered paused.** A single timeline created with
+ `gsap.timeline({ paused: true })` and registered on
+ `window.__timelines = window.__timelines || {}; window.__timelines["<composition-id>"] = tl;`.
+ This is what makes the composition deterministically seekable — the
+ HyperFrames engine drives the playhead.
+5. **`tl.from()` for entrances.** Animate FROM offscreen/invisible TO the
+ resting CSS position. Offset the first tween 0.1–0.3s into each scene to
+ avoid jump-cuts.
+6. **Mid-scene activity on every scene.** Every visible element keeps moving
+ after its entrance. A still element on a still background is a JPEG with
+ a progress bar. Use at least 2 patterns per scene from the table below.
+7. **Shader transitions ONLY at scene boundaries**, and at most 2–3 in the
+ whole video. Use HyperFrames' built-in shader blocks
+ (`flash-through-white`, `whip-pan`, `cinematic-zoom`, `glitch`,
+ `ripple-waves`, `light-leak`, `cross-warp-morph`, `chromatic-radial-split`,
+ `swirl-vortex`, `gravitational-lens`, `domain-warp-dissolve`, `ridged-burn`,
+ `sdf-iris`, `thermal-distortion`). Hard cuts everywhere else.
+8. **No external assets the user didn't provide.** Use solid colors, CSS
+ gradients, inline SVG, `data:` images. Reference the user's uploaded
+ images by their saved filenames; don't invent stock URLs.
+9. **`preview.html` token forwarding** — emit a sibling `preview.html` that
+ loads `index.html` in an iframe and forwards URL hash tokens (`?frame=…`
+ for scrubbing). Skeleton is in §6.
+
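+A minimal skeleton that satisfies rules 2–4 looks roughly like this (GSAP's CDN `<script>` tag is omitted; the composition id, dimensions, durations, and selectors are placeholders, not required names):
+
+```html
+<div id="stage" data-composition-id="promo" data-start="0"
+     data-width="1080" data-height="1920" data-duration="12">
+  <div class="scene" id="scene-1" data-start="0" data-duration="4" data-track-index="0">
+    <div class="scene-content"><!-- headlines, stats, imagery --></div>
+    <div class="glow"></div><!-- decoratives stay outside .scene-content -->
+  </div>
+  <!-- more scenes, tiled end-to-end -->
+</div>
+<script>
+  const tl = gsap.timeline({ paused: true }); // rule 4: never autoplay
+  tl.from("#scene-1 .scene-content", { y: 40, autoAlpha: 0, duration: 0.6, ease: "power3.out" }, 0.2);
+  window.__timelines = window.__timelines || {};
+  window.__timelines["promo"] = tl; // the HyperFrames engine drives the playhead
+</script>
+```
+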
+---
+
+## Step 1 — Understand the brief
+
+**Gate:** You can name the subject, duration, aspect ratio, and at least one
+source of visual direction.
+
+Open Design's `RULE 1` already covers this — turn 1 is a clarifying-questions turn
+when the brief is sparse. **Do not skip it for video briefs**; pacing
+decisions hinge on locking duration and aspect ratio early.
+
+Inputs in order of reliability:
+
+1. **Active `DESIGN.md`** (strongest) — Open Design always has one bound when
+ this skill runs. Read its palette, typography, and motion sections; bind
+ verbatim onto `:root`.
+2. **Attachments** — screenshots, PDFs, brand guides; mine for any signal the
+ active DS doesn't already cover.
+3. **Pasted content** — hex codes, copy, scripts, exact durations.
+4. **Web research** (`WebFetch` + grep for hex) — only if the user names a
+ brand and the active DS isn't theirs.
+
+---
+
+## Step 2 — Pick a skeleton, fill identity
+
+**Gate:** A working `index.html` exists with the active DS's palette and
+typography on `:root`. The preview renders even if scenes are empty.
+
+| Type | Aspect | Duration | Scenes |
+| ------------------------- | ------ | --------- | ------ |
+| Social reel | 9:16 | 10–15s | 5–7 |
+| Launch teaser | 16:9 | 15–25s | 7–10 |
+| Product explainer | 16:9 | 30–60s | 10–18 |
+| Cinematic title | 16:9 | 45–90s | 7–12 |
+
+Bind `:root` from the active `DESIGN.md`:
+
+```css
+:root {
+ /* From active DESIGN.md — never invented */
+ --bg: var(--ds-canvas);
+ --ink: var(--ds-foreground);
+ --accent: var(--ds-accent);
+ --muted: var(--ds-muted);
+ --font-display: var(--ds-display);
+ --font-body: var(--ds-body);
+}
+```
+
+If the active DS uses different token names, alias them — but **always
+source the values from the DS file**, never hard-code a hex from memory.
+
+---
+
+## Step 3 — Fill scenes (content + animation)
+
+**Gate:** Every scene has visible content, at least 2 animation patterns from
+the table, and mid-scene activity. No scene is a static slide.
+
+### 3a. Content goes inside `.scene-content`
+
+```html
+<!-- sketch: ids and timings are illustrative and match the tweens in §3b -->
+<div class="scene" id="scene-3" data-start="10" data-duration="4" data-track-index="0">
+  <div class="scene-content">
+    <h1 id="s3-title">$1.9 Trillion</h1>
+    <p id="s3-sub">processed annually</p>
+    <div id="s3-bar-chart"><!-- inline SVG or CSS bars --></div>
+  </div>
+  <!-- decoratives stay outside .scene-content -->
+  <div class="glow" aria-hidden="true"></div>
+</div>
+```
+
+### 3b. Entrance tweens (offset 0.1–0.3s into each scene)
+
+```js
+// === SCENE 3 (data-start=10.0) ===
+tl.from("#s3-title", { y: 40, autoAlpha: 0, duration: 0.6, ease: "power3.out" }, 10.3);
+tl.from("#s3-sub", { y: 20, autoAlpha: 0, duration: 0.5, ease: "power2.out" }, 10.7);
+tl.from("#s3-bar-chart", { scaleY: 0, transformOrigin: "bottom", duration: 0.8, ease: "expo.out" }, 11.0);
+```
+
+### 3c. Mid-scene activity (this is what separates video from slides)
+
+| Element | Mid-scene motion | Pattern |
+| ------------------ | ---------------------------------------- | ----------------------------------------------------------------------- |
+| Stat / number | Counter from 0 → target | `tl.to({n:0}, { n: target, duration, onUpdate: …, ease: "power2.out" })` |
+| SVG line / path | Draws itself in real time | `strokeDashoffset` from `pathLength → 0` |
+| Title / wordmark | Characters enter one by one | `tl.from(chars, { autoAlpha: 0, y: 8, stagger: 0.04 })` |
+| Logo / lockup | Subtle vertical drift | `tl.to(el, { y: -6, duration: sceneLength, ease: "sine.inOut" })` |
+| Chart / bars | Bars fill sequentially | `tl.from(bars, { scaleY: 0, transformOrigin: "bottom", stagger: 0.08 })` |
+| Image / screenshot | Slow zoom: `scale: 1 → 1.03` | Ken Burns — `tl.to(img, { scale: 1.03, duration: sceneLength, ease: "none" })` |
+| Background glow | Opacity pulse | `tl.to(".glow", { opacity: 0.6, duration: 1.5, ease: "sine.inOut", yoyo: true, repeat: 1 })` |
+
+**Minimum per scene:** entrance tweens + at least one continuous motion
+(float, counter, zoom, or glow).
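+
+A minimal sketch of two of these patterns wired together, reusing the scene-3
+ids from §3a/§3b (a counter plus a continuous drift; targets and timings are
+illustrative):
+
+```js
+// Counter: tween a plain object and write the formatted value on every update.
+const stat = { n: 0 };
+tl.to(stat, {
+  n: 1.9, // counts 0 → 1.9 ($ trillions)
+  duration: 1.2,
+  ease: "power2.out",
+  onUpdate: () => {
+    document.querySelector("#s3-title").textContent =
+      `$${stat.n.toFixed(1)} Trillion`;
+  },
+}, 10.4);
+
+// Continuous motion so the scene never sits still: a slow drift on the subtitle.
+tl.to("#s3-sub", { y: -8, duration: 3.0, ease: "sine.inOut" }, 10.8);
+```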
+
+### 3d. Adjust scene duration by reading time
+
+| Display text | Min duration |
+| --------------------------- | ------------ |
+| No text (hero, icon) | 1.5–2s |
+| 1–3 words | 2–3s |
+| 4–10 words | 3–4s |
+| 11–20 words | 4–6s |
+| 21–35 words | 6–8s |
+| 35+ words | Split scenes |
+
+**Hard ceiling: 5s per scene** unless you name a specific reason (hero hold,
+cinematic push, long counter animation).
+
+When you change a scene's duration, update `data-start` on every subsequent
+scene to keep them tiled end-to-end, and update `#stage`'s `data-duration` to
+match the total.
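+
+For example, growing scene 2 from 3s to 4s shifts every later `data-start` by
+1s and grows the stage total by 1s (values here are illustrative):
+
+```html
+<div id="stage" … data-duration="12">                         <!-- was 11 -->
+  <div class="scene" data-start="0" data-duration="3">…</div>
+  <div class="scene" data-start="3" data-duration="4">…</div> <!-- was 3s -->
+  <div class="scene" data-start="7" data-duration="5">…</div> <!-- was data-start="6" -->
+</div>
+```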
+
+### 3e. Vary eases
+
+Use at least 3 different eases across the timeline. Don't default to
+`power2.out` on everything. Good defaults: `power3.out` (heavy entrances),
+`expo.out` (snappy stat reveals), `sine.inOut` (breathing loops),
+`elastic.out(1, 0.5)` (playful overshoot — sparingly).
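+
+A sketch of what that spread can look like (selectors are placeholders):
+
+```js
+tl.from("#hero-title", { y: 60, autoAlpha: 0, duration: 0.7, ease: "power3.out" }, 0.2);
+tl.from("#hero-stat", { scale: 0.6, autoAlpha: 0, duration: 0.5, ease: "expo.out" }, 0.9);
+tl.to("#hero-glow", { opacity: 0.7, duration: 1.6, ease: "sine.inOut", yoyo: true, repeat: 1 }, 1.2);
+```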
+
+---
+
+## Step 4 — Shader transitions (2–3 max)
+
+Use HyperFrames' built-in shader blocks at scene boundaries. Pick by mood:
+
+| Shader | Mood |
+| -------------------------- | ------------------------------------- |
+| `flash-through-white` | Energetic, optimistic, pop |
+| `whip-pan` | High-energy, sports/news cut |
+| `cinematic-zoom` | Reveal, magnification, "let me show you" |
+| `glitch` | Tech, edgy, glitch-pop |
+| `ripple-waves` | Soft, organic, lifestyle |
+| `light-leak` | Warm, nostalgic, film-like |
+| `cross-warp-morph` | Smooth scene-to-scene continuity |
+| `chromatic-radial-split` | Retro tech, VHS aesthetic |
+| `swirl-vortex` | Disorienting, dream sequence |
+
+Hard cuts everywhere else. A good rule: shader at the beginning, shader at
+the climax, shader at the end. Anything more is over-decorated.
+
+---
+
+## Step 5 — Self-critique (Open Design's 5-dim gate)
+
+Before emitting the output files, score yourself 1–5 across:
+
+- **Philosophy** — Is the visual stance coherent with the brief and the
+ active DS, or is it generic?
+- **Hierarchy** — Does each scene have a single dominant element? Is
+ reading order obvious?
+- **Detail** — Do shader/eases/durations match the mood, or are they
+ defaulted?
+- **Function** — Does the timeline play smoothly when the engine seeks?
+ Are all scene `data-start`s tiled? Does total `data-duration` match?
+- **Innovation** — Is there at least one moment that wouldn't appear in a
+ generic AI render?
+
+Anything under 3/5 is a regression — fix and rescore. Two passes is normal.
+
+---
+
+## Step 6 — Output contract
+
+Emit exactly two files into the project directory:
+
+### `index.html` — the composition
+
+
+A minimal skeleton assembled from the rules above; ids, dimensions, durations,
+and the GSAP CDN build are placeholders to adapt per brief:
+
+```html
+<!doctype html>
+<html>
+<head>
+  <meta charset="utf-8" />
+  <style>
+    :root {
+      /* palette + typography bound from the active DESIGN.md (Step 2) */
+    }
+    /* all other CSS inline: no external stylesheets, no build step */
+  </style>
+  <script src="https://cdn.jsdelivr.net/npm/gsap@3/dist/gsap.min.js"></script>
+</head>
+<body>
+  <div id="stage" data-composition-id="<id>" data-start="0"
+       data-width="1920" data-height="1080" data-duration="<total>">
+    <div class="scene" data-start="0" data-duration="4" data-track-index="0">
+      <div class="scene-content"><!-- headlines, stats, imagery --></div>
+      <!-- decoratives (glow, grain, vignette) live here, outside .scene-content -->
+    </div>
+    <!-- further scenes, tiled end-to-end on data-start -->
+  </div>
+  <script>
+    const tl = gsap.timeline({ paused: true });
+    // entrance tweens + mid-scene activity (Step 3) go here
+    window.__timelines = window.__timelines || {};
+    window.__timelines["<id>"] = tl;
+  </script>
+</body>
+</html>
+```
+
+### `preview.html` — the local-preview shim
+
+
+A minimal sketch of the rule-9 shim: it embeds `index.html` in an iframe and
+forwards the parent URL's query string and hash (e.g. `?frame=…`) to it:
+
+```html
+<!doctype html>
+<html>
+<head>
+  <meta charset="utf-8" />
+  <title>Preview</title>
+  <style>html, body, iframe { margin: 0; width: 100%; height: 100%; border: 0; }</style>
+</head>
+<body>
+  <iframe id="frame"></iframe>
+  <script>
+    // Forward scrubbing tokens from the parent URL to the composition.
+    document.getElementById("frame").src =
+      "index.html" + location.search + location.hash;
+  </script>
+</body>
+</html>
+```
+
+Save both files into the project's `cwd` (Open Design has already set this
+to `.od/projects/<project>/`). The agent can immediately run:
+
+```bash
+npx hyperframes lint # should pass with zero errors
+npx hyperframes preview # opens the studio
+npx hyperframes render # writes MP4
+```
+
+---
+
+## Anti-AI-slop blacklist (HyperFrames-specific)
+
+- **No purple gradients on dark backgrounds** unless the brief explicitly
+ names that aesthetic.
+- **No generic emoji icons** — use inline SVG or DS-provided iconography.
+- **No "10× faster" / "AI-powered" filler copy** — write the user's actual
+ words or use honest placeholders (`—` or labelled grey blocks).
+- **No invented brand colors** — read from the active DS or the user's
+ attachment, never from memory.
+- **No identical card grids** for every scene — at least 3 distinct layout
+ postures across the video.
+- **No wall-clock JS animations** — `setTimeout`, `setInterval`,
+ `requestAnimationFrame`-driven animation breaks deterministic seeking. GSAP
+ timeline only. (Library-clock animations like Anime.js, Motion One, and
+ Lottie are supported via [HyperFrames' Frame Adapter](https://hyperframes.heygen.com/concepts/frame-adapters)
+ pattern, but stick to GSAP for first-draft handoffs unless the brief
+ requires another runtime.)
+
+---
+
+## When to defer to the Claude Design instructions
+
+For these advanced areas, treat
+[`claude-design-hyperframes.md`](https://github.com/heygen-com/hyperframes/blob/main/docs/guides/claude-design-hyperframes.md)
+as the canonical reference and follow its patterns verbatim:
+
+- The full skeleton catalog (Skeletons A–D)
+- Complete shader-block insertion patterns
+- HDR / wide-gamut color handling
+- Audio-reactive animation (`hf-seek` + `window.__hfAudio`)
+- Captions / TTS integration
+- The `hyperframes add` registry (50+ blocks and components)
+
+This skill stays focused on what Open Design needs at emission time — the
+structural rules, the active-`DESIGN.md` binding, and the 5-dim self-critique
+that's specific to OD's prompt stack.
diff --git a/docs/guides/open-design.mdx b/docs/guides/open-design.mdx
new file mode 100644
index 000000000..d79acf514
--- /dev/null
+++ b/docs/guides/open-design.mdx
@@ -0,0 +1,175 @@
+---
+title: Open Design
+description: "Create HyperFrames video drafts in Open Design — the open-source, BYOK Claude-Design alternative — then refine in any AI coding agent."
+---
+
+[Open Design](https://github.com/nexu-io/open-design) is an open-source, local-first alternative to Claude Design. It runs on your laptop with `pnpm tools-dev`, deploys the web layer to Vercel, and delegates to whichever coding-agent CLI you already have on your `PATH` (Claude Code, Codex, Cursor Agent, Gemini CLI, OpenCode, Qwen, Copilot, Hermes, Kimi, Pi) — or to any OpenAI-compatible BYOK endpoint.
+
+Open Design produces a **valid first draft** of a HyperFrames composition — palette, scene content, GSAP entrance tweens, mid-scene activity, and shader transitions. You then download the project and refine in any AI coding agent with linting and live preview.
+
+## Get started
+
+
+
+ Open [`open-design-hyperframes.md`](https://github.com/heygen-com/hyperframes/blob/main/docs/guides/open-design-hyperframes.md) on GitHub and click the download button (↓) to save it.
+
+
+ ```bash
+ git clone https://github.com/nexu-io/open-design.git
+ cd open-design
+ pnpm install
+ pnpm tools-dev run web
+ # open the web URL printed by tools-dev
+ ```
+
+
+ **Recommended:** copy `open-design-hyperframes.md` to `skills/hyperframes-handoff/SKILL.md` inside the Open Design repo. The daemon auto-discovers it on the next request and exposes it as a skill in the picker. **Or:** start a new chat and attach the file directly — Open Design reads attachments natively.
+
+
+ Pick the `hyperframes-handoff` skill (or your active prototype skill), pick a design system or visual direction, and type the brief. Include screenshots, brand assets, or a palette if you have them.
+
+
+ Open Design writes `index.html`, `preview.html`, `README.md`, and a `DESIGN.md` snapshot into `.od/projects/<project>/`. Click **Save to disk** or download as a project ZIP.
+
+
+ The Open Design project folder is already a real on-disk working directory. Hand it off to Claude Code, Cursor, Codex, or any agent with terminal access:
+ ```bash
+ cd .od/projects/<project>
+ npx skills add heygen-com/hyperframes # install skills (one-time)
+ npx hyperframes lint # should pass with zero errors
+ npx hyperframes preview # open the studio
+ ```
+
+
+
+
+ **Drop into `skills/`, don't paste into chat.** Open Design's daemon reads `SKILL.md` files at request time and injects the side files (templates, references) automatically. A pasted URL or chat attachment works, but the skill path gives you the full pre-flight pipeline (template injection + 5-dimensional self-critique gate).
+
+
+## Which setup to use
+
+| Surface | Recommended setup |
+| -------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Open Design (open-source) | Drop [`open-design-hyperframes.md`](https://github.com/heygen-com/hyperframes/blob/main/docs/guides/open-design-hyperframes.md) into `skills/hyperframes-handoff/SKILL.md` |
+| Claude Code | `npx skills add heygen-com/hyperframes`, then use `/hyperframes` |
+| Cursor / Codex / Gemini CLI | `npx skills add heygen-com/hyperframes` |
+| Claude Design (closed-source) | Attach [`claude-design-hyperframes.md`](https://github.com/heygen-com/hyperframes/blob/main/docs/guides/claude-design-hyperframes.md) to your chat |
+
+## How it works
+
+The instruction file gives Open Design **pre-valid HTML skeletons** — the structural rules (data attributes, timeline registration, scene visibility, preview token forwarding) are already embedded. Open Design fills in the creative work:
+
+1. **Palette + typography** — driven by the active `DESIGN.md` (Open Design ships 72 systems out of the box, plus 5 deterministic visual directions when no brand is named) bound onto `:root`
+2. **Scene content** — text, images, layout inside `.scene-content` wrappers
+3. **Animations** — GSAP entrance tweens and mid-scene activity
+4. **Transitions** — hard cuts for most scenes, shader transitions at 2-3 key moments
+
+Open Design's [5-dimensional self-critique](https://github.com/nexu-io/open-design/blob/main/apps/web/src/prompts/discovery.ts) runs before emission, so the artifact arrives lint-clean and your coding agent can start refining immediately without structural fixes.
+
+## Example prompts
+
+
+
+ ```text
+ Use the hyperframes-handoff skill. I just shipped dark mode for my app.
+ Make me a 15-second Instagram reel announcing it.
+
+ - App name: Taskflow
+ - Primary color: #6C5CE7
+ - The vibe is clean, minimal, dark
+ - Key stat: "47% of users requested this"
+ ```
+
+
+ ```text
+ Use the hyperframes-handoff skill on Codex CLI. 25-second LinkedIn video.
+
+ Problem: Sales teams waste 3 hours/day on manual CRM updates.
+ Solution: AutoCRM — AI that logs every call, email, and meeting.
+ Traction: 200+ teams, $1.2M ARR, 18% MoM growth.
+ CTA: autocrmhq.com
+
+ Use the Linear design system. Professional but not corporate.
+ ```
+
+
+ ```text
+ Use the hyperframes-handoff skill. Design system: Stripe. 10-second reel.
+ One big number: "$4.2 billion processed in Q1 2026"
+
+ Dark background, the number animates up from zero. Subtle, confident.
+ End with logo placeholder and "stripe.com"
+ ```
+
+
+ ```text
+ Use the hyperframes-handoff skill. 30-second launch video for Orbit.
+ ```
+
+ Open Design's `RULE 1` always opens with a round of clarifying questions before emitting code, so a sparse brief turns into one short question form (surface · audience · tone · brand · scale) instead of an AI-freestyle render.
+
+
+
+## What to include in your prompt
+
+Open Design reads inputs in this order of reliability: **active DESIGN.md > attachments > pasted content > web research > URLs**.
+
+| Input type | What it gives Open Design |
+| --- | --- |
+| Active design system (72 shipped, switchable from picker) | Full 9-section spec (color, typography, spacing, layout, components, motion, voice, brand, anti-patterns) — strongest source |
+| Screenshots / PDFs / brand guides | Palette, typography, UI patterns, tone — read by the agent natively |
+| Pasted hex codes, typefaces, copy | Authoritative for what they cover |
+| Brand name (well-known) | Open Design can `WebFetch` blogs, press, Wikipedia |
+| SPA URL (React/Vue homepage) | Returns near-empty shell — pivot to blog/press instead |
+
+The more specific your prompt, the better the output. Pick a design system or visual direction up front, then describe content.
+
+## Why use Open Design vs. Claude Design
+
+| | Claude Design | **Open Design** |
+| --- | --- | --- |
+| License | Closed | Apache-2.0 |
+| Cost | Pro / Max / Team | BYOK (free with your own CLI / API key) |
+| Form factor | Web (claude.ai) only | Web app + local daemon (or deploy to Vercel) |
+| Agent runtime | Anthropic only (Opus 4.7) | 10 CLI adapters + OpenAI-compatible BYOK proxy |
+| Skills | Proprietary | 31 file-based `SKILL.md` bundles, droppable |
+| Design systems | Proprietary | 72 shipped `DESIGN.md` systems |
+| Filesystem-grade workspace | ❌ | ✅ Real `cwd`, real `Read` / `Write` / `Bash` / `WebFetch` |
+
+If you want the Claude Design loop without lock-in, Open Design is the same artifact-first mental model — open, local, BYOK.
+
+## Known limitations
+
+- **Render still happens locally** — Open Design produces the HTML; `npx hyperframes render` and HDR encoding still need FFmpeg + Node 22+ on your machine.
+- **In-pane preview is sandboxed iframe** — full browser playback is reliable; for frame-accurate scrubbing use `npx hyperframes preview` after handoff.
+- **Shader passthrough requires WebGL** — same as the Claude Design path; Open Design's iframe sandbox supports it.
+- **Skill pre-flight is daemon-side** — if you bypass the skill picker and paste raw HTML into chat, you lose the side-file injection and 5-dim critique gate. Use the skill.
+
+## The handoff to your coding agent
+
+Open Design's project folder is the agent's `cwd`. There's no "export then re-import" step — open Claude Code, Cursor, Codex, or any AI coding agent against the same directory:
+
+```bash
+cd .od/projects/<project>
+npx skills add heygen-com/hyperframes # one-time setup
+npx hyperframes lint # verify structure
+npx hyperframes preview # open the studio
+```
+
+Then iterate the same way as the Claude Design path:
+
+- "Make scene 3's entrance snappier"
+- "Add a counter animation to the stat in scene 5"
+- "Tighten the pacing — scenes 4 and 6 feel too long"
+- "Change the shader on transition 2 to glitch"
+
+## Next steps
+
+
+
+ The closed-source flavor of the same workflow — useful when you don't have a CLI on your laptop.
+
+
+ More prompt patterns for HyperFrames across Claude Code, Claude Design, Open Design, and other agents.
+
+
diff --git a/docs/guides/prompting.mdx b/docs/guides/prompting.mdx
index 92836a03e..2529ba44c 100644
--- a/docs/guides/prompting.mdx
+++ b/docs/guides/prompting.mdx
@@ -18,7 +18,8 @@ In Claude Code, restart the session after installing. Skills register as **slash
| Slash command | What it loads |
| -------------------------- | -------------------------------------------------------------------------- |
| `/hyperframes` | Composition authoring — HTML structure, timing, captions, TTS, transitions |
-| `/hyperframes-cli` | CLI commands — `init`, `lint`, `preview`, `render`, `transcribe`, `tts` |
+| `/hyperframes-cli` | Dev-loop CLI — `init`, `lint`, `inspect`, `preview`, `render`, `doctor` |
+| `/hyperframes-media` | Asset preprocessing — `tts`, `transcribe`, `remove-background` |
| `/hyperframes-registry` | Block and component installation via `hyperframes add` |
| `/website-to-hyperframes` | Capture a URL and turn it into a video — full website-to-video pipeline |
| `/gsap` | GSAP animation API — timelines, easing, ScrollTrigger, plugins |
diff --git a/docs/guides/remove-background.mdx b/docs/guides/remove-background.mdx
new file mode 100644
index 000000000..334bb1163
--- /dev/null
+++ b/docs/guides/remove-background.mdx
@@ -0,0 +1,424 @@
+---
+title: Remove Background (transparent video)
+description: "Remove the background from a video or image and drop it into any composition as a transparent overlay."
+---
+
+Background removal — also called *matting* in VFX — separates a foreground subject (typically a person) from its background. The output is a video with an alpha channel: fully transparent where the background was, opaque where the subject is. Drop it into any HyperFrames composition as a `<video>` tag and the subject floats over whatever you put behind them.
+
+The CLI ships a built-in `remove-background` command that runs locally — no API keys, no cloud upload, no green screen.
+
+## Quick Start
+
+
+
+ The pipeline needs `ffmpeg` and `ffprobe` for decode + encode. Most systems already have them; if not:
+
+ ```bash Terminal
+ # macOS
+ brew install ffmpeg
+
+ # Ubuntu / Debian
+ sudo apt install ffmpeg
+ ```
+
+ Confirm with `npx hyperframes doctor` — both should be green.
+
+
+ ```bash Terminal
+ npx hyperframes remove-background subject.mp4 -o transparent.webm
+ ```
+
+ On the first run, the CLI downloads ~168 MB of model weights to `~/.cache/hyperframes/background-removal/models/`. Subsequent runs reuse the cache.
+
+ Output:
+
+ ```
+ ◇ Removed background from 240 frames in 38.4s (6.3 fps, CoreML) → ./transparent.webm
+ ```
+
+
+ The output is a standard VP9-with-alpha WebM. Chrome's `<video>` element decodes the alpha plane natively — no special player needed:
+
+ ```html composition.html
+ <!-- minimal sketch: any scene or background on track 0, the cutout layered above it -->
+ <div class="scene" data-start="0" data-duration="4" data-track-index="0">
+   <!-- whatever should appear behind the subject: gradient, image, animated content -->
+ </div>
+ <video src="transparent.webm" data-start="0" data-duration="4" data-track-index="1"></video>
+ ```
+
+ Render the composition with the usual `hyperframes render`.
+
+
+
+## How it works
+
+The pipeline runs four stages, all locally:
+
+```
+ffmpeg decode → u²-net_human_seg inference → alpha composite → ffmpeg encode
+ (raw RGB) (320×320 mask, then upsampled) (VP9-alpha)
+```
+
+The model is **u²-net_human_seg** (MIT license, ~168 MB ONNX). It runs through `onnxruntime-node` with the best-available execution provider on your machine: CoreML on Apple Silicon, CUDA on NVIDIA, CPU otherwise.
+
+The output is encoded with the exact ffmpeg flags Chrome's `<video>` element needs to decode alpha — `-pix_fmt yuva420p` plus the `alpha_mode=1` metadata tag. Get those wrong and the alpha plane is silently discarded by browsers.
+
+## Output formats
+
+| Extension | Codec | When to use | Size (4s @ 1080p) |
+|-----------|-------|-------------|-------------------|
+| `.webm` (default) | VP9 with alpha | Drop into `<video>` for HTML5-native transparent playback | ~1 MB |
+| `.mov` | ProRes 4444 with alpha | Editing round-trip in Premiere / Resolve / Final Cut | ~50 MB |
+| `.png` | PNG with alpha | Single-image cutout (only when the input is also a single image) | varies |
+
+```bash Terminal
+npx hyperframes remove-background subject.mp4 -o transparent.webm # web playback
+npx hyperframes remove-background subject.mp4 -o transparent.mov # editing
+npx hyperframes remove-background portrait.jpg -o cutout.png # still image
+```
+
+## Layer separation: emit the cutout and the background plate together
+
+Pass `--background-output` (alias `-b`) to write a *second* transparent video alongside the cutout. Same source RGB, alpha is the *inverse* mask — opaque where the surroundings were, transparent where the subject is. The result is a clean two-layer separation in a single inference pass:
+
+```bash Terminal
+npx hyperframes remove-background subject.mp4 \
+ -o subject.webm \
+ --background-output plate.webm
+```
+
+| Output | Alpha | Use it as |
+| ------ | ----- | --------- |
+| `subject.webm` | Mask — subject opaque | Foreground layer (top of stack) |
+| `plate.webm` | `255 − mask` — subject region transparent | Background layer; place anything you want **under the subject's silhouette** between this and `subject.webm` |
+
+Both encoders share the source W/H/fps and your `--quality` preset, so the layers are pixel-aligned. Encode cost roughly doubles; segmentation cost is unchanged.
+
+
+**This is a hole-cut plate, not an inpainted clean plate.** The subject region in `plate.webm` is fully transparent — you have to composite something opaque under it (a graphic, a blurred copy, a different scene) to fill the hole. If you need an actual filled background where the subject was, use a video inpainter (LaMa, ProPainter, RunwayML Inpaint) — `remove-background` is not the right tool for that.
+
+
+### Hole-cut vs. clean plate — when does the difference matter?
+
+A **hole-cut plate** keeps the original surroundings and makes the subject region transparent. A **clean plate** fills the subject region with reconstructed background — produced by a separate inpainting model. Display each alone over black:
+
+| | Hole-cut plate (this command) | Clean plate (inpainted) |
+| --- | --- | --- |
+| Subject region | Transparent silhouette | Reconstructed background pixels |
+| What you see alone | A person-shaped hole | An empty room |
+| Cost | One inference pass, one extra ffmpeg encode | A second model (LaMa, ProPainter, E2FGVI) |
+| Tool | `remove-background --background-output` | Outside this CLI |
+
+The line is: **does anything ever need to be visible *through* the subject's silhouette where the subject used to be?**
+
+| Use case | What you need |
+| --- | --- |
+| Text/graphics live *between* the cutout and the plate (the example above) | **Hole-cut** — the graphics fill the hole. |
+| Composite the subject onto an unrelated scene | Neither. Just use `subject.webm`; the plate is irrelevant. |
+| Show "the room without the person" as a real background | **Clean plate** — a hole-cut plate would show a transparent void. |
+| Replace the person with a different subject (re-target) | **Clean plate** — the new subject needs real pixels under it. |
+| VFX rotoscoping / "remove an extra from this take" | **Clean plate** — the canonical inpainting use case. |
+
+If something opaque always covers the silhouette, hole-cut is sufficient and ~1000× cheaper than running an inpainter.
+
+### The two-layer composition pattern
+
+The two-layer pattern is functionally a drop-in for [text-behind-subject](#text-behind-subject-the-recommended-layout) without needing the original `presenter.mp4` in the project — the plate replaces it as the bottom layer:
+
+```html
+<!-- sketch: same stack as text-behind-subject, with the plate as the bottom layer -->
+<video src="plate.webm" data-start="0" data-duration="6" data-track-index="0"></video>
+
+<!-- the headline sits between plate and cutout, so the subject's silhouette occludes it -->
+<h1 id="cf-headline">MAKE IT IN HYPERFRAMES</h1>
+
+<!-- non-timed wrapper so GSAP owns the cutout's visibility (see the rules below) -->
+<div class="cutout-wrap">
+  <video src="subject.webm" data-start="0" data-duration="6" data-track-index="1"></video>
+</div>
+```
+
+Constraints: the flag requires a video input and `.webm` or `.mov` for both outputs. It's not valid for image inputs (no temporal pairing to do) and won't accept `.png` for the plate.
+
+## Performance
+
+Real-world numbers from the [matting eval](https://www.heygenverse.com/a/0dd5a431-1832-4858-862d-de7fb7d02654), running u²-net_human_seg on a 4-second 1080p clip:
+
+| Platform | Provider | ms/frame | 30-second clip |
+|----------|----------|----------|----------------|
+| Apple Silicon (M2 Pro / M3 / M4) | CoreML | ~263 | ~2 min |
+| NVIDIA GPU (T4, A10, RTX) | CUDA | ~80–150 | ~30–60 s |
+| Linux x86 | CPU | ~1100 | ~16 min |
+| macOS Intel | CPU | ~900 | ~13 min |
+
+Matting is offline preprocessing — you run it once per asset and reuse the output. CPU-only is slow but always works; if you reuse the same subject clip repeatedly, run it once on a faster machine and check the transparent output into your project.
+
+## Picking a device explicitly
+
+`--device auto` is the default and right for almost everyone. The flag exists for two cases:
+
+- **Force CPU on a GPU box** when you want to keep the GPU free for other work, or are debugging an EP-specific issue:
+
+ ```bash Terminal
+ npx hyperframes remove-background subject.mp4 -o transparent.webm --device cpu
+ ```
+
+- **Opt into CUDA** by setting `HYPERFRAMES_CUDA=1` and providing a GPU-enabled `onnxruntime-node` build (the bundled build is CPU + CoreML only, to keep the install small for the 99% of users who don't have a GPU):
+
+ ```bash Terminal
+ HYPERFRAMES_CUDA=1 npx hyperframes remove-background subject.mp4 -o transparent.webm --device cuda
+ ```
+
+Run `npx hyperframes remove-background --info` to see what providers are detected on your machine and which one `auto` would pick.
+
+## Using the transparent video in a composition
+
+The transparent WebM behaves like any other video element. The two patterns you'll use most:
+
+**Subject over a background image:**
+
+```html
+<!-- sketch: a full-frame background image with the cutout layered above it -->
+<img src="background.jpg" data-start="0" data-duration="4" data-track-index="0" />
+<video src="transparent.webm" data-start="0" data-duration="4" data-track-index="1"></video>
+```
+
+**Subject over a HyperFrames scene:**
+
+```html
+<!-- sketch: the cutout plays over an ordinary animated scene -->
+<div class="scene" data-start="0" data-duration="5" data-track-index="0">
+  <h1>Welcome</h1>
+</div>
+<!-- loop covers the case where the scene outlasts the source clip -->
+<video src="transparent.webm" data-start="0" data-duration="5" data-track-index="1" loop></video>
+```
+
+The cutout inherits the composition's frame rate and timeline — it plays through once during the scene's duration, so match the source clip length to the scene length when possible. If the scene is longer than the clip, `loop` handles it.
+
+
+ When rendering a composition that contains a `<video>` element, the renderer reads the source via ffmpeg internally. Transparent WebMs are decoded with the alpha plane preserved.
+
+
+## Compositing patterns and pitfalls
+
+The cutout webm is a **re-encoded copy** of the source mp4's RGB — the matting pipeline decodes the source to raw RGB, runs segmentation, and re-encodes to VP9 with alpha. That choice has consequences depending on what you put behind it.
+
+### The three patterns
+
+| Pattern | Behind the cutout | Result |
+|---|---|---|
+| **Cutout over a different scene** *(most common)* | Static image, gradient, animated bg, or unrelated footage | Clean. The cutout is the only source of the subject — no doubling, no edge halo. Use any `--quality`. |
+| **Cutout over its own source mp4** *(text-behind-subject, talking-head with overlays)* | The same mp4 the cutout was generated from | Two RGB sources for the same person. At default `--quality balanced` (crf 18) the doubling is barely visible; at `--quality fast` (crf 30) you'll see a slight color shift / soft edge on the silhouette. Use `--quality best` (crf 12) for hero shots. |
+| **Cutout over different footage of the same subject** | Another take of the same person | Looks like two overlapping people. Avoid — re-shoot or re-cut the source. |
+
+### Text-behind-subject: the recommended layout
+
+Putting a headline *behind* a presenter so their silhouette occludes the text:
+
+```html
+<!-- sketch: layer order bottom → top; file names follow the examples above -->
+<video src="presenter.mp4" data-start="0" data-duration="6" data-track-index="0"></video>
+
+<!-- the headline sits above the base footage and below the cutout -->
+<h1 id="cf-headline" style="clip-path: inset(0 0 100% 0)">MAKE IT IN HYPERFRAMES</h1>
+
+<!-- non-timed wrapper (rule 1 below); the cutout itself is a normal timed clip -->
+<div class="cutout-wrap" style="opacity: 0">
+  <video src="subject.webm" data-start="0" data-duration="6" data-track-index="1"></video>
+</div>
+```
+
+```js
+const tl = gsap.timeline({ paused: true });
+const CUT = 3.3;
+
+// Reveal the headline early
+tl.to("#cf-headline", { clipPath: "inset(0 0 0% 0)", duration: 0.6, ease: "expo.out" }, 0.25);
+
+// At the cut, flip the cutout wrapper visible — silhouette punches through the headline
+tl.set(".cutout-wrap", { opacity: 1 }, CUT);
+
+// Sentinel: extend timeline to the composition's full duration so the renderer
+// doesn't bail past the last meaningful tween.
+tl.set({}, {}, 6);
+```
+
+### Two non-obvious rules
+
+**1. Wrap the cutout video in a non-timed `<div>` and animate the wrapper, not the video.**
+
+The framework forces `opacity: 1` on any element with `data-start`/`data-duration` while it's "active" — that's how it controls clip visibility. CSS `opacity: 0` on the video element is silently overwritten by the framework's clip lifecycle, so an opacity tween on the video element won't do anything. Wrap the video in a `<div>` that has no `data-*` attributes; the wrapper is owned entirely by your CSS/GSAP.
+
+**2. Both videos start at `data-start="0"` and decode in sync from t=0.**
+
+It's tempting to "late-mount" the cutout (`data-start="3.3"` to match the cut). Don't — Chrome does a seek + decoder warm-up at mount, which can land one frame off the base mp4 at the cut moment. With both videos mounted from t=0 and the cutout's wrapper opacity-animated, both decoders advance the same way and stay frame-accurate.
+
+### Quality preset and color match
+
+When the cutout is overlaid on its own source mp4, the encoder's CRF directly affects how visible the doubling is at edges:
+
+| `--quality` | CRF | File size (12s @ 1080p) | When to use |
+|---|---|---|---|
+| `fast` | 30 | ~2 MB | Cutout sits over an unrelated background and file size matters |
+| `balanced` *(default)* | 18 | ~6 MB | Recommended for text-behind-subject and any pattern that overlays on the source |
+| `best` | 12 | ~12 MB | Hero shots, masters, or anything you'll re-encode downstream |
+
+The encoder also writes BT.709 + limited-range color metadata so Chrome's YUV→RGB pipeline matches the source mp4's. Without those tags, the cutout would render slightly differently from the underlying mp4 even at lossless quality (visible red/skin shift).
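+
+If you want to confirm those tags on a cutout you've generated, `ffprobe` reads
+them straight from the stream (the file name is a placeholder):
+
+```bash Terminal
+ffprobe -v error -select_streams v:0 \
+  -show_entries stream=color_range,color_space,color_transfer,color_primaries \
+  -of default=noprint_wrappers=1 transparent.webm
+# expect color_range=tv and bt709 for the other three
+```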
+
+## What u²-net_human_seg is and isn't good for
+
+The model is purpose-built for **portrait / human matting**. It excels when:
+
+- ✅ The subject is a person, head-and-shoulders or full-body
+- ✅ The framing is reasonably stable (not a wide handheld shot)
+- ✅ The background contrasts with the subject
+
+It struggles or fails on:
+
+- ❌ Non-human subjects (products, animals, objects). The model will return a mostly-empty mask.
+- ❌ Very fine hair detail on a busy background. The 320×320 inference resolution means hair tips get softened — fine for most use cases, but compositors notice.
+- ❌ Frame-to-frame temporal consistency. Each frame is processed independently, so static backgrounds with moving subjects can show subtle edge flicker. For most web playback this is invisible; for high-end VFX it may matter.
+- ❌ Live streams or real-time capture. The pipeline is batch-only.
+
+If your use case hits one of these, see the alternatives below.
+
+## Alternatives — when the built-in command isn't the right tool
+
+The CLI ships **one model on purpose** — the one that's MIT-licensed, runs everywhere, and produces production-quality output for person/portrait video. The list below leads with **free, open-source tools** that pair naturally with HyperFrames. Each entry calls out the actual catch — license, install effort, hardware needs — so you can pick the right one for your situation. Full benchmarks are in the [matting eval](https://www.heygenverse.com/a/0dd5a431-1832-4858-862d-de7fb7d02654).
+
+### Free, open-source CLIs and libraries
+
+These all run locally with no account, no upload, no watermark.
+
+| Tool | When to use it | Catch |
+|------|----------------|-------|
+| [`rembg`](https://github.com/danielgatis/rembg) (Python, MIT) | You need a different subject type — `isnet-general-use` for objects/animals/products, `birefnet-portrait` for a quality ceiling on hair, `silueta` for a tiny ~40 MB footprint. Same family as our default model, more variety. | Requires Python + `pip install rembg`. Some bundled models (`birefnet-*`) need ~4 GB RAM and are CPU-only |
+| [BiRefNet](https://github.com/ZhengPeng7/BiRefNet) (PyTorch, MIT) | Highest-fidelity portrait mattes available — visibly better hair edges than u²-net | Heavy (~4 GB inference RAM), slow on CPU, broken on Apple CoreML at the time of the eval |
+| [Robust Video Matting (RVM)](https://github.com/PeterL1n/RobustVideoMatting) (PyTorch, **GPL-3.0**) | The only widely-available model with **temporal consistency** built in — no edge flicker on moving subjects. Best choice when you're matting a long talking-head clip and frame-to-frame stability matters | GPL-3.0 license is incompatible with most commercial / proprietary codebases. Read your repo's license before using |
+| [Backgroundremover](https://github.com/nadermx/backgroundremover) (Python, MIT) | Simple `pip install` wrapper around u²-net; nice if you want a Python API instead of our Node CLI | Same model family as ours, no quality difference — pick whichever fits your stack |
+| [ComfyUI](https://github.com/comfyanonymous/ComfyUI) (open-source, GPL-3.0 core) | Custom workflows: chain a segmentation model + alpha refinement + temporal smoothing. The right tool for tricky cases (multiple subjects, hair against a similar background, sports footage) | Setup is involved (Python, models, node graph). Worth it for repeat specialty work |
+
+After running any of these externally, encode the output as a HyperFrames-compatible transparent WebM with:
+
+```bash Terminal
+ffmpeg -i frames-%04d.png -c:v libvpx-vp9 \
+ -pix_fmt yuva420p \
+ -metadata:s:v:0 alpha_mode=1 \
+ -auto-alt-ref 0 -b:v 0 -crf 30 \
+ transparent.webm
+```
+
+### Free desktop / GUI tools
+
+| Tool | When to use it | Catch |
+|------|----------------|-------|
+| [DaVinci Resolve — Magic Mask](https://www.blackmagicdesign.com/products/davinciresolve) | You're already editing in Resolve, want a brush-based UI with manual refinement, and need to round-trip the alpha into a larger edit | macOS / Windows / Linux desktop install. The free tier covers Magic Mask; paid Studio version unlocks higher resolutions on some features |
+| [Backgroundremover.app](https://backgroundremover.app) (web) | One-off image cutout, no signup, no watermark | Single images only, not video. Free tier is hosted but the underlying tool is the same `rembg` model family |
+| [PhotoRoom Background Remover](https://www.photoroom.com/tools/background-remover) (web) | Quick one-off image, polished UI, no signup | Single images only, e-commerce-tuned model |
+
+### Web SaaS tools (free tiers, with strings)
+
+| Tool | When to use it | Catch |
+|------|----------------|-------|
+| [unscreen.com](https://www.unscreen.com) | Quick one-off video, no install, drag-and-drop | **Free tier is watermarked and capped at short clips** (~10s preview). Paid removes both. Run by the team behind remove.bg |
+| [RunwayML — Green Screen](https://runwayml.com) | Polished UI with brush refinement and time-aware tracking; the closest a SaaS gets to professional roto | Free tier exists but is credit-limited; serious use is a subscription |
+| [Kapwing — Background Remover](https://www.kapwing.com/tools/remove-video-background) | Browser-based, integrates with their video editor | Free tier is watermarked; paid removes it |
+
+### How to choose
+
+- **Person / portrait video, web playback, MIT-clean** → use the built-in `hyperframes remove-background` (this is what it's tuned for).
+- **Non-human subject** (product, animal, object) → `rembg` with `isnet-general-use`.
+- **Maximum portrait quality, especially hair** → `BiRefNet` via Python.
+- **Long video where edge flicker would be visible**, GPL is OK → `RVM`.
+- **One-off marketing clip, no install** → DaVinci Resolve (free) for video, Backgroundremover.app for a still image.
+- **Specialty case the off-the-shelf models can't handle** → ComfyUI with a custom graph.
+
+## Troubleshooting
+
+### Model download fails or hangs
+
+The weights live on GitHub Releases (rembg's `v0.0.0` release, ~168 MB). If your network blocks GitHub or the download is interrupted:
+
+```bash Terminal
+# Manually download and drop into the cache
+mkdir -p ~/.cache/hyperframes/background-removal/models
+curl -L -o ~/.cache/hyperframes/background-removal/models/u2net_human_seg.onnx \
+ https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net_human_seg.onnx
+```
+
+Subsequent `remove-background` runs skip the download and use your local copy.
+
+### "ffmpeg and ffprobe are required"
+
+The pipeline shells out to ffmpeg for decode + encode. Install via `brew install ffmpeg` on macOS or `sudo apt install ffmpeg` on Debian/Ubuntu. Verify with `npx hyperframes doctor`.
+
+### The output WebM looks fully opaque in the browser
+
+Chrome only reads the alpha plane when the WebM is encoded as `yuva420p` with the `alpha_mode=1` metadata tag. The CLI sets both. If you re-encode the output yourself (e.g. with another ffmpeg invocation), preserve those flags:
+
+```bash Terminal
+ffmpeg -i in.webm -c:v libvpx-vp9 \
+ -pix_fmt yuva420p \
+ -metadata:s:v:0 alpha_mode=1 \
+ -auto-alt-ref 0 \
+ out.webm
+```
+
+To verify a WebM has alpha, extract the first frame and inspect:
+
+```bash Terminal
+ffmpeg -y -c:v libvpx-vp9 -i out.webm -frames:v 1 -pix_fmt rgba -update 1 frame0.png
+```
+
+The decoded `frame0.png` should be RGBA and have non-trivial alpha values.
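+
+If you have ImageMagick installed, `identify` is a quick way to confirm the
+extracted frame actually carries an alpha channel:
+
+```bash Terminal
+magick identify -format "%[channels]\n" frame0.png
+# a value containing "a" (e.g. "srgba") means an alpha channel is present
+```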
+
+### CoreML is "available" but inference fails to start
+
+The pipeline auto-falls-back to CPU if CoreML fails to bind, with a warning. If you want to skip the CoreML attempt entirely, force CPU:
+
+```bash Terminal
+npx hyperframes remove-background subject.mp4 -o transparent.webm --device cpu
+```
+
+### The alpha mask has rough or jagged edges
+
+That usually means the subject is low-contrast against a similar-toned background and the model's 320×320 inference resolution is showing through. Two paths forward:
+
+1. Re-frame or re-shoot to give the subject a more contrasting background.
+2. Try `birefnet-portrait` via `rembg` (see [Free, open-source CLIs and libraries](#free-open-source-clis-and-libraries)) — it's higher quality at hair edges but slower and heavier.
+
+## Reference
+
+- CLI: [`hyperframes remove-background`](/packages/cli#remove-background)
+- Eval: [Matting eval — v7](https://www.heygenverse.com/a/0dd5a431-1832-4858-862d-de7fb7d02654)
+- Source model: [danielgatis/rembg](https://github.com/danielgatis/rembg)
+- ONNX runtime: [`onnxruntime-node`](https://www.npmjs.com/package/onnxruntime-node)
diff --git a/docs/guides/rendering.mdx b/docs/guides/rendering.mdx
index aba45fe16..8de19627c 100644
--- a/docs/guides/rendering.mdx
+++ b/docs/guides/rendering.mdx
@@ -65,7 +65,8 @@ Render your Hyperframes [compositions](/concepts/compositions) to MP4, MOV, or W
**Pros:**
- Fast startup, no container overhead
- - Uses your system GPU for hardware-accelerated encoding (with `--gpu`)
+ - Can use your system GPU for Chrome/WebGL capture by default
+ - Can use your system GPU for hardware-accelerated encoding (with `--gpu`)
- Best for iterative development
**Cons:**
@@ -90,7 +91,8 @@ Render your Hyperframes [compositions](/concepts/compositions) to MP4, MOV, or W
**Cons:**
- Slower startup due to container initialization
- - No GPU acceleration inside the container
+ - Browser capture stays on the deterministic software-GL path
+ - GPU encoding requires Docker host GPU passthrough and is not cross-platform on Docker Desktop
Docker mode uses `chrome-headless-shell` with [BeginFrame](/concepts/determinism#how-it-works) control for frame-perfect, deterministic capture.
@@ -121,8 +123,10 @@ Render your Hyperframes [compositions](/concepts/compositions) to MP4, MOV, or W
| `--video-bitrate` | e.g. `10M`, `5000k` | — | Target bitrate encoding. Cannot combine with `--crf` |
| `--workers` | 1-8 or `auto` | auto | Parallel render workers (see [Workers](#workers) below) |
| `--max-concurrent-renders` | 1-10 | 2 | Max simultaneous renders via the producer server (see [Concurrent Renders](#concurrent-renders) below) |
-| `--gpu` | — | off | GPU encoding (NVENC, VideoToolbox, VAAPI) |
-| `--hdr` | — | off | Detect HDR sources and output HDR10 (MP4 only). See [HDR Rendering](/guides/hdr) |
+| `--gpu` | — | off | GPU encoding (NVENC, VideoToolbox, VAAPI, QSV) |
+| `--browser-gpu` / `--no-browser-gpu` | — | on locally, off in Docker | Use or opt out of host GPU acceleration for local Chrome/WebGL capture |
+| `--hdr` | — | off | Force HDR output even if no HDR sources are detected (MP4 only). See [HDR Rendering](/guides/hdr) |
+| `--sdr` | — | off | Force SDR output even if HDR sources are detected |
| `--docker` | — | off | Use Docker for [deterministic rendering](/concepts/determinism) |
| `--quiet` | — | off | Suppress verbose output |
@@ -148,6 +152,26 @@ npx hyperframes render --video-bitrate 10M --output controlled.mp4
**Tip**: The default `standard` preset (CRF 18) is visually lossless at 1080p — most people cannot distinguish it from the source. Use `--quality draft` for faster iteration, or `--quality high` / `--crf 10` when file size is no concern.
+## GPU Acceleration
+
+Hyperframes has two separate GPU acceleration surfaces:
+
+- `--gpu` uses a hardware video encoder in FFmpeg when one is available. Supported backends include VideoToolbox on macOS, NVENC on NVIDIA systems, VAAPI on Linux, and Intel QSV on supported Windows/Linux hosts.
+- Browser GPU uses the host GPU for local Chrome/WebGL capture. It is enabled automatically for local renders and disabled in Docker. Use `--no-browser-gpu` to opt out.
+
+```bash Terminal
+# Add hardware FFmpeg encoding to the default local browser-GPU render
+npx hyperframes render --gpu --output encoded-fast.mp4
+
+# Opt out of hardware Chrome/WebGL capture
+npx hyperframes render --no-browser-gpu --output software-browser.mp4
+
+# Use browser GPU plus hardware FFmpeg encoding
+npx hyperframes render --gpu --output gpu.mp4
+```
+
+Browser GPU capture is local-mode only. It maps to platform-native Chrome GPU backends: Metal on macOS, D3D11 on Windows, and EGL on Linux. Use `--no-browser-gpu` or Docker mode when exact cross-machine reproducibility matters more than local render speed.
+
## Workers
Each render worker launches a **separate Chrome browser process** to capture frames in parallel. More workers can speed up rendering, but each one consumes ~256 MB of RAM and significant CPU.
diff --git a/docs/guides/troubleshooting.mdx b/docs/guides/troubleshooting.mdx
index cceb52c4c..aa9007bb7 100644
--- a/docs/guides/troubleshooting.mdx
+++ b/docs/guides/troubleshooting.mdx
@@ -128,9 +128,10 @@ If your issue is about a specific coding mistake (animations not working, video
1. Use `--quality draft` during development for faster encoding
2. Run `npx hyperframes benchmark` to find the optimal worker count for your system
- 3. Use `--gpu` for hardware-accelerated encoding (local mode only)
- 4. Reduce `--fps` to 24 if 30fps is not needed
- 5. Check that your composition does not have unnecessary elements or overly complex animations
+ 3. Local Chrome/WebGL GPU capture is enabled automatically; compare with `--no-browser-gpu` if troubleshooting
+ 4. Use `--gpu` for hardware-accelerated encoding (local mode only)
+ 5. Reduce `--fps` to 24 if 30fps is not needed
+ 6. Check that your composition does not have unnecessary elements or overly complex animations
See [Rendering: Options](/guides/rendering#options) for all available flags.
diff --git a/docs/guides/video-editor-cheatsheet.mdx b/docs/guides/video-editor-cheatsheet.mdx
new file mode 100644
index 000000000..f8a633d2e
--- /dev/null
+++ b/docs/guides/video-editor-cheatsheet.mdx
@@ -0,0 +1,267 @@
+---
+title: Video Editor Cheatsheet
+description: "Fast reference for video editors and creative people directing agents, cutting timing, tweaking layouts, previewing, and publishing HyperFrames projects."
+---
+
+Use this as a fast reference when you are directing agents, cutting timing, making visual layout tweaks, previewing, and sharing HyperFrames projects.
+
+## The Fast Loop
+
+```bash
+npx hyperframes init my-video --example blank
+cd my-video
+npx hyperframes preview
+```
+
+Keep the preview running while your agent edits `index.html` or files in `compositions/`. The Studio updates automatically, so you can direct the agent, scrub the result, make manual visual tweaks, then repeat.
+
+Most production work should feel like this:
+
+1. Ask the agent for the first cut, scene, caption pass, transition, or cleanup.
+2. Use the Studio preview and timeline to check timing.
+3. Use manual DOM editing for Figma-like layout tweaks: select elements, move them, and adjust visual properties directly.
+4. Ask the agent to clean up or generalize anything you changed manually.
+5. Lint, validate, render, and publish.
+
+Before showing or rendering a project:
+
+```bash
+npx hyperframes lint
+npx hyperframes validate
+npx hyperframes render --quality standard --output review.mp4
+```
+
+For fast iteration renders, use draft quality:
+
+```bash
+npx hyperframes render --quality draft --output draft.mp4
+```
+
+For final delivery:
+
+```bash
+npx hyperframes render --quality high --fps 30 --output final.mp4
+```
+
+## Terminal Shortcuts
+
+Move around projects quickly:
+
+```bash
+pwd # show current folder
+ls # list files
+cd my-video # enter a project folder
+cd .. # go up one folder
+cd - # jump back to the previous folder
+open . # open the current folder in Finder on macOS
+code . # open the current folder in VS Code, if installed
+```
+
+Common HyperFrames project folders:
+
+```bash
+cd assets # source videos, images, audio
+cd compositions # reusable scenes and overlays
+cd .. # back to the project root
+```
+
+Run HyperFrames commands from the project root, where `index.html` lives. If you are not sure where you are, run `pwd` then `ls`. If you see `index.html`, you are in the right place.
+
+## Preview Shortcuts
+
+Start the Studio:
+
+```bash
+npx hyperframes preview
+```
+
+Use a different port if `3002` is already busy:
+
+```bash
+npx hyperframes preview --port 4567
+```
+
+Inside the Studio:
+
+| Shortcut | Use |
+| --- | --- |
+| `Space` | Play or pause (focus on the page body) |
+| `Left Arrow` / `Right Arrow` | Nudge seek bar by 1 second (seek bar focused) |
+| `Shift+Left Arrow` / `Shift+Right Arrow` | Nudge seek bar by 5 seconds (seek bar focused) |
+| `Shift+T` | Show or hide the timeline editor |
+| `Cmd+1` / `Ctrl+1` | Switch to Compositions |
+| `Cmd+2` / `Ctrl+2` | Switch to Assets |
+| `Delete` / `Backspace` | Delete the selected timeline clip (when not typing in an editor) |
+| `Escape` | Leave a sub-composition or close editor dialogs |
+
+
+ Preview uses the same runtime as rendering, so the visual frame matches the output. If preview stutters on a heavy frame but the render is clean, that is expected — preview plays in real time, render captures one frame at a time.
+
+
+## Agent-Led Editing
+
+Ask the agent to verify visible changes in the browser. For a user-visible edit, a good handoff is:
+
+```
+Run the preview, check it with agent-browser, take a screenshot, and render a draft MP4 to take a look at the frames with ffmpeg.
+```
+
+## Manual DOM Editing
+
+In the Studio, you can edit the DOM visually for the final 10% of creative adjustment where dragging is faster than describing.
+
+Use manual DOM editing for:
+
+- moving titles, captions, product cards, logos, and overlays into position
+- adjusting size, spacing, opacity, color, and other visual properties
+- checking composition balance at an exact timestamp
+- making Figma-like placement tweaks
+
+Use agents for:
+
+- creating scenes from scratch
+- refactoring repeated visual patterns
+- wiring GSAP timelines
+- fixing broken timing, layout overflow, or render errors
+- turning a manual visual tweak into reusable, clean HTML/CSS
+
+After manual DOM edits, ask the agent to inspect the diff and keep the source clean:
+
+```
+I moved the hero title and resized the CTA manually in Studio. Inspect the changes, clean up the CSS if needed, then run lint and validate.
+```
+
+## CLI Commands Editors Use Most
+
+| Command | Use it for |
+| --- | --- |
+| `npx hyperframes init my-video` | Create a new project |
+| `npx hyperframes init my-video --example warm-grain` | Start from a visual template |
+| `npx hyperframes init my-video --video source.mp4` | Import video and generate captions from the source audio |
+| `npx hyperframes capture https://example.com` | Capture a website as source material for a video |
+| `npx hyperframes preview` | Open the live Studio preview |
+| `npx hyperframes lint` | Catch structural mistakes before preview or render |
+| `npx hyperframes validate` | Run the composition in headless Chrome to catch runtime errors |
+| `npx hyperframes inspect` | Find text overflow and layout problems across the timeline |
+| `npx hyperframes snapshot --at 1,3,5` | Save PNG checks at exact timestamps |
+| `npx hyperframes render --output final.mp4` | Render the video |
+| `npx hyperframes publish` | Upload the project and get a shareable HyperFrames URL |
+| `npx hyperframes doctor` | Check Node.js, FFmpeg, Chrome, Docker, and other dependencies |
+| `npx hyperframes docs` | Open local CLI docs |
+| `npx hyperframes upgrade` | Check for a newer CLI version |
+
+## Timing Cheatsheet
+
+Every visible timed layer should usually be a clip:
+
+```html
+<div data-start="0" data-duration="3" data-track-index="1">
+  Opening title
+</div>
+```
+
+Use these attributes like timeline controls:
+
+| Attribute | Video editor meaning |
+| --- | --- |
+| `data-start` | When the layer starts |
+| `data-duration` | How long the layer stays active |
+| `data-track-index` | Timeline track number |
+| `data-media-start` | Offset into a media file |
+| `data-volume` | Audio volume for an audio or video clip |
+| `data-composition-src` | Nested scene or reusable overlay |
+
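+For example, to start a clip 2s into the edit, play 6s of the source beginning
+at its 12-second mark, and lower its audio (the 0–1 volume scale is assumed):
+
+```html
+<video src="assets/source.mp4"
+       data-start="2" data-duration="6"
+       data-media-start="12" data-volume="0.4"></video>
+```
+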
+For GSAP animation, register one paused timeline per composition:
+
+```html
+<script>
+  const tl = gsap.timeline({ paused: true });
+  // ...tweens...
+  tl.set({}, {}, 5); // sentinel: keep the timeline as long as the edit
+  window.__timelines = window.__timelines || {};
+  window.__timelines["<composition-id>"] = tl; // key matches the composition id
+</script>
+```
+
+
+ If a video cuts off early, check that the GSAP timeline is at least as long as the intended edit. The final `tl.set({}, {}, 5)` pattern is the fix.
+
+
+## Render Presets
+
+| Goal | Command |
+| --- | --- |
+| Fast iteration | `npx hyperframes render --quality draft --output draft.mp4` |
+| Review link | `npx hyperframes render --quality standard --output review.mp4` |
+| Final export | `npx hyperframes render --quality high --fps 30 --output final.mp4` |
+| Transparent overlay | `npx hyperframes render --format webm --output overlay.webm` |
+| Deterministic output | `npx hyperframes render --docker --output final.mp4` |
+
+Use WebM for transparent overlays, captions, and lower thirds. Use `--docker` when you need pixel-consistent output across different machines.
+
+## Publish and Share
+
+Use `publish` when you want to share the editable project, not just the rendered MP4:
+
+```bash
+npx hyperframes publish
+```
+
+Publish zips the current project, uploads it, and prints a stable `hyperframes.dev` URL. The URL includes a claim token so the recipient can open it, claim the project, and continue editing in the web app.
+
+```bash
+npx hyperframes publish ./my-video # publish a specific folder
+npx hyperframes publish --yes # skip the confirmation prompt in scripts
+```
+
+Publish expects an `index.html` at the project root. It ignores `.git`, `node_modules`, `dist`, `.next`, and `coverage`.
+
+## What Agent Browser Is
+
+`agent-browser` is a browser automation tool for AI agents. It opens Chrome, navigates to your preview, clicks controls, reads page state, and captures screenshots. It is how an agent proves the video preview actually works instead of only saying the code looks right.
+
+Typical verification flow:
+
+```bash
+agent-browser open http://localhost:3002
+agent-browser snapshot -i
+agent-browser screenshot --screenshot-dir ./qa
+```
+
+Use it when you want the agent to open the HyperFrames Studio preview, play or scrub the video, click timeline controls, inspect visible UI text, capture screenshots for review, or record proof of a tested flow.
+
+For editor-facing changes, keep `npx hyperframes preview` running, then have the agent use `agent-browser` against the local preview URL.
+
+## Quick Fixes
+
+| Problem | Command or check |
+| --- | --- |
+| Preview will not start | `npx hyperframes doctor` |
+| Port already in use | `npx hyperframes preview --port 4567` |
+| Render fails | `npx hyperframes lint` then `npx hyperframes validate` |
+| Need exact frame checks | `npx hyperframes snapshot --at 1,2.5,5` |
+| Text overflows in the frame | `npx hyperframes inspect` |
+| Final render is too slow | Try `--quality draft`, reduce image sizes, or lower `--fps` |
+| Need to share editable project | `npx hyperframes publish` |
+
+
+
+ How to direct AI agents to build better videos
+
+
+ Timing, tracks, and GSAP timeline patterns
+
+
+ Pitfalls the linter can't catch
+
+
+ Full command reference
+
+
diff --git a/docs/packages/cli.mdx b/docs/packages/cli.mdx
index 779fce12a..f9ae12f72 100644
--- a/docs/packages/cli.mdx
+++ b/docs/packages/cli.mdx
@@ -148,6 +148,9 @@ This is suppressed in CI environments, non-TTY shells, and when `HYPERFRAMES_NO_
# Agent mode (default) — --example is required
npx hyperframes init my-video --example blank --video video.mp4
+ # Include Tailwind CSS browser-runtime support
+ npx hyperframes init my-video --example blank --tailwind
+
# Human mode — interactive prompts
npx hyperframes init --human-friendly
```
@@ -157,6 +160,7 @@ This is suppressed in CI environments, non-TTY shells, and when `HYPERFRAMES_NO_
| `--example, -e` | Example to scaffold (required in default mode, interactive in `--human-friendly`) |
| `--video, -V` | Path to a video file (MP4, WebM, MOV) |
| `--audio, -a` | Path to an audio file (MP3, WAV, M4A) |
+ | `--tailwind` | Add Tailwind CSS browser-runtime support to scaffolded HTML |
| `--skip-skills` | Skip AI coding skills installation |
| `--skip-transcribe` | Skip automatic whisper transcription |
| `--model` | Whisper model for transcription (e.g. `small.en`, `medium.en`, `large-v3`) |
@@ -173,6 +177,8 @@ This is suppressed in CI environments, non-TTY shells, and when `HYPERFRAMES_NO_
In default (agent) mode, `--example` is required — the CLI errors with a usage example if missing. In `--human-friendly` mode, you choose interactively. When `--video` or `--audio` is provided, the CLI automatically transcribes the audio with Whisper and patches captions into the composition (use `--skip-transcribe` to disable).
+ `--tailwind` injects the pinned Tailwind v4 browser runtime into scaffolded HTML and exposes a `window.__tailwindReady` promise that renders wait on before capturing frame 0. Use the `/tailwind` skill when editing these projects so agents follow v4 CSS-first patterns instead of v3 `tailwind.config.js` and `@tailwind` directive patterns. The browser runtime is still intended for scaffolded projects and quick iteration; for fully offline or locked-down production renders, compile Tailwind to CSS and include the stylesheet directly.
+
After scaffolding, the CLI installs AI coding skills for Claude Code, Gemini CLI, and Codex CLI (use `--skip-skills` to disable). See [`skills`](#skills) command.
See [Examples](/examples) for full details.
@@ -335,6 +341,59 @@ This is suppressed in CI environments, non-TTY shells, and when `HYPERFRAMES_NO_
Combine `tts` with `transcribe` to generate narration and word-level timestamps for captions in a single workflow: generate the audio with `tts`, then transcribe the output with `transcribe` to get word-level timing.
+
+ ### `remove-background`
+
+ Remove the background from a video or image using a local AI model. The output is transparent media you can drop into any composition's `<video>` or `<img>` element — no green screen required.
+
+ ```bash
+ # Default: VP9-with-alpha WebM (HTML5-native, ~1 MB / 4s @ 1080p)
+ npx hyperframes remove-background avatar.mp4 -o transparent.webm
+
+ # ProRes 4444 .mov for editing round-trip
+ npx hyperframes remove-background avatar.mp4 -o transparent.mov
+
+ # Single image → transparent PNG
+ npx hyperframes remove-background portrait.jpg -o cutout.png
+
+ # Layer separation: cutout AND inverse-alpha background plate in one pass
+ npx hyperframes remove-background avatar.mp4 \
+ -o subject.webm --background-output plate.webm
+
+ # Force CPU on a machine that has CoreML or CUDA
+ npx hyperframes remove-background avatar.mp4 -o transparent.webm --device cpu
+
+ # Inspect detected providers without rendering
+ npx hyperframes remove-background --info
+ ```
+
+ | Flag | Description |
+ |------|-------------|
+ | `--output, -o` | Output path. Format inferred from extension: `.webm` (default), `.mov`, `.png` |
+ | `--background-output, -b` | Optional second output: inverse-alpha background plate (subject region transparent, surroundings opaque). Same source RGB, complementary mask. Must be `.webm` or `.mov`. Hole-cut, not inpainted — composite something underneath to fill the hole. |
+ | `--device` | Execution provider: `auto` (default), `cpu`, `coreml`, `cuda` |
+ | `--quality` | WebM encoder preset: `fast` (crf 30, smallest), `balanced` (crf 18, default), `best` (crf 12, near-lossless). Higher quality keeps the cutout's RGB closer to the source mp4 — important when overlaying the cutout on its own source for text-behind-subject effects. Applies to both `--output` and `--background-output`. Ignored for `.mov` / `.png`. |
+ | `--info` | Print detected execution providers and exit (no render) |
+ | `--json` | Output result as JSON |
+
+ The model is `u2net_human_seg` (MIT, ~168 MB ONNX). Weights download to `~/.cache/hyperframes/background-removal/models/` on first run and are reused thereafter. Peak inference RAM is ~1.5 GB.
+
+ `--device auto` picks CoreML on Apple Silicon, CUDA when available, and CPU otherwise. The CLI bundles the CPU build of `onnxruntime-node`; for CUDA, set `HYPERFRAMES_CUDA=1` and provide a GPU-enabled `onnxruntime-node` build.
+
+ Output formats:
+
+ | Format | Use case | Size (4s @ 1080p) |
+ |--------|----------|-------------------|
+ | `.webm` (VP9 alpha) | Drop into `<video>` for HTML5-native transparent playback | ~1 MB |
+ | `.mov` (ProRes 4444) | Editing round-trip in Premiere / Resolve / DaVinci | ~50 MB |
+ | `.png` | Single-image cutout | varies |
+
+
+ The `<video>` element in Chrome only respects the alpha plane when the WebM is encoded as `yuva420p` with the `alpha_mode=1` metadata tag. The CLI sets both automatically — if you re-encode the output yourself, preserve those flags.
+
+
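+ If you do need to re-encode the cutout, a minimal sketch of keeping both flags (the paths are placeholders; only `yuva420p` and the `alpha_mode=1` stream metadata are required, any other VP9 settings are up to you):
+
+ ```typescript
+ import { spawn } from "node:child_process";
+
+ // Re-encode while preserving the alpha plane; Chrome ignores it without the
+ // alpha_mode=1 tag and the yuva420p pixel format.
+ const proc = spawn(
+   "ffmpeg",
+   [
+     "-y",
+     "-i", "transparent.webm",
+     "-c:v", "libvpx-vp9",
+     "-pix_fmt", "yuva420p",
+     "-metadata:s:v:0", "alpha_mode=1",
+     "reencoded.webm",
+   ],
+   { stdio: "inherit" },
+ );
+
+ proc.on("exit", (code) => {
+   if (code !== 0) console.error(`ffmpeg exited with code ${code}`);
+ });
+ ```
+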
+ See the [Remove Background guide](/guides/remove-background) for the full workflow — using transparent videos in compositions, performance per platform, limitations of `u2net_human_seg`, and free alternative tools when this model isn't the right fit.
+
### `capture`
Capture a website — extract screenshots, design tokens, fonts, assets, and animations for video production:
@@ -530,6 +589,12 @@ This is suppressed in CI environments, non-TTY shells, and when `HYPERFRAMES_NO_
# With options
npx hyperframes render --output output.mp4 --fps 60 --quality high
+
+ # Opt out of local browser GPU capture
+ npx hyperframes render --no-browser-gpu --output cpu-browser.mp4
+
+ # Add hardware FFmpeg encoding
+ npx hyperframes render --gpu --output gpu.mp4
```
| Flag | Values | Default | Description |
@@ -540,14 +605,57 @@ This is suppressed in CI environments, non-TTY shells, and when `HYPERFRAMES_NO_
| `--quality` | draft, standard, high | standard | Encoding quality preset (drives CRF/bitrate) |
| `--crf` | 0-51 | — | Override encoder CRF (lower = higher quality). Mutually exclusive with `--video-bitrate` |
| `--video-bitrate` | e.g. `10M`, `5000k` | — | Target video bitrate. Mutually exclusive with `--crf` |
- | `--hdr` | — | off | Detect HDR sources and output HDR10 (H.265 10-bit, BT.2020 PQ/HLG). MP4 only. SDR-only compositions are unaffected. See [HDR Rendering](/guides/hdr) |
+ | `--hdr` | — | off | Force HDR output even if no HDR sources are detected. MP4 only. See [HDR Rendering](/guides/hdr) |
+ | `--sdr` | — | off | Force SDR output even if HDR sources are detected |
| `--workers` | 1-8 | 4 | Parallel render workers |
- | `--gpu` | — | off | GPU encoding (NVENC, VideoToolbox, VAAPI) |
+ | `--gpu` | — | off | GPU encoding (NVENC, VideoToolbox, VAAPI, QSV) |
+ | `--browser-gpu` / `--no-browser-gpu` | — | on locally, off in Docker | Use or opt out of host GPU acceleration for local Chrome/WebGL capture |
| `--docker` | — | off | Use Docker for [deterministic rendering](/concepts/determinism) |
| `--quiet` | — | off | Suppress verbose output |
+ | `--variables` | JSON object | — | Variable overrides merged over `data-composition-variables` defaults. Read via `window.__hyperframes.getVariables()` |
+ | `--variables-file` | path | — | Path to a JSON file with variable overrides (alternative to `--variables`) |
+ | `--strict-variables` | — | off | Fail render if any `--variables` key is undeclared or has a wrong type vs the composition's `data-composition-variables`. Without this flag, mismatches print as warnings and the render continues. |
CRF and target bitrate default to the `--quality` preset. Use `--crf` or `--video-bitrate` for fine-grained overrides; `RenderConfig.crf` and `RenderConfig.videoBitrate` accept the same overrides programmatically.
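+
+ A minimal sketch of the programmatic equivalent (where `RenderConfig` is exported from is an assumption here; check the producer package's types):
+
+ ```typescript
+ import type { RenderConfig } from "@hyperframes/producer"; // assumed export location
+
+ // Same rule as the CLI flags: set either crf or videoBitrate, never both.
+ const encodingOverrides: Partial<RenderConfig> = {
+   crf: 18,
+   // videoBitrate: "10M",
+ };
+ ```
+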
+ #### Parametrized renders
+
+ Render the same composition with different content by declaring variables on the composition root and overriding them at render time:
+
+ ```html index.html
+ <!-- Composition root (illustrative markup; only data-composition-variables
+      and its schema come from the @hyperframes/core docs): -->
+ <html
+   data-composition-variables='[
+     {"id":"title","label":"Title","type":"string","default":"Hello"},
+     {"id":"theme","label":"Theme","type":"string","default":"light"}
+   ]'
+ >
+ ```
+
+ ```bash
+ # Render with declared defaults (preview also uses the defaults)
+ npx hyperframes render --output default.mp4
+
+ # Override at render time — missing keys fall through to declared defaults
+ npx hyperframes render --variables '{"title":"Q4 Report","theme":"dark"}' --output q4.mp4
+
+ # Pass values from a JSON file
+ npx hyperframes render --variables-file ./vars.json --output out.mp4
+ ```
+
+ `getVariables()` returns the merged result of declared defaults and any `--variables` overrides, so the same composition runs unchanged in dev preview and in production renders.
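+
+ A minimal sketch of reading those variables from composition code (the import mirrors the `@hyperframes/core` example in the package docs; the type argument and DOM usage are illustrative):
+
+ ```typescript
+ import { getVariables } from "@hyperframes/core";
+
+ // Declared defaults merged with any --variables / --variables-file overrides.
+ const { title, theme } = getVariables<{ title: string; theme: string }>();
+
+ document.querySelector("h1")!.textContent = title;
+ document.body.dataset["theme"] = theme;
+ ```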
+
#### WebM with Transparency
Use `--format webm` to render compositions with a transparent background. This produces VP9 video with alpha channel in a WebM container — the standard format for overlayable video.
@@ -689,7 +797,7 @@ This is suppressed in CI environments, non-TTY shells, and when `HYPERFRAMES_NO_
### `skills`
- Install HyperFrames and GSAP skills for AI coding tools:
+ Install HyperFrames skills for AI coding tools, including first-party runtime adapter skills:
```bash
# Install to all default targets (Claude Code, Gemini CLI, Codex CLI)
@@ -708,7 +816,7 @@ This is suppressed in CI environments, non-TTY shells, and when `HYPERFRAMES_NO_
| `--codex` | Install to Codex CLI (`~/.codex/skills/`) |
| `--cursor` | Install to Cursor (`.cursor/skills/` in current project) |
- Skills are fetched from GitHub and include composition authoring, GSAP animation patterns, registry block/component wiring, and other domain-specific knowledge. The `init` command also offers to install skills automatically after scaffolding a project.
+ Skills are fetched from GitHub and include composition authoring; Tailwind v4 browser-runtime guidance; GSAP animation patterns; adapter patterns for Anime.js, CSS animations, Lottie, Three.js, and WAAPI; registry block/component wiring; and other domain-specific knowledge. The `init` command also offers to install skills automatically after scaffolding a project.
#### Troubleshooting: `fatal: active post-checkout hook found during git clone`
diff --git a/docs/packages/core.mdx b/docs/packages/core.mdx
index b7dbd543a..56c32b4cd 100644
--- a/docs/packages/core.mdx
+++ b/docs/packages/core.mdx
@@ -130,6 +130,18 @@ const meta: CompositionMetadata = extractCompositionMetadata(htmlString);
// data-composition-variables='[{"id":"title","label":"Title","type":"string","default":"Hello"}]'
// >
+// Read resolved variables inside a composition (declared defaults +
+// CLI overrides + per-instance host data-variable-values):
+import { getVariables } from '@hyperframes/core';
+const { title } = getVariables<{ title: string }>();
+
+// Validate CLI / host overrides against the declared schema:
+import { validateVariables, formatVariableValidationIssue } from '@hyperframes/core';
+const issues = validateVariables({ title: 'Hello', count: 'three' }, meta.variables);
+for (const issue of issues) {
+ console.warn(formatVariableValidationIssue(issue));
+}
+
// Generate HTML from structured data
const html = generateHyperframesHtml(elements, {
animations,
diff --git a/docs/packages/engine.mdx b/docs/packages/engine.mdx
index 430abe0e1..9057d1591 100644
--- a/docs/packages/engine.mdx
+++ b/docs/packages/engine.mdx
@@ -175,7 +175,7 @@ await applyFaststart(inputPath, outputPath);
// Detect GPU encoding support
const gpu = await detectGpuEncoder();
-// gpu: "nvenc" | "videotoolbox" | "vaapi" | null
+// gpu: "nvenc" | "videotoolbox" | "vaapi" | "qsv" | null
```
#### WebM with VP9 Alpha
@@ -282,7 +282,7 @@ await server.close();
The engine exports two layers of HDR support: **color-space utilities** that classify sources and configure the FFmpeg encoder, and a **WebGPU readback runtime** for capturing CSS-animated DOM directly into HDR.
-For end-to-end HDR rendering (HDR video and image sources composited into an HDR10 MP4) use the [producer](/packages/producer) or the CLI's `--hdr` flag — see [HDR Rendering](/guides/hdr). The APIs below are for custom integrations.
+For end-to-end HDR rendering (HDR video and image sources composited into an HDR10 MP4) use the [producer](/packages/producer) or the CLI render pipeline with HDR auto-detect / `--hdr` / `--sdr` — see [HDR Rendering](/guides/hdr). The APIs below are for custom integrations.
### Color space utilities
@@ -344,7 +344,7 @@ const pqRgb = float16ToPqRgb(rgba16, width, height, bytesPerRow);
```
- This path requires **headed Chrome with `--enable-unsafe-webgpu`** — WebGPU is unavailable in `chrome-headless-shell`. It is *not* used by the default `--hdr` render pipeline (which extracts HDR pixels from sources via FFmpeg and composites in Node). Use it only for advanced custom pipelines that need CSS animations driving HDR pixel output.
+ This path requires **headed Chrome with `--enable-unsafe-webgpu`** — WebGPU is unavailable in `chrome-headless-shell`. It is *not* used by the default HDR-aware render pipeline (which extracts HDR pixels from sources via FFmpeg and composites in Node). Use it only for advanced custom pipelines that need CSS animations driving HDR pixel output.
## The `window.__hf` Protocol
diff --git a/docs/packages/producer.mdx b/docs/packages/producer.mdx
index 88ffb07ed..1886af13b 100644
--- a/docs/packages/producer.mdx
+++ b/docs/packages/producer.mdx
@@ -213,18 +213,31 @@ npx hyperframes render --docker --output output.mp4
The producer supports hardware-accelerated encoding for faster renders:
-| Platform | Encoder | Flag |
-|----------|---------|------|
+| Platform | Encoder | Selection |
+|----------|---------|-----------|
| NVIDIA | NVENC | Auto-detected |
| macOS | VideoToolbox | Auto-detected |
| Linux | VAAPI | Auto-detected |
+| Intel | QSV | Auto-detected |
-GPU encoding is automatically used when available. To check your system's capabilities:
+When GPU encoding is enabled, HyperFrames detects the available FFmpeg hardware encoder automatically. To check your system's capabilities:
```bash
npx hyperframes doctor
```
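+
+Programmatically, the same detection is available through the engine's `detectGpuEncoder` helper (a minimal sketch; the engine package docs cover the full API):
+
+```typescript
+import { detectGpuEncoder } from "@hyperframes/engine";
+
+// Resolves to "nvenc" | "videotoolbox" | "vaapi" | "qsv" | null.
+const encoder = await detectGpuEncoder();
+console.log(encoder ? `GPU encoder available: ${encoder}` : "No GPU encoder detected");
+```
+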
+The CLI enables local Chrome/WebGL GPU capture automatically and supports `--no-browser-gpu` as an opt-out. When using the producer API directly, pass an engine config override:
+
+```typescript
+import { createRenderJob, resolveConfig } from '@hyperframes/producer';
+
+const job = createRenderJob({
+ fps: 30,
+ quality: 'standard',
+ producerConfig: resolveConfig({ browserGpuMode: 'hardware' }),
+});
+```
+
## Additional Exports
The producer also re-exports key engine functionality for convenience:
diff --git a/docs/public/catalog-index.json b/docs/public/catalog-index.json
index f2f7abd3f..2226c308f 100644
--- a/docs/public/catalog-index.json
+++ b/docs/public/catalog-index.json
@@ -12,6 +12,35 @@
"href": "/catalog/blocks/app-showcase",
"preview": "https://static.heygen.ai/hyperframes-oss/docs/images/catalog/blocks/app-showcase.png"
},
+ {
+ "name": "apple-money-count",
+ "type": "block",
+ "title": "Apple Money Count",
+ "description": "Apple-style finance counter that counts from $0 to $10,000, flashes green, and bursts money icons with sound.",
+ "tags": [
+ "showcase",
+ "finance",
+ "kinetic",
+ "youtube",
+ "sfx"
+ ],
+ "href": "/catalog/blocks/apple-money-count",
+ "preview": "https://static.heygen.ai/hyperframes-oss/docs/images/catalog/blocks/apple-money-count.png"
+ },
+ {
+ "name": "blue-sweater-intro-video",
+ "type": "block",
+ "title": "Blue Sweater Intro Video",
+ "description": "Warm AI creator intro sequence that resolves into an X follow card for @_blue_sweater_.",
+ "tags": [
+ "showcase",
+ "ai",
+ "creator",
+ "sfx"
+ ],
+ "href": "/catalog/blocks/blue-sweater-intro-video",
+ "preview": "https://static.heygen.ai/hyperframes-oss/docs/images/catalog/blocks/blue-sweater-intro-video.png"
+ },
{
"name": "chromatic-radial-split",
"type": "block",
@@ -201,6 +230,36 @@
"href": "/catalog/blocks/macos-notification",
"preview": "https://static.heygen.ai/hyperframes-oss/docs/images/catalog/blocks/macos-notification.png"
},
+ {
+ "name": "north-korea-locked-down",
+ "type": "block",
+ "title": "North Korea Locked Down",
+ "description": "Realistic map zoom into North Korea with a red scribble circle, locked-down pop-up label, and reddish editorial wash.",
+ "tags": [
+ "showcase",
+ "map",
+ "annotation",
+ "youtube",
+ "kinetic"
+ ],
+ "href": "/catalog/blocks/north-korea-locked-down",
+ "preview": "https://static.heygen.ai/hyperframes-oss/docs/images/catalog/blocks/north-korea-locked-down.png"
+ },
+ {
+ "name": "nyc-paris-flight",
+ "type": "block",
+ "title": "NYC Paris Flight",
+ "description": "Apple-style realistic map animation with a plane flying from New York to Paris, marker circle, landing pop, and sound effects.",
+ "tags": [
+ "showcase",
+ "travel",
+ "map",
+ "youtube",
+ "sfx"
+ ],
+ "href": "/catalog/blocks/nyc-paris-flight",
+ "preview": "https://static.heygen.ai/hyperframes-oss/docs/images/catalog/blocks/nyc-paris-flight.png"
+ },
{
"name": "reddit-post",
"type": "block",
@@ -483,6 +542,20 @@
"href": "/catalog/blocks/ui-3d-reveal",
"preview": "https://static.heygen.ai/hyperframes-oss/docs/images/catalog/blocks/ui-3d-reveal.png"
},
+ {
+ "name": "vpn-youtube-spot",
+ "type": "block",
+ "title": "VPN YouTube Spot",
+ "description": "Snappy Apple-style YouTube insert showing a phone finding and installing a friendly VPN app with sound effects.",
+ "tags": [
+ "app",
+ "showcase",
+ "youtube",
+ "sfx"
+ ],
+ "href": "/catalog/blocks/vpn-youtube-spot",
+ "preview": "https://static.heygen.ai/hyperframes-oss/docs/images/catalog/blocks/vpn-youtube-spot.png"
+ },
{
"name": "whip-pan",
"type": "block",
diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx
index 498816d66..6add780c9 100644
--- a/docs/quickstart.mdx
+++ b/docs/quickstart.mdx
@@ -13,7 +13,7 @@ Install the HyperFrames skills, then describe the video you want:
npx skills add heygen-com/hyperframes
```
-This teaches your agent (Claude Code, Cursor, Gemini CLI, Codex) how to write correct compositions and GSAP animations. In Claude Code the skills register as slash commands — `/hyperframes` for composition authoring, `/hyperframes-cli` for CLI commands, and `/gsap` for animation help. Invoking the slash command loads the skill context explicitly, which produces correct output the first time.
+This teaches your agent (Claude Code, Cursor, Gemini CLI, Codex) how to write correct compositions, GSAP timelines, Tailwind v4 browser-runtime styles, and first-party adapter animations. In Claude Code the skills register as slash commands — `/hyperframes` for composition authoring, `/hyperframes-cli` for the dev-loop commands (init, lint, preview, render), `/hyperframes-media` for asset preprocessing (TTS, transcription, background removal), `/tailwind` for `init --tailwind` projects, `/gsap` for timeline animation help, and `/animejs`, `/css-animations`, `/lottie`, `/three`, or `/waapi` when a composition uses those runtimes. Invoking the slash command loads the skill context explicitly, which produces correct output the first time.
Claude Design uses a different entry path. Open [`docs/guides/claude-design-hyperframes.md`](https://github.com/heygen-com/hyperframes/blob/main/docs/guides/claude-design-hyperframes.md) on GitHub, click the download button (↓) to save it, then attach to your Claude Design chat. It produces a valid first draft you can refine in any AI coding agent. See the [Claude Design guide](/guides/claude-design).
@@ -47,7 +47,7 @@ Copy any of these into your agent to get started.
The agent handles scaffolding, animation, and rendering. See the [prompting guide](/guides/prompting) for more patterns.
- Skills encode HyperFrames-specific patterns — like required `class="clip"` on timed elements, GSAP timeline registration, and `data-*` attribute semantics — that are not in generic web docs. Using skills produces correct compositions from the start.
+ Skills encode HyperFrames-specific patterns — like required `class="clip"` on timed elements, GSAP timeline registration, adapter registries such as `window.__hfLottie`, and `data-*` attribute semantics — that are not in generic web docs. Using skills produces correct compositions from the start.
## Option 2: Start a project manually
@@ -237,6 +237,6 @@ The agent handles scaffolding, animation, and rendering. See the [prompting guid
Start from built-in examples like Warm Grain and Swiss Grid
- Explore render options: quality presets, Docker mode, and GPU encoding
+ Explore render options: quality presets, Docker mode, and GPU acceleration
diff --git a/docs/schema/registry-item.json b/docs/schema/registry-item.json
index ba72da93c..a2051a5bf 100644
--- a/docs/schema/registry-item.json
+++ b/docs/schema/registry-item.json
@@ -35,6 +35,14 @@
"type": "string",
"minLength": 1
},
+ "authorUrl": {
+ "type": "string",
+ "format": "uri"
+ },
+ "sourcePrompt": {
+ "type": "string",
+ "minLength": 1
+ },
"license": {
"type": "string",
"minLength": 1,
diff --git a/package.json b/package.json
index c3d5d5d82..5310bb8a2 100644
--- a/package.json
+++ b/package.json
@@ -11,7 +11,7 @@
"type": "module",
"scripts": {
"dev": "bun run studio",
- "build": "bun run --filter '!@hyperframes/cli' build && bun run --filter @hyperframes/cli build",
+ "build": "bun run --filter @hyperframes/core build && bun run --filter '@hyperframes/{core,engine,producer,player,studio,shader-transitions}' build && bun run --filter @hyperframes/cli build",
"build:producer": "bun run --filter @hyperframes/producer build",
"studio": "bun run --filter @hyperframes/studio dev",
"build:hyperframes-runtime": "bun run --filter @hyperframes/core build:hyperframes-runtime",
diff --git a/packages/cli/package.json b/packages/cli/package.json
index cd6c2e397..d7c49870c 100644
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -1,6 +1,6 @@
{
"name": "@hyperframes/cli",
- "version": "0.4.27",
+ "version": "0.4.45",
"description": "HyperFrames CLI — create, preview, and render HTML video compositions",
"repository": {
"type": "git",
@@ -33,6 +33,7 @@
"giget": "^3.2.0",
"hono": "^4.0.0",
"mime-types": "^3.0.2",
+ "onnxruntime-node": "^1.20.0",
"open": "^10.0.0",
"postcss": "^8.5.8",
"prettier": "^3.8.1",
@@ -56,7 +57,7 @@
"vitest": "^3.2.4"
},
"optionalDependencies": {
- "@google/genai": "^1.50.0"
+ "@google/genai": "^1.50.1"
},
"engines": {
"node": ">=22"
diff --git a/packages/cli/scripts/build-copy.mjs b/packages/cli/scripts/build-copy.mjs
index 9132c7095..926493a9d 100644
--- a/packages/cli/scripts/build-copy.mjs
+++ b/packages/cli/scripts/build-copy.mjs
@@ -82,6 +82,11 @@ async function main() {
cpSync(layoutAuditScript, join(DIST, "commands", "layout-audit.browser.js"));
}
+ const contrastAuditScript = join(CLI_ROOT, "src", "commands", "contrast-audit.browser.js");
+ if (existsSync(contrastAuditScript)) {
+ cpSync(contrastAuditScript, join(DIST, "commands", "contrast-audit.browser.js"));
+ }
+
copyMdFiles(join(CLI_ROOT, "src", "docs"), join(DIST, "docs"));
console.log("[build-copy] done");
diff --git a/packages/cli/src/background-removal/inference.test.ts b/packages/cli/src/background-removal/inference.test.ts
new file mode 100644
index 000000000..719bcab4d
--- /dev/null
+++ b/packages/cli/src/background-removal/inference.test.ts
@@ -0,0 +1,127 @@
+import { describe, expect, it } from "vitest";
+import { MEAN, STD, applyMask } from "./inference.js";
+
+// Regression: the u2net_human_seg model was trained with ImageNet
+// normalization. Drifting away from these exact values changes the input
+// tensor at every pixel and shifts the predicted alpha mask noticeably
+// (Miguel reproduced 8,317 pixel changes with delta up to 78/255 when std
+// was set to (1, 1, 1)). Reference:
+// https://github.com/danielgatis/rembg/blob/main/rembg/sessions/u2net_human_seg.py#L33
+describe("background-removal/inference — rembg u2net_human_seg parity", () => {
+ it("MEAN matches U2netHumanSegSession reference", () => {
+ expect(MEAN).toEqual([0.485, 0.456, 0.406]);
+ });
+
+ it("STD matches U2netHumanSegSession reference (ImageNet, not the base u2net's (1,1,1))", () => {
+ expect(STD).toEqual([0.229, 0.224, 0.225]);
+ });
+});
+
+// These tests pin the contract that `--background-output` is built on:
+// fg.alpha + bg.alpha === 255 per pixel, and the RGB plane is byte-identical
+// between fg and bg. A future change to the postprocess loop (different mask
+// threshold, premultiplied alpha, gamma-corrected compositing) that breaks
+// either invariant should fail here loudly.
+describe("background-removal/inference — applyMask invariants", () => {
+ function makeRgb(pixels: number): Buffer {
+ // Deterministic but non-trivial RGB so byte equality is meaningful.
+ const buf = Buffer.allocUnsafe(pixels * 3);
+ for (let i = 0; i < pixels; i++) {
+ buf[i * 3] = (i * 7) & 0xff;
+ buf[i * 3 + 1] = (i * 13 + 31) & 0xff;
+ buf[i * 3 + 2] = (i * 19 + 61) & 0xff;
+ }
+ return buf;
+ }
+
+ function makeMask(pixels: number): Buffer {
+ // Hit the saturation endpoints (0, 255) and a few mid-tone values so the
+ // 255-m inversion is exercised across the full byte range.
+ const buf = Buffer.allocUnsafe(pixels);
+ for (let i = 0; i < pixels; i++) buf[i] = (i * 37) & 0xff;
+ return buf;
+ }
+
+ it("dual-output: fg.alpha + bg.alpha === 255 for every pixel", () => {
+ const pixels = 64;
+ const rgb = makeRgb(pixels);
+ const mask = makeMask(pixels);
+ const fg = Buffer.allocUnsafe(pixels * 4);
+ const bg = Buffer.allocUnsafe(pixels * 4);
+
+ const result = applyMask(rgb, mask, fg, bg, pixels);
+
+ expect(result.fg).toBe(fg);
+ expect(result.bg).toBe(bg);
+ for (let i = 0; i < pixels; i++) {
+ const sum = fg[i * 4 + 3]! + bg[i * 4 + 3]!;
+ expect(sum).toBe(255);
+ }
+ });
+
+ it("dual-output: RGB triples are byte-identical between fg and bg", () => {
+ const pixels = 64;
+ const rgb = makeRgb(pixels);
+ const mask = makeMask(pixels);
+ const fg = Buffer.allocUnsafe(pixels * 4);
+ const bg = Buffer.allocUnsafe(pixels * 4);
+
+ applyMask(rgb, mask, fg, bg, pixels);
+
+ for (let i = 0; i < pixels; i++) {
+ expect(fg[i * 4]).toBe(bg[i * 4]);
+ expect(fg[i * 4 + 1]).toBe(bg[i * 4 + 1]);
+ expect(fg[i * 4 + 2]).toBe(bg[i * 4 + 2]);
+ // And both match the source.
+ expect(fg[i * 4]).toBe(rgb[i * 3]);
+ expect(fg[i * 4 + 1]).toBe(rgb[i * 3 + 1]);
+ expect(fg[i * 4 + 2]).toBe(rgb[i * 3 + 2]);
+ }
+ });
+
+ it("dual-output: fg.alpha equals the input mask", () => {
+ const pixels = 32;
+ const rgb = makeRgb(pixels);
+ const mask = makeMask(pixels);
+ const fg = Buffer.allocUnsafe(pixels * 4);
+ const bg = Buffer.allocUnsafe(pixels * 4);
+
+ applyMask(rgb, mask, fg, bg, pixels);
+
+ for (let i = 0; i < pixels; i++) {
+ expect(fg[i * 4 + 3]).toBe(mask[i]);
+ }
+ });
+
+ it("single-output: bg=null returns bg=null and writes only fg", () => {
+ const pixels = 32;
+ const rgb = makeRgb(pixels);
+ const mask = makeMask(pixels);
+ const fg = Buffer.allocUnsafe(pixels * 4);
+
+ const result = applyMask(rgb, mask, fg, null, pixels);
+
+ expect(result.bg).toBeNull();
+ expect(result.fg).toBe(fg);
+ for (let i = 0; i < pixels; i++) {
+ expect(fg[i * 4]).toBe(rgb[i * 3]);
+ expect(fg[i * 4 + 3]).toBe(mask[i]);
+ }
+ });
+
+ it("saturates correctly at mask=0 and mask=255", () => {
+ // mask=0 → fg.alpha=0 (transparent subject), bg.alpha=255 (fully opaque plate)
+ // mask=255 → fg.alpha=255 (fully opaque subject), bg.alpha=0 (transparent plate)
+ const rgb = Buffer.from([10, 20, 30, 40, 50, 60]);
+ const mask = Buffer.from([0, 255]);
+ const fg = Buffer.allocUnsafe(8);
+ const bg = Buffer.allocUnsafe(8);
+
+ applyMask(rgb, mask, fg, bg, 2);
+
+ expect(fg[3]).toBe(0);
+ expect(bg[3]).toBe(255);
+ expect(fg[7]).toBe(255);
+ expect(bg[7]).toBe(0);
+ });
+});
diff --git a/packages/cli/src/background-removal/inference.ts b/packages/cli/src/background-removal/inference.ts
new file mode 100644
index 000000000..605257fc3
--- /dev/null
+++ b/packages/cli/src/background-removal/inference.ts
@@ -0,0 +1,249 @@
+/**
+ * u2net_human_seg inference: RGB frame → RGBA frame (alpha = human mask).
+ *
+ * Pre/postprocessing matches rembg's u2net session
+ * (https://github.com/danielgatis/rembg/blob/main/rembg/sessions/u2net.py)
+ * so output should be pixel-equivalent to `rembg new_session("u2net_human_seg")`.
+ */
+import type { InferenceSession, Tensor } from "onnxruntime-node";
+import type sharpType from "sharp";
+import { ensureModel, selectProviders, type Device, type ModelId } from "./manager.js";
+
+const INPUT_SIZE = 320;
+const INPUT_PLANE = INPUT_SIZE * INPUT_SIZE;
+
+// Must match rembg's U2netHumanSegSession.predict — ImageNet mean/std, NOT the
+// (1.0, 1.0, 1.0) std used by the general-purpose u2net session.
+// https://github.com/danielgatis/rembg/blob/main/rembg/sessions/u2net_human_seg.py#L33
+export const MEAN = [0.485, 0.456, 0.406] as const;
+export const STD = [0.229, 0.224, 0.225] as const;
+
+type Sharp = typeof sharpType;
+interface OrtModule {
+ InferenceSession: typeof InferenceSession;
+ Tensor: typeof Tensor;
+}
+
+export interface SessionResult {
+ /** Subject opaque, background fully transparent. */
+ fg: Buffer;
+ /** Inverse-alpha plate: same RGB, alpha is `255 − mask`. Null unless `withBackground` was true. */
+ bg: Buffer | null;
+}
+
+export interface Session {
+ /**
+ * Both `fg` and `bg` (when requested) are session-owned buffers reused on the
+ * next call — drain the encoder's stdin before invoking `process` again.
+ */
+ process(
+ rgb: Buffer,
+ width: number,
+ height: number,
+ withBackground?: boolean,
+ ): Promise<SessionResult>;
+ provider: string;
+ close(): Promise<void>;
+}
+
+export interface CreateSessionOptions {
+ model?: ModelId;
+ device?: Device;
+ onProgress?: (message: string) => void;
+}
+
+export async function createSession(options: CreateSessionOptions = {}): Promise<Session> {
+ const ort = (await import("onnxruntime-node")) as unknown as OrtModule;
+ const sharpMod = await import("sharp");
+ const sharp = sharpMod.default as Sharp;
+
+ const choice = selectProviders(options.device ?? "auto");
+ const path = await ensureModel(options.model, { onProgress: options.onProgress });
+
+ options.onProgress?.(`Loading model on ${choice.label}...`);
+
+ const tryCreate = (providers: string[]) =>
+ ort.InferenceSession.create(path, {
+ executionProviders: providers,
+ graphOptimizationLevel: "all",
+ });
+
+ let session: InferenceSession;
+ let providerUsed = choice.label;
+ try {
+ session = await tryCreate(choice.providers);
+ } catch (err) {
+ if (choice.providers[0] === "cpu") throw err;
+ options.onProgress?.(
+ `${choice.label} provider failed (${(err as Error).message}); falling back to CPU.`,
+ );
+ session = await tryCreate(["cpu"]);
+ providerUsed = "CPU";
+ }
+
+ const inputName = session.inputNames[0];
+ const outputName = session.outputNames[0];
+ if (!inputName || !outputName) {
+ throw new Error("ONNX session is missing input or output bindings");
+ }
+
+ // Reused across calls; sized lazily on first frame. Saves ~9 MB/frame at 1080p.
+ const inputData = new Float32Array(3 * INPUT_PLANE);
+ const maskBuf = Buffer.allocUnsafe(INPUT_PLANE);
+ let rgbaBuf: Buffer | null = null;
+ let rgbaBgBuf: Buffer | null = null;
+
+ return {
+ provider: providerUsed,
+ async process(rgb, width, height, withBackground = false) {
+ const tensor = await preprocess(sharp, ort, rgb, width, height, inputData);
+ const outputs = await session.run({ [inputName]: tensor });
+ const output = outputs[outputName];
+ if (!output) throw new Error(`Model did not return output '${outputName}'`);
+ const expectedBytes = width * height * 4;
+ if (!rgbaBuf || rgbaBuf.length !== expectedBytes) {
+ rgbaBuf = Buffer.allocUnsafe(expectedBytes);
+ }
+ if (withBackground) {
+ if (!rgbaBgBuf || rgbaBgBuf.length !== expectedBytes) {
+ rgbaBgBuf = Buffer.allocUnsafe(expectedBytes);
+ }
+ }
+ return await postprocess(
+ sharp,
+ output,
+ rgb,
+ width,
+ height,
+ maskBuf,
+ rgbaBuf,
+ withBackground ? rgbaBgBuf : null,
+ );
+ },
+ async close() {
+ await session.release();
+ },
+ };
+}
+
+async function preprocess(
+ sharp: Sharp,
+ ort: OrtModule,
+ rgb: Buffer,
+ width: number,
+ height: number,
+ inputData: Float32Array,
+): Promise<Tensor> {
+ const resized = await sharp(rgb, { raw: { width, height, channels: 3 } })
+ .resize(INPUT_SIZE, INPUT_SIZE, { kernel: "lanczos3", fit: "fill" })
+ .raw()
+ .toBuffer();
+
+ // rembg's normalize divides by `np.max(im_ary)` (NOT 255). Match exactly so
+ // we hit the same operating point as the model's training distribution.
+ let maxPixel = 0;
+ for (let i = 0; i < resized.length; i++) {
+ if (resized[i]! > maxPixel) maxPixel = resized[i]!;
+ }
+ if (maxPixel === 0) maxPixel = 1;
+
+ for (let y = 0; y < INPUT_SIZE; y++) {
+ for (let x = 0; x < INPUT_SIZE; x++) {
+ const src = (y * INPUT_SIZE + x) * 3;
+ const dst = y * INPUT_SIZE + x;
+ inputData[dst] = (resized[src]! / maxPixel - MEAN[0]) / STD[0];
+ inputData[INPUT_PLANE + dst] = (resized[src + 1]! / maxPixel - MEAN[1]) / STD[1];
+ inputData[2 * INPUT_PLANE + dst] = (resized[src + 2]! / maxPixel - MEAN[2]) / STD[2];
+ }
+ }
+
+ return new ort.Tensor("float32", inputData, [1, 3, INPUT_SIZE, INPUT_SIZE]);
+}
+
+async function postprocess(
+ sharp: Sharp,
+ output: Tensor,
+ rgb: Buffer,
+ width: number,
+ height: number,
+ maskBuf: Buffer,
+ rgbaBuf: Buffer,
+ rgbaBgBuf: Buffer | null,
+): Promise<SessionResult> {
+ const raw = output.data as Float32Array;
+
+ let lo = Infinity;
+ let hi = -Infinity;
+ for (let i = 0; i < INPUT_PLANE; i++) {
+ const v = raw[i]!;
+ if (v < lo) lo = v;
+ if (v > hi) hi = v;
+ }
+ const range = hi - lo || 1;
+
+ for (let i = 0; i < INPUT_PLANE; i++) {
+ const norm = (raw[i]! - lo) / range;
+ maskBuf[i] = Math.max(0, Math.min(255, Math.round(norm * 255)));
+ }
+
+ // lanczos3 keeps soft edges; nearest leaves visible jaggies on hair.
+ // Sharp upcasts the single-channel raw input to a 3-channel buffer during
+ // resize, so the output is laid out as RGB-interleaved (R0,G0,B0,R1,G1,B1,...)
+ // even though all three channels carry the same grayscale value. Force the
+ // output back to single channel with toColourspace("b-w") so we can index
+ // it linearly as a mask.
+ const fullMask = await sharp(maskBuf, {
+ raw: { width: INPUT_SIZE, height: INPUT_SIZE, channels: 1 },
+ })
+ .resize(width, height, { kernel: "lanczos3", fit: "fill" })
+ .toColourspace("b-w")
+ .raw()
+ .toBuffer();
+
+ return applyMask(rgb, fullMask, rgbaBuf, rgbaBgBuf, width * height);
+}
+
+/**
+ * Composite the RGB source frame with the segmentation mask into one or two
+ * RGBA buffers. The contract this PR is built on:
+ * - `fg`'s alpha is the mask, `bg`'s alpha (when provided) is `255 − mask`,
+ * so `fg.alpha + bg.alpha === 255` for every pixel.
+ * - RGB triples are byte-identical between `fg` and `bg`.
+ * - When `bg` is null, only `fg` is touched.
+ *
+ * Exported for direct unit testing of the invariants above without spinning
+ * up an ONNX session.
+ */
+export function applyMask(
+ rgb: Buffer,
+ mask: Buffer,
+ fg: Buffer,
+ bg: Buffer | null,
+ pixels: number,
+): SessionResult {
+ if (bg) {
+ for (let i = 0; i < pixels; i++) {
+ const r = rgb[i * 3]!;
+ const g = rgb[i * 3 + 1]!;
+ const b = rgb[i * 3 + 2]!;
+ const m = mask[i]!;
+ const o = i * 4;
+ fg[o] = r;
+ fg[o + 1] = g;
+ fg[o + 2] = b;
+ fg[o + 3] = m;
+ bg[o] = r;
+ bg[o + 1] = g;
+ bg[o + 2] = b;
+ bg[o + 3] = 255 - m;
+ }
+ return { fg, bg };
+ }
+ for (let i = 0; i < pixels; i++) {
+ fg[i * 4] = rgb[i * 3]!;
+ fg[i * 4 + 1] = rgb[i * 3 + 1]!;
+ fg[i * 4 + 2] = rgb[i * 3 + 2]!;
+ fg[i * 4 + 3] = mask[i]!;
+ }
+ return { fg, bg: null };
+}
diff --git a/packages/cli/src/background-removal/manager.test.ts b/packages/cli/src/background-removal/manager.test.ts
new file mode 100644
index 000000000..1cc24bd07
--- /dev/null
+++ b/packages/cli/src/background-removal/manager.test.ts
@@ -0,0 +1,81 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+describe("background-removal/manager — selectProviders", () => {
+ beforeEach(() => {
+ vi.resetModules();
+ delete process.env["HYPERFRAMES_CUDA"];
+ });
+
+ afterEach(() => {
+ vi.restoreAllMocks();
+ });
+
+ it("returns CPU explicitly when --device cpu", async () => {
+ vi.doMock("node:os", () => ({
+ platform: () => "darwin",
+ arch: () => "arm64",
+ homedir: () => "/tmp",
+ }));
+ const { selectProviders } = await import("./manager.js");
+ const choice = selectProviders("cpu");
+ expect(choice.providers).toEqual(["cpu"]);
+ expect(choice.label).toBe("CPU");
+ });
+
+ it("auto picks CoreML on darwin-arm64", async () => {
+ vi.doMock("node:os", () => ({
+ platform: () => "darwin",
+ arch: () => "arm64",
+ homedir: () => "/tmp",
+ }));
+ const { selectProviders } = await import("./manager.js");
+ const choice = selectProviders("auto");
+ expect(choice.providers).toEqual(["coreml", "cpu"]);
+ expect(choice.label).toBe("CoreML");
+ });
+
+ it("auto falls back to CPU on linux without HYPERFRAMES_CUDA", async () => {
+ vi.doMock("node:os", () => ({
+ platform: () => "linux",
+ arch: () => "x64",
+ homedir: () => "/tmp",
+ }));
+ const { selectProviders } = await import("./manager.js");
+ const choice = selectProviders("auto");
+ expect(choice.providers).toEqual(["cpu"]);
+ expect(choice.label).toBe("CPU");
+ });
+
+ it("auto picks CUDA on linux when HYPERFRAMES_CUDA=1", async () => {
+ process.env["HYPERFRAMES_CUDA"] = "1";
+ vi.doMock("node:os", () => ({
+ platform: () => "linux",
+ arch: () => "x64",
+ homedir: () => "/tmp",
+ }));
+ const { selectProviders } = await import("./manager.js");
+ const choice = selectProviders("auto");
+ expect(choice.providers).toEqual(["cuda", "cpu"]);
+ expect(choice.label).toBe("CUDA");
+ });
+
+ it("--device coreml on linux throws", async () => {
+ vi.doMock("node:os", () => ({
+ platform: () => "linux",
+ arch: () => "x64",
+ homedir: () => "/tmp",
+ }));
+ const { selectProviders } = await import("./manager.js");
+ expect(() => selectProviders("coreml")).toThrow(/CoreML execution provider not available/);
+ });
+
+ it("--device cuda without env var throws", async () => {
+ vi.doMock("node:os", () => ({
+ platform: () => "linux",
+ arch: () => "x64",
+ homedir: () => "/tmp",
+ }));
+ const { selectProviders } = await import("./manager.js");
+ expect(() => selectProviders("cuda")).toThrow(/CUDA execution provider not available/);
+ });
+});
diff --git a/packages/cli/src/background-removal/manager.ts b/packages/cli/src/background-removal/manager.ts
new file mode 100644
index 000000000..d149c0e2e
--- /dev/null
+++ b/packages/cli/src/background-removal/manager.ts
@@ -0,0 +1,96 @@
+import { existsSync, mkdirSync } from "node:fs";
+import { homedir, platform, arch } from "node:os";
+import { join } from "node:path";
+import { downloadFile } from "../utils/download.js";
+
+export const MODELS_DIR = join(homedir(), ".cache", "hyperframes", "background-removal", "models");
+
+export const DEFAULT_MODEL = "u2net_human_seg" as const;
+export type ModelId = typeof DEFAULT_MODEL;
+
+const MODEL_URLS: Record<ModelId, string> = {
+ u2net_human_seg:
+ "https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net_human_seg.onnx",
+};
+
+export const MODEL_MEMORY_MB: Record<ModelId, number> = {
+ u2net_human_seg: 1500,
+};
+
+export const DEVICES = ["auto", "cpu", "coreml", "cuda"] as const;
+export type Device = (typeof DEVICES)[number];
+
+export function isDevice(value: unknown): value is Device {
+ return typeof value === "string" && (DEVICES as readonly string[]).includes(value);
+}
+
+export interface ProviderChoice {
+ providers: string[];
+ label: "CoreML" | "CUDA" | "CPU";
+}
+
+export function selectProviders(device: Device = "auto"): ProviderChoice {
+ if (device === "cpu") return { providers: ["cpu"], label: "CPU" };
+
+ const available = listAvailableProviders();
+ const hasCoreML = available.includes("coreml");
+ const hasCUDA = available.includes("cuda");
+
+ if (device === "coreml") {
+ if (!hasCoreML) {
+ throw new Error(
+ "CoreML execution provider not available. Install onnxruntime-node on Apple Silicon, or use --device cpu.",
+ );
+ }
+ return { providers: ["coreml", "cpu"], label: "CoreML" };
+ }
+ if (device === "cuda") {
+ if (!hasCUDA) {
+ throw new Error(
+ "CUDA execution provider not available. Use --device cpu or install an onnxruntime-node build with CUDA support.",
+ );
+ }
+ return { providers: ["cuda", "cpu"], label: "CUDA" };
+ }
+
+ if (hasCoreML && platform() === "darwin" && arch() === "arm64") {
+ return { providers: ["coreml", "cpu"], label: "CoreML" };
+ }
+ if (hasCUDA) return { providers: ["cuda", "cpu"], label: "CUDA" };
+ return { providers: ["cpu"], label: "CPU" };
+}
+
+let _cachedProviders: string[] | undefined;
+export function listAvailableProviders(): string[] {
+ if (_cachedProviders) return _cachedProviders;
+
+ // The npm onnxruntime-node ships with CPU on every platform and bundles the
+ // CoreML EP only on darwin-arm64. CUDA is opt-in via a separate gpu build —
+ // gate behind an env var so we don't try to bind to a missing EP.
+ const out: string[] = ["cpu"];
+ if (platform() === "darwin" && arch() === "arm64") out.push("coreml");
+ if (process.env["HYPERFRAMES_CUDA"] === "1") out.push("cuda");
+ _cachedProviders = out;
+ return out;
+}
+
+export function modelPath(model: ModelId = DEFAULT_MODEL): string {
+ return join(MODELS_DIR, `${model}.onnx`);
+}
+
+export async function ensureModel(
+ model: ModelId = DEFAULT_MODEL,
+ options?: { onProgress?: (message: string) => void },
+): Promise<string> {
+ const dest = modelPath(model);
+ if (existsSync(dest)) return dest;
+
+ mkdirSync(MODELS_DIR, { recursive: true });
+ options?.onProgress?.(`Downloading ${model} weights (~168 MB)...`);
+ await downloadFile(MODEL_URLS[model], dest);
+
+ if (!existsSync(dest)) {
+ throw new Error(`Model download failed: ${model}`);
+ }
+ return dest;
+}
diff --git a/packages/cli/src/background-removal/pipeline.test.ts b/packages/cli/src/background-removal/pipeline.test.ts
new file mode 100644
index 000000000..5763efb06
--- /dev/null
+++ b/packages/cli/src/background-removal/pipeline.test.ts
@@ -0,0 +1,191 @@
+import { describe, expect, it } from "vitest";
+import { EventEmitter } from "node:events";
+import type { spawn } from "node:child_process";
+import {
+ inferOutputFormat,
+ inferInputKind,
+ buildEncoderArgs,
+ resolveRenderTargets,
+ waitForExit,
+} from "./pipeline.js";
+
+describe("background-removal/pipeline — inferOutputFormat", () => {
+ it("maps .webm → webm", () => {
+ expect(inferOutputFormat("/tmp/out.webm")).toBe("webm");
+ });
+ it("maps .mov → mov", () => {
+ expect(inferOutputFormat("/tmp/out.mov")).toBe("mov");
+ });
+ it("maps .png → png", () => {
+ expect(inferOutputFormat("/tmp/out.png")).toBe("png");
+ });
+ it("rejects unknown extensions", () => {
+ expect(() => inferOutputFormat("/tmp/out.mp4")).toThrow(/Unsupported output extension/);
+ });
+});
+
+describe("background-removal/pipeline — inferInputKind", () => {
+ it("recognizes mp4/mov/webm/mkv/avi as video", () => {
+ for (const ext of [".mp4", ".mov", ".webm", ".mkv", ".avi"]) {
+ expect(inferInputKind(`/tmp/clip${ext}`)).toBe("video");
+ }
+ });
+ it("recognizes jpg/png/webp as image", () => {
+ for (const ext of [".jpg", ".jpeg", ".png", ".webp"]) {
+ expect(inferInputKind(`/tmp/img${ext}`)).toBe("image");
+ }
+ });
+ it("rejects unknown extensions", () => {
+ expect(() => inferInputKind("/tmp/file.gif")).toThrow(/Unsupported input/);
+ });
+});
+
+describe("background-removal/pipeline — buildEncoderArgs", () => {
+ it("webm preset emits VP9 + alpha_mode metadata", () => {
+ const args = buildEncoderArgs("webm", 1920, 1080, 30, "/tmp/out.webm");
+ expect(args).toContain("libvpx-vp9");
+ expect(args).toContain("yuva420p");
+ // The alpha_mode metadata must be present; without it Chrome ignores the alpha plane.
+ const idx = args.indexOf("-metadata:s:v:0");
+ expect(idx).toBeGreaterThan(-1);
+ expect(args[idx + 1]).toBe("alpha_mode=1");
+ expect(args[args.length - 1]).toBe("/tmp/out.webm");
+ });
+
+ it("webm preset tags BT.709 colorspace + limited range", () => {
+ // Without these tags, ffmpeg's RGB→YUV conversion uses the BT.601 default,
+ // and Chrome's YUV→RGB pass on the resulting webm produces a different
+ // RGB triple than the source mp4 (visible color shift on overlay). Pin
+ // BT.709 limited-range so the cutout matches modern Rec.709 sources.
+ const args = buildEncoderArgs("webm", 1920, 1080, 30, "/tmp/out.webm");
+ const csIdx = args.indexOf("-colorspace");
+ expect(csIdx).toBeGreaterThan(-1);
+ expect(args[csIdx + 1]).toBe("bt709");
+ const rangeIdx = args.indexOf("-color_range");
+ expect(rangeIdx).toBeGreaterThan(-1);
+ expect(args[rangeIdx + 1]).toBe("tv");
+ });
+
+ it("webm quality presets map to crf 30/18/12", () => {
+ const fast = buildEncoderArgs("webm", 1920, 1080, 30, "/tmp/o.webm", "fast");
+ const balanced = buildEncoderArgs("webm", 1920, 1080, 30, "/tmp/o.webm", "balanced");
+ const best = buildEncoderArgs("webm", 1920, 1080, 30, "/tmp/o.webm", "best");
+ const crf = (args: string[]) => args[args.indexOf("-crf") + 1];
+ expect(crf(fast)).toBe("30");
+ expect(crf(balanced)).toBe("18");
+ expect(crf(best)).toBe("12");
+ });
+
+ it("webm default quality is balanced (crf 18)", () => {
+ const args = buildEncoderArgs("webm", 1920, 1080, 30, "/tmp/o.webm");
+ expect(args[args.indexOf("-crf") + 1]).toBe("18");
+ });
+
+ it("mov preset emits ProRes 4444 + yuva444p10le", () => {
+ const args = buildEncoderArgs("mov", 1920, 1080, 30, "/tmp/out.mov");
+ expect(args).toContain("prores_ks");
+ expect(args).toContain("4444");
+ expect(args).toContain("yuva444p10le");
+ });
+
+ it("png preset emits a single RGBA frame", () => {
+ const args = buildEncoderArgs("png", 1920, 1080, 30, "/tmp/out.png");
+ expect(args).toContain("-frames:v");
+ expect(args).toContain("rgba");
+ });
+
+ it("threads input dimensions and fps into raw video header", () => {
+ const args = buildEncoderArgs("webm", 640, 480, 24, "/tmp/o.webm");
+ const sIdx = args.indexOf("-s");
+ expect(args[sIdx + 1]).toBe("640x480");
+ const rIdx = args.indexOf("-r");
+ expect(args[rIdx + 1]).toBe("24");
+ });
+});
+
+describe("background-removal/pipeline — resolveRenderTargets", () => {
+ it("resolves a normal video → webm render", () => {
+ const t = resolveRenderTargets("/tmp/clip.mp4", "/tmp/cutout.webm");
+ expect(t.format).toBe("webm");
+ expect(t.inputKind).toBe("video");
+ expect(t.bgFormat).toBeUndefined();
+ });
+
+ it("resolves an image → png render", () => {
+ const t = resolveRenderTargets("/tmp/portrait.jpg", "/tmp/cutout.png");
+ expect(t.format).toBe("png");
+ expect(t.inputKind).toBe("image");
+ });
+
+ it("rejects image input with a video output extension", () => {
+ expect(() => resolveRenderTargets("/tmp/portrait.jpg", "/tmp/cutout.webm")).toThrow(
+ /Image input requires a \.png output/,
+ );
+ });
+
+ it("rejects video input with a .png output", () => {
+ expect(() => resolveRenderTargets("/tmp/clip.mp4", "/tmp/cutout.png")).toThrow(
+ /Video input requires a \.webm or \.mov output/,
+ );
+ });
+
+ it("threads background-output format through when valid", () => {
+ const t = resolveRenderTargets("/tmp/clip.mp4", "/tmp/fg.webm", "/tmp/bg.webm");
+ expect(t.bgFormat).toBe("webm");
+ const tMov = resolveRenderTargets("/tmp/clip.mp4", "/tmp/fg.webm", "/tmp/bg.mov");
+ expect(tMov.bgFormat).toBe("mov");
+ });
+
+ it("rejects --background-output for image inputs (no temporal pairing to do)", () => {
+ expect(() =>
+ resolveRenderTargets("/tmp/portrait.jpg", "/tmp/cutout.png", "/tmp/bg.png"),
+ ).toThrow(/--background-output is not supported for image inputs/);
+ });
+
+ it("rejects .png as the --background-output extension", () => {
+ // .png is only valid for single-image inputs, and image inputs themselves
+ // can't have a background-output anyway. So .png here is always a misuse.
+ expect(() => resolveRenderTargets("/tmp/clip.mp4", "/tmp/fg.webm", "/tmp/bg.png")).toThrow(
+ /--background-output must be \.webm or \.mov/,
+ );
+ });
+});
+
+// Regression: a previous version of waitForExit treated `code === null` as
+// success. Per Node's child_process docs, that's the signal-killed case —
+// reporting it as success means a SIGTERM/SIGKILL'd ffmpeg encoder produces
+// a "successful" render with a missing or truncated output file.
+describe("background-removal/pipeline — waitForExit signal handling", () => {
+ function fakeProc(): ReturnType<typeof spawn> {
+ return new EventEmitter() as unknown as ReturnType<typeof spawn>;
+ }
+
+ it("resolves on a clean exit (code=0, signal=null)", async () => {
+ const proc = fakeProc();
+ const promise = waitForExit(proc, "ffmpeg encoder", () => "");
+ proc.emit("exit", 0, null);
+ await expect(promise).resolves.toBeUndefined();
+ });
+
+ it("rejects when killed by signal (code=null, signal='SIGTERM')", async () => {
+ const proc = fakeProc();
+ const promise = waitForExit(proc, "ffmpeg encoder", () => "tail of stderr");
+ proc.emit("exit", null, "SIGTERM");
+ await expect(promise).rejects.toThrow(/killed by SIGTERM/);
+ await expect(promise).rejects.toThrow(/tail of stderr/);
+ });
+
+ it("rejects on non-zero exit code", async () => {
+ const proc = fakeProc();
+ const promise = waitForExit(proc, "ffmpeg encoder", () => "");
+ proc.emit("exit", 1, null);
+ await expect(promise).rejects.toThrow(/exited with code 1/);
+ });
+
+ it("rejects on SIGKILL", async () => {
+ const proc = fakeProc();
+ const promise = waitForExit(proc, "ffmpeg encoder", () => "");
+ proc.emit("exit", null, "SIGKILL");
+ await expect(promise).rejects.toThrow(/killed by SIGKILL/);
+ });
+});
diff --git a/packages/cli/src/background-removal/pipeline.ts b/packages/cli/src/background-removal/pipeline.ts
new file mode 100644
index 000000000..a0a0f3bfc
--- /dev/null
+++ b/packages/cli/src/background-removal/pipeline.ts
@@ -0,0 +1,470 @@
+/**
+ * Background-removal rendering pipeline.
+ *
+ * Decode source frames via ffmpeg → run inference per frame → encode the RGBA
+ * stream via a second ffmpeg process. Output formats:
+ * .webm → VP9 with alpha (HTML5-native, ~1 MB / 4s @ 1080p)
+ * .mov → ProRes 4444 with alpha (editing round-trip)
+ * .png → single RGBA still (only when input is also a single image)
+ *
+ * The encode flags for VP9-with-alpha mirror the `chunkEncoder.ts` pattern in
+ * @hyperframes/engine — `-pix_fmt yuva420p` plus the
+ * `-metadata:s:v:0 alpha_mode=1` tag are what make Chrome's `` element
+ * decode the alpha plane.
+ */
+import { spawn } from "node:child_process";
+import { extname } from "node:path";
+import { hasFFmpeg, hasFFprobe } from "../whisper/manager.js";
+import { createSession, type Session } from "./inference.js";
+import { type Device, type ModelId } from "./manager.js";
+
+export type OutputFormat = "webm" | "mov" | "png";
+
+export const QUALITY_CRF = {
+ fast: 30,
+ balanced: 18,
+ best: 12,
+} as const;
+
+export type Quality = keyof typeof QUALITY_CRF;
+
+export const QUALITIES = Object.keys(QUALITY_CRF) as readonly Quality[];
+
+export const DEFAULT_QUALITY: Quality = "balanced";
+
+export const isQuality = (v: unknown): v is Quality =>
+ typeof v === "string" && (QUALITIES as readonly string[]).includes(v);
+
+export interface RenderOptions {
+ inputPath: string;
+ outputPath: string;
+ /**
+ * Optional second output: an inverse-alpha background plate (same source
+ * RGB, transparent where the subject was). Only valid for video inputs and
+ * .webm/.mov outputs — not allowed alongside a .png output. The plate's
+ * format is inferred from this path independently of the foreground's.
+ *
+ * NOTE: this is a hole-cut plate, not an inpainted clean plate. Composite
+ * something opaque (graphics, blur, scene) under it to fill the hole.
+ */
+ backgroundOutputPath?: string;
+ device?: Device;
+ model?: ModelId;
+ /** Encoder CRF preset for `.webm`. See `QUALITY_CRF`. Ignored for `.mov`/`.png`. */
+ quality?: Quality;
+ onProgress?: (event: ProgressEvent) => void;
+}
+
+export type ProgressEvent =
+ | { kind: "info"; message: string }
+ | { kind: "metadata"; width: number; height: number; fps: number; frameCount: number }
+ | { kind: "frame"; index: number; total: number; avgMsPerFrame: number };
+
+export interface RenderResult {
+ outputPath: string;
+ /** Present only when `backgroundOutputPath` was set. */
+ backgroundOutputPath?: string;
+ framesProcessed: number;
+ durationSeconds: number;
+ avgMsPerFrame: number;
+ provider: string;
+ format: OutputFormat;
+}
+
+const VIDEO_EXTENSIONS = new Set([".mp4", ".mov", ".webm", ".mkv", ".avi"]);
+const IMAGE_EXTENSIONS = new Set([".jpg", ".jpeg", ".png", ".webp"]);
+
+interface MediaInfo {
+ width: number;
+ height: number;
+ fps: number;
+ frameCount: number;
+}
+
+export function inferOutputFormat(outputPath: string): OutputFormat {
+ const ext = extname(outputPath).toLowerCase();
+ if (ext === ".webm") return "webm";
+ if (ext === ".mov") return "mov";
+ if (ext === ".png") return "png";
+ throw new Error(
+ `Unsupported output extension: ${ext}. Use .webm (VP9 alpha), .mov (ProRes 4444), or .png.`,
+ );
+}
+
+export function inferInputKind(inputPath: string): "video" | "image" {
+ const ext = extname(inputPath).toLowerCase();
+ if (VIDEO_EXTENSIONS.has(ext)) return "video";
+ if (IMAGE_EXTENSIONS.has(ext)) return "image";
+ throw new Error(
+ `Unsupported input: ${ext}. Use a video (mp4/mov/webm/mkv/avi) or image (jpg/png/webp).`,
+ );
+}
+
+interface EngineMetadata {
+ width: number;
+ height: number;
+ fps: number;
+ durationSeconds: number;
+}
+
+async function probeMedia(inputPath: string): Promise<MediaInfo> {
+ const isImage = inferInputKind(inputPath) === "image";
+ const engine = (await import("@hyperframes/engine")) as {
+ extractMediaMetadata: (path: string) => Promise<EngineMetadata>;
+ };
+ const meta = await engine.extractMediaMetadata(inputPath);
+
+ if (isImage) {
+ return { width: meta.width, height: meta.height, fps: 0, frameCount: 1 };
+ }
+
+ const fps = meta.fps || 30;
+ const frameCount = meta.durationSeconds ? Math.round(meta.durationSeconds * fps) : 0;
+ return { width: meta.width, height: meta.height, fps, frameCount };
+}
+
+export function buildEncoderArgs(
+ format: OutputFormat,
+ width: number,
+ height: number,
+ fps: number,
+ outputPath: string,
+ quality: Quality = DEFAULT_QUALITY,
+): string[] {
+ const base = [
+ "-y",
+ "-f",
+ "rawvideo",
+ "-pix_fmt",
+ "rgba",
+ "-s",
+ `${width}x${height}`,
+ "-r",
+ String(fps || 30),
+ "-i",
+ "-",
+ ];
+
+ if (format === "webm") {
+ return [
+ ...base,
+ "-c:v",
+ "libvpx-vp9",
+ "-b:v",
+ "0",
+ "-crf",
+ String(QUALITY_CRF[quality]),
+ "-deadline",
+ "good",
+ "-row-mt",
+ "1",
+ "-auto-alt-ref",
+ "0",
+ "-pix_fmt",
+ "yuva420p",
+ // Tag the output as BT.709 limited range so browsers use the same
+ // YUV→RGB matrix the source video was encoded with. Without these tags
+ // ffmpeg's default RGB→YUV conversion is BT.601, which causes a visible
+ // color shift (red/skin tones in particular) when the matted overlay is
+ // composited over the original mp4.
+ "-colorspace",
+ "bt709",
+ "-color_primaries",
+ "bt709",
+ "-color_trc",
+ "bt709",
+ "-color_range",
+ "tv",
+ "-metadata:s:v:0",
+ "alpha_mode=1",
+ "-an",
+ outputPath,
+ ];
+ }
+ if (format === "mov") {
+ return [
+ ...base,
+ "-c:v",
+ "prores_ks",
+ "-profile:v",
+ "4444",
+ "-vendor",
+ "apl0",
+ "-pix_fmt",
+ "yuva444p10le",
+ "-an",
+ outputPath,
+ ];
+ }
+ return [...base, "-frames:v", "1", "-pix_fmt", "rgba", "-update", "1", outputPath];
+}
+
+async function* readFrames(
+ stream: NodeJS.ReadableStream,
+ frameBytes: number,
+): AsyncGenerator<Buffer> {
+ let buffered: Buffer = Buffer.alloc(0);
+ for await (const chunk of stream) {
+ buffered =
+ buffered.length === 0 ? (chunk as Buffer) : Buffer.concat([buffered, chunk as Buffer]);
+ while (buffered.length >= frameBytes) {
+ // Copy because the next concat would clobber the underlying memory.
+ yield Buffer.from(buffered.subarray(0, frameBytes));
+ buffered = buffered.subarray(frameBytes);
+ }
+ }
+}
+
+export interface RenderTargets {
+ format: OutputFormat;
+ inputKind: "video" | "image";
+ bgFormat: OutputFormat | undefined;
+}
+
+/**
+ * Resolve and validate the input/output combination before any I/O. Pure;
+ * exported so unit tests can pin the error messages without spawning ffmpeg.
+ */
+export function resolveRenderTargets(
+ inputPath: string,
+ outputPath: string,
+ backgroundOutputPath?: string,
+): RenderTargets {
+ const format = inferOutputFormat(outputPath);
+ const inputKind = inferInputKind(inputPath);
+
+ if (inputKind === "image" && format !== "png") {
+ throw new Error(
+ `Image input requires a .png output (got ${extname(outputPath)}). Use a video input for .webm/.mov.`,
+ );
+ }
+ if (inputKind === "video" && format === "png") {
+ throw new Error(
+ `Video input requires a .webm or .mov output (got .png). Use an image input for .png.`,
+ );
+ }
+
+ let bgFormat: OutputFormat | undefined;
+ if (backgroundOutputPath) {
+ if (inputKind === "image") {
+ throw new Error(
+ "--background-output is not supported for image inputs. Use a video input (mp4/mov/webm) to produce both a cutout and a background plate.",
+ );
+ }
+ bgFormat = inferOutputFormat(backgroundOutputPath);
+ if (bgFormat === "png") {
+ throw new Error(
+ "--background-output must be .webm or .mov; .png is only valid for single-image inputs.",
+ );
+ }
+ }
+
+ return { format, inputKind, bgFormat };
+}
+
+export async function render(options: RenderOptions): Promise<RenderResult> {
+ if (!hasFFmpeg() || !hasFFprobe()) {
+ throw new Error("ffmpeg and ffprobe are required. Install: brew install ffmpeg");
+ }
+
+ const { format, bgFormat } = resolveRenderTargets(
+ options.inputPath,
+ options.outputPath,
+ options.backgroundOutputPath,
+ );
+
+ const media = await probeMedia(options.inputPath);
+
+ options.onProgress?.({
+ kind: "metadata",
+ width: media.width,
+ height: media.height,
+ fps: media.fps,
+ frameCount: media.frameCount,
+ });
+
+ const session = await createSession({
+ model: options.model,
+ device: options.device,
+ onProgress: (msg) => options.onProgress?.({ kind: "info", message: msg }),
+ });
+
+ try {
+ const start = Date.now();
+ const framesProcessed = await runPipeline(options, session, media, format, bgFormat);
+ const durationSeconds = (Date.now() - start) / 1000;
+ const avgMsPerFrame = framesProcessed ? (durationSeconds * 1000) / framesProcessed : 0;
+
+ return {
+ outputPath: options.outputPath,
+ backgroundOutputPath: options.backgroundOutputPath,
+ framesProcessed,
+ durationSeconds,
+ avgMsPerFrame,
+ provider: session.provider,
+ format,
+ };
+ } finally {
+ await session.close();
+ }
+}
+
+const RECENT_WINDOW = 30;
+
+interface FfmpegProc {
+ proc: ReturnType<typeof spawn>;
+ exit: Promise<void>;
+ /** Tail of stderr, captured for inclusion in error messages. */
+ getStderr: () => string;
+}
+
+type StdioFd = "ignore" | "pipe";
+type StdioTuple = [StdioFd, StdioFd, StdioFd];
+
+function spawnFfmpeg(args: string[], label: string, stdio: StdioTuple): FfmpegProc {
+ const proc = spawn("ffmpeg", args, { stdio });
+ let stderrBuf = "";
+ proc.stderr?.on("data", (d: Buffer) => {
+ stderrBuf += d.toString();
+ });
+ // If the encoder dies mid-render, the next .write() to its stdin emits an
+ // 'error' event on the writable. Without a listener, Node treats it as
+ // unhandled and crashes the CLI before waitForExit's reject path can
+ // surface the real cause (encoder stderr tail). Swallowing here is safe —
+ // the process exit is the source of truth.
+ proc.stdin?.on("error", () => {});
+ const exit = waitForExit(proc, label, () => stderrBuf);
+ return { proc, exit, getStderr: () => stderrBuf };
+}
+
+async function runPipeline(
+ options: RenderOptions,
+ session: Session,
+ media: MediaInfo,
+ format: OutputFormat,
+ bgFormat: OutputFormat | undefined,
+): Promise {
+ const { inputPath, outputPath, backgroundOutputPath } = options;
+ const { width, height, fps, frameCount } = media;
+ const frameBytes = width * height * 3;
+ const quality = options.quality ?? DEFAULT_QUALITY;
+
+ const decoder = spawnFfmpeg(
+ ["-loglevel", "error", "-i", inputPath, "-f", "rawvideo", "-pix_fmt", "rgb24", "-an", "-"],
+ "ffmpeg decoder",
+ ["ignore", "pipe", "pipe"],
+ );
+
+ const fg = spawnFfmpeg(
+ buildEncoderArgs(format, width, height, fps || 30, outputPath, quality),
+ "ffmpeg encoder",
+ ["pipe", "ignore", "pipe"],
+ );
+
+ const bg =
+ backgroundOutputPath && bgFormat
+ ? spawnFfmpeg(
+ buildEncoderArgs(bgFormat, width, height, fps || 30, backgroundOutputPath, quality),
+ "ffmpeg background encoder",
+ ["pipe", "ignore", "pipe"],
+ )
+ : null;
+
+ let processed = 0;
+ const total = frameCount;
+
+ const recentMs = new Array(RECENT_WINDOW).fill(0);
+ let recentSum = 0;
+ let recentSlot = 0;
+ let recentCount = 0;
+
+ try {
+ for await (const rgb of readFrames(decoder.proc.stdout!, frameBytes)) {
+ const t0 = Date.now();
+ const result = await session.process(rgb, width, height, bg !== null);
+ const elapsed = Date.now() - t0;
+
+ recentSum += elapsed - recentMs[recentSlot]!;
+ recentMs[recentSlot] = elapsed;
+ recentSlot = (recentSlot + 1) % RECENT_WINDOW;
+ if (recentCount < RECENT_WINDOW) recentCount++;
+
+ // Issue both writes before any await so a slow encoder doesn't block
+ // the other. Drain anything that returned false before the next
+ // session.process() — its output buffers are reused per frame.
+ //
+ // Subtlety: write() returning true means "highWaterMark not exceeded,"
+ // NOT "libuv has flushed the chunk." The buffer reference is held by
+ // libuv until the underlying syscall completes. Reusing the session's
+ // output buffer is safe because the next session.process() call takes
+ // ~10–50ms (ORT inference) — plenty of event-loop turns for libuv to
+ // drain. If that ever stops being true, we'd need to copy here.
+ const fgWroteFully = fg.proc.stdin!.write(result.fg);
+ const bgWroteFully = bg && result.bg ? bg.proc.stdin!.write(result.bg) : true;
+ if (!fgWroteFully || !bgWroteFully) {
+ const drains: Promise<void>[] = [];
+ if (!fgWroteFully) {
+ drains.push(
+ new Promise((resolve) => fg.proc.stdin!.once("drain", () => resolve())),
+ );
+ }
+ if (!bgWroteFully && bg) {
+ drains.push(
+ new Promise((resolve) => bg.proc.stdin!.once("drain", () => resolve())),
+ );
+ }
+ await Promise.all(drains);
+ }
+
+ processed++;
+ options.onProgress?.({
+ kind: "frame",
+ index: processed,
+ total,
+ avgMsPerFrame: recentSum / recentCount,
+ });
+ }
+ } catch (err) {
+ decoder.proc.kill("SIGKILL");
+ fg.proc.kill("SIGKILL");
+ bg?.proc.kill("SIGKILL");
+ throw err;
+ }
+
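+ // Close encoder stdin so ffmpeg flushes and finalizes its output, then wait
+ // for every process to exit before declaring success.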
+ fg.proc.stdin!.end();
+ bg?.proc.stdin!.end();
+ const exits: Promise<void>[] = [decoder.exit, fg.exit];
+ if (bg) exits.push(bg.exit);
+ await Promise.all(exits);
+
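+ // Zero frames means the decoder produced no output; its stderr tail is the
+ // best clue about what was wrong with the input.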
+ if (processed === 0) {
+ throw new Error(
+ `No frames produced from ${inputPath}. Decoder stderr:\n${decoder.getStderr().slice(-400)}`,
+ );
+ }
+
+ return processed;
+}
+
+export function waitForExit(
+ proc: ReturnType<typeof spawn>,
+ label: string,
+ getStderr: () => string,
+ ): Promise<void> {
+ return new Promise((resolve, reject) => {
+ proc.on("error", reject);
+ // Per Node docs the exit callback is (code, signal): on a normal exit
+ // `code` is the numeric exit status and `signal` is null; on a
+ // signal-killed exit `code` is null and `signal` is the signal name.
+ // Treating null-code as success would silently report SIGTERM/SIGKILL
+ // as a successful render.
+ proc.on("exit", (code, signal) => {
+ if (code === 0 && !signal) {
+ resolve();
+ return;
+ }
+ const cause = signal ? `killed by ${signal}` : `exited with code ${code}`;
+ reject(new Error(`${label} ${cause}: ${getStderr().slice(-400)}`));
+ });
+ });
+}
diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts
index 64f6c6bfc..5bf2a2a4c 100644
--- a/packages/cli/src/cli.ts
+++ b/packages/cli/src/cli.ts
@@ -38,6 +38,7 @@ const subCommands = {
compositions: () => import("./commands/compositions.js").then((m) => m.default),
benchmark: () => import("./commands/benchmark.js").then((m) => m.default),
browser: () => import("./commands/browser.js").then((m) => m.default),
+ "remove-background": () => import("./commands/remove-background.js").then((m) => m.default),
transcribe: () => import("./commands/transcribe.js").then((m) => m.default),
tts: () => import("./commands/tts.js").then((m) => m.default),
"el-tts": () => import("./commands/el-tts.js").then((m) => m.default),
diff --git a/packages/cli/src/commands/add.test.ts b/packages/cli/src/commands/add.test.ts
index 87b82f525..6836b20d0 100644
--- a/packages/cli/src/commands/add.test.ts
+++ b/packages/cli/src/commands/add.test.ts
@@ -181,9 +181,9 @@ describe("runAdd (integration, mocked registry)", () => {
expect(result.type).toBe("hyperframes:block");
expect(result.written).toHaveLength(1);
expect(existsSync(join(dir, "compositions/my-block.html"))).toBe(true);
- expect(readFileSync(join(dir, "compositions/my-block.html"), "utf-8")).toContain(
- "my-block.html",
- );
+ const installed = readFileSync(join(dir, "compositions/my-block.html"), "utf-8");
+ expect(installed).toContain("");
+ expect(installed).toContain("my-block.html");
expect(result.snippet).toContain("compositions/my-block.html");
} finally {
rmSync(dir, { recursive: true, force: true });
diff --git a/packages/cli/src/commands/compositions.test.ts b/packages/cli/src/commands/compositions.test.ts
new file mode 100644
index 000000000..50c4456ff
--- /dev/null
+++ b/packages/cli/src/commands/compositions.test.ts
@@ -0,0 +1,49 @@
+import { describe, expect, it, beforeEach } from "vitest";
+import { ensureDOMParser } from "../utils/dom.js";
+import { parseSubComposition } from "./compositions.js";
+
+describe("parseSubComposition", () => {
+ beforeEach(() => {
+ ensureDOMParser();
+ });
+
+ it("reads template-wrapped sub-composition contents", () => {
+ const html = `
+
+