heygen-com · vanceingalls · Apr 21, 2026 · Apr 22, 2026
diff --git a/.github/workflows/player-perf.yml b/.github/workflows/player-perf.yml
@@ -42,6 +42,15 @@ jobs:
           - shard: load
             scenarios: load
             runs: "5"
+          - shard: fps
+            scenarios: fps
+            runs: "3"
+          - shard: scrub
+            scenarios: scrub
+            runs: "3"
+          - shard: drift
+            scenarios: drift
+            runs: "3"
     steps:
       - uses: actions/checkout@v4
 

diff --git a/packages/player/tests/perf/baseline.json b/packages/player/tests/perf/baseline.json
@@ -1,7 +1,7 @@
 {
   "compLoadColdP95Ms": 2000,
   "compLoadWarmP95Ms": 1000,
-  "fpsMin": 55,
+  "compositionTimeAdvancementRatioMin": 0.95,
   "scrubLatencyP95IsolatedMs": 80,
   "scrubLatencyP95InlineMs": 33,
   "driftMaxMs": 500,

diff --git a/packages/player/tests/perf/fixtures/10-video-grid/index.html b/packages/player/tests/perf/fixtures/10-video-grid/index.html
@@ -0,0 +1,126 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <title>perf fixture: 10-video-grid</title>
+    <style>
+      :root {
+        color-scheme: dark;
+      }
+      html,
+      body {
+        margin: 0;
+        padding: 0;
+        background: #050714;
+        color: #e6e6f0;
+        font-family:
+          system-ui,
+          -apple-system,
+          sans-serif;
+        overflow: hidden;
+      }
+      #root {
+        position: relative;
+        width: 1920px;
+        height: 1080px;
+        display: grid;
+        grid-template-columns: repeat(5, 1fr);
+        grid-template-rows: repeat(2, 1fr);
+        gap: 8px;
+        padding: 8px;
+        box-sizing: border-box;
+      }
+      .tile {
+        position: relative;
+        background: #111827;
+        border-radius: 12px;
+        overflow: hidden;
+        box-shadow: 0 0 0 1px rgba(255, 255, 255, 0.05);
+        will-change: transform;
+      }
+      .tile video {
+        position: absolute;
+        inset: 0;
+        width: 100%;
+        height: 100%;
+        object-fit: cover;
+      }
+      .tile .label {
+        position: absolute;
+        top: 8px;
+        left: 8px;
+        z-index: 2;
+        font:
+          600 14px/1 system-ui,
+          sans-serif;
+        color: #fff;
+        background: rgba(0, 0, 0, 0.6);
+        padding: 4px 8px;
+        border-radius: 6px;
+        pointer-events: none;
+      }
+    </style>
+    <script src="/vendor/gsap.min.js"></script>
+    <script data-hyperframes-runtime="1" src="/vendor/hyperframe.runtime.iife.js"></script>
+  </head>
+  <body>
+    <div
+      id="root"
+      data-composition-id="main"
+      data-width="1920"
+      data-height="1080"
+      data-duration="10"
+      data-fps="30"
+    ></div>
+    <script>
+      (function () {
+        var TILE_COUNT = 10; // fills the 5x2 grid declared in the #root CSS
+        var DURATION_SEC = 10; // same value as data-duration on #root
+
+        var root = document.getElementById("root");
+        var tiles = [];
+        for (var i = 0; i < TILE_COUNT; i++) {
+          var tile = document.createElement("div");
+          tile.className = "tile";
+          tile.id = "tile-" + i;
+
+          var label = document.createElement("div");
+          label.className = "label";
+          label.textContent = "video " + (i + 1);
+          tile.appendChild(label);
+
+          var video = document.createElement("video");
+          video.id = "video-" + i;
+          video.setAttribute("data-start", "0");
+          video.setAttribute("data-duration", String(DURATION_SEC));
+          video.setAttribute("data-track-index", String(i));
+          video.setAttribute("src", "sample.mp4"); // every tile loads the same clip
+          video.setAttribute("preload", "auto");
+          video.setAttribute("playsinline", "");
+          video.muted = true;
+          tile.appendChild(video);
+
+          root.appendChild(tile);
+          tiles.push(tile);
+        }
+
+        // Lightweight parent timeline so the player has a non-empty composition
+        // to drive. Each tile gets a subtle scale "breath" over the full
+        // duration — enough to keep GSAP scrubbing real properties without
+        // dominating the rAF budget that the video decoder needs.
+        var tl = gsap.timeline({ paused: true });
+        for (var j = 0; j < tiles.length; j++) {
+          tl.fromTo(
+            tiles[j],
+            { scale: 0.985 },
+            { scale: 1, duration: DURATION_SEC, ease: "sine.inOut" },
+            0,
+          );
+        }
+
+        window.__timelines = window.__timelines || {};
+        window.__timelines["main"] = tl; // key matches data-composition-id on #root
+      })();
+    </script>
+  </body>
+</html>
diff --git a/packages/player/tests/perf/fixtures/10-video-grid/sample.mp4 b/packages/player/tests/perf/fixtures/10-video-grid/sample.mp4
diff --git a/packages/player/tests/perf/index.ts b/packages/player/tests/perf/index.ts
@@ -29,7 +29,10 @@ import { execFileSync } from "node:child_process";
 import { existsSync, mkdirSync, writeFileSync } from "node:fs";
 import { dirname, resolve } from "node:path";
 import { fileURLToPath } from "node:url";
+import { runFps } from "./scenarios/02-fps.ts";
 import { runLoad } from "./scenarios/03-load.ts";
+import { runScrub } from "./scenarios/04-scrub.ts";
+import { runDrift } from "./scenarios/05-drift.ts";
 import { reportAndGate, type GateMode, type GateResult, type Metric } from "./perf-gate.ts";
 import { launchBrowser } from "./runner.ts";
 import { startServer } from "./server.ts";
@@ -38,7 +41,42 @@ const HERE = dirname(fileURLToPath(import.meta.url));
 const RESULTS_DIR = resolve(HERE, "results");
 const RESULTS_FILE = resolve(RESULTS_DIR, "metrics.json");
 
-type ScenarioId = "load";
+type ScenarioId = "load" | "fps" | "scrub" | "drift";
+
+/**
+ * Per-scenario default `runs` value when the caller didn't pass `--runs`.
+ *
+ * Why `load` gets 5 runs and the others get 3:
+ *
+ *   - `load` reports a single p95 over `runs` measurements, so each `run` is
+ *     one sample. p95 over n=3 is mostly noise (the 95th percentile of three
+ *     numbers is just `max`), so we bump it to 5. We considered 10 — but cold
+ *     load is the slowest scenario in the shard (~2s × 5 runs × 2 fixtures =
+ *     ~20s with disk cache cleared), and going to 10 would push the load shard
+ *     past 30s of pure-measurement wall time per CI invocation.
+ *   - `fps` aggregates as `min(ratio)` over runs — 3 runs gives us a worst-
+ *     of-three signal, which is what we want for a floor metric. Adding more
+ *     runs can only keep the ratio equal or push it lower (more chances to
+ *     catch a stall), shifting the gate toward false positives from runner
+ *     contention rather than real regressions.
+ *   - `scrub` and `drift` *pool* their per-run samples (10 seeks/run for
+ *     scrub, ~1500 RVFC frames/run for drift) and compute the percentile over
+ *     the pooled set. Their effective sample count for the percentile is
+ *     `runs × samples_per_run`, not `runs`, so 3 runs already gives 30+ scrub
+ *     samples and 4500+ drift samples per shard — well above the n≈30 rule of
+ *     thumb for a stable p95.
+ *
+ * TODO(player-perf): revisit `fps: 3` once we have ~2 weeks of CI baseline
+ * data — if `min(ratio)` shows >5% inter-run variance attributable to runner
+ * jitter (not real player regressions), bump to 5 and tighten the
+ * `compositionTimeAdvancementRatioMin` baseline accordingly.
+ */
+const DEFAULT_RUNS: Record<ScenarioId, number> = {
+  load: 5,
+  fps: 3,
+  scrub: 3,
+  drift: 3,
+};
 
 type ResultsFile = {
   schemaVersion: 1;
@@ -88,7 +126,7 @@ function parseArgs(argv: string[]): ParsedArgs {
     // `mode` is consumed (measure logs regressions but never fails; enforce
     // exits non-zero on regression).
     mode: (process.env.PLAYER_PERF_MODE as GateMode) === "enforce" ? "enforce" : "measure",
-    scenarios: ["load"],
+    scenarios: ["load", "fps", "scrub", "drift"],
     runs: null,
     fixture: null,
     headful: false,
@@ -150,7 +188,31 @@ async function main(): Promise<void> {
         const m = await runLoad({
           browser,
           origin: server.origin,
-          runs: args.runs ?? 5,
+          runs: args.runs ?? DEFAULT_RUNS.load,
+          fixture: args.fixture,
+        });
+        metrics.push(...m);
+      } else if (scenario === "fps") {
+        const m = await runFps({
+          browser,
+          origin: server.origin,
+          runs: args.runs ?? DEFAULT_RUNS.fps,
+          fixture: args.fixture,
+        });
+        metrics.push(...m);
+      } else if (scenario === "scrub") {
+        const m = await runScrub({
+          browser,
+          origin: server.origin,
+          runs: args.runs ?? DEFAULT_RUNS.scrub,
+          fixture: args.fixture,
+        });
+        metrics.push(...m);
+      } else if (scenario === "drift") {
+        const m = await runDrift({
+          browser,
+          origin: server.origin,
+          runs: args.runs ?? DEFAULT_RUNS.drift,
           fixture: args.fixture,
         });
         metrics.push(...m);

diff --git a/packages/player/tests/perf/perf-gate.ts b/packages/player/tests/perf/perf-gate.ts
@@ -31,7 +31,16 @@ export type Metric = {
 export type PerfBaseline = {
   compLoadColdP95Ms: number;
   compLoadWarmP95Ms: number;
-  fpsMin: number;
+  /**
+   * Floor on `(compositionTime advanced) / (wallClock elapsed)` over a sustained
+   * playback window — see packages/player/tests/perf/scenarios/02-fps.ts. A
+   * healthy player keeps up with its intended speed and reads ~1.0; values
+   * below 1.0 mean the composition clock fell behind real time, which is the
+   * actual user-visible jank we want to gate against. Refresh-rate independent
+   * by construction, so it does not saturate to display refresh on high-Hz
+   * runners the way the previous `fpsMin` did. Direction: higher-is-better.
+   */
+  compositionTimeAdvancementRatioMin: number;
   scrubLatencyP95IsolatedMs: number;
   scrubLatencyP95InlineMs: number;
   driftMaxMs: number;