Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/player-perf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ jobs:
- shard: load
scenarios: load
runs: "5"
- shard: fps
scenarios: fps
runs: "3"
- shard: scrub
scenarios: scrub
runs: "3"
- shard: drift
scenarios: drift
runs: "3"
steps:
- uses: actions/checkout@v4

Expand Down
2 changes: 1 addition & 1 deletion packages/player/tests/perf/baseline.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"compLoadColdP95Ms": 2000,
"compLoadWarmP95Ms": 1000,
"fpsMin": 55,
"compositionTimeAdvancementRatioMin": 0.95,
"scrubLatencyP95IsolatedMs": 80,
"scrubLatencyP95InlineMs": 33,
"driftMaxMs": 500,
Expand Down
126 changes: 126 additions & 0 deletions packages/player/tests/perf/fixtures/10-video-grid/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>perf fixture: 10-video-grid</title>
<style>
:root {
color-scheme: dark;
}
html,
body {
margin: 0;
padding: 0;
background: #050714;
color: #e6e6f0;
font-family:
system-ui,
-apple-system,
sans-serif;
overflow: hidden;
}
#root {
position: relative;
width: 1920px;
height: 1080px;
display: grid;
grid-template-columns: repeat(5, 1fr);
grid-template-rows: repeat(2, 1fr);
gap: 8px;
padding: 8px;
box-sizing: border-box;
}
.tile {
position: relative;
background: #111827;
border-radius: 12px;
overflow: hidden;
box-shadow: 0 0 0 1px rgba(255, 255, 255, 0.05);
will-change: transform;
}
.tile video {
position: absolute;
inset: 0;
width: 100%;
height: 100%;
object-fit: cover;
}
.tile .label {
position: absolute;
top: 8px;
left: 8px;
z-index: 2;
font:
600 14px/1 system-ui,
sans-serif;
color: #fff;
background: rgba(0, 0, 0, 0.6);
padding: 4px 8px;
border-radius: 6px;
pointer-events: none;
}
</style>
<script src="/vendor/gsap.min.js"></script>
<script data-hyperframes-runtime="1" src="/vendor/hyperframe.runtime.iife.js"></script>
</head>
<body>
<div
id="root"
data-composition-id="main"
data-width="1920"
data-height="1080"
data-duration="10"
data-fps="30"
></div>
<script>
(function () {
  // Fixture shape: ten tiles, each hosting one <video> whose clip window
  // (data-start / data-duration) spans the full ten-second composition.
  var VIDEO_TILES = 10;
  var CLIP_SECONDS = 10;

  var grid = document.getElementById("root");

  // Builds a single grid cell: a caption badge followed by a muted <video>.
  // Attribute order is kept the same as the markup contract the fixture has
  // always produced (timing attributes first, then src/preload/playsinline).
  function buildTile(index) {
    var cell = document.createElement("div");
    cell.className = "tile";
    cell.id = "tile-" + index;

    var caption = document.createElement("div");
    caption.className = "label";
    caption.textContent = "video " + (index + 1);
    cell.appendChild(caption);

    var clip = document.createElement("video");
    clip.id = "video-" + index;
    [
      ["data-start", "0"],
      ["data-duration", String(CLIP_SECONDS)],
      ["data-track-index", String(index)],
      ["src", "sample.mp4"],
      ["preload", "auto"],
      ["playsinline", ""],
    ].forEach(function (pair) {
      clip.setAttribute(pair[0], pair[1]);
    });
    clip.muted = true;
    cell.appendChild(clip);

    return cell;
  }

  var cells = [];
  for (var n = 0; n < VIDEO_TILES; n += 1) {
    var built = buildTile(n);
    grid.appendChild(built);
    cells.push(built);
  }

  // A deliberately cheap parent timeline: the player needs a non-empty
  // composition to drive, so every tile gets a barely visible scale
  // "breath" across the whole duration. That keeps GSAP scrubbing a real
  // property without eating the rAF budget the video decoder needs.
  var timeline = gsap.timeline({ paused: true });
  cells.forEach(function (cell) {
    timeline.fromTo(
      cell,
      { scale: 0.985 },
      { scale: 1, duration: CLIP_SECONDS, ease: "sine.inOut" },
      0,
    );
  });

  // Register the timeline under the composition id declared on #root.
  var registry = window.__timelines || {};
  window.__timelines = registry;
  registry["main"] = timeline;
})();
</script>
</body>
</html>
Binary file not shown.
68 changes: 65 additions & 3 deletions packages/player/tests/perf/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ import { execFileSync } from "node:child_process";
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";
import { runFps } from "./scenarios/02-fps.ts";
import { runLoad } from "./scenarios/03-load.ts";
import { runScrub } from "./scenarios/04-scrub.ts";
import { runDrift } from "./scenarios/05-drift.ts";
import { reportAndGate, type GateMode, type GateResult, type Metric } from "./perf-gate.ts";
import { launchBrowser } from "./runner.ts";
import { startServer } from "./server.ts";
Expand All @@ -38,7 +41,42 @@ const HERE = dirname(fileURLToPath(import.meta.url));
const RESULTS_DIR = resolve(HERE, "results");
const RESULTS_FILE = resolve(RESULTS_DIR, "metrics.json");

type ScenarioId = "load";
type ScenarioId = "load" | "fps" | "scrub" | "drift";

/**
* Per-scenario default `runs` value, used when the caller didn't pass
* `--runs` (i.e. `args.runs` is null).
*
* Why `load` gets 5 runs and the others get 3:
*
* - `load` reports a single p95 over `runs` measurements, so each `run` is
* one sample. p95 over n=3 is mostly noise (the 95th percentile of three
* numbers is just `max`), so we bump it to 5. We considered 10 — but cold
* load is the slowest scenario in the shard (~2s × 5 runs × 2 fixtures =
* ~20s with disk cache cleared), and going to 10 would push the load shard
* past 30s of pure-measurement wall time per CI invocation.
* - `fps` aggregates as `min(ratio)` over runs — 3 runs gives us a worst-
* of-three signal, which is what we want for a floor metric. Adding more
* runs can never raise a minimum, only hold it or lower it (more chances
* to catch a stall), so extra runs would shift the gate toward false
* positives from runner contention rather than real regressions.
* - `scrub` and `drift` *pool* their per-run samples (10 seeks/run for
* scrub, ~1500 RVFC frames/run for drift) and compute the percentile over
* the pooled set. Their effective sample count for the percentile is
* `runs × samples_per_run`, not `runs`, so 3 runs already gives 30 scrub
* samples and ~4500 drift samples per shard — at or above the n≈30 rule of
* thumb for a stable p95.
*
* TODO(player-perf): revisit `fps: 3` once we have ~2 weeks of CI baseline
* data — if `min(ratio)` shows >5% inter-run variance attributable to runner
* jitter (not real player regressions), bump to 5 and tighten the
* `compositionTimeAdvancementRatioMin` baseline accordingly.
*/
const DEFAULT_RUNS: Record<ScenarioId, number> = {
load: 5,
fps: 3,
scrub: 3,
drift: 3,
};

type ResultsFile = {
schemaVersion: 1;
Expand Down Expand Up @@ -88,7 +126,7 @@ function parseArgs(argv: string[]): ParsedArgs {
// `mode` is consumed (measure logs regressions but never fails; enforce
// exits non-zero on regression).
mode: (process.env.PLAYER_PERF_MODE as GateMode) === "enforce" ? "enforce" : "measure",
scenarios: ["load"],
scenarios: ["load", "fps", "scrub", "drift"],
runs: null,
fixture: null,
headful: false,
Expand Down Expand Up @@ -150,7 +188,31 @@ async function main(): Promise<void> {
const m = await runLoad({
browser,
origin: server.origin,
runs: args.runs ?? 5,
runs: args.runs ?? DEFAULT_RUNS.load,
fixture: args.fixture,
});
metrics.push(...m);
} else if (scenario === "fps") {
const m = await runFps({
browser,
origin: server.origin,
runs: args.runs ?? DEFAULT_RUNS.fps,
fixture: args.fixture,
});
metrics.push(...m);
} else if (scenario === "scrub") {
const m = await runScrub({
browser,
origin: server.origin,
runs: args.runs ?? DEFAULT_RUNS.scrub,
fixture: args.fixture,
});
metrics.push(...m);
} else if (scenario === "drift") {
const m = await runDrift({
browser,
origin: server.origin,
runs: args.runs ?? DEFAULT_RUNS.drift,
fixture: args.fixture,
});
metrics.push(...m);
Expand Down
11 changes: 10 additions & 1 deletion packages/player/tests/perf/perf-gate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,16 @@ export type Metric = {
export type PerfBaseline = {
compLoadColdP95Ms: number;
compLoadWarmP95Ms: number;
fpsMin: number;
/**
* Floor on `(compositionTime advanced) / (wallClock elapsed)` over a sustained
* playback window — see packages/player/tests/perf/scenarios/02-fps.ts. A
* healthy player keeps up with its intended speed and reads ~1.0; values
* below 1.0 mean the composition clock fell behind real time, which is the
* actual user-visible jank we want to gate against. Refresh-rate independent
* by construction, so it does not saturate to display refresh on high-Hz
* runners the way the previous `fpsMin` did. Direction: higher-is-better.
*/
compositionTimeAdvancementRatioMin: number;
scrubLatencyP95IsolatedMs: number;
scrubLatencyP95InlineMs: number;
driftMaxMs: number;
Expand Down
Loading
Loading