diff --git a/apps/bench/src/bench-runtime.ts b/apps/bench/src/bench-runtime.ts index 0df77b9..20f75a0 100644 --- a/apps/bench/src/bench-runtime.ts +++ b/apps/bench/src/bench-runtime.ts @@ -416,6 +416,7 @@ export async function measureBenchInteractionRun( ); const startTimestamp = await waitForAnimationFrame(); + performance.mark("pretable.interaction.start"); trigger(); let domNodesPeak = root.querySelectorAll("*").length; @@ -466,6 +467,7 @@ export async function measureBenchInteractionRun( if (isFirstChangedFrame) { firstChangedAt = timestamp; + performance.mark("pretable.interaction.firstFrame"); } if (firstChangedAt === null) { @@ -522,6 +524,7 @@ export async function measureBenchInteractionRun( if (stableFrames >= Math.max(0, profile.maxSettleFrames - 1)) { settledAt = timestamp; + performance.mark("pretable.interaction.settled"); break; } } diff --git a/apps/bench/tests/bench.spec.ts b/apps/bench/tests/bench.spec.ts index 8e8f9a1..a7dc96e 100644 --- a/apps/bench/tests/bench.spec.ts +++ b/apps/bench/tests/bench.spec.ts @@ -62,6 +62,7 @@ test("writes benchmark artifacts for the selected Pretable run", async ({ "disabled-by-default-devtools.timeline.frame", "v8", "disabled-by-default-v8.cpu_profiler", + "blink.user_timing", ].join(","), options: "sampling-frequency=10000", }); diff --git a/docs/research/2026-05-16-bench-cdp-window-slicing-correction.md b/docs/research/2026-05-16-bench-cdp-window-slicing-correction.md new file mode 100644 index 0000000..2b0ac12 --- /dev/null +++ b/docs/research/2026-05-16-bench-cdp-window-slicing-correction.md @@ -0,0 +1,82 @@ +# Bench CDP window slicing — correction to PR #145 — 2026-05-16 + +## Summary + +PR #145's investigation analyzed the **full trace** (page-load to bench-result publish), which made `getEstimatedRowHeightSignature` look like the leading hotspot at ~1.6-2 ms self-time. With interaction-window slicing now in place, that signal **disappears** — F8's cost is during initial mount of 3000 rows, not the filter trigger. The actual interaction-window hotspot for filter-text/S2/hypothesis is **`matchesFilters`** at `packages/grid-core/src/derived-rows.ts:73` — 800 μs (15%) of the 6.97 ms trigger-to-first-frame window in this n=1 sample. Cause: the function lowercases the cell value on every filter pass across all 3000 source rows; nothing is cached between filter calls. + +This PR ships the slicing infrastructure (markers + analyzer option) but **no production fix**. The actual `matchesFilters` optimization is queued as the next follow-up. + +## What ships + +- `apps/bench/src/bench-runtime.ts` — three `performance.mark()` calls: `pretable.interaction.start`, `.firstFrame`, `.settled`. +- `apps/bench/tests/bench.spec.ts` — added `blink.user_timing` to the CDP `Tracing.start` categories so the marks land in the captured trace. +- `scripts/analyze-cdp.mjs` — new `--window=interaction|settle|full` flag. Default remains `full` for backwards compatibility. Window slicing filters ProfileChunk samples by reconstructed sample timestamps (chunk-start + cumulative deltas). + +## Trace findings (n=1, interaction window) + +``` +Window: interaction (6.97 ms) +Total sample time in window: 5.26 ms across 26 unique nodes + +Top 15 by SELF time (sourcemap-resolved): + 1127μs (21.4%) (program) native overhead + 802μs (15.3%) matchesFilters packages/core/dist/index.mjs:69 + 409μs ( 7.8%) (anon) packages/react/dist/index.mjs:29 + 275μs ( 5.2%) u8 packages/react/dist/index.mjs:17 + 272μs ( 5.2%) Hd react-dom-client.production.js:13346 + 163μs ( 3.1%) xo react-dom-client.production.js:4333 + 153μs ( 2.9%) ye apps/bench/src/bench-runtime.ts:376 + 150μs ( 2.9%) (anon) apps/bench/src/bench-app.tsx:489 + 144μs ( 2.7%) Ns react-dom-client.production.js:5437 + ... +``` + +**Notably absent:** `getEstimatedRowHeightSignature` (F8). It does not appear in the interaction-window top 26. PR #145's memo's leading hypothesis was wrong — confirmed by slicing. + +## Root-cause for filter-text + +`matchesFilters` (`packages/grid-core/src/derived-rows.ts:73`): + +```ts +function matchesFilters( + row: TRow, + resolvedFilters: ResolvedFilter[], +): boolean { + for (const { column, needle } of resolvedFilters) { + const haystack = String(readCellValue(row, column)).toLowerCase(); // ← runs per (row, filter call) + if (!haystack.includes(needle)) return false; + } + return true; +} +``` + +Called by `deriveVisibleRows` over all 3000 source rows on every filter change. Each call: read cell value, `String(...)`, `.toLowerCase()`, `.includes(needle)`. The lowercase conversion is the dominant per-row cost — `String.prototype.toLowerCase` allocates a new string, and for wrap columns with long cell text (S2's metadata/text columns) the allocation cost adds up across 3000 rows. + +## Proposed fix (queued for follow-up) + +**Option A — lowercase cache keyed on row + column.** Add a `Map>` (or `WeakMap`) of pre-lowercased cell values. Populated lazily, invalidated when row identity changes. Expected delta: ~600-800 μs reduction on filter-text — could move it from 16.79 ms p95 (PR #134's n=20) to ~16.0-16.2 ms — borderline under budget. + +**Option B — pre-build a lowercase index at filter-setup time.** On `replaceFilters`, walk all rows once and build column-indexed lowercase strings. Same expected delta as A but front-loaded into the filter trigger itself (still part of the 16 ms window, but pre-sort). + +**Option C — Aho-Corasick or similar string-search index.** Overkill for current sizes; revisit if filter-text complexity grows. + +Recommended: A. Lowest risk, no public-API impact, internal-only edit. + +## Sort + filter-metadata + +Both run through the same `deriveVisibleRows` path. Sort doesn't filter, so `matchesFilters` doesn't fire — but `sortRows` uses `Intl.Collator.compare` over many pairs, which has its own cost. A separate slice of a sort trace would confirm whether `Intl.Collator` is the dominant cost there. Out of scope for this PR. + +## Verdict + +**No production code change in this PR.** Ships the missing infrastructure (markers + window slicing) and corrects PR #145's misattribution. Quality wedge untouched. + +## Out-of-scope follow-ups + +- **`matchesFilters` lowercase cache** — the actual perf fix. Next PR. +- **Sort-script flame-graph slice** — confirm whether `Intl.Collator` is the sort-script hotspot. +- **filter-metadata flame-graph slice** — same code path as filter-text; should drop with A. +- **Matrix-runner CDP integration, Speedscope export** — same status as PR #143. + +## Lesson (saved to memory) + +CDP traces capture the entire page lifecycle. The bench's measured `interaction_latency_ms` only counts the trigger-to-first-frame window. Always slice to the interaction window before naming a hotspot — full-trace top-N can be dominated by mount-time work that doesn't count against the metric. diff --git a/scripts/analyze-cdp.mjs b/scripts/analyze-cdp.mjs index f5c9b89..d0e872c 100644 --- a/scripts/analyze-cdp.mjs +++ b/scripts/analyze-cdp.mjs @@ -4,25 +4,34 @@ // a .js.map is provided alongside the trace. // // Usage: -// node scripts/analyze-cdp.mjs [path/to/index-*.js.map] +// node scripts/analyze-cdp.mjs [path/to/index-*.js.map] [--window=] // -// Output: top-N (default 40) frames by self-time, with file:line attribution. +// --window options: +// full (default) Aggregate over the entire trace. +// interaction Slice to the trigger-to-first-frame window. Requires +// performance.mark("pretable.interaction.start" / ".firstFrame") +// events in the trace (the bench harness emits these +// automatically when PLAYWRIGHT_PERF_TRACE=1). +// settle Slice trigger-to-settled (first-frame plus follow-up frames). import { readFileSync } from "node:fs"; import { createRequire } from "node:module"; -const tracePath = process.argv[2]; -const mapPath = process.argv[3]; +const args = process.argv.slice(2); +const tracePath = args.find((a) => !a.startsWith("--") && a.endsWith(".json")); +const mapPath = args.find((a) => !a.startsWith("--") && a.endsWith(".map")); +const windowArg = args.find((a) => a.startsWith("--window=")); +const windowMode = windowArg ? windowArg.split("=")[1] : "full"; + if (!tracePath) { console.error( - "usage: node scripts/analyze-cdp.mjs [index.js.map]", + "usage: node scripts/analyze-cdp.mjs [index.js.map] [--window=full|interaction|settle]", ); process.exit(1); } const trace = JSON.parse(readFileSync(tracePath, "utf8")); -// Lazy-load source-map (optional dep — only needed when map path provided). let consumer = null; if (mapPath) { const require = createRequire(import.meta.url); @@ -31,6 +40,45 @@ if (mapPath) { consumer = await new SourceMapConsumer(rawMap); } +// Find performance.mark timestamps for window slicing. +const marks = new Map(); +for (const ev of trace.traceEvents) { + if (ev.cat !== "blink.user_timing") continue; + if (typeof ev.name !== "string") continue; + if (!ev.name.startsWith("pretable.")) continue; + if (typeof ev.ts !== "number") continue; + if (!marks.has(ev.name)) marks.set(ev.name, ev.ts); +} + +let windowStartTs = -Infinity; +let windowEndTs = Infinity; +let windowLabel = "full trace"; +if (windowMode === "interaction") { + const startTs = marks.get("pretable.interaction.start"); + const endTs = marks.get("pretable.interaction.firstFrame"); + if (startTs == null || endTs == null) { + console.error( + "[analyze-cdp] --window=interaction needs both pretable.interaction.start and .firstFrame marks; falling back to full", + ); + } else { + windowStartTs = startTs; + windowEndTs = endTs; + windowLabel = `interaction (${((endTs - startTs) / 1000).toFixed(2)} ms)`; + } +} else if (windowMode === "settle") { + const startTs = marks.get("pretable.interaction.start"); + const endTs = marks.get("pretable.interaction.settled"); + if (startTs == null || endTs == null) { + console.error( + "[analyze-cdp] --window=settle needs both pretable.interaction.start and .settled marks; falling back to full", + ); + } else { + windowStartTs = startTs; + windowEndTs = endTs; + windowLabel = `settle (${((endTs - startTs) / 1000).toFixed(2)} ms)`; + } +} + const nodes = new Map(); const selfDeltaUs = new Map(); let totalDelta = 0; @@ -43,7 +91,12 @@ for (const ev of trace.traceEvents) { for (const n of cpu.nodes ?? []) nodes.set(n.id, n); const samples = cpu.samples ?? []; const deltas = data.timeDeltas ?? []; + // ProfileChunk timestamps are walked sample-by-sample: sample i happens + // at chunkStart + sum(deltas[0..=i]). + let runningTs = ev.ts ?? 0; for (let i = 0; i < samples.length; i++) { + runningTs += deltas[i] ?? 0; + if (runningTs < windowStartTs || runningTs > windowEndTs) continue; const id = samples[i]; const d = Math.max(0, deltas[i] ?? 0); selfDeltaUs.set(id, (selfDeltaUs.get(id) ?? 0) + d); @@ -76,7 +129,7 @@ for (const [id, us] of selfDeltaUs.entries()) { entries.sort((a, b) => b.us - a.us); console.log( - `Total sample time: ${(totalDelta / 1000).toFixed(2)} ms across ${selfDeltaUs.size} unique nodes`, + `Window: ${windowLabel}\nTotal sample time in window: ${(totalDelta / 1000).toFixed(2)} ms across ${selfDeltaUs.size} unique nodes`, ); console.log( consumer @@ -84,7 +137,7 @@ console.log( : "\nTop 40 by SELF time (minified — pass a .js.map for attribution):", ); for (const e of entries.slice(0, 40)) { - const pct = ((e.us / totalDelta) * 100).toFixed(1); + const pct = totalDelta > 0 ? ((e.us / totalDelta) * 100).toFixed(1) : " -"; console.log(` ${e.us.toString().padStart(7)}μs (${pct}%) ${e.label}`); }