cacheplane · blove · May 18, 2026 · May 18, 2026
@@ -416,6 +416,7 @@ export async function measureBenchInteractionRun(
   );
   const startTimestamp = await waitForAnimationFrame();
 
+  performance.mark("pretable.interaction.start");
   trigger();
 
   let domNodesPeak = root.querySelectorAll("*").length;
@@ -466,6 +467,7 @@ export async function measureBenchInteractionRun(
 
     if (isFirstChangedFrame) {
       firstChangedAt = timestamp;
+      performance.mark("pretable.interaction.firstFrame");
     }
 
     if (firstChangedAt === null) {
@@ -522,6 +524,7 @@ export async function measureBenchInteractionRun(
 
     if (stableFrames >= Math.max(0, profile.maxSettleFrames - 1)) {
       settledAt = timestamp;
+      performance.mark("pretable.interaction.settled");
       break;
     }
   }

@@ -62,6 +62,7 @@ test("writes benchmark artifacts for the selected Pretable run", async ({
           "disabled-by-default-devtools.timeline.frame",
           "v8",
           "disabled-by-default-v8.cpu_profiler",
+          "blink.user_timing",
         ].join(","),
         options: "sampling-frequency=10000",
       });

@@ -0,0 +1,82 @@
+# Bench CDP window slicing — correction to PR #145 — 2026-05-16
+
+## Summary
+
+PR #145's investigation analyzed the **full trace** (page-load to bench-result publish), which made `getEstimatedRowHeightSignature` look like the leading hotspot at ~1.6-2 ms self-time. With interaction-window slicing now in place, that signal **disappears** — F8's cost is during initial mount of 3000 rows, not the filter trigger. The actual interaction-window hotspot for filter-text/S2/hypothesis is **`matchesFilters`** at `packages/grid-core/src/derived-rows.ts:73` — 800 μs (15%) of the 6.97 ms trigger-to-first-frame window in this n=1 sample. Cause: the function lowercases the cell value on every filter pass across all 3000 source rows; nothing is cached between filter calls.
+
+This PR ships the slicing infrastructure (markers + analyzer option) but **no production fix**. The actual `matchesFilters` optimization is queued as the next follow-up.
+
+## What ships
+
+- `apps/bench/src/bench-runtime.ts` — three `performance.mark()` calls: `pretable.interaction.start`, `.firstFrame`, `.settled`.
+- `apps/bench/tests/bench.spec.ts` — added `blink.user_timing` to the CDP `Tracing.start` categories so the marks land in the captured trace.
+- `scripts/analyze-cdp.mjs` — new `--window=interaction|settle|full` flag. Default remains `full` for backwards compatibility. Window slicing filters ProfileChunk samples by reconstructed sample timestamps (chunk-start + cumulative deltas).
+
+## Trace findings (n=1, interaction window)
+
+```
+Window: interaction (6.97 ms)
+Total sample time in window: 5.26 ms across 26 unique nodes
+
+Top 15 by SELF time (sourcemap-resolved):
+  1127μs  (21.4%)  (program)                                native overhead
+   802μs  (15.3%)  matchesFilters  packages/core/dist/index.mjs:69
+   409μs  ( 7.8%)  (anon) packages/react/dist/index.mjs:29
+   275μs  ( 5.2%)  u8 packages/react/dist/index.mjs:17
+   272μs  ( 5.2%)  Hd react-dom-client.production.js:13346
+   163μs  ( 3.1%)  xo react-dom-client.production.js:4333
+   153μs  ( 2.9%)  ye apps/bench/src/bench-runtime.ts:376
+   150μs  ( 2.9%)  (anon) apps/bench/src/bench-app.tsx:489
+   144μs  ( 2.7%)  Ns react-dom-client.production.js:5437
+   ...
+```
+
+**Notably absent:** `getEstimatedRowHeightSignature` (F8). It does not appear in the interaction-window top 26. PR #145's memo's leading hypothesis was wrong — confirmed by slicing.
+
+## Root-cause for filter-text
+
+`matchesFilters` (`packages/grid-core/src/derived-rows.ts:73`):
+
+```ts
+function matchesFilters<TRow extends PretableRow>(
+  row: TRow,
+  resolvedFilters: ResolvedFilter<TRow>[],
+): boolean {
+  for (const { column, needle } of resolvedFilters) {
+    const haystack = String(readCellValue(row, column)).toLowerCase(); // ← runs per (row, filter call)
+    if (!haystack.includes(needle)) return false;
+  }
+  return true;
+}
+```
+
+Called by `deriveVisibleRows` over all 3000 source rows on every filter change. Each call: read cell value, `String(...)`, `.toLowerCase()`, `.includes(needle)`. The lowercase conversion is the dominant per-row cost — `String.prototype.toLowerCase` allocates a new string, and for wrap columns with long cell text (S2's metadata/text columns) the allocation cost adds up across 3000 rows.
+
+## Proposed fix (queued for follow-up)
+
+**Option A — lowercase cache keyed on row + column.** Add a `Map<TRow, Map<columnId, string>>` (or `WeakMap<TRow, ...>`) of pre-lowercased cell values. Populated lazily, invalidated when row identity changes. Expected delta: ~600-800 μs reduction on filter-text — could move it from 16.79 ms p95 (PR #134's n=20) to ~16.0-16.2 ms — borderline under budget.
+
+**Option B — pre-build a lowercase index at filter-setup time.** On `replaceFilters`, walk all rows once and build column-indexed lowercase strings. Same expected delta as A but front-loaded into the filter trigger itself (still part of the 16 ms window, but pre-sort).
+
+**Option C — Aho-Corasick or similar string-search index.** Overkill for current sizes; revisit if filter-text complexity grows.
+
+Recommended: A. Lowest risk, no public-API impact, internal-only edit.
+
+## Sort + filter-metadata
+
+Both run through the same `deriveVisibleRows` path. Sort doesn't filter, so `matchesFilters` doesn't fire — but `sortRows` uses `Intl.Collator.compare` over many pairs, which has its own cost. A separate slice of a sort trace would confirm whether `Intl.Collator` is the dominant cost there. Out of scope for this PR.
+
+## Verdict
+
+**No production code change in this PR.** Ships the missing infrastructure (markers + window slicing) and corrects PR #145's misattribution. Quality wedge untouched.
+
+## Out-of-scope follow-ups
+
+- **`matchesFilters` lowercase cache** — the actual perf fix. Next PR.
+- **Sort-script flame-graph slice** — confirm whether `Intl.Collator` is the sort-script hotspot.
+- **filter-metadata flame-graph slice** — same code path as filter-text; should drop with A.
+- **Matrix-runner CDP integration, Speedscope export** — same status as PR #143.
+
+## Lesson (saved to memory)
+
+CDP traces capture the entire page lifecycle. The bench's measured `interaction_latency_ms` only counts the trigger-to-first-frame window. Always slice to the interaction window before naming a hotspot — full-trace top-N can be dominated by mount-time work that doesn't count against the metric.
@@ -4,25 +4,34 @@
 // a .js.map is provided alongside the trace.
 //
 // Usage:
-//   node scripts/analyze-cdp.mjs <trace.cdp.json> [path/to/index-*.js.map]
+//   node scripts/analyze-cdp.mjs <trace.cdp.json> [path/to/index-*.js.map] [--window=<name>]
 //
-// Output: top-N (default 40) frames by self-time, with file:line attribution.
+// --window options:
+//   full          (default) Aggregate over the entire trace.
+//   interaction   Slice to the trigger-to-first-frame window. Requires
+//                 performance.mark("pretable.interaction.start" / ".firstFrame")
+//                 events in the trace (the bench harness emits these
+//                 automatically when PLAYWRIGHT_PERF_TRACE=1).
+//   settle        Slice trigger-to-settled (first-frame plus follow-up frames).
 
 import { readFileSync } from "node:fs";
 import { createRequire } from "node:module";
 
-const tracePath = process.argv[2];
-const mapPath = process.argv[3];
+const args = process.argv.slice(2);
+const tracePath = args.find((a) => !a.startsWith("--") && a.endsWith(".json"));
+const mapPath = args.find((a) => !a.startsWith("--") && a.endsWith(".map"));
+const windowArg = args.find((a) => a.startsWith("--window="));
+const windowMode = windowArg ? windowArg.split("=")[1] : "full";
+
 if (!tracePath) {
   console.error(
-    "usage: node scripts/analyze-cdp.mjs <trace.cdp.json> [index.js.map]",
+    "usage: node scripts/analyze-cdp.mjs <trace.cdp.json> [index.js.map] [--window=full|interaction|settle]",
   );
   process.exit(1);
 }
 
 const trace = JSON.parse(readFileSync(tracePath, "utf8"));
 
-// Lazy-load source-map (optional dep — only needed when map path provided).
 let consumer = null;
 if (mapPath) {
   const require = createRequire(import.meta.url);
@@ -31,6 +40,45 @@ if (mapPath) {
   consumer = await new SourceMapConsumer(rawMap);
 }
 
+// Find performance.mark timestamps for window slicing.
+const marks = new Map();
+for (const ev of trace.traceEvents) {
+  if (ev.cat !== "blink.user_timing") continue;
+  if (typeof ev.name !== "string") continue;
+  if (!ev.name.startsWith("pretable.")) continue;
+  if (typeof ev.ts !== "number") continue;
+  if (!marks.has(ev.name)) marks.set(ev.name, ev.ts);
+}
+
+let windowStartTs = -Infinity;
+let windowEndTs = Infinity;
+let windowLabel = "full trace";
+if (windowMode === "interaction") {
+  const startTs = marks.get("pretable.interaction.start");
+  const endTs = marks.get("pretable.interaction.firstFrame");
+  if (startTs == null || endTs == null) {
+    console.error(
+      "[analyze-cdp] --window=interaction needs both pretable.interaction.start and .firstFrame marks; falling back to full",
+    );
+  } else {
+    windowStartTs = startTs;
+    windowEndTs = endTs;
+    windowLabel = `interaction (${((endTs - startTs) / 1000).toFixed(2)} ms)`;
+  }
+} else if (windowMode === "settle") {
+  const startTs = marks.get("pretable.interaction.start");
+  const endTs = marks.get("pretable.interaction.settled");
+  if (startTs == null || endTs == null) {
+    console.error(
+      "[analyze-cdp] --window=settle needs both pretable.interaction.start and .settled marks; falling back to full",
+    );
+  } else {
+    windowStartTs = startTs;
+    windowEndTs = endTs;
+    windowLabel = `settle (${((endTs - startTs) / 1000).toFixed(2)} ms)`;
+  }
+}
+
 const nodes = new Map();
 const selfDeltaUs = new Map();
 let totalDelta = 0;
@@ -43,7 +91,12 @@ for (const ev of trace.traceEvents) {
   for (const n of cpu.nodes ?? []) nodes.set(n.id, n);
   const samples = cpu.samples ?? [];
   const deltas = data.timeDeltas ?? [];
+  // ProfileChunk timestamps are walked sample-by-sample: sample i happens
+  // at chunkStart + sum(deltas[0..=i]).
+  let runningTs = ev.ts ?? 0;
   for (let i = 0; i < samples.length; i++) {
+    runningTs += deltas[i] ?? 0;
+    if (runningTs < windowStartTs || runningTs > windowEndTs) continue;
     const id = samples[i];
     const d = Math.max(0, deltas[i] ?? 0);
     selfDeltaUs.set(id, (selfDeltaUs.get(id) ?? 0) + d);
@@ -76,15 +129,15 @@ for (const [id, us] of selfDeltaUs.entries()) {
 entries.sort((a, b) => b.us - a.us);
 
 console.log(
-  `Total sample time: ${(totalDelta / 1000).toFixed(2)} ms across ${selfDeltaUs.size} unique nodes`,
+  `Window: ${windowLabel}\nTotal sample time in window: ${(totalDelta / 1000).toFixed(2)} ms across ${selfDeltaUs.size} unique nodes`,
 );
 console.log(
   consumer
     ? "\nTop 40 by SELF time (sourcemap-resolved):"
     : "\nTop 40 by SELF time (minified — pass a .js.map for attribution):",
 );
 for (const e of entries.slice(0, 40)) {
-  const pct = ((e.us / totalDelta) * 100).toFixed(1);
+  const pct = totalDelta > 0 ? ((e.us / totalDelta) * 100).toFixed(1) : "  -";
   console.log(`  ${e.us.toString().padStart(7)}μs  (${pct}%)  ${e.label}`);
 }