Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/bench/src/__tests__/bench-runtime.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ describe("bench runtime", () => {
scriptName: "initial",
autorun: false,
updateRatePerSec: 1000,
waitForTrigger: false,
};

expect(createBenchRequest(query, dataset, "123.0")).toMatchObject({
Expand Down
9 changes: 9 additions & 0 deletions apps/bench/src/__tests__/query-state.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ describe("parseBenchQuery", () => {
scriptName: "initial",
autorun: false,
updateRatePerSec: 1000,
waitForTrigger: false,
});
});

Expand All @@ -28,6 +29,7 @@ describe("parseBenchQuery", () => {
scriptName: "initial",
autorun: true,
updateRatePerSec: 1000,
waitForTrigger: false,
});
});

Expand All @@ -44,6 +46,7 @@ describe("parseBenchQuery", () => {
scriptName: "scroll",
autorun: false,
updateRatePerSec: 1000,
waitForTrigger: false,
});
});

Expand All @@ -60,6 +63,7 @@ describe("parseBenchQuery", () => {
scriptName: "scroll",
autorun: false,
updateRatePerSec: 1000,
waitForTrigger: false,
});
});

Expand All @@ -76,6 +80,7 @@ describe("parseBenchQuery", () => {
scriptName: "scroll",
autorun: false,
updateRatePerSec: 1000,
waitForTrigger: false,
});
});

Expand All @@ -88,6 +93,7 @@ describe("parseBenchQuery", () => {
scriptName: "scroll",
autorun: false,
updateRatePerSec: 1000,
waitForTrigger: false,
});
});

Expand All @@ -100,6 +106,7 @@ describe("parseBenchQuery", () => {
scriptName: "scroll",
autorun: false,
updateRatePerSec: 1000,
waitForTrigger: false,
});
});

Expand All @@ -112,6 +119,7 @@ describe("parseBenchQuery", () => {
scriptName: "scroll",
autorun: false,
updateRatePerSec: 1000,
waitForTrigger: false,
});
});

Expand All @@ -124,6 +132,7 @@ describe("parseBenchQuery", () => {
scriptName: "scroll",
autorun: false,
updateRatePerSec: 1000,
waitForTrigger: false,
});
});

Expand Down
28 changes: 25 additions & 3 deletions apps/bench/src/bench-app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -429,10 +429,32 @@ export function BenchApp({ search, browserVersion }: BenchAppProps) {
if (!query.autorun || autorunRef.current) {
return;
}

autorunRef.current = true;
void autorunScript(query.scriptName);
}, [query.autorun, query.scriptName]);

let cancelled = false;
const run = () => {
if (cancelled) return;
void autorunScript(query.scriptName);
};

if (!query.waitForTrigger) {
run();
return;
}

const tick = () => {
if (cancelled) return;
if (window.__PRETABLE_BENCH_START__ === true) {
run();
return;
}
requestAnimationFrame(tick);
};
requestAnimationFrame(tick);
return () => {
cancelled = true;
};
}, [query.autorun, query.waitForTrigger, query.scriptName]);

const selectedScenario = getScenarioById(query.scenarioId);

Expand Down
1 change: 1 addition & 0 deletions apps/bench/src/bench-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ export interface BenchQueryState {
* 50 ms tick (so RAF/timer behavior stays consistent across rates).
*/
updateRatePerSec: number;
waitForTrigger: boolean;
}
2 changes: 2 additions & 0 deletions apps/bench/src/query-state.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const DEFAULT_QUERY_STATE: BenchQueryState = {
scriptName: "initial",
autorun: false,
updateRatePerSec: 1000,
waitForTrigger: false,
};

/** Allowed update-rate values for the rate sweep. */
Expand Down Expand Up @@ -78,5 +79,6 @@ export function parseBenchQuery(
? parsed
: DEFAULT_QUERY_STATE.updateRatePerSec;
})(),
waitForTrigger: searchParams.get("waitForTrigger") === "1",
};
}
1 change: 1 addition & 0 deletions apps/bench/src/window.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { BenchRunSummary } from "@pretable-internal/bench-runner";
declare global {
/**
 * Bench-specific globals merged into the browser `Window` type so the bench
 * app and its Playwright spec can exchange state through `window`.
 */
interface Window {
/**
 * Benchmark run summary. The Playwright spec blocks until this is truthy
 * and then reads it back out of the page.
 * NOTE(review): presumably assigned by the bench app once a run completes — confirm.
 */
__PRETABLE_BENCH_RESULT__?: BenchRunSummary;
/**
 * Start gate for `waitForTrigger=1` runs. While the gate is active, the bench
 * app's autorun effect polls this flag on each animation frame and only starts
 * the script once it is strictly `true`. The Playwright spec sets it when perf
 * tracing is enabled.
 */
__PRETABLE_BENCH_START__?: boolean;
}
}

Expand Down
11 changes: 10 additions & 1 deletion apps/bench/tests/bench.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ test("writes benchmark artifacts for the selected Pretable run", async ({
const rateParam = updateRatePerSec
? `&updateRatePerSec=${updateRatePerSec}`
: "";
const triggerParam = perfTraceEnabled ? "&waitForTrigger=1" : "";
await page.goto(
`/?adapter=${adapterId}&scenario=${scenarioId}&scale=${scale}&script=${scriptName}${rateParam}&autorun=1`,
`/?adapter=${adapterId}&scenario=${scenarioId}&scale=${scale}&script=${scriptName}${rateParam}&autorun=1${triggerParam}`,
);

await expect(page.getByLabel(adapterLabel).first()).toBeVisible();
Expand Down Expand Up @@ -73,6 +74,14 @@ test("writes benchmark artifacts for the selected Pretable run", async ({
}
}

if (perfTraceEnabled) {
await page.evaluate(() => {
(
window as Window & { __PRETABLE_BENCH_START__?: boolean }
).__PRETABLE_BENCH_START__ = true;
});
}

await page.waitForFunction(() => Boolean(window.__PRETABLE_BENCH_RESULT__));

const result = await page.evaluate(() => window.__PRETABLE_BENCH_RESULT__);
Expand Down
26 changes: 26 additions & 0 deletions docs/research/repo-memory.md
Original file line number Diff line number Diff line change
Expand Up @@ -565,3 +565,29 @@ Output: `status/traces/<stem>.cdp.json` (sibling to the Playwright `.trace.zip`)
- **Pretable wrapped-text filter perf-fix investigation** — next item; profiling + scope. Tooling now unblocked; remaining blocker is the bench-app interaction-start timing noted above.
- **`/bench` page swap to read from `hypotheses.json` directly** — still deferred; aggregator scripts continue feeding the page for now.
- **Matrix-runner reliability** — flakes are now well-documented across PRs #133, #134, #140, and this PR's sort re-run (which succeeded for pretable-only, but the multi-adapter runner remains fragile).

## 2026-05-15

### Bench-app trigger gating (CDP tracing now captures the full interaction window)

Closed the consumer-side limitation called out in the 2026-05-13 CDP-tracing entry: the bench app's autorun was firing before CDP attach completed, so traces captured only the tail of the interaction window.

**The gate:** new `waitForTrigger=1` query param on the bench app. When present, the autorun `useEffect` polls `window.__PRETABLE_BENCH_START__` via `requestAnimationFrame` instead of running the script immediately. The Playwright spec automatically appends the param under `PLAYWRIGHT_PERF_TRACE=1`, attaches CDP, then sets the window flag — so by the time the interaction script runs, tracing is recording.

The trigger is set **outside** the CDP try/catch, so the gate is released whether CDP attach succeeds or fails; the bench never hangs waiting for the flag.

**Before/after (filter-text / S2 / hypothesis, pretable):**

| Metric | PR #143 baseline | This PR |
| -------------- | ---------------- | ------------ |
| Trace events | 145 | 723 |
| File size | ~30 KB | ~221 KB |
| Window covered | tail only | full ~144 ms |

**Category breakdown (verification run):** 427 timeline + 140 frame + 39 frame-timeline + 42 v8 + 26 cpu_profiler + 25 cc + 23 metadata — the full DevTools profiling set. (These counts sum to 722, one short of the 723-event total in the table above.)

The plan set >1000 events as the success bar. The verification run came in under that at 723, but the bar was only directional: `filter-text` at hypothesis scale is genuinely a sub-200 ms operation, so there are simply fewer events to capture. Heavier scripts and larger scales will produce proportionally larger traces. What matters — full-window coverage — is achieved.

**Unaffected:** `/bench` page autorun, matrix runner, all default paths. The gate is opt-in via query param; default behavior is byte-identical to current `main`.

**Wrapped-text filter perf-fix is now fully unblocked** — the next consumer of this tooling. Both the harness (PR #143) and the consumer-side gating (this PR) are in place; profiling can proceed against actionable flame graphs.
Loading
Loading