From a963d17cc095aa4170f59c5d901faa1b05e1a682 Mon Sep 17 00:00:00 2001
From: Hassan Abdel-Rahman <hassan.abdelrahman@gmail.com>
Date: Thu, 21 May 2026 14:22:07 -0400
Subject: [PATCH 1/3] Pass-scoped compute memo + collapse double render.meta
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reduces redundant `computeVia` work during the per-card prerender:

- `BaseDef.[queryableValue]` no longer re-reads `value[fieldName]`
  after `peekAtField` — the second read re-invoked `computeVia` for
  every contains/contains-many/links-to field in the search doc.
- `beginComputePass` / `endComputePass` install a synchronous
  per-instance compute memo around `serializeCard + searchDoc` so a
  card with N computeds runs each one once per render.meta pass
  instead of once per traversal.
- The prerender server's two-shot render.meta capture is collapsed
  to a single call. `model.capturedDeps` is frozen at parent
  readyPromise resolution and the fitted/embedded renders that ran
  between the two captures don't mutate the card model, so the
  second call was emitting the same bytes as the first.

Host emits `computedCalls`, `computedCacheHits`, `serializeMs`,
`searchDocMs` per row; render-settlement lifts them onto
`response.meta.diagnostics` so they persist into
`boxel_index.timing_diagnostics` for SQL-side perf triage. The
indexing-diagnostics skill documents the new fields and adds a SQL
pattern for ranking rows by computed-call pressure.

Off-pass reads pay a single `if (passComputeMemo === null)` branch
in `getter`, no counter increment and no Map operations.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .claude/skills/indexing-diagnostics/SKILL.md  | 48 +++++++++++++-
 packages/base/card-api.gts                    | 17 +++--
 packages/base/field-support.ts                | 65 +++++++++++++++++++
 packages/host/app/routes/render/meta.ts       | 27 ++++++++
 .../realm-server/prerender/render-runner.ts   | 44 +++++--------
 .../prerender/render-settlement.ts            |  8 +++
 packages/runtime-common/index.ts              | 33 ++++++++++
 7 files changed, 207 insertions(+), 35 deletions(-)
diff --git a/.claude/skills/indexing-diagnostics/SKILL.md b/.claude/skills/indexing-diagnostics/SKILL.md
index 44b903b9448..215bab4a13c 100644
--- a/.claude/skills/indexing-diagnostics/SKILL.md
+++ b/.claude/skills/indexing-diagnostics/SKILL.md
@@ -167,6 +167,25 @@ WHERE timing_diagnostics->>'renderElapsedMs' IS NOT NULL
 GROUP BY 1
 ORDER BY p95_ms DESC NULLS LAST
 LIMIT 20;
+
+-- Rows where the render.meta computed-field traversal dominates the
+-- render budget. `computedCalls` + the host-side `searchDocMs` /
+-- `serializeMs` are emitted by the host route per row. Use these to
+-- find aggregate-style cards that are eating their render budget on
+-- compute work rather than data loads or template stalls.
+SELECT
+  url,
+  to_timestamp((timing_diagnostics->>'indexedAt')::bigint / 1000) AS indexed_at,
+  (timing_diagnostics->>'computedCalls')::int                     AS calls,
+  (timing_diagnostics->>'computedCacheHits')::int                 AS cache_hits,
+  (timing_diagnostics->>'serializeMs')::numeric                   AS serialize_ms,
+  (timing_diagnostics->>'searchDocMs')::numeric                   AS search_doc_ms,
+  (timing_diagnostics->>'renderElapsedMs')::int                   AS render_ms
+FROM boxel_index
+WHERE realm_url = 'https://localhost:4201/user/your-realm/'
+  AND timing_diagnostics->>'computedCalls' IS NOT NULL
+ORDER BY (timing_diagnostics->>'computedCalls')::int DESC NULLS LAST
+LIMIT 20;
 ```
 
 ## Mode C — a worker job is stuck or got rejected
@@ -735,7 +754,30 @@ WHERE timing_diagnostics->>'requestId' = '<request-id>';
   ],
   "docsInFlight": 3,             // legacy count, kept for rollback safety
   "capturedDom": "<section data-prerender>…</section>",
-  "blockedTimerSummary": "Timers blocked during prerender: …"
+  "blockedTimerSummary": "Timers blocked during prerender: …",
+  "computedCalls": 187,          // distinct `computeVia` invocations during this row's
+                                 // render.meta traversal (serializeCard + searchDoc combined).
+                                 // Host-emitted; pass-scoped memo elides repeated reads of
+                                 // the same `(instance, fieldName)` so this number reflects
+                                 // distinct compute work, not total field-access pressure.
+                                 // Absent on rows produced by host builds before CS-11208.
+  "computedCacheHits": 374,      // repeated reads of the same `(instance, fieldName)`
+                                 // that hit the pass-scoped memo. `computedCalls +
+                                 // computedCacheHits` is the total computed-read pressure
+                                 // of the render.meta pass; the ratio tells you how much
+                                 // duplicate work the memo elided. A high `cacheHits`
+                                 // count relative to `calls` is normal for cards that
+                                 // serialize + searchDoc the same field (every contains /
+                                 // contains-many / links-to field does this).
+  "serializeMs": 42.1,           // host-side wall-clock of `serializeCard(instance, {
+                                 // includeComputeds: true })` for this card.
+  "searchDocMs": 18.3            // host-side wall-clock of `searchDoc(instance)` for
+                                 // this card. Sum with `serializeMs` to get the host's
+                                 // contribution to `renderElapsedMs`. Pairs with
+                                 // `computedCalls` so you can normalize: a card with
+                                 // `computedCalls=500, searchDocMs=80` is ~6 calls/ms
+                                 // (cheap per call); under ~1 call/ms points at a slow
+                                 // compute that may be worth a dependency-aware skip.
 }
 ```
 
@@ -752,6 +794,7 @@ All ms values are server-observed walltime.
 - `recentQueryLoads[*].ms` is the wall time a completed query-field/search load ultimately took. The store keeps a bounded top-N so even queries that resolved just before the timer fired stay visible. Compare with `renderElapsedMs` to see which fraction of the render budget went to query work.
 - `cardDocLoadsInFlight[*].ageMs` / `fileMetaDocLoadsInFlight[*].ageMs` mirror the query version for linked-field (card doc) / file-meta loads. One URL with a very large `ageMs` = one slow linksTo target; many URLs with small `ageMs` = fan-out.
 - `recentCardDocLoads[*].ms` / `recentFileMetaLoads[*].ms` are the completed-load histories; same usage as `recentQueryLoads`.
+- `computedCalls` + `computedCacheHits` together represent total compute pressure on the render.meta pass. The split tells you how much duplicate work the pass-scoped memo absorbed — a 1:0 calls-to-hits ratio means every computed was read exactly once, a 1:5 ratio means the card re-read each computed five extra times (typical for cards where many sibling fields share a computed input). `searchDocMs` + `serializeMs` are the host's contribution to `renderElapsedMs`; comparing `computedCalls / (searchDocMs + serializeMs)` across cards finds the slow-per-call computes that are worth profiling.
 
 Keep the field names in lock-step with the type in `packages/runtime-common/index.ts`.
 
@@ -774,6 +817,7 @@ Walk the fields top-down. The _first_ positive signal wins; stop there.
 | `renderStage` = `waiting-stability` with empty in-flight arrays                                                                                                                                                                                       | **Render stall**                                                             | Nothing is loading but settlement never finishes. Classic Glimmer tracking loop — template is invalidating itself. `capturedDom` usually shows the partially-rendered component. `blockedTimerSummary` will list swallowed timers that may hint at a scheduling loop.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
 | `currentlyEvaluatingModule` non-null, or `stageAgeMs` large with empty in-flight arrays                                                                                                                                                               | **Synchronous browser stall (typically Glimmer compile during module eval)** | `recentModuleEvaluations` shows the worst offenders. A single URL with `ms > 5000` usually means "this module has a giant template that takes forever to compile". Many small entries (say 50+ at 100–500 ms each) summing into the stall budget mean card fan-out where each dependent card contributes a compile. Split the module, lazy-load the template, or reduce the component fan-out.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
 | `blockedTimerSummary` populated                                                                                                                                                                                                                       | **Supplementary**                                                            | Tells you which timer-driven code is fighting the render. Not a root cause on its own.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
+| `computedCalls` large (e.g. > 1000) AND `searchDocMs + serializeMs` ≈ `renderElapsedMs`                                                                                                                                                               | **Computed-field hot path**                                                  | The render.meta traversal itself is the bottleneck, not data loads or browser stalls. Look at `computedCalls / (searchDocMs + serializeMs)` — > ~5 calls/ms is fast, < ~1 call/ms means a few slow `computeVia` functions dominate. Inspect the card class for aggregate computeds that scan a `linksToMany` relation on every read (Portfolio-over-Policies style) and consider hoisting the scan into a shared rollup or adding `computeDeps` so the field can be skipped when its inputs don't change. The pass-scoped memo already eliminates duplicate reads in one traversal (visible in `computedCacheHits`); further wins require structural changes to the card.                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
 
 ### Special cases
 
@@ -935,7 +979,7 @@ Slot-by-slot:
   | `realms-staging.stack.cards`            | `https://boxel-host-staging.stack.cards`                |
   | `realms.stack.cards`                    | `https://boxel-host.stack.cards`                        |
   | `realm-server.<slug>.localhost`         | `http://host.<slug>.localhost` (BOXEL_ENVIRONMENT mode) |
-  | `localhost` or `*.localhost` (standard) | `https://localhost:4200`                                 |
+  | `localhost` or `*.localhost` (standard) | `https://localhost:4200`                                |
 
   If the realm host doesn't match any of these patterns, ask the user — don't guess. Constrain `realms-` matching to `*.stack.cards` so any future deployment using a `realms-` prefix on a different domain isn't silently mapped to a wrong (and possibly non-existent) host.
 
diff --git a/packages/base/card-api.gts b/packages/base/card-api.gts
index cd352701ea8..f1742155611 100644
--- a/packages/base/card-api.gts
+++ b/packages/base/card-api.gts
@@ -152,6 +152,8 @@ import {
 } from './card-serialization';
 import {
   assertScalar,
+  beginComputePass,
+  endComputePass,
   entangleWithCardTracking,
   getDataBucket,
   getFieldDescription,
@@ -169,6 +171,7 @@ import {
   relationshipMeta,
   setFieldDescription,
   setRealmContextOnField,
+  type ComputePassSnapshot,
   type NotLoadedValue,
 } from './field-support';
 import { TextInputValidator } from './text-input-validator';
@@ -189,6 +192,8 @@ interface CardOrFieldTypeIconSignature {
 export type CardOrFieldTypeIcon = ComponentLike<CardOrFieldTypeIconSignature>;
 
 export {
+  beginComputePass,
+  endComputePass,
   deserialize,
   getCardMeta,
   getDataBucket,
@@ -210,6 +215,7 @@ export {
   ensureQueryFieldSearchResource,
   getStore,
   type BoxComponent,
+  type ComputePassSnapshot,
   type DeserializeOpts,
   type GetMenuItemParams,
   type JSONAPISingleResourceDocument,
@@ -2276,9 +2282,13 @@ export class BaseDef {
             }
             return [fieldName, { id: makeAbsoluteURL(rawValue.reference) }];
           }
+          // Reuse the value we already peeked above instead of re-reading
+          // through the descriptor — for computed fields the descriptor
+          // get path re-invokes `computeVia`, doubling the work for every
+          // contains/contains-many/links-to field in the search doc.
           return [
             fieldName,
-            getQueryableValue(field!, value[fieldName], [value, ...stack]),
+            getQueryableValue(field!, rawValue, [value, ...stack]),
           ];
         }),
       );
@@ -4154,10 +4164,7 @@ export type SignatureFor<CardT extends BaseDefConstructor> = {
 // mutated. Field invocations (field !== undefined) go through `fieldComponent`
 // → `Box.field(name)` (already cached on the parent Box), so they bypass
 // this cache.
-const componentByModel = new WeakMap<
-  object,
-  Map<string, BoxComponent>
->();
+const componentByModel = new WeakMap<object, Map<string, BoxComponent>>();
 
 function codeRefCacheKey(codeRef: CodeRef | undefined): string {
   return codeRef ? JSON.stringify(codeRef) : '';
diff --git a/packages/base/field-support.ts b/packages/base/field-support.ts
index 66ee5c93c35..ddbda2562d3 100644
--- a/packages/base/field-support.ts
+++ b/packages/base/field-support.ts
@@ -54,6 +54,49 @@ const fieldOverrides = initSharedState(
   () => new WeakMap<BaseDef, Map<string, any>>(),
 );
 
+// Pass-scoped computed-field memo. When non-null, `getter` consults a
+// per-instance Map before invoking `computeVia` and stores the result for
+// the duration of the synchronous traversal that opened the pass (see
+// `beginComputePass`). Off-pass reads pay only a single null check on this
+// module local and follow the original path — the JIT branch-predicts the
+// off-pass case in the host-UI hot loop.
+let passComputeMemo: WeakMap<BaseDef, Map<string, any>> | null = null;
+// Counters snapshotted by the render/meta route to populate
+// `boxel_index.timing_diagnostics`. `getter` increments them only while
+// a pass is open (off-pass reads return before touching them), so they
+// are only meaningful between `beginComputePass`/`endComputePass`.
+let computedCallCount = 0;
+let computedCacheHitCount = 0;
+
+export interface ComputePassSnapshot {
+  calls: number;
+  cacheHits: number;
+}
+
+// Open a synchronous compute-memo pass. Callers MUST pair this with
+// `endComputePass()` and must not await between the two — the WeakMap
+// would otherwise be observable across reactive cycles. Intended for
+// pure traversals like `serializeCard` + `searchDoc` inside one
+// `render.meta` capture.
+export function beginComputePass(): void {
+  passComputeMemo = new WeakMap();
+  computedCallCount = 0;
+  computedCacheHitCount = 0;
+}
+
+// Close the pass and return the per-traversal counter delta. The memo
+// is dropped so subsequent `getter` calls run `computeVia` fresh.
+export function endComputePass(): ComputePassSnapshot {
+  passComputeMemo = null;
+  let snapshot = {
+    calls: computedCallCount,
+    cacheHits: computedCacheHitCount,
+  };
+  computedCallCount = 0;
+  computedCacheHitCount = 0;
+  return snapshot;
+}
+
 export function getter<CardT extends BaseDefConstructor>(
   instance: BaseDef,
   field: Field<CardT>,
@@ -63,10 +106,32 @@ export function getter<CardT extends BaseDefConstructor>(
   cardTracking.get(instance);
 
   if (field.computeVia) {
+    // Fast path when no pass is open: skip the counter + memo entirely
+    // so off-pass reads pay only one branch on the module-local null
+    // check. This branch is highly predictable and the original
+    // behaviour is unchanged.
+    if (passComputeMemo === null) {
+      let value = field.computeVia.bind(instance)();
+      if (value === undefined) {
+        value = field.emptyValue(instance);
+      }
+      return value as BaseInstanceType<CardT>;
+    }
+    let perInstance = passComputeMemo.get(instance);
+    if (perInstance && perInstance.has(field.name)) {
+      computedCacheHitCount++;
+      return perInstance.get(field.name);
+    }
+    computedCallCount++;
     let value = field.computeVia.bind(instance)();
     if (value === undefined) {
       value = field.emptyValue(instance);
     }
+    if (!perInstance) {
+      perInstance = new Map();
+      passComputeMemo.set(instance, perInstance);
+    }
+    perInstance.set(field.name, value);
     return value as BaseInstanceType<CardT>;
   } else {
     if (deserialized.has(field.name)) {
diff --git a/packages/host/app/routes/render/meta.ts b/packages/host/app/routes/render/meta.ts
index 336ccc90008..9d2d3a5a977 100644
--- a/packages/host/app/routes/render/meta.ts
+++ b/packages/host/app/routes/render/meta.ts
@@ -11,12 +11,14 @@ import {
   cardIdToURL,
   identifyCard,
   internalKeyFor,
+  logger,
   maybeRelativeURL,
   relationshipEntries,
   realmURL,
   snapshotRuntimeDependencies,
   type SingleCardDocument,
   type PrerenderMeta,
+  type PrerenderMetaDiagnostics,
   type RenderError,
 } from '@cardstack/runtime-common';
 
@@ -30,6 +32,8 @@ import type { Model as ParentModel } from '../render';
 
 export type Model = PrerenderMeta | RenderError | undefined;
 
+const computePerfLog = logger('host:computed-perf');
+
 export default class RenderMetaRoute extends Route<Model> {
   @service declare cardService: CardService;
 
@@ -54,6 +58,14 @@ export default class RenderMetaRoute extends Route<Model> {
       renderModel?.capturedDeps ??
       snapshotRuntimeDependencies({ excludeQueryOnly: true }).deps;
 
+    // Open a synchronous compute-memo pass that spans both
+    // serializeCard and searchDoc. Computed fields invoked through the
+    // descriptor or through peekAtField hit the per-instance memo
+    // instead of re-running `computeVia` — one compute per distinct
+    // (instance, fieldName) for the whole traversal. The pass MUST
+    // close before any await so it doesn't leak across reactive cycles.
+    api.beginComputePass();
+    let serializeStart = performance.now();
     let serialized = api.serializeCard(instance, {
       includeComputeds: true,
       maybeRelativeURL: (url: string) =>
@@ -63,6 +75,7 @@ export default class RenderMetaRoute extends Route<Model> {
           instance[realmURL],
         ),
     }) as SingleCardDocument;
+    let serializeMs = performance.now() - serializeStart;
     for (let { relationship } of relationshipEntries(
       serialized.data.relationships,
     )) {
@@ -74,18 +87,32 @@ export default class RenderMetaRoute extends Route<Model> {
 
     let types = getTypes(Klass);
     let displayNames = getDisplayNames(Klass);
+    let searchDocStart = performance.now();
     let searchDoc = api.searchDoc(instance);
+    let searchDocMs = performance.now() - searchDocStart;
+    let passSnapshot = api.endComputePass();
     // Add a "pseudo field" to the search doc for the card type. We use the
     // "_" prefix to make a decent attempt to not pollute the userland
     // namespace for cards
     searchDoc._cardType = friendlyCardType(Klass);
 
+    let diagnostics: PrerenderMetaDiagnostics = {
+      computedCalls: passSnapshot.calls,
+      computedCacheHits: passSnapshot.cacheHits,
+      serializeMs: Math.round(serializeMs * 100) / 100,
+      searchDocMs: Math.round(searchDocMs * 100) / 100,
+    };
+    computePerfLog.debug(
+      `render.meta computed counts cardId=${instance.id} calls=${diagnostics.computedCalls} cacheHits=${diagnostics.computedCacheHits} serializeMs=${diagnostics.serializeMs} searchDocMs=${diagnostics.searchDocMs}`,
+    );
+
     return {
       serialized,
       displayNames,
       types: types.map((t) => internalKeyFor(t, undefined)),
       searchDoc,
       deps,
+      diagnostics,
     };
   }
 }
diff --git a/packages/realm-server/prerender/render-runner.ts b/packages/realm-server/prerender/render-runner.ts
index 91fa9f7098b..b3d06ee912d 100644
--- a/packages/realm-server/prerender/render-runner.ts
+++ b/packages/realm-server/prerender/render-runner.ts
@@ -1082,7 +1082,6 @@ export class RenderRunner {
           types: null,
         };
         let meta: PrerenderMeta = emptyMeta;
-        let metaForTypes: PrerenderMeta = emptyMeta;
         let headHTML: string | null = null;
         let atomHTML: string | null = null;
         let iconHTML: string | null = null;
@@ -1128,27 +1127,31 @@ export class RenderRunner {
           }
         }
 
+        // Capture render.meta once before the ancestor renders so we have
+        // `meta.types` to drive fitted/embedded. The host route's
+        // serializeCard + searchDoc traversal is the most expensive part
+        // of the per-card prerender; running it twice (once for types and
+        // again for the final payload) is duplicate work because nothing
+        // about the instance changes between the two calls — the parent
+        // route freezes `model.capturedDeps` at ready-promise resolution
+        // and the fitted/embedded ancestor renders that run in between
+        // don't mutate the card model. Capture once and reuse.
         if (!cardShortCircuit) {
-          let metaForTypesResult = await runTimedStep<PrerenderMeta>(
-            'visit card render.meta (types)',
+          let metaResult = await runTimedStep<PrerenderMeta>(
+            'visit card render.meta',
             () => renderMeta(page, captureOptions),
           );
-          if (metaForTypesResult !== undefined) {
-            metaForTypes = metaForTypesResult;
+          if (metaResult !== undefined) {
+            meta = metaResult;
           }
         }
 
-        if (!cardShortCircuit && metaForTypes.types) {
+        if (!cardShortCircuit && meta.types) {
           const ancestorSteps = [
             {
               name: 'visit card fitted render',
               cb: () =>
-                renderAncestors(
-                  page,
-                  'fitted',
-                  metaForTypes.types!,
-                  captureOptions,
-                ),
+                renderAncestors(page, 'fitted', meta.types!, captureOptions),
               assign: (v: Record<string, string>) => {
                 fittedHTML = v;
               },
@@ -1156,12 +1159,7 @@ export class RenderRunner {
             {
               name: 'visit card embedded render',
               cb: () =>
-                renderAncestors(
-                  page,
-                  'embedded',
-                  metaForTypes.types!,
-                  captureOptions,
-                ),
+                renderAncestors(page, 'embedded', meta.types!, captureOptions),
               assign: (v: Record<string, string>) => {
                 embeddedHTML = v;
               },
@@ -1177,16 +1175,6 @@ export class RenderRunner {
           }
         }
 
-        if (!cardShortCircuit) {
-          let finalMetaResult = await runTimedStep<PrerenderMeta>(
-            'visit card render.meta (final)',
-            () => renderMeta(page, captureOptions),
-          );
-          if (finalMetaResult !== undefined) {
-            meta = finalMetaResult;
-          }
-        }
-
         let cardResponse: RenderResponse = {
           ...(meta as PrerenderMeta),
           ...(cardError ? { error: cardError } : {}),
diff --git a/packages/realm-server/prerender/render-settlement.ts b/packages/realm-server/prerender/render-settlement.ts
index 7317bc57bfb..54fce0f7771 100644
--- a/packages/realm-server/prerender/render-settlement.ts
+++ b/packages/realm-server/prerender/render-settlement.ts
@@ -222,6 +222,14 @@ export function decorateRenderErrorsWithTimings(
     let sub = r[key];
     if (sub && typeof sub === 'object') {
       lift((sub as { error?: unknown }).error);
+      // Card sub-responses also carry a success-path host diagnostics
+      // block — captured by render.meta and spread onto the card
+      // response as `diagnostics`. Lift the same way as error-path
+      // diagnostics so the computed-field counters reach the indexer's
+      // `boxel_index.timing_diagnostics` column.
+      if (key === 'card') {
+        lift(sub);
+      }
     }
   }
   let { affinitySnapshot, priority, tabReused } = meta;
diff --git a/packages/runtime-common/index.ts b/packages/runtime-common/index.ts
index 267706c6803..17b578d747a 100644
--- a/packages/runtime-common/index.ts
+++ b/packages/runtime-common/index.ts
@@ -44,6 +44,28 @@ export type PatchData = {
   };
 };
 
+// Per-render computed-field counters captured by the host's render.meta
+// route. Emitted alongside PrerenderMeta so the Prerenderer can lift them
+// onto `response.meta.diagnostics` and the indexer can persist them onto
+// `boxel_index.timing_diagnostics`. All fields optional — older host
+// builds that predate the counters omit the block entirely.
+export interface PrerenderMetaDiagnostics {
+  // Number of `computeVia` invocations that ran during the
+  // serializeCard + searchDoc traversal for this card. With the
+  // pass-scoped memo this is one call per distinct
+  // (instance, fieldName) pair read during the pass.
+  computedCalls?: number;
+  // Number of times the pass memo short-circuited a repeated read of
+  // the same computed in the same traversal. `computedCalls +
+  // computedCacheHits` is the total computed-read pressure of the
+  // pass; the ratio tells you how much duplicate work the memo elided.
+  computedCacheHits?: number;
+  // Wall-clock of the host-side serializeCard call.
+  serializeMs?: number;
+  // Wall-clock of the host-side searchDoc call.
+  searchDocMs?: number;
+}
+
 // Shared type produced by the host app when visiting the render.meta route and
 // consumed by the server.
 export interface PrerenderMeta {
@@ -52,6 +74,10 @@ export interface PrerenderMeta {
   displayNames: string[] | null;
   deps: string[] | null;
   types: string[] | null;
+  // Optional host-side timing block. The Prerenderer lifts this onto
+  // `response.meta.diagnostics` so it persists to
+  // `boxel_index.timing_diagnostics` for SQL-side perf triage.
+  diagnostics?: PrerenderMetaDiagnostics;
 }
 
 export interface RenderResponse extends PrerenderMeta {
@@ -204,6 +230,13 @@ export interface RenderTimeoutDiagnostics {
       priority?: number;
     }>;
   };
+  // Host-emitted computed-field counters lifted out of
+  // PrerenderMeta.diagnostics so they ride alongside the existing
+  // server-observed timings in `boxel_index.timing_diagnostics`.
+  computedCalls?: number;
+  computedCacheHits?: number;
+  serializeMs?: number;
+  searchDocMs?: number;
 }
 
 export interface RenderError extends ErrorEntry {

From 77cefe7dd9b3bd6224f04bb3a7e3f612931f937b Mon Sep 17 00:00:00 2001
From: Hassan Abdel-Rahman <hassan.abdelrahman@gmail.com>
Date: Thu, 21 May 2026 15:20:47 -0400
Subject: [PATCH 2/3] Guard pass-memo lifecycle: stale-api fallback +
 try/finally close
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two robustness fixes around the new `beginComputePass` / `endComputePass`
lifecycle:

1. **Stale `api` fallback.** During a cold dev start the host can briefly
   load a `base/card-api` build that predates the beginComputePass /
   endComputePass exports — vite is still optimizing dependencies or a
   transpile race serves a stale module. Without a guard this surfaced
   as `api.beginComputePass is not a function` errors during the very
   first reindex on a freshly-started stack, then went away once
   modules stabilized.

   Skip the pass when the methods aren't on `api` — `getter`'s
   `passComputeMemo === null` fast path still produces a correct
   serialized doc and searchDoc, just without per-row computedCalls /
   computedCacheHits diagnostics for those few cards. serializeMs +
   searchDocMs still surface either way.

2. **Close in `finally` on throw.** `endComputePass` now runs in a
   `finally` so a throw inside `serializeCard` or `searchDoc` still
   closes the pass — otherwise the module-global `passComputeMemo` in
   field-support.ts stays set and later off-pass `getter` calls would
   read stale memoized values across reactive cycles. Caught by Codex
   and Copilot bot review on PR.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/host/app/routes/render/meta.ts | 84 +++++++++++++++++--------
 1 file changed, 59 insertions(+), 25 deletions(-)

diff --git a/packages/host/app/routes/render/meta.ts b/packages/host/app/routes/render/meta.ts
index 9d2d3a5a977..c24b3bbc0f0 100644
--- a/packages/host/app/routes/render/meta.ts
+++ b/packages/host/app/routes/render/meta.ts
@@ -24,7 +24,11 @@ import {
 
 import type CardService from '@cardstack/host/services/card-service';
 
-import type { BaseDef, CardDef } from 'https://cardstack.com/base/card-api';
+import type {
+  BaseDef,
+  CardDef,
+  ComputePassSnapshot,
+} from 'https://cardstack.com/base/card-api';
 
 import { friendlyCardType } from '../../utils/render-error';
 
@@ -64,46 +68,76 @@ export default class RenderMetaRoute extends Route<Model> {
     // instead of re-running `computeVia` — one compute per distinct
     // (instance, fieldName) for the whole traversal. The pass MUST
     // close before any await so it doesn't leak across reactive cycles.
-    api.beginComputePass();
-    let serializeStart = performance.now();
-    let serialized = api.serializeCard(instance, {
-      includeComputeds: true,
-      maybeRelativeURL: (url: string) =>
-        maybeRelativeURL(
-          cardIdToURL(url),
-          cardIdToURL(instance.id),
-          instance[realmURL],
-        ),
-    }) as SingleCardDocument;
-    let serializeMs = performance.now() - serializeStart;
-    for (let { relationship } of relationshipEntries(
-      serialized.data.relationships,
-    )) {
-      // we want to emulate the file serialization here
-      delete relationship.data;
+    //
+    // Guarded by typeof checks: during a cold dev boot the host can briefly
+    // load a base/card-api build that predates these exports (vite is still
+    // bundling, or a stale realm-transpile is in flight). In that window we
+    // skip the pass — `getter` falls through its `passComputeMemo === null`
+    // fast path and the render still produces a correct serialized + search
+    // doc, just without the per-row diagnostics fields.
+    //
+    // Pass close is in a `finally` so a throw inside serializeCard /
+    // searchDoc still closes the pass — otherwise the module-global
+    // memo in field-support.ts stays set and later off-pass `getter`
+    // calls would read stale memoized values across reactive cycles.
+    let passOpen = typeof api.beginComputePass === 'function';
+    if (passOpen) {
+      api.beginComputePass();
+    }
+    let serialized: SingleCardDocument;
+    let serializeMs: number;
+    let searchDoc: Record<string, any>;
+    let searchDocMs: number;
+    let passSnapshot: ComputePassSnapshot | undefined;
+    try {
+      let serializeStart = performance.now();
+      serialized = api.serializeCard(instance, {
+        includeComputeds: true,
+        maybeRelativeURL: (url: string) =>
+          maybeRelativeURL(
+            cardIdToURL(url),
+            cardIdToURL(instance.id),
+            instance[realmURL],
+          ),
+      }) as SingleCardDocument;
+      serializeMs = performance.now() - serializeStart;
+      for (let { relationship } of relationshipEntries(
+        serialized.data.relationships,
+      )) {
+        // we want to emulate the file serialization here
+        delete relationship.data;
+      }
+
+      let searchDocStart = performance.now();
+      searchDoc = api.searchDoc(instance);
+      searchDocMs = performance.now() - searchDocStart;
+    } finally {
+      if (passOpen && typeof api.endComputePass === 'function') {
+        passSnapshot = api.endComputePass();
+      }
     }
 
     let Klass = getClass(instance);
 
     let types = getTypes(Klass);
     let displayNames = getDisplayNames(Klass);
-    let searchDocStart = performance.now();
-    let searchDoc = api.searchDoc(instance);
-    let searchDocMs = performance.now() - searchDocStart;
-    let passSnapshot = api.endComputePass();
     // Add a "pseudo field" to the search doc for the card type. We use the
     // "_" prefix to make a decent attempt to not pollute the userland
     // namespace for cards
     searchDoc._cardType = friendlyCardType(Klass);
 
     let diagnostics: PrerenderMetaDiagnostics = {
-      computedCalls: passSnapshot.calls,
-      computedCacheHits: passSnapshot.cacheHits,
+      ...(passSnapshot
+        ? {
+            computedCalls: passSnapshot.calls,
+            computedCacheHits: passSnapshot.cacheHits,
+          }
+        : {}),
       serializeMs: Math.round(serializeMs * 100) / 100,
       searchDocMs: Math.round(searchDocMs * 100) / 100,
     };
     computePerfLog.debug(
-      `render.meta computed counts cardId=${instance.id} calls=${diagnostics.computedCalls} cacheHits=${diagnostics.computedCacheHits} serializeMs=${diagnostics.serializeMs} searchDocMs=${diagnostics.searchDocMs}`,
+      `render.meta computed counts cardId=${instance.id} calls=${diagnostics.computedCalls ?? 'n/a'} cacheHits=${diagnostics.computedCacheHits ?? 'n/a'} serializeMs=${diagnostics.serializeMs} searchDocMs=${diagnostics.searchDocMs}`,
     );
 
     return {

From 2a472ddf5a999eb6b5204d59bfc85bac9d5f4779 Mon Sep 17 00:00:00 2001
From: Hassan Abdel-Rahman <hassan.abdelrahman@gmail.com>
Date: Thu, 21 May 2026 15:58:05 -0400
Subject: [PATCH 3/3] =?UTF-8?q?Revert=20render.meta=20dedup=20=E2=80=94=20?=
 =?UTF-8?q?second=20call=20is=20load-bearing=20for=20isUsed=20contract?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dedup broke the
`non-isolated formats render linked fields and those links appear in search doc`
prerendering test (Realm Server Tests, shards 1 and 6).

The two render.meta calls aren't duplicate work after all. Between
them the fitted / embedded ancestor renders touch linksTo /
linksToMany values from the embedded template; those reads mark the
fields as "used" in the per-instance data bucket. The *second*
renderMeta's queryableValue runs `getFields` with
`usedLinksToFieldsOnly: true`, which now picks up those linked fields
and includes their values in `searchDoc.owner.name`,
`searchDoc.owners[*].name`, etc. Collapsing the two calls runs
searchDoc *before* any fitted / embedded render and skips those
fields entirely.

Reverts only the render-runner change. The double-read fix in
BaseDef.[queryableValue] and the pass-scoped memo are independent
wins and stay — they're what produced the 63-66% reduction in
computeVia invocations across compute-heavy cards.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../realm-server/prerender/render-runner.ts   | 55 +++++++++++++------
 1 file changed, 39 insertions(+), 16 deletions(-)

diff --git a/packages/realm-server/prerender/render-runner.ts b/packages/realm-server/prerender/render-runner.ts
index b3d06ee912d..9f73d7963cc 100644
--- a/packages/realm-server/prerender/render-runner.ts
+++ b/packages/realm-server/prerender/render-runner.ts
@@ -1082,6 +1082,7 @@ export class RenderRunner {
           types: null,
         };
         let meta: PrerenderMeta = emptyMeta;
+        let metaForTypes: PrerenderMeta = emptyMeta;
         let headHTML: string | null = null;
         let atomHTML: string | null = null;
         let iconHTML: string | null = null;
@@ -1127,31 +1128,38 @@ export class RenderRunner {
           }
         }
 
-        // Capture render.meta once before the ancestor renders so we have
-        // `meta.types` to drive fitted/embedded. The host route's
-        // serializeCard + searchDoc traversal is the most expensive part
-        // of the per-card prerender; running it twice (once for types and
-        // again for the final payload) is duplicate work because nothing
-        // about the instance changes between the two calls — the parent
-        // route freezes `model.capturedDeps` at ready-promise resolution
-        // and the fitted/embedded ancestor renders that run in between
-        // don't mutate the card model. Capture once and reuse.
+        // Two render.meta calls. The first extracts `meta.types` for
+        // the ancestor renders below; the second captures the final
+        // serialized + searchDoc payload. The two are not duplicate
+        // work: the ancestor renders that run in between cause
+        // fitted/embedded format reads to load + mark linksTo /
+        // linksToMany fields as "used", which the final renderMeta's
+        // queryableValue then includes in the search doc. Collapsing
+        // these into one call breaks the isUsed-via-non-isolated-render
+        // contract that
+        // `non-isolated formats render linked fields and those links appear in search doc`
+        // covers.
         if (!cardShortCircuit) {
-          let metaResult = await runTimedStep<PrerenderMeta>(
-            'visit card render.meta',
+          let metaForTypesResult = await runTimedStep<PrerenderMeta>(
+            'visit card render.meta (types)',
             () => renderMeta(page, captureOptions),
           );
-          if (metaResult !== undefined) {
-            meta = metaResult;
+          if (metaForTypesResult !== undefined) {
+            metaForTypes = metaForTypesResult;
           }
         }
 
-        if (!cardShortCircuit && meta.types) {
+        if (!cardShortCircuit && metaForTypes.types) {
           const ancestorSteps = [
             {
               name: 'visit card fitted render',
               cb: () =>
-                renderAncestors(page, 'fitted', meta.types!, captureOptions),
+                renderAncestors(
+                  page,
+                  'fitted',
+                  metaForTypes.types!,
+                  captureOptions,
+                ),
               assign: (v: Record<string, string>) => {
                 fittedHTML = v;
               },
@@ -1159,7 +1167,12 @@ export class RenderRunner {
             {
               name: 'visit card embedded render',
               cb: () =>
-                renderAncestors(page, 'embedded', meta.types!, captureOptions),
+                renderAncestors(
+                  page,
+                  'embedded',
+                  metaForTypes.types!,
+                  captureOptions,
+                ),
               assign: (v: Record<string, string>) => {
                 embeddedHTML = v;
               },
@@ -1175,6 +1188,16 @@ export class RenderRunner {
           }
         }
 
+        if (!cardShortCircuit) {
+          let finalMetaResult = await runTimedStep<PrerenderMeta>(
+            'visit card render.meta (final)',
+            () => renderMeta(page, captureOptions),
+          );
+          if (finalMetaResult !== undefined) {
+            meta = finalMetaResult;
+          }
+        }
+
         let cardResponse: RenderResponse = {
           ...(meta as PrerenderMeta),
           ...(cardError ? { error: cardError } : {}),