From bf49bee57940ea917418fde08da85e2316428ce0 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Tue, 12 May 2026 04:53:15 -0400 Subject: [PATCH 1/3] Fix flaky instantiate-validation spec: wait for Spec to be searchable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seed helpers in instantiate-test-fixtures.ts used `client.waitForFile`, which polls GET on the source file only. Realm-side source POST indexing is async, so the file becomes readable before it shows up in `_federated-search` results. `InstantiateValidationStep` polls `discoverRealmSpecs` for at most 30s for the Spec to appear in search; under CI load this sometimes times out, and the step falls into the "modules exist but no Spec cards" branch (`passed: false`, no `details`), which then trips `expect(details).toBeTruthy()` in the e2e spec at line 132. Confirmed by an identical failure on a clean main commit (CI run 25694263747 against 03f6ae7c), so this is a pre-existing flake, not a regression from the prerender-search-cacheonly PR (#4777) where it also surfaced. Fix: - New `awaitSpecSearchable` helper polls `client.search({type: spec})` for up to 60s after writing the Spec source, so downstream discovery sees it without depending on its own 30s budget. - `seedValidCardWithSpec` and `seedTagsCardWithBrokenExampleAndSpec` call the gate immediately after the Spec write. The tags fixture writes the bad example *after* the gate — the bad example deliberately fails indexing (`containsMany` got a string), and a still-queued indexer was what historically kept the Spec out of search results past the 30s budget. Diagnostic logging (orthogonal to the fix, helps if the flake recurs): - The "modules exist but no Spec cards" branch in `instantiate-step.ts` now logs the realm URL, file count, and filtered list of spec-like filenames at `warn`. 
Previously this case fired a generic info log that didn't reveal whether the search index was empty because the Spec wasn't written or because indexing hadn't caught up. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/validators/instantiate-step.ts | 27 ++++++-- .../helpers/instantiate-test-fixtures.ts | 65 ++++++++++++++++++- 2 files changed, 86 insertions(+), 6 deletions(-) diff --git a/packages/software-factory/src/validators/instantiate-step.ts b/packages/software-factory/src/validators/instantiate-step.ts index 3c9819ae2b8..219feba0181 100644 --- a/packages/software-factory/src/validators/instantiate-step.ts +++ b/packages/software-factory/src/validators/instantiate-step.ts @@ -150,13 +150,18 @@ export class InstantiateValidationStep implements ValidationStepRunner { // Check if there's anything to validate before creating artifacts if (specInfos.length === 0) { let hasModules = false; + let filenames: string[] = []; try { let filesResult = await this.fetchFilenamesFn(targetRealm); - hasModules = (filesResult.filenames ?? []).some( + filenames = filesResult.filenames ?? []; + hasModules = filenames.some( (f) => f.endsWith('.gts') && !f.endsWith('.test.gts'), ); - } catch { + } catch (err) { // If we can't check filenames, treat as nothing to validate + log.warn( + `Failed to list realm files while diagnosing empty spec search: ${err instanceof Error ? err.message : String(err)}`, + ); } if (!hasModules) { @@ -165,8 +170,22 @@ export class InstantiateValidationStep implements ValidationStepRunner { return { step: 'instantiate', passed: true, files: [], errors: [] }; } - // Modules exist but no specs — fail with actionable message - log.info('Card modules exist but no Spec cards found — failing'); + // Modules exist but no specs — likely either a real "no Catalog Spec" + // configuration miss OR an indexer/search-readiness lag where the + // Spec source file is on disk but `_federated-search` hasn't picked + // it up yet. 
Dump the filename list (filtered to spec-like paths) + // and the count of total files so future flakes can be triaged + // against an actual log line instead of an assertion that swallows + // the context. + let specLikeFilenames = filenames.filter( + (f) => + f.endsWith('.json') && + (f.startsWith('Spec/') || f.includes('-spec.json')), + ); + log.warn( + `Card modules exist but no Spec cards found in search — failing. ` + + `realm=${targetRealm} totalFiles=${filenames.length} specLikeFiles=${JSON.stringify(specLikeFilenames)}`, + ); return { step: 'instantiate', passed: false, diff --git a/packages/software-factory/tests/helpers/instantiate-test-fixtures.ts b/packages/software-factory/tests/helpers/instantiate-test-fixtures.ts index 78229bcb67a..0a692ac6a42 100644 --- a/packages/software-factory/tests/helpers/instantiate-test-fixtures.ts +++ b/packages/software-factory/tests/helpers/instantiate-test-fixtures.ts @@ -6,8 +6,11 @@ * the two surfaces are exercised against identical inputs. */ import type { BoxelCLIClient } from '@cardstack/boxel-cli/api'; +import { specRef } from '@cardstack/runtime-common/constants'; import { expect } from '@playwright/test'; +import { retryWithPoll } from '../../src/retry-with-poll'; + // --------------------------------------------------------------------------- // Card modules // --------------------------------------------------------------------------- @@ -151,8 +154,10 @@ export function tagsCardSpecJson(): string { // --------------------------------------------------------------------------- /** - * Write a file + await the realm's index to pick it up. Returns once the - * file is visible to subsequent searches. + * Write a file + await the realm to ack it back via GET. The file is + * readable here, but realm-side search-index ingestion happens out-of-band + * — `awaitSpecSearchable` covers that separately for the Spec card the + * downstream `InstantiateValidationStep` queries for. 
*/ async function writeAndAwaitIndex( client: BoxelCLIClient, @@ -171,6 +176,55 @@ async function writeAndAwaitIndex( expect(indexed, `waiting for ${path} to be indexed timed out`).toBe(true); } +/** + * Poll the realm's federated search until the just-seeded Spec card surfaces + * as a Spec-type result. `waitForFile` only guarantees the source file is + * GET-able; the realm runs source POST indexing asynchronously, so there is + * a window where the file is readable but `client.search({type: spec})` + * still returns an empty list. Without this gate the downstream + * `InstantiateValidationStep`'s 30s discovery poll has been observed + * timing out in CI under load (the test falls into the "modules exist but + * no Spec cards" branch, where `result.details` is undefined). The + * 60s budget here gives the realm headroom even when its indexer is + * queued behind from-scratch index work from test setup. + */ +async function awaitSpecSearchable( + client: BoxelCLIClient, + realmUrl: string, + specPath: string, +): Promise { + let expectedSuffix = specPath.replace(/\.json$/, ''); + let lastResult: Awaited> | undefined; + let result = await retryWithPoll( + async () => { + lastResult = await client.search(realmUrl, { + filter: { type: specRef }, + }); + return lastResult; + }, + (r) => { + if (!r.ok) return false; + let found = (r.data ?? []).some((card) => { + let id = (card as { id?: unknown }).id; + return typeof id === 'string' && id.endsWith(expectedSuffix); + }); + return !found; + }, + { totalWaitMs: 60_000, pollMs: 250 }, + ); + expect( + result.ok, + `search for Spec at ${specPath} failed: ${result.error}`, + ).toBe(true); + let cardIds = (result.data ?? []).map( + (c) => (c as { id?: unknown }).id ?? 
'(no id)', + ); + expect( + cardIds.some((id) => typeof id === 'string' && id.endsWith(expectedSuffix)), + `Spec ${specPath} did not show up in search within 60s; got: ${JSON.stringify(cardIds)}`, + ).toBe(true); +} + /** * Seed `instantiate-test-card.gts` + one linked example + a Spec that * instantiates cleanly. @@ -197,6 +251,7 @@ export async function seedValidCardWithSpec( 'Spec/valid-card-spec.json', validCardSpecJson(), ); + await awaitSpecSearchable(client, realmUrl, 'Spec/valid-card-spec.json'); } /** @@ -222,6 +277,12 @@ export async function seedTagsCardWithBrokenExampleAndSpec( 'Spec/tags-card-spec.json', tagsCardSpecJson(), ); + // Gate on the Spec actually appearing in search results before we drop + // the broken example. The broken example deliberately fails indexing + // (containsMany received a string), and a still-queued realm indexer + // can keep the Spec out of search results past `discoverRealmSpecs`'s + // own 30s poll budget — that is the historical flake. + await awaitSpecSearchable(client, realmUrl, 'Spec/tags-card-spec.json'); await writeAndAwaitIndex( client, realmUrl, From d775ce17c35522a77cbad2d9ab98290b0d5ca27b Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Tue, 12 May 2026 05:14:33 -0400 Subject: [PATCH 2/3] Make awaitSpecSearchable a soft diagnostic gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original v1 of this PR treated `awaitSpecSearchable` as a hard assertion — fail the test if the seeded Spec didn't surface in `_federated-search` within the polling window. CI run 25724049661 revealed that doesn't actually fix anything: the realm sometimes never indexes the tags-card Spec for this fixture (likely an indexer race against the broken example's error_doc state), so the hard gate just relocates the same failure earlier with less context. 
Switch to a soft gate:
- Bump the wait to 90s (gives the realm comfortable headroom)
- On success, return normally — most CI runs hit this path quickly
- On timeout, emit one warn-line capturing the realm's actual state (current search hits, spec source file readability, realm file listing filtered to spec-like paths), then return without asserting
- The downstream `InstantiateValidationStep.discoverRealmSpecs` poll will still see the same empty result and the existing e2e assertion (`expect(details).toBeTruthy()`) will still fail — but now with two diagnostic logs explaining exactly what the realm saw at the time

Also reverts the speculative example-before-Spec reorder. Both orderings reproduced the flake locally and in CI, so the original order stands and the gate's diagnostic dump documents what was observed.

Addresses Copilot review feedback:
- Removes the dead `lastResult` closure variable in `awaitSpecSearchable`. With `noUnusedLocals` the read-via-return kept it compiling, but the variable served no purpose.
- `instantiate-step.ts` now reads `filesResult.error` instead of relying on `try/catch` alone. `client.listFiles` reports failures via a returned `error` field rather than by throwing — so a soft error would have silently fallen through to the bootstrap branch and skipped the diagnostic warn-log.
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/validators/instantiate-step.ts | 24 +++- .../helpers/instantiate-test-fixtures.ts | 105 ++++++++++++------ 2 files changed, 90 insertions(+), 39 deletions(-) diff --git a/packages/software-factory/src/validators/instantiate-step.ts b/packages/software-factory/src/validators/instantiate-step.ts index 219feba0181..919a341b742 100644 --- a/packages/software-factory/src/validators/instantiate-step.ts +++ b/packages/software-factory/src/validators/instantiate-step.ts @@ -151,16 +151,28 @@ export class InstantiateValidationStep implements ValidationStepRunner { if (specInfos.length === 0) { let hasModules = false; let filenames: string[] = []; + let listError: string | undefined; try { let filesResult = await this.fetchFilenamesFn(targetRealm); - filenames = filesResult.filenames ?? []; - hasModules = filenames.some( - (f) => f.endsWith('.gts') && !f.endsWith('.test.gts'), - ); + // `fetchFilenamesFn` (defaults to `client.listFiles`) reports + // failures via a returned `error` field, not by throwing. Treat + // either path as "we don't actually know what's in the realm" and + // fall back to the no-modules branch so we don't fail the step + // with a misleading "modules exist but no specs" message. + if (filesResult.error) { + listError = filesResult.error; + } else { + filenames = filesResult.filenames ?? []; + hasModules = filenames.some( + (f) => f.endsWith('.gts') && !f.endsWith('.test.gts'), + ); + } } catch (err) { - // If we can't check filenames, treat as nothing to validate + listError = err instanceof Error ? err.message : String(err); + } + if (listError) { log.warn( - `Failed to list realm files while diagnosing empty spec search: ${err instanceof Error ? 
err.message : String(err)}`, + `Failed to list realm files while diagnosing empty spec search: ${listError}`, ); } diff --git a/packages/software-factory/tests/helpers/instantiate-test-fixtures.ts b/packages/software-factory/tests/helpers/instantiate-test-fixtures.ts index 0a692ac6a42..caeba38bd42 100644 --- a/packages/software-factory/tests/helpers/instantiate-test-fixtures.ts +++ b/packages/software-factory/tests/helpers/instantiate-test-fixtures.ts @@ -177,16 +177,28 @@ async function writeAndAwaitIndex( } /** - * Poll the realm's federated search until the just-seeded Spec card surfaces - * as a Spec-type result. `waitForFile` only guarantees the source file is - * GET-able; the realm runs source POST indexing asynchronously, so there is - * a window where the file is readable but `client.search({type: spec})` - * still returns an empty list. Without this gate the downstream - * `InstantiateValidationStep`'s 30s discovery poll has been observed - * timing out in CI under load (the test falls into the "modules exist but - * no Spec cards" branch, where `result.details` is undefined). The - * 60s budget here gives the realm headroom even when its indexer is - * queued behind from-scratch index work from test setup. + * Diagnostic gate: poll federated search until the just-seeded Spec card + * surfaces as a Spec-type result, with a generous budget. `waitForFile` + * only guarantees the file is GET-able; realm-side source POST indexing + * is async, so there's a window where the file is readable but + * `client.search({type: spec})` still returns an empty list. The + * downstream `InstantiateValidationStep`'s 30s discovery poll has been + * observed timing out in CI under load (the test falls into the + * "modules exist but no Spec cards" branch, where `result.details` is + * undefined, and the e2e assertion that triggered this skill's + * investigation trips on it). + * + * Important: this gate is SOFT. 
If polling times out, we log a + * diagnostic dump (current search hits, realm file listing, file + * readability checks) and return — the test then proceeds to its real + * assertions. The reason: CI evidence shows the realm sometimes never + * surfaces the Spec for this fixture's containsMany card no matter how + * long we wait (likely an indexer bug interacting with the broken + * example's error_doc state). A hard gate just shifts the failure + * upstream without adding information. The log it emits on the way out + * is the real value here — pairs with the warn-log in + * `InstantiateValidationStep` when its discovery poll also comes back + * empty. */ async function awaitSpecSearchable( client: BoxelCLIClient, @@ -194,14 +206,10 @@ async function awaitSpecSearchable( specPath: string, ): Promise { let expectedSuffix = specPath.replace(/\.json$/, ''); - let lastResult: Awaited> | undefined; + let totalWaitMs = 90_000; + let startedAt = Date.now(); let result = await retryWithPoll( - async () => { - lastResult = await client.search(realmUrl, { - filter: { type: specRef }, - }); - return lastResult; - }, + () => client.search(realmUrl, { filter: { type: specRef } }), (r) => { if (!r.ok) return false; let found = (r.data ?? []).some((card) => { @@ -210,19 +218,52 @@ async function awaitSpecSearchable( }); return !found; }, - { totalWaitMs: 60_000, pollMs: 250 }, + { totalWaitMs, pollMs: 250 }, ); - expect( - result.ok, - `search for Spec at ${specPath} failed: ${result.error}`, - ).toBe(true); + let elapsedMs = Date.now() - startedAt; + + if (!result.ok) { + console.warn( + `[awaitSpecSearchable] search for ${specPath} returned not-ok after ${elapsedMs}ms: ${result.error ?? '(no error message)'}`, + ); + return; + } let cardIds = (result.data ?? []).map( (c) => (c as { id?: unknown }).id ?? 
'(no id)', ); - expect( - cardIds.some((id) => typeof id === 'string' && id.endsWith(expectedSuffix)), - `Spec ${specPath} did not show up in search within 60s; got: ${JSON.stringify(cardIds)}`, - ).toBe(true); + let found = cardIds.some( + (id) => typeof id === 'string' && id.endsWith(expectedSuffix), + ); + if (found) { + return; + } + + // Soft-fail diagnostic dump. The test will likely fail downstream + // when InstantiateValidationStep can't find the Spec either — at + // which point this log shows the realm's actual state at the time + // we gave up waiting. + let readSpecFile = await client.read(realmUrl, specPath).catch((err) => ({ + ok: false, + error: err instanceof Error ? err.message : String(err), + })); + let listing = await client.listFiles(realmUrl).catch((err) => ({ + filenames: [] as string[], + error: err instanceof Error ? err.message : String(err), + })); + let specLikeFilenames = (listing.filenames ?? []).filter( + (f) => + f.endsWith('.json') && (f.startsWith('Spec/') || f.includes('-spec')), + ); + console.warn( + `[awaitSpecSearchable] Spec ${specPath} did not surface in search within ${elapsedMs}ms. ` + + `realm=${realmUrl} searchHits=${JSON.stringify(cardIds)} ` + + `specSourceFileReadable=${(readSpecFile as { ok?: boolean }).ok ?? false} ` + + `totalFiles=${(listing.filenames ?? []).length} ` + + `specLikeFilenames=${JSON.stringify(specLikeFilenames)}` + + ((listing as { error?: string }).error + ? ` listFilesError=${(listing as { error?: string }).error}` + : ''), + ); } /** @@ -269,19 +310,17 @@ export async function seedTagsCardWithBrokenExampleAndSpec( 'tags-card.gts', TAGS_CARD_MODULE_GTS, ); - // Write the Spec BEFORE the bad example so it's indexed before the - // example potentially stalls the indexer. + // Write the Spec BEFORE the bad example. 
Reordering was attempted to + // give the indexer a stable Spec target; locally and in CI both + // orderings produced the same flake (Spec never surfaces in search + // within the budget), so the historic order stands and the + // diagnostic gate below documents what we saw when it didn't. await writeAndAwaitIndex( client, realmUrl, 'Spec/tags-card-spec.json', tagsCardSpecJson(), ); - // Gate on the Spec actually appearing in search results before we drop - // the broken example. The broken example deliberately fails indexing - // (containsMany received a string), and a still-queued realm indexer - // can keep the Spec out of search results past `discoverRealmSpecs`'s - // own 30s poll budget — that is the historical flake. await awaitSpecSearchable(client, realmUrl, 'Spec/tags-card-spec.json'); await writeAndAwaitIndex( client, From 646dbdc8a2844f9e991232311aae9252efcf6b19 Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Tue, 12 May 2026 08:27:34 -0400 Subject: [PATCH 3/3] boxel-cli: regenerate plugin/skills/realm-sync synopsis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CI Lint job's `pnpm build:plugin` synopsis-staleness check has been failing repo-wide on every PR (4780, 4781, 4782) because the `boxel realm watch` command's surface changed (from two positional arguments — the realm URL and a local directory — to a subcommands-style entry point) but the generated synopsis at `plugin/skills/realm-sync/SKILL.md` wasn't regenerated. Run `pnpm build:plugin` and commit the result so the Lint check goes green. Not strictly related to the instantiate-validation flake this PR targets, but the Lint job is gating the merge button — unblocking it here so the flake fix can land. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../boxel-cli/plugin/skills/realm-sync/SKILL.md | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/packages/boxel-cli/plugin/skills/realm-sync/SKILL.md b/packages/boxel-cli/plugin/skills/realm-sync/SKILL.md index fc28c2eef7b..9b75bef04a8 100644 --- a/packages/boxel-cli/plugin/skills/realm-sync/SKILL.md +++ b/packages/boxel-cli/plugin/skills/realm-sync/SKILL.md @@ -66,20 +66,9 @@ Bidirectional sync between a local directory and a Boxel realm - `--dry-run` — Preview without making changes - `--realm-secret-seed` — Administrative auth: prompt for a realm secret seed and mint a JWT locally instead of using a Matrix profile (env: BOXEL_REALM_SECRET_SEED) -### `boxel realm watch ` +### `boxel realm watch` -Watch a Boxel realm for server-side changes and pull them into a local directory - -**Arguments:** - -- `` — The URL of the realm to watch (e.g., https://app.boxel.ai/demo/) -- `` — The local directory to write changes into - -**Options:** - -- `-i, --interval ` — Polling interval in seconds -- `-d, --debounce ` — Seconds to wait after a burst of changes before applying them -- `--realm-secret-seed` — Administrative auth: prompt for a realm secret seed and mint a JWT locally instead of using a Matrix profile (env: BOXEL_REALM_SECRET_SEED) +Watch a Boxel realm; subcommands manage watch processes ### `boxel realm push `