From ae8be3e366c42000bfb8b1a6e58a353faf28a001 Mon Sep 17 00:00:00 2001 From: bcode Date: Sun, 17 May 2026 10:47:42 -0700 Subject: [PATCH] diag: stderr trace through plugin shutdown + forceFlush path rc2 still shows orphan turn parents in cloud V4 traces. The bcode log file has no shutdown-related entries because none of the relevant code paths log on success. Add stderr writes so v4-worker's bcode-output-.log artifact captures: - whether the shutdown branch in src/index.ts's top-level finally is reached at all - pluginShutdownHooks.size at invocation time (was rc2's fix actually populating the Set?) - whether bcode-laminar.shutdown was invoked + how many turn spans it ended - forceFlush start, completion, rejection, or timeout stderr is captured by the v4-worker harness; we'll diff against the current 'log just stops mid-bus-event' baseline to localize which step is failing. Once V4 verification is settled and the actual fix is identified, this stderr diagnostic block can be removed. TEMP - to be removed after verification. --- packages/bcode-laminar/src/plugin.ts | 17 +++++++++++++++-- packages/opencode/src/index.ts | 25 +++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/packages/bcode-laminar/src/plugin.ts b/packages/bcode-laminar/src/plugin.ts index 20a750145..1c5d2b75e 100644 --- a/packages/bcode-laminar/src/plugin.ts +++ b/packages/bcode-laminar/src/plugin.ts @@ -82,10 +82,23 @@ export const LaminarPlugin: Plugin = ({ client }) => { // this hook from its top-level finally before forceFlush, so span.end() // here gets its export drained by the host's existing forceFlush race. shutdown: () => { - for (const [sessionId, span] of Object.entries(sessionCurrentTurnSpan)) { - span.end() + // Use console.error not client.app.log — the SDK server may already be + // torn down by shutdown time, and the host log is async via HTTP which + // doesn't honor the sync shutdown contract. v4-worker captures stderr + // into bcode-output-.log so this lands in the cloud artifact. + const sessionIds = Object.keys(sessionCurrentTurnSpan) + console.error(`[bcode-laminar] shutdown invoked: ending ${sessionIds.length} turn span(s)`) + for (const sessionId of sessionIds) { + const span = sessionCurrentTurnSpan[sessionId] + if (!span) continue + try { + span.end() + } catch (err) { + console.error(`[bcode-laminar] span.end() threw for session ${sessionId}: ${(err as Error).message}`) + } delete sessionCurrentTurnSpan[sessionId] } + console.error(`[bcode-laminar] shutdown complete`) }, event: async ({ event }) => { switch (event.type) { diff --git a/packages/opencode/src/index.ts b/packages/opencode/src/index.ts index c487778ef..b16e14635 100644 --- a/packages/opencode/src/index.ts +++ b/packages/opencode/src/index.ts @@ -259,15 +259,24 @@ try { // before forceFlush + process.exit() below. try { const { pluginShutdownHooks } = await import("./plugin") + // Diagnostic via stderr: v4-worker captures into bcode-output-.log + // and we need to know whether this branch is even reached + how many hooks + // ran. Remove once V4 telemetry verification is settled. + process.stderr.write(`[bcode] shutdown: invoking ${pluginShutdownHooks.size} plugin shutdown hook(s)\n`) + let invoked = 0 for (const hook of pluginShutdownHooks) { try { hook() + invoked++ } catch (err) { Log.Default.error("plugin shutdown hook failed", { error: err }) + process.stderr.write(`[bcode] shutdown: hook threw: ${(err as Error).message}\n`) } } + process.stderr.write(`[bcode] shutdown: invoked ${invoked}/${pluginShutdownHooks.size} hook(s) successfully\n`) } catch (err) { Log.Default.error("plugin shutdown import failed", { error: err }) + process.stderr.write(`[bcode] shutdown: import failed: ${(err as Error).message}\n`) } // Drain any registered OTel span processors (e.g. bcode-laminar) before // exiting so the just-ended turn spans actually hit the wire. Bounded with @@ -275,10 +284,22 @@ try { // OTel-based plugin, not laminar-specific. const provider = trace.getTracerProvider() as { forceFlush?: () => Promise } if (provider.forceFlush) { + process.stderr.write(`[bcode] shutdown: forceFlush starting\n`) + const start = Date.now() await Promise.race([ - provider.forceFlush().catch(() => {}), - new Promise((resolve) => setTimeout(resolve, 3000)), + provider.forceFlush().catch((err: Error) => { + process.stderr.write(`[bcode] shutdown: forceFlush rejected: ${err.message}\n`) + }), + new Promise((resolve) => + setTimeout(() => { + process.stderr.write(`[bcode] shutdown: forceFlush timed out after 3000ms\n`) + resolve() + }, 3000), + ), ]) + process.stderr.write(`[bcode] shutdown: forceFlush done in ${Date.now() - start}ms\n`) + } else { + process.stderr.write(`[bcode] shutdown: no forceFlush on global provider\n`) } // Some subprocesses don't react properly to SIGTERM and similar signals. // Most notably, some docker-container-based MCP servers don't handle such signals unless