From 3d7a148af1c796a120f977481484a64527d7ae4e Mon Sep 17 00:00:00 2001 From: Jonathan Haas Date: Fri, 15 May 2026 22:48:42 -0700 Subject: [PATCH 1/2] fix: omit non-terminal runtime promotion writes --- src/server/agent-runtime-ledger.ts | 76 +++++++-- test/cli/run-command.test.ts | 247 +++++++++++++++++++++++++++++ 2 files changed, 313 insertions(+), 10 deletions(-) diff --git a/src/server/agent-runtime-ledger.ts b/src/server/agent-runtime-ledger.ts index 4412d8568..a2b3c49c8 100644 --- a/src/server/agent-runtime-ledger.ts +++ b/src/server/agent-runtime-ledger.ts @@ -283,7 +283,7 @@ function workItemKindForEntry(kind: AgentRuntimeLedgerEntryKind): string { function waitTypeForEntry( kind: AgentRuntimeLedgerEntryKind, ): string | undefined { - if (kind === "wait" || kind === "governance") { + if (kind === "wait") { return "AGENT_RUN_WAIT_TYPE_APPROVAL"; } return undefined; @@ -342,27 +342,79 @@ function replaySummary( }; } +const PASSIVE_TERMINAL_ENTRY_KINDS = new Set([ + "run", + "wait", + "governance", +]); + +function isTerminalState(state: AgentRuntimeLedgerState): boolean { + return ( + state === "succeeded" || + state === "skipped" || + state === "failed" || + state === "cancelled" + ); +} + +function isTerminalCandidate(entry: AgentRuntimeLedgerEntry): boolean { + return ( + isTerminalState(entry.state) && !PASSIVE_TERMINAL_ENTRY_KINDS.has(entry.kind) + ); +} + +function terminalEntry( + entries: AgentRuntimeLedgerEntry[], +): AgentRuntimeLedgerEntry | undefined { + const last = entries.at(-1); + if (!last || !isTerminalState(last.state)) return undefined; + if (isTerminalCandidate(last)) return last; + for (let index = entries.length - 1; index >= 0; index -= 1) { + const entry = entries[index]; + if (entry && isTerminalCandidate(entry)) return entry; + } + return undefined; +} + function terminalOperation( runId: string, entries: AgentRuntimeLedgerEntry[], -): AgentRuntimePromotionOperation { - const last = entries.at(-1); - const succeeded = last?.state === "succeeded" || last?.state === "skipped"; +): AgentRuntimePromotionOperation | undefined { + const terminal = terminalEntry(entries); + if (!terminal) return undefined; + const succeeded = + terminal.state === "succeeded" || terminal.state === "skipped"; return { operation: succeeded ? "complete_run" : "fail_run", id: `promote:${runId}:terminal`, payload: { state: succeeded ? "succeeded" : "failed", - timestamp: last?.timestamp ?? new Date(0).toISOString(), + timestamp: terminal.timestamp, ...(succeeded ? {} : { - reason: `Final ledger entry ended in ${last?.state ?? "unknown"} state.`, + reason: `Terminal ledger entry ended in ${terminal.state} state.`, }), }, }; } +function terminalOperationWarning( + entries: AgentRuntimeLedgerEntry[], +): string | undefined { + const last = entries.at(-1); + if (!last) { + return "Terminal operation omitted because no ledger entries were available."; + } + if (terminalEntry(entries)) { + return undefined; + } + if (isTerminalState(last.state)) { + return "Terminal operation omitted because no substantive terminal ledger entry was available."; + } + return `Terminal operation omitted because final ledger entry ended in ${last.state} state.`; +} + function buildPromotionPlan( runId: string, sessionId: string, @@ -425,7 +477,13 @@ function buildPromotionPlan( } } - operations.push(terminalOperation(runId, entries)); + const terminal = terminalOperation(runId, entries); + if (terminal) operations.push(terminal); + const terminalWarning = terminalOperationWarning(entries); + const warnings = [ + "Promotion plan is dry-run only; no Platform AgentRuntime writes were performed.", + ...(terminalWarning ? [terminalWarning] : []), + ]; return { schemaVersion: AGENT_RUNTIME_PROMOTION_PLAN_SCHEMA, @@ -433,9 +491,7 @@ function buildPromotionPlan( sessionId, idempotencyKey, operations, - warnings: [ - "Promotion plan is dry-run only; no Platform AgentRuntime writes were performed.", - ], + warnings, }; } diff --git a/test/cli/run-command.test.ts b/test/cli/run-command.test.ts index 472b17805..730584cd9 100644 --- a/test/cli/run-command.test.ts +++ b/test/cli/run-command.test.ts @@ -4,6 +4,7 @@ import { join } from "node:path"; import { afterEach, describe, expect, it, vi } from "vitest"; import { parseArgs } from "../../src/cli/args.js"; import { handleRunCommand, testing } from "../../src/cli/commands/run.js"; +import type { AgentTrajectoryEvent } from "../../src/server/agent-trajectory.js"; import { buildAgentRuntimeLedgerReport } from "../../src/server/agent-runtime-ledger.js"; import { SessionManager } from "../../src/session/manager.js"; @@ -212,6 +213,44 @@ describe("run command", () => { return { sessionDir, sessionId }; } + function buildLedgerForEvents( + sessionId: string, + events: AgentTrajectoryEvent[], + ) { + return buildAgentRuntimeLedgerReport({ + session: { id: sessionId }, + timeline: { + source: "local", + generatedAt: "2026-05-09T10:00:03.000Z", + items: [], + }, + trajectory: { + schemaVersion: "evalops.maestro.agent-trajectory.v1", + run: { + id: sessionId, + sessionId, + source: "local", + generatedAt: "2026-05-09T10:00:03.000Z", + platformBacked: false, + }, + counts: { + events: events.length, + evidenceAnchors: 0, + byKind: {}, + byPhase: {}, + byStatus: {}, + }, + events, + }, + replay: { + schemaVersion: "evalops.maestro.agent-trajectory-replay.v1", + trajectorySchemaVersion: "evalops.maestro.agent-trajectory.v1", + counts: { events: events.length, deltas: 0, errors: 0, warnings: 0 }, + deltas: [], + }, + }); + } + function makeLegacySessionDir(): { sessionDir: string; sessionId: string } { const sessionDir = mkdtempSync(join(tmpdir(), "maestro-run-legacy-")); tempDirs.push(sessionDir); @@ -700,4 +739,212 @@ describe("run command", () => { payload: { state: "succeeded" }, }); }); + + it("omits dry-run terminal promotion operations for non-terminal final state", () => { + const ledger = buildAgentRuntimeLedgerReport({ + session: { id: "session-running" }, + timeline: { + source: "local", + generatedAt: "2026-05-09T10:00:02.000Z", + items: [], + }, + trajectory: { + schemaVersion: "evalops.maestro.agent-trajectory.v1", + run: { + id: "session-running", + sessionId: "session-running", + source: "local", + generatedAt: "2026-05-09T10:00:02.000Z", + platformBacked: false, + }, + counts: { + events: 1, + evidenceAnchors: 0, + byKind: {}, + byPhase: {}, + byStatus: {}, + }, + events: [ + { + id: "event-running-tool", + sequence: 1, + timestamp: "2026-05-09T10:00:01.000Z", + kind: "tool", + phase: "act", + actor: "tool", + type: "tool.requested", + status: "running", + visibility: "user", + source: "local", + title: "Tool running", + evidence: [], + }, + ], + }, + replay: { + schemaVersion: "evalops.maestro.agent-trajectory-replay.v1", + trajectorySchemaVersion: "evalops.maestro.agent-trajectory.v1", + counts: { events: 1, deltas: 0, errors: 0, warnings: 0 }, + deltas: [], + }, + }); + + expect( + ledger.promotion.operations.some( + (operation) => + operation.operation === "complete_run" || + operation.operation === "fail_run", + ), + ).toBe(false); + expect(ledger.promotion.warnings).toContain( + "Terminal operation omitted because final ledger entry ended in running state.", + ); + }); + + it("omits terminal promotion operations for non-terminal final states", () => { + for (const scenario of [ + { + status: "pending", + ledgerState: "waiting", + kind: "wait", + phase: "wait", + actor: "platform", + type: "wait.pending", + }, + { + status: "denied", + ledgerState: "blocked", + kind: "governance", + phase: "govern", + actor: "system", + type: "policy.decision", + }, + ] as const) { + const ledger = buildLedgerForEvents(`session-${scenario.ledgerState}`, [ + { + id: `event-${scenario.ledgerState}`, + sequence: 1, + timestamp: "2026-05-09T10:00:01.000Z", + kind: scenario.kind, + phase: scenario.phase, + actor: scenario.actor, + type: scenario.type, + status: scenario.status, + visibility: "user", + source: "local", + title: `Final ${scenario.ledgerState} event`, + evidence: [], + }, + ]); + + expect( + ledger.promotion.operations.some( + (operation) => + operation.operation === "complete_run" || + operation.operation === "fail_run", + ), + ).toBe(false); + expect(ledger.promotion.warnings).toContain( + `Terminal operation omitted because final ledger entry ended in ${scenario.ledgerState} state.`, + ); + } + }); + + it("keeps passive info entries succeeded without treating them as terminal", () => { + const ledger = buildLedgerForEvents("session-info-only", [ + { + id: "event-session-started", + sequence: 1, + timestamp: "2026-05-09T10:00:01.000Z", + kind: "session", + phase: "setup", + actor: "system", + type: "session.started", + status: "info", + visibility: "user", + source: "local", + title: "Session started", + evidence: [], + }, + ]); + + expect(ledger.entries[0]?.state).toBe("succeeded"); + expect( + ledger.promotion.operations.some( + (operation) => + operation.operation === "complete_run" || + operation.operation === "fail_run", + ), + ).toBe(false); + expect(ledger.promotion.warnings).toContain( + "Terminal operation omitted because no substantive terminal ledger entry was available.", + ); + }); + + it("does not project governance decisions as active waits", () => { + const ledger = buildLedgerForEvents("session-governance", [ + { + id: "event-policy", + sequence: 1, + timestamp: "2026-05-09T10:00:01.000Z", + kind: "governance", + phase: "govern", + actor: "system", + type: "policy.decision", + status: "info", + visibility: "user", + source: "local", + title: "Policy decision recorded", + evidence: [], + }, + { + id: "event-final-message", + sequence: 2, + timestamp: "2026-05-09T10:00:02.000Z", + kind: "message", + phase: "think", + actor: "assistant", + type: "message.assistant", + status: "completed", + visibility: "user", + source: "local", + title: "Assistant response", + evidence: [], + }, + ]); + + expect(ledger.entries[0]?.platformShape.waitType).toBeUndefined(); + expect( + ledger.promotion.operations.some( + (operation) => + operation.operation === "wait_run" && + operation.ledgerEntryId === "ledger:event-policy", + ), + ).toBe(false); + }); + + it("keeps informational final events eligible for completion promotion", () => { + const ledger = buildLedgerForEvents("session-info", [ + { + id: "event-info", + sequence: 1, + timestamp: "2026-05-09T10:00:01.000Z", + kind: "context", + phase: "setup", + actor: "system", + type: "model.info", + status: "info", + visibility: "user", + source: "local", + title: "Model metadata recorded", + evidence: [], + }, + ]); + + expect(ledger.entries[0]?.state).toBe("succeeded"); + expect(ledger.promotion.operations.at(-1)).toMatchObject({ + operation: "complete_run", + payload: { state: "succeeded" }, + }); + }); }); From 913d84c2e91bbe30fa5a2b323662783c5ff19be3 Mon Sep 17 00:00:00 2001 From: Jonathan Haas Date: Fri, 15 May 2026 22:56:22 -0700 Subject: [PATCH 2/2] fix: map blocked runtime steps to failed --- src/server/agent-runtime-ledger.ts | 18 +++++++++++++--- test/cli/run-command.test.ts | 33 +++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/src/server/agent-runtime-ledger.ts b/src/server/agent-runtime-ledger.ts index a2b3c49c8..0293c6291 100644 --- a/src/server/agent-runtime-ledger.ts +++ b/src/server/agent-runtime-ledger.ts @@ -42,6 +42,11 @@ export type AgentRuntimeLedgerState = | "cancelled" | "skipped"; +type AgentRuntimePromotionStepState = Exclude< + AgentRuntimeLedgerState, + "blocked" +>; + export interface AgentRuntimeLedgerEntry { id: string; sequence: number; @@ -131,7 +136,7 @@ export type AgentRuntimePromotionOperation = payload: { stepId: string; kind: string; - state: AgentRuntimeLedgerState; + state: AgentRuntimePromotionStepState; title: string; timestamp: string; toolName?: string; @@ -359,10 +364,17 @@ function isTerminalState(state: AgentRuntimeLedgerState): boolean { function isTerminalCandidate(entry: AgentRuntimeLedgerEntry): boolean { return ( - isTerminalState(entry.state) && !PASSIVE_TERMINAL_ENTRY_KINDS.has(entry.kind) + isTerminalState(entry.state) && + !PASSIVE_TERMINAL_ENTRY_KINDS.has(entry.kind) ); } +function runStepStateForEntry( + entry: AgentRuntimeLedgerEntry, +): AgentRuntimePromotionStepState { + return entry.state === "blocked" ? "failed" : entry.state; +} + function terminalEntry( entries: AgentRuntimeLedgerEntry[], ): AgentRuntimeLedgerEntry | undefined { @@ -444,7 +456,7 @@ function buildPromotionPlan( payload: { stepId: entry.id, kind: entry.platformShape.stepKind, - state: entry.state, + state: runStepStateForEntry(entry), title: entry.title, timestamp: entry.timestamp, ...(entry.toolName ? { toolName: entry.toolName } : {}), diff --git a/test/cli/run-command.test.ts b/test/cli/run-command.test.ts index 730584cd9..38256afea 100644 --- a/test/cli/run-command.test.ts +++ b/test/cli/run-command.test.ts @@ -4,8 +4,8 @@ import { join } from "node:path"; import { afterEach, describe, expect, it, vi } from "vitest"; import { parseArgs } from "../../src/cli/args.js"; import { handleRunCommand, testing } from "../../src/cli/commands/run.js"; -import type { AgentTrajectoryEvent } from "../../src/server/agent-trajectory.js"; import { buildAgentRuntimeLedgerReport } from "../../src/server/agent-runtime-ledger.js"; +import type { AgentTrajectoryEvent } from "../../src/server/agent-trajectory.js"; import { SessionManager } from "../../src/session/manager.js"; describe("run command", () => { @@ -923,6 +923,37 @@ describe("run command", () => { ).toBe(false); }); + it("maps blocked ledger entries to valid Platform run-step states", () => { + const ledger = buildLedgerForEvents("session-denied", [ + { + id: "event-policy-denied", + sequence: 1, + timestamp: "2026-05-09T10:00:01.000Z", + kind: "governance", + phase: "govern", + actor: "system", + type: "policy.decision", + status: "denied", + visibility: "user", + source: "local", + title: "Policy denied", + evidence: [], + }, + ]); + + expect(ledger.entries[0]?.state).toBe("blocked"); + expect( + ledger.promotion.operations.find( + (operation) => + operation.operation === "record_run_step" && + operation.ledgerEntryId === "ledger:event-policy-denied", + ), + ).toMatchObject({ + operation: "record_run_step", + payload: { state: "failed" }, + }); + }); + it("keeps informational final events eligible for completion promotion", () => { const ledger = buildLedgerForEvents("session-info", [ {