feat: add local AgentRuntime ledger projection#419
Conversation
PR Summary · Medium Risk · Overview
Introduces Reviewed by Cursor Bugbot for commit aecb161. Bugbot is set up for automated code reviews on this repo. Configure here. |
348186a to
5b8ccbe
Compare
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 348186ae7f
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
There was a problem hiding this comment.
Cursor Bugbot has reviewed your changes and found 3 potential issues.
Bugbot Autofix prepared fixes for all 3 issues found in the latest run.
- ✅ Resolved by another fix:
`stateForStatus` misses "running" status, maps to "pending" — `stateForStatus` now explicitly maps `running` to `running`, so in-flight tool requests no longer fall back to `pending`.
- ✅ Resolved by another fix:
`tool.failed` events misclassified as `tool_call` not `tool_result` — `kindForEvent` now classifies `tool.failed` alongside `tool.completed` as `tool_result`, correcting failed tool ledger entries.
- ✅ Resolved by another fix: Fallback arg recovery missing new run subcommands
- The `run` fallback recovery now recognizes `ledger`, `replay`, and `promote` in the same path that already handled `inspect`.
Preview (2ca0ce72d6)
diff --git a/src/server/agent-runtime-ledger.ts b/src/server/agent-runtime-ledger.ts
--- a/src/server/agent-runtime-ledger.ts
+++ b/src/server/agent-runtime-ledger.ts
@@ -283,7 +283,7 @@
function waitTypeForEntry(
kind: AgentRuntimeLedgerEntryKind,
): string | undefined {
- if (kind === "wait" || kind === "governance") {
+ if (kind === "wait") {
return "AGENT_RUN_WAIT_TYPE_APPROVAL";
}
return undefined;
@@ -345,24 +345,45 @@
function terminalOperation(
runId: string,
entries: AgentRuntimeLedgerEntry[],
-): AgentRuntimePromotionOperation {
+): AgentRuntimePromotionOperation | undefined {
const last = entries.at(-1);
- const succeeded = last?.state === "succeeded" || last?.state === "skipped";
+ if (!last) return undefined;
+ const succeeded = last.state === "succeeded" || last.state === "skipped";
+ const failed = last.state === "failed" || last.state === "cancelled";
+ if (!succeeded && !failed) return undefined;
return {
operation: succeeded ? "complete_run" : "fail_run",
id: `promote:${runId}:terminal`,
payload: {
state: succeeded ? "succeeded" : "failed",
- timestamp: last?.timestamp ?? new Date(0).toISOString(),
+ timestamp: last.timestamp,
...(succeeded
? {}
: {
- reason: `Final ledger entry ended in ${last?.state ?? "unknown"} state.`,
+ reason: `Final ledger entry ended in ${last.state} state.`,
}),
},
};
}
+function terminalOperationWarning(
+ entries: AgentRuntimeLedgerEntry[],
+): string | undefined {
+ const last = entries.at(-1);
+ if (!last) {
+ return "Terminal operation omitted because no ledger entries were available.";
+ }
+ if (
+ last.state === "succeeded" ||
+ last.state === "skipped" ||
+ last.state === "failed" ||
+ last.state === "cancelled"
+ ) {
+ return undefined;
+ }
+ return `Terminal operation omitted because final ledger entry ended in ${last.state} state.`;
+}
+
function buildPromotionPlan(
runId: string,
sessionId: string,
@@ -425,7 +446,13 @@
}
}
- operations.push(terminalOperation(runId, entries));
+ const terminal = terminalOperation(runId, entries);
+ if (terminal) operations.push(terminal);
+ const terminalWarning = terminalOperationWarning(entries);
+ const warnings = [
+ "Promotion plan is dry-run only; no Platform AgentRuntime writes were performed.",
+ ...(terminalWarning ? [terminalWarning] : []),
+ ];
return {
schemaVersion: AGENT_RUNTIME_PROMOTION_PLAN_SCHEMA,
@@ -433,9 +460,7 @@
sessionId,
idempotencyKey,
operations,
- warnings: [
- "Promotion plan is dry-run only; no Platform AgentRuntime writes were performed.",
- ],
+ warnings,
};
}
diff --git a/test/cli/run-command.test.ts b/test/cli/run-command.test.ts
--- a/test/cli/run-command.test.ts
+++ b/test/cli/run-command.test.ts
@@ -4,6 +4,7 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { parseArgs } from "../../src/cli/args.js";
import { handleRunCommand, testing } from "../../src/cli/commands/run.js";
+import type { AgentTrajectoryEvent } from "../../src/server/agent-trajectory.js";
import { buildAgentRuntimeLedgerReport } from "../../src/server/agent-runtime-ledger.js";
import { SessionManager } from "../../src/session/manager.js";
@@ -212,6 +213,44 @@
return { sessionDir, sessionId };
}
+ function buildLedgerForEvents(
+ sessionId: string,
+ events: AgentTrajectoryEvent[],
+ ) {
+ return buildAgentRuntimeLedgerReport({
+ session: { id: sessionId },
+ timeline: {
+ source: "local",
+ generatedAt: "2026-05-09T10:00:03.000Z",
+ items: [],
+ },
+ trajectory: {
+ schemaVersion: "evalops.maestro.agent-trajectory.v1",
+ run: {
+ id: sessionId,
+ sessionId,
+ source: "local",
+ generatedAt: "2026-05-09T10:00:03.000Z",
+ platformBacked: false,
+ },
+ counts: {
+ events: events.length,
+ evidenceAnchors: 0,
+ byKind: {},
+ byPhase: {},
+ byStatus: {},
+ },
+ events,
+ },
+ replay: {
+ schemaVersion: "evalops.maestro.agent-trajectory-replay.v1",
+ trajectorySchemaVersion: "evalops.maestro.agent-trajectory.v1",
+ counts: { events: events.length, deltas: 0, errors: 0, warnings: 0 },
+ deltas: [],
+ },
+ });
+ }
+
function makeLegacySessionDir(): { sessionDir: string; sessionId: string } {
const sessionDir = mkdtempSync(join(tmpdir(), "maestro-run-legacy-"));
tempDirs.push(sessionDir);
@@ -700,4 +739,156 @@
payload: { state: "succeeded" },
});
});
+
+ it("omits dry-run terminal promotion operations for non-terminal final state", () => {
+ const ledger = buildAgentRuntimeLedgerReport({
+ session: { id: "session-running" },
+ timeline: {
+ source: "local",
+ generatedAt: "2026-05-09T10:00:02.000Z",
+ items: [],
+ },
+ trajectory: {
+ schemaVersion: "evalops.maestro.agent-trajectory.v1",
+ run: {
+ id: "session-running",
+ sessionId: "session-running",
+ source: "local",
+ generatedAt: "2026-05-09T10:00:02.000Z",
+ platformBacked: false,
+ },
+ counts: {
+ events: 1,
+ evidenceAnchors: 0,
+ byKind: {},
+ byPhase: {},
+ byStatus: {},
+ },
+ events: [
+ {
+ id: "event-running-tool",
+ sequence: 1,
+ timestamp: "2026-05-09T10:00:01.000Z",
+ kind: "tool",
+ phase: "act",
+ actor: "tool",
+ type: "tool.requested",
+ status: "running",
+ visibility: "user",
+ source: "local",
+ title: "Tool running",
+ evidence: [],
+ },
+ ],
+ },
+ replay: {
+ schemaVersion: "evalops.maestro.agent-trajectory-replay.v1",
+ trajectorySchemaVersion: "evalops.maestro.agent-trajectory.v1",
+ counts: { events: 1, deltas: 0, errors: 0, warnings: 0 },
+ deltas: [],
+ },
+ });
+
+ expect(
+ ledger.promotion.operations.some(
+ (operation) =>
+ operation.operation === "complete_run" ||
+ operation.operation === "fail_run",
+ ),
+ ).toBe(false);
+ expect(ledger.promotion.warnings).toContain(
+ "Terminal operation omitted because final ledger entry ended in running state.",
+ );
+ });
+
+ it("omits terminal promotion operations for waiting and blocked final states", () => {
+ for (const scenario of [
+ {
+ status: "pending",
+ ledgerState: "waiting",
+ kind: "wait",
+ phase: "wait",
+ actor: "platform",
+ type: "wait.pending",
+ },
+ {
+ status: "denied",
+ ledgerState: "blocked",
+ kind: "governance",
+ phase: "govern",
+ actor: "system",
+ type: "policy.decision",
+ },
+ ] as const) {
+ const ledger = buildLedgerForEvents(`session-${scenario.ledgerState}`, [
+ {
+ id: `event-${scenario.ledgerState}`,
+ sequence: 1,
+ timestamp: "2026-05-09T10:00:01.000Z",
+ kind: scenario.kind,
+ phase: scenario.phase,
+ actor: scenario.actor,
+ type: scenario.type,
+ status: scenario.status,
+ visibility: "user",
+ source: "local",
+ title: `Final ${scenario.ledgerState} event`,
+ evidence: [],
+ },
+ ]);
+
+ expect(
+ ledger.promotion.operations.some(
+ (operation) =>
+ operation.operation === "complete_run" ||
+ operation.operation === "fail_run",
+ ),
+ ).toBe(false);
+ expect(ledger.promotion.warnings).toContain(
+ `Terminal operation omitted because final ledger entry ended in ${scenario.ledgerState} state.`,
+ );
+ }
+ });
+
+ it("does not project governance decisions as active waits", () => {
+ const ledger = buildLedgerForEvents("session-governance", [
+ {
+ id: "event-policy",
+ sequence: 1,
+ timestamp: "2026-05-09T10:00:01.000Z",
+ kind: "governance",
+ phase: "govern",
+ actor: "system",
+ type: "policy.decision",
+ status: "info",
+ visibility: "user",
+ source: "local",
+ title: "Policy decision recorded",
+ evidence: [],
+ },
+ {
+ id: "event-final-message",
+ sequence: 2,
+ timestamp: "2026-05-09T10:00:02.000Z",
+ kind: "message",
+ phase: "think",
+ actor: "assistant",
+ type: "message.assistant",
+ status: "completed",
+ visibility: "user",
+ source: "local",
+ title: "Assistant response",
+ evidence: [],
+ },
+ ]);
+
+ expect(ledger.entries[0]?.platformShape.waitType).toBeUndefined();
+ expect(
+ ledger.promotion.operations.some(
+ (operation) =>
+ operation.operation === "wait_run" &&
+ operation.ledgerEntryId === "ledger:event-policy",
+ ),
+ ).toBe(false);
+ });
});
You can send follow-ups to the cloud agent here.
Reviewed by Cursor Bugbot for commit 5b8ccbe. Configure here.
5b8ccbe to
c8da29c
Compare
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 578e5ee013
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: aecb161501
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| if (kind === "wait" || kind === "governance") { | ||
| return "AGENT_RUN_WAIT_TYPE_APPROVAL"; |
There was a problem hiding this comment.
Restrict wait_run generation to unresolved wait events
waitTypeForEntry marks every governance ledger entry as an approval wait, so buildPromotionPlan emits a wait_run operation even for resolved policy.decision outcomes (for example denied/info). That produces a dry-run promotion plan that records phantom waits after a decision has already been made, which can mislead downstream consumers that interpret wait_run as an active blocking state.
Useful? React with 👍 / 👎.

Summary
- `maestro run ledger`, `maestro run replay`, and `maestro run promote` to emit the ledger, replay summary, and dry-run Platform promotion plan.
- `maestro run inspect --json` and document the inspect/replay/promote contract.
Internal Source PR
Verification
node ./scripts/run-vitest.js --run test/cli/run-command.test.ts test/server/agent-trajectory-replay.test.ts test/server/agent-trajectory-validation.test.ts
bunx tsc -p tsconfig.build.json --noEmit