diff --git a/.changeset/patch-agent-log-token-delta.md b/.changeset/patch-agent-log-token-delta.md new file mode 100644 index 00000000000..70d76600a5d --- /dev/null +++ b/.changeset/patch-agent-log-token-delta.md @@ -0,0 +1,8 @@ +--- +"gh-aw": patch +--- + +Improved agent log rendering by displaying the effective-token delta (ΔET) for each MCP tool call. The delta is computed by correlating tool call timestamps from `rpc-messages.jsonl` with LLM API call data in `token-usage.jsonl`, showing how much each tool call result contributed to the growing context window. + +- Go CLI: adds a "Tool Call Timeline (Effective Token Δ)" table to the MCP tool usage section when delta data is available +- GitHub Actions step summary: adds a ΔET column to the REQUEST table in the MCP Gateway Activity section diff --git a/actions/setup/js/parse_mcp_gateway_log.cjs b/actions/setup/js/parse_mcp_gateway_log.cjs index 3d475cd2b9e..454738a495e 100644 --- a/actions/setup/js/parse_mcp_gateway_log.cjs +++ b/actions/setup/js/parse_mcp_gateway_log.cjs @@ -572,15 +572,87 @@ function buildRpcSummaryRow(cells) { return `| ${cells.map(cell => escapeMarkdownTableCell(cell)).join(" | ")} |`; } +/** + * Computes effective-token deltas for each REQUEST entry relative to the surrounding + * LLM API calls recorded in token-usage.jsonl. + * + * For each request at timestamp T, the algorithm finds: + * - prev: the last token-usage entry with timestamp < T + * - next: the first token-usage entry with timestamp > T + * and returns delta = effectiveTokens(next) − effectiveTokens(prev). + * + * @param {string} tokenUsageContent - Raw content of token-usage.jsonl + * @param {Array} requests - REQUEST entries from rpc-messages.jsonl + * @returns {Map} Map from request index to ΔET (omits entries with delta ≤ 0) + */ +function computeToolCallTokenDeltas(tokenUsageContent, requests) { + const deltas = new Map(); + if (!tokenUsageContent || !requests || requests.length === 0) return deltas; + + // Parse and sort token-usage entries by timestamp + /** @type {Array<{ts: number, et: number}>} */ + const etEntries = []; + for (const line of tokenUsageContent.split("\n")) { + const trimmed = line.trim(); + if (!trimmed) continue; + try { + const entry = JSON.parse(trimmed); + if (!entry || typeof entry !== "object" || !entry.timestamp) continue; + const ts = new Date(entry.timestamp).getTime(); + if (isNaN(ts)) continue; + const et = computeEffectiveTokens( + entry.model || "", + entry.input_tokens || 0, + entry.output_tokens || 0, + entry.cache_read_tokens || 0, + entry.cache_write_tokens || 0 + ); + etEntries.push({ ts, et }); + } catch { + // skip malformed lines + } + } + + etEntries.sort((a, b) => a.ts - b.ts); + if (etEntries.length < 2) return deltas; + + for (let i = 0; i < requests.length; i++) { + const req = requests[i]; + if (!req.timestamp) continue; + const callTs = new Date(req.timestamp).getTime(); + if (isNaN(callTs)) continue; + + let prevIdx = -1; + let nextIdx = -1; + for (let j = 0; j < etEntries.length; j++) { + if (etEntries[j].ts < callTs) { + prevIdx = j; // keep updating to get the last one before callTs + } else if (etEntries[j].ts > callTs && nextIdx === -1) { + nextIdx = j; // first entry after callTs + } + } + + if (prevIdx === -1 || nextIdx === -1) continue; + + const delta = etEntries[nextIdx].et - etEntries[prevIdx].et; + if (delta > 0) { + deltas.set(i, delta); + } + } + + return deltas; +} + /** * Generates a markdown step summary for rpc-messages.jsonl entries (mcpg v0.2.0+ format). * Shows a table of REQUEST entries (tool calls), a count of RESPONSE entries, any other * message types, and the DIFC_FILTERED section if there are blocked events. * @param {{requests: Array, responses: Array, other: Array}} entries * @param {Array} difcFilteredEvents - DIFC_FILTERED events parsed separately + * @param {Map} [tokenDeltas] - Optional map of request index → ΔET * @returns {string} Markdown summary, or empty string if nothing to show */ -function generateRpcMessagesSummary(entries, difcFilteredEvents) { +function generateRpcMessagesSummary(entries, difcFilteredEvents, tokenDeltas) { const { requests, responses, other } = entries; const blockedCount = difcFilteredEvents ? difcFilteredEvents.length : 0; const totalMessages = requests.length + responses.length + other.length + blockedCount; @@ -622,16 +694,29 @@ function generateRpcMessagesSummary(entries, difcFilteredEvents) { callLines.push(""); if (requests.length > 0) { + const hasDeltas = tokenDeltas && tokenDeltas.size > 0; callLines.push("#### REQUEST"); callLines.push(""); - callLines.push("| Time | Server | Tool / Method |"); - callLines.push("|------|--------|---------------|"); + if (hasDeltas) { + callLines.push("| Time | Server | Tool / Method | ΔET |"); + callLines.push("|------|--------|---------------|----:|"); + } else { + callLines.push("| Time | Server | Tool / Method |"); + callLines.push("|------|--------|---------------|"); + } - for (const req of requests) { + for (let i = 0; i < requests.length; i++) { + const req = requests[i]; const time = formatRpcMessageTime(req.timestamp); const server = escapeMarkdownTableCell(req.server_id || "-"); const label = formatRpcInlineCodeLabel(getRpcRequestLabel(req)); - callLines.push(`| ${time} | ${server} | ${label} |`); + if (hasDeltas) { + const delta = tokenDeltas.get(i); + const deltaCell = delta ? `+${delta.toLocaleString()}` : "-"; + callLines.push(`| ${time} | ${server} | ${label} | ${escapeMarkdownTableCell(deltaCell)} |`); + } else { + callLines.push(`| ${time} | ${server} | ${label} |`); + } } callLines.push(""); @@ -766,7 +851,20 @@ async function main() { core.info(`rpc-messages.jsonl: ${rpcEntries.requests.length} request(s), ${rpcEntries.responses.length} response(s), ${rpcEntries.other.length} other, ${difcFilteredEvents.length} DIFC_FILTERED`); if (totalMessages > 0 || difcFilteredEvents.length > 0) { - const rpcSummary = generateRpcMessagesSummary(rpcEntries, difcFilteredEvents); + // Compute effective-token deltas by correlating request timestamps with token-usage.jsonl + let tokenDeltas = new Map(); + if (fs.existsSync(TOKEN_USAGE_PATH) && rpcEntries.requests.length > 0) { + try { + const tokenUsageContent = fs.readFileSync(TOKEN_USAGE_PATH, "utf8"); + tokenDeltas = computeToolCallTokenDeltas(tokenUsageContent, rpcEntries.requests); + if (tokenDeltas.size > 0) { + core.info(`Computed effective-token deltas for ${tokenDeltas.size} of ${rpcEntries.requests.length} request(s)`); + } + } catch { + // Non-fatal: delta column is omitted when token-usage.jsonl is unreadable + } + } + const rpcSummary = generateRpcMessagesSummary(rpcEntries, difcFilteredEvents, tokenDeltas); if (rpcSummary.length > 0) { core.summary.addRaw(rpcSummary); } @@ -949,6 +1047,7 @@ if (typeof module !== "undefined" && module.exports) { parseRpcMessagesJsonl, getRpcRequestLabel, generateRpcMessagesSummary, + computeToolCallTokenDeltas, printAllGatewayFiles, parseTokenUsageJsonl, generateTokenUsageSummary, diff --git a/actions/setup/js/parse_mcp_gateway_log.test.cjs b/actions/setup/js/parse_mcp_gateway_log.test.cjs index 51fabd3559b..0980bb6320c 100644 --- a/actions/setup/js/parse_mcp_gateway_log.test.cjs +++ b/actions/setup/js/parse_mcp_gateway_log.test.cjs @@ -12,6 +12,7 @@ const { parseRpcMessagesJsonl, getRpcRequestLabel, generateRpcMessagesSummary, + computeToolCallTokenDeltas, printAllGatewayFiles, parseTokenUsageJsonl, generateTokenUsageSummary, @@ -1477,4 +1478,108 @@ not-json expect(hasError).toBe(false); }); }); + + describe("computeToolCallTokenDeltas", () => { + // Helper to build a token-usage JSONL line + function tuLine(ts, inputTokens, outputTokens) { + return JSON.stringify({ timestamp: ts, model: "unknown", provider: "test", input_tokens: inputTokens, output_tokens: outputTokens, cache_read_tokens: 0, cache_write_tokens: 0 }); + } + // Helper to build a REQUEST entry + function req(ts, toolName) { + return { timestamp: ts, server_id: "srv", type: "REQUEST", payload: { method: "tools/call", params: { name: toolName } } }; + } + + test("computes delta for a tool call bracketed by two API calls", () => { + // ET(t0) = 1*1000 + 4*50 = 1200; ET(t1) = 1*1500 + 4*80 = 1820; delta = 620 + const tokenContent = [ + tuLine("2026-05-19T21:10:00.000Z", 1000, 50), + tuLine("2026-05-19T21:10:10.000Z", 1500, 80), + ].join("\n"); + const requests = [req("2026-05-19T21:10:05.000Z", "my-tool")]; + const deltas = computeToolCallTokenDeltas(tokenContent, requests); + expect(deltas.get(0)).toBe(620); + }); + + test("returns empty map when tool call has no preceding API call", () => { + const tokenContent = tuLine("2026-05-19T21:10:10.000Z", 1000, 50); + const requests = [req("2026-05-19T21:10:05.000Z", "my-tool")]; + const deltas = computeToolCallTokenDeltas(tokenContent, requests); + expect(deltas.size).toBe(0); + }); + + test("returns empty map when tool call has no following API call", () => { + const tokenContent = tuLine("2026-05-19T21:10:00.000Z", 1000, 50); + const requests = [req("2026-05-19T21:10:05.000Z", "my-tool")]; + const deltas = computeToolCallTokenDeltas(tokenContent, requests); + expect(deltas.size).toBe(0); + }); + + test("returns empty map for empty inputs", () => { + expect(computeToolCallTokenDeltas("", []).size).toBe(0); + expect(computeToolCallTokenDeltas("", null).size).toBe(0); + }); + + test("computes correct deltas for multiple sequential tool calls", () => { + // ET[0] = 1200, ET[1] = 1820, ET[2] = 2400 + const tokenContent = [ + tuLine("2026-05-19T21:10:00.000Z", 1000, 50), + tuLine("2026-05-19T21:10:10.000Z", 1500, 80), + tuLine("2026-05-19T21:10:20.000Z", 2000, 100), + ].join("\n"); + const requests = [ + req("2026-05-19T21:10:05.000Z", "tool-a"), + req("2026-05-19T21:10:15.000Z", "tool-b"), + ]; + const deltas = computeToolCallTokenDeltas(tokenContent, requests); + expect(deltas.get(0)).toBe(620); // 1820 - 1200 + expect(deltas.get(1)).toBe(580); // 2400 - 1820 + }); + }); + + describe("generateRpcMessagesSummary with token deltas", () => { + test("shows ΔET column when deltas are provided", () => { + const entries = { + requests: [ + { timestamp: "2026-05-19T21:10:05.123Z", server_id: "srv", type: "REQUEST", payload: { method: "tools/call", params: { name: "my-tool" } } }, + ], + responses: [], + other: [], + }; + const deltas = new Map([[0, 620]]); + const result = generateRpcMessagesSummary(entries, [], deltas); + expect(result).toContain("ΔET"); + expect(result).toContain("+620"); + expect(result).toContain("my-tool"); + }); + + test("omits ΔET column when no deltas are provided", () => { + const entries = { + requests: [ + { timestamp: "2026-05-19T21:10:05.123Z", server_id: "srv", type: "REQUEST", payload: { method: "tools/call", params: { name: "my-tool" } } }, + ], + responses: [], + other: [], + }; + const result = generateRpcMessagesSummary(entries, []); + expect(result).not.toContain("ΔET"); + expect(result).toContain("my-tool"); + }); + + test("shows dash for requests without a delta", () => { + const entries = { + requests: [ + { timestamp: "2026-05-19T21:10:05.123Z", server_id: "srv", type: "REQUEST", payload: { method: "tools/call", params: { name: "tool-a" } } }, + { timestamp: "2026-05-19T21:10:15.123Z", server_id: "srv", type: "REQUEST", payload: { method: "tools/call", params: { name: "tool-b" } } }, + ], + responses: [], + other: [], + }; + const deltas = new Map([[0, 500]]); // only tool-a has a delta + const result = generateRpcMessagesSummary(entries, [], deltas); + expect(result).toContain("ΔET"); + expect(result).toContain("+500"); + expect(result).toContain("tool-a"); + expect(result).toContain("tool-b"); + }); + }); }); diff --git a/docs/adr/33629-effective-token-delta-per-mcp-tool-call.md b/docs/adr/33629-effective-token-delta-per-mcp-tool-call.md new file mode 100644 index 00000000000..4542d500fae --- /dev/null +++ b/docs/adr/33629-effective-token-delta-per-mcp-tool-call.md @@ -0,0 +1,86 @@ +# ADR-33629: Effective-Token Delta per MCP Tool Call via Timestamp Correlation + +**Date**: 2026-05-20 +**Status**: Draft +**Deciders**: pelikhan, Copilot + +--- + +## Part 1 — Narrative (Human-Friendly) + +### Context + +Each MCP tool call result is appended to the LLM context window on the next turn, which directly increases per-turn token cost. The `gh aw audit` Go CLI and the GitHub Actions step-summary (`parse_mcp_gateway_log.cjs`) both surface MCP tool call timelines, but neither rendering exposes the cost of each tool call's payload in the next API call. The gateway log (`rpc-messages.jsonl`) records tool call timestamps, and `token-usage.jsonl` records every LLM API call with input/output/cache token counts and model identifiers from which an effective-token weighting is already computed elsewhere in the codebase. The two files are written independently and have no shared correlation key. + +### Decision + +We will compute an effective-token delta (`ΔET`) for each MCP tool call by **timestamp-bracketing** against the existing `token-usage.jsonl` stream. For each tool call at timestamp `T`, we locate `prev` (the last token-usage entry with `ts < T`) and `next` (the first token-usage entry with `ts > T`), then assign `ΔET = effectiveTokens(next) − effectiveTokens(prev)`. The delta is rendered as a new `ΔET` column in the JS step-summary REQUEST table and as a separate "Tool Call Timeline (Effective Token Δ)" table in the Go CLI MCP tool usage report. When `token-usage.jsonl` is missing, unreadable, or insufficient (fewer than two entries), the delta is silently omitted — the feature is strictly additive and non-fatal. + +### Alternatives Considered + +#### Alternative 1: Measure tool call result payload size directly (bytes or local tokenization) + +We could instead size the raw tool call result payload from `rpc-messages.jsonl` and convert that to tokens via a local tokenizer. This was rejected because (a) it ignores model-specific effective-token weightings (cache reads, cache writes, output tokens) already centralized in `computeEffectiveTokens` / `computeModelEffectiveTokensWithWeights`, (b) it cannot account for cache eviction, system-prompt resends, or context pruning between turns, and (c) it would require introducing a tokenizer dependency to JS and Go for results the LLM provider's own token accounting already attributes precisely. + +#### Alternative 2: Add a correlation ID linking each MCP request to the LLM API call that consumed it + +The MCP gateway and the LLM client could be modified to share a correlation ID (e.g., a turn ID), and `token-usage.jsonl` could record which tool call results it consumed. This would yield exact attribution rather than timestamp-based brackets. It was rejected for this PR because it requires coordinated changes across the gateway, the LLM client, and every engine adapter; timestamp correlation is sufficient for the common case (sequential calls) and uses only existing log data. A correlation-ID approach remains a future option if timestamp bracketing proves inaccurate in practice. + +#### Alternative 3: Compute the delta inside the audit pipeline only and skip the JS step summary + +We could limit the new feature to the Go CLI audit/logs path. This was rejected because the GitHub Actions step summary is the most visible artifact during a workflow run and is where users first notice expensive tool calls — adding the column there has the highest impact-per-byte. Duplicating the correlation algorithm in Go and JS is intentional: each environment has its own log-reading pipeline, and the algorithm is small and well-tested in both. + +### Consequences + +#### Positive +- Users see, per tool call, how much effective-token cost the tool's result added to the next API call. +- Implementation reuses existing effective-token weight computation (`computeEffectiveTokens` in JS, `computeModelEffectiveTokensWithWeights` in Go), so caching, model multipliers, and output weighting are honored consistently. +- Feature is fully additive and degrades gracefully: when `token-usage.jsonl` is absent or has fewer than two entries, no `ΔET` column is rendered and no error is reported. +- Unit tests cover the bracketing algorithm in both Go (`TestCorrelateToolCallsWithTokenDelta`) and JS (`computeToolCallTokenDeltas`), including the no-prev, no-next, multi-call, and empty-input cases. + +#### Negative +- Timestamp-bracketing is approximate: when multiple tool calls execute between two consecutive API calls, the entire ET delta is attributed to none of them (since both share the same `prev` and `next`) — only tool calls bracketed by distinct API call pairs receive a delta. +- The algorithm is duplicated across two languages (Go and JS); fixes to the bracketing logic must be made in two places. +- The delta conflates tool-result cost with any other context growth between the two API calls (e.g., system reminders, growing message history) — it is an upper bound on the tool's contribution, not an exact measurement. +- A clock-skew or out-of-order log scenario (timestamps in `rpc-messages.jsonl` not aligned with `token-usage.jsonl`) silently produces wrong attribution. + +#### Neutral +- A new `EffectiveTokenDelta int` field is added to the `MCPToolCall` Go struct with `omitempty` JSON serialization, so JSON consumers see no breaking change. +- The JS REQUEST table grows a fourth column (`ΔET`) only when at least one delta is computed; the existing three-column form is preserved when no deltas exist. +- Two new exported symbols are added to the JS module (`computeToolCallTokenDeltas`) and the Go `pkg/cli` package (`readTokenUsageEntries`, `correlateToolCallsWithTokenDelta`). +- Deltas with value `≤ 0` are not rendered, so a tool call followed by a smaller API call (e.g., due to cache eviction recovery) is silently shown as a dash, not a negative value. + +--- + +## Part 2 — Normative Specification (RFC 2119) + +> The key words **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, **SHALL NOT**, **SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, and **OPTIONAL** in this section are to be interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119). + +### Delta Computation + +1. Implementations **MUST** compute each MCP tool call's `ΔET` as `effectiveTokens(next) − effectiveTokens(prev)`, where `prev` is the last `token-usage.jsonl` entry with timestamp strictly less than the tool call timestamp and `next` is the first entry with timestamp strictly greater than the tool call timestamp. +2. Implementations **MUST** use the project's existing effective-token weighting routines (`computeEffectiveTokens` in JS, `computeModelEffectiveTokensWithWeights` in Go) to compute `effectiveTokens(entry)`; they **MUST NOT** introduce a divergent token weighting for this feature. +3. Implementations **MUST NOT** assign a `ΔET` value to a tool call that lacks either a `prev` or a `next` token-usage entry. +4. Implementations **MUST NOT** render a `ΔET` value that is less than or equal to zero; such values **MUST** be treated as "no delta available" for that tool call. +5. Implementations **MUST** be silent and non-fatal when `token-usage.jsonl` is missing, unreadable, malformed, or contains fewer than two parseable entries — they **MUST** continue rendering the tool call timeline without a delta column. + +### Rendering + +1. Go CLI implementations **MUST** render a separate "Tool Call Timeline (Effective Token Δ)" table containing only tool calls whose `EffectiveTokenDelta` is greater than zero. +2. JS step-summary implementations **MUST** render a `ΔET` column in the REQUEST table only when at least one tool call has a positive delta; otherwise the existing three-column REQUEST table layout **MUST** be preserved. +3. Tool calls in the JS REQUEST table that do not have a positive delta **MUST** display `-` in the `ΔET` column when the column is present. +4. Delta values **SHOULD** be formatted with a leading `+` sign and thousands separators (e.g., `+1,234`) in user-facing output. + +### Data Model + +1. The Go `MCPToolCall` struct **MUST** include an `EffectiveTokenDelta int` field with the JSON tag `effective_token_delta,omitempty`. +2. The Go correlation routine `correlateToolCallsWithTokenDelta` **MUST** return a new slice (not mutate the input) and **MUST** be invoked for both code paths in `extractMCPToolUsageData` (the `rpc-messages.jsonl` path and the `gateway.jsonl` path). +3. The JS correlation routine `computeToolCallTokenDeltas` **MUST** return a `Map` keyed by request index, and **MUST** omit entries whose computed delta is less than or equal to zero. + +### Conformance + +An implementation is considered conformant with this ADR if it satisfies all **MUST** and **MUST NOT** requirements above. Failure to meet any **MUST** or **MUST NOT** requirement constitutes non-conformance. + +--- + +*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/26193570188) workflow. The PR author must review, complete, and finalize this document before the PR can merge.* diff --git a/pkg/cli/audit_report.go b/pkg/cli/audit_report.go index 145aa1d7c8d..11fe1125f70 100644 --- a/pkg/cli/audit_report.go +++ b/pkg/cli/audit_report.go @@ -181,15 +181,16 @@ type MCPToolSummary struct { // MCPToolCall represents a single MCP tool call with full details type MCPToolCall struct { - Timestamp string `json:"timestamp"` - ServerName string `json:"server_name"` - ToolName string `json:"tool_name"` - Method string `json:"method,omitempty"` - InputSize int `json:"input_size"` - OutputSize int `json:"output_size"` - Duration string `json:"duration,omitempty"` - Status string `json:"status"` - Error string `json:"error,omitempty"` + Timestamp string `json:"timestamp"` + ServerName string `json:"server_name"` + ToolName string `json:"tool_name"` + Method string `json:"method,omitempty"` + InputSize int `json:"input_size"` + OutputSize int `json:"output_size"` + Duration string `json:"duration,omitempty"` + Status string `json:"status"` + Error string `json:"error,omitempty"` + EffectiveTokenDelta int `json:"effective_token_delta,omitempty"` // Change in effective tokens caused by this tool call result } // MCPServerStats contains server-level statistics diff --git a/pkg/cli/audit_report_render_tools.go b/pkg/cli/audit_report_render_tools.go index 8ea851bdfcd..62b7d450540 100644 --- a/pkg/cli/audit_report_render_tools.go +++ b/pkg/cli/audit_report_render_tools.go @@ -121,6 +121,38 @@ func renderMCPToolUsageTable(mcpData *MCPToolUsageData) { if mcpData.GuardPolicySummary != nil && mcpData.GuardPolicySummary.TotalBlocked > 0 { renderGuardPolicySummary(mcpData.GuardPolicySummary) } + + // Render tool call timeline with effective-token deltas when available + var callsWithDelta []MCPToolCall + for _, tc := range mcpData.ToolCalls { + if tc.EffectiveTokenDelta > 0 { + callsWithDelta = append(callsWithDelta, tc) + } + } + if len(callsWithDelta) > 0 { + fmt.Fprintln(os.Stderr) + fmt.Fprintln(os.Stderr, " Tool Call Timeline (Effective Token Δ):") + fmt.Fprintln(os.Stderr) + + timelineConfig := console.TableConfig{ + Headers: []string{"Time", "Server", "Tool", "ΔET"}, + Rows: make([][]string, 0, len(callsWithDelta)), + } + for _, tc := range callsWithDelta { + ts := tc.Timestamp + if len(ts) > 19 { + ts = ts[:19] + "Z" + } + row := []string{ + ts, + stringutil.Truncate(tc.ServerName, 20), + stringutil.Truncate(tc.ToolName, 35), + "+" + console.FormatNumber(tc.EffectiveTokenDelta), + } + timelineConfig.Rows = append(timelineConfig.Rows, row) + } + fmt.Fprint(os.Stderr, console.RenderTable(timelineConfig)) + } } // renderMCPServerHealth renders MCP server health summary diff --git a/pkg/cli/gateway_logs_mcp.go b/pkg/cli/gateway_logs_mcp.go index 60b98629c57..9568c222051 100644 --- a/pkg/cli/gateway_logs_mcp.go +++ b/pkg/cli/gateway_logs_mcp.go @@ -78,6 +78,9 @@ func extractMCPToolUsageData(logDir string, verbose bool) (*MCPToolUsageData, er if err != nil { return nil, fmt.Errorf("failed to read rpc-messages.jsonl: %w", err) } + // Correlate tool calls with effective-token deltas from token-usage.jsonl + tokenUsageFile := findTokenUsageFile(logDir) + toolCalls = correlateToolCallsWithTokenDelta(toolCalls, tokenUsageFile) mcpData.ToolCalls = toolCalls gatewayLogsLog.Printf("Loaded %d tool calls from rpc-messages.jsonl", len(toolCalls)) } else { @@ -85,6 +88,9 @@ func extractMCPToolUsageData(logDir string, verbose bool) (*MCPToolUsageData, er if err := extractToolCallsFromGatewayLog(gatewayLogPath, mcpData); err != nil { return nil, err } + // Correlate tool calls with effective-token deltas from token-usage.jsonl + tokenUsageFile := findTokenUsageFile(logDir) + mcpData.ToolCalls = correlateToolCallsWithTokenDelta(mcpData.ToolCalls, tokenUsageFile) gatewayLogsLog.Printf("Loaded %d tool calls from gateway.jsonl", len(mcpData.ToolCalls)) } diff --git a/pkg/cli/token_usage.go b/pkg/cli/token_usage.go index 5721644592f..7d71ac8c1aa 100644 --- a/pkg/cli/token_usage.go +++ b/pkg/cli/token_usage.go @@ -427,6 +427,147 @@ func extractCustomTokenWeightsFromDir(runDir string) *types.TokenWeights { return awInfo.TokenWeights } +// readTokenUsageEntries parses a token-usage.jsonl file and returns the raw +// ordered list of entries, sorted by timestamp where available. +func readTokenUsageEntries(filePath string) ([]TokenUsageEntry, error) { + file, err := os.Open(filePath) + if err != nil { + return nil, fmt.Errorf("failed to open token usage file: %w", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) + + type orderedEntry struct { + entry TokenUsageEntry + timestamp time.Time + hasTimestamp bool + order int + } + + var ordered []orderedEntry + lineNum := 0 + for scanner.Scan() { + lineNum++ + line := strings.TrimSpace(scanner.Text()) + if line == "" { + continue + } + var entry TokenUsageEntry + if err := json.Unmarshal([]byte(line), &entry); err != nil { + tokenUsageLog.Printf("Skipping invalid JSON at line %d: %v", lineNum, err) + continue + } + ts, hasTs := parseTokenUsageTimestamp(entry.Timestamp) + ordered = append(ordered, orderedEntry{entry: entry, timestamp: ts, hasTimestamp: hasTs, order: lineNum}) + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error reading token usage file: %w", err) + } + + sort.SliceStable(ordered, func(i, j int) bool { + l, r := ordered[i], ordered[j] + if l.hasTimestamp && r.hasTimestamp { + return l.timestamp.Before(r.timestamp) + } + if l.hasTimestamp != r.hasTimestamp { + return l.hasTimestamp + } + return l.order < r.order + }) + + entries := make([]TokenUsageEntry, len(ordered)) + for i, o := range ordered { + entries[i] = o.entry + } + return entries, nil +} + +// correlateToolCallsWithTokenDelta correlates each tool call with the effective-token +// delta introduced by its result being appended to the LLM context. +// +// For each tool call at timestamp T, the algorithm finds: +// - prev: the last token-usage entry whose timestamp is before T (the API call +// that produced the tool call decision) +// - next: the first token-usage entry whose timestamp is after T (the API call +// that consumed the tool call result) +// +// delta = effectiveTokens(next) − effectiveTokens(prev). +// +// Tool calls that cannot be bracketed by a prev/next pair receive delta = 0. +// The function is a no-op when tokenUsageFile is empty or unreadable. +func correlateToolCallsWithTokenDelta(toolCalls []MCPToolCall, tokenUsageFile string) []MCPToolCall { + if len(toolCalls) == 0 || tokenUsageFile == "" { + return toolCalls + } + + entries, err := readTokenUsageEntries(tokenUsageFile) + if err != nil { + tokenUsageLog.Printf("correlateToolCallsWithTokenDelta: failed to read %s: %v", tokenUsageFile, err) + return toolCalls + } + if len(entries) < 2 { + return toolCalls + } + + // Resolve weights once for all entries + multipliers, classWeights := resolveEffectiveWeights(nil) + + // Pre-compute effective tokens for each entry + type entryWithET struct { + ts time.Time + et int + } + etEntries := make([]entryWithET, 0, len(entries)) + for _, e := range entries { + ts, ok := parseTokenUsageTimestamp(e.Timestamp) + if !ok { + continue + } + et := computeModelEffectiveTokensWithWeights( + e.Model, e.InputTokens, e.OutputTokens, e.CacheReadTokens, e.CacheWriteTokens, + multipliers, classWeights, + ) + etEntries = append(etEntries, entryWithET{ts: ts, et: et}) + } + if len(etEntries) < 2 { + return toolCalls + } + + updated := make([]MCPToolCall, len(toolCalls)) + copy(updated, toolCalls) + + for i, tc := range updated { + callTS, ok := parseTokenUsageTimestamp(tc.Timestamp) + if !ok { + continue + } + + // Find prev (last entry with ts < callTS) and next (first entry with ts > callTS) + prevIdx := -1 + nextIdx := -1 + for j, e := range etEntries { + if e.ts.Before(callTS) { + prevIdx = j // keep updating to get the last one before callTS + } else if e.ts.After(callTS) && nextIdx == -1 { + nextIdx = j // first entry after callTS + } + } + + if prevIdx == -1 || nextIdx == -1 { + continue + } + + delta := etEntries[nextIdx].et - etEntries[prevIdx].et + if delta > 0 { + updated[i].EffectiveTokenDelta = delta + } + } + + return updated +} + // TotalTokens returns the sum of all token types func (s *TokenUsageSummary) TotalTokens() int { return s.TotalInputTokens + s.TotalOutputTokens + s.TotalCacheReadTokens + s.TotalCacheWriteTokens diff --git a/pkg/cli/token_usage_test.go b/pkg/cli/token_usage_test.go index ec8339bb648..5bb8c2e947d 100644 --- a/pkg/cli/token_usage_test.go +++ b/pkg/cli/token_usage_test.go @@ -324,6 +324,98 @@ func TestAnalyzeTokenUsage(t *testing.T) { }) } +func TestCorrelateToolCallsWithTokenDelta(t *testing.T) { + t.Run("assigns delta to tool calls bracketed by API calls", func(t *testing.T) { + tmpDir := testutil.TempDir(t, "token-delta") + filePath := filepath.Join(tmpDir, "token-usage.jsonl") + // Two API calls; tool call happens between them. + // ET for first entry (model "unknown", default weights, m=1): + // 1.0*1000 + 4.0*50 = 1200 + // ET for second entry: + // 1.0*1500 + 4.0*80 = 1820 + // Expected delta = 1820 - 1200 = 620 + content := `{"timestamp":"2026-05-19T21:10:00.000Z","model":"unknown","provider":"test","input_tokens":1000,"output_tokens":50,"cache_read_tokens":0,"cache_write_tokens":0} +{"timestamp":"2026-05-19T21:10:10.000Z","model":"unknown","provider":"test","input_tokens":1500,"output_tokens":80,"cache_read_tokens":0,"cache_write_tokens":0}` + require.NoError(t, os.WriteFile(filePath, []byte(content+"\n"), 0o644)) + + toolCalls := []MCPToolCall{ + { + Timestamp: "2026-05-19T21:10:05.000Z", + ServerName: "test-server", + ToolName: "test-tool", + }, + } + result := correlateToolCallsWithTokenDelta(toolCalls, filePath) + require.Len(t, result, 1) + assert.Equal(t, 620, result[0].EffectiveTokenDelta, "expected delta = ET(next) - ET(prev)") + }) + + t.Run("leaves delta zero when tool call has no preceding API call", func(t *testing.T) { + tmpDir := testutil.TempDir(t, "token-delta-no-prev") + filePath := filepath.Join(tmpDir, "token-usage.jsonl") + content := `{"timestamp":"2026-05-19T21:10:10.000Z","model":"unknown","provider":"test","input_tokens":1000,"output_tokens":50,"cache_read_tokens":0,"cache_write_tokens":0}` + require.NoError(t, os.WriteFile(filePath, []byte(content+"\n"), 0o644)) + + toolCalls := []MCPToolCall{ + { + Timestamp: "2026-05-19T21:10:05.000Z", // before the only API call + ServerName: "test-server", + ToolName: "test-tool", + }, + } + result := correlateToolCallsWithTokenDelta(toolCalls, filePath) + require.Len(t, result, 1) + assert.Equal(t, 0, result[0].EffectiveTokenDelta, "no delta when no preceding API call") + }) + + t.Run("leaves delta zero when tool call has no following API call", func(t *testing.T) { + tmpDir := testutil.TempDir(t, "token-delta-no-next") + filePath := filepath.Join(tmpDir, "token-usage.jsonl") + content := `{"timestamp":"2026-05-19T21:10:00.000Z","model":"unknown","provider":"test","input_tokens":1000,"output_tokens":50,"cache_read_tokens":0,"cache_write_tokens":0}` + require.NoError(t, os.WriteFile(filePath, []byte(content+"\n"), 0o644)) + + toolCalls := []MCPToolCall{ + { + Timestamp: "2026-05-19T21:10:05.000Z", // after the only API call + ServerName: "test-server", + ToolName: "test-tool", + }, + } + result := correlateToolCallsWithTokenDelta(toolCalls, filePath) + require.Len(t, result, 1) + assert.Equal(t, 0, result[0].EffectiveTokenDelta, "no delta when no following API call") + }) + + t.Run("handles empty token usage file path", func(t *testing.T) { + toolCalls := []MCPToolCall{{Timestamp: "2026-05-19T21:10:05.000Z", ToolName: "t"}} + result := correlateToolCallsWithTokenDelta(toolCalls, "") + require.Len(t, result, 1) + assert.Equal(t, 0, result[0].EffectiveTokenDelta, "no delta with empty file path") + }) + + t.Run("assigns correct deltas to multiple sequential tool calls", func(t *testing.T) { + tmpDir := testutil.TempDir(t, "token-delta-multi") + filePath := filepath.Join(tmpDir, "token-usage.jsonl") + // Three API calls, two tool calls between consecutive pairs. + content := `{"timestamp":"2026-05-19T21:10:00.000Z","model":"unknown","provider":"test","input_tokens":1000,"output_tokens":50,"cache_read_tokens":0,"cache_write_tokens":0} +{"timestamp":"2026-05-19T21:10:10.000Z","model":"unknown","provider":"test","input_tokens":1500,"output_tokens":80,"cache_read_tokens":0,"cache_write_tokens":0} +{"timestamp":"2026-05-19T21:10:20.000Z","model":"unknown","provider":"test","input_tokens":2000,"output_tokens":100,"cache_read_tokens":0,"cache_write_tokens":0}` + require.NoError(t, os.WriteFile(filePath, []byte(content+"\n"), 0o644)) + // ET[0] = 1000 + 4*50 = 1200 + // ET[1] = 1500 + 4*80 = 1820 → delta1 = 620 + // ET[2] = 2000 + 4*100 = 2400 → delta2 = 580 + + toolCalls := []MCPToolCall{ + {Timestamp: "2026-05-19T21:10:05.000Z", ServerName: "s", ToolName: "tool-a"}, + {Timestamp: "2026-05-19T21:10:15.000Z", ServerName: "s", ToolName: "tool-b"}, + } + result := correlateToolCallsWithTokenDelta(toolCalls, filePath) + require.Len(t, result, 2) + assert.Equal(t, 620, result[0].EffectiveTokenDelta, "delta for tool-a") + assert.Equal(t, 580, result[1].EffectiveTokenDelta, "delta for tool-b") + }) +} + func TestCacheEfficiency(t *testing.T) { t.Run("remains zero to avoid transforming raw token counts", func(t *testing.T) { tmpDir := testutil.TempDir(t, "cache-eff")