Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 57 additions & 3 deletions actions/setup/js/send_otlp_span.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -1522,17 +1522,67 @@ function getErrorMessage(errorEntry) {
* @property {number | undefined} estimatedCostUsd
* @property {string | undefined} stopReason
* @property {string | undefined} resolvedModel
* @property {{input_tokens?: number, output_tokens?: number, cache_read_tokens?: number, cache_write_tokens?: number} | undefined} tokenUsage
* @property {number} warningCount
*/

/**
* Read turns, estimated cost, and warning volume from agent-stdio.log.
* Normalize token usage counters from an engine result event usage block.
*
* @param {unknown} rawUsage
* @returns {{input_tokens?: number, output_tokens?: number, cache_read_tokens?: number, cache_write_tokens?: number} | undefined}
*/
function normalizeRuntimeTokenUsage(rawUsage) {
  // Only non-null, non-array objects can carry usage counters.
  if (rawUsage === null || typeof rawUsage !== "object" || Array.isArray(rawUsage)) {
    return undefined;
  }

  const source = /** @type {Record<string, unknown>} */ (rawUsage);

  /**
   * Return the value of the first listed key that holds a finite,
   * non-negative number, or undefined when none qualifies.
   *
   * @param {string[]} keys
   * @returns {number | undefined}
   */
  const firstValidCount = keys => {
    for (const key of keys) {
      const value = source[key];
      if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
        return value;
      }
    }
    return undefined;
  };

  // Normalized field name -> accepted source field names, in priority order.
  // The *_input_tokens spellings are only consulted when the primary
  // cache_read_tokens / cache_write_tokens value is missing or invalid.
  /** @type {Array<[string, string[]]>} */
  const fieldAliases = [
    ["input_tokens", ["input_tokens"]],
    ["output_tokens", ["output_tokens"]],
    ["cache_read_tokens", ["cache_read_tokens", "cache_read_input_tokens"]],
    ["cache_write_tokens", ["cache_write_tokens", "cache_creation_input_tokens"]],
  ];

  /** @type {Record<string, number>} */
  const result = {};
  let hasAnyCounter = false;
  for (const [targetField, candidates] of fieldAliases) {
    const count = firstValidCount(candidates);
    if (count !== undefined) {
      result[targetField] = count;
      hasAnyCounter = true;
    }
  }

  // An object without a single usable counter is reported as "no usage".
  return hasAnyCounter ? result : undefined;
}

/**
* Read turns, estimated cost, token usage, and warning volume from agent-stdio.log.
*
* @returns {AgentRuntimeMetrics}
*/
function readAgentRuntimeMetrics() {
/** @type {AgentRuntimeMetrics} */
const metrics = { turns: undefined, estimatedCostUsd: undefined, stopReason: undefined, resolvedModel: undefined, warningCount: 0 };
const metrics = { turns: undefined, estimatedCostUsd: undefined, stopReason: undefined, resolvedModel: undefined, tokenUsage: undefined, warningCount: 0 };

try {
const content = fs.readFileSync(AGENT_STDIO_LOG_PATH, "utf8");
Expand Down Expand Up @@ -1565,6 +1615,10 @@ function readAgentRuntimeMetrics() {
if (typeof parsed.stop_reason === "string" && parsed.stop_reason) {
metrics.stopReason = parsed.stop_reason;
}
const tokenUsage = normalizeRuntimeTokenUsage(parsed.usage);
if (tokenUsage) {
metrics.tokenUsage = tokenUsage;
}
};

for (const rawLine of lines) {
Expand Down Expand Up @@ -2016,7 +2070,7 @@ async function sendJobConclusionSpan(spanName, options = {}) {
// to avoid double-counting in backends that sum gen_ai.usage.* across all spans.
// When no agent span is emitted the attributes fall through to the conclusion span
// so a single query is still sufficient for observability.
const agentUsage = readJSONIfExists("/tmp/gh-aw/agent_usage.json") || {};
const agentUsage = readJSONIfExists("/tmp/gh-aw/agent_usage.json") || runtimeMetrics.tokenUsage || {};
const usageAttrs = [];
if (typeof agentUsage.input_tokens === "number" && agentUsage.input_tokens > 0) {
usageAttrs.push(buildAttr("gen_ai.usage.input_tokens", agentUsage.input_tokens));
Expand Down
63 changes: 63 additions & 0 deletions actions/setup/js/send_otlp_span.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -5145,6 +5145,69 @@ describe("sendJobConclusionSpan", () => {
expect(attrs["gen_ai.usage.total_tokens"]).toBe(3150);
});

// Scenario: only agent-stdio.log is readable, so the conclusion span must take its
// token usage attributes from the "result" event's usage block in that log.
it("falls back to agent-stdio.log usage when agent_usage.json is absent", async () => {
// Stub the global fetch so the span export can be captured and inspected.
const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" });
vi.stubGlobal("fetch", mockFetch);

process.env.INPUT_JOB_NAME = "agent";
process.env.GH_AW_AGENT_CONCLUSION = "failure";
// Every stat call fails with ENOENT.
// NOTE(review): presumably this makes the optional on-disk artifacts look missing; confirm against the code under test.
statSpy.mockImplementation(() => {
throw Object.assign(new Error("ENOENT"), { code: "ENOENT" });
});
process.env.GH_AW_OTLP_ENDPOINTS = JSON.stringify([{ url: "https://traces.example.com" }]);

// Only agent-stdio.log can be read; every other path (including agent_usage.json) throws ENOENT.
// The log line uses the cache_read_input_tokens / cache_creation_input_tokens field names.
readFileSpy.mockImplementation(filePath => {
if (filePath === "/tmp/gh-aw/agent-stdio.log") {
return '{"type":"result","num_turns":3,"usage":{"input_tokens":48200,"output_tokens":1350,"cache_read_input_tokens":41000,"cache_creation_input_tokens":3100}}\n';
}
throw Object.assign(new Error("ENOENT"), { code: "ENOENT" });
});

await sendJobConclusionSpan("gh-aw.agent.conclusion");

// Exactly one export request is expected; flatten its span attributes into a key -> value map.
expect(mockFetch.mock.calls.length).toBe(1);
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
const attrs = Object.fromEntries(body.resourceSpans[0].scopeSpans[0].spans[0].attributes.map(a => [a.key, a.value.intValue ?? a.value.stringValue]));
expect(attrs["gen_ai.usage.input_tokens"]).toBe(48200);
expect(attrs["gen_ai.usage.output_tokens"]).toBe(1350);
expect(attrs["gen_ai.usage.cache_read.input_tokens"]).toBe(41000);
expect(attrs["gen_ai.usage.cache_creation.input_tokens"]).toBe(3100);
// The expected total is input + output only; the cache counters are not added in.
expect(attrs["gen_ai.usage.total_tokens"]).toBe(48200 + 1350);
});

// Scenario: both usage sources are readable with deliberately different numbers,
// so the assertions reveal which source the conclusion span used.
it("prefers agent_usage.json over agent-stdio.log usage when both are present", async () => {
// Stub the global fetch so the span export can be captured and inspected.
const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" });
vi.stubGlobal("fetch", mockFetch);

process.env.INPUT_JOB_NAME = "agent";
process.env.GH_AW_AGENT_CONCLUSION = "failure";
// Every stat call fails with ENOENT.
// NOTE(review): presumably this makes the optional on-disk artifacts look missing; confirm against the code under test.
statSpy.mockImplementation(() => {
throw Object.assign(new Error("ENOENT"), { code: "ENOENT" });
});
process.env.GH_AW_OTLP_ENDPOINTS = JSON.stringify([{ url: "https://traces.example.com" }]);

// agent_usage.json and agent-stdio.log report conflicting counters; any other path throws ENOENT.
readFileSpy.mockImplementation(filePath => {
if (filePath === "/tmp/gh-aw/agent_usage.json") {
return JSON.stringify({ input_tokens: 3000, output_tokens: 150, cache_read_tokens: 200, cache_write_tokens: 75 });
}
if (filePath === "/tmp/gh-aw/agent-stdio.log") {
return '{"type":"result","usage":{"input_tokens":9999,"output_tokens":888,"cache_read_input_tokens":777,"cache_creation_input_tokens":66}}\n';
}
throw Object.assign(new Error("ENOENT"), { code: "ENOENT" });
});

await sendJobConclusionSpan("gh-aw.agent.conclusion");

// Exactly one export request is expected; flatten its span attributes into a key -> value map.
expect(mockFetch.mock.calls.length).toBe(1);
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
const attrs = Object.fromEntries(body.resourceSpans[0].scopeSpans[0].spans[0].attributes.map(a => [a.key, a.value.intValue ?? a.value.stringValue]));
// All expected values are the agent_usage.json numbers, none from the log line.
expect(attrs["gen_ai.usage.input_tokens"]).toBe(3000);
expect(attrs["gen_ai.usage.output_tokens"]).toBe(150);
expect(attrs["gen_ai.usage.cache_read.input_tokens"]).toBe(200);
expect(attrs["gen_ai.usage.cache_creation.input_tokens"]).toBe(75);
// 3150 = 3000 input + 150 output.
expect(attrs["gen_ai.usage.total_tokens"]).toBe(3150);
});

it("omits gen_ai tokens from downstream job even when all agent files are readable from artifact", async () => {
const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" });
vi.stubGlobal("fetch", mockFetch);
Expand Down