-
Notifications
You must be signed in to change notification settings - Fork 391
fix(otlp): always emit gen_ai.response.finish_reasons; use GITHUB_SHA as service.version fallback #33528
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(otlp): always emit gen_ai.response.finish_reasons; use GITHUB_SHA as service.version fallback #33528
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1173,7 +1173,7 @@ async function sendJobSetupSpan(options = {}) { | |
| startMs, | ||
| endMs, | ||
| serviceName, | ||
| scopeVersion: process.env.GH_AW_INFO_VERSION || process.env.GH_AW_INFO_CLI_VERSION || "unknown", | ||
| scopeVersion: process.env.GH_AW_INFO_VERSION || process.env.GH_AW_INFO_CLI_VERSION || process.env.GITHUB_SHA || "unknown", | ||
| attributes, | ||
| resourceAttributes, | ||
| }); | ||
|
|
@@ -1627,7 +1627,7 @@ async function sendJobConclusionSpan(spanName, options = {}) { | |
| const effectiveTokens = rawET ? parseInt(rawET, 10) : NaN; | ||
|
|
||
| const serviceName = process.env.OTEL_SERVICE_NAME || "gh-aw"; | ||
| const version = awInfo.agent_version || awInfo.version || process.env.GH_AW_INFO_VERSION || awInfo.cli_version || process.env.GH_AW_INFO_CLI_VERSION || "unknown"; | ||
| const version = awInfo.agent_version || awInfo.version || process.env.GH_AW_INFO_VERSION || awInfo.cli_version || process.env.GH_AW_INFO_CLI_VERSION || process.env.GITHUB_SHA || "unknown"; | ||
|
|
||
| // Prefer GITHUB_AW_OTEL_TRACE_ID (written to GITHUB_ENV by this job's setup step) so | ||
| // all spans in the same job share one trace. Fall back to aw_context.otel_trace_id | ||
|
|
@@ -1791,11 +1791,12 @@ async function sendJobConclusionSpan(spanName, options = {}) { | |
| if (workflowName) attributes.push(buildAttr("gen_ai.workflow.name", workflowName)); | ||
| if (runtimeMetrics.resolvedModel) attributes.push(buildAttr("gen_ai.response.model", runtimeMetrics.resolvedModel)); | ||
| // Use the engine-reported stop_reason when available; fall back to "timeout" when | ||
| // the agent was killed before it could write a result entry to agent-stdio.log. | ||
| const effectiveStopReason = runtimeMetrics.stopReason || (isAgentTimedOut ? "timeout" : undefined); | ||
| if (effectiveStopReason) { | ||
| attributes.push(buildArrayAttr("gen_ai.response.finish_reasons", [effectiveStopReason])); | ||
| } | ||
| // the agent was killed before it could write a result entry to agent-stdio.log; | ||
| // use "unknown" as a sentinel for engines (e.g. copilot, codex) that do not emit | ||
| // a result entry at all, so that gen_ai.response.finish_reasons is always present | ||
| // and length-truncation is always queryable in Sentry/dashboards. | ||
| const effectiveStopReason = runtimeMetrics.stopReason || (isAgentTimedOut ? "timeout" : "unknown"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/diagnose] The fix correctly addresses the root cause by making
Consider: Would Alternative: Document in a comment that |
||
| attributes.push(buildArrayAttr("gen_ai.response.finish_reasons", [effectiveStopReason])); | ||
| } | ||
|
|
||
| if (agentConclusion) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1822,6 +1822,22 @@ describe("sendJobSetupSpan", () => { | |
| expect(body.resourceSpans[0].scopeSpans[0].scope.version).toBe("v2.5.0"); | ||
| }); | ||
|
|
||
| it("falls back to GITHUB_SHA for service.version when no gh-aw version is available", async () => { | ||
| const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" }); | ||
| vi.stubGlobal("fetch", mockFetch); | ||
|
|
||
| process.env.GH_AW_OTLP_ENDPOINTS = JSON.stringify([{ url: "https://traces.example.com" }]); | ||
| // No GH_AW_INFO_VERSION or GH_AW_INFO_CLI_VERSION — only SHA available | ||
| process.env.GITHUB_SHA = "aabbccdd1122334455667788aabbccdd11223344"; | ||
|
|
||
| await sendJobSetupSpan(); | ||
|
|
||
| const body = JSON.parse(mockFetch.mock.calls[0][1].body); | ||
| const resourceAttrs = body.resourceSpans[0].resource.attributes; | ||
| expect(resourceAttrs).toContainEqual({ key: "service.version", value: { stringValue: "aabbccdd1122334455667788aabbccdd11223344" } }); | ||
| expect(body.resourceSpans[0].scopeSpans[0].scope.version).toBe("aabbccdd1122334455667788aabbccdd11223344"); | ||
| }); | ||
|
|
||
| it("includes gh-aw.awf.version and gh-aw.awmg.version resource attributes from aw_info.json", async () => { | ||
| const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" }); | ||
| vi.stubGlobal("fetch", mockFetch); | ||
|
|
@@ -3040,7 +3056,7 @@ describe("sendJobConclusionSpan", () => { | |
| expect(conclusionAttrs["gen_ai.response.model"]).toBe("claude-sonnet-4-20250514"); | ||
| }); | ||
|
|
||
| it("omits gen_ai.response.finish_reasons from the agent span when stop_reason is absent in agent-stdio.log", async () => { | ||
| it("emits gen_ai.response.finish_reasons=[unknown] on the agent span when stop_reason is absent from agent-stdio.log", async () => { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] Consider adding a test that explicitly validates the documented priority order for Example: it("applies finish_reasons priority: stopReason → timeout → unknown", async () => {
const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" });
vi.stubGlobal("fetch", mockFetch);
process.env.GH_AW_OTLP_ENDPOINTS = JSON.stringify([{ url: "(traces.example.com/redacted)" }]);
// Case 1: stop_reason present (highest priority)
const stdio1 = JSON.stringify({ type: "result", result: { stop_reason: "end_turn" } }) + "\n";
fs.writeFileSync("/tmp/gh-aw/test/agent-stdio.log", stdio1, "utf-8");
await sendJobConclusionSpan("gh-aw.job.conclusion");
let finishAttr = JSON.parse(mockFetch.mock.calls[0][1].body)
.resourceSpans[0].scopeSpans[0].spans[0].attributes
.find(a => a.key === "gen_ai.response.finish_reasons");
expect(finishAttr.value.arrayValue.values).toEqual([{ stringValue: "end_turn" }]);
// Case 2: timeout (no stop_reason, agent timed out)
fs.writeFileSync("/tmp/gh-aw/test/agent-stdio.log", "", "utf-8");
process.env.GITHUB_AW_AGENT_TIMEOUT_TRIGGERED = "true";
await sendJobConclusionSpan("gh-aw.job.conclusion");
finishAttr = JSON.parse(mockFetch.mock.calls[1][1].body)
.resourceSpans[0].scopeSpans[0].spans[0].attributes
.find(a => a.key === "gen_ai.response.finish_reasons");
expect(finishAttr.value.arrayValue.values).toEqual([{ stringValue: "timeout" }]);
// Case 3: unknown (no stop_reason, not timed out)
delete process.env.GITHUB_AW_AGENT_TIMEOUT_TRIGGERED;
await sendJobConclusionSpan("gh-aw.job.conclusion");
finishAttr = JSON.parse(mockFetch.mock.calls[2][1].body)
.resourceSpans[0].scopeSpans[0].spans[0].attributes
.find(a => a.key === "gen_ai.response.finish_reasons");
expect(finishAttr.value.arrayValue.values).toEqual([{ stringValue: "unknown" }]);
});This would serve as executable documentation of the fallback chain and catch regressions if the priority order ever changes unintentionally. |
||
| const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" }); | ||
| vi.stubGlobal("fetch", mockFetch); | ||
|
|
||
|
|
@@ -3065,13 +3081,15 @@ describe("sendJobConclusionSpan", () => { | |
| const agentBody = JSON.parse(mockFetch.mock.calls[0][1].body); | ||
| const agentSpan = agentBody.resourceSpans[0].scopeSpans[0].spans[0]; | ||
| expect(agentSpan.name).toBe("gh-aw.agent.agent"); | ||
| const keys = agentSpan.attributes.map(a => a.key); | ||
| expect(keys).not.toContain("gen_ai.response.finish_reasons"); | ||
| const agentFinishAttr = agentSpan.attributes.find(a => a.key === "gen_ai.response.finish_reasons"); | ||
| expect(agentFinishAttr).toBeDefined(); | ||
| expect(agentFinishAttr.value.arrayValue.values).toEqual([{ stringValue: "unknown" }]); | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] The test verifies that The PR description mentions this affected "5/7 of the failed runs" — the test should validate both the agent span and the conclusion span independently to ensure they both emit the attribute. Currently, lines 3084-3087 test the conclusion span, but consider adding a comment clarifying which span is under test. Suggestion: Add an explicit assertion comment: // Agent span should always emit finish_reasons, even when stop_reason is absent
const agentFinishAttr = agentSpan.attributes.find(a => a.key === "gen_ai.response.finish_reasons");
// Conclusion span should mirror the same behavior
const conclusionFinishAttr = conclusionSpan.attributes.find(a => a.key === "gen_ai.response.finish_reasons"); |
||
| const conclusionBody = JSON.parse(mockFetch.mock.calls[1][1].body); | ||
| const conclusionSpan = conclusionBody.resourceSpans[0].scopeSpans[0].spans[0]; | ||
| const conclusionKeys = conclusionSpan.attributes.map(a => a.key); | ||
| expect(conclusionKeys).not.toContain("gen_ai.response.finish_reasons"); | ||
| const conclusionFinishAttr = conclusionSpan.attributes.find(a => a.key === "gen_ai.response.finish_reasons"); | ||
| expect(conclusionFinishAttr).toBeDefined(); | ||
| expect(conclusionFinishAttr.value.arrayValue.values).toEqual([{ stringValue: "unknown" }]); | ||
| }); | ||
|
|
||
| it("includes gen_ai.response.finish_reasons with max_tokens on the agent span when truncated", async () => { | ||
|
|
@@ -3934,6 +3952,27 @@ describe("sendJobConclusionSpan", () => { | |
| expect(resourceAttrs).toContainEqual({ key: "service.version", value: { stringValue: "v3.0.0" } }); | ||
| }); | ||
|
|
||
| it("falls back to GITHUB_SHA for service.version on conclusion span when no gh-aw version is available", async () => { | ||
| const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" }); | ||
| vi.stubGlobal("fetch", mockFetch); | ||
|
|
||
| process.env.GH_AW_OTLP_ENDPOINTS = JSON.stringify([{ url: "https://traces.example.com" }]); | ||
| // No GH_AW_INFO_VERSION, GH_AW_INFO_CLI_VERSION, or aw_info.json version fields | ||
| process.env.GITHUB_SHA = "deadbeef1234567890abcdef1234567890abcdef"; | ||
|
|
||
| const readFileSpy = vi.spyOn(fs, "readFileSync").mockImplementation(() => { | ||
| throw Object.assign(new Error("ENOENT"), { code: "ENOENT" }); | ||
| }); | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] This test validates the GITHUB_SHA fallback by mocking Edge case to consider: What happens when Suggestion: Add a test case: it("falls back to 'unknown' for service.version when GITHUB_SHA is also unavailable", async () => {
const mockFetch = vi.fn().mockResolvedValue({ ok: true, status: 200, statusText: "OK" });
vi.stubGlobal("fetch", mockFetch);
process.env.GH_AW_OTLP_ENDPOINTS = JSON.stringify([{ url: "(traces.example.com/redacted)" }]);
delete process.env.GITHUB_SHA;
delete process.env.GH_AW_INFO_VERSION;
delete process.env.GH_AW_INFO_CLI_VERSION;
const readFileSpy = vi.spyOn(fs, "readFileSync").mockImplementation(() => {
throw Object.assign(new Error("ENOENT"), { code: "ENOENT" });
});
await sendJobConclusionSpan("gh-aw.job.conclusion");
readFileSpy.mockRestore();
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
const resourceAttrs = body.resourceSpans[0].resource.attributes;
expect(resourceAttrs).toContainEqual({ key: "service.version", value: { stringValue: "unknown" } });
});This ensures the fallback chain is fully tested. |
||
| await sendJobConclusionSpan("gh-aw.job.conclusion"); | ||
| readFileSpy.mockRestore(); | ||
|
|
||
| const body = JSON.parse(mockFetch.mock.calls[0][1].body); | ||
| const resourceAttrs = body.resourceSpans[0].resource.attributes; | ||
| expect(resourceAttrs).toContainEqual({ key: "service.version", value: { stringValue: "deadbeef1234567890abcdef1234567890abcdef" } }); | ||
| expect(body.resourceSpans[0].scopeSpans[0].scope.version).toBe("deadbeef1234567890abcdef1234567890abcdef"); | ||
| }); | ||
|
|
||
| describe("agent_output.json error enrichment", () => { | ||
| let readFileSpy; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[/diagnose] The GITHUB_SHA fallback correctly provides a commit anchor for Sentry. One observability enhancement to consider:
Current behavior:
GITHUB_SHAis a 40-character full commit hash. Sentry release identifiers are typically shorter for readability in dashboards.Suggestion: Consider truncating to the first 7-8 characters when using GITHUB_SHA as a fallback:
This would make Sentry releases more readable while still providing a unique commit anchor. However, full SHAs are also valid — document the decision either way.