diff --git a/packages/junior/src/chat/pi/traced-stream.ts b/packages/junior/src/chat/pi/traced-stream.ts index e6ee7328..49295b3c 100644 --- a/packages/junior/src/chat/pi/traced-stream.ts +++ b/packages/junior/src/chat/pi/traced-stream.ts @@ -13,6 +13,75 @@ import { serializeGenAiAttribute, } from "@/chat/logging"; import { GEN_AI_PROVIDER_NAME } from "@/chat/pi/client"; +import { TURN_CONTEXT_TAG } from "@/chat/turn-context-tag"; + +const TURN_CONTEXT_OPEN = `<${TURN_CONTEXT_TAG}>`; +const CURRENT_INSTRUCTION_RE = + /]*>\n?([\s\S]*?)\n?<\/current-instruction>/; +const WRAPPER_BLOCKS_RE = + /<(?:thread-background|session-context|turn-context)>[\s\S]*?<\/(?:thread-background|session-context|turn-context)>\n*/g; + +/** Extract the user's actual instruction from a `buildUserTurnText` output. */ +function extractUserInstruction(text: string): string { + const match = CURRENT_INSTRUCTION_RE.exec(text); + if (match) { + return match[1].trim(); + } + return text.replace(WRAPPER_BLOCKS_RE, "").trim(); +} + +/** + * Clean user messages for the observable `gen_ai.input.messages` attribute: + * 1. Drop `` content parts entirely (volatile runtime metadata). + * 2. Unwrap `` / strip `` etc. from text + * so only the actual user instruction remains. + */ +function cleanUserMessagesForObservability(messages: unknown[]): unknown[] { + return messages.map((msg) => { + const record = msg as Record | null; + if (!record || record.role !== "user") { + return msg; + } + + const content = record.content; + if (!Array.isArray(content)) { + return msg; + } + + let changed = false; + const cleaned = content + .filter((part) => { + if ( + part && + typeof part === "object" && + (part as { type?: unknown }).type === "text" + ) { + const text = (part as { text?: unknown }).text; + if (typeof text === "string" && text.startsWith(TURN_CONTEXT_OPEN)) { + changed = true; + return false; + } + } + return true; + }) + .map((part) => { + if ( + part && + typeof part === "object" && + (part as { type?: unknown }).type === "text" + ) { + const text = (part as { text?: unknown }).text; + if (typeof text === "string" && CURRENT_INSTRUCTION_RE.test(text)) { + changed = true; + return { ...part, text: extractUserInstruction(text) }; + } + } + return part; + }); + + return changed ? { ...record, content: cleaned } : msg; + }); +} // Compose only the OTel GenAI attributes that are knowable at span start // (request-shape + system instructions). End-of-call attributes such as @@ -27,7 +96,10 @@ function buildChatStartAttributes( "gen_ai.request.model": model.id, }; - const inputMessages = serializeGenAiAttribute(context.messages); + const observableMessages = cleanUserMessagesForObservability( + context.messages, + ); + const inputMessages = serializeGenAiAttribute(observableMessages); if (inputMessages) { attributes["gen_ai.input.messages"] = inputMessages; } diff --git a/packages/junior/tests/unit/chat/pi/traced-stream.test.ts b/packages/junior/tests/unit/chat/pi/traced-stream.test.ts index c4636d5f..863743a1 100644 --- a/packages/junior/tests/unit/chat/pi/traced-stream.test.ts +++ b/packages/junior/tests/unit/chat/pi/traced-stream.test.ts @@ -116,6 +116,62 @@ describe("createTracedStreamFn", () => { expect(opts.attributes["gen_ai.request.model"]).toBe("openai/gpt-5.4"); }); + it("strips runtime context and XML wrappers from observable user messages", async () => { + const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); + const stream = createAssistantMessageEventStream(); + const base = vi.fn(() => stream); + + const userText = [ + "", + "", + ' prior question', + ' prior answer', + "", + "", + "", + "", + "- gen_ai.conversation.id: conv123", + "", + "", + '', + "what is sentry?", + "", + ].join("\n"); + + const traced = createTracedStreamFn(base as unknown as StreamFn); + await traced( + fakeModel("openai/gpt-5.4"), + { + systemPrompt: "you are junior", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "\nskills and config...\n", + }, + { type: "text", text: userText }, + ], + timestamp: 0, + }, + ], + }, + undefined, + ); + + const opts = startInactiveSpan.mock.calls[0]?.[0] as unknown as { + attributes: Record; + }; + const inputMessages = opts.attributes["gen_ai.input.messages"] as string; + expect(inputMessages).toContain("what is sentry?"); + expect(inputMessages).not.toContain("runtime-turn-context"); + expect(inputMessages).not.toContain("thread-background"); + expect(inputMessages).not.toContain("session-context"); + expect(inputMessages).not.toContain("current-instruction"); + expect(inputMessages).not.toContain("prior answer"); + }); + it("sets output.messages, usage tokens, finish_reasons, response.model after stream completion", async () => { const { createTracedStreamFn } = await import("@/chat/pi/traced-stream"); const stream = createAssistantMessageEventStream();