Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 73 additions & 1 deletion packages/junior/src/chat/pi/traced-stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,75 @@ import {
serializeGenAiAttribute,
} from "@/chat/logging";
import { GEN_AI_PROVIDER_NAME } from "@/chat/pi/client";
import { TURN_CONTEXT_TAG } from "@/chat/turn-context-tag";

const TURN_CONTEXT_OPEN = `<${TURN_CONTEXT_TAG}>`;
const CURRENT_INSTRUCTION_RE =
/<current-instruction[^>]*>\n?([\s\S]*?)\n?<\/current-instruction>/;
const WRAPPER_BLOCKS_RE =
/<(?:thread-background|session-context|turn-context)>[\s\S]*?<\/(?:thread-background|session-context|turn-context)>\n*/g;

/** Extract the user's actual instruction from a `buildUserTurnText` output. */
function extractUserInstruction(text: string): string {
const match = CURRENT_INSTRUCTION_RE.exec(text);
if (match) {
return match[1].trim();
}
return text.replace(WRAPPER_BLOCKS_RE, "").trim();
}

/**
* Clean user messages for the observable `gen_ai.input.messages` attribute:
* 1. Drop `<runtime-turn-context>` content parts entirely (volatile runtime metadata).
* 2. Unwrap `<current-instruction>` / strip `<thread-background>` etc. from text
* so only the actual user instruction remains.
*/
function cleanUserMessagesForObservability(messages: unknown[]): unknown[] {
return messages.map((msg) => {
const record = msg as Record<string, unknown> | null;
if (!record || record.role !== "user") {
return msg;
}

const content = record.content;
if (!Array.isArray(content)) {
return msg;
}

let changed = false;
const cleaned = content
.filter((part) => {
if (
part &&
typeof part === "object" &&
(part as { type?: unknown }).type === "text"
) {
const text = (part as { text?: unknown }).text;
if (typeof text === "string" && text.startsWith(TURN_CONTEXT_OPEN)) {
changed = true;
return false;
}
}
return true;
})
.map((part) => {
if (
part &&
typeof part === "object" &&
(part as { type?: unknown }).type === "text"
) {
const text = (part as { text?: unknown }).text;
if (typeof text === "string" && CURRENT_INSTRUCTION_RE.test(text)) {
changed = true;
return { ...part, text: extractUserInstruction(text) };
}
}
return part;
});

return changed ? { ...record, content: cleaned } : msg;
});
}

// Compose only the OTel GenAI attributes that are knowable at span start
// (request-shape + system instructions). End-of-call attributes such as
Expand All @@ -27,7 +96,10 @@ function buildChatStartAttributes(
"gen_ai.request.model": model.id,
};

const inputMessages = serializeGenAiAttribute(context.messages);
const observableMessages = cleanUserMessagesForObservability(
context.messages,
);
const inputMessages = serializeGenAiAttribute(observableMessages);
if (inputMessages) {
attributes["gen_ai.input.messages"] = inputMessages;
}
Expand Down
56 changes: 56 additions & 0 deletions packages/junior/tests/unit/chat/pi/traced-stream.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,62 @@ describe("createTracedStreamFn", () => {
expect(opts.attributes["gen_ai.request.model"]).toBe("openai/gpt-5.4");
});

it("strips runtime context and XML wrappers from observable user messages", async () => {
const { createTracedStreamFn } = await import("@/chat/pi/traced-stream");
const stream = createAssistantMessageEventStream();
const base = vi.fn(() => stream);

const userText = [
"<thread-background>",
"<thread-transcript>",
' <message index="1" role="user">prior question</message>',
' <message index="2" role="assistant">prior answer</message>',
"</thread-transcript>",
"</thread-background>",
"",
"<session-context>",
"- gen_ai.conversation.id: conv123",
"</session-context>",
"",
'<current-instruction priority="highest">',
"what is sentry?",
"</current-instruction>",
].join("\n");

const traced = createTracedStreamFn(base as unknown as StreamFn);
await traced(
fakeModel("openai/gpt-5.4"),
{
systemPrompt: "you are junior",
messages: [
{
role: "user",
content: [
{
type: "text",
text: "<runtime-turn-context>\nskills and config...\n</runtime-turn-context>",
},
{ type: "text", text: userText },
],
timestamp: 0,
},
],
},
undefined,
);

const opts = startInactiveSpan.mock.calls[0]?.[0] as unknown as {
attributes: Record<string, unknown>;
};
const inputMessages = opts.attributes["gen_ai.input.messages"] as string;
expect(inputMessages).toContain("what is sentry?");
expect(inputMessages).not.toContain("runtime-turn-context");
expect(inputMessages).not.toContain("thread-background");
expect(inputMessages).not.toContain("session-context");
expect(inputMessages).not.toContain("current-instruction");
expect(inputMessages).not.toContain("prior answer");
});

it("sets output.messages, usage tokens, finish_reasons, response.model after stream completion", async () => {
const { createTracedStreamFn } = await import("@/chat/pi/traced-stream");
const stream = createAssistantMessageEventStream();
Expand Down
Loading