From bfbecd702431d46124aab94a82a051a60fafbcfb Mon Sep 17 00:00:00 2001
From: "sentry-junior[bot]" <264270552+sentry-junior[bot]@users.noreply.github.com>
Date: Sun, 31 May 2026 18:52:36 +0000
Subject: [PATCH] fix(dashboard): treat transcript text output as markdown or json only

Prose sections in parseMarkdownBlocks were using the broad detectLanguage
heuristic, which could return xml/html/typescript/shellscript. When
classified as xml or html, the prose was rendered through StructuredMarkup
(the collapsible XML tree) instead of HighlightedCode, breaking syntax
highlighting for normal LLM chat output.

Changes:
- Add detectOutputLanguage: returns only 'json' (valid JSON/JSONL) or
  'markdown' for LLM text output; no XML/TS/shell heuristics
- Update parseMarkdownBlocks to use detectOutputLanguage for all prose
  sections, keeping broad detectLanguage only for the raw debug view
- Add an optional fenced?: boolean to CodeBlock so canRenderStructuredMarkup
  can gate structured XML/HTML rendering on explicit fence provenance
- canRenderStructuredMarkup now accepts a CodeBlock and requires
  fenced === true (an omitted flag counts as not fenced); auto-detected
  prose is never eligible regardless of language
- ThinkingPartView switches to detectOutputLanguage (LLM reasoning output)
- Raw message view keeps broad detectLanguage (debug/developer view)
- 13 new format tests covering all prose language scenarios and the
  structured markup eligibility invariant

Fixes: XML markers in raw LLM output no longer trigger the collapsible
structured markup renderer.

Co-authored-by: David Cramer --- packages/junior-dashboard/src/client/code.tsx | 2 +- .../src/client/components/TranscriptText.tsx | 2 +- .../src/client/components/TranscriptTurn.tsx | 3 +- .../junior-dashboard/src/client/format.ts | 44 ++++++++-- packages/junior-dashboard/src/client/types.ts | 2 +- .../junior-dashboard/tests/format.test.ts | 83 +++++++++++++++++++ 6 files changed, 123 insertions(+), 13 deletions(-) diff --git a/packages/junior-dashboard/src/client/code.tsx b/packages/junior-dashboard/src/client/code.tsx index 2707d4d31..95fa491d5 100644 --- a/packages/junior-dashboard/src/client/code.tsx +++ b/packages/junior-dashboard/src/client/code.tsx @@ -7,7 +7,7 @@ import type { CodeBlock, MarkupNode } from "./types"; /** Count rendered children so transcripts can decide which markup node expands. */ export function countStructuredBlockChildren(block: CodeBlock): number { - if (!canRenderStructuredMarkup(block.language)) return 1; + if (!canRenderStructuredMarkup(block)) return 1; const rootCount = parseMarkupNodes(block.code, block.language).length; return rootCount > 0 ? 
rootCount : 1; } diff --git a/packages/junior-dashboard/src/client/components/TranscriptText.tsx b/packages/junior-dashboard/src/client/components/TranscriptText.tsx index 698ffe023..41e8abb42 100644 --- a/packages/junior-dashboard/src/client/components/TranscriptText.tsx +++ b/packages/junior-dashboard/src/client/components/TranscriptText.tsx @@ -21,7 +21,7 @@ export function TranscriptText(props: { const childCount = countStructuredBlockChildren(block); seenChildren += childCount; - if (!canRenderStructuredMarkup(block.language)) { + if (!canRenderStructuredMarkup(block)) { return ( diff --git a/packages/junior-dashboard/src/client/format.ts b/packages/junior-dashboard/src/client/format.ts index 59cde56a4..6c086bef9 100644 --- a/packages/junior-dashboard/src/client/format.ts +++ b/packages/junior-dashboard/src/client/format.ts @@ -511,9 +511,34 @@ function formatCodeBlock(code: string, language: BundledLanguage): string { return language === "json" ? (prettyJsonData(code) ?? code) : code; } -/** Decide whether a fenced block can use the interactive markup renderer. */ -export function canRenderStructuredMarkup(language: BundledLanguage): boolean { - return language === "xml" || language === "html"; +/** + * Detect the language for LLM text output prose: json if the text is valid + * JSON or JSONL, markdown otherwise. Never auto-detects XML, HTML, TypeScript, + * or shell — those heuristics are unreliable for rendered assistant output. + */ +export function detectOutputLanguage(text: string): BundledLanguage { + const trimmed = text.trim(); + if (!trimmed) return "markdown"; + try { + JSON.parse(trimmed); + return "json"; + } catch { + // continue + } + if (prettyJsonl(trimmed)) return "json"; + return "markdown"; +} + +/** + * Decide whether a fenced block can use the interactive markup renderer. + * Structured XML/HTML rendering is only enabled for explicitly-fenced blocks; + * auto-detected prose is never eligible regardless of inferred language. 
+ */ +export function canRenderStructuredMarkup(block: CodeBlock): boolean { + return ( + block.fenced === true && + (block.language === "xml" || block.language === "html") + ); } /** Parse markdown into renderable code blocks while preserving plain text blocks. */ @@ -525,24 +550,25 @@ export function parseMarkdownBlocks(text: string): CodeBlock[] { while ((match = fence.exec(text))) { const prose = text.slice(cursor, match.index).trim(); if (prose) { - const language = detectLanguage(prose); - blocks.push({ code: formatCodeBlock(prose, language), language }); + const language = detectOutputLanguage(prose); + blocks.push({ code: formatCodeBlock(prose, language), fenced: false, language }); } const language = normalizeLanguage(match[1]); blocks.push({ code: formatCodeBlock(match[2] ?? "", language), + fenced: true, language, }); cursor = match.index + match[0].length; } const rest = text.slice(cursor).trim(); if (rest) { - const language = detectLanguage(rest); - blocks.push({ code: formatCodeBlock(rest, language), language }); + const language = detectOutputLanguage(rest); + blocks.push({ code: formatCodeBlock(rest, language), fenced: false, language }); } if (blocks.length > 0) return blocks; - const language = detectLanguage(text); - return [{ code: formatCodeBlock(text, language), language }]; + const language = detectOutputLanguage(text); + return [{ code: formatCodeBlock(text, language), fenced: false, language }]; } /** Parse XML/HTML-ish fragments for the collapsible transcript renderer. 
*/ diff --git a/packages/junior-dashboard/src/client/types.ts b/packages/junior-dashboard/src/client/types.ts index a15a3225d..56c9ede4b 100644 --- a/packages/junior-dashboard/src/client/types.ts +++ b/packages/junior-dashboard/src/client/types.ts @@ -141,7 +141,7 @@ export type SessionFilter = "active" | "recent" | "hung" | "failed" | "all"; export type VisualStatus = "active" | "failed" | "hung" | "idle"; -export type CodeBlock = { code: string; language: BundledLanguage }; +export type CodeBlock = { code: string; fenced?: boolean; language: BundledLanguage }; export type MarkupNode = | { diff --git a/packages/junior-dashboard/tests/format.test.ts b/packages/junior-dashboard/tests/format.test.ts index d1172bf20..bd4351481 100644 --- a/packages/junior-dashboard/tests/format.test.ts +++ b/packages/junior-dashboard/tests/format.test.ts @@ -1,9 +1,11 @@ import { describe, expect, it } from "vitest"; import { + canRenderStructuredMarkup, formatDurationTotal, formatTokenTotal, formatUsageTotal, + parseMarkdownBlocks, turnMessageCount, } from "../src/client/format"; import type { ConversationTurn } from "../src/client/types"; @@ -71,3 +73,84 @@ describe("dashboard token formatting", () => { expect(turnMessageCount(turn)).toBe(2); }); }); + +describe("parseMarkdownBlocks output language detection", () => { + it("treats XML-looking prose as markdown, never auto-detects XML", () => { + const [block] = parseMarkdownBlocks("bar"); + expect(block?.language).toBe("markdown"); + expect(block?.fenced).toBe(false); + }); + + it("treats HTML-looking prose as markdown", () => { + const [block] = parseMarkdownBlocks("
Hello
"); + expect(block?.language).toBe("markdown"); + }); + + it("treats TypeScript-looking prose as markdown", () => { + const [block] = parseMarkdownBlocks("const value = 1;"); + expect(block?.language).toBe("markdown"); + }); + + it("treats shell-looking prose as markdown", () => { + const [block] = parseMarkdownBlocks("npm install"); + expect(block?.language).toBe("markdown"); + }); + + it("detects valid JSON prose as json and pretty-prints it", () => { + const [block] = parseMarkdownBlocks('{"a":1}'); + expect(block?.language).toBe("json"); + expect(block?.code).toBe('{\n "a": 1\n}'); + expect(block?.fenced).toBe(false); + }); + + it("marks prose blocks as not fenced", () => { + const blocks = parseMarkdownBlocks("some prose text"); + expect(blocks[0]?.fenced).toBe(false); + }); + + it("marks explicit fenced blocks as fenced", () => { + const blocks = parseMarkdownBlocks("before\n```xml\n\n```\nafter"); + expect(blocks[1]?.language).toBe("xml"); + expect(blocks[1]?.fenced).toBe(true); + }); + + it("keeps prose blocks as markdown when fenced XML is present", () => { + const blocks = parseMarkdownBlocks("before\n```xml\n\n```\nafter"); + expect(blocks[0]?.language).toBe("markdown"); + expect(blocks[0]?.fenced).toBe(false); + expect(blocks[2]?.language).toBe("markdown"); + expect(blocks[2]?.fenced).toBe(false); + }); +}); + +describe("canRenderStructuredMarkup", () => { + it("returns false for auto-detected prose (fenced: false)", () => { + expect( + canRenderStructuredMarkup({ code: "", language: "xml", fenced: false }), + ).toBe(false); + }); + + it("returns true for explicitly-fenced xml", () => { + expect( + canRenderStructuredMarkup({ code: "", language: "xml", fenced: true }), + ).toBe(true); + }); + + it("returns true for explicitly-fenced html", () => { + expect( + canRenderStructuredMarkup({ code: "
", language: "html", fenced: true }), + ).toBe(true); + }); + + it("returns false for fenced non-xml/html blocks", () => { + expect( + canRenderStructuredMarkup({ code: "const x = 1", language: "typescript", fenced: true }), + ).toBe(false); + }); + + it("returns false when fenced is undefined", () => { + expect( + canRenderStructuredMarkup({ code: "", language: "xml" }), + ).toBe(false); + }); +});