From bfbecd702431d46124aab94a82a051a60fafbcfb Mon Sep 17 00:00:00 2001
From: "sentry-junior[bot]"
 <264270552+sentry-junior[bot]@users.noreply.github.com>
Date: Sun, 31 May 2026 18:52:36 +0000
Subject: [PATCH] fix(dashboard): treat transcript text output as markdown or
 json only

Prose sections in parseMarkdownBlocks were using the broad detectLanguage
heuristic, which could return xml/html/typescript/shellscript. When
classified as xml or html, the prose was rendered through StructuredMarkup
(the collapsible XML tree) instead of HighlightedCode, breaking syntax
highlighting for normal LLM chat output.

Changes:
- Add detectOutputLanguage: returns only 'json' (valid JSON/JSONL) or
  'markdown' for LLM text output; no XML/TS/shell heuristics
- Update parseMarkdownBlocks to use detectOutputLanguage for all prose
  sections, keeping broad detectLanguage only for the raw debug view
- Add optional fenced?: boolean to CodeBlock so
  canRenderStructuredMarkup can gate structured XML/HTML rendering on
  explicit fence provenance
- canRenderStructuredMarkup now accepts CodeBlock and requires fenced===true;
  auto-detected prose is never eligible regardless of language
- Switch ThinkingPartView to detectOutputLanguage (LLM reasoning output)
- Keep broad detectLanguage for the raw message view (debug/developer view)
- 13 new format tests covering all prose language scenarios and the
  structured markup eligibility invariant

Fixes: XML markers in unfenced LLM output no longer trigger the
collapsible structured markup renderer.

Co-authored-by: David Cramer <noreply>
---
 packages/junior-dashboard/src/client/code.tsx |  2 +-
 .../src/client/components/TranscriptText.tsx  |  2 +-
 .../src/client/components/TranscriptTurn.tsx  |  3 +-
 .../junior-dashboard/src/client/format.ts     | 44 ++++++++--
 packages/junior-dashboard/src/client/types.ts |  2 +-
 .../junior-dashboard/tests/format.test.ts     | 83 +++++++++++++++++++
 6 files changed, 123 insertions(+), 13 deletions(-)
diff --git a/packages/junior-dashboard/src/client/code.tsx b/packages/junior-dashboard/src/client/code.tsx
index 2707d4d31..95fa491d5 100644
--- a/packages/junior-dashboard/src/client/code.tsx
+++ b/packages/junior-dashboard/src/client/code.tsx
@@ -7,7 +7,7 @@ import type { CodeBlock, MarkupNode } from "./types";
 
 /** Count rendered children so transcripts can decide which markup node expands. */
 export function countStructuredBlockChildren(block: CodeBlock): number {
-  if (!canRenderStructuredMarkup(block.language)) return 1;
+  if (!canRenderStructuredMarkup(block)) return 1;
   const rootCount = parseMarkupNodes(block.code, block.language).length;
   return rootCount > 0 ? rootCount : 1;
 }
diff --git a/packages/junior-dashboard/src/client/components/TranscriptText.tsx b/packages/junior-dashboard/src/client/components/TranscriptText.tsx
index 698ffe023..41e8abb42 100644
--- a/packages/junior-dashboard/src/client/components/TranscriptText.tsx
+++ b/packages/junior-dashboard/src/client/components/TranscriptText.tsx
@@ -21,7 +21,7 @@ export function TranscriptText(props: {
         const childCount = countStructuredBlockChildren(block);
         seenChildren += childCount;
 
-        if (!canRenderStructuredMarkup(block.language)) {
+        if (!canRenderStructuredMarkup(block)) {
           return (
             <HighlightedCode
               code={block.code}
diff --git a/packages/junior-dashboard/src/client/components/TranscriptTurn.tsx b/packages/junior-dashboard/src/client/components/TranscriptTurn.tsx
index aaaf79525..20b228153 100644
--- a/packages/junior-dashboard/src/client/components/TranscriptTurn.tsx
+++ b/packages/junior-dashboard/src/client/components/TranscriptTurn.tsx
@@ -3,6 +3,7 @@ import { useState, type ClipboardEventHandler, type ReactNode } from "react";
 import { HighlightedCode } from "../code";
 import {
   detectLanguage,
+  detectOutputLanguage,
   formatBytes,
   formatMessageOffset,
   formatMessageTimestamp,
@@ -494,7 +495,7 @@ function ThinkingPartView(props: { value: unknown }) {
       <div className="border-t border-[#beaaff]/15 px-3 py-3">
         <HighlightedCode
           code={rendered || "{}"}
-          language={detectLanguage(rendered)}
+          language={detectOutputLanguage(rendered)}
         />
       </div>
     </details>
diff --git a/packages/junior-dashboard/src/client/format.ts b/packages/junior-dashboard/src/client/format.ts
index 59cde56a4..6c086bef9 100644
--- a/packages/junior-dashboard/src/client/format.ts
+++ b/packages/junior-dashboard/src/client/format.ts
@@ -511,9 +511,34 @@ function formatCodeBlock(code: string, language: BundledLanguage): string {
   return language === "json" ? (prettyJsonData(code) ?? code) : code;
 }
 
-/** Decide whether a fenced block can use the interactive markup renderer. */
-export function canRenderStructuredMarkup(language: BundledLanguage): boolean {
-  return language === "xml" || language === "html";
+/**
+ * Detect the language for LLM text output prose: json if the text is valid
+ * JSON or JSONL, markdown otherwise. Never auto-detects XML, HTML, TypeScript,
+ * or shell — those heuristics are unreliable for rendered assistant output.
+ */
+export function detectOutputLanguage(text: string): BundledLanguage {
+  const trimmed = text.trim();
+  if (!trimmed) return "markdown";
+  try {
+    JSON.parse(trimmed);
+    return "json";
+  } catch {
+    // continue
+  }
+  if (prettyJsonl(trimmed)) return "json";
+  return "markdown";
+}
+
+/**
+ * Decide whether a fenced block can use the interactive markup renderer.
+ * Structured XML/HTML rendering is only enabled for explicitly-fenced blocks;
+ * auto-detected prose is never eligible regardless of inferred language.
+ */
+export function canRenderStructuredMarkup(block: CodeBlock): boolean {
+  return (
+    block.fenced === true &&
+    (block.language === "xml" || block.language === "html")
+  );
 }
 
 /** Parse markdown into renderable code blocks while preserving plain text blocks. */
@@ -525,24 +550,25 @@ export function parseMarkdownBlocks(text: string): CodeBlock[] {
   while ((match = fence.exec(text))) {
     const prose = text.slice(cursor, match.index).trim();
     if (prose) {
-      const language = detectLanguage(prose);
-      blocks.push({ code: formatCodeBlock(prose, language), language });
+      const language = detectOutputLanguage(prose);
+      blocks.push({ code: formatCodeBlock(prose, language), fenced: false, language });
     }
     const language = normalizeLanguage(match[1]);
     blocks.push({
       code: formatCodeBlock(match[2] ?? "", language),
+      fenced: true,
       language,
     });
     cursor = match.index + match[0].length;
   }
   const rest = text.slice(cursor).trim();
   if (rest) {
-    const language = detectLanguage(rest);
-    blocks.push({ code: formatCodeBlock(rest, language), language });
+    const language = detectOutputLanguage(rest);
+    blocks.push({ code: formatCodeBlock(rest, language), fenced: false, language });
   }
   if (blocks.length > 0) return blocks;
-  const language = detectLanguage(text);
-  return [{ code: formatCodeBlock(text, language), language }];
+  const language = detectOutputLanguage(text);
+  return [{ code: formatCodeBlock(text, language), fenced: false, language }];
 }
 
 /** Parse XML/HTML-ish fragments for the collapsible transcript renderer. */
diff --git a/packages/junior-dashboard/src/client/types.ts b/packages/junior-dashboard/src/client/types.ts
index a15a3225d..56c9ede4b 100644
--- a/packages/junior-dashboard/src/client/types.ts
+++ b/packages/junior-dashboard/src/client/types.ts
@@ -141,7 +141,7 @@ export type SessionFilter = "active" | "recent" | "hung" | "failed" | "all";
 
 export type VisualStatus = "active" | "failed" | "hung" | "idle";
 
-export type CodeBlock = { code: string; language: BundledLanguage };
+export type CodeBlock = { code: string; fenced?: boolean; language: BundledLanguage };
 
 export type MarkupNode =
   | {
diff --git a/packages/junior-dashboard/tests/format.test.ts b/packages/junior-dashboard/tests/format.test.ts
index d1172bf20..bd4351481 100644
--- a/packages/junior-dashboard/tests/format.test.ts
+++ b/packages/junior-dashboard/tests/format.test.ts
@@ -1,9 +1,11 @@
 import { describe, expect, it } from "vitest";
 
 import {
+  canRenderStructuredMarkup,
   formatDurationTotal,
   formatTokenTotal,
   formatUsageTotal,
+  parseMarkdownBlocks,
   turnMessageCount,
 } from "../src/client/format";
 import type { ConversationTurn } from "../src/client/types";
@@ -71,3 +73,84 @@ describe("dashboard token formatting", () => {
     expect(turnMessageCount(turn)).toBe(2);
   });
 });
+
+describe("parseMarkdownBlocks output language detection", () => {
+  it("treats XML-looking prose as markdown, never auto-detects XML", () => {
+    const [block] = parseMarkdownBlocks("<foo>bar</foo>");
+    expect(block?.language).toBe("markdown");
+    expect(block?.fenced).toBe(false);
+  });
+
+  it("treats HTML-looking prose as markdown", () => {
+    const [block] = parseMarkdownBlocks("<div>Hello</div>");
+    expect(block?.language).toBe("markdown");
+  });
+
+  it("treats TypeScript-looking prose as markdown", () => {
+    const [block] = parseMarkdownBlocks("const value = 1;");
+    expect(block?.language).toBe("markdown");
+  });
+
+  it("treats shell-looking prose as markdown", () => {
+    const [block] = parseMarkdownBlocks("npm install");
+    expect(block?.language).toBe("markdown");
+  });
+
+  it("detects valid JSON prose as json and pretty-prints it", () => {
+    const [block] = parseMarkdownBlocks('{"a":1}');
+    expect(block?.language).toBe("json");
+    expect(block?.code).toBe('{\n  "a": 1\n}');
+    expect(block?.fenced).toBe(false);
+  });
+
+  it("marks prose blocks as not fenced", () => {
+    const blocks = parseMarkdownBlocks("some prose text");
+    expect(blocks[0]?.fenced).toBe(false);
+  });
+
+  it("marks explicit fenced blocks as fenced", () => {
+    const blocks = parseMarkdownBlocks("before\n```xml\n<foo/>\n```\nafter");
+    expect(blocks[1]?.language).toBe("xml");
+    expect(blocks[1]?.fenced).toBe(true);
+  });
+
+  it("keeps prose blocks as markdown when fenced XML is present", () => {
+    const blocks = parseMarkdownBlocks("before\n```xml\n<foo/>\n```\nafter");
+    expect(blocks[0]?.language).toBe("markdown");
+    expect(blocks[0]?.fenced).toBe(false);
+    expect(blocks[2]?.language).toBe("markdown");
+    expect(blocks[2]?.fenced).toBe(false);
+  });
+});
+
+describe("canRenderStructuredMarkup", () => {
+  it("returns false for auto-detected prose (fenced: false)", () => {
+    expect(
+      canRenderStructuredMarkup({ code: "<foo/>", language: "xml", fenced: false }),
+    ).toBe(false);
+  });
+
+  it("returns true for explicitly-fenced xml", () => {
+    expect(
+      canRenderStructuredMarkup({ code: "<foo/>", language: "xml", fenced: true }),
+    ).toBe(true);
+  });
+
+  it("returns true for explicitly-fenced html", () => {
+    expect(
+      canRenderStructuredMarkup({ code: "<div/>", language: "html", fenced: true }),
+    ).toBe(true);
+  });
+
+  it("returns false for fenced non-xml/html blocks", () => {
+    expect(
+      canRenderStructuredMarkup({ code: "const x = 1", language: "typescript", fenced: true }),
+    ).toBe(false);
+  });
+
+  it("returns false when fenced is undefined", () => {
+    expect(
+      canRenderStructuredMarkup({ code: "<foo/>", language: "xml" }),
+    ).toBe(false);
+  });
+});