diff --git a/chat-ui/src/components/assistant-message.tsx b/chat-ui/src/components/assistant-message.tsx
index fc4df8b2..0b5be3bb 100644
--- a/chat-ui/src/components/assistant-message.tsx
+++ b/chat-ui/src/components/assistant-message.tsx
@@ -1,4 +1,5 @@
import type { ChatMessage, ThinkingBlockState, ToolCallState } from "@/lib/chat-types";
+import { getAssistantTextBlocks } from "@/lib/chat-message-content";
import { Markdown } from "./markdown";
import { ThinkingBlock } from "./thinking-block";
import { ToolCallCard } from "./tool-call-card";
@@ -12,8 +13,8 @@ export function AssistantMessage({
toolCalls: ToolCallState[];
thinkingBlocks: ThinkingBlockState[];
}) {
- const textContent =
- message.content.find((b) => b.type === "text")?.text ?? "";
+ const textBlocks = getAssistantTextBlocks(message);
+ const hasText = textBlocks.length > 0;
const isStreaming = message.status === "streaming";
@@ -28,9 +29,11 @@ export function AssistantMessage({
))}
- {textContent && }
+ {textBlocks.map((textContent, index) => (
+
+ ))}
- {isStreaming && !textContent && toolCalls.length === 0 && (
+ {isStreaming && !hasText && toolCalls.length === 0 && (
diff --git a/chat-ui/src/components/chat-input.tsx b/chat-ui/src/components/chat-input.tsx
index cdb778c8..261941de 100644
--- a/chat-ui/src/components/chat-input.tsx
+++ b/chat-ui/src/components/chat-input.tsx
@@ -15,7 +15,7 @@ export function ChatInput({
onRemoveFile,
initialText,
}: {
- onSend: (text: string) => void;
+ onSend: (text: string) => boolean | void | Promise
;
onStop: () => void;
isStreaming: boolean;
disabled?: boolean;
@@ -25,6 +25,7 @@ export function ChatInput({
initialText?: string;
}) {
const [text, setText] = useState(initialText ?? "");
+ const [isSubmitting, setIsSubmitting] = useState(false);
const textareaRef = useRef(null);
const composingRef = useRef(false);
const fileInputRef = useRef(null);
@@ -46,15 +47,21 @@ export function ChatInput({
}
}, [initialText]);
- const handleSend = useCallback(() => {
+ const handleSend = useCallback(async () => {
const trimmed = text.trim();
- if (!trimmed || isStreaming) return;
- onSend(trimmed);
- setText("");
- if (textareaRef.current) {
- textareaRef.current.style.height = "auto";
+ if (!trimmed || isStreaming || disabled || isSubmitting) return;
+ setIsSubmitting(true);
+ try {
+ const sent = await onSend(trimmed);
+ if (sent === false) return;
+ setText("");
+ if (textareaRef.current) {
+ textareaRef.current.style.height = "auto";
+ }
+ } finally {
+ setIsSubmitting(false);
}
- }, [text, isStreaming, onSend]);
+ }, [text, isStreaming, disabled, isSubmitting, onSend]);
const handleKeyDown = useCallback(
(e: KeyboardEvent) => {
@@ -120,7 +127,7 @@ export function ChatInput({
}}
placeholder="Send a message..."
rows={1}
- disabled={disabled}
+ disabled={disabled || isSubmitting}
enterKeyHint="send"
className="max-h-[200px] min-h-[36px] flex-1 resize-none bg-transparent px-2 py-1.5 text-sm text-foreground placeholder:text-muted-foreground focus:outline-none"
aria-label="Message input"
@@ -141,7 +148,7 @@ export function ChatInput({
variant="ghost"
size="icon"
onClick={handleSend}
- disabled={!text.trim() || disabled}
+ disabled={!text.trim() || disabled || isSubmitting}
className="h-8 w-8 shrink-0 rounded-lg bg-primary text-primary-content hover:bg-primary/90 disabled:opacity-50"
aria-label="Send message"
>
diff --git a/chat-ui/src/components/tool-call-card.tsx b/chat-ui/src/components/tool-call-card.tsx
index 6d381762..e65f8322 100644
--- a/chat-ui/src/components/tool-call-card.tsx
+++ b/chat-ui/src/components/tool-call-card.tsx
@@ -161,11 +161,11 @@ export function ToolCallCard({ tool }: { tool: ToolCallState }) {
const inputDetails = toolInputDetails(tool);
const output = tool.output ? redactSensitiveText(truncate(tool.output, TOOL_OUTPUT_DISPLAY_LIMIT)) : "";
- const autoExpand = tool.state === "running" || tool.state === "result" || tool.state === "error" || tool.state === "blocked";
+ const autoExpand = tool.state === "error" || tool.state === "blocked";
const [isOpen, setIsOpen] = useState(autoExpand);
useEffect(() => {
- if (tool.state === "running" || tool.state === "result" || tool.state === "error" || tool.state === "blocked") {
+ if (tool.state === "error" || tool.state === "blocked") {
setIsOpen(true);
}
}, [tool.state]);
diff --git a/chat-ui/src/components/user-message.tsx b/chat-ui/src/components/user-message.tsx
index c04f3a8d..6ed80aa8 100644
--- a/chat-ui/src/components/user-message.tsx
+++ b/chat-ui/src/components/user-message.tsx
@@ -1,14 +1,67 @@
import type { ChatMessage } from "@/lib/chat-types";
+import { File, FileText } from "lucide-react";
export function UserMessage({ message }: { message: ChatMessage }) {
const text =
message.content.find((b) => b.type === "text")?.text ?? "";
+ const attachments = message.attachments ?? [];
return (
-
{text}
+ {attachments.length > 0 && (
+
+ )}
+ {text &&
{text}
}
);
}
+
+function AttachmentIcon({
+ mimeType,
+ previewUrl,
+ filename,
+}: {
+ mimeType: string;
+ previewUrl: string;
+ filename: string;
+}) {
+ if (mimeType.startsWith("image/")) {
+ return (
+
+
+
+ );
+ }
+ if (mimeType === "application/pdf") {
+ return ;
+ }
+ if (mimeType.startsWith("text/") || mimeType === "application/json") {
+ return ;
+ }
+ return ;
+}
+
+/**
+ * Formats a byte count as a short human-readable size in B, KB, or MB.
+ *
+ * Kilobytes are rounded to a whole number and megabytes to one decimal place.
+ * A value that would round up to 1024 KB is reported in MB instead, so the
+ * label never reads "1024 KB".
+ *
+ * @param size - Size in bytes.
+ * @returns The formatted label, for example "512 B", "12 KB", or "1.0 MB".
+ */
+function formatBytes(size: number): string {
+  if (size < 1024) return `${size} B`;
+  const kilobytes = Math.round(size / 1024);
+  // Sizes within 512 bytes of 1 MiB round to 1024; fall through to MB for those.
+  if (kilobytes < 1024) return `${kilobytes} KB`;
+  return `${(size / (1024 * 1024)).toFixed(1)} MB`;
+}
diff --git a/chat-ui/src/hooks/use-attachments.ts b/chat-ui/src/hooks/use-attachments.ts
index 05e36b8a..5bdd5fe8 100644
--- a/chat-ui/src/hooks/use-attachments.ts
+++ b/chat-ui/src/hooks/use-attachments.ts
@@ -23,18 +23,29 @@ export type PendingAttachment = {
export type AttachmentResult = {
id: string;
+ client_id?: string;
filename: string;
mime_type: string;
size: number;
preview_url: string;
};
+export type UploadFilesResult = { // Outcome of one uploadFiles() call; read by shouldBlockSendAfterUpload.
+ intendedCount: number; // number of files in the attachment list when the upload was requested
+ acceptedIds: string[]; // server ids usable for sending: files already marked done plus those accepted by this call
+ failedCount: number; // number of files that did not end up accepted
+};
+
+/**
+ * Decides whether a send must be held back after an upload attempt.
+ *
+ * @param result - Outcome reported by `uploadFiles`.
+ * @returns `true` when files were meant to accompany the message and at least
+ *   one of them failed; `false` when no files were intended or none failed.
+ */
+export function shouldBlockSendAfterUpload(result: UploadFilesResult): boolean {
+  const { intendedCount, failedCount } = result;
+  if (!(intendedCount > 0)) return false;
+  return failedCount > 0;
+}
+
export function useAttachments(): {
files: PendingAttachment[];
addFiles: (newFiles: File[]) => void;
removeFile: (id: string) => void;
clearFiles: () => void;
- uploadFiles: (sessionId: string) => Promise;
+ uploadFiles: (sessionId: string) => Promise;
hasFiles: boolean;
isUploading: boolean;
} {
@@ -103,15 +114,31 @@ export function useAttachments(): {
}, []);
const uploadFiles = useCallback(
- async (sessionId: string): Promise => {
- const pending = filesRef.current.filter((f) => f.status === "pending");
- if (pending.length === 0) return [];
+ async (sessionId: string): Promise => {
+ const current = filesRef.current;
+ const intendedCount = current.length;
+ const alreadyAcceptedIds = current
+ .map((file) => (file.status === "done" ? file.serverId : undefined))
+ .filter((id): id is string => typeof id === "string" && id.length > 0);
+ const pending = current.filter((f) => f.serverId === undefined && (f.status === "pending" || f.status === "error"));
+ if (intendedCount === 0) {
+ return { intendedCount: 0, acceptedIds: [], failedCount: 0 };
+ }
+ if (pending.length === 0) {
+ return {
+ intendedCount,
+ acceptedIds: alreadyAcceptedIds,
+ failedCount: intendedCount - alreadyAcceptedIds.length,
+ };
+ }
- setFiles((prev) => prev.map((f) => (f.status === "pending" ? { ...f, status: "uploading" as const } : f)));
+ const pendingIds = new Set(pending.map((file) => file.id));
+ setFiles((prev) => prev.map((f) => (pendingIds.has(f.id) ? { ...f, status: "uploading" as const } : f)));
const formData = new FormData();
for (const p of pending) {
formData.append("file", p.file);
+ formData.append("client_id", p.id);
}
try {
@@ -133,12 +160,12 @@ export function useAttachments(): {
prev.map((f) => (f.status === "uploading" ? { ...f, status: "error" as const } : f)),
);
toast.error(errorMsg);
- return [];
+ return { intendedCount, acceptedIds: alreadyAcceptedIds, failedCount: pending.length };
}
const body = (await res.json()) as {
attachments?: AttachmentResult[];
- rejected?: Array<{ filename: string; reason: string; message: string }>;
+ rejected?: Array<{ client_id?: string; filename: string; reason: string; message: string }>;
};
if (body.rejected) {
@@ -147,19 +174,40 @@ export function useAttachments(): {
}
}
- const serverIds = (body.attachments ?? []).map((a) => a.id);
+ const accepted = body.attachments ?? [];
+ const acceptedByClientId = new Map(
+ accepted
+ .filter((attachment) => typeof attachment.client_id === "string")
+ .map((attachment) => [attachment.client_id as string, attachment.id] as const),
+ );
+ const rejectedIds = new Set(
+ (body.rejected ?? [])
+ .map((rejection) => rejection.client_id)
+ .filter((id): id is string => typeof id === "string" && id.length > 0),
+ );
setFiles((prev) =>
- prev.map((f) => (f.status === "uploading" ? { ...f, status: "done" as const } : f)),
+ prev.map((f) => {
+ if (!pendingIds.has(f.id)) return f;
+ const serverId = acceptedByClientId.get(f.id);
+ if (serverId) {
+ return { ...f, status: "done" as const, serverId };
+ }
+ return { ...f, status: "error" as const };
+ }),
);
- return serverIds;
+ const acceptedIds = [...alreadyAcceptedIds, ...accepted.map((attachment) => attachment.id)];
+ const failedCount = pending.filter(
+ (file) => !acceptedByClientId.has(file.id) || rejectedIds.has(file.id),
+ ).length;
+ return { intendedCount, acceptedIds, failedCount };
} catch {
setFiles((prev) =>
prev.map((f) => (f.status === "uploading" ? { ...f, status: "error" as const } : f)),
);
toast.error("Upload failed. Please try again.");
- return [];
+ return { intendedCount, acceptedIds: alreadyAcceptedIds, failedCount: pending.length };
}
},
[],
diff --git a/chat-ui/src/hooks/use-chat.ts b/chat-ui/src/hooks/use-chat.ts
index 5b4ceee5..54d143f0 100644
--- a/chat-ui/src/hooks/use-chat.ts
+++ b/chat-ui/src/hooks/use-chat.ts
@@ -1,4 +1,5 @@
import { runTimelineSummaryToView } from "@/lib/chat-activity";
+import { parseMessageContentJson } from "@/lib/chat-message-content";
import { type ChatStore, beginRunActivity, createChatStore, dispatchFrame } from "@/lib/chat-store";
import type {
ChatMessage,
@@ -265,28 +266,13 @@ function buildTimelineViewMap(detail: SessionDetail): Map = [];
- try {
- const parsed = JSON.parse(row.content_json);
- if (typeof parsed === "string") {
- contentBlocks = [{ type: "text", text: parsed }];
- } else if (Array.isArray(parsed)) {
- contentBlocks = parsed;
- } else {
- contentBlocks = [parsed];
- }
- } catch {
- contentBlocks = [{ type: "text", text: row.content_json }];
- }
+ const parsed = parseMessageContentJson(row.content_json, row.role);
return {
id: row.id,
role: row.role as "user" | "assistant",
- content: contentBlocks,
+ content: parsed.contentBlocks,
+ attachments: parsed.attachments,
createdAt: row.created_at,
status: row.status as "committed" | "streaming" | "error",
stopReason: row.stop_reason,
diff --git a/chat-ui/src/lib/__tests__/chat-store.test.ts b/chat-ui/src/lib/__tests__/chat-store.test.ts
index fc8a6caf..8949e3f4 100644
--- a/chat-ui/src/lib/__tests__/chat-store.test.ts
+++ b/chat-ui/src/lib/__tests__/chat-store.test.ts
@@ -1,5 +1,7 @@
import { describe, expect, it } from "vitest";
+import { shouldBlockSendAfterUpload } from "../../hooks/use-attachments";
import { ACTIVE_RUN_MESSAGE_ID } from "../chat-activity";
+import { getAssistantTextBlocks } from "../chat-message-content";
import { beginRunActivity, createChatStore, dispatchFrame } from "../chat-store";
function send(store: ReturnType, event: string, data: Record): void {
@@ -127,6 +129,134 @@ describe("chat-store reducer: text block lifecycle", () => {
expect(store.getState().thinkingBlocks.get("tk_0_0")?.isStreaming).toBe(false);
});
+
+ it("preserves multiple assistant text blocks for rendering", () => { // Two text blocks (index 0 and index 2) on one assistant message must both remain in content, in arrival order.
+ const store = createChatStore();
+ send(store, "message.assistant_start", { message_id: "a1" });
+ send(store, "message.text_start", {
+ message_id: "a1",
+ text_block_id: "tb_0_0",
+ index: 0,
+ });
+ send(store, "message.text_delta", {
+ text_block_id: "tb_0_0",
+ delta: "Before the tool.",
+ });
+ send(store, "message.text_end", { text_block_id: "tb_0_0" });
+ send(store, "message.text_start", {
+ message_id: "a1",
+ text_block_id: "tb_0_2",
+ index: 2,
+ });
+ send(store, "message.text_delta", {
+ text_block_id: "tb_0_2",
+ delta: "After the tool.",
+ });
+ send(store, "message.text_end", { text_block_id: "tb_0_2" });
+
+ const assistant = store.getState().messages[0];
+ expect(assistant?.content.filter((block) => block.type === "text")).toHaveLength(2);
+ if (assistant) { // narrows the possibly-undefined first message before passing it to the helper
+ expect(getAssistantTextBlocks(assistant)).toEqual(["Before the tool.", "After the tool."]);
+ }
+ });
+
+ it("session.error marks a previously ended assistant row as error", () => { // A row already committed by message.assistant_end must still flip to "error" when session.error names the same message_id.
+ const store = createChatStore();
+ send(store, "message.assistant_start", { message_id: "a1" });
+ send(store, "message.text_start", {
+ message_id: "a1",
+ text_block_id: "tb_0_0",
+ index: 0,
+ });
+ send(store, "message.text_delta", {
+ text_block_id: "tb_0_0",
+ delta: "Partial answer",
+ });
+ send(store, "message.assistant_end", {
+ message_id: "a1",
+ interrupted: false,
+ });
+ expect(store.getState().messages[0]?.status).toBe("committed"); // precondition: the end frame committed the row
+
+ send(store, "session.error", {
+ session_id: "s1",
+ message_id: "a1",
+ subtype: "error_during_execution",
+ recoverable: false,
+ errors: ["Provider failed"],
+ cost_usd: 0,
+ duration_ms: 10,
+ });
+
+ expect(store.getState().messages[0]?.status).toBe("error");
+ });
+});
+
+describe("chat-store reducer: user attachments", () => { // Covers attachment metadata carried on user.message frames.
+ it("preserves attachment metadata from user.message frames", () => { // The frame uses snake_case fields; the stored message is expected to expose them camelCased.
+ const store = createChatStore();
+ send(store, "user.message", {
+ message_id: "u1",
+ text: "Review this.",
+ attachments: [
+ {
+ id: "att-1",
+ filename: "brief.pdf",
+ mime_type: "application/pdf",
+ size_bytes: 1234,
+ preview_url: "/chat/attachments/att-1/preview",
+ },
+ ],
+ sent_at: "2026-04-30T00:00:00.000Z",
+ source_tab_id: "tab-1",
+ });
+
+ const user = store.getState().messages[0];
+ expect(user?.attachments).toEqual([
+ {
+ id: "att-1",
+ filename: "brief.pdf",
+ mimeType: "application/pdf",
+ sizeBytes: 1234,
+ previewUrl: "/chat/attachments/att-1/preview",
+ },
+ ]);
+ });
+
+ it("replayed user.message attachments are idempotent", () => { // Dispatching the same frame twice through replay() must not duplicate the attachment.
+ const store = createChatStore();
+ const frame = {
+ message_id: "u1",
+ text: "Review this.",
+ attachments: [
+ {
+ id: "att-1",
+ filename: "brief.pdf",
+ mime_type: "application/pdf",
+ size_bytes: 1234,
+ preview_url: "/chat/attachments/att-1/preview",
+ },
+ ],
+ sent_at: "2026-04-30T00:00:00.000Z",
+ source_tab_id: "tab-1",
+ };
+ replay(store, "user.message", frame);
+ replay(store, "user.message", frame);
+
+ const user = store.getState().messages[0];
+ expect(user?.attachments).toHaveLength(1);
+ expect(user?.attachments?.[0]?.id).toBe("att-1");
+ });
+});
+
+describe("attachment upload send gate", () => { // Pins shouldBlockSendAfterUpload for the four upload outcomes below.
+ it("distinguishes no files from failed intended uploads", () => {
+ expect(shouldBlockSendAfterUpload({ intendedCount: 0, acceptedIds: [], failedCount: 0 })).toBe(false); // nothing attached
+ expect(shouldBlockSendAfterUpload({ intendedCount: 1, acceptedIds: [], failedCount: 1 })).toBe(true); // every file failed
+ expect(shouldBlockSendAfterUpload({ intendedCount: 2, acceptedIds: ["att-1"], failedCount: 1 })).toBe(true); // partial success still blocks
+ expect(shouldBlockSendAfterUpload({ intendedCount: 1, acceptedIds: ["att-1"], failedCount: 0 })).toBe(false); // every file accepted
+ });
});
describe("chat-store reducer: run activity", () => {
diff --git a/chat-ui/src/lib/chat-message-content.ts b/chat-ui/src/lib/chat-message-content.ts
new file mode 100644
index 00000000..be520ab2
--- /dev/null
+++ b/chat-ui/src/lib/chat-message-content.ts
@@ -0,0 +1,75 @@
+import type { ChatAttachmentView, ChatMessage, ContentBlock } from "./chat-types";
+
+export type ParsedMessageContent = { // Result of decoding a stored content JSON string with parseMessageContentJson.
+ contentBlocks: ContentBlock[]; // blocks kept as message content
+ attachments?: ChatAttachmentView[]; // present only when a user message contained at least one valid attachment block
+};
+
+/**
+ * Decodes a stored message-content JSON string into display content.
+ *
+ * A JSON string becomes a single text block. An array, or any other JSON value
+ * wrapped in a one-element array, is handed to `normalizeDurableContent`.
+ * If anything throws, including a JSON syntax error, the raw input is returned
+ * as a single text block.
+ *
+ * @param contentJson - Serialized message content.
+ * @param role - Message role, forwarded to `normalizeDurableContent`.
+ * @returns The content blocks, plus attachments when the normalizer found any.
+ */
+export function parseMessageContentJson(contentJson: string, role: string): ParsedMessageContent {
+  const asPlainText = (text: string): ParsedMessageContent => ({ contentBlocks: [{ type: "text", text }] });
+  try {
+    const decoded: unknown = JSON.parse(contentJson);
+    if (typeof decoded === "string") return asPlainText(decoded);
+    const blocks = Array.isArray(decoded) ? decoded : [decoded];
+    return normalizeDurableContent(blocks, role);
+  } catch {
+    return asPlainText(contentJson);
+  }
+}
+
+/**
+ * Collects the text of every non-empty text block in a message, in content order.
+ *
+ * Blocks whose `type` is not "text", whose `text` is not a string, or whose
+ * text is empty are skipped.
+ *
+ * @param message - Any object exposing a message's `content` blocks.
+ * @returns The text of each qualifying block; empty when there is none.
+ */
+export function getAssistantTextBlocks(message: Pick<ChatMessage, "content">): string[] {
+  return message.content.flatMap((block) => {
+    // Narrow through a local so no `as string` assertion is needed.
+    const text = block.text;
+    return block.type === "text" && typeof text === "string" && text.length > 0 ? [text] : [];
+  });
+}
+
+/**
+ * Shapes decoded durable blocks for display.
+ *
+ * For any role other than "user", every object block is kept and passed through
+ * `recordToContentBlock`. For user messages only "text" and "attachment" blocks
+ * are kept: text blocks are reduced to their string text ("" when `text` is not
+ * a string) and attachment blocks are collected separately.
+ *
+ * @param blocks - Decoded JSON values; non-object entries are ignored.
+ * @param role - Message role; "user" selects the attachment-aware path.
+ * @returns Content blocks, with `attachments` left `undefined` when none were found.
+ */
+function normalizeDurableContent(blocks: unknown[], role: string): ParsedMessageContent {
+  const records = blocks.filter(isRecord);
+  if (role !== "user") {
+    return { contentBlocks: records.map(recordToContentBlock) };
+  }
+  const textBlocks: ContentBlock[] = [];
+  const files: ChatAttachmentView[] = [];
+  for (const record of records) {
+    switch (record.type) {
+      case "attachment": {
+        const view = normalizeDurableAttachment(record);
+        if (view !== null) files.push(view);
+        break;
+      }
+      case "text":
+        textBlocks.push({ type: "text", text: typeof record.text === "string" ? record.text : "" });
+        break;
+      default:
+        // Any other block type is left out of a user message.
+        break;
+    }
+  }
+  return { contentBlocks: textBlocks, attachments: files.length > 0 ? files : undefined };
+}
+
+/**
+ * Copies a decoded record into a `ContentBlock`, guaranteeing a string `type`.
+ *
+ * @param block - Decoded JSON object.
+ * @returns A shallow copy of `block` whose `type` is the original string type,
+ *   or "text" when the original `type` is missing or not a string.
+ */
+function recordToContentBlock(block: Record<string, unknown>): ContentBlock {
+  return { ...block, type: typeof block.type === "string" ? block.type : "text" };
+}
+
+/**
+ * Builds display metadata from a durable "attachment" block.
+ *
+ * Accepts snake_case or camelCase field names. Missing fields fall back to
+ * "file" (filename), "application/octet-stream" (MIME type), `null` (size),
+ * and a preview path derived from the id.
+ *
+ * @param block - Decoded attachment record.
+ * @returns The attachment view, or `null` when the record has no non-empty string `id`.
+ */
+function normalizeDurableAttachment(block: Record<string, unknown>): ChatAttachmentView | null {
+  const { id } = block;
+  if (typeof id !== "string" || id.length === 0) return null;
+  const filename = typeof block.filename === "string" && block.filename.length > 0 ? block.filename : "file";
+  const mimeType =
+    typeof block.mime_type === "string"
+      ? block.mime_type
+      : typeof block.mimeType === "string"
+        ? block.mimeType
+        : "application/octet-stream";
+  const sizeValue = block.size_bytes ?? block.sizeBytes;
+  const previewValue = block.preview_url ?? block.previewUrl;
+  return {
+    id,
+    filename,
+    mimeType,
+    sizeBytes: typeof sizeValue === "number" ? sizeValue : null,
+    // Encode the id so a "/", "?" or "#" inside it cannot change which path the preview request hits.
+    // NOTE(review): a supplied preview URL is used verbatim; confirm the server only emits same-origin paths.
+    previewUrl: typeof previewValue === "string" ? previewValue : `/chat/attachments/${encodeURIComponent(id)}/preview`,
+  };
+}
+
+/**
+ * Type guard for plain JSON-style objects.
+ *
+ * @param value - Any decoded value.
+ * @returns `true` for non-null objects that are not arrays.
+ */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
diff --git a/chat-ui/src/lib/chat-store.ts b/chat-ui/src/lib/chat-store.ts
index e378a8bd..840a2392 100644
--- a/chat-ui/src/lib/chat-store.ts
+++ b/chat-ui/src/lib/chat-store.ts
@@ -11,7 +11,7 @@ import {
dispatchToolRunning,
dispatchToolStart,
} from "./chat-dispatch-tools";
-import type { ChatMessage, ChatState } from "./chat-types";
+import type { ChatAttachmentView, ChatMessage, ChatState } from "./chat-types";
type Listener = () => void;
@@ -81,16 +81,55 @@ function upsertMessage(messages: ChatMessage[], message: ChatMessage): ChatMessa
const next = [...messages];
const existing = next[existingIndex];
if (!existing) return messages;
+ const nextAttachments = mergeAttachments(existing.attachments, message.attachments);
next[existingIndex] = {
...existing,
...message,
content: existing.content.length > 0 ? existing.content : message.content,
+ attachments: nextAttachments,
status: existing.status === "committed" ? existing.status : message.status,
runTimeline: existing.runTimeline ?? message.runTimeline,
};
return next;
}
+/**
+ * Combines the attachments already on a message with those from an incoming copy.
+ *
+ * Existing entries keep their position and data; an incoming entry is appended
+ * only when its id is not already present among the existing ones.
+ *
+ * @param existing - Attachments currently stored on the message.
+ * @param incoming - Attachments carried by the replacing message.
+ * @returns `incoming` when there is nothing stored, `existing` when nothing
+ *   arrives, otherwise a new array with the merged entries.
+ */
+function mergeAttachments(
+  existing: ChatAttachmentView[] | undefined,
+  incoming: ChatAttachmentView[] | undefined,
+): ChatAttachmentView[] | undefined {
+  if (!existing?.length) return incoming;
+  if (!incoming?.length) return existing;
+  const knownIds = new Set<string>();
+  for (const stored of existing) knownIds.add(stored.id);
+  const merged = existing.slice();
+  for (const candidate of incoming) {
+    if (!knownIds.has(candidate.id)) merged.push(candidate);
+  }
+  return merged;
+}
+
+/**
+ * Builds display metadata from one attachment entry of a wire frame.
+ *
+ * Accepts snake_case or camelCase field names. Missing fields fall back to
+ * "file" (filename), "application/octet-stream" (MIME type), `null` (size),
+ * and a preview path derived from the id.
+ *
+ * @param value - Untyped attachment entry.
+ * @returns The attachment view, or `null` when `value` is not a plain object
+ *   or has no non-empty string `id`.
+ */
+function normalizeAttachment(value: unknown): ChatAttachmentView | null {
+  if (typeof value !== "object" || value === null || Array.isArray(value)) return null;
+  const record = value as Record<string, unknown>;
+  const { id } = record;
+  if (typeof id !== "string" || id.length === 0) return null;
+  const filename = typeof record.filename === "string" && record.filename.length > 0 ? record.filename : "file";
+  const mimeType =
+    typeof record.mime_type === "string"
+      ? record.mime_type
+      : typeof record.mimeType === "string"
+        ? record.mimeType
+        : "application/octet-stream";
+  const sizeValue = record.size_bytes ?? record.sizeBytes;
+  const previewValue = record.preview_url ?? record.previewUrl;
+  return {
+    id,
+    filename,
+    mimeType,
+    sizeBytes: typeof sizeValue === "number" ? sizeValue : null,
+    // Encode the id so a "/", "?" or "#" inside it cannot change which path the preview request hits.
+    // NOTE(review): a supplied preview URL is used verbatim; confirm the server only emits same-origin paths.
+    previewUrl: typeof previewValue === "string" ? previewValue : `/chat/attachments/${encodeURIComponent(id)}/preview`,
+  };
+}
+
+/**
+ * Normalizes the `attachments` field of a frame into attachment views.
+ *
+ * @param value - Untyped field value; anything that is not an array yields no attachments.
+ * @returns The entries `normalizeAttachment` accepted, in their original order.
+ */
+function normalizeAttachments(value: unknown): ChatAttachmentView[] {
+  const views: ChatAttachmentView[] = [];
+  if (Array.isArray(value)) {
+    for (const entry of value) {
+      const view = normalizeAttachment(entry);
+      if (view !== null) views.push(view);
+    }
+  }
+  return views;
+}
+
function updateTextBlockInContent(s: ChatState, blockId: string, updater: (text: string) => string): ChatState {
const block = s.textBlocks.get(blockId);
if (!block) return s;
@@ -136,6 +175,7 @@ export function dispatchFrame(
id: data.message_id as string,
role: "user" as const,
content: [{ type: "text", text: data.text as string }],
+ attachments: normalizeAttachments(data.attachments),
createdAt: data.sent_at as string,
status: "committed" as const,
}),
@@ -323,9 +363,12 @@ export function dispatchFrame(
.findIndex((message) => message.role === "assistant" && message.status === "streaming");
const normalizedIndex = messageId !== null ? index : index >= 0 ? msgs.length - 1 - index : -1;
const target = normalizedIndex >= 0 ? msgs[normalizedIndex] : undefined;
- if (target && target.role === "assistant" && target.status === "streaming") {
- const newStatus = event === "session.error" ? "error" : "committed";
- msgs[normalizedIndex] = { ...target, status: newStatus };
+ if (target && target.role === "assistant") {
+ if (event === "session.error") {
+ msgs[normalizedIndex] = { ...target, status: "error" };
+ } else if (target.status === "streaming") {
+ msgs[normalizedIndex] = { ...target, status: "committed" };
+ }
}
return {
...s,
diff --git a/chat-ui/src/lib/chat-types.ts b/chat-ui/src/lib/chat-types.ts
index dd3e0bf5..6e9b7488 100644
--- a/chat-ui/src/lib/chat-types.ts
+++ b/chat-ui/src/lib/chat-types.ts
@@ -18,10 +18,19 @@ export type ContentBlock = {
[key: string]: unknown;
};
+export type ChatAttachmentView = { // Display metadata for a file attached to a chat message.
+ id: string; // attachment id
+ filename: string;
+ mimeType: string;
+ sizeBytes: number | null; // null when no numeric size was available
+ previewUrl: string;
+};
+
export type ChatMessage = {
id: string;
role: "user" | "assistant";
content: ContentBlock[];
+ attachments?: ChatAttachmentView[];
createdAt: string;
status: "committed" | "streaming" | "error";
stopReason?: string | null;
diff --git a/chat-ui/src/lib/client.ts b/chat-ui/src/lib/client.ts
index 8cb4a868..fa019026 100644
--- a/chat-ui/src/lib/client.ts
+++ b/chat-ui/src/lib/client.ts
@@ -129,16 +129,25 @@ export function abortSession(id: string): Promise {
}).then(() => undefined);
}
-export function sendMessage(sessionId: string, text: string, tabId: string): ReadableStream {
+export function sendMessage(
+ sessionId: string,
+ text: string,
+ tabId: string,
+ attachmentIds?: string[],
+): ReadableStream {
const controller = new AbortController();
const stream = new ReadableStream({
async start(streamController) {
try {
+ const body: Record = { session_id: sessionId, text, tab_id: tabId };
+ if (attachmentIds && attachmentIds.length > 0) {
+ body.attachment_ids = attachmentIds;
+ }
const res = await fetch("/chat/stream", {
method: "POST",
credentials: "include",
headers: { "Content-Type": "application/json" },
- body: JSON.stringify({ session_id: sessionId, text, tab_id: tabId }),
+ body: JSON.stringify(body),
signal: controller.signal,
});
if (!res.ok || !res.body) {
diff --git a/chat-ui/src/routes/session-route.tsx b/chat-ui/src/routes/session-route.tsx
index 6a7d9642..348594b6 100644
--- a/chat-ui/src/routes/session-route.tsx
+++ b/chat-ui/src/routes/session-route.tsx
@@ -3,13 +3,14 @@ import { DropOverlay } from "@/components/drop-overlay";
import { IosInstallBanner } from "@/components/ios-install-banner";
import { MessageList } from "@/components/message-list";
import { NotificationBanner } from "@/components/notification-banner";
-import { useAttachments } from "@/hooks/use-attachments";
+import { shouldBlockSendAfterUpload, useAttachments } from "@/hooks/use-attachments";
import { useChat } from "@/hooks/use-chat";
import { useDragDrop } from "@/hooks/use-drag-drop";
import { useFocusHeartbeat } from "@/hooks/use-focus-heartbeat";
import { usePaste } from "@/hooks/use-paste";
import { useCallback, useEffect, useRef, useState } from "react";
import { useLocation, useParams } from "react-router-dom";
+import { toast } from "sonner";
export function SessionRoute() {
const { sessionId } = useParams<{ sessionId: string }>();
@@ -56,13 +57,22 @@ export function SessionRoute() {
}, [sessionId, location.state, location.pathname, sendMessage]);
const handleSend = useCallback(
- async (text: string) => {
- if (!sessionId) return;
- const attachmentIds = await uploadFiles(sessionId);
- if (attachmentIds.length > 0) clearFiles();
- sendMessage(text, attachmentIds.length > 0 ? attachmentIds : undefined);
+ async (text: string): Promise => {
+ if (!sessionId) return false;
+ const uploadResult = await uploadFiles(sessionId);
+ if (shouldBlockSendAfterUpload(uploadResult)) {
+ const message =
+ uploadResult.acceptedIds.length === 0
+ ? "No files uploaded. Your message was not sent."
+ : "Some files did not upload. Your message was not sent.";
+ toast.error(message);
+ return false;
+ }
+ if (uploadResult.acceptedIds.length > 0) clearFiles();
+ sendMessage(text, uploadResult.acceptedIds.length > 0 ? uploadResult.acceptedIds : undefined);
sentCountRef.current++;
setHasSentMessage(true);
+ return true;
},
[sessionId, uploadFiles, clearFiles, sendMessage],
);
diff --git a/prompts/phase-10h-chat-integrity-builder.md b/prompts/phase-10h-chat-integrity-builder.md
new file mode 100644
index 00000000..2fa46354
--- /dev/null
+++ b/prompts/phase-10h-chat-integrity-builder.md
@@ -0,0 +1,117 @@
+ultrathink. ultrathink. ultrathink.
+
+You are a principal engineer, principal architect, and principal product manager at Anthropic, all three at once. You are implementing the Phase 10H Phantom chat transcript-integrity slice in `/Users/truffle/work/phantom-murph-hardening`.
+
+No context anxiety. No token limits. No time pressure. No cost anxiety. No "v2" thinking. There is no v2. Build it right. Take as long as you need.
+
+## Mission
+
+Make the Phantom chat transcript trustworthy enough for production Murph use before we add more ambitious polish. The UI can be beautiful only if the underlying conversation record is honest, durable, and replayable.
+
+This slice focuses on correctness, not broad redesign:
+
+1. Runtime or SDK result errors must not become empty successful assistant rows.
+2. User attachment metadata must persist without storing raw base64 payloads in chat message content.
+3. Sent user-message attachments must appear live and after reload.
+4. Upload failures must not silently send a prompt without the intended files.
+5. Assistant messages with multiple text blocks must render all text.
+6. Chat input must not double-send while upload is pending.
+
+## Required Reading
+
+Read these in order:
+
+1. `/Users/truffle/.claude/AGENTS.md`
+2. `/Users/truffle/.claude/CLAUDE.md`
+3. `/Users/truffle/work/phantom-murph-hardening/CLAUDE.md`
+4. `/Users/truffle/work/murph/QUALITY-BAR.md`
+5. `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`
+6. `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-phantom-chat-review.md`
+7. `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-pi-thinking-research.md`
+8. `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-provider-thinking-research.md`
+
+Then inspect the current source directly. Agent summaries are not evidence.
+
+## Owned Areas
+
+You may edit these areas as needed:
+
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/`
+- Focused tests under those same areas.
+
+Keep changes tightly scoped. Do not redesign the full chat UI in this slice.
+
+## Product Bar
+
+The transcript is a trust surface. When the agent runs a long task, creates files, uses tools, or attaches user-provided context, the UI must preserve what happened without pretending failed work succeeded.
+
+Good behavior:
+
+- The live view and reload view agree.
+- Attachments show as first-class user message context.
+- Invalid or failed attachments are visible to the user and block sending until fixed.
+- Assistant text is not silently dropped because it arrived as multiple blocks.
+- Terminal SDK errors produce visible error state, not blank success.
+
+Bad behavior:
+
+- Showing an assistant row as successful when the durable transcript will later remove it.
+- Persisting base64 image or file payloads in the chat transcript.
+- Sending the user's prompt without files after upload failures.
+- Flattening tool or content data with brittle string parsing.
+- Adding unrelated UI polish before fixing transcript correctness.
+
+## Implementation Notes
+
+Prefer structured helpers over ad hoc parsing. If the durable message content already contains structured JSON blocks, parse them centrally and render from that shape.
+
+Keep SDK runtime message content separate from display-safe transcript content. The model may need attachment payloads during the run; the durable chat transcript should keep metadata and previews, not raw base64.
+
+Make upload behavior explicit. If a user selected three files and one failed, the send path should know that and avoid silently continuing with two files.
+
+Terminal result errors need both backend and frontend protection. If a provider produces an error after the assistant message has started, the backend must not emit a synthetic successful-completion frame, and the frontend should let `session.error` override the status of any matching assistant row, even one that an earlier event already ended.
+
+## Acceptance Criteria
+
+1. Result errors after assistant start do not emit a normal assistant end frame.
+2. `session.error` can mark an existing assistant row as error even if a prior event ended it.
+3. User attachments are committed to message ownership and replay as attachment chips after reload.
+4. Durable content JSON does not contain raw base64 data URLs from attachment payloads.
+5. Invalid, wrong-session, or already-sent attachment IDs are rejected with a 400 response.
+6. Upload failures block send and preserve the composer content.
+7. Multiple assistant text blocks render in order.
+8. TypeScript remains strict: no explicit `any`, no `@ts-ignore`, no hidden type escapes.
+9. Existing chat streaming, replay, and resume behavior still passes tests.
+
+## Required Verification
+
+Run focused tests first, then enough local gates to establish confidence:
+
+```sh
+bun test src/chat/__tests__/writer.test.ts
+bun test src/chat/__tests__/http.test.ts src/chat/__tests__/message-builder.test.ts src/chat/__tests__/upload.test.ts
+bun test src/chat/__tests__/sdk-to-wire.test.ts
+cd chat-ui && bun test src/lib/__tests__/chat-store.test.ts
+bun run lint
+bun run typecheck
+cd chat-ui && bun run typecheck
+cd chat-ui && bun run build
+git diff --check
+```
+
+If a command cannot run, document why and leave the work in a clean state that another engineer can resume from.
+
+## Handoff Contract
+
+When done, report:
+
+1. Files changed.
+2. Behavior shipped.
+3. Tests run and exact pass/fail status.
+4. Any residual risks.
+5. Whether the diff is ready for an independent review agent.
+
+Do not commit. Do not push. The orchestrator will verify by reading files and running checks.
+
+ultrathink. The floor is correctness. Polish can only sit on top of a transcript users can trust.
diff --git a/prompts/phase-10h-chat-integrity-review.md b/prompts/phase-10h-chat-integrity-review.md
new file mode 100644
index 00000000..341fd0bd
--- /dev/null
+++ b/prompts/phase-10h-chat-integrity-review.md
@@ -0,0 +1,57 @@
+ultrathink. ultrathink. ultrathink.
+
+You are a principal engineer, principal architect, and principal product manager at Anthropic, all three at once. You are reviewing the Phase 10H Phantom chat transcript-integrity slice in `/Users/truffle/work/phantom-murph-hardening`.
+
+No context anxiety. No token limits. No time pressure. No cost anxiety. No "v2" thinking. There is no v2. Build it right. Take as long as you need.
+
+## Mission
+
+Review the current uncommitted diff. Do not edit application code. Write your review report to:
+
+`/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-chat-integrity-review.md`
+
+Focus on bugs, regressions, missing tests, and product-trust issues.
+
+## Required Reading
+
+Read:
+
+1. `/Users/truffle/.claude/AGENTS.md`
+2. `/Users/truffle/.claude/CLAUDE.md`
+3. `/Users/truffle/work/phantom-murph-hardening/CLAUDE.md`
+4. `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`
+5. `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-phantom-chat-review.md`
+6. `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-pi-thinking-research.md`
+7. `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-provider-thinking-research.md`
+8. `/Users/truffle/work/phantom-murph-hardening/prompts/phase-10h-chat-integrity-builder.md`
+
+Then inspect the actual diff with `git diff`, not summaries.
+
+## Review Scope
+
+Pay special attention to:
+
+- Runtime or SDK result errors must not become empty successful assistant rows.
+- User attachment metadata must be persisted without base64 payloads.
+- Sent user-message attachments must appear live and after reload.
+- Upload failures must not silently send prompts without the intended files.
+- Assistant messages with multiple text blocks must render all text.
+- Chat input should not double-send while upload is pending.
+- Attachment preview URLs must stay authenticated and safe.
+- Existing chat streaming, replay, and resume semantics must not regress.
+- No explicit `any`, `@ts-ignore`, hidden type escapes, or broad unrelated refactors.
+- No generated build artifact should be included unless there is a clear reason.
+
+## Output Format
+
+Findings first, ordered by severity. Use P0/P1/P2/P3. Include exact file paths and line references. If there are no P0/P1/P2 findings, say that explicitly.
+
+Then include:
+
+1. Tests you ran or inspected.
+2. Residual risks.
+3. Whether this diff is safe to proceed to live browser verification.
+
+Do not make code changes. Write only the report file.
+
+ultrathink. Treat the transcript as a trust surface. The review should be skeptical, specific, and grounded in actual files.
diff --git a/prompts/phase-10h-phantom-chat-review.md b/prompts/phase-10h-phantom-chat-review.md
new file mode 100644
index 00000000..24201a42
--- /dev/null
+++ b/prompts/phase-10h-phantom-chat-review.md
@@ -0,0 +1,59 @@
+ultrathink. ultrathink. ultrathink.
+
+You are a principal engineer, principal architect, and principal product
+manager at Anthropic, all three at once. You are reviewing Phantom's current
+chat UI and chat event pipeline for best-in-class agent experience.
+
+Mission:
+
+Audit Phantom's current chat experience after PR 109. Identify the highest
+leverage fixes for durable run timeline, richer progress, thinking display,
+tool-card behavior, loading/idle states, visual polish, and post-compaction
+continuation. This is a review/research pass, not a builder pass.
+
+Required reading, in order:
+
+1. `/Users/truffle/.claude/AGENTS.md`
+2. `/Users/truffle/work/murph/QUALITY-BAR.md`
+3. `/Users/truffle/work/murph/PROGRESS.md`
+4. `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10a-synthesis.md`
+5. `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10d-chat-ui-polish-research.md`
+6. Current Phantom files under:
+ - `src/chat/`
+ - `src/agent/`
+ - `chat-ui/src/components/`
+ - `chat-ui/src/lib/`
+ - `chat-ui/src/hooks/`
+ - `chat-ui/src/routes/`
+
+You are not alone in the codebase. Do not overwrite or revert other work.
+
+Deliverable:
+
+Write `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-phantom-chat-review.md`.
+
+The report must include:
+
+1. Findings first, ordered by severity and user impact. Include file and line
+ references for every finding.
+2. A concrete "next builder slice" that can be implemented and verified in one
+ PR without ballooning scope.
+3. A second slice backlog for larger work that should not block the next PR.
+4. UI polish recommendations grounded in the existing design system and the
+ screenshots Cheema shared: tool cards collapsed by default, useful expanded
+ content, run activity while tools execute, top/border/input polish, no dead
+ feeling during long work.
+5. Testing plan: focused unit tests, chat-ui tests if present, production
+ build, and live browser verification flows against Murph.
+
+Non-goals:
+
+- Do not edit application code.
+- Do not commit, push, or open a PR.
+- Do not give generic design advice. Tie every recommendation to a real
+  Phantom file or observed behavior.
+
+Self-review:
+
+Before finishing, verify each finding by reading the actual source. Do not rely
+on prior summaries.
diff --git a/prompts/phase-10h-pi-thinking-research.md b/prompts/phase-10h-pi-thinking-research.md
new file mode 100644
index 00000000..95da7013
--- /dev/null
+++ b/prompts/phase-10h-pi-thinking-research.md
@@ -0,0 +1,68 @@
+ultrathink. ultrathink. ultrathink.
+
+You are a principal engineer, principal architect, and principal product
+manager at Anthropic, all three at once. You are researching how Phantom should
+render agent thinking, tool progress, and long-running activity now that
+Phantom runs on Murph, which runs on pi-mono.
+
+Mission:
+
+Find what Pi and Pi-adjacent code already provide for thinking display,
+progress display, activity rows, run timelines, and CLI or web rendering. Do
+not let Phantom reinvent primitives Pi already gives us. If Pi does not provide
+the UI primitive, identify the cleanest Phantom-owned layer.
+
+Required reading, in order:
+
+1. `/Users/truffle/.claude/AGENTS.md`
+2. `/Users/truffle/work/murph/QUALITY-BAR.md`
+3. `/Users/truffle/work/murph/PROGRESS.md`
+4. `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10g-pi-continuity.md`
+5. Local pi-mono sources under `/Users/truffle/work/pi-mono`, especially:
+ - `packages/agent/src/`
+ - `packages/ai/src/types.ts`
+ - `packages/ai/src/providers/`
+ - `packages/web-ui/README.md`
+ - `packages/web-ui/src/`
+ - `packages/mom/src/`
+
+External source rules:
+
+- Use primary sources only. GitHub repositories, official docs, package
+ READMEs, and source code are acceptable.
+- If you need to clone another Pi/Pi Code related repository from pi.dev or
+ GitHub, clone it under `/Users/truffle/work/research-clones/`.
+- Do not use SEO blogs, scraped docs, or tutorials as evidence.
+- Do not copy implementation from any repo into Phantom. This is research and
+ architecture guidance only.
+
+Deliverable:
+
+Write `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-pi-thinking-research.md`.
+
+The report must include:
+
+1. Source inventory: exact repos/files read and why they matter.
+2. How Pi represents thinking, streaming text, tool calls, tool progress,
+ usage, and completion.
+3. Whether Pi has a CLI or web rendering pattern we should reuse directly,
+ adapt conceptually, or ignore.
+4. What Phantom should own versus what Murph/Pi should own.
+5. Specific recommendations for Phantom's chat UI, ordered by impact.
+6. Risks and anti-patterns, especially places where a custom UI could break
+ provider protocol or leak unsafe content.
+7. Concrete acceptance criteria for the next builder slice.
+
+Non-goals:
+
+- Do not edit Phantom application code.
+- Do not change Murph code.
+- Do not commit, push, or open a PR.
+- Do not create a broad open-source polish plan. Stay focused on chat
+ experience and thinking/progress rendering.
+
+Self-review:
+
+Before finishing, re-read your report and verify every factual claim has a
+local file path or primary-source URL. If a claim is an inference, label it as
+an inference.
diff --git a/prompts/phase-10h-provider-thinking-research.md b/prompts/phase-10h-provider-thinking-research.md
new file mode 100644
index 00000000..788fc66b
--- /dev/null
+++ b/prompts/phase-10h-provider-thinking-research.md
@@ -0,0 +1,72 @@
+ultrathink. ultrathink. ultrathink.
+
+You are a principal engineer, principal architect, and principal product
+manager at Anthropic, all three at once. You are researching provider thinking
+and reasoning event support across Murph, Pi, Phantom, OpenAI, Anthropic, and
+ZAI.
+
+Mission:
+
+Determine what Phantom can truthfully show as "thinking" or "reasoning" today
+without lying to the user or exposing unsafe private chain-of-thought. We need
+best-in-class progress, but it must respect provider semantics.
+
+Required reading, in order:
+
+1. `/Users/truffle/.claude/AGENTS.md`
+2. `/Users/truffle/work/murph/QUALITY-BAR.md`
+3. `/Users/truffle/work/murph/PROGRESS.md`
+4. Murph source:
+ - `/Users/truffle/work/murph/packages/core/src/events/`
+ - `/Users/truffle/work/murph/packages/core/src/substrate/`
+ - `/Users/truffle/work/murph/packages/core/src/providers/`
+ - `/Users/truffle/work/murph/packages/anthropic-sdk-shim/src/`
+5. Phantom source:
+ - `src/chat/sdk-to-wire.ts`
+ - `src/chat/sdk-to-wire-handlers.ts`
+ - `src/chat/types.ts`
+ - `src/chat/run-timeline.ts`
+ - `chat-ui/src/components/thinking-block.tsx`
+ - `chat-ui/src/components/message.tsx`
+ - `chat-ui/src/lib/chat-types.ts`
+6. Pi source:
+ - `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`
+ - `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses.ts`
+ - `/Users/truffle/work/pi-mono/packages/ai/src/providers/anthropic.ts`
+ - `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-completions.ts`
+ - `/Users/truffle/work/pi-mono/packages/ai/src/providers/transform-messages.ts`
+
+External docs:
+
+- Use official provider docs only if needed. Prefer official OpenAI,
+  Anthropic, and ZAI docs, or source-backed docs.
+- Do not use SEO sites or tutorials.
+
+Deliverable:
+
+Write `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-provider-thinking-research.md`.
+
+The report must include:
+
+1. Event taxonomy: what events we currently have for thinking, redacted
+ thinking, progress summaries, token usage, tool calls, and rate limits.
+2. Provider capability matrix for OpenAI, Anthropic, and ZAI as routed through
+ Pi/Murph.
+3. What can be shown verbatim, what should be summarized, and what must be
+ hidden or labeled as private/redacted.
+4. Whether Phantom can show thinking tokens/counts today, and if so where the
+ data comes from. If not, name the missing Murph/Pi event or field.
+5. Recommended UI model for "thinking" that is honest across providers.
+6. Tests needed before shipping thinking/progress UI changes.
+
+Non-goals:
+
+- Do not edit code.
+- Do not print or inspect API key values.
+- Do not run costly live model loops unless absolutely needed. This is a source
+ and event-contract research pass.
+
+Self-review:
+
+Before finishing, check that each recommendation maps to an existing event or a
+clearly named event-contract gap.
diff --git a/research/chat-experience/phase-10g-pi-continuity.md b/research/chat-experience/phase-10g-pi-continuity.md
new file mode 100644
index 00000000..75be1083
--- /dev/null
+++ b/research/chat-experience/phase-10g-pi-continuity.md
@@ -0,0 +1,75 @@
+# Phase 10G Pi Continuity Context
+
+Date: 2026-04-30
+
+## Problem
+
+A live Phantom-on-Murph browser session showed that Murph compaction can preserve
+protocol validity and still lose host-level app facts that the user expects the
+agent to remember, such as the exact page URL produced by `phantom_create_page`.
+The symptom was not specific to page URLs. It was a continuity issue after a
+long, tool-heavy run.
+
+## Pi Grounding
+
+Pi already provides the primitive we need:
+
+- `transformContext` runs at the AgentMessage level before `convertToLlm`.
+- Pi custom messages require the app to also provide a `convertToLlm`
+ implementation. Murph's default Pi converter intentionally passes only
+ `user`, `assistant`, and `toolResult` messages.
+- Phantom should therefore inject host facts through `transformContext` as a
+ normal user-context message, not as a custom role that the default converter
+ would filter out.
+
+Murph already exposes this primitive through `MurphOptions.transformContext`,
+passes it through query normalization, and forwards it into the Pi harness.
+
+## Decision
+
+Do not build a parallel Phantom continuity runtime. Phantom should derive compact
+host facts from its existing durable stream log and pass them to Murph through
+`transformContext` as a Pi-compatible user-context message. Murph remains
+responsible for raw transcript compaction, replay, tool-call protocol validity,
+provider transport, and retry behavior.
+
+## Current Implementation
+
+- `src/chat/continuity-context.ts` scans the tail of `chat_stream_events`.
+- It extracts user-visible page artifacts from `phantom_create_page` and
+ `phantom_preview_page`.
+- It intentionally excludes `phantom_generate_login` authentication links from
+ page artifacts.
+- It includes recent `session.compact_boundary` checkpoints.
+- `src/agent/murph-context.ts` wraps that context in a delimiting tag (see
+  that file for the exact tag name) and inserts it as a Pi-compatible
+  user-context message before the latest user message when possible.
+- The chat query path uses this transform only on `agent_runtime: murph`.
+ Anthropic fallback can still receive the same context through the system
+ prompt append path.
+- Tool call cards now default to collapsed, with errors and blocked calls still
+  opening automatically.
+
+## Verification
+
+- Focused Phantom tests pass:
+ `bun test src/agent/__tests__/murph-context.test.ts src/chat/__tests__/continuity-context.test.ts src/chat/__tests__/writer.test.ts src/agent/__tests__/agent-sdk-boundary-callers.test.ts src/agent/__tests__/prompt-assembler.test.ts`
+- Full Phantom tests pass: `bun test`.
+- Phantom typecheck passes: `bun run typecheck`.
+- Phantom lint passes: `bun run lint`.
+- Chat UI typecheck and production build pass.
+- Murph shim test and typecheck pass for `Options.transformContext`.
+
+## Live Verification
+
+Phantom was run locally on top of the locally rebuilt Murph shim with the OpenAI
+provider and `gpt-5.5`.
+
+Verified:
+
+- A chat request created and previewed `/ui/continuity-smoke-final.html`.
+- The served page returned HTTP 200 and contained the expected smoke text.
+- A follow-up asking for the exact created page URL returned the page URL, not
+ a login link.
+- Completed tool cards rendered collapsed by default. An errored tool card still
+ opened automatically.
diff --git a/research/chat-experience/phase-10h-chat-integrity-review.md b/research/chat-experience/phase-10h-chat-integrity-review.md
new file mode 100644
index 00000000..32e3886c
--- /dev/null
+++ b/research/chat-experience/phase-10h-chat-integrity-review.md
@@ -0,0 +1,50 @@
+# Phase 10H Chat Integrity Review
+
+## Findings
+
+### P1: Live result errors can still leave a committed assistant row
+
+`/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire.ts:214` emits `message.assistant_end` for any SDK `result` when an assistant has started, before the subtype is checked at `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire.ts:228`. The live reducer then treats that `message.assistant_end` as success by changing the streaming assistant row to `committed` at `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-store.ts:332`. When the following `session.error` arrives, `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-store.ts:351` only changes the row to `error` if it is still `streaming`, so the already committed row stays visually successful.
+
+Impact: an SDK run that emits `assistant_start` or partial assistant text and then ends with a non-success `result` can still show a successful assistant row in the live transcript. The backend skip at `/Users/truffle/work/phantom-murph-hardening/src/chat/writer.ts:148` prevents durable assistant commit, so reload later removes that row. That creates a live versus reload trust mismatch and still violates the requirement that runtime or SDK result errors must not become empty successful assistant rows.
+
+The current coverage misses this shape. `/Users/truffle/work/phantom-murph-hardening/src/chat/__tests__/writer.test.ts:231` covers a non-success result with no prior assistant event, and `/Users/truffle/work/phantom-murph-hardening/src/chat/__tests__/sdk-to-wire.test.ts:402` only asserts that a result error creates `session.error`.
+
+Fix direction: for non-success SDK results, either do not emit a normal `message.assistant_end` before `session.error`, or make `session.error` override the assistant row to `error` even after `assistant_end` marked it committed. Add reducer and writer coverage for `assistant_start -> result error` and `assistant_start -> text -> result error`.
+
+No P0 or P2 findings found.
+
+## Tests Run or Inspected
+
+- `bun test src/chat/__tests__/writer.test.ts src/chat/__tests__/message-builder.test.ts src/chat/__tests__/http.test.ts`, 40 pass.
+- `bun test src/chat/__tests__/sdk-to-wire.test.ts src/chat/__tests__/run-timeline.test.ts src/chat/__tests__/http-resume.test.ts`, 72 pass.
+- `cd chat-ui && bun test src/lib/__tests__/chat-store.test.ts`, 26 pass.
+- `bun run typecheck`, pass.
+- `cd chat-ui && bun run typecheck`, pass.
+- `bun run lint`, pass for backend `src/`.
+- `cd chat-ui && bun run build`, pass. Vite reported unresolved `/chat/fonts/...` runtime font references and the existing chunk-size warning.
+- `git diff --check`, pass.
+- `cd chat-ui && bun run lint` was attempted, but `chat-ui/package.json` has no `lint` script.
+
+## Residual Risks
+
+- The required reading file `/Users/truffle/work/phantom-murph-hardening/prompts/phase-10h-chat-integrity-builder.md` is not present on disk. I searched the prompts and chat-experience research directories and continued from the actual diff plus the available Phase 10H materials.
+- I did not perform live browser verification.
+- The main remaining untested path is the live reducer sequence where an assistant row starts before a terminal SDK result error.
+
+## Live Browser Verification
+
+The candidate diff is not yet safe to proceed to live browser verification. Fix the P1 live transcript error-row issue first, then run live browser verification against the repaired slice.
+
+## Re-Review
+
+P1 resolved.
+
+Evidence: `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire.ts:214` now gates the synthetic `message.assistant_end` on `subtype === "success"`, so a non-success SDK `result` after assistant start emits `session.error` without first producing a normal assistant completion frame. `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-store.ts:366` now lets `session.error` mark the matching assistant row `error` regardless of whether that row was already `streaming` or `committed`.
+
+Targeted coverage is present. `/Users/truffle/work/phantom-murph-hardening/src/chat/__tests__/sdk-to-wire.test.ts:418` asserts that a result error after assistant start does not emit `message.assistant_end`. `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/__tests__/chat-store.test.ts:164` asserts that `session.error` changes a previously ended committed assistant row to `error`.
+
+Re-review tests:
+
+- `bun test src/chat/__tests__/sdk-to-wire.test.ts`, 58 pass.
+- `cd chat-ui && bun test src/lib/__tests__/chat-store.test.ts`, 27 pass.
diff --git a/research/chat-experience/phase-10h-live-verification.md b/research/chat-experience/phase-10h-live-verification.md
new file mode 100644
index 00000000..02bf40a3
--- /dev/null
+++ b/research/chat-experience/phase-10h-live-verification.md
@@ -0,0 +1,66 @@
+# Phase 10H Live Verification
+
+Date: 2026-05-01
+
+## Scope
+
+Verified the Phase 10H transcript-integrity slice against a real local Phantom server running on Murph with OpenAI.
+
+Server shape:
+
+- Phantom repo: `/Users/truffle/work/phantom-murph-hardening`
+- Branch: `codex/chat-experience-polish-10h`
+- Local URL: `http://127.0.0.1:3133`
+- Runtime: Murph via `/Users/truffle/work/murph/packages/anthropic-sdk-shim/dist/index.js`
+- Provider: OpenAI
+- Model configured in Phantom: `gpt-5.5`
+
+## Test
+
+1. Started Phantom locally with Murph and OpenAI.
+2. Built the chat UI and copied the generated SPA into `public/chat` as a temporary local artifact.
+3. Authenticated through the UI login flow using a short-lived bootstrap token.
+4. Created a new chat session.
+5. Attached `/tmp/phantom-phase10h-attachment-smoke.txt`.
+6. Sent: "Use the attached text file. Answer in exactly one sentence and include the marker word from the file."
+7. Waited for the real model response.
+8. Reloaded the page and verified durable replay.
+9. Inspected SQLite rows for the session.
+
+Session id:
+
+`2c4b3f1c-5857-4105-a85c-7cf2a04444e4`
+
+Screenshots:
+
+- `/tmp/phantom-phase10h-live-after-send.png`
+- `/tmp/phantom-phase10h-live-after-reload.png`
+
+## Results
+
+Pass:
+
+- User attachment chip appeared before send.
+- The model used the attached file and answered with `marker-lima-742`.
+- The attachment chip survived reload.
+- The assistant answer survived reload.
+- Tool cards rendered in the run timeline and stayed collapsed by default after reload.
+- Session detail contained two durable messages, one user and one assistant.
+- The user message `content_json` contained the attachment metadata and prompt text.
+- The user message `content_json` did not contain raw base64.
+- The user message `content_json` did not contain the file-only marker text.
+- The assistant answer was stored in `content_json` as `"The marker word is marker-lima-742."`.
+
+Observed but not blocking:
+
+- Browser console showed two `503` responses from push-notification endpoints. This matches local push being unconfigured and did not affect chat send, stream, reload, or transcript integrity.
+- The Playwright API request context did not reflect the UI auth state, so the live test used browser-side `fetch` after login. The UI was authenticated and the session flow worked.
+
+## Remaining Work
+
+This verifies the first production-trust slice. The next chat-experience slice should focus on:
+
+1. Durable run timeline polish: keep tool cards collapsed by default, preserve expansion state, and make replayed timelines feel identical to live timelines.
+2. Richer progress: surface a useful active-state row during long tool-heavy tasks instead of making the UI feel idle.
+3. Artifact and file affordances: created pages, generated files, previews, copy/open actions, and markdown outputs should become first-class chat objects.
+4. Provider matrix: repeat the small live chat smoke for Anthropic and GLM/Z.AI after the OpenAI path is committed cleanly.
diff --git a/research/chat-experience/phase-10h-phantom-chat-review.md b/research/chat-experience/phase-10h-phantom-chat-review.md
new file mode 100644
index 00000000..4168e0ab
--- /dev/null
+++ b/research/chat-experience/phase-10h-phantom-chat-review.md
@@ -0,0 +1,150 @@
+# Phase 10H Phantom Chat Review
+
+Date: 2026-05-01
+
+Scope: review only. No application code changes.
+
+## Findings
+
+### P1: SDK result errors can still become empty successful assistant rows
+
+`src/chat/sdk-to-wire.ts:240-250` translates a non-success SDK `result` into a `session.error` frame, but `src/agent/chat-query.ts:154-158` only records cost and keeps going unless the result is the special "No conversation found" shape. `src/chat/writer.ts:121-133` then unconditionally commits an assistant message with `response.text` and `stopReason: "end_turn"`.
+
+Impact: provider errors, including post-compaction overflow failures, can leave a normal empty assistant row in durable chat history. That is exactly the wrong trust signal: the run errored, but reload can show a successful blank assistant turn.
+
+Fix direction: make `executeChatQuery` return or throw a typed terminal error for non-success result subtypes. `ChatSessionWriter` should not commit a successful assistant message when the terminal frame is `session.error`; it should persist only the errored run timeline, or an explicit error assistant row if the product wants one.
+
+### P1: Assistant rendering drops all text after the first text block
+
+`chat-ui/src/lib/chat-store.ts:181-200` supports multiple text blocks on one assistant message. That is required for tool loops because the runtime can stream separate content blocks across a single user turn. `chat-ui/src/components/assistant-message.tsx:15-31` then uses `find((b) => b.type === "text")` and renders only the first block.
+
+Impact: if the assistant emits a pre-tool note and later emits the real answer in another text block, the UI can hide the actual answer. This is a direct correctness issue, not just polish.
+
+Fix direction: render all text blocks in order, at minimum by mapping every `message.content` text block to `Markdown`. A later richer version can preserve exact interleaving between thinking, tools, and text, but the next PR should make no text block invisible.
+
+### P1: Uploaded files are sent to the runtime but disappear from the chat record
+
+`src/chat/http-handlers.ts:102-108` builds the SDK message from `attachment_ids`, so the model can receive files. The writer then commits the user message from the SDK content at `src/chat/writer.ts:67-72`, emits `attachments: []` at `src/chat/writer.ts:76-83`, and never calls `ChatAttachmentStore.commitToMessage` from `src/chat/attachment-store.ts:54-60`. The client reducer also discards any user-frame attachments and stores only text at `chat-ui/src/lib/chat-store.ts:130-146`.
+
+There is a second footgun in `chat-ui/src/routes/session-route.tsx:58-64`: if upload fails and returns no IDs, the message still sends without the intended files.
+
+Impact: the user can attach a file, Phantom can use it, then the transcript gives no durable evidence of what was attached. On upload failure, the user may accidentally send a file-dependent prompt without the file.
+
+Fix direction: pass accepted attachment metadata into `ChatSessionWriter`, call `commitToMessage`, emit populated `user.message.attachments`, add attachment content to `ChatMessage`, and render user-message file chips with preview links. If all intended uploads fail, keep the composer text and block send with a clear error.
+
+### P2: Generated pages and files are not first-class artifacts in the UI
+
+The Phantom page tools already return artifact metadata. `src/ui/tools.ts:74-81` returns `path`, `url`, and `size` for `phantom_create_page`, and `src/ui/preview.ts:227-238` returns a screenshot plus JSON metadata for `phantom_preview_page`. The backend can parse this for future model context in `src/chat/continuity-context.ts:125-148`.
+
+The UI path loses that structure. `src/chat/run-timeline.ts:297-307` records only a safe output summary, and `summarizeToolOutput` collapses non-empty output to `"Tool produced output."` at `src/chat/run-timeline.ts:629-633`. `chat-ui/src/lib/chat-activity.ts:115-128` maps only generic tool fields, and `chat-ui/src/components/tool-call-card.tsx:229-245` renders `full_ref` and output as plain text, not as artifact actions or previews.
+
+Impact: a created page, preview screenshot, or generated file is treated like a raw tool log. The user has to read JSON or markdown links instead of seeing a durable artifact chip with title, URL/path, size, preview status, and open/copy actions.
+
+Fix direction: add a small artifact summary contract for Phantom-native tools first: page title, page URL, path, size, preview status, console issue count, failed request count, and optional screenshot reference. This belongs in Phantom built-in/tool summary plus UI affordances, not in a new MCP call.
+
+### P2: Tool identity conflates built-in tools, Phantom-native MCP tools, external MCP tools, and UI actions
+
+Phantom registers core app capabilities as in-process MCP servers in `src/index.ts:249-256`, including dynamic tools, scheduler, reflective memory, web UI, secrets, preview, and browser. The wire translator only infers MCP via string heuristics at `src/chat/sdk-to-wire-handlers.ts:141-150` and `src/chat/sdk-to-wire-handlers.ts:223-232`. For canonical `mcp__server__tool` names, `toolName.split(":")[0]` does not extract the server. The card subtitle logic handles only Claude-style built-ins like `Read`, `Write`, `Bash`, and `WebFetch` at `chat-ui/src/components/tool-call-card.tsx:28-48`.
+
+Impact: a Phantom page creation tool, an external MCP integration, and a generic unknown tool can all render with weak labels. Collapsed cards are less useful than they should be, and the UI cannot choose the right affordance, such as opening a page, previewing a file, showing scheduler metadata, or just labeling an external MCP call.
+
+Fix direction: normalize tool identity into `{ origin, serverName, rawName, displayName, capabilityKind }`. Use Phantom-native mappings for `phantom_create_page`, `phantom_preview_page`, `phantom_generate_login`, scheduler, secrets, memory, and browser. Reserve MCP labeling for external server boundaries. Use UI controls for already-known artifacts instead of forcing the agent to call another tool.
+
+### P2: The live run activity helps, but long tasks can still feel like detached status chrome
+
+`chat-ui/src/components/message-list.tsx:75-94` renders `runActivity` after all messages, separate from the current user request and the assistant answer. `chat-ui/src/components/run-activity-row.tsx:173-249` does show label, elapsed time, facts, subagents, and tool rows, which is the right foundation. The remaining issue is hierarchy: it reads as another transcript row, not the header of the current run.
+
+Impact: long-running tasks no longer go completely silent, but the user still has to infer which request is active and why the agent is quiet. This matters most when a tool has started but there is no assistant prose yet, during compaction, reconnect, browser preview, or long Bash work with little output.
+
+Fix direction: attach the active run strip to the current user message until assistant content starts, then make it the header of the assistant run. Keep elapsed time live. Prefer labels like `Running Bash`, `Previewing page`, `Compacting context`, `Waiting for permission`, and `Reconnected` over generic `Working...` once the frame data supports it.
+
+### P2: Markdown and code rendering are functional, not yet product-grade
+
+`chat-ui/src/components/markdown.tsx:6-49` only customizes code and links. It relies on `prose prose-sm`, but `chat-ui/package.json:13-29` does not include `@tailwindcss/typography`, so those typography classes may be inert depending on the Tailwind v4 setup. `chat-ui/src/components/code-block.tsx:21-45` renders a bordered `pre` with no syntax highlighting, no wrap toggle, and no table handling, and the copy action is hidden behind hover at `chat-ui/src/components/code-block.tsx:28-32`.
+
+Impact: tables, long links, generated URLs, code, and citations will work at a basic level, but they do not feel like a polished developer-facing agent surface. Touch users may not discover copy controls.
+
+Fix direction: add explicit markdown components for tables, lists, blockquotes, links, generated `/ui/...` URLs, and code blocks. Make copy visible on touch/focus, add a wrap toggle, and keep tables horizontally scrollable with tabular numerics.
+
+### P3: Thinking display is safe, but under-informative and not tied to provider capability
+
+The current UI avoids raw chain-of-thought: `chat-ui/src/components/thinking-block.tsx:4-15` shows `Thinking...`, `Thought`, or `Reasoning hidden`. That is safer than exposing private reasoning. However, `src/chat/sdk-to-wire-handlers.ts:269-276` emits `message.thinking_end` without duration, and Murph maps thinking start/delta/end at `/Users/truffle/work/murph/packages/core/src/query/query.ts:317-330` without any UI-facing provider capability summary.
+
+Murph model metadata marks thinking support for OpenAI and ZAI models at `/Users/truffle/work/murph/packages/core/src/providers/models.ts:96-115` and `/Users/truffle/work/murph/packages/core/src/providers/models.ts:162-190`, but Phantom's UI does not know whether the current provider supports visible reasoning, redacted reasoning, summaries, or only effort levels.
+
+Impact: the UI is honest, but it cannot yet tell the user "reasoning is hidden", "reasoned for 8s", or "this provider does not expose reasoning" in a consistent way.
+
+Fix direction: keep raw thinking hidden by default. Add timing and provider/model capability metadata to the safe activity layer. Only show reasoning summaries when Murph has an explicit safe summary contract; never derive them from private thinking deltas.
+
+## Next Builder Slice
+
+Recommended one-PR scope: **chat transcript integrity plus first-class user files**.
+
+Implement:
+
+1. Fix non-success SDK result handling so `session.error` cannot also create an empty successful assistant row.
+2. Render every assistant text block, not only the first.
+3. Persist, stream, load, and render user attachments as file chips with preview/open metadata.
+4. Block send when intended attachments fail to upload.
+5. Add small visual polish to the active run strip so it stays visibly attached to the current turn, without building the full artifact drawer yet.
+
+Acceptance criteria:
+
+- No empty assistant commit after SDK `result` subtype errors.
+- Multi-block assistant messages show all text blocks.
+- Uploaded images, PDFs, and text files appear on the sent user message live and after reload.
+- Failed uploads do not silently send the prompt without files.
+- A long `sleep` or page-preview run shows visible activity within one second and keeps a live elapsed timer.
+
+## Second Slice Backlog
+
+1. Artifact summaries and previews for `phantom_create_page` and `phantom_preview_page`: URL chip, open, copy, page title, size, preview screenshot, console and network issue counts.
+2. Tool identity normalization: distinguish Murph/Pi built-ins, Claude-style built-ins, Phantom-native in-process tools, external MCP tools, and UI-only affordances.
+3. Rich tool details: structured parameters, stdout/stderr sections for Bash, file diff summaries for edits, browser/page preview metadata, redaction notices, and safe full-output references.
+4. Markdown polish: tables, code highlighting, wrap toggle, always reachable copy, link cards for generated URLs, and mobile-safe overflow.
+5. Thinking contract: provider capability labels, duration, hidden reasoning copy, and future safe reasoning summaries when Murph exposes them.
+6. Durable artifact gallery: a lightweight session artifact rail fed by existing tool output and file metadata, not by extra agent work.
+7. Composer polish: environment/status line, upload progress, attachment error recovery, command affordance, and clearer stop state.
+
+## UI Polish Notes
+
+- Tool cards should stay collapsed by default for completed tools, and auto-open only for error and blocked states. The current behavior at `chat-ui/src/components/tool-call-card.tsx:164-171` is correct.
+- Collapsed cards need better content: `phantom_create_page` should show page title/path, `phantom_preview_page` should show preview status and issue counts, Bash should show the command verb and elapsed time, and external MCP should show server plus tool.
+- Expanded cards should avoid one raw wall of text. Structure details into Parameters, Output, Artifacts, Errors, Redactions, and Full output.
+- The top/header/sidebar/composer borders are still heavy as a stack. `chat-ui/src/components/app-shell.tsx:149-164` and `chat-ui/src/components/chat-input.tsx:97-100` use the same border tone for structural edges and work objects. Keep object borders for tools/code/cards, soften shell boundaries.
+- User messages should feel like compact request objects, not chat bubbles. `chat-ui/src/components/user-message.tsx:8-10` is already calmer than saturated purple, but attachments and metadata need to live with the request.
+- Avoid faking thinking. The honest states are: reasoning active, reasoning hidden, provider did not expose reasoning, and reasoned for duration if measured.
+
+## Testing Plan
+
+Focused backend:
+
+- `bun test src/chat/__tests__/sdk-to-wire.test.ts`
+- Add a non-success `result` test proving `session.error` does not lead to a committed assistant success row.
+- `bun test src/chat/__tests__/writer.test.ts`
+- Add attachment commit/load tests and upload-failure behavior tests.
+- `bun test src/chat/__tests__/run-timeline.test.ts`
+- Add page artifact summary tests once the artifact slice starts.
+
+Focused chat UI:
+
+- `cd chat-ui && bun test src/lib/__tests__/chat-store.test.ts`
+- Add tests for multi-text-block rendering state, attachment frames, replayed attachment frames, and failed upload no-send behavior.
+- Add component tests if the project adds a renderer harness; otherwise verify through production build plus browser.
+
+Build and static gates:
+
+- `bun run typecheck`
+- `cd chat-ui && bun run typecheck`
+- `cd chat-ui && bun run build`
+- `bunx biome check` on touched Phantom files
+- `git diff --check`
+
+Live browser verification against Murph:
+
+1. Start Phantom locally with `PHANTOM_AGENT_RUNTIME=murph` and OpenAI.
+2. Send a prompt that triggers a long-running command such as `sleep 10 && pwd`; verify activity appears within one second, elapsed time advances, stop works, and the final answer renders.
+3. Send a prompt that forces a tool loop with text before and after the tool; verify both text blocks render.
+4. Upload an image, a PDF, and a text file; verify chips render live, survive reload, and preview/open actions work.
+5. Create and preview a `/ui/...` page; verify tool cards are collapsed but useful, expanded details show structured parameters/output, and follow-up context still knows the page URL after reload.
+6. Trigger or simulate a provider result error; verify no empty assistant success row appears and the run timeline shows the error state.
diff --git a/research/chat-experience/phase-10h-pi-thinking-research.md b/research/chat-experience/phase-10h-pi-thinking-research.md
new file mode 100644
index 00000000..c401d474
--- /dev/null
+++ b/research/chat-experience/phase-10h-pi-thinking-research.md
@@ -0,0 +1,590 @@
+# Phase 10H Pi Thinking Research
+
+## Scope And Constraints
+
+This report executes the saved prompt at `/Users/truffle/work/phantom-murph-hardening/prompts/phase-10h-pi-thinking-research.md` and incorporates the additional operator direction in `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`.
+
+Constraints honored:
+
+- No application code was edited.
+- No files were reverted.
+- The only write target used for this task is `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-pi-thinking-research.md`.
+- This report treats local source files as evidence and labels design recommendations as inference when they go beyond direct source facts.
+
+## Executive Summary
+
+Pi has strong primitives for honest thinking, text streaming, tool calls, tool execution progress, usage, completion, and provider continuity. The strongest reusable layer is not the Pi web UI itself, but the separation it demonstrates: provider adapters preserve protocol details, the agent loop emits a stable event grammar, and UI components render already-normalized facts without mutating the model transcript.
+
+Phantom should adapt Pi concepts, not import Pi UI wholesale. Pi web UI is built around `mini-lit`, its own `Agent` usage model, and an in-memory artifact tool. Phantom already has a React chat UI, Murph-normalized events, durable sessions, page and file concepts, and product-specific artifacts. The next high-impact slice should make files and artifacts first-class chat surfaces, improve markdown and inspection, and keep thinking honest by rendering only provider-backed reasoning events or explicitly labeled redacted/hidden states.
+
+The product direction file is aligned with the Pi evidence: built-in Phantom tools should own native pages, artifacts, files, auth-sensitive previews, and session-specific operations; MCP tools should remain external and reusable integrations; UI affordances should inspect, open, copy, filter, expand, retry, and preview state that already exists. UI affordances must not invent tool execution or fabricate provider thinking.
+
+## Source Inventory
+
+### Doctrine And Project Contract
+
+- `/Users/truffle/.claude/AGENTS.md`: Root orchestration doctrine and operating expectations.
+- `/Users/truffle/.claude/CLAUDE.md`: Canonical doctrine, communication constraints, verification expectations, and strict professional output rules.
+- `/Users/truffle/work/murph/AGENTS.md`: Murph-specific clean-room contract, required reading list, strict TypeScript rule, v1 library-only scope, and safety constraints.
+- `/Users/truffle/work/murph/VISION.md`: Murph product and architecture intent.
+- `/Users/truffle/work/murph/PROGRESS.md`: Current phase status and already-completed work.
+- `/Users/truffle/work/murph/QUALITY-BAR.md`: Verification and completion bar.
+- `/Users/truffle/work/murph/ARCHITECTURE.md`: Runtime boundaries and normalized event architecture.
+- `/Users/truffle/work/murph/IMPLEMENTATION-PLAN.md`: Planned Murph phase sequencing and scope boundaries.
+
+Why this matters: the report must keep Murph clean-room, keep Phantom-specific UI outside Murph, and avoid application edits during research.
+
+### Prompt And Product Direction
+
+- `/Users/truffle/work/phantom-murph-hardening/prompts/phase-10h-pi-thinking-research.md`: Required deliverables for this report.
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`: Additional operator direction on files and artifacts, tool ownership, markdown quality, interactive inspection, and honest provider thinking.
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10g-pi-continuity.md`: Prior Pi continuity research and constraints around provider thinking preservation.
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10a-synthesis.md`: Prior synthesis for chat experience direction.
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10c-murph-progress-research.md`: Prior Murph progress research.
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10d-chat-ui-polish-research.md`: Prior Phantom chat polish research.
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10e-progress-ui-implementation-research.md`: Prior progress UI implementation research.
+
+Why this matters: Phase 10H should extend the existing chat-experience research thread, not restart it or recommend conflicting UI semantics.
+
+### Pi Agent Runtime
+
+- `/Users/truffle/work/pi-mono/packages/agent/src/types.ts`: Agent state, messages, tool result shape, tool update callback, and agent events.
+- `/Users/truffle/work/pi-mono/packages/agent/src/agent-loop.ts`: Main loop, streaming response handling, tool execution events, transformContext integration, and completion handling.
+- `/Users/truffle/work/pi-mono/packages/agent/src/agent.ts`: Agent wrapper, options, thinking-level mapping, event dispatch, state updates, and default LLM conversion.
+- `/Users/truffle/work/pi-mono/packages/agent/src/index.ts`: Public exports for the Pi agent package.
+- `/Users/truffle/work/pi-mono/packages/agent/src/proxy.ts`: Agent transport/proxy surface.
+
+Why this matters: Murph already uses Pi as a substrate, so Phantom should receive provider facts through Murph normalization rather than coupling directly to Pi UI internals.
+
+### Pi AI Types And Providers
+
+- `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`: `ThinkingLevel`, `ThinkingContent`, `ToolCall`, `Usage`, `AssistantMessageEvent`, and model capability types.
+- `/Users/truffle/work/pi-mono/packages/ai/src/stream.ts`: Public simple stream and completion entry points.
+- `/Users/truffle/work/pi-mono/packages/ai/src/utils/event-stream.ts`: `AssistantMessageEventStream` lifecycle and `.result()` behavior.
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/transform-messages.ts`: Cross-model thinking handling, redacted thinking preservation, thought signature dropping, synthetic tool result insertion, and partial assistant skip logic.
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/anthropic.ts`: Anthropic thinking display, redacted thinking mapping, signature streaming, tool-use streaming, and usage handling.
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses.ts`: OpenAI Responses reasoning configuration and encrypted reasoning inclusion.
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses-shared.ts`: OpenAI reasoning summary conversion, encrypted reasoning replay, tool-call conversion, and usage mapping.
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-completions.ts`: Completions-compatible reasoning, reasoning details, tool-call streaming, and provider-specific thinking compatibility.
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/google-shared.ts`: Google thought signature handling and the distinction between `thought: true` and encrypted signatures.
+- `/Users/truffle/work/pi-mono/packages/ai/src/models.ts`: Local model capability metadata including reasoning support and xhigh support.
+
+Why this matters: these files define what is protocol-backed thinking, what is continuity metadata, and what is safe to surface in UI.
+
+### Pi Web UI And Artifacts
+
+- `/Users/truffle/work/pi-mono/packages/web-ui/README.md`: Pi web UI purpose, public components, chat panel usage, attachments, artifacts, storage, and event list.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/ChatPanel.ts`: High-level chat panel and artifact panel integration.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/AgentInterface.ts`: Event subscription, streaming message container hookup, usage stats, attachments, model and thinking selectors.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/MessageList.ts`: Message grouping, tool result pairing, and inline assistant rendering.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/Messages.ts`: Assistant message rendering for text, thinking, tool calls, tool results, errors, and usage.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/StreamingMessageContainer.ts`: Request-animation-frame batching and streaming assistant rendering.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/ThinkingBlock.ts`: Collapsible thinking display with streaming state.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/renderer-registry.ts`: Tool renderer registry, status headers, disclosure, and default renderer selection.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/renderers/DefaultRenderer.ts`: Generic tool-call card with params and output formatting.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/artifacts.ts`: In-memory artifact tool, preview panel, tabs, and supported artifact types.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/artifacts-tool-renderer.ts`: Artifact tool-call renderer with file pills, diffs, code blocks, console logs, and details.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/ArtifactElement.ts`: Artifact preview base class and header-button abstraction.
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/ArtifactPill.ts`: Clickable artifact pill surface.
+
+Why this matters: Pi web UI contains useful rendering patterns, but Phantom should adapt them into its React, session, and artifact model.
+
+### Pi Mom CLI-Style Surfaces
+
+- `/Users/truffle/work/pi-mono/packages/mom/src/log.ts`: CLI logging for user input, tool start/success/error, response start, thinking, response, downloads, stop, warnings, errors, and usage summary.
+- `/Users/truffle/work/pi-mono/packages/mom/src/agent.ts`: Mom agent wiring around Pi coding agent and tool events.
+- `/Users/truffle/work/pi-mono/packages/mom/src/store.ts`: JSONL Slack log persistence and downloaded attachment storage.
+- `/Users/truffle/work/pi-mono/packages/mom/src/events.ts`: File-driven event scheduling.
+
+Why this matters: Mom is not a Phantom chat UI, but it demonstrates concise progress logging, output truncation, usage summaries, and file download persistence.
+
+### Murph Runtime And Normalization
+
+- `/Users/truffle/work/murph/packages/core/src/types/message.ts`: Murph content blocks, tool progress message shape, compact state, status messages, API retry messages, and file persisted messages.
+- `/Users/truffle/work/murph/packages/core/src/events/normalized-event.ts`: Normalized event grammar for text, thinking, redacted thinking, tool calls, tool execution, tool progress, session state, compaction, rate limits, retries, prompts, subagents, hooks, permissions, notifications, and errors.
+- `/Users/truffle/work/murph/packages/core/src/events/translator-pi.ts`: Pi-to-Murph translation for thinking, redacted thinking, tool calls, images, tool results, usage, and tool execution events.
+- `/Users/truffle/work/murph/packages/core/src/query/query.ts`: SDK stream mapping, runtime event mapping, tool progress preview limits, secret redaction, truncation, and full reference support.
+- `/Users/truffle/work/murph/packages/core/src/substrate/pi-harness.ts`: Murph Pi harness, `transformContext`, tool hooks, thinking level, thinking budgets, and normalized event forwarding.
+- `/Users/truffle/work/murph/packages/core/src/query/options.ts`: Murph thinking option normalization and Pi thinking-level mapping.
+- `/Users/truffle/work/murph/packages/anthropic-sdk-shim/src/index.ts`: Public shim export for `MurphToolProgressMessage`.
+
+Why this matters: Phantom should read the Murph-normalized event stream and avoid reaching around Murph into provider-specific or Pi-specific internals.
+
+### Phantom Chat Backend And UI
+
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/types.ts`: Phantom wire protocol for session, message, thinking, tool, and error frames.
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/types-tool.ts`: Phantom wire tool-event shapes.
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire-handlers.ts`: Assistant and stream-event conversion into Phantom wire frames, including thinking and tool-use handling.
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire.ts`: System/result/user event conversion, compaction/rate/subagent/tool-progress mapping, and safe tool error handling.
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/writer.ts`: Chat stream writer, durable message commit, final assistant persistence, and timeline persistence.
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/message-builder.ts`: User attachment conversion for images, PDFs, documents, and text.
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/message-store.ts`: Durable chat message storage and `content_json` handling.
+- `/Users/truffle/work/phantom-murph-hardening/src/agent/chat-query.ts`: Phantom runtime query setup with partial messages, progress summaries, prompt suggestions, thinking config, effort, and `transformContext`.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-types.ts`: Frontend message, thinking, tool, run activity, and timeline types.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-store.ts`: Wire-frame reducer for messages, thinking blocks, tools, statuses, compaction, rate limits, MCP connection, and subagents.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-dispatch-tools.ts`: Tool-call state machine and placeholder tool handling.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-activity.ts`: Run-activity summaries, active run timeline, compaction/rate/MCP/subagent/tool activity.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/assistant-message.tsx`: Assistant rendering for thinking blocks, tool cards, markdown text, streaming indicators, and usage.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/thinking-block.tsx`: Phantom reasoning block labels and redacted-state display.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/tool-call-card.tsx`: Tool cards, tool icons, parameter display, output display, error and blocked states, full reference links, and redaction.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/run-activity-row.tsx`: Run activity row with status, facts, subagents, and tool cards.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/markdown.tsx`: ReactMarkdown, GFM, sanitize, custom links, and code block integration.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/code-block.tsx`: Code block header, language label, wrapping, and copy action.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/message-list.tsx`: Message grouping, run timeline placement, current run activity row, and streaming accessibility text.
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/hooks/use-chat.ts`: SSE consumption, resume, initial history loading, and durable message parsing.
+
+Why this matters: Phantom already has most of the scaffolding for honest progress and thinking, but file/artifact inspection, markdown polish, and some event coverage remain the highest-impact gaps.
+
+## Pi Representation Model
+
+### Thinking
+
+Evidence:
+
+- `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/transform-messages.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/anthropic.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses-shared.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/google-shared.ts`
+- `/Users/truffle/work/pi-mono/packages/agent/src/agent.ts`
+- `/Users/truffle/work/pi-mono/packages/agent/src/agent-loop.ts`
+
+Direct source facts:
+
+- Pi AI defines `ThinkingLevel` as reasoning levels in `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`, and Pi agent-core adds an `off` state for agent configuration in `/Users/truffle/work/pi-mono/packages/agent/src/types.ts`.
+- Pi AI represents thinking content as a content block with `type: "thinking"`, `thinking`, optional `thinkingSignature`, and optional `redacted` in `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`.
+- Pi streams thinking through `AssistantMessageEvent` variants: `thinking_start`, `thinking_delta`, and `thinking_end` in `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`.
+- Pi agent maps `state.thinkingLevel` into provider reasoning settings unless thinking is `off` in `/Users/truffle/work/pi-mono/packages/agent/src/agent.ts`.
+- Anthropic provider handling supports summarized or omitted thinking display, maps redacted thinking into a redacted thinking block, and preserves signature data in `/Users/truffle/work/pi-mono/packages/ai/src/providers/anthropic.ts`.
+- OpenAI Responses handling requests encrypted reasoning content when reasoning is enabled and maps reasoning summaries into Pi thinking events in `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses.ts` and `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses-shared.ts`.
+- Google handling distinguishes visible thought parts from encrypted thought signatures. The Google source states that `thought: true` is the marker for thinking and that `thoughtSignature` can appear on any part type in `/Users/truffle/work/pi-mono/packages/ai/src/providers/google-shared.ts`.
+- Cross-model transcript transformation preserves redacted thinking only when safe for same-model continuity, converts non-empty cross-model thinking to text, skips empty thinking, and removes tool-call thought signatures when crossing models in `/Users/truffle/work/pi-mono/packages/ai/src/providers/transform-messages.ts`.
+- Pi transform code skips errored or aborted assistant messages because partial reasoning or incomplete tool calls can create API errors in `/Users/truffle/work/pi-mono/packages/ai/src/providers/transform-messages.ts`.
+
+Inference:
+
+- Phantom should treat provider thinking as protocol data, not as a generic UI spinner. If Phantom did not receive a Murph/Pi thinking event, it should show working, waiting, streaming, tool running, or status text instead of claiming the model is thinking.
+- Phantom should not display `thinkingSignature`, encrypted reasoning content, or Google `thoughtSignature` as user-readable content. These are continuity artifacts according to the provider adapter source files listed above.
+- Phantom's current `thinking-block.tsx` choice to hide the thinking text and show labels such as "Thought", "Thought for Xs", or "Reasoning hidden" is safer than rendering raw provider reasoning by default. Evidence for the current UI behavior is `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/thinking-block.tsx`.
+
+### Streaming Text
+
+Evidence:
+
+- `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`
+- `/Users/truffle/work/pi-mono/packages/agent/src/agent-loop.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/StreamingMessageContainer.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/Messages.ts`
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire-handlers.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-store.ts`
+
+Direct source facts:
+
+- Pi streams assistant text through `text_start`, `text_delta`, and `text_end` events in `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`.
+- Pi agent loop forwards streaming assistant events as `message_update` events in `/Users/truffle/work/pi-mono/packages/agent/src/agent-loop.ts`.
+- Pi web UI batches streaming message updates with `requestAnimationFrame` in `/Users/truffle/work/pi-mono/packages/web-ui/src/components/StreamingMessageContainer.ts`.
+- Phantom backend maps assistant stream text into `message.text_delta` and related frames in `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire-handlers.ts`.
+- Phantom frontend appends text deltas into assistant content in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-store.ts`.
+
+Inference:
+
+- Phantom already has the correct data path for streaming text. The main gap is rendering quality and persistence shape, not transport.
+- Phantom should inspect whether multiple text blocks can arrive in a single assistant message. The current assistant renderer uses the first text block from `message.content.find(...)` in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/assistant-message.tsx`, so later text blocks could be hidden if they occur.
+
+### Tool Calls And Tool Progress
+
+Evidence:
+
+- `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`
+- `/Users/truffle/work/pi-mono/packages/agent/src/types.ts`
+- `/Users/truffle/work/pi-mono/packages/agent/src/agent-loop.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/renderer-registry.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/renderers/DefaultRenderer.ts`
+- `/Users/truffle/work/murph/packages/core/src/types/message.ts`
+- `/Users/truffle/work/murph/packages/core/src/events/normalized-event.ts`
+- `/Users/truffle/work/murph/packages/core/src/query/query.ts`
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-dispatch-tools.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/tool-call-card.tsx`
+
+Direct source facts:
+
+- Pi tool calls are content blocks with `type: "toolCall"`, `id`, `name`, `arguments`, and optional `thoughtSignature` in `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`.
+- Pi tools accept `onUpdate` callbacks during execution in `/Users/truffle/work/pi-mono/packages/agent/src/types.ts`.
+- Pi agent loop emits `tool_execution_start`, `tool_execution_update`, and `tool_execution_end` events around execution in `/Users/truffle/work/pi-mono/packages/agent/src/agent-loop.ts`.
+- Pi web UI renders tools through a renderer registry and default renderer in `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/renderer-registry.ts` and `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/renderers/DefaultRenderer.ts`.
+- Murph normalizes Pi and runtime tool activity into `tool_progress` and `tool_execution_*` events in `/Users/truffle/work/murph/packages/core/src/events/normalized-event.ts` and `/Users/truffle/work/murph/packages/core/src/query/query.ts`.
+- Murph tool progress includes phase, elapsed time, duration, input preview, output preview, truncation flag, safe display flag, redactions, and full reference in `/Users/truffle/work/murph/packages/core/src/types/message.ts`.
+- Phantom converts tool progress into `tool.running` and `tool.result` frames in `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire.ts`.
+- Phantom frontend has a tool state machine that handles pending, input streaming, running, result, error, blocked, and aborted states in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-dispatch-tools.ts`.
+- Phantom tool cards already show parameters, redacted output, block reasons, errors, full output reference, and a 12000-character display limit in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/tool-call-card.tsx`.
+
+Inference:
+
+- Phantom's progress and tool-card foundation is sound after Phase 10E. The next improvement should specialize built-in Phantom tools, MCP tools, file outputs, generated pages, and full references into richer inspection surfaces rather than adding more generic status rows.
+- Tool output is not equivalent to artifact state. A tool output may mention a file, page, public URL, or full reference, but Phantom needs explicit artifact extraction or explicit frames to make those surfaces first-class.
+
+### Usage And Completion
+
+Evidence:
+
+- `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/utils/event-stream.ts`
+- `/Users/truffle/work/pi-mono/packages/agent/src/agent-loop.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/AgentInterface.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/Messages.ts`
+- `/Users/truffle/work/murph/packages/core/src/events/translator-pi.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/assistant-message.tsx`
+
+Direct source facts:
+
+- Pi `Usage` includes input, output, cache read, cache write, total tokens, and cost in `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`.
+- Pi stream completion uses `done` and `error` assistant events in `/Users/truffle/work/pi-mono/packages/ai/src/types.ts` and `/Users/truffle/work/pi-mono/packages/ai/src/utils/event-stream.ts`.
+- Pi agent loop emits message and turn lifecycle events in `/Users/truffle/work/pi-mono/packages/agent/src/agent-loop.ts`.
+- Pi web UI displays usage when available in `/Users/truffle/work/pi-mono/packages/web-ui/src/components/Messages.ts` and session stats in `/Users/truffle/work/pi-mono/packages/web-ui/src/components/AgentInterface.ts`.
+- Murph translates Pi usage into normalized usage in `/Users/truffle/work/murph/packages/core/src/events/translator-pi.ts`.
+- Phantom assistant messages render cost and token usage when `message.usage` exists in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/assistant-message.tsx`.
+
+Inference:
+
+- Phantom can continue to show usage at assistant-message level, but should avoid making usage the primary sign of completion. Session lifecycle, stream stop, and tool completion events are better completion signals because usage can be absent or provider-dependent.
+
+## Pi Rendering Patterns
+
+### Web UI Pattern To Adapt
+
+Evidence:
+
+- `/Users/truffle/work/pi-mono/packages/web-ui/README.md`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/ThinkingBlock.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/StreamingMessageContainer.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/MessageList.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/components/Messages.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/renderer-registry.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/artifacts.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/artifacts-tool-renderer.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/ArtifactPill.ts`
+
+Direct source facts:
+
+- Pi web UI provides a complete chat panel, message list, streaming container, thinking block, tool renderers, attachments, and artifacts in `/Users/truffle/work/pi-mono/packages/web-ui/README.md`.
+- Pi web UI renders thinking as a collapsible block in `/Users/truffle/work/pi-mono/packages/web-ui/src/components/ThinkingBlock.ts`.
+- Pi web UI pairs tool calls with tool results and skips standalone tool-result messages in `/Users/truffle/work/pi-mono/packages/web-ui/src/components/MessageList.ts`.
+- Pi web UI has a tool renderer registry with headers, status treatment, disclosure, and fallback rendering in `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/renderer-registry.ts`.
+- Pi web UI includes an artifacts tool, artifact panel, artifact pills, tabs, preview types, diffs, code blocks, logs, and console surfaces in `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/artifacts.ts`, `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/artifacts-tool-renderer.ts`, and `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/ArtifactPill.ts`.
+
+Inference:
+
+- Phantom should adapt these patterns: paired inline tool results, collapsible tool details, streaming update batching, honest thinking disclosure, artifact pills, artifact preview tabs, and file/page side-panel inspection.
+- Phantom should not import Pi web UI as-is. The Pi implementation is built on `mini-lit`, has its own chat surface and artifact storage pattern, and does not match Phantom's React components, durable sessions, or native Phantom page/artifact model. Evidence for Phantom's React UI is in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/assistant-message.tsx`, `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-store.ts`, and `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/hooks/use-chat.ts`.
+
+### CLI-Style Pattern To Adapt Selectively
+
+Evidence:
+
+- `/Users/truffle/work/pi-mono/packages/mom/src/log.ts`
+- `/Users/truffle/work/pi-mono/packages/mom/src/store.ts`
+- `/Users/truffle/work/pi-mono/packages/mom/src/agent.ts`
+
+Direct source facts:
+
+- Mom logs user input, tool lifecycle, response start, thinking, response text, downloads, stop, warnings, errors, and usage in `/Users/truffle/work/pi-mono/packages/mom/src/log.ts`.
+- Mom truncates long output and adds context to tool logs in `/Users/truffle/work/pi-mono/packages/mom/src/log.ts`.
+- Mom persists conversation events and downloaded attachments to local workspace files in `/Users/truffle/work/pi-mono/packages/mom/src/store.ts`.
+
+Inference:
+
+- Phantom can adapt Mom's concise temporal log shape for compact activity summaries and run timelines.
+- Phantom should not adapt Mom as a UI implementation. It is a CLI-style surface around Slack/mom workflows, not a browser chat UI. Evidence for that workflow is `/Users/truffle/work/pi-mono/packages/mom/src/agent.ts`.
+
+## Ownership Split
+
+### What Murph And Pi Should Own
+
+Evidence:
+
+- `/Users/truffle/work/murph/AGENTS.md`
+- `/Users/truffle/work/murph/ARCHITECTURE.md`
+- `/Users/truffle/work/murph/packages/core/src/events/normalized-event.ts`
+- `/Users/truffle/work/murph/packages/core/src/events/translator-pi.ts`
+- `/Users/truffle/work/murph/packages/core/src/substrate/pi-harness.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/transform-messages.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/anthropic.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses-shared.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/google-shared.ts`
+
+Direct source facts:
+
+- Murph is a clean-room TypeScript agent runtime and should build generic runtime behavior rather than Phantom-specific shortcuts according to `/Users/truffle/work/murph/AGENTS.md`.
+- Murph already owns normalized event grammar and Pi translation in `/Users/truffle/work/murph/packages/core/src/events/normalized-event.ts` and `/Users/truffle/work/murph/packages/core/src/events/translator-pi.ts`.
+- Murph's Pi harness owns Pi substrate integration, tool hooks, thinking levels, thinking budgets, and normalized event forwarding in `/Users/truffle/work/murph/packages/core/src/substrate/pi-harness.ts`.
+- Pi provider adapters own provider-specific reasoning and continuity details in `/Users/truffle/work/pi-mono/packages/ai/src/providers/anthropic.ts`, `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses-shared.ts`, and `/Users/truffle/work/pi-mono/packages/ai/src/providers/google-shared.ts`.
+
+Inference:
+
+- Murph and Pi should own provider transport, model capability handling, transcript transformation, thinking budgets, encrypted or redacted reasoning continuity, normalized stream events, tool execution lifecycle, tool progress envelopes, usage, retries, compaction, and cross-model safety.
+- Murph should expose facts. It should not own Phantom page previews, artifact side panels, chat-specific visual affordances, or product-specific file browsing.
+
+### What Phantom Built-In Tools Should Own
+
+Evidence:
+
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/message-builder.ts`
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/writer.ts`
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/message-store.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/tool-call-card.tsx`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/hooks/use-chat.ts`
+
+Direct source facts:
+
+- The product direction file explicitly asks for files and artifacts as first-class UI surfaces, clear built-in versus MCP versus UI ownership, markdown quality, interactive inspection, and no fake provider thinking in `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`.
+- Phantom already transforms user attachments into message content in `/Users/truffle/work/phantom-murph-hardening/src/chat/message-builder.ts`.
+- Phantom persists final assistant content and timelines in `/Users/truffle/work/phantom-murph-hardening/src/chat/writer.ts` and `/Users/truffle/work/phantom-murph-hardening/src/chat/message-store.ts`.
+- Phantom tool cards already understand local tool names such as Read, Write, Edit, Bash, Glob, Grep, WebSearch, and WebFetch in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/tool-call-card.tsx`.
+
+Inference:
+
+- Phantom built-in tools should own Phantom-native operations that require product state, workspace state, auth, session identity, and safe preview semantics: page creation, page preview, generated file registration, attachment registration, durable artifact metadata, safe full-output references, and Phantom-owned file browsing.
+- Phantom built-ins should produce explicit artifact metadata when they create or modify files or pages. UI should not have to scrape arbitrary prose when a built-in tool already knows the artifact identity.
+
+### What MCP Tools Should Own
+
+Evidence:
+
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`
+- `/Users/truffle/work/murph/packages/core/src/events/normalized-event.ts`
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire-handlers.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-types.ts`
+
+Direct source facts:
+
+- Phantom wire and UI types track MCP connection state and MCP server metadata in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-types.ts`.
+- Phantom backend marks tools as MCP based on tool naming conventions in `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire-handlers.ts`.
+- Murph normalized events include MCP-adjacent generic tool execution events rather than Phantom-specific tool semantics in `/Users/truffle/work/murph/packages/core/src/events/normalized-event.ts`.
+
+Inference:
+
+- MCP tools should own external, reusable integrations such as third-party systems, browser automation, email, calendar, source control, search, and other portable capabilities.
+- MCP tools should not be used as a substitute for Phantom-native file/page/session UI when the product already owns the underlying state and authorization model.
+
+### What UI Affordances Should Own
+
+Evidence:
+
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/tool-call-card.tsx`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/markdown.tsx`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/code-block.tsx`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/run-activity-row.tsx`
+
+Direct source facts:
+
+- Phantom UI already renders expandable tool cards, code blocks with a copy action, markdown, and run activity rows in the files listed above.
+
+Inference:
+
+- UI affordances should open, copy, preview, filter, expand, collapse, retry, and inspect already-produced state.
+- UI affordances should not execute hidden tools, invent artifacts, infer provider thinking, or mutate transcript content sent back to providers.
+
+## Phantom Recommendations Ordered By Impact
+
+### 1. Make Files And Artifacts First-Class Chat Surfaces
+
+Evidence:
+
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/artifacts.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/artifacts-tool-renderer.ts`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/ArtifactPill.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/tool-call-card.tsx`
+- `/Users/truffle/work/murph/packages/core/src/types/message.ts`
+
+Inference:
+
+- Add an artifact extraction and rendering layer in Phantom UI that treats created files, edited files, generated pages, attachments, public URLs, and `full_ref` outputs as inspectable objects.
+- Render compact artifact pills inline in tool cards and assistant messages.
+- Add an artifact inspector side panel or drawer with type-specific previews: text, markdown, code, image, PDF, HTML/page preview, diff, logs, and metadata.
+- Preserve the tool card as the execution record, but let the artifact inspector become the place where users inspect durable outputs.
+- Keep full references safe. A `full_ref` should be an opaque reference until a server endpoint validates scope, path, auth, and display safety.
+
+### 2. Keep Thinking Honest And Provider-Backed
+
+Evidence:
+
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/anthropic.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses-shared.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/google-shared.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/transform-messages.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/thinking-block.tsx`
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`
+
+Inference:
+
+- Render thinking only when Murph/Phantom receives actual thinking events.
+- For redacted or hidden reasoning, show explicit labels such as "Reasoning hidden" and optionally duration. Do not show fake summaries.
+- For providers without reasoning events, use status language such as "Working", "Calling tools", "Reading files", or "Waiting for model" instead of "Thinking".
+- Never expose `thinkingSignature`, encrypted reasoning content, Google `thoughtSignature`, or provider replay payloads in UI.
+- Avoid treating tool progress, compaction, retries, or MCP connection events as model thinking.
+
+### 3. Clarify Built-In Tool, MCP Tool, And UI Labels
+
+Evidence:
+
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire-handlers.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-types.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/tool-call-card.tsx`
+
+Inference:
+
+- Add visible but compact metadata for tool origin: Phantom built-in, MCP server, or local/runtime tool.
+- For built-in tools, prefer product words such as "Created page", "Updated file", "Read workspace", or "Generated preview".
+- For MCP tools, show the server name and tool name, because the current backend already derives MCP metadata from tool naming conventions in `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire-handlers.ts`.
+- For UI affordances, do not present actions such as opening, copying, expanding, previewing, or filtering as tool calls. They are inspection controls.
+
+### 4. Upgrade Markdown Rendering Quality
+
+Evidence:
+
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/markdown.tsx`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/code-block.tsx`
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`
+
+Inference:
+
+- Keep `remark-gfm` and `rehype-sanitize`, but add polished table, task-list, blockquote, ordered-list, unordered-list, inline-code, pre/code, and link treatments.
+- Add syntax highlighting or language-aware styling to code blocks while preserving copy actions.
+- Detect safe local artifact references and generated page links, then render them as artifact pills or preview links.
+- Keep raw HTML sanitized. Any custom renderer for links, images, or code must preserve sanitize guarantees.
+
+### 5. Fill Event Coverage Gaps Before Building More UI States
+
+Evidence:
+
+- `/Users/truffle/work/murph/packages/core/src/types/message.ts`
+- `/Users/truffle/work/murph/packages/core/src/events/normalized-event.ts`
+- `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-types.ts`
+
+Inference:
+
+- Phantom currently maps compaction, rate limits, subagents, and tool progress, but should audit additional Murph events such as `api_retry`, `files_persisted`, `tool_use_summary`, `auth_status`, `local_command_output`, hook progress, plugin install, session state, notification, memory recall, and mirror errors before adding separate bespoke UI states.
+- Event coverage should stay factual. If Murph does not emit an event, Phantom UI should not synthesize a provider-like state.
+
+## Risks And Anti-Patterns
+
+### Protocol And Provider Risks
+
+Evidence:
+
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/transform-messages.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/google-shared.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses-shared.ts`
+- `/Users/truffle/work/pi-mono/packages/ai/src/providers/anthropic.ts`
+- `/Users/truffle/work/murph/packages/core/src/events/translator-pi.ts`
+
+Risks:
+
+- Displaying encrypted reasoning payloads, `thinkingSignature`, or `thoughtSignature` as user-readable thinking would leak continuity metadata and misrepresent provider semantics.
+- Treating Google `thoughtSignature` as proof of visible thinking would contradict the adapter comment in `/Users/truffle/work/pi-mono/packages/ai/src/providers/google-shared.ts`.
+- Rewriting provider transcript content for UI display can break tool-call protocol and cross-model continuity. Pi transform code already has explicit safeguards in `/Users/truffle/work/pi-mono/packages/ai/src/providers/transform-messages.ts`.
+- Rendering raw provider reasoning by default can expose unsafe or private reasoning material. The product direction file asks to avoid fake provider thinking, and the provider files show that visible reasoning semantics vary by provider.
+
+### Tool And Artifact Risks
+
+Evidence:
+
+- `/Users/truffle/work/murph/packages/core/src/query/query.ts`
+- `/Users/truffle/work/murph/packages/core/src/types/message.ts`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/tool-call-card.tsx`
+- `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/artifacts.ts`
+
+Risks:
+
+- Full output references can become path traversal or data exposure risks if the UI dereferences them without server-side validation. Murph exposes `full_ref` as part of tool progress in `/Users/truffle/work/murph/packages/core/src/types/message.ts`.
+- Tool inputs and outputs can contain secrets. Murph redaction exists in `/Users/truffle/work/murph/packages/core/src/query/query.ts`, and Phantom tool cards have frontend redaction in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/tool-call-card.tsx`; neither should be weakened by artifact previews.
+- Importing Pi's in-memory artifact tool directly would duplicate Phantom storage and authority. Pi's artifact implementation is in `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/artifacts.ts`, while Phantom durability is in `/Users/truffle/work/phantom-murph-hardening/src/chat/writer.ts` and `/Users/truffle/work/phantom-murph-hardening/src/chat/message-store.ts`.
+- UI-only artifact messages must not leak back into model context. Pi web UI has conversion logic to filter artifact messages in `/Users/truffle/work/pi-mono/packages/web-ui/src/components/Messages.ts`.
+
+### UX And Product Risks
+
+Evidence:
+
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/assistant-message.tsx`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/markdown.tsx`
+- `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/message-list.tsx`
+- `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`
+
+Risks:
+
+- A chat UI that only shows transient tool cards leaves generated files and pages feeling like log output instead of first-class product artifacts.
+- The current assistant renderer appears to render only the first text block from an assistant message in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/assistant-message.tsx`. If multiple text blocks can arrive, content may be hidden.
+- Markdown that is technically correct but visually weak can make good model output feel untrustworthy. Current markdown support is grounded in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/markdown.tsx`.
+- Confusing UI affordances with tools can make the user think opening or copying a file changed runtime state. The product direction file asks for clear separation in `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`.
+
+## Concrete Acceptance Criteria For Next Builder Slice
+
+Inference: the next builder slice should be a Phantom chat UI slice focused on first-class artifacts, honest thinking display, and markdown polish. It should not require Murph runtime changes unless the event coverage audit finds a missing normalized event that already exists in Murph but is not wired into Phantom.
+
+Acceptance criteria:
+
+1. Artifact extraction
+ - Phantom frontend derives artifact candidates from existing wire frames, tool outputs, tool names, `full_ref`, generated page URLs, and attachment metadata.
+ - Artifact candidates preserve source message id, tool call id when present, display label, type, origin, safe preview status, and raw reference.
+ - Extraction does not mutate model transcript content.
+
+2. Artifact rendering
+ - Tool cards and assistant markdown can render compact artifact pills for recognized files, pages, URLs, and full references.
+ - Clicking an artifact pill opens an inspector panel or drawer without starting a new model or tool call.
+ - Inspector supports at least text, markdown, code, image, PDF, generated page URL, and opaque full reference metadata.
+ - Full reference preview is disabled unless a server endpoint validates it as safe to display.
+
+3. Thinking honesty
+ - Thinking UI renders only from received thinking frames.
+ - Redacted thinking displays a redacted or hidden label and does not expose signatures or encrypted content.
+ - A provider that emits no thinking frames is never shown as "thinking"; status language uses runtime facts such as tool running, compaction, retry, or streaming text.
+ - Tests cover text thinking, redacted thinking, and no-thinking provider behavior.
+
+4. Tool-origin clarity
+ - Tool cards visually distinguish Phantom built-in, MCP, and generic runtime tools using existing metadata or clearly documented heuristics.
+ - MCP cards show server name when available.
+ - UI-only actions such as preview, copy, open, expand, collapse, filter, and retry are not displayed as tool calls.
+
+5. Markdown polish
+ - Tables, lists, blockquotes, inline code, fenced code, links, and task lists render with polished spacing and wrapping.
+ - Code blocks keep copy behavior and add language-aware presentation.
+ - Sanitization remains enabled.
+ - Safe artifact links in markdown render as links or artifact pills without enabling raw HTML execution.
+
+6. Persistence and replay
+ - Reloaded chat history shows durable assistant text, historical run timeline, and artifact references when their source data is persisted.
+ - If artifact details are transient and unavailable after reload, the UI clearly shows metadata instead of a broken preview.
+ - Existing session resume behavior in `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/hooks/use-chat.ts` remains intact.
+
+7. Verification
+ - Unit tests cover reducer/extractor behavior for thinking, tool outputs, artifact candidates, and markdown link rendering.
+ - Component tests or browser checks cover artifact inspector open/close, tool-card expansion, markdown tables, code blocks, and redacted thinking.
+ - No application code outside the approved builder slice is changed.
+ - No explicit `any`, no `@ts-ignore`, and no hidden type escapes are introduced.
+
+## Highest-Signal Findings
+
+1. Pi already has honest thinking primitives, including redacted/encrypted continuity handling, but those primitives are provider-specific. Phantom should render only Murph/Pi-backed thinking frames and should never display signatures or encrypted reasoning payloads. Evidence: `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`, `/Users/truffle/work/pi-mono/packages/ai/src/providers/anthropic.ts`, `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses-shared.ts`, `/Users/truffle/work/pi-mono/packages/ai/src/providers/google-shared.ts`.
+
+2. Pi web UI is a valuable pattern library, not a drop-in dependency for Phantom. Adapt the ideas of paired tool results, collapsible thinking, streaming batching, renderer registry, artifact pills, and artifact inspector, but implement them in Phantom's React and durable session model. Evidence: `/Users/truffle/work/pi-mono/packages/web-ui/src/components/Messages.ts`, `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/renderer-registry.ts`, `/Users/truffle/work/pi-mono/packages/web-ui/src/tools/artifacts/artifacts.ts`, `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-store.ts`.
+
+3. The next product jump is first-class files and artifacts. Current Phantom tool cards are useful execution records, but generated pages, edited files, attachments, URLs, and `full_ref` outputs need artifact pills and an inspector so outputs are not trapped inside log text. Evidence: `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`, `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/tool-call-card.tsx`, `/Users/truffle/work/murph/packages/core/src/types/message.ts`.
+
+4. Ownership should stay split: Pi and Murph own provider protocol, transcript safety, thinking continuity, normalized events, usage, and tool progress; Phantom owns product-specific built-ins, pages, files, artifacts, previews, and UI inspection; MCP owns external reusable integrations. Evidence: `/Users/truffle/work/murph/packages/core/src/substrate/pi-harness.ts`, `/Users/truffle/work/murph/packages/core/src/events/translator-pi.ts`, `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`.
+
+5. Markdown quality and replay durability are now part of trust. Phantom has GFM and sanitize, but needs stronger table/code/link/artifact rendering, and the current assistant renderer should be checked for multi-text-block messages because it selects only the first text block. Evidence: `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/markdown.tsx`, `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/code-block.tsx`, `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/assistant-message.tsx`.
+
+## Self Review
+
+- Every direct factual claim in this report is tied to a local source path in the same paragraph or bullet group.
+- Recommendations and product choices are labeled as inference.
+- No external source was needed because the requested evidence exists in local Pi, Murph, and Phantom source files.
+- No application code was edited.
+- The only file written for this task is `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-pi-thinking-research.md`.
diff --git a/research/chat-experience/phase-10h-product-direction.md b/research/chat-experience/phase-10h-product-direction.md
new file mode 100644
index 00000000..6fc5e25c
--- /dev/null
+++ b/research/chat-experience/phase-10h-product-direction.md
@@ -0,0 +1,79 @@
+# Phase 10H Product Direction: Best-in-Class Chat
+
+Date: 2026-05-01
+
+## Operator Direction
+
+Cheema wants Phantom's chat to feel like a product people choose over other
+agent surfaces, not merely a working transcript. The experience needs obsessive
+attention to detail:
+
+- Long-running tasks should never feel dead. The user should see meaningful
+ live activity, tool usage, compaction, waiting, recovery, and completion.
+- Tool cards should be useful when collapsed and rich when expanded.
+- Thinking/progress should be honest across providers. Do not fake private
+ chain-of-thought. Show redacted/private reasoning state, summaries, timing,
+ usage, or provider-supported reasoning signals when those signals really
+ exist.
+- Files and artifacts should feel first-class. If the agent creates or reads a
+ file, the UI should make it inspectable when safe, with links, previews, and
+ metadata rather than raw walls of text.
+- Markdown should feel polished and predictable. Code, tables, links, lists,
+ citations, and generated URLs should render clearly.
+- Interaction details matter: borders, spacing, sticky composer behavior,
+ disabled/loading states, command affordances, copy actions, reveal controls,
+ and error recovery should all feel intentional.
+
+## Tools and Capability Model
+
+Every new capability should ask where it belongs:
+
+- **Murph/Pi** owns provider transport, thinking levels, model metadata,
+ overflow/compaction primitives, core agent event semantics, and tool execution
+ seams.
+- **Phantom built-in or CLI tools** are best for core app capabilities that must
+ be reliable, low-latency, and tightly integrated with Phantom state, files,
+ pages, sessions, and auth.
+- **MCP tools** are best for external integrations or capabilities that benefit
+ from a standard tool server boundary, independent lifecycle, or reuse across
+ agents.
+- **UI affordances** are best for browsing, opening, copying, previewing,
+ filtering, expanding, retrying, and inspecting data already produced by the
+ agent. Do not force the agent to call a tool when the browser can safely show
+ an existing artifact.
+
+Default bias:
+
+1. Reuse Pi or Murph if the primitive already exists.
+2. Use Phantom built-ins for Phantom-native files, pages, artifacts, sessions,
+ previews, and chat ergonomics.
+3. Use MCP for external or reusable integrations.
+4. Use UI controls for inspection and interaction with already-known state.
+
+## Product Bar
+
+The target is not novelty. The target is clarity, trust, and flow:
+
+- The user always knows whether the agent is thinking, using a tool, waiting,
+ compacting, retrying, done, blocked, or errored.
+- The UI shows enough detail to build trust without drowning the user.
+- Expanded tool detail should be structured: parameters, output, previews,
+ links, generated files, and safe full-output references.
+- Hidden or redacted provider thinking should be labeled honestly.
+- Visual language should use product icons and clear labels. Emojis are not a
+ default system primitive for professional surfaces.
+- All visible text must fit on desktop and mobile. Controls should not jump
+ around during streaming.
+
+## Immediate Research Questions
+
+1. What does Pi/Pi Code already provide for thinking, progress, CLI rendering,
+ file display, and tool activity?
+2. Which Phantom chat states are currently missing, misleading, or visually
+ underpowered?
+3. Which capabilities should become Phantom built-in tools versus MCP tools
+ versus UI-only affordances?
+4. What can OpenAI, Anthropic, and ZAI truthfully expose as thinking/reasoning
+ through Pi/Murph today?
+5. What is the smallest next builder slice that improves the live experience
+ materially while remaining testable?
diff --git a/research/chat-experience/phase-10h-provider-thinking-research.md b/research/chat-experience/phase-10h-provider-thinking-research.md
new file mode 100644
index 00000000..28dc1c5f
--- /dev/null
+++ b/research/chat-experience/phase-10h-provider-thinking-research.md
@@ -0,0 +1,353 @@
+# Phase 10H Provider Thinking Research
+
+Date: 2026-05-01
+
+## Provider Capability Matrix Headline
+
+Phantom can honestly show live reasoning state across OpenAI, Anthropic, and ZAI, but it should only show reasoning text when Murph/Pi can prove it is a provider-supported summary or safe display text. Redacted, encrypted, private, or unknown thinking must be hidden or labeled as private. Separate thinking token counts are not available to Phantom today because Pi, Murph, and the Phantom wire protocol drop provider reasoning token fields.
+
+## Executive Findings
+
+Phantom's current UI posture is conservative and mostly correct: it displays "Thinking...", "Thought", or "Reasoning hidden" and does not render thinking text in `ThinkingBlock`. However, the browser store still accumulates `message.thinking_delta` text in memory, so private reasoning can still cross the Phantom wire and land client-side.
+
+OpenAI, Anthropic, and ZAI do not expose the same product object. OpenAI Responses exposes optional reasoning summaries, encrypted reasoning items, and upstream reasoning token counts. Anthropic Messages exposes thinking blocks, signatures, summarized or omitted display modes in Pi, and redacted thinking blocks. ZAI exposes `reasoning_content` for GLM thinking. That ZAI signal is best treated as private reasoning, not a user-facing summary.
+
+The missing product primitive is not another visual treatment. It is an event-contract field that distinguishes `summary`, `display_text`, `private`, `redacted`, `encrypted`, and `unknown` thinking. Without that field, Phantom should not render thinking text verbatim, even when the provider stream calls it "thinking".
+
+The added product direction is the right bar: never fake private chain-of-thought, never let long-running tasks feel dead, and map provider signals into honest UI states. Murph/Pi should own provider semantics and thinking visibility. Phantom should render known states, safe summaries, timing, usage, tools, compaction, retries, blocked states, and errors.
+
+## Source Map
+
+Primary local files reviewed:
+
+- Murph normalized event and usage model: `/Users/truffle/work/murph/packages/core/src/events/normalized-event.ts`
+- Murph Pi translator: `/Users/truffle/work/murph/packages/core/src/events/translator-pi.ts`
+- Murph message and usage types: `/Users/truffle/work/murph/packages/core/src/types/message.ts`
+- Murph harness thinking options: `/Users/truffle/work/murph/packages/core/src/substrate/harness.ts`
+- Murph query option normalization: `/Users/truffle/work/murph/packages/core/src/query/options.ts`
+- Murph query event mappers: `/Users/truffle/work/murph/packages/core/src/query/query.ts`
+- Murph built-in model capability records: `/Users/truffle/work/murph/packages/core/src/providers/models.ts`
+- Murph Pi adapter: `/Users/truffle/work/murph/packages/core/src/substrate/pi-adapter.ts`
+- Pi shared types: `/Users/truffle/work/pi-mono/packages/ai/src/types.ts`
+- Pi OpenAI Responses provider: `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses.ts`
+- Pi OpenAI Responses stream processor: `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-responses-shared.ts`
+- Pi Anthropic provider: `/Users/truffle/work/pi-mono/packages/ai/src/providers/anthropic.ts`
+- Pi OpenAI-compatible completions provider: `/Users/truffle/work/pi-mono/packages/ai/src/providers/openai-completions.ts`
+- Pi message transformer: `/Users/truffle/work/pi-mono/packages/ai/src/providers/transform-messages.ts`
+- Phantom wire types and translators: `/Users/truffle/work/phantom-murph-hardening/src/chat/types.ts`, `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire.ts`, `/Users/truffle/work/phantom-murph-hardening/src/chat/sdk-to-wire-handlers.ts`
+- Phantom durable timeline: `/Users/truffle/work/phantom-murph-hardening/src/chat/run-timeline.ts`
+- Phantom UI state and thinking component: `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-types.ts`, `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/lib/chat-store.ts`, `/Users/truffle/work/phantom-murph-hardening/chat-ui/src/components/thinking-block.tsx`
+- Product direction: `/Users/truffle/work/phantom-murph-hardening/research/chat-experience/phase-10h-product-direction.md`
+
+Official provider docs checked:
+
+- OpenAI reasoning guide: https://developers.openai.com/api/docs/guides/reasoning
+- OpenAI Responses API reference: https://developers.openai.com/api/reference/resources/responses/methods/create
+- Anthropic extended thinking guide: https://platform.claude.com/docs/en/build-with-claude/extended-thinking
+- ZAI chat completion API: https://docs.z.ai/api-reference/llm/chat-completion
+- ZAI thinking mode guide: https://docs.z.ai/guides/capabilities/thinking-mode
+- ZAI deep thinking guide: https://docs.z.ai/guides/capabilities/thinking
+- ZAI tool streaming guide: https://docs.z.ai/guides/capabilities/stream-tool
+
+## Event Taxonomy
+
+### Thinking
+
+Pi has `thinking_start`, `thinking_delta`, and `thinking_end` stream events in `AssistantMessageEvent`. `ThinkingContent` has `thinking`, optional `thinkingSignature`, and optional `redacted`.
+
+Murph normalizes Pi thinking into `thinking_start`, `thinking_delta`, and `thinking_end`. `NormalizedUsage` only has input, output, cache read, cache write, total, and cost. It does not have `reasoningTokens` or `thinkingTokens`.
+
+Murph SDK-style stream mapping converts normalized thinking events into `stream_event` content block start, delta, and stop with `content_block.type: "thinking"`.
+
+Phantom converts SDK stream events to `message.thinking_start`, `message.thinking_delta`, and `message.thinking_end`. `ThinkingStartFrame` includes `redacted: boolean`; `ThinkingDeltaFrame` only has text delta; `ThinkingEndFrame` has optional duration.
+
+Current UI state stores thinking text in `ThinkingBlockState.text`, but `ThinkingBlock` does not render it. Durable run timelines also drop raw thinking text and only retain labels such as "Thinking..." or "Finished reasoning."
+
+Existing event basis:
+
+- Pi `AssistantMessageEvent`
+- Murph `NormalizedEvent` thinking variants
+- SDK `stream_event`
+- Phantom `message.thinking_start`, `message.thinking_delta`, `message.thinking_end`
+
+Contract gaps:
+
+- No `thinkingVisibility` or `thinkingKind` field exists to say whether text is a provider summary, user-displayable thinking, private chain-of-thought, redacted, encrypted, or unknown.
+- No server-side Phantom rule prevents private thinking deltas from crossing into the browser.
+
+### Redacted Thinking
+
+Pi represents redaction as `ThinkingContent.redacted` with opaque encrypted payload stored in `thinkingSignature`. Anthropic redacted thinking enters Pi as a thinking block with `redacted: true`, placeholder text, and signature data.
+
+Murph final assistant content maps Pi redacted thinking to `MurphRedactedThinkingBlock` with `type: "redacted_thinking"` and optional `data`. The normalized event union has a `redacted_thinking` event type, but the Pi stream translator currently maps Pi stream `thinking_start` the same way for redacted and non-redacted blocks. It does not emit a normalized redacted content-block start during streaming.
+
+Phantom final assistant handling recognizes `redacted_thinking` and emits `message.thinking_start` with `redacted: true` and no delta. Stream handling can also handle a `content_block_start` with `content_block.type: "redacted_thinking"`, but Murph does not reliably produce that stream shape today.
+
+Existing event basis:
+
+- Pi `ThinkingContent.redacted`
+- Murph final content block `redacted_thinking`
+- Phantom `message.thinking_start.redacted`
+
+Contract gaps:
+
+- Murph should emit a streaming redacted-thinking signal when the provider says a block is redacted, not only rely on final assistant reconciliation.
+- The redacted payload must remain opaque and must never be displayed, parsed, summarized, or copied into user-visible text.
+
+### Progress Summaries
+
+Murph has runtime events for `tool_execution_start`, `tool_execution_update`, `tool_execution_end`, direct `tool_progress`, `subagent_start`, `subagent_progress`, `subagent_end`, `compact_started`, `compact_completed`, `session_state`, `prompt_suggestion`, `permission_request`, `permission_decision`, hooks, notifications, and errors.
+
+Murph maps tool execution events into `tool_progress` messages with safe previews, truncation, and secret redactions. It maps subagent events into task system messages. Phantom maps these into tool cards, subagent activity, and durable run activity labels.
+
+Phantom's product direction says long-running tasks should never feel dead. Existing event support is enough for thinking, tool running, compaction, rate limits, subagents, blocked tools, aborted tools, done, and error. Waiting and retry states still lack contracts that surface them clearly.
+
+Existing event basis:
+
+- Murph normalized `tool_execution_*`, `tool_progress`, `subagent_*`, `compact_*`, `session_state`
+- Phantom `message.tool_call_*`, `message.subagent_*`, `session.status`, `session.compact_boundary`
+
+Contract gaps:
+
+- Murph has normalized `api_retry`, and `MurphAPIRetryMessage` exists, but `NormalizedRuntimeEventMapper` does not map `api_retry` to an SDK message and Phantom has no `session.retry` or equivalent frame.
+- `compact_completed` includes post-token count in Murph, but Phantom's `session.compact_boundary` only carries `pre_tokens`.
+
+### Token Usage
+
+Pi `Usage`, Murph `NormalizedUsage`, Murph `MurphUsage`, Murph `MurphModelUsage`, and Phantom `SessionDoneFrame.usage` carry normal input, output, cache, total, and cost fields. None carry separate thinking or reasoning token counts.
+
+OpenAI upstream exposes reasoning token counts under `output_tokens_details.reasoning_tokens`, but Pi's Responses stream processor discards that field and only persists output token totals. ZAI docs show prompt, completion, cached prompt, and total token usage in Chat Completions, not a separate reasoning token field. Anthropic usage in Pi is also reduced to input, output, cache read, cache write, total, and cost.
+
+Existing event basis:
+
+- Murph `NormalizedUsage`
+- Murph `MurphUsage`
+- Phantom `session.done.usage`
+
+Contract gaps:
+
+- Add `reasoningTokens?: number` or `thinkingTokens?: number` at Pi `Usage`, Murph `NormalizedUsage`, Murph `MurphUsage`, Murph `MurphModelUsage`, result messages, Phantom `SessionDoneFrame.usage`, and run timeline summaries.
+- Add provider attribution for whether reasoning tokens are provider-counted, estimated, or unavailable. Do not estimate counts in UI.
+
+### Tool Calls
+
+Pi streams `toolcall_start`, `toolcall_delta`, and `toolcall_end`. Murph normalizes these as `tool_call_start`, `tool_call_delta`, and `tool_call_end`. Phantom exposes `message.tool_call_start`, `message.tool_call_input_delta`, `message.tool_call_input_end`, `message.tool_call_running`, `message.tool_call_result`, `message.tool_call_blocked`, and `message.tool_call_aborted`.
+
+ZAI supports `tool_stream` for streaming tool calls on supported GLM routes, and Murph's ZAI OpenAI-compatible route enables `zaiToolStream`.
+
+Existing event basis:
+
+- Pi toolcall stream events
+- Murph normalized tool call and tool execution events
+- Phantom tool call frames
+
+Contract gaps:
+
+- Tool cards can be richer with structured parameters, safe output references, files, and previews, but that is product/UI work rather than provider thinking semantics.
+
+### Rate Limits
+
+Murph normalized `rate_limit` has status, reset time, type, utilization, and overage status. Murph maps it to `rate_limit_event`. Phantom maps it to `session.rate_limit` with status, type, reset time, and utilization.
+
+Existing event basis:
+
+- Murph `rate_limit`
+- SDK `rate_limit_event`
+- Phantom `session.rate_limit`
+
+Contract gaps:
+
+- Phantom drops `overageStatus`.
+- Retry and waiting states should be surfaced separately from rate-limit state.
+
+## Provider Capability Matrix
+
+| Provider route | Upstream signal | Pi/Murph today | Honest UI treatment | Token count status |
+| --- | --- | --- | --- | --- |
+| OpenAI via `openai-responses` | Reasoning models use private reasoning tokens. Raw reasoning tokens are not exposed through the API. Optional reasoning summaries appear only when opted in. Encrypted reasoning items can be included for continuity. Usage includes upstream `output_tokens_details.reasoning_tokens`. | Murph marks GPT-5.x and o3 routes as thinking-capable. Pi sends `reasoning.effort`, defaults `summary` to `auto`, includes `reasoning.encrypted_content`, streams reasoning summaries as Pi `thinking_*`, stores the final reasoning item JSON in `thinkingSignature`, and drops separate `reasoning_tokens` from usage. | Show active "Reasoning" state. Show summary text only after Murph/Pi labels it `summary`. Hide encrypted content and signatures. Do not claim raw chain-of-thought is visible. | Upstream count exists for OpenAI Responses. Phantom cannot show it today because Pi and Murph discard it. |
+| Anthropic via `anthropic-messages` | Extended thinking can return thinking blocks, signatures, and `redacted_thinking` blocks. Anthropic docs describe redacted data as opaque encrypted content with no readable summary. Pi also supports summarized or omitted thinking display. | Murph marks Opus/Sonnet routes as thinking-capable and Haiku as not thinking-capable. Pi maps thinking blocks to `thinking_*`, maps redacted thinking to Pi `ThinkingContent.redacted`, computes standard usage only, and can choose summarized or omitted display. Murph final assistant content preserves `redacted_thinking`; streaming redaction is not clearly distinguished at normalized event level. | Show active "Reasoning" state. Show provider summary only if Murph/Pi labels the text as summary or displayable. Show "Reasoning hidden" for redacted blocks. Never display signature or `data`. | No separate thinking-token field reaches Phantom today. Do not show a count. |
+| ZAI GLM via `openai-compat` config `zai` and `openai-completions` | ZAI GLM thinking is enabled by default for GLM-5.1, GLM-5, and GLM-4.7. Responses can include `reasoning_content`; streaming deltas can include `reasoning_content`, visible content, and tool calls. Preserved thinking requires exact unmodified `reasoning_content` replay. | Murph routes `glm-5` and `glm-5.1` through OpenAI-compatible completions with `thinking: true` and `toolStreaming: true`. Pi sets top-level `enable_thinking` when reasoning effort is present, enables `tool_stream`, and streams `reasoning_content`, `reasoning`, or `reasoning_text` as `thinking_*`. | Treat as private reasoning by default. Show active "Reasoning" state and timing. Do not render `reasoning_content` verbatim unless a future provider policy and Murph event label explicitly mark it user-displayable. | ZAI docs show normal prompt, completion, cache, and total usage. No separate reasoning token field reaches Phantom today. |
+
+## What Can Be Shown
+
+### Safe to Show Verbatim
+
+- Assistant final answer text.
+- User-authored text and attachment metadata already visible to the user.
+- Tool names, structured parameters, and outputs only through existing safe preview, truncation, redaction, and full-reference rules.
+- Rate limit status, reset time, utilization, compaction trigger, pre-token count, subagent summaries, and run status labels.
+- OpenAI reasoning summaries and Anthropic summarized thinking only after Murph/Pi explicitly labels them as summaries or user-displayable provider text.
+
+### Can Be Shown as Summary or Status
+
+- "Reasoning", "Thinking", "Reasoning hidden", "Provider reasoning summary available", "Compacting context", "Retrying provider request", "Waiting for rate limit", "Using tool", "Blocked", "Errored", and "Completed" states.
+- Provider reasoning summary text when it is a real upstream summary signal, not generated from private chain-of-thought.
+- Tool progress summaries generated from safe tool events.
+- Token usage totals that are actually present in the event stream.
+
+### Must Be Hidden or Labeled Private
+
+- OpenAI raw reasoning tokens. OpenAI does not expose raw reasoning tokens through the API.
+- OpenAI `encrypted_content` and full reasoning item payloads used only for continuity.
+- Anthropic `signature` and `redacted_thinking.data`.
+- Anthropic redacted thinking blocks. Display only a hidden or redacted label.
+- ZAI `reasoning_content` until the event contract marks it safe display text. Treat it as private chain-of-thought, because the provider describes it as reasoning process content rather than a summary.
+- Any thinking text with unknown provenance.
+
+## Can Phantom Show Thinking Tokens Today?
+
+No.
+
+OpenAI upstream can expose reasoning token counts, but the value is lost before Phantom:
+
+1. OpenAI Responses usage includes `output_tokens_details.reasoning_tokens`.
+2. Pi `processResponsesStream` reads response usage and stores input, output, cache read, cache write, total, and cost only.
+3. Pi `Usage` has no reasoning-token field.
+4. Murph `NormalizedUsage`, `MurphUsage`, `MurphModelUsage`, and `RunAttemptResult.usage` have no reasoning-token field.
+5. Phantom `SessionDoneFrame.usage`, assistant `usage_delta`, chat message state, and durable run timeline have no reasoning-token field.
+
+Required event-contract addition:
+
+```ts
+type ReasoningTokenUsage = {
+ reasoningTokens?: number;
+ reasoningTokenSource?: "provider" | "unavailable";
+};
+```
+
+This should be threaded from Pi provider usage parsing through Murph normalized usage and into Phantom `session.done.usage`. Phantom should display a count only when `reasoningTokenSource === "provider"` and `reasoningTokens` is a finite number.
+
+Do not estimate thinking tokens from text length. Do not infer hidden reasoning effort from duration. Do not show counts for providers that only expose blended output tokens.
+
+## Recommended Honest UI Model
+
+### Product Model
+
+Use one "Live Activity" stack driven by real events:
+
+- Reasoning: active or completed, with elapsed duration.
+- Reasoning hidden: provider redacted, encrypted, private, or unknown.
+- Provider reasoning summary: expandable only when `thinkingVisibility === "summary"` or equivalent exists.
+- Tool activity: started, input streaming, running, partial output, result, error, blocked, aborted.
+- Waiting and retrying: visible provider retry or rate-limit wait states.
+- Compacting: explicit compaction start and completion state.
+- Subagents: started, progress, completed, failed, stopped.
+- Done, blocked, aborted, or errored: terminal state.
+
+### Event Contract
+
+Add a Murph/Pi thinking visibility field before rendering any thinking text:
+
+```ts
+type ThinkingVisibility =
+ | "summary"
+ | "display_text"
+ | "private"
+ | "redacted"
+ | "encrypted"
+ | "unknown";
+```
+
+Map providers conservatively:
+
+- OpenAI reasoning summary deltas: `summary`
+- OpenAI encrypted reasoning content: `encrypted`
+- Anthropic `redacted_thinking`: `redacted`
+- Anthropic summarized display: `summary` or `display_text`, depending on the provider payload and selected display mode
+- Anthropic omitted thinking: `encrypted` or `private`
+- ZAI `reasoning_content`: `private` by default
+- Any unclassified OpenAI-compatible `reasoning`, `reasoning_text`, or `reasoning_content`: `unknown` or `private`
+
+Phantom should only render text for `summary` and `display_text`. For all other values, it should show state, duration, and a short label. It should not ship private text to the browser when the visibility is `private`, `redacted`, `encrypted`, or `unknown`.
+
+### Current Safe Builder Slice
+
+The smallest safe slice is:
+
+1. Keep the current non-rendering thinking component.
+2. Add server-side suppression of private or unknown thinking deltas once Murph/Pi can label them.
+3. Add UI labels for `summary`, `private`, `redacted`, and `encrypted`.
+4. Add retry/waiting frames for `api_retry`.
+5. Add reasoning token usage only for provider-counted values.
+
+Until the visibility field exists, leave thinking text hidden and use only state, duration, tool progress, compaction, rate limits, and subagent progress for liveliness.
+
+## Provider-Specific Notes
+
+### OpenAI
+
+OpenAI docs say reasoning models generate reasoning tokens that are not visible through the API, while optional reasoning summaries can be requested. The docs also show encrypted reasoning items for stateless or zero-data-retention continuity and upstream reasoning-token usage under `output_tokens_details.reasoning_tokens`.
+
+Pi already requests `reasoning.encrypted_content` and defaults the summary setting to `auto` for reasoning-capable OpenAI Responses models. Pi streams `response.reasoning_summary_text.delta` as `thinking_delta`, so that stream is a provider summary, not raw chain-of-thought. The missing piece is a label that preserves this distinction for Phantom.
+
+OpenAI UI rule:
+
+- Render as "Reasoning" while active.
+- Render summary text only after Murph/Pi labels it `summary`.
+- Hide encrypted reasoning content and JSON signatures.
+- Display reasoning token count only after the OpenAI `reasoning_tokens` field is preserved through Pi, Murph, and Phantom.
+
+### Anthropic
+
+Anthropic extended thinking can return thinking blocks, signatures, and redacted thinking. Anthropic docs say redacted thinking contains opaque encrypted data and no readable summary, and the opaque fields should be passed back unchanged for continuity when needed.
+
+Pi supports `thinkingDisplay: "summarized" | "omitted"`. It defaults to summarized in local code, even though the docs note model-specific display behavior. Pi maps redacted thinking to a redacted `ThinkingContent`, but the stream currently looks like a normal `thinking_start` until final content reconciliation.
+
+Anthropic UI rule:
+
+- Render as "Reasoning" while active.
+- If Murph/Pi labels a displayed block as summarized thinking, optionally show it as "Provider reasoning summary".
+- For `redacted_thinking`, show "Reasoning hidden" with no text.
+- Never expose `signature` or redacted `data`.
+
+### ZAI
+
+ZAI docs show GLM thinking can be enabled or disabled with a `thinking` parameter, and that thinking is enabled by default for the current GLM-5.1, GLM-5, and GLM-4.7 models. The Chat Completion API includes `reasoning_content`, and streaming can include `reasoning_content`, `content`, and `tool_calls`. ZAI preserved thinking requires replaying `reasoning_content` exactly and unmodified.
+
+Murph routes ZAI as OpenAI-compatible completions. Pi recognizes `reasoning_content`, `reasoning`, or `reasoning_text` and streams those fields as thinking. Because this is described as reasoning process content rather than an explicit summary, Phantom must treat it as private by default.
+
+ZAI UI rule:
+
+- Render as "Reasoning" while active.
+- Do not display `reasoning_content` text.
+- Preserve provider-required reasoning content for replay only inside the provider transport layer, not as user-visible UI.
+- Use `tool_stream` events for live tool cards and progress.
+
+## Tests Needed
+
+### Murph and Pi Contract Tests
+
+- OpenAI Responses fixture: `response.reasoning_summary_text.delta` becomes thinking with `thinkingVisibility: "summary"`, final encrypted reasoning payload remains hidden, and upstream `reasoning_tokens` is preserved into usage.
+- Anthropic fixture: `thinking` with summarized display becomes `thinkingVisibility: "summary"` or `display_text`; `redacted_thinking` becomes `thinkingVisibility: "redacted"` and emits a redacted start signal during streaming.
+- ZAI fixture: streaming `delta.reasoning_content` becomes `thinkingVisibility: "private"` and is not marked displayable.
+- OpenAI-compatible fallback fixture: unknown `reasoning` or `reasoning_text` fields become `private` or `unknown`, not displayable.
+- Usage fixture: reasoning-token counts are included only when the upstream provider sent a dedicated field.
+- Replay fixture: encrypted and redacted provider continuity payloads round-trip unchanged where required, but never become visible text.
+
+### Phantom Translator Tests
+
+- `thinkingVisibility: "summary"` maps to an explicit user-displayable summary frame.
+- `private`, `redacted`, `encrypted`, and `unknown` thinking never emit `message.thinking_delta` containing raw private text.
+- Existing `redacted_thinking` final assistant content still emits `message.thinking_start` with `redacted: true` and no delta.
+- `api_retry` maps to a visible retry or waiting frame.
+- `compact_completed` can update post-token count if Phantom chooses to show it.
+- `session.done.usage.reasoning_tokens` appears only for provider-counted values.
+
+### Phantom UI Tests
+
+- Thinking card renders state, duration, and redacted/private labels without showing private text.
+- Provider summary text renders only when the frame says it is displayable.
+- Browser store and durable run timeline do not retain private thinking text.
+- Long-running canned streams show transitions for thinking, tool running, partial output, rate limit, compacting, retrying, done, and error.
+- Mobile and desktop snapshots show no layout jumps when labels change from active to completed.
+
+## Recommendation Checklist
+
+- Keep thinking text hidden by default: existing Phantom UI supports this today.
+- Add thinking visibility provenance: contract gap in Pi/Murph normalized events and Phantom wire frames.
+- Add server-side suppression for private thinking deltas: contract gap in Phantom translation once provenance exists.
+- Preserve OpenAI reasoning token counts: contract gap in Pi usage, Murph usage, SDK result messages, Phantom frames, and timelines.
+- Treat Anthropic redacted thinking as hidden: existing final content support exists, streaming redaction needs a Murph normalization fix.
+- Treat ZAI `reasoning_content` as private: existing stream support exists, display safety requires visibility metadata and UI suppression.
+- Surface retries and waiting: Murph has `api_retry`, but SDK and Phantom mapping are missing.
+- Use existing tool progress, rate limit, compaction, and subagent events for best-in-class liveness now.
diff --git a/src/agent/__tests__/agent-sdk-boundary-callers.test.ts b/src/agent/__tests__/agent-sdk-boundary-callers.test.ts
index cc42bf84..de86cb13 100644
--- a/src/agent/__tests__/agent-sdk-boundary-callers.test.ts
+++ b/src/agent/__tests__/agent-sdk-boundary-callers.test.ts
@@ -4,7 +4,13 @@ import { z } from "zod/v4";
import { PhantomConfigSchema } from "../../config/schemas.ts";
import type { PhantomConfig } from "../../config/types.ts";
import { runMigrations } from "../../db/migrate.ts";
-import { type AgentSdkQueryParams, type Query, type SDKMessage, __setAgentSdkQueryForTests } from "../agent-sdk.ts";
+import {
+ type AgentSdkQueryOptions,
+ type AgentSdkQueryParams,
+ type Query,
+ type SDKMessage,
+ __setAgentSdkQueryForTests,
+} from "../agent-sdk.ts";
import { executeChatQuery } from "../chat-query.ts";
import { CostTracker } from "../cost-tracker.ts";
import { runJudgeQuery } from "../judge-query.ts";
@@ -358,6 +364,54 @@ describe("Agent SDK boundary callers", () => {
expect(options?.thinking).toEqual({ type: "enabled", budgetTokens: 8192 });
});
+ test("chat query path passes Phantom continuity through Murph transformContext", async () => {
+ __setAgentSdkQueryForTests((params) => {
+ calls.push(params);
+ return queryFromMessages([initMessage(), assistantMessage("chat assistant"), resultMessage("chat result")]);
+ });
+
+ await executeChatQuery(
+ {
+ config: makeConfig({
+ agent_runtime: "murph",
+ model: "gpt-5.5",
+ provider: { type: "openai" },
+ }),
+ sessionStore: new SessionStore(db),
+ costTracker: new CostTracker(db),
+ memoryContextBuilder: null,
+ evolvedConfig: null,
+ roleTemplate: null,
+ onboardingPrompt: null,
+ mcpServerFactories: null,
+ },
+ "web:chat-session",
+ { role: "user", content: "give me the page link" },
+ Date.now(),
+ {
+ signal: new AbortController().signal,
+ sessionContext: "User-visible page: http://127.0.0.1:3112/ui/profile.html",
+ onSdkEvent: () => {},
+ },
+ );
+ const options = calls[0]?.options as AgentSdkQueryOptions | undefined;
+ const transformContext = options?.transformContext;
+ expect(transformContext).toBeDefined();
+ const systemPrompt = calls[0]?.options?.systemPrompt;
+ if (typeof systemPrompt === "object" && systemPrompt !== null && "append" in systemPrompt) {
+ expect(systemPrompt.append).not.toContain("User-visible page");
+ } else {
+ throw new Error("Expected object system prompt");
+ }
+
+ const transformed = (await transformContext?.([{ role: "user", content: "same prompt" }])) ?? [];
+ expect(transformed).toHaveLength(2);
+ const contextMessage = transformed[0] as Record;
+ expect(contextMessage.role).toBe("user");
+ expect(textFromContent(contextMessage.content)).toContain("");
+ expect(textFromContent(contextMessage.content)).toContain("http://127.0.0.1:3112/ui/profile.html");
+ });
+
test("chat query retries stale resume result frames without forwarding the error result", async () => {
const sdkEvents: SDKMessage[] = [];
let factoryCalls = 0;
@@ -474,3 +528,15 @@ describe("Agent SDK boundary callers", () => {
expect(options?.env?.OPENAI_API_KEY).toBe("openai-secret");
});
});
+
+function textFromContent(content: unknown): string {
+ if (typeof content === "string") return content;
+ if (!Array.isArray(content)) return "";
+ return content
+ .map((item) => {
+ if (item === null || typeof item !== "object" || Array.isArray(item)) return "";
+ const block = item as Record;
+ return block.type === "text" && typeof block.text === "string" ? block.text : "";
+ })
+ .join("\n");
+}
diff --git a/src/agent/__tests__/murph-context.test.ts b/src/agent/__tests__/murph-context.test.ts
new file mode 100644
index 00000000..87c19c2a
--- /dev/null
+++ b/src/agent/__tests__/murph-context.test.ts
@@ -0,0 +1,57 @@
+import { describe, expect, test } from "bun:test";
+import { createMurphContextTransform } from "../murph-context.ts";
+
+describe("createMurphContextTransform", () => {
+ test("injects Phantom context as a Pi-compatible user message before the latest user message", async () => {
+ const transform = createMurphContextTransform("User-visible page: http://127.0.0.1:3100/ui/profile.html");
+ expect(transform).toBeDefined();
+
+ const userMessage = { role: "user", content: [{ type: "text", text: "Give me the link." }] };
+ const output = await transform?.([{ role: "assistant", content: [] }, userMessage]);
+
+ expect(output).toHaveLength(3);
+ expect(record(output?.[1])?.role).toBe("user");
+ expect(textContent(output?.[1])).toContain("");
+ expect(textContent(output?.[1])).toContain("http://127.0.0.1:3100/ui/profile.html");
+ expect(output?.[2]).toBe(userMessage);
+ });
+
+ test("replaces stale Phantom context messages instead of accumulating them", async () => {
+ const transform = createMurphContextTransform("Fresh context");
+ const staleContext = {
+ role: "user",
+ content: [{ type: "text", text: "\nStale context\n" }],
+ timestamp: 1,
+ };
+
+ const output =
+ (await transform?.([{ role: "assistant", content: [] }, staleContext, { role: "toolResult", content: [] }])) ??
+ [];
+
+ const phantomContexts = output.filter((message) => textContent(message).includes(""));
+ expect(phantomContexts).toHaveLength(1);
+ expect(textContent(phantomContexts[0])).toContain("Fresh context");
+ expect(output).not.toContain(staleContext);
+ });
+
+ test("returns undefined for empty context", () => {
+ expect(createMurphContextTransform(" ")).toBeUndefined();
+ expect(createMurphContextTransform(undefined)).toBeUndefined();
+ });
+});
+
+function record(value: unknown): Record | undefined {
+ return value !== null && typeof value === "object" ? (value as Record) : undefined;
+}
+
+function textContent(value: unknown): string {
+ const content = record(value)?.content;
+ if (typeof content === "string") return content;
+ if (!Array.isArray(content)) return "";
+ return content
+ .map((item) => {
+ const block = record(item);
+ return block?.type === "text" && typeof block.text === "string" ? block.text : "";
+ })
+ .join("\n");
+}
diff --git a/src/agent/__tests__/prompt-assembler.test.ts b/src/agent/__tests__/prompt-assembler.test.ts
index 502ccff6..9378896f 100644
--- a/src/agent/__tests__/prompt-assembler.test.ts
+++ b/src/agent/__tests__/prompt-assembler.test.ts
@@ -161,4 +161,12 @@ describe("assemblePrompt UI vocabulary guidance", () => {
const prompt = assemblePrompt(baseConfig);
expect(prompt).toContain("public/_examples/");
});
+
+ test("distinguishes created page URLs from authentication links", () => {
+ const prompt = assemblePrompt(baseConfig);
+ expect(prompt).toContain("Page URLs and login URLs are different.");
+ expect(prompt).toContain("return the exact /ui/ page URL");
+ expect(prompt).toContain("Only call phantom_generate_login");
+ expect(prompt).toContain("Do not substitute");
+ });
});
diff --git a/src/agent/agent-sdk.ts b/src/agent/agent-sdk.ts
index b65410da..60b1dd33 100644
--- a/src/agent/agent-sdk.ts
+++ b/src/agent/agent-sdk.ts
@@ -35,6 +35,9 @@ export type {
};
export type AgentSdkQueryParams = Parameters[0];
+export type AgentSdkQueryOptions = NonNullable & {
+ transformContext?: (messages: unknown[], signal?: AbortSignal) => Promise | unknown[];
+};
export type AgentSdkQuery = (params: AgentSdkQueryParams) => Query;
export type AgentSdkRuntimeSelection = {
agentRuntime: AgentRuntimeKind;
diff --git a/src/agent/chat-query.ts b/src/agent/chat-query.ts
index 3710b2e4..4bea2b58 100644
--- a/src/agent/chat-query.ts
+++ b/src/agent/chat-query.ts
@@ -1,7 +1,13 @@
// Extracted chat-specific query logic for the runForChat method.
// Lives outside runtime.ts to keep that file under the 300-line budget.
-import { type McpServerConfig, type SDKMessage, type SDKUserMessage, query } from "./agent-sdk.ts";
+import {
+ type AgentSdkQueryOptions,
+ type McpServerConfig,
+ type SDKMessage,
+ type SDKUserMessage,
+ query,
+} from "./agent-sdk.ts";
type MessageParam = SDKUserMessage["message"];
import { buildAgentRuntimeEnv, resolveAgentRuntimeModel } from "../config/providers.ts";
@@ -14,6 +20,7 @@ import { type AgentCost, type AgentResponse, emptyCost } from "./events.ts";
import { createDangerousCommandBlocker, createFileTracker } from "./hooks.ts";
import { extractTextFromMessageParam } from "./message-param-utils.ts";
import { extractCost, extractTextFromMessage } from "./message-utils.ts";
+import { createMurphContextTransform } from "./murph-context.ts";
import { permissionOptionsFromConfig } from "./permission-options.ts";
import { assemblePrompt } from "./prompt-assembler.ts";
import { isNoConversationFoundResult, sdkResultErrorText } from "./sdk-result-errors.ts";
@@ -36,7 +43,7 @@ export async function executeChatQuery(
sessionKey: string,
message: MessageParam,
startTime: number,
- options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void },
+ options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void; sessionContext?: string },
): Promise {
const parts = sessionKey.split(":");
const channelId = parts[0] ?? "web";
@@ -55,6 +62,7 @@ export async function executeChatQuery(
/* Memory unavailable */
}
}
+ const useMurphContextTransform = deps.config.agent_runtime === "murph";
const appendPrompt = assemblePrompt(
deps.config,
memoryContext,
@@ -62,7 +70,9 @@ export async function executeChatQuery(
deps.roleTemplate ?? undefined,
deps.onboardingPrompt ?? undefined,
undefined,
+ useMurphContextTransform ? undefined : options.sessionContext,
);
+ const transformContext = useMurphContextTransform ? createMurphContextTransform(options.sessionContext) : undefined;
const queryModel = resolveAgentRuntimeModel(deps.config, deps.config.model);
const providerEnv = buildAgentRuntimeEnv(deps.config, queryModel);
@@ -93,30 +103,32 @@ export async function executeChatQuery(
await Promise.all(Object.entries(deps.mcpServerFactories).map(async ([k, f]) => [k, await f()] as const)),
)
: undefined;
+ const queryOptions: AgentSdkQueryOptions = {
+ model: queryModel,
+ ...permissionOptions,
+ settingSources: ["project", "user"],
+ systemPrompt: {
+ type: "preset" as const,
+ preset: "claude_code" as const,
+ append: appendPrompt,
+ },
+ persistSession: true,
+ effort: deps.config.effort,
+ thinking: getThinkingConfig(queryModel),
+ includePartialMessages: true,
+ agentProgressSummaries: true,
+ promptSuggestions: true,
+ ...(deps.config.max_budget_usd > 0 ? { maxBudgetUsd: deps.config.max_budget_usd } : {}),
+ abortController: controller,
+ env: { ...process.env, ...providerEnv },
+ hooks: { PreToolUse: [commandBlocker], PostToolUse: [fileTracker.hook] },
+ ...(useResume && session?.sdk_session_id ? { resume: session.sdk_session_id } : {}),
+ ...(mcpServers ? { mcpServers } : {}),
+ ...(transformContext ? { transformContext } : {}),
+ };
const queryStream = query({
prompt: makePrompt(),
- options: {
- model: queryModel,
- ...permissionOptions,
- settingSources: ["project", "user"],
- systemPrompt: {
- type: "preset" as const,
- preset: "claude_code" as const,
- append: appendPrompt,
- },
- persistSession: true,
- effort: deps.config.effort,
- thinking: getThinkingConfig(queryModel),
- includePartialMessages: true,
- agentProgressSummaries: true,
- promptSuggestions: true,
- ...(deps.config.max_budget_usd > 0 ? { maxBudgetUsd: deps.config.max_budget_usd } : {}),
- abortController: controller,
- env: { ...process.env, ...providerEnv },
- hooks: { PreToolUse: [commandBlocker], PostToolUse: [fileTracker.hook] },
- ...(useResume && session?.sdk_session_id ? { resume: session.sdk_session_id } : {}),
- ...(mcpServers ? { mcpServers } : {}),
- },
+ options: queryOptions,
});
for await (const msg of queryStream) {
diff --git a/src/agent/murph-context.ts b/src/agent/murph-context.ts
new file mode 100644
index 00000000..231086eb
--- /dev/null
+++ b/src/agent/murph-context.ts
@@ -0,0 +1,72 @@
+export type MurphContextTransform = (messages: unknown[], signal?: AbortSignal) => Promise | unknown[];
+
+const PHANTOM_CONTEXT_OPEN_TAG = "";
+const PHANTOM_CONTEXT_CLOSE_TAG = "";
+
+type PhantomContextMessage = {
+ role: "user";
+ content: [{ type: "text"; text: string }];
+ timestamp: number;
+};
+
+export function createMurphContextTransform(context: string | undefined): MurphContextTransform | undefined {
+ const trimmed = context?.trim();
+ if (!trimmed) return undefined;
+
+ return (messages: unknown[]) => {
+ const cleaned = messages.filter((message) => !isPhantomContextMessage(message));
+ const contextMessage = buildContextMessage(trimmed);
+ if (cleaned.length === 0) {
+ return [contextMessage];
+ }
+
+ const lastIndex = cleaned.length - 1;
+ const lastMessage = cleaned[lastIndex];
+ if (hasRole(lastMessage, "user")) {
+ return [...cleaned.slice(0, lastIndex), contextMessage, lastMessage];
+ }
+
+ return [...cleaned, contextMessage];
+ };
+}
+
+function buildContextMessage(content: string): PhantomContextMessage {
+ return {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: [
+ PHANTOM_CONTEXT_OPEN_TAG,
+ "Durable context supplied by Phantom outside the raw transcript.",
+ "Use it to continue after Murph compaction without asking the user to repeat known app state.",
+ content,
+ PHANTOM_CONTEXT_CLOSE_TAG,
+ ].join("\n"),
+ },
+ ],
+ timestamp: Date.now(),
+ };
+}
+
+function isPhantomContextMessage(message: unknown): boolean {
+ if (!isRecord(message) || message.role !== "user") return false;
+ const content = message.content;
+ if (typeof content === "string") return content.includes(PHANTOM_CONTEXT_OPEN_TAG);
+ if (!Array.isArray(content)) return false;
+ return content.some(
+ (item) => isRecord(item) && item.type === "text" && textField(item).includes(PHANTOM_CONTEXT_OPEN_TAG),
+ );
+}
+
+function hasRole(message: unknown, role: string): boolean {
+ return isRecord(message) && message.role === role;
+}
+
+function isRecord(value: unknown): value is Record {
+ return value !== null && typeof value === "object";
+}
+
+function textField(record: Record): string {
+ return typeof record.text === "string" ? record.text : "";
+}
diff --git a/src/agent/prompt-assembler.ts b/src/agent/prompt-assembler.ts
index 1c134309..3f8ccc3f 100644
--- a/src/agent/prompt-assembler.ts
+++ b/src/agent/prompt-assembler.ts
@@ -18,6 +18,7 @@ export function assemblePrompt(
roleTemplate?: RoleTemplate,
onboardingPrompt?: string,
dataDir?: string,
+ chatRuntimeContext?: string,
): string {
const sections: string[] = [];
@@ -74,6 +75,10 @@ export function assemblePrompt(
sections.push(buildMemorySection(memoryContext));
}
+ if (chatRuntimeContext) {
+ sections.push(buildChatRuntimeContext(chatRuntimeContext));
+ }
+
return sections.join("\n\n");
}
@@ -140,9 +145,14 @@ function buildEnvironment(config: PhantomConfig): string {
lines.push("");
lines.push("Schedule types: one-shot (at), interval (every N ms), cron (weekdays at 9am).");
lines.push("");
- lines.push("To give a user access to a /ui/ page, call phantom_generate_login to create a magic link");
- lines.push("and send the link to them via Slack. The link must be sent as plain text without any");
- lines.push("Markdown wrapping (no asterisks, no bold, no parentheses) so Slack renders it cleanly.");
+ lines.push("Page URLs and login URLs are different.");
+ lines.push("When the user asks for the page, link, profile, report, dashboard, or thing you created,");
+ lines.push("return the exact /ui/ page URL from phantom_create_page or phantom_preview_page.");
+ lines.push("Only call phantom_generate_login when the user explicitly asks for access, auth,");
+ lines.push("a login link, a magic link, or says they cannot open a page because login is required.");
+ lines.push("If you share a login link, label it as an authentication link. Do not substitute");
+ lines.push("a login link for a created page URL.");
+ lines.push("Links must be sent as plain text without Markdown wrapping so Slack renders them cleanly.");
lines.push("");
lines.push(...buildUIGuidanceLines(publicUrl ?? undefined));
lines.push("");
@@ -231,6 +241,10 @@ function buildMemorySection(memoryContext: string): string {
return `# Your Memory\n\nPersistent memory from previous sessions. Use this to maintain continuity.\n\n${memoryContext}`;
}
+function buildChatRuntimeContext(chatRuntimeContext: string): string {
+ return `# Current Chat Context\n\n${chatRuntimeContext}`;
+}
+
function buildFallbackRoleHint(config: PhantomConfig): string {
return `Your role is ${config.role}. Approach every task with that expertise.`;
}
diff --git a/src/agent/prompt-blocks/ui-guidance.ts b/src/agent/prompt-blocks/ui-guidance.ts
index 887d1a22..4eb8c5c1 100644
--- a/src/agent/prompt-blocks/ui-guidance.ts
+++ b/src/agent/prompt-blocks/ui-guidance.ts
@@ -147,6 +147,10 @@ export function buildUIGuidanceLines(publicUrl: string | undefined): string[] {
lines.push("phantom_preview_page with the same path. Review the screenshot. Read the");
lines.push("JSON metadata block. If console.errors > 0 or network.failedRequests > 0,");
lines.push("fix the HTML and re-preview until both are zero. Only then report the page.");
+ lines.push("Preserve the exact page URL returned by phantom_create_page or preview metadata.");
+ lines.push("When the user later asks for the page or link you created, return that page URL.");
+ lines.push("Do not answer a page-link request by calling phantom_generate_login unless");
+ lines.push("the user explicitly asks for an authentication link.");
lines.push("");
if (publicUrl) {
lines.push(`Pages are at ${publicUrl}/ui/`);
diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts
index 43aaf1e6..64917696 100644
--- a/src/agent/runtime.ts
+++ b/src/agent/runtime.ts
@@ -126,7 +126,7 @@ export class AgentRuntime {
async runForChat(
sessionKey: string,
message: MessageParam,
- options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void },
+ options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void; sessionContext?: string },
): Promise {
if (this.activeSessions.has(sessionKey)) {
return { text: "Error: session busy", sessionId: "", cost: emptyCost(), durationMs: 0 };
diff --git a/src/chat/__tests__/continuity-context.test.ts b/src/chat/__tests__/continuity-context.test.ts
new file mode 100644
index 00000000..dbbe9850
--- /dev/null
+++ b/src/chat/__tests__/continuity-context.test.ts
@@ -0,0 +1,109 @@
+import { Database } from "bun:sqlite";
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+import { MIGRATIONS } from "../../db/schema.ts";
+import { buildChatContinuityContext } from "../continuity-context.ts";
+import { ChatEventLog } from "../event-log.ts";
+import { ChatSessionStore } from "../session-store.ts";
+
+let db: Database;
+let eventLog: ChatEventLog;
+let sessionStore: ChatSessionStore;
+
+beforeEach(() => {
+ db = new Database(":memory:");
+ for (const sql of MIGRATIONS) {
+ db.run(sql);
+ }
+ eventLog = new ChatEventLog(db);
+ sessionStore = new ChatSessionStore(db);
+});
+
+afterEach(() => {
+ db.close();
+});
+
+describe("buildChatContinuityContext", () => {
+ test("summarizes created page artifacts from the durable stream log", () => {
+ const session = sessionStore.create();
+ eventLog.append(session.id, null, 1, "message.tool_call_start", {
+ event: "message.tool_call_start",
+ tool_call_id: "tool-1",
+ tool_name: "phantom_create_page",
+ message_id: "assistant-1",
+ parent_tool_use_id: null,
+ is_mcp: true,
+ });
+ eventLog.append(session.id, null, 2, "message.tool_call_input_end", {
+ event: "message.tool_call_input_end",
+ tool_call_id: "tool-1",
+ input: {
+ path: "muhammad-ahmed-cheema.html",
+ title: "Muhammad Ahmed Cheema Profile",
+ },
+ });
+ eventLog.append(session.id, null, 3, "message.tool_call_result", {
+ event: "message.tool_call_result",
+ tool_call_id: "tool-1",
+ tool_name: "phantom_create_page",
+ status: "success",
+ output: JSON.stringify({
+ path: "muhammad-ahmed-cheema.html",
+ url: "http://127.0.0.1:3112/ui/muhammad-ahmed-cheema.html",
+ size: 12345,
+ }),
+ });
+
+ const context = buildChatContinuityContext({ sessionId: session.id, eventLog });
+
+ expect(context).toContain("User-visible page artifacts");
+ expect(context).toContain("Muhammad Ahmed Cheema Profile");
+ expect(context).toContain("http://127.0.0.1:3112/ui/muhammad-ahmed-cheema.html");
+ expect(context).toContain("muhammad-ahmed-cheema.html");
+ expect(context).not.toContain("/ui/login");
+ });
+
+ test("skips login links and keeps recent compact checkpoints", () => {
+ const session = sessionStore.create();
+ eventLog.append(session.id, null, 1, "session.compact_boundary", {
+ event: "session.compact_boundary",
+ trigger: "auto",
+ pre_tokens: 1434337,
+ });
+ eventLog.append(session.id, null, 2, "message.tool_call_result", {
+ event: "message.tool_call_result",
+ tool_call_id: "tool-login",
+ tool_name: "phantom_generate_login",
+ status: "success",
+ output: JSON.stringify({
+ magicLink: "http://127.0.0.1:3112/ui/login?magic=secret",
+ }),
+ });
+
+ const context = buildChatContinuityContext({ sessionId: session.id, eventLog });
+
+ expect(context).toContain("auto compaction at stream seq 1 before about 1,434,337 tokens.");
+ expect(context).toContain("Authentication links");
+ expect(context).not.toContain("magic=secret");
+ });
+
+ test("uses the latest stream events when the full event log is larger than the scan limit", () => {
+ const session = sessionStore.create();
+ for (let seq = 1; seq <= 12; seq++) {
+ eventLog.append(session.id, null, seq, "session.status", {
+ event: "session.status",
+ status: "working",
+ permission_mode: "bypassPermissions",
+ });
+ }
+ eventLog.append(session.id, null, 13, "session.compact_boundary", {
+ event: "session.compact_boundary",
+ trigger: "auto",
+ pre_tokens: 500000,
+ });
+
+ const context = buildChatContinuityContext({ sessionId: session.id, eventLog, limit: 3 });
+
+ expect(context).toContain("stream seq 13");
+ expect(context).toContain("500,000");
+ });
+});
diff --git a/src/chat/__tests__/http.test.ts b/src/chat/__tests__/http.test.ts
index 62b72601..50ff4edf 100644
--- a/src/chat/__tests__/http.test.ts
+++ b/src/chat/__tests__/http.test.ts
@@ -177,6 +177,52 @@ describe("Chat HTTP handlers", () => {
expect(body.run_timelines[0]?.summary.status).toBe("working");
});
+ test("GET /chat/sessions/:id returns durable attachment transcript metadata", async () => {
+ const sessionStore = new ChatSessionStore(db);
+ const messageStore = new ChatMessageStore(db);
+ const attachmentStore = new ChatAttachmentStore(db);
+ const session = sessionStore.create();
+ const attachmentId = attachmentStore.create({
+ id: "att-1",
+ sessionId: session.id,
+ kind: "pdf",
+ filename: "brief.pdf",
+ mimeType: "application/pdf",
+ sizeBytes: 1234,
+ storagePath: "/tmp/brief.pdf",
+ });
+ messageStore.commit({
+ id: "user-1",
+ sessionId: session.id,
+ seq: 1,
+ role: "user",
+ contentJson: JSON.stringify([
+ {
+ type: "attachment",
+ id: attachmentId,
+ filename: "brief.pdf",
+ mime_type: "application/pdf",
+ size_bytes: 1234,
+ preview_url: `/chat/attachments/${attachmentId}/preview`,
+ },
+ { type: "text", text: "Review this." },
+ ]),
+ });
+ attachmentStore.commitToMessage(attachmentId, "user-1");
+
+ const res = await handler(makeAuthReq(`/chat/sessions/${session.id}`));
+ expect(res?.status).toBe(200);
+ const body = (await res?.json()) as { messages: Array<{ content_json: string }> };
+ const content = JSON.parse(body.messages[0]?.content_json ?? "[]") as Array>;
+ expect(content[0]).toMatchObject({
+ type: "attachment",
+ id: "att-1",
+ filename: "brief.pdf",
+ mime_type: "application/pdf",
+ size_bytes: 1234,
+ });
+ });
+
test("GET /chat/sessions/:id returns 404 for missing session", async () => {
const res = await handler(makeAuthReq("/chat/sessions/nonexistent"));
expect(res?.status).toBe(404);
@@ -334,4 +380,29 @@ describe("Chat HTTP handlers", () => {
);
expect(res?.status).toBe(200);
});
+
+ test("POST /chat/stream rejects invalid attachment ids", async () => {
+ const createRes = await handler(
+ makeAuthReq("/chat/sessions", {
+ method: "POST",
+ body: JSON.stringify({}),
+ }),
+ );
+ const created = (await createRes?.json()) as { id: string };
+
+ const res = await handler(
+ makeAuthReq("/chat/stream", {
+ method: "POST",
+ body: JSON.stringify({
+ session_id: created.id,
+ text: "Use the attached file.",
+ attachment_ids: ["missing-attachment"],
+ }),
+ }),
+ );
+
+ expect(res?.status).toBe(400);
+ const body = await res?.json();
+ expect(body.error).toBe("attachment_not_found");
+ });
});
diff --git a/src/chat/__tests__/message-builder.test.ts b/src/chat/__tests__/message-builder.test.ts
index 1307d207..6c38f395 100644
--- a/src/chat/__tests__/message-builder.test.ts
+++ b/src/chat/__tests__/message-builder.test.ts
@@ -175,8 +175,9 @@ describe("buildUserMessageParam", () => {
expect(content[3]?.text).toBe("analyze all");
});
- test("ignores non-existent attachment ids", async () => {
- const msg = await buildUserMessageParam("hello", ["nonexistent-id"], attachmentStore);
- expect(msg.content).toBe("hello");
+ test("rejects non-existent attachment ids", async () => {
+ await expect(buildUserMessageParam("hello", ["nonexistent-id"], attachmentStore)).rejects.toThrow(
+ "Attachment is not available for this chat.",
+ );
});
});
diff --git a/src/chat/__tests__/sdk-to-wire.test.ts b/src/chat/__tests__/sdk-to-wire.test.ts
index e646aeb3..afb89d75 100644
--- a/src/chat/__tests__/sdk-to-wire.test.ts
+++ b/src/chat/__tests__/sdk-to-wire.test.ts
@@ -415,6 +415,31 @@ describe("sdk-to-wire translator", () => {
expect(frames.some((f) => f.event === "session.error")).toBe(true);
});
+ test("result error after assistant start does not emit a normal assistant_end", () => {
+ const ctx = makeCtx();
+ translateSdkMessage(
+ {
+ type: "assistant",
+ message: { content: [{ type: "text", text: "Partial answer" }] },
+ parent_tool_use_id: null,
+ },
+ ctx,
+ );
+ const frames = translateSdkMessage(
+ {
+ type: "result",
+ subtype: "error_during_execution",
+ errors: ["Provider failed"],
+ total_cost_usd: 0.001,
+ usage: {},
+ duration_ms: 500,
+ },
+ ctx,
+ );
+ expect(frames.some((f) => f.event === "session.error")).toBe(true);
+ expect(frames.some((f) => f.event === "message.assistant_end")).toBe(false);
+ });
+
test("result with prompt_suggestion -> session.suggestion", () => {
const ctx = makeCtx();
const frames = translateSdkMessage({ type: "prompt_suggestion", suggestion: "Tell me more" }, ctx);
diff --git a/src/chat/__tests__/writer.test.ts b/src/chat/__tests__/writer.test.ts
index ed6167fb..f1fc5aba 100644
--- a/src/chat/__tests__/writer.test.ts
+++ b/src/chat/__tests__/writer.test.ts
@@ -1,7 +1,9 @@
import { Database } from "bun:sqlite";
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
import { MIGRATIONS } from "../../db/schema.ts";
+import { ChatAttachmentStore } from "../attachment-store.ts";
import { ChatEventLog } from "../event-log.ts";
+import { buildUserTranscriptContent } from "../message-builder.ts";
import { ChatMessageStore } from "../message-store.ts";
import { ChatRunTimelineStore } from "../run-timeline.ts";
import { ChatSessionStore } from "../session-store.ts";
@@ -12,6 +14,7 @@ import { ChatSessionWriter, getActiveWriter } from "../writer.ts";
let db: Database;
let sessionStore: ChatSessionStore;
let messageStore: ChatMessageStore;
+let attachmentStore: ChatAttachmentStore;
let eventLog: ChatEventLog;
let timelineStore: ChatRunTimelineStore;
let streamBus: StreamBus;
@@ -23,6 +26,7 @@ beforeEach(() => {
}
sessionStore = new ChatSessionStore(db);
messageStore = new ChatMessageStore(db);
+ attachmentStore = new ChatAttachmentStore(db);
eventLog = new ChatEventLog(db);
timelineStore = new ChatRunTimelineStore(db);
streamBus = new StreamBus();
@@ -36,7 +40,7 @@ function mockRuntime(overrides?: {
runForChat?: (
key: string,
msg: unknown,
- opts: { signal: AbortSignal; onSdkEvent: (msg: unknown) => void },
+ opts: { signal: AbortSignal; onSdkEvent: (msg: unknown) => void; sessionContext?: string },
) => Promise<{
text: string;
sessionId: string;
@@ -100,6 +104,65 @@ describe("ChatSessionWriter", () => {
expect(eventTypes).toContain("session.done");
});
+ test("commits attachments to the user message and emits metadata", async () => {
+ const session = sessionStore.create();
+ const frames: ChatWireFrame[] = [];
+ streamBus.subscribe(session.id, (f) => frames.push(f));
+ const attachmentId = attachmentStore.create({
+ sessionId: session.id,
+ kind: "image",
+ filename: "diagram.png",
+ mimeType: "image/png",
+ sizeBytes: 12,
+ storagePath: "/tmp/diagram.png",
+ });
+ const attachments = [
+ {
+ id: attachmentId,
+ filename: "diagram.png",
+ mime_type: "image/png",
+ size_bytes: 12,
+ preview_url: `/chat/attachments/${attachmentId}/preview`,
+ },
+ ];
+
+ const writer = new ChatSessionWriter({
+ sessionId: session.id,
+ runtime: mockRuntime(),
+ eventLog,
+ messageStore,
+ attachmentStore,
+ sessionStore,
+ streamBus,
+ });
+ writer.claim();
+ const sdkMessage = {
+ role: "user" as const,
+ content: [
+ {
+ type: "image",
+ source: { type: "base64", media_type: "image/png", data: "RAW_BASE64_PAYLOAD" },
+ },
+ { type: "text", text: "describe this" },
+ ],
+ } as Parameters[0];
+
+ await writer.run(sdkMessage, "tab1", "describe this", {
+ attachments,
+ transcriptContent: buildUserTranscriptContent("describe this", attachments),
+ });
+
+ const userFrame = frames.find((frame) => frame.event === "user.message");
+ expect(userFrame?.event).toBe("user.message");
+ if (userFrame?.event !== "user.message") return;
+ expect(userFrame.attachments).toEqual(attachments);
+
+ const userRow = messageStore.getById(userFrame.message_id);
+ expect(userRow?.content_json).toBe(JSON.stringify(buildUserTranscriptContent("describe this", attachments)));
+ expect(userRow?.content_json).not.toContain("RAW_BASE64_PAYLOAD");
+ expect(attachmentStore.getById(attachmentId)?.message_id).toBe(userFrame.message_id);
+ });
+
test("writer sets isActive during run", async () => {
const session = sessionStore.create();
let wasActive = false;
@@ -165,6 +228,51 @@ describe("ChatSessionWriter", () => {
expect(eventTypes).toContain("session.error");
});
+ test("non-success SDK result does not commit a successful assistant message", async () => {
+ const session = sessionStore.create();
+ const frames: ChatWireFrame[] = [];
+ streamBus.subscribe(session.id, (f) => frames.push(f));
+
+ const writer = new ChatSessionWriter({
+ sessionId: session.id,
+ runtime: mockRuntime({
+ runForChat: async (_key, _message, opts) => {
+ opts.onSdkEvent({
+ type: "result",
+ subtype: "error_during_execution",
+ errors: ["Provider failed"],
+ total_cost_usd: 0.02,
+ usage: { input_tokens: 10, output_tokens: 0 },
+ duration_ms: 15,
+ num_turns: 1,
+ });
+ return {
+ text: "",
+ sessionId: "sdk-1",
+ cost: { totalUsd: 0.02, inputTokens: 10, outputTokens: 0, modelUsage: {} },
+ durationMs: 15,
+ };
+ },
+ }),
+ eventLog,
+ messageStore,
+ sessionStore,
+ timelineStore,
+ streamBus,
+ });
+ writer.claim();
+
+ await writer.run({ role: "user", content: "fail" }, "t1", "fail");
+
+ const messages = messageStore.getBySession(session.id);
+ expect(messages.map((message) => message.role)).toEqual(["user"]);
+ expect(frames.some((frame) => frame.event === "session.error")).toBe(true);
+ expect(frames.some((frame) => frame.event === "session.done")).toBe(false);
+ const timelines = timelineStore.getDetailsBySession(session.id);
+ expect(timelines[0]?.status).toBe("error");
+ expect(timelines[0]?.assistant_message_id).toBeNull();
+ });
+
test("multi-subscriber fan-out delivers to all", async () => {
const session = sessionStore.create();
const frames1: ChatWireFrame[] = [];
@@ -305,6 +413,73 @@ describe("ChatSessionWriter", () => {
expect(timelines[0]?.summary.status).toBe("completed");
});
+ test("passes durable page context into the chat runtime", async () => {
+ const session = sessionStore.create();
+ let capturedContext: string | undefined;
+ eventLog.append(session.id, null, 1, "message.tool_call_start", {
+ event: "message.tool_call_start",
+ tool_call_id: "tool-1",
+ tool_name: "phantom_create_page",
+ message_id: "assistant-1",
+ parent_tool_use_id: null,
+ is_mcp: true,
+ });
+ eventLog.append(session.id, null, 2, "message.tool_call_input_end", {
+ event: "message.tool_call_input_end",
+ tool_call_id: "tool-1",
+ input: {
+ path: "profile.html",
+ title: "Profile Page",
+ },
+ });
+ eventLog.append(session.id, null, 3, "message.tool_call_result", {
+ event: "message.tool_call_result",
+ tool_call_id: "tool-1",
+ tool_name: "phantom_create_page",
+ status: "success",
+ output: JSON.stringify({
+ path: "profile.html",
+ url: "http://127.0.0.1:3112/ui/profile.html",
+ }),
+ });
+
+ const writer = new ChatSessionWriter({
+ sessionId: session.id,
+ runtime: mockRuntime({
+ runForChat: async (_key, _message, opts) => {
+ capturedContext = opts.sessionContext;
+ opts.onSdkEvent({
+ type: "result",
+ subtype: "success",
+ result: "ok",
+ stop_reason: "end_turn",
+ total_cost_usd: 0,
+ usage: {},
+ modelUsage: {},
+ duration_ms: 0,
+ num_turns: 1,
+ });
+ return {
+ text: "ok",
+ sessionId: "sdk-1",
+ cost: { totalUsd: 0, inputTokens: 0, outputTokens: 0, modelUsage: {} },
+ durationMs: 0,
+ };
+ },
+ }),
+ eventLog,
+ messageStore,
+ sessionStore,
+ streamBus,
+ });
+ writer.claim();
+
+ await writer.run({ role: "user", content: "give me the page link" }, "t1", "give me the page link");
+
+ expect(capturedContext).toContain("Profile Page");
+ expect(capturedContext).toContain("http://127.0.0.1:3112/ui/profile.html");
+ });
+
test("persists errored run timeline without committing assistant id", async () => {
const session = sessionStore.create();
const writer = new ChatSessionWriter({
diff --git a/src/chat/continuity-context.ts b/src/chat/continuity-context.ts
new file mode 100644
index 00000000..19e51949
--- /dev/null
+++ b/src/chat/continuity-context.ts
@@ -0,0 +1,213 @@
+import type { ChatEventLog, ChatStreamEvent } from "./event-log.ts";
+
+const DEFAULT_EVENT_SCAN_LIMIT = 5000; // Number of trailing events scanned when the caller gives no limit.
+const MAX_ARTIFACTS = 8; // Only the last this-many page artifacts are rendered.
+const MAX_COMPACTIONS = 3; // Only the last this-many compaction checkpoints are rendered.
+const MAX_LABEL_LENGTH = 90; // Artifact labels longer than this are truncated with "...".
+const PAGE_TOOLS = new Set(["phantom_create_page", "phantom_preview_page"]); // Tool names whose calls can produce a page artifact.
+
+type BuildChatContinuityContextInput = { // Arguments for buildChatContinuityContext.
+ sessionId: string; // Session whose events are scanned.
+ eventLog: ChatEventLog; // Read through eventLog.tail().
+ limit?: number; // Maximum events to scan; DEFAULT_EVENT_SCAN_LIMIT when omitted.
+};
+
+type ToolAccumulator = { // State merged across the message.tool_call_* events that share one tool_call_id.
+ seq: number; // seq of the latest event folded into this entry.
+ toolName?: string; // Set from any event whose payload carries tool_name.
+ input?: unknown; // Payload input of message.tool_call_input_end.
+ output?: string; // Result output, or an output preview when no result output was seen.
+ status?: string; // Payload status of message.tool_call_result.
+};
+
+type PageArtifact = { // One page entry rendered into the context text.
+ seq: number; // Copied from the tool accumulator; used for ordering and shown in the output.
+ toolName: string;
+ label: string; // Title, else path, else URL, truncated to MAX_LABEL_LENGTH.
+ url?: string; // Present only when it passed normalizePageUrl.
+ path?: string;
+ size?: number; // Rendered as "<size> bytes".
+};
+
+type CompactCheckpoint = { // One session.compact_boundary event.
+ seq: number; // seq of the boundary event.
+ trigger?: string; // Payload "trigger"; rendered as "unknown" when absent.
+ preTokens?: number; // Payload "pre_tokens"; omitted from the output when absent.
+};
+
+export function buildChatContinuityContext(input: BuildChatContinuityContextInput): string | undefined { // Builds a plain-text context block from a session's recent events; returns undefined when neither a page artifact nor a compaction checkpoint is found.
+ const events = input.eventLog.tail(input.sessionId, input.limit ?? DEFAULT_EVENT_SCAN_LIMIT); // Only the trailing slice of the log is scanned.
+ const tools = new Map(); // Keyed by tool_call_id; values are ToolAccumulator entries.
+ const compactions: CompactCheckpoint[] = [];
+
+ for (const event of events) {
+ const payload = parsePayload(event);
+ if (!payload) continue; // Skip rows whose payload is not a JSON object.
+ const eventType = stringField(payload, "event") ?? event.event_type; // The payload's own "event" field wins over the row's event_type.
+
+ if (eventType === "session.compact_boundary") {
+ compactions.push({
+ seq: event.seq,
+ trigger: stringField(payload, "trigger"),
+ preTokens: numberField(payload, "pre_tokens"),
+ });
+ continue;
+ }
+
+ if (!eventType.startsWith("message.tool_call_")) continue; // Everything else is irrelevant to this summary.
+ const toolCallId = stringField(payload, "tool_call_id");
+ if (!toolCallId) continue;
+ const tool = tools.get(toolCallId) ?? { seq: event.seq };
+ tool.seq = event.seq; // seq follows the most recent event for this call, not the first.
+
+ const toolName = stringField(payload, "tool_name");
+ if (toolName) tool.toolName = toolName;
+
+ if (eventType === "message.tool_call_input_end") {
+ tool.input = payload.input;
+ } else if (eventType === "message.tool_call_running") {
+ const outputPreview = stringField(payload, "output_preview");
+ if (outputPreview && !tool.output) tool.output = outputPreview; // A preview never replaces output that is already set.
+ } else if (eventType === "message.tool_call_result") {
+ tool.status = stringField(payload, "status");
+ tool.output = stringField(payload, "output") ?? stringField(payload, "output_preview") ?? tool.output; // Result output first, then its preview, then whatever was captured earlier.
+ }
+
+ tools.set(toolCallId, tool);
+ }
+
+ const artifacts = dedupeArtifacts([...tools.values()].flatMap((tool) => artifactFromTool(tool) ?? [])); // flatMap drops tool calls that yield no artifact.
+ const latestCompactions = compactions.slice(-MAX_COMPACTIONS);
+ if (artifacts.length === 0 && latestCompactions.length === 0) {
+ return undefined; // Nothing worth injecting.
+ }
+
+ return renderContext({
+ artifacts: artifacts.slice(-MAX_ARTIFACTS), // dedupeArtifacts returns ascending seq, so this keeps the newest entries.
+ compactions: latestCompactions,
+ });
+}
+
+function renderContext(input: { artifacts: PageArtifact[]; compactions: CompactCheckpoint[] }): string { // Formats the collected checkpoints and artifacts as newline-separated text with a fixed three-line preamble.
+ const lines = [
+ "Durable Phantom chat context:",
+ "- The transcript may have been compacted by Murph. Continue from the latest user message using these host facts when relevant.",
+ "- Authentication links from phantom_generate_login are not page artifacts.",
+ ];
+
+ if (input.compactions.length > 0) {
+ lines.push("", "Recent compaction checkpoints:"); // The empty string yields a blank separator line.
+ for (const checkpoint of input.compactions) {
+ const trigger = checkpoint.trigger ?? "unknown";
+ const tokens =
+ checkpoint.preTokens === undefined
+ ? ""
+ : ` before about ${checkpoint.preTokens.toLocaleString("en-US")} tokens`; // en-US formatting adds thousands separators.
+ lines.push(`- ${trigger} compaction at stream seq ${checkpoint.seq}${tokens}.`);
+ }
+ }
+
+ if (input.artifacts.length > 0) {
+ lines.push("", "User-visible page artifacts from earlier tool work:");
+ for (const artifact of input.artifacts) {
+ const parts = [`- ${artifact.label}`];
+ if (artifact.url) parts.push(` URL: ${artifact.url}`);
+ if (artifact.path) parts.push(` path: ${artifact.path}`);
+ if (artifact.size !== undefined) parts.push(` size: ${artifact.size} bytes`);
+ parts.push(` via ${artifact.toolName} at stream seq ${artifact.seq}.`);
+ lines.push(parts.join(";")); // Each later part starts with a space, so the line reads "- label; URL: ...; via ...".
+ }
+ }
+
+ return lines.join("\n");
+}
+
+function artifactFromTool(tool: ToolAccumulator): PageArtifact | undefined { // Turns one accumulated tool call into a page artifact, or undefined when it is not a page tool or exposes neither a URL nor a path.
+ if (!tool.toolName || !PAGE_TOOLS.has(tool.toolName)) return undefined; // NOTE(review): tool.status is recorded by the caller but never checked here, so a call without a successful result can still yield an artifact from its input path — confirm this is intended.
+
+ const input = recordFromUnknown(tool.input);
+ const output = parseJsonRecord(tool.output); // undefined when the output is missing or is not a JSON object.
+ const path = stringField(output, "path") ?? stringField(input, "path"); // The tool's reported path wins over the requested one.
+ const url = normalizePageUrl(
+ stringField(output, "url") ??
+ stringField(output, "publicUrl") ??
+ stringField(output, "pageUrl") ??
+ urlFromText(tool.output), // Last resort: search the raw output text for a /ui/ URL.
+ );
+ if (!url && !path) return undefined;
+
+ const title = stringField(input, "title") ?? stringField(output, "title") ?? path ?? url ?? "Created page"; // The final literal is unreachable because url or path is set at this point.
+ const size = numberField(output, "size");
+ return {
+ seq: tool.seq,
+ toolName: tool.toolName,
+ label: truncate(title, MAX_LABEL_LENGTH),
+ ...(url ? { url } : {}), // Optional keys are omitted entirely rather than set to undefined.
+ ...(path ? { path } : {}),
+ ...(size !== undefined ? { size } : {}),
+ };
+}
+
+function dedupeArtifacts(artifacts: PageArtifact[]): PageArtifact[] { // Collapses artifacts that share a URL (or a path when no URL is present) and returns the survivors in ascending seq order.
+ const byKey = new Map();
+ for (const artifact of artifacts) {
+ const key = artifact.url ?? artifact.path ?? `${artifact.toolName}:${artifact.seq}`; // URL first, then path, then a per-call fallback key.
+ byKey.set(key, artifact); // A later artifact in input order replaces an earlier one with the same key.
+ }
+ return [...byKey.values()].sort((left, right) => left.seq - right.seq); // Sorts a fresh copy; the input array is not mutated.
+}
+
+function parsePayload(event: ChatStreamEvent): Record | undefined { // Parses event.payload_json; returns undefined for invalid JSON or any value that is not a plain object.
+ try {
+ const parsed = JSON.parse(event.payload_json);
+ return recordFromUnknown(parsed);
+ } catch {
+ return undefined; // Malformed payloads are skipped instead of aborting the scan.
+ }
+}
+
+function parseJsonRecord(value: string | undefined): Record | undefined { // Parses a JSON string into a plain object; returns undefined for empty input, invalid JSON, arrays and primitives.
+ if (!value) return undefined; // Covers both undefined and the empty string.
+ try {
+ return recordFromUnknown(JSON.parse(value));
+ } catch {
+ return undefined; // Non-JSON output is tolerated; callers fall back to other sources.
+ }
+}
+
+function recordFromUnknown(value: unknown): Record | undefined { // Narrows an unknown value to an object record; null, arrays and non-objects give undefined.
+ if (value === null || typeof value !== "object" || Array.isArray(value)) {
+ return undefined;
+ }
+ return value as Record; // Cast only; property values are validated by the field accessors.
+}
+
+function stringField(record: Record | undefined, key: string): string | undefined { // Reads record[key] as a trimmed, non-empty string; anything else gives undefined.
+ const value = record?.[key]; // Tolerates an undefined record.
+ if (typeof value !== "string") return undefined;
+ const trimmed = value.trim();
+ return trimmed.length > 0 ? trimmed : undefined; // Whitespace-only strings count as absent.
+}
+
+function numberField(record: Record | undefined, key: string): number | undefined { // Reads record[key] as a finite number; NaN, Infinity and non-numbers give undefined.
+ const value = record?.[key];
+ return typeof value === "number" && Number.isFinite(value) ? value : undefined;
+}
+
+function normalizePageUrl(url: string | undefined): string | undefined { // Returns the URL unchanged when it contains "/ui/" and does not contain "/ui/login"; otherwise undefined.
+ if (!url || !url.includes("/ui/") || url.includes("/ui/login")) { // NOTE(review): the substring test also rejects page paths that merely begin with "login", e.g. /ui/login-help.html — confirm this is acceptable.
+ return undefined;
+ }
+ return url;
+}
+
+function urlFromText(text: string | undefined): string | undefined { // Finds the first http(s) URL containing "/ui/" in free text and passes it through normalizePageUrl.
+ if (!text) return undefined;
+ const match = text.match(/https?:\/\/[^\s"']+\/ui\/[^\s"']+/); // The match ends at whitespace or a quote. NOTE(review): trailing punctuation such as "." or ")" next to the URL is captured too.
+ return normalizePageUrl(match?.[0]);
+}
+
+function truncate(value: string, maxLength: number): string { // Returns value unchanged when it fits; otherwise cuts it and appends "..." so the result is maxLength characters long.
+ if (value.length <= maxLength) return value;
+ return `${value.slice(0, maxLength - 3)}...`; // NOTE(review): for maxLength < 3 the slice end is negative and the result can exceed maxLength; the only caller in this file passes MAX_LABEL_LENGTH (90).
+}
diff --git a/src/chat/event-log.ts b/src/chat/event-log.ts
index ecccb8e8..f8b89242 100644
--- a/src/chat/event-log.ts
+++ b/src/chat/event-log.ts
@@ -45,6 +45,19 @@ export class ChatEventLog {
.all(sessionId, afterSeq, maxRows) as ChatStreamEvent[];
}
+ tail(sessionId: string, limit?: number): ChatStreamEvent[] { // Returns up to `limit` (default 5000) of the session's highest-seq events, ordered by ascending seq.
+ const maxRows = limit ?? 5000;
+ const rows = this.db
+ .query(
+ `SELECT * FROM chat_stream_events
+ WHERE session_id = ?
+ ORDER BY seq DESC
+ LIMIT ?`,
+ )
+ .all(sessionId, maxRows) as ChatStreamEvent[]; // Rows arrive newest first because of ORDER BY seq DESC.
+ return rows.reverse(); // Flip to oldest-first so callers can replay the events in order.
+ }
+
getMaxSeq(sessionId: string): number {
const row = this.db
.query("SELECT MAX(seq) as max_seq FROM chat_stream_events WHERE session_id = ?")
diff --git a/src/chat/http-handlers.ts b/src/chat/http-handlers.ts
index 42a9adf5..073a14c5 100644
--- a/src/chat/http-handlers.ts
+++ b/src/chat/http-handlers.ts
@@ -1,11 +1,8 @@
-// Session-specific and streaming route handlers for the chat HTTP API.
-// Split from http.ts to keep both files under 300 lines.
-
import type { SDKUserMessage } from "../agent/agent-sdk.ts";
type MessageParam = SDKUserMessage["message"];
import type { ChatHandlerDeps } from "./http.ts";
-import { buildUserMessageParam } from "./message-builder.ts";
+import { type BuiltUserMessage, ChatAttachmentResolutionError, buildUserMessage } from "./message-builder.ts";
import {
CHAT_SSE_HEADERS,
CHAT_SSE_RETRY_MS,
@@ -84,28 +81,35 @@ export async function handleStream(req: Request, deps: ChatHandlerDeps): Promise
} catch {
return Response.json({ error: "Invalid JSON" }, { status: 400 });
}
-
if (!body.session_id || !body.text) {
return Response.json({ error: "session_id and text are required" }, { status: 400 });
}
-
const existingWriter = getActiveWriter(body.session_id);
if (existingWriter?.isActive) {
return Response.json({ error: "Session busy" }, { status: 409 });
}
-
const session = deps.sessionStore.get(body.session_id);
- if (!session) {
- return Response.json({ error: "Session not found" }, { status: 404 });
- }
-
+ if (!session) return Response.json({ error: "Session not found" }, { status: 404 });
const tabId = body.tab_id ?? "default";
const attachmentIds = body.attachment_ids ?? [];
- let message: MessageParam;
+ let message: MessageParam = { role: "user", content: body.text };
+ let writerOptions: Pick | undefined;
if (attachmentIds.length > 0) {
- message = await buildUserMessageParam(body.text, attachmentIds, deps.attachmentStore);
- } else {
- message = { role: "user", content: body.text };
+ try {
+ const builtMessage = await buildUserMessage(body.text, attachmentIds, body.session_id, deps.attachmentStore);
+ message = builtMessage.message;
+ writerOptions = { attachments: builtMessage.attachments, transcriptContent: builtMessage.transcriptContent };
+ } catch (err: unknown) {
+ if (err instanceof ChatAttachmentResolutionError) {
+ return Response.json({ error: err.code, message: err.message }, { status: 400 });
+ }
+ const messageText = err instanceof Error ? err.message : String(err);
+ console.error(`[chat-http] Attachment build failed for session ${body.session_id}: ${messageText}`);
+ return Response.json(
+ { error: "attachment_read_failed", message: "Could not read one or more attachments." },
+ { status: 500 },
+ );
+ }
}
const writer = new ChatSessionWriter({
@@ -113,6 +117,7 @@ export async function handleStream(req: Request, deps: ChatHandlerDeps): Promise
runtime: deps.runtime,
eventLog: deps.eventLog,
messageStore: deps.messageStore,
+ attachmentStore: deps.attachmentStore,
sessionStore: deps.sessionStore,
timelineStore: deps.timelineStore,
streamBus: deps.streamBus,
@@ -123,14 +128,12 @@ export async function handleStream(req: Request, deps: ChatHandlerDeps): Promise
const sessionId = body.session_id;
const stream = createSSEStream(sessionId, deps.streamBus, writer);
- writer.run(message, tabId, body.text).catch((err: unknown) => {
+ writer.run(message, tabId, body.text, writerOptions).catch((err: unknown) => {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[chat-http] Writer error for session ${sessionId}: ${msg}`);
});
- return new Response(stream, {
- headers: CHAT_SSE_HEADERS,
- });
+ return new Response(stream, { headers: CHAT_SSE_HEADERS });
}
export async function handleResume(req: Request, sessionId: string, deps: ChatHandlerDeps): Promise {
@@ -261,9 +264,7 @@ export async function handleResume(req: Request, sessionId: string, deps: ChatHa
},
});
- return new Response(stream, {
- headers: CHAT_SSE_HEADERS,
- });
+ return new Response(stream, { headers: CHAT_SSE_HEADERS });
}
export function handleAbort(sessionId: string): Response {
diff --git a/src/chat/message-builder.ts b/src/chat/message-builder.ts
index cd01f946..50f40be8 100644
--- a/src/chat/message-builder.ts
+++ b/src/chat/message-builder.ts
@@ -21,23 +21,104 @@ type ContentBlock = {
title?: string;
};
+export type UserAttachmentMetadata = { // Attachment description built by attachmentToMetadata from a stored attachment row.
+ id: string;
+ filename: string; // "file" when the stored attachment has no filename.
+ mime_type: string; // "application/octet-stream" when the stored attachment has no MIME type.
+ size_bytes: number | null;
+ preview_url: string; // Has the form /chat/attachments/<id>/preview.
+};
+
+export type UserTranscriptContentBlock = // One block of a user message's transcript content, discriminated by `type`.
+ | (UserAttachmentMetadata & { type: "attachment" }) // Attachment metadata tagged as an attachment block.
+ | { type: "text"; text: string }; // The user's typed text.
+
+export type BuiltUserMessage = { // Result of buildUserMessage.
+ message: MessageParam; // Produced by buildMessageParamFromAttachments.
+ attachments: UserAttachmentMetadata[]; // Metadata for each resolved attachment, in request order.
+ transcriptContent: string | UserTranscriptContentBlock[]; // Plain text when there are no attachments, otherwise a block list.
+};
+
+export type AttachmentResolutionCode = "attachment_not_found" | "attachment_wrong_session" | "attachment_already_sent"; // Reasons resolveUserMessageAttachments rejects an attachment id.
+
+export class ChatAttachmentResolutionError extends Error { // Thrown when a requested attachment id cannot be used for the message being built.
+ readonly code: AttachmentResolutionCode; // Machine-readable reason alongside the human-readable message.
+
+ constructor(code: AttachmentResolutionCode) {
+ const message =
+ code === "attachment_already_sent"
+ ? "Attachment has already been sent."
+ : "Attachment is not available for this chat."; // Not-found and wrong-session share one message; only `code` tells them apart.
+ super(message);
+ this.name = "ChatAttachmentResolutionError";
+ this.code = code;
+ }
+}
+
+export async function buildUserMessage( // Resolves attachments for a session and returns the message param together with attachment metadata and transcript content.
+ text: string,
+ attachmentIds: string[],
+ sessionId: string,
+ attachmentStore: ChatAttachmentStore,
+): Promise {
+ const attachments = resolveUserMessageAttachments(attachmentIds, attachmentStore, sessionId); // Throws ChatAttachmentResolutionError on the first id that cannot be used.
+ const metadata = attachments.map(attachmentToMetadata);
+ const message = await buildMessageParamFromAttachments(text, attachments);
+ return {
+ message,
+ attachments: metadata,
+ transcriptContent: buildUserTranscriptContent(text, metadata),
+ };
+}
+
export async function buildUserMessageParam(
text: string,
attachmentIds: string[],
attachmentStore: ChatAttachmentStore,
): Promise {
- if (attachmentIds.length === 0) {
- return { role: "user", content: text };
- }
+ const attachments = resolveUserMessageAttachments(attachmentIds, attachmentStore); // No sessionId is passed, so the session check is skipped; unknown or already-sent ids now throw, whereas the removed code silently dropped unknown ids.
+ return buildMessageParamFromAttachments(text, attachments);
+}
+
+export function buildUserTranscriptContent( // Builds the transcript form of a user message: the bare text, or attachment blocks followed by one text block.
+ text: string,
+ attachments: UserAttachmentMetadata[],
+): string | UserTranscriptContentBlock[] {
+ if (attachments.length === 0) return text; // Messages without attachments stay a plain string.
+ return [...attachments.map((attachment) => ({ ...attachment, type: "attachment" as const })), { type: "text", text }]; // Attachments first, text last.
+}
- const attachments: ChatAttachment[] = [];
+function resolveUserMessageAttachments( // Looks up every id in the store, in order, and throws ChatAttachmentResolutionError on the first one that cannot be used.
+ attachmentIds: string[],
+ attachmentStore: ChatAttachmentStore,
+ sessionId?: string,
+): ChatAttachment[] {
+ if (attachmentIds.length === 0) return [];
- for (const id of attachmentIds) {
+ return attachmentIds.map((id) => {
const att = attachmentStore.getById(id);
- if (att) attachments.push(att);
- }
+ if (!att) throw new ChatAttachmentResolutionError("attachment_not_found");
+ if (sessionId && att.session_id !== sessionId) { // The ownership check runs only when a sessionId is supplied.
+ throw new ChatAttachmentResolutionError("attachment_wrong_session");
+ }
+ if (att.message_id !== null) { // A non-null message_id is treated as "already sent".
+ throw new ChatAttachmentResolutionError("attachment_already_sent");
+ }
+ return att;
+ });
+}
+
+function attachmentToMetadata(att: ChatAttachment): UserAttachmentMetadata { // Projects a stored attachment onto the metadata shape, filling defaults for a missing filename or MIME type.
+ return {
+ id: att.id,
+ filename: att.filename ?? "file",
+ mime_type: att.mime_type ?? "application/octet-stream",
+ size_bytes: att.size_bytes,
+ preview_url: `/chat/attachments/${att.id}/preview`, // Relative URL derived from the attachment id.
+ };
+}
- // All IDs were invalid - fall back to plain text
+async function buildMessageParamFromAttachments(text: string, attachments: ChatAttachment[]): Promise {
if (attachments.length === 0) {
return { role: "user", content: text };
}
diff --git a/src/chat/sdk-to-wire.ts b/src/chat/sdk-to-wire.ts
index ab75f8b4..85b0512f 100644
--- a/src/chat/sdk-to-wire.ts
+++ b/src/chat/sdk-to-wire.ts
@@ -211,7 +211,7 @@ function handleResult(msg: Record, ctx: TranslationContext): Ch
const durationMs = (msg.duration_ms as number) ?? 0;
const numTurns = (msg.num_turns as number) ?? 1;
- if (ctx.assistantStartEmitted && !ctx.assistantEndEmitted) {
+ if (subtype === "success" && ctx.assistantStartEmitted && !ctx.assistantEndEmitted) {
frames.push({
event: "message.assistant_end",
message_id: ctx.messageId,
diff --git a/src/chat/types.ts b/src/chat/types.ts
index da57be1a..d20c59be 100644
--- a/src/chat/types.ts
+++ b/src/chat/types.ts
@@ -140,7 +140,13 @@ export type UserMessageFrame = {
event: "user.message";
message_id: string;
text: string;
- attachments: Array<{ id: string; filename: string; mime_type: string }>;
+ attachments: Array<{
+ id: string;
+ filename: string;
+ mime_type: string;
+ size_bytes: number | null;
+ preview_url: string;
+ }>;
sent_at: string;
source_tab_id: string;
};
diff --git a/src/chat/upload.ts b/src/chat/upload.ts
index 2e2293cb..ff63ea59 100644
--- a/src/chat/upload.ts
+++ b/src/chat/upload.ts
@@ -20,6 +20,7 @@ export type UploadDeps = {
export type AcceptedAttachment = {
id: string;
+ client_id?: string;
filename: string;
mime_type: string;
size: number;
@@ -27,6 +28,7 @@ export type AcceptedAttachment = {
};
export type RejectedAttachment = {
+ client_id?: string;
filename: string;
reason: string;
message: string;
@@ -61,17 +63,23 @@ export async function handleUploadAttachments(req: Request, sessionId: string, d
}
const files = formData.getAll("file").filter((v): v is File => v instanceof File);
+ const clientIds = formData
+ .getAll("client_id")
+ .map((value) => (typeof value === "string" && value.length > 0 ? value : null));
if (files.length === 0) {
return Response.json({ error: "no_files", message: "No files attached." }, { status: 400 });
}
+ const uploadItems = files.map((file, index) => ({ file, clientId: clientIds[index] ?? null }));
+
if (files.length > MAX_FILES_PER_REQUEST) {
// Take the first MAX_FILES_PER_REQUEST, reject the rest
- const toProcess = files.slice(0, MAX_FILES_PER_REQUEST);
- const overflow = files.slice(MAX_FILES_PER_REQUEST);
- const overflowRejected = overflow.map((f) => ({
- filename: f.name,
+ const toProcess = uploadItems.slice(0, MAX_FILES_PER_REQUEST);
+ const overflow = uploadItems.slice(MAX_FILES_PER_REQUEST);
+ const overflowRejected = overflow.map((item) => ({
+ ...(item.clientId ? { client_id: item.clientId } : {}),
+ filename: item.file.name,
reason: "limit_exceeded",
message: `Limit of ${MAX_FILES_PER_REQUEST} files per upload reached.`,
}));
@@ -81,25 +89,36 @@ export async function handleUploadAttachments(req: Request, sessionId: string, d
return Response.json({ attachments: result.attachments, rejected: result.rejected }, { status });
}
- const result = await processFiles(files, sessionId, deps);
+ const result = await processFiles(uploadItems, sessionId, deps);
const status = result.rejected.length === 0 ? 200 : result.attachments.length === 0 ? 400 : 207;
return Response.json({ attachments: result.attachments, rejected: result.rejected }, { status });
}
+type UploadItem = { // One uploaded file paired with the client_id form value found at the same index.
+ file: File;
+ clientId: string | null; // null when no non-empty string client_id exists at that index.
+};
+
async function processFiles(
- files: File[],
+ files: UploadItem[],
sessionId: string,
deps: UploadDeps,
): Promise<{ attachments: AcceptedAttachment[]; rejected: RejectedAttachment[] }> {
const accepted: AcceptedAttachment[] = [];
const rejected: RejectedAttachment[] = [];
- for (const file of files) {
+ for (const item of files) {
+ const { file } = item;
const mime = file.type || guessMimeFromName(file.name) || "";
const validation = validateFile(mime, file.size, file.name);
if (!validation.ok) {
- rejected.push({ filename: file.name, reason: validation.reason, message: validation.message });
+ rejected.push({
+ ...(item.clientId ? { client_id: item.clientId } : {}),
+ filename: file.name,
+ reason: validation.reason,
+ message: validation.message,
+ });
console.log(`[chat-upload] sessionId=${sessionId} file=${file.name} reason=${validation.reason}`);
continue;
}
@@ -125,6 +144,7 @@ async function processFiles(
accepted.push({
id,
+ ...(item.clientId ? { client_id: item.clientId } : {}),
filename: sanitizeFilename(file.name),
mime_type: mime,
size: file.size,
@@ -135,7 +155,12 @@ async function processFiles(
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[chat-upload] write failed for ${file.name}: ${msg}`);
- rejected.push({ filename: file.name, reason: "storage_failed", message: "Could not save file. Please retry." });
+ rejected.push({
+ ...(item.clientId ? { client_id: item.clientId } : {}),
+ filename: file.name,
+ reason: "storage_failed",
+ message: "Could not save file. Please retry.",
+ });
}
}
diff --git a/src/chat/writer.ts b/src/chat/writer.ts
index dc579852..806331ef 100644
--- a/src/chat/writer.ts
+++ b/src/chat/writer.ts
@@ -2,8 +2,11 @@ import type { SDKUserMessage } from "../agent/agent-sdk.ts";
type MessageParam = SDKUserMessage["message"];
import type { AgentRuntime } from "../agent/runtime.ts";
+import type { ChatAttachmentStore } from "./attachment-store.ts";
import { autoRenameSession } from "./auto-rename.ts";
+import { buildChatContinuityContext } from "./continuity-context.ts";
import type { ChatEventLog } from "./event-log.ts";
+import type { UserAttachmentMetadata, UserTranscriptContentBlock } from "./message-builder.ts";
import type { ChatMessageStore } from "./message-store.ts";
import type { NotificationTriggerService } from "./notifications/triggers.ts";
import type { ChatRunTimelineStore } from "./run-timeline.ts";
@@ -18,12 +21,18 @@ export type ChatSessionWriterDeps = {
runtime: AgentRuntime;
eventLog: ChatEventLog;
messageStore: ChatMessageStore;
+ attachmentStore?: ChatAttachmentStore;
sessionStore: ChatSessionStore;
timelineStore?: ChatRunTimelineStore;
streamBus: StreamBus;
notificationTriggers?: NotificationTriggerService;
};
+export type ChatSessionWriterRunOptions = { // Optional extras for ChatSessionWriter.run().
+ attachments?: UserAttachmentMetadata[]; // Treated as an empty list when omitted; otherwise committed to the user message and included in the user.message frame.
+ transcriptContent?: string | UserTranscriptContentBlock[]; // Persisted as the user message content; run() falls back to userText when omitted.
+};
+
// Active writers keyed by sessionId for abort and busy-check lookups
const activeWriters = new Map();
@@ -53,22 +62,35 @@ export class ChatSessionWriter {
activeWriters.set(this.deps.sessionId, this);
}
- async run(message: MessageParam, tabId: string, userText: string): Promise {
+ async run(
+ message: MessageParam,
+ tabId: string,
+ userText: string,
+ options?: ChatSessionWriterRunOptions,
+ ): Promise {
if (!this.running) {
throw new Error("Writer must be claimed before run()");
}
this.abortController = new AbortController();
+ const attachments = options?.attachments ?? [];
+ if (attachments.length > 0 && !this.deps.attachmentStore) {
+ throw new Error("Attachment store is required to commit chat attachments");
+ }
const seqCounter = { current: this.deps.eventLog.getMaxSeq(this.deps.sessionId) };
const msgSeq = this.deps.messageStore.getMaxSeq(this.deps.sessionId) + 1;
+ const transcriptContent = options?.transcriptContent ?? userText;
const userMessageId = this.deps.messageStore.commit({
sessionId: this.deps.sessionId,
seq: msgSeq,
role: "user",
- contentJson: JSON.stringify(typeof message === "string" ? message : message.content),
+ contentJson: JSON.stringify(transcriptContent),
});
+ for (const attachment of attachments) {
+ this.deps.attachmentStore?.commitToMessage(attachment.id, userMessageId);
+ }
this.deps.sessionStore.incrementMessageCount(this.deps.sessionId);
this.deps.sessionStore.setFirstUserMessageAt(this.deps.sessionId);
@@ -76,7 +98,7 @@ export class ChatSessionWriter {
event: "user.message",
message_id: userMessageId,
text: userText,
- attachments: [],
+ attachments,
sent_at: new Date().toISOString(),
source_tab_id: tabId,
};
@@ -97,13 +119,22 @@ export class ChatSessionWriter {
const sessionKey = `web:${this.deps.sessionId}`;
const startTime = Date.now();
let resultText = "";
+ let terminalErrorMessage: string | null = null;
try {
+ const sessionContext = buildChatContinuityContext({
+ sessionId: this.deps.sessionId,
+ eventLog: this.deps.eventLog,
+ });
const response = await this.deps.runtime.runForChat(sessionKey, message, {
signal: this.abortController.signal,
+ sessionContext,
onSdkEvent: (sdkMsg: unknown) => {
const frames = translateSdkMessage(sdkMsg as Record, ctx);
for (const frame of frames) {
+ if (frame.event === "session.error") {
+ terminalErrorMessage = frame.errors[0] ?? "Run failed.";
+ }
const seq = this.emitFrame(frame, seqCounter);
if (timeline.apply(frame, seq)) {
this.persistTimeline(timeline);
@@ -114,6 +145,19 @@ export class ChatSessionWriter {
resultText = response.text;
+ if (terminalErrorMessage) {
+ this.deps.sessionStore.updateCost(this.deps.sessionId, response.cost);
+ if (this.deps.notificationTriggers) {
+ this.deps.notificationTriggers
+ .onHardError(this.deps.sessionId, terminalErrorMessage)
+ .catch((triggerErr: unknown) => {
+ const msg = triggerErr instanceof Error ? triggerErr.message : String(triggerErr);
+ console.warn(`[push] trigger failed: ${msg}`);
+ });
+ }
+ return;
+ }
+
this.deps.messageStore.commit({
id: assistantMessageId,
sessionId: this.deps.sessionId,
diff --git a/src/ui/tools.ts b/src/ui/tools.ts
index 507f0da0..56934335 100644
--- a/src/ui/tools.ts
+++ b/src/ui/tools.ts
@@ -27,7 +27,7 @@ export function createWebUiToolServer(
"phantom_create_page",
"Create or update an HTML page served at /ui/. If html is provided, writes it directly. " +
"If title and content are provided instead, wraps the content in the base template. " +
- "Returns the public URL of the page.",
+ "Returns the page URL to share when the user asks for the page you created.",
{
path: z.string().min(1).describe("File path relative to public/, e.g. 'dashboard.html' or 'reports/weekly.html'"),
html: z.string().optional().describe("Full HTML content to write (use this for complete pages)"),
@@ -77,6 +77,7 @@ export function createWebUiToolServer(
path: safePath,
url: publicUrl,
size: htmlContent.length,
+ note: "This is the created page URL, not a login link.",
});
} catch (error: unknown) {
const msg = error instanceof Error ? error.message : String(error);
@@ -87,7 +88,9 @@ export function createWebUiToolServer(
const generateLoginTool = tool(
"phantom_generate_login",
- "Generate a magic link for web UI authentication. Send this link to the user via Slack. " +
+ "Generate a magic link for web UI authentication. Use only when the user asks for access, " +
+ "auth, login, a magic link, or says they cannot open a page because login is required. " +
+ "Do not use this to answer a request for the page URL of something you created. " +
"The link expires in 10 minutes. After authentication, the session lasts 7 days.",
{},
async () => {
@@ -100,7 +103,7 @@ export function createWebUiToolServer(
// sessionToken intentionally excluded - agent should only share the magic link
expiresIn: "10 minutes",
sessionDuration: "7 days",
- note: "Send the magic link to the user via Slack. They click it and are authenticated instantly.",
+ note: "This is an authentication link, not a page URL. Send it only when login access is needed.",
});
} catch (error: unknown) {
const msg = error instanceof Error ? error.message : String(error);