From fe351be876dfb4efec9d9a08f13d1349d76d674d Mon Sep 17 00:00:00 2001 From: Om Gupta Date: Sun, 3 May 2026 16:05:10 +0530 Subject: [PATCH 1/3] clarify web search action labels --- .../src/components/chat/ActionsGroup.tsx | 126 +++++++++++++++--- .../src/components/chat/MessageList.tsx | 20 ++- 2 files changed, 120 insertions(+), 26 deletions(-) diff --git a/packages/desktop/src/components/chat/ActionsGroup.tsx b/packages/desktop/src/components/chat/ActionsGroup.tsx index 2756329e..9f3f6521 100644 --- a/packages/desktop/src/components/chat/ActionsGroup.tsx +++ b/packages/desktop/src/components/chat/ActionsGroup.tsx @@ -1,30 +1,45 @@ import { AnimatePresence, motion } from 'framer-motion' import { Brain, ChevronRight, Code, PanelRight, Workflow } from 'lucide-react' import type { ReactNode } from 'react' -import { useEffect, useMemo, useState } from 'react' +import { useEffect, useMemo, useRef, useState } from 'react' import { artifactStore } from '../../lib/store/artifactStore.js' import { parseCitationSources } from '../../lib/store/handlers/citationParser.js' import { ArtifactCard } from './ArtifactCard.js' import { SourceCards } from './SourceCards.js' import type { ToolAction } from './groupMessages.js' -const SEARCH_TOOLS = new Set(['web_search', 'exa_search', 'exa_find_similar', 'web_research']) +const SEARCH_TOOLS = new Set([ + 'web_search', + 'exa_search', + 'exa_answer', + 'exa_find_similar', + 'web_research', + 'parallel_research', +]) // ── Tool type labels & helpers ───────────────────────────────────── /** Get a favicon URL for tools that interact with external URLs (free, no API key) */ function getToolFavicon(toolName: string, toolInput?: Record): string | null { - if (toolName === 'exa_search' || toolName === 'exa_find_similar') { + const normalizedToolName = normalizeToolName(toolName) + if ( + normalizedToolName === 'exa_search' || + normalizedToolName === 'exa_find_similar' || + normalizedToolName === 'exa_answer' + ) { return 
'https://www.google.com/s2/favicons?domain=exa.ai&sz=16' } - if (toolName === 'web_search') { + if (normalizedToolName === 'web_search') { return 'https://www.google.com/s2/favicons?domain=google.com&sz=16' } + if (normalizedToolName === 'web_research' || normalizedToolName === 'parallel_research') { + return 'https://www.google.com/s2/favicons?domain=parallel.ai&sz=16' + } if (!toolInput) return null let url: string | null = null - if (toolName === 'browser') url = toolInput.url as string - else if (toolName === 'network') url = (toolInput.url || toolInput.host) as string - else if (toolName === 'http_api') url = toolInput.url as string + if (normalizedToolName === 'browser') url = toolInput.url as string + else if (normalizedToolName === 'network') url = (toolInput.url || toolInput.host) as string + else if (normalizedToolName === 'http_api') url = toolInput.url as string if (!url) return null try { const hostname = new URL(url.startsWith('http') ? url : `https://${url}`).hostname @@ -36,7 +51,8 @@ function getToolFavicon(toolName: string, toolInput?: Record): /** Get a short, bold tool type label (like Claude Code's "Read", "Edit", "Shell") */ function getToolTypeLabel(toolName: string, toolInput?: Record): string { - switch (toolName) { + const normalizedToolName = normalizeToolName(toolName) + switch (normalizedToolName) { case 'shell': return 'Shell' case 'filesystem': { @@ -78,11 +94,15 @@ function getToolTypeLabel(toolName: string, toolInput?: Record) case 'sub_agent': return 'Agent' case 'web_search': - return 'Search' + return 'Web Search' case 'exa_search': - return 'Search' + return 'Web Search' + case 'exa_answer': + return 'Web Answer' case 'web_research': - return 'Research' + return 'Web Research' + case 'parallel_research': + return 'Web Research' case 'exa_find_similar': return 'Similar' default: @@ -104,10 +124,62 @@ function formatMcpToolName(toolName: string): string { .join(' ') } +function normalizeToolName(toolName: string): string { + const 
colonIdx = toolName.indexOf(':') + return colonIdx >= 0 ? toolName.slice(colonIdx + 1) : toolName +} + +function getFirstString(...values: unknown[]): string | null { + for (const value of values) { + if (typeof value === 'string' && value.trim()) return value.trim() + } + return null +} + +function getSearchQuery(toolInput?: Record): string | null { + if (!toolInput) return null + + const directQuery = getFirstString( + toolInput.query, + toolInput.q, + toolInput.question, + toolInput.objective, + toolInput.prompt, + ) + if (directQuery) return directQuery + + const searchQueries = toolInput.search_query ?? toolInput.searchQuery ?? toolInput.queries + if (Array.isArray(searchQueries)) { + const queries = searchQueries + .map((entry) => { + if (typeof entry === 'string') return entry.trim() + if (entry && typeof entry === 'object') { + return getFirstString( + (entry as Record).q, + (entry as Record).query, + ) + } + return null + }) + .filter((query): query is string => Boolean(query)) + + if (queries.length > 0) return queries.join(' | ') + } + + return null +} + +function formatQuotedTarget(value: string, maxLength = 60): string { + const trimmed = value.replace(/\s+/g, ' ').trim() + const short = trimmed.length > maxLength ? `${trimmed.slice(0, maxLength - 3)}...` : trimmed + return `"${short}"` +} + /** Get the target/description shown after the type label (in code-styled pill) */ function getToolTarget(toolName: string, toolInput?: Record): string | null { if (!toolInput) return null - switch (toolName) { + const normalizedToolName = normalizeToolName(toolName) + switch (normalizedToolName) { case 'shell': { const cmd = (toolInput.command as string) || '' return cmd.length > 80 ? `${cmd.slice(0, 77)}...` : cmd @@ -147,7 +219,7 @@ function getToolTarget(toolName: string, toolInput?: Record): s } case 'code_search': { const query = (toolInput.query as string) || '' - return query ? `"${query.slice(0, 50)}"` : null + return query ? 
formatQuotedTarget(query, 50) : null } case 'http_api': { const method = (toolInput.method as string) || 'GET' @@ -175,11 +247,12 @@ function getToolTarget(toolName: string, toolInput?: Record): s return (toolInput.task as string) || null case 'web_search': case 'exa_search': + case 'exa_answer': + case 'parallel_research': case 'web_research': { - const query = (toolInput.query as string) || '' + const query = getSearchQuery(toolInput) || '' if (!query) return null - const trimmed = query.length > 60 ? `${query.slice(0, 57)}...` : query - return `"${trimmed}"` + return formatQuotedTarget(query) } case 'exa_find_similar': { const url = (toolInput.url as string) || '' @@ -208,7 +281,8 @@ function getToolMeta( } if (!resultContent) return null - switch (toolName) { + const normalizedToolName = normalizeToolName(toolName) + switch (normalizedToolName) { case 'filesystem': { const op = toolInput?.operation as string if (op === 'read') { @@ -238,8 +312,10 @@ function getToolMeta( } case 'web_search': case 'exa_search': + case 'exa_answer': case 'exa_find_similar': - case 'web_research': { + case 'web_research': + case 'parallel_research': { const resultMatches = resultContent.match(/\burl\b/gi) if (resultMatches && resultMatches.length > 0) { const count = resultMatches.length @@ -309,7 +385,7 @@ function ActionChip({ action }: ActionChipProps) { const [showFullResult, setShowFullResult] = useState(false) const displayedResult = showFullResult ? resultContent : resultLines.slice(0, 6).join('\n') - const isSearchTool = SEARCH_TOOLS.has(toolName) + const isSearchTool = SEARCH_TOOLS.has(normalizeToolName(toolName)) const searchSources = useMemo( () => (isSearchTool && resultContent && !isError ? 
parseCitationSources(resultContent) : []), [isSearchTool, resultContent, isError], @@ -426,12 +502,20 @@ function GroupChip({ errorCount = 0, }: GroupChipProps) { const [open, setOpen] = useState(defaultOpen) + const userToggledRef = useRef(false) useEffect(() => { - if (defaultOpen) setOpen(true) + if (!userToggledRef.current) setOpen(defaultOpen) }, [defaultOpen]) return (
- + + + ) +} diff --git a/packages/desktop/src/components/chat/ContextIndicator.tsx b/packages/desktop/src/components/chat/ContextIndicator.tsx deleted file mode 100644 index 3183c3ef..00000000 --- a/packages/desktop/src/components/chat/ContextIndicator.tsx +++ /dev/null @@ -1,40 +0,0 @@ -import { Brain, FolderOpen } from 'lucide-react' -import type { ConversationContextInfo } from '../../lib/conversations.js' -import { useStore } from '../../lib/store.js' - -interface ContextIndicatorProps { - contextInfo?: ConversationContextInfo - sessionId?: string -} - -export function ContextIndicator({ contextInfo, sessionId }: ContextIndicatorProps) { - const openContextPanel = useStore((s) => s.openContextPanel) - - const totalMemories = contextInfo - ? contextInfo.globalMemories.length + - contextInfo.conversationMemories.length + - contextInfo.crossConversationMemories.length - : 0 - - if (!sessionId && totalMemories === 0) return null - - return ( -
- -
- ) -} diff --git a/packages/desktop/src/components/chat/ContextPopover.tsx b/packages/desktop/src/components/chat/ContextPopover.tsx new file mode 100644 index 00000000..28f3278a --- /dev/null +++ b/packages/desktop/src/components/chat/ContextPopover.tsx @@ -0,0 +1,240 @@ +import type { ContextBreakdown } from '@anton/protocol' +import { type RefObject, useEffect, useMemo, useRef } from 'react' +import { createPortal } from 'react-dom' +import { useStore } from '../../lib/store.js' + +interface Props { + open: boolean + onClose: () => void + anchorRect: DOMRect | null + breakdown: ContextBreakdown + /** + * Trigger button that opens this popover. Used to skip outside-click + * close when the user clicks the trigger again — without this, the + * mousedown on the trigger fires onClose, then the click toggles the + * gauge back open. Net effect: the popover never closes via the + * gauge. + */ + triggerRef?: RefObject +} + +interface Row { + key: string + label: string + tokens: number + /** Solid hue for the stacked bar segment + row legend dot. */ + className: string +} + +function formatTokens(n: number): string { + if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M` + if (n >= 100_000) return `${Math.round(n / 1_000)}k` + if (n >= 1_000) return `${(n / 1_000).toFixed(1)}k` + return String(Math.max(0, Math.round(n))) +} + +const ESTIMATED_POPOVER_HEIGHT = 360 +const ESTIMATED_POPOVER_WIDTH = 320 +const VIEWPORT_PADDING = 8 + +/** + * Floating per-category breakdown of the active session's context-window + * usage. Rendered in a portal so a transformed ancestor can't break the + * fixed positioning. Anchored above the gauge by default; flips below + * when there isn't enough room above. + * + * Pi-SDK sessions render every category. Harness sessions + * (`breakdown.source === 'harness'`) only carry `messages` + `contextWindow`, + * so we collapse to a 2-row breakdown plus a footnote. 
+ */ +export function ContextPopover({ open, onClose, anchorRect, breakdown, triggerRef }: Props) { + const popoverRef = useRef(null) + const openContextPanel = useStore((s) => s.openContextPanel) + + useEffect(() => { + if (!open) return + const onKey = (e: KeyboardEvent) => { + if (e.key === 'Escape') { + e.preventDefault() + onClose() + } + } + const onClick = (e: MouseEvent) => { + const target = e.target as Node + if (popoverRef.current?.contains(target)) return + // Don't close on the trigger button — the trigger's own click + // handler will toggle `open` to false a beat later, and closing + // here would race with that and re-open the popover. + if (triggerRef?.current?.contains(target)) return + onClose() + } + window.addEventListener('keydown', onKey) + const id = window.setTimeout(() => window.addEventListener('mousedown', onClick), 0) + return () => { + window.removeEventListener('keydown', onKey) + window.removeEventListener('mousedown', onClick) + window.clearTimeout(id) + } + }, [open, onClose, triggerRef]) + + const rows: Row[] = useMemo(() => { + if (breakdown.source === 'harness') { + return [ + { + key: 'messages', + label: 'Messages', + tokens: breakdown.messages, + className: 'context-popover__seg--messages', + }, + ] + } + return [ + { + key: 'messages', + label: 'Messages', + tokens: breakdown.messages, + className: 'context-popover__seg--messages', + }, + { + key: 'systemPrompt', + label: 'System prompt', + tokens: breakdown.systemPrompt, + className: 'context-popover__seg--system-prompt', + }, + { + key: 'systemTools', + label: 'System tools', + tokens: breakdown.systemTools, + className: 'context-popover__seg--system-tools', + }, + { + key: 'mcpTools', + label: 'MCP tools', + tokens: breakdown.mcpTools, + className: 'context-popover__seg--mcp-tools', + }, + { + key: 'skills', + label: 'Skills', + tokens: breakdown.skills, + className: 'context-popover__seg--skills', + }, + { + key: 'memoryFiles', + label: 'Memory files', + tokens: 
breakdown.memoryFiles, + className: 'context-popover__seg--memory', + }, + ] + }, [breakdown]) + + const usedTokens = rows.reduce((sum, r) => sum + r.tokens, 0) + const reserved = breakdown.autocompactBuffer + const free = Math.max(0, breakdown.contextWindow - usedTokens - reserved) + + if (!open || !anchorRect) return null + + const spaceAbove = anchorRect.top + const placeAbove = spaceAbove >= ESTIMATED_POPOVER_HEIGHT + 16 + // Center the popover horizontally on the trigger so the visual + // pointer always reads "this comes from that button". Clamp to the + // viewport so a gauge near the right edge can't push the popover + // off-screen. + const anchorCenter = anchorRect.left + anchorRect.width / 2 + const idealLeft = anchorCenter - ESTIMATED_POPOVER_WIDTH / 2 + const maxLeft = window.innerWidth - ESTIMATED_POPOVER_WIDTH - VIEWPORT_PADDING + const left = Math.max(VIEWPORT_PADDING, Math.min(idealLeft, maxLeft)) + const style: React.CSSProperties = placeAbove + ? { + position: 'fixed', + left, + bottom: window.innerHeight - anchorRect.top + 8, + zIndex: 50, + } + : { + position: 'fixed', + left, + top: anchorRect.bottom + 8, + zIndex: 50, + } + + // Stacked bar uses percentage widths so totals stay stable across resizes. + const window_ = breakdown.contextWindow + const pct = (n: number) => (window_ > 0 ? (n / window_) * 100 : 0) + + return createPortal( +
+
+ Context + + {formatTokens(usedTokens)}/{formatTokens(window_)} + +
+ +
+ {rows + .filter((r) => r.tokens > 0) + .map((r) => ( + + ))} + {reserved > 0 && ( + + )} +
+ +
    + {rows.map((r) => ( +
  • + + {r.label} + + {window_ > 0 ? `${pct(r.tokens).toFixed(1)}%` : '—'} + +
  • + ))} + {reserved > 0 && ( +
  • + + Autocompact buffer + {pct(reserved).toFixed(1)}% +
  • + )} +
  • + + Free space + {pct(free).toFixed(1)}% +
  • +
+ + {breakdown.source === 'harness' && ( +
+ Per-section detail is not available for harness sessions. +
+ )} + + +
, + document.body, + ) +} diff --git a/packages/desktop/src/index.css b/packages/desktop/src/index.css index 949bb681..59740944 100644 --- a/packages/desktop/src/index.css +++ b/packages/desktop/src/index.css @@ -14288,38 +14288,167 @@ button { } /* ═══════════════════════════════════════════════════════════════════ - CONTEXT INDICATOR + CONTEXT GAUGE + POPOVER (composer toolbar) ═══════════════════════════════════════════════════════════════════ */ -/* ── Context indicator badge ── */ -.context-indicator { - position: relative; - display: flex; - justify-content: flex-end; - padding: 4px 16px 0; +.context-gauge { + display: inline-grid; + place-items: center; + width: 26px; + height: 26px; + padding: 0; + border: 0; + background: transparent; + border-radius: 999px; + color: var(--text-3); + cursor: pointer; + transition: background 0.12s ease, color 0.12s ease; +} +.context-gauge:hover, +.context-gauge--open { + background: var(--bg-elev-2); + color: var(--text); +} +.context-gauge__track { + stroke: color-mix(in oklch, var(--border-strong) 100%, transparent); +} +.context-gauge__fill { + stroke: var(--text-3); + transition: stroke-dashoffset 0.18s ease, stroke 0.12s ease; +} +.context-gauge--warning .context-gauge__fill { + stroke: var(--warning); +} +.context-gauge--critical .context-gauge__fill { + stroke: var(--accent); } -.context-indicator__badge { +/* ── Popover ── */ +.context-popover { + width: 280px; + padding: 12px 14px 10px; + background: var(--bg-elev-2); + border: 1px solid var(--border-strong); + border-radius: 12px; + box-shadow: 0 12px 32px rgba(0, 0, 0, 0.32), 0 2px 8px rgba(0, 0, 0, 0.18); + color: var(--text); + font-size: 12.5px; + line-height: 1.4; +} +.context-popover__head { display: flex; - align-items: center; + align-items: baseline; + justify-content: space-between; + margin-bottom: 10px; +} +.context-popover__title { + font-size: 13px; + font-weight: 600; + letter-spacing: -0.005em; +} +.context-popover__counter { + color: 
var(--text-3); + font-variant-numeric: tabular-nums; +} +.context-popover__bar { + display: flex; + width: 100%; + height: 6px; + border-radius: 999px; + overflow: hidden; + background: color-mix(in oklch, var(--border) 100%, transparent); + margin-bottom: 10px; +} +.context-popover__seg { + height: 100%; +} +.context-popover__rows { + margin: 0; + padding: 0; + list-style: none; + display: flex; + flex-direction: column; gap: 4px; - padding: 2px 8px; - border: none; +} +.context-popover__row { + display: grid; + grid-template-columns: 8px 1fr auto; + align-items: center; + gap: 8px; + padding: 2px 0; + color: var(--text-2, var(--text)); +} +.context-popover__row--free .context-popover__label { + color: var(--text-3); +} +.context-popover__dot { + width: 8px; + height: 8px; + border-radius: 999px; + display: inline-block; +} +.context-popover__dot--free { background: transparent; - color: var(--text-tertiary, #666); - font-size: 11px; + box-shadow: inset 0 0 0 1px var(--border-strong); +} +.context-popover__label { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} +.context-popover__value { + color: var(--text-3); + font-variant-numeric: tabular-nums; + font-size: 12px; +} +.context-popover__footnote { + margin-top: 10px; + padding-top: 8px; + border-top: 1px solid var(--border); + font-size: 11.5px; + color: var(--text-3); +} +.context-popover__details-link { + display: block; + width: 100%; + margin-top: 10px; + padding: 6px 0 0; + border: 0; + border-top: 1px solid var(--border); + background: transparent; + color: var(--text-3); + font-size: 12px; + text-align: left; cursor: pointer; - border-radius: 4px; - transition: background 0.15s, color 0.15s; + transition: color 0.12s ease; } - -.context-indicator__badge:hover { - background: var(--bg-hover, rgba(var(--overlay), 0.06)); - color: var(--text-secondary, #999); +.context-popover__details-link:hover { + color: var(--accent); } -.context-indicator__count { - font-variant-numeric: 
tabular-nums; +/* Stacked-bar segment hues — kept distinct so the bar reads + instantly without needing a legend. Tones drawn from existing + accent-derived oklch values to match the rest of the UI. */ +.context-popover__seg--messages { + background: var(--accent); +} +.context-popover__seg--system-prompt { + background: oklch(0.7 0.07 250); +} +.context-popover__seg--system-tools { + background: oklch(0.7 0.1 160); +} +.context-popover__seg--mcp-tools { + background: oklch(0.7 0.1 220); +} +.context-popover__seg--skills { + background: oklch(0.72 0.13 300); +} +.context-popover__seg--memory { + background: oklch(0.74 0.13 35); +} +.context-popover__seg--reserved { + background: color-mix(in oklch, var(--text-3) 60%, transparent); } /* ── Context panel (inside SidePanel) ── */ diff --git a/packages/desktop/src/lib/store/handlers/interactionHandler.ts b/packages/desktop/src/lib/store/handlers/interactionHandler.ts index c9ef3c19..bfa32310 100644 --- a/packages/desktop/src/lib/store/handlers/interactionHandler.ts +++ b/packages/desktop/src/lib/store/handlers/interactionHandler.ts @@ -189,6 +189,14 @@ export function handleInteractionMessage(msg: AiMessage, ctx: MessageContext): b return true } + case 'context_update': { + const sid = ctx.msgSessionId + if (sid && msg.breakdown) { + sessionStore.getState().updateSessionState(sid, { contextBreakdown: msg.breakdown }) + } + return true + } + case 'done': { const ss = sessionStore.getState() const store = useStore.getState() diff --git a/packages/desktop/src/lib/store/sessionStore.ts b/packages/desktop/src/lib/store/sessionStore.ts index b1773aed..ee575f0e 100644 --- a/packages/desktop/src/lib/store/sessionStore.ts +++ b/packages/desktop/src/lib/store/sessionStore.ts @@ -95,6 +95,12 @@ export interface SessionState { lastResponseProvider: string | null lastResponseModel: string | null + // Per-category context window breakdown (drives the composer Context + // gauge + popover). 
Null until the agent-server emits the first + // `context_update` event for this session. Pi-SDK sessions carry the + // full split; harness sessions only populate `messages` + `contextWindow`. + contextBreakdown: import('@anton/protocol').ContextBreakdown | null + // Pending interactions pendingConfirm: PendingConfirm | null pendingPlan: PendingPlan | null @@ -150,6 +156,7 @@ export function createSessionState( sessionUsage: null, lastResponseProvider: null, lastResponseModel: null, + contextBreakdown: null, pendingConfirm: null, pendingPlan: null, pendingAskUser: null, diff --git a/specs/features/ui/CONTEXT_GAUGE.md b/specs/features/ui/CONTEXT_GAUGE.md new file mode 100644 index 00000000..a787c510 --- /dev/null +++ b/specs/features/ui/CONTEXT_GAUGE.md @@ -0,0 +1,113 @@ +# Composer Context Gauge + Popover + +A circular gauge in the chat composer toolbar shows context-window +utilisation at a glance. Clicking the gauge opens a popover with a +per-category breakdown of where the prompt budget is going (messages, +system prompt, tools, skills, memory, autocompact reserve, free space). + +The feature replaces the previous `ContextIndicator` (memory-count +badge) — the memory count is still surfaced inside the side panel's +Context view. 
+ +## Categories + +| Category | Source | Pi SDK | Harness | +|---|---|:-:|:-:| +| Messages | `estimateTokens(piAgent.state.messages)` | ✓ | ✓ (= per-turn `last.inputTokens`; falls back to cumulative `total.inputTokens` only when `last` is absent) | +| System prompt | identity + workspace/user rules + current context + surface + agent context + connectors + project type + reference knowledge + workflows + project memory instructions | ✓ | ✗ | +| System tools | built-in `AgentTool` schemas (names in `BUILT_IN_TOOL_NAMES`) | ✓ | ✗ | +| MCP tools | tools from `mcpManager` + direct OAuth connector tools | ✓ | ✗ | +| Skills | `buildActiveSkillsLayer` output | ✓ | ✗ | +| Memory files | `buildMemoryLayer` output | ✓ | ✗ | +| Autocompact buffer | `contextWindow * (1 − threshold)` (default `0.20 × window`) | ✓ | ✗ | +| Free space | `contextWindow − Σ(used) − autocompactBuffer` (derived client-side) | ✓ | ✓ | + +Categories matching Claude Code's UI but **omitted** in v1: + +- **System tools (deferred)** / **MCP tools (deferred)** — Anton has no + lazy-tool-loading mechanism today; every tool's schema is live in + every prompt. We can re-add these rows when ToolSearch-style deferral + ships. +- **Custom agents** — Anton's agents are reachable via `delegate_to_agent` + rather than injected into the prompt, so the row would always read + `0.0%`. Add when an agent flow injects content (e.g. agent identity + prompts) into the prompt. + +## Token estimation + +Two-tier: + +1. **Estimate (sync, cheap)** — char-count per layer / 4. Same heuristic + `estimateMessageTokens` already uses for compaction. Computed inside + `Session.getSystemPrompt` (caches sizes onto `_lastLayerSizes`) and + `categorizeTools` (sums `name + description + JSON.stringify(parameters)` + per tool, classified by `BUILT_IN_TOOL_NAMES`). +2. **Calibration (per-turn)** — after a turn finishes, compare the + model's reported `input_tokens` against our pre-turn estimate. The + ratio updates `_contextEstimateScale`, clamped to `[0.5, 2.0]`, and + gets applied to subsequent breakdowns. 
Drifts toward reality across + the first 1–2 turns without paying tokenizer cost. + +Real tokenisers (`tiktoken`, `@anthropic-ai/tokenizer`) were +deliberately deferred — the calibration loop is "good enough" for a +gauge and avoids per-prompt CPU cost on every turn. + +## Protocol + +```ts +// packages/protocol/src/messages.ts +export interface ContextBreakdown { + contextWindow: number + systemPrompt: number + systemTools: number + mcpTools: number + skills: number + memoryFiles: number + messages: number + autocompactBuffer: number + source: 'pi-sdk' | 'harness' +} + +export interface AiContextUpdateMessage { + type: 'context_update' + sessionId?: string + breakdown: ContextBreakdown +} +``` + +Server emits `context_update` at: + +- `session_created` (initial breakdown so the popover never shows a + loading state; harness sessions get a synthetic breakdown with zeroed category counters). +- After every `turn_end` event from the Pi SDK loop (`Session` already + emits `token_update` at the same site). +- After every codex-harness `tokenUsageUpdated` notification (harness + variant: `messages` + `contextWindow` only). 
+ +## Gauge color states + +The autocompaction threshold (default `0.80`) drives the warning band: + +- `< 70%` → `idle` — muted ring (`var(--text-3)`) +- `70–80%` → `warning` — `var(--warning)` amber +- `≥ 80%` → `critical` — `var(--accent)` (compaction will fire on the + next turn) + +## Files touched + +| File | Change | +|---|---| +| `packages/protocol/src/messages.ts` | + `ContextBreakdown`, + `AiContextUpdateMessage`, union extension | +| `packages/agent-core/src/agent.ts` | + `BUILT_IN_TOOL_NAMES`, + `categorizeTools` | +| `packages/agent-core/src/prompt-layers.ts` | + `SessionPromptLayerSizes`, + `emptyPromptLayerSizes` | +| `packages/agent-core/src/session.ts` | layer-size capture in `getSystemPrompt`, + `getContextBreakdown`, + `updateContextEstimateScale`, emit `context_update` post-turn | +| `packages/agent-core/src/harness/codex-harness-session.ts` | emit harness-variant `context_update` from `onTokenUsageUpdated` | +| `packages/agent-server/src/server.ts` | emit initial `context_update` after `session_created` | +| `packages/desktop/src/lib/store/sessionStore.ts` | + `contextBreakdown` field on `SessionState` | +| `packages/desktop/src/lib/store/handlers/interactionHandler.ts` | handle `context_update` | +| `packages/desktop/src/components/chat/ContextGauge.tsx` | new | +| `packages/desktop/src/components/chat/ContextPopover.tsx` | new | +| `packages/desktop/src/components/chat/ChatInput.tsx` | slot gauge into `composer__toolbar-right` | +| `packages/desktop/src/components/chat/ContextIndicator.tsx` | **deleted** | +| `packages/desktop/src/components/RoutineChat.tsx` | drop `ContextIndicator` import + render | +| `packages/desktop/src/index.css` | + gauge + popover styles, − stale `.context-indicator` rules | diff --git a/specs/features/ui/CONTEXT_GAUGE_BLUNDERS.md b/specs/features/ui/CONTEXT_GAUGE_BLUNDERS.md new file mode 100644 index 00000000..c57e4812 --- /dev/null +++ b/specs/features/ui/CONTEXT_GAUGE_BLUNDERS.md @@ -0,0 +1,203 @@ +# Context 
Gauge — Known Blunders & Fix Queue + +Tracking doc for issues caught during self-review of the initial +`feat/composer-context-gauge` implementation. Delete once every entry +is **Fixed** and verified. + +Numbering matches the order they were surfaced. + +## Critical + +### 1. Calibration is biased low + +**Where**: `packages/agent-core/src/session.ts` — `turn_end` handler. + +`updateContextEstimateScale(input)` runs after pi-ai has already +appended the assistant's response message to `piAgent.state.messages`. +So `breakdown.messages` (post-turn) is larger than the actual input +that produced `inputTokens` (pre-turn). `actual / rawEstimate < 1` → +scale settles around 0.85–0.95 → gauge under-reports by 5–15% on +every turn forever. + +**Fix**: snapshot the raw estimate at `turn_start` and use that for +calibration at `turn_end`, OR exclude the most recently appended +assistant message from the estimate during calibration. + +Status: fixed + +--- + +### 2. Fork children show zero breakdown + +**Where**: `packages/agent-core/src/session.ts:2480-2482`. + +`getSystemPrompt()` returns early when `systemPromptOverride` is set +(sub-agents, fork children) and never populates `_lastLayerSizes`. +The breakdown for those sessions reports `systemPrompt: 0` even when +the override prompt is huge. Tools and messages still report real +numbers, so the gauge looks plausible but the popover lies. + +**Fix**: when `systemPromptOverride` is active, populate +`_lastLayerSizes.identity` with `override.length` (everything else +stays 0) so the breakdown reports the override under "System prompt". + +Status: fixed + +--- + +### 3. Stale gauge after model switch + +**Where**: `packages/agent-core/src/session.ts:1421-1433` +(`switchModel`). + +Updates `resolvedModel` and pi-ai's model but does not re-emit +`context_update`. Switching opus (1M) → sonnet (200k) leaves the +popover showing old proportions until the next turn finishes. 
+ +**Fix**: queue a `context_update` event from `switchModel` (push onto +the next yielded event batch) using the new `resolvedModel.contextWindow`. +Same emit path used by `setSurface`, `refreshConnectorTools`, +`loadConversationContext`. + +Status: fixed + +--- + +### 4. Codex harness `messages` over-reports + +**Where**: `packages/agent-core/src/harness/codex-harness-session.ts` +in `onTokenUsageUpdated`. + +`total.inputTokens` from codex is *cumulative billed input* across +all turns (counts cached tokens too). On long conversations it +exceeds actual prompt size and can show >100% on the gauge. + +**Fix**: prefer `last.inputTokens` (per-turn input) over +`total.inputTokens` for the breakdown's `messages` field; fall back +to `total` only when `last` is absent. Cap the gauge at 100% as a +defensive measure. + +Status: fixed + +--- + +### 5. Harness `session_created` has no initial breakdown + +**Where**: `packages/agent-server/src/server.ts:2219-2229`. + +The initial `context_update` emit only runs on the Pi-SDK +`session_created` branch. Harness sessions show no gauge until the +first turn completes. + +**Fix**: emit a synthetic harness breakdown right after the harness +`session_created` send, with `contextWindow` looked up from the +resolved model and all category counters at 0. + +Status: fixed + +--- + +### 7. Auto-fix collateral leaked into the diff + +**Where**: working tree. + +`pnpm check:fix` reformatted unrelated pre-existing files: +`agent-config/package.json`, `tauri.conf.json`, `ProviderSettingsModal.tsx`, +parts of `agent-server/server.ts`, parts of `index.css`. None of these +relate to the gauge. + +**Fix**: `git checkout` those files (preserving only the gauge-related +diffs) so the PR stays scoped. + +Status: fixed + +--- + +## Medium + +### 6. Clicking the gauge while open doesn't close it + +**Where**: `packages/desktop/src/components/chat/ContextPopover.tsx` ++ `ContextGauge.tsx`. 
+ +Outside-click handler runs on mousedown against `popoverRef`. The +gauge button is outside `popoverRef`, so mousedown closes the popover; +the subsequent click on the gauge fires `setOpen(v => !v)` and reopens +it. The popover can only be closed by clicking elsewhere. + +**Fix**: ignore outside-click when the target is the gauge button +(pass `triggerRef` into the popover and check `triggerRef.current.contains(target)` +before calling `onClose`). + +Status: fixed + +--- + +### 8. Unnecessary `as never[]` type cast + +**Where**: `packages/agent-core/src/session.ts:2664`. + +`estimateTokens(this.piAgent.state.messages as never[])` — the cast +silences type checking. `piAgent.state.messages` is already +`AgentMessage[]`, which is what `estimateTokens` accepts. + +**Fix**: drop the cast. + +Status: fixed + +--- + +## Minor + +### 9. Lost entry point to side panel context view + +**Where**: removed `ContextIndicator.tsx` (clicked → `openContextPanel()`). + +The new gauge opens the popover instead of the side panel. No 1-click +path from composer to the memory list anymore. + +**Fix**: add a "View memory details" affordance to the popover footer +that calls `openContextPanel()` and closes the popover. + +Status: fixed + +--- + +### 10. `_contextEstimateScale` not persisted + +**Where**: `Session.persist()` / `loadSession()`. + +Scale resets to 1.0 on every session resume; calibration starts over. + +**Fix**: persist scale alongside `compactionState` in +`PersistedSession` and rehydrate in the constructor. + +Status: fixed + +--- + +### 11. Magic `anchorRect.left - 80` offset + +**Where**: `ContextPopover.tsx`. + +Random nudge to "center" the popover near the gauge. Not derived from +the popover width. + +**Fix**: align by anchor center: `left = anchorRect.left + anchorRect.width / 2 - ESTIMATED_POPOVER_WIDTH / 2`, +clamped to viewport. + +Status: fixed + +--- + +### 12. 
`setSurface` / `refreshConnectorTools` / `loadConversationContext` don't re-emit + +**Where**: same file, three methods. + +Each rebuilds the prompt and updates `_lastLayerSizes`, but no +`context_update` event is queued. Popover stays stale until next turn. + +**Fix**: same approach as #3 — push a `context_update` onto the next +yielded event batch from each setter. + +Status: fixed