Commit 4d39b5a

🤖 Add Haiku 4-5 support and centralize default model logic

- Add claude-haiku-4-5 to MODEL_ABBREVIATIONS with 'haiku' shortcut
- Add pricing/config for Haiku 4-5 to models-extra.ts ($1/$5 per million tokens)
- Reorder MODEL_ABBREVIATIONS to put sonnet first (becomes default for new chats)
- Centralize default model logic: created getDefaultModelFromLRU() in useModelLRU.ts
- Remove all imports of defaultModel except in models.ts and useModelLRU.ts
- Update all code paths to use LRU for default model selection:
  - useSendMessageOptions hook
  - getSendOptionsFromStorage (non-hook)
  - useAIViewKeybinds
  - Debug scripts (costs, agentSessionCli, send-message)
- Update models.json with latest model pricing data

This makes the system less prescriptive: the most recently used model becomes the default for new chats, creating a natural user-driven flow.
1 parent 16ca4e5 commit 4d39b5a
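
A minimal sketch of the flow this enables, assuming the LRU is persisted as a JSON-encoded string[] under the "model-lru" key (model ids are illustrative):

// After a user chats with Haiku, it moves to the head of the LRU:
//   localStorage["model-lru"] → ["claude-haiku-4-5", ...]
// A brand-new chat then defaults to that model:
const model = getDefaultModelFromLRU(); // "claude-haiku-4-5"
// Only when the LRU is empty does the static defaultModel apply.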

File tree

10 files changed: +23,284 −19 lines

src/debug/agentSessionCli.ts (2 additions, 2 deletions)

@@ -22,7 +22,7 @@ import {
   type SendMessageOptions,
   type WorkspaceChatMessage,
 } from "@/types/ipc";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";
 import { ensureProvidersConfig } from "@/utils/providers/ensureProvidersConfig";
 import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
 import { extractAssistantText, extractReasoning, extractToolCalls } from "@/debug/chatExtractors";
@@ -184,7 +184,7 @@ async function main(): Promise<void> {
     throw new Error("Message must be provided via --message or stdin");
   }

-  const model = values.model && values.model.trim().length > 0 ? values.model.trim() : defaultModel;
+  const model = values.model && values.model.trim().length > 0 ? values.model.trim() : getDefaultModelFromLRU();
   const timeoutMs = parseTimeout(values.timeout);
   const thinkingLevel = parseThinkingLevel(values["thinking-level"]);
   const initialMode = parseMode(values.mode);

src/debug/costs.ts (2 additions, 2 deletions)

@@ -3,7 +3,7 @@ import * as path from "path";
 import { defaultConfig } from "@/config";
 import type { CmuxMessage } from "@/types/message";
 import { calculateTokenStats } from "@/utils/tokens/tokenStatsCalculator";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 /**
  * Debug command to display cost/token statistics for a workspace
@@ -35,7 +35,7 @@ export function costsCommand(workspaceId: string) {

   // Detect model from first assistant message
   const firstAssistantMessage = messages.find((msg) => msg.role === "assistant");
-  const model = firstAssistantMessage?.metadata?.model ?? defaultModel;
+  const model = firstAssistantMessage?.metadata?.model ?? getDefaultModelFromLRU();

   // Calculate stats using shared logic (now synchronous)
   const stats = calculateTokenStats(messages, model);

src/debug/send-message.ts (2 additions, 2 deletions)

@@ -3,7 +3,7 @@ import * as path from "path";
 import { defaultConfig } from "@/config";
 import type { CmuxMessage } from "@/types/message";
 import type { SendMessageOptions } from "@/types/ipc";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 /**
  * Debug command to send a message to a workspace, optionally editing an existing message
@@ -103,7 +103,7 @@ export function sendMessageCommand(

   // Prepare options
   const options: SendMessageOptions = {
-    model: defaultModel,
+    model: getDefaultModelFromLRU(),
   };

   if (editMessageId) {

src/hooks/useAIViewKeybinds.ts (3 additions, 3 deletions)

@@ -6,7 +6,7 @@ import { updatePersistedState, readPersistedState } from "@/hooks/usePersistedState";
 import type { ThinkingLevel, ThinkingLevelOn } from "@/types/thinking";
 import { DEFAULT_THINKING_LEVEL } from "@/types/thinking";
 import { getThinkingPolicyForModel } from "@/utils/thinking/policy";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 interface UseAIViewKeybindsParams {
   workspaceId: string;
@@ -66,10 +66,10 @@ export function useAIViewKeybinds({
       e.preventDefault();

       // Get selected model from localStorage (what user sees in UI)
-      // Fall back to message history model, then to default model
+      // Fall back to message history model, then to most recent model from LRU
       // This matches the same logic as useSendMessageOptions
       const selectedModel = readPersistedState<string | null>(getModelKey(workspaceId), null);
-      const modelToUse = selectedModel ?? currentModel ?? defaultModel;
+      const modelToUse = selectedModel ?? currentModel ?? getDefaultModelFromLRU();

       // Storage key for remembering this model's last-used active thinking level
       const lastThinkingKey = getLastThinkingByModelKey(modelToUse);
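
The coalescing chain above resolves in UI-priority order; a hedged sketch of the fresh-workspace case (values are illustrative):

// 1. selectedModel: this workspace's persisted ModelSelector choice
// 2. currentModel:  model recorded on the workspace's message history
// 3. LRU head:      most recently used model anywhere in the app
const selectedModel = null;      // nothing persisted for this workspace yet
const currentModel = undefined;  // no assistant messages yet
const modelToUse = selectedModel ?? currentModel ?? getDefaultModelFromLRU();
// → whatever model the user touched last, e.g. "claude-haiku-4-5"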

src/hooks/useModelLRU.ts (13 additions, 1 deletion)

@@ -1,13 +1,25 @@
 import { useCallback, useEffect } from "react";
-import { usePersistedState } from "./usePersistedState";
+import { usePersistedState, readPersistedState } from "./usePersistedState";
 import { MODEL_ABBREVIATIONS } from "@/utils/slashCommands/registry";
+import { defaultModel } from "@/utils/ai/models";

 const MAX_LRU_SIZE = 8;
 const LRU_KEY = "model-lru";

 // Default models from abbreviations (for initial LRU population)
 const DEFAULT_MODELS = Object.values(MODEL_ABBREVIATIONS);

+/**
+ * Get the default model from LRU (non-hook version for use outside React)
+ * This is the ONLY place that reads from LRU outside of the hook.
+ *
+ * @returns The most recently used model, or defaultModel if LRU is empty
+ */
+export function getDefaultModelFromLRU(): string {
+  const lru = readPersistedState<string[]>(LRU_KEY, []);
+  return lru[0] ?? defaultModel;
+}
+
 /**
  * Hook to manage a Least Recently Used (LRU) cache of AI models.
  * Stores up to 8 recently used models in localStorage.
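
A quick usage sketch of the new helper (model ids and the JSON shape of the persisted value are assumptions; readPersistedState lives in ./usePersistedState):

import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

// Populated LRU: the head of the list wins.
//   localStorage["model-lru"] = '["claude-haiku-4-5","claude-sonnet-4"]'
//   getDefaultModelFromLRU()  → "claude-haiku-4-5"
// Empty or missing LRU: readPersistedState returns the [] fallback,
// so lru[0] is undefined and ?? yields the static defaultModel.
const model = getDefaultModelFromLRU();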

src/hooks/useSendMessageOptions.ts (7 additions, 5 deletions)

@@ -2,8 +2,8 @@ import { use1MContext } from "./use1MContext";
 import { useThinkingLevel } from "./useThinkingLevel";
 import { useMode } from "@/contexts/ModeContext";
 import { usePersistedState } from "./usePersistedState";
+import { useModelLRU } from "./useModelLRU";
 import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
-import { defaultModel } from "@/utils/ai/models";
 import { getModelKey } from "@/constants/storage";
 import type { SendMessageOptions } from "@/types/ipc";
 import type { UIMode } from "@/types/mode";
@@ -19,13 +19,14 @@ function constructSendMessageOptions(
   mode: UIMode,
   thinkingLevel: ThinkingLevel,
   preferredModel: string | null | undefined,
-  use1M: boolean
+  use1M: boolean,
+  fallbackModel: string
 ): SendMessageOptions {
   const additionalSystemInstructions = mode === "plan" ? PLAN_MODE_INSTRUCTION : undefined;

   // Ensure model is always a valid string (defensive against corrupted localStorage)
   const model =
-    typeof preferredModel === "string" && preferredModel ? preferredModel : defaultModel;
+    typeof preferredModel === "string" && preferredModel ? preferredModel : fallbackModel;

   // Enforce thinking policy at the UI boundary as well (e.g., gpt-5-pro → high only)
   const uiThinking = enforceThinkingPolicy(model, thinkingLevel);
@@ -58,13 +59,14 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptions {
   const [use1M] = use1MContext();
   const [thinkingLevel] = useThinkingLevel();
   const [mode] = useMode();
+  const { recentModels } = useModelLRU();
   const [preferredModel] = usePersistedState<string>(
     getModelKey(workspaceId),
-    defaultModel,
+    recentModels[0], // Most recently used model (LRU is never empty)
     { listener: true } // Listen for changes from ModelSelector and other sources
   );

-  return constructSendMessageOptions(mode, thinkingLevel, preferredModel, use1M);
+  return constructSendMessageOptions(mode, thinkingLevel, preferredModel, use1M, recentModels[0]);
 }

 /**
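
The added fallbackModel parameter keeps the defensive check free of any direct localStorage read; a sketch of how the guard behaves (the helper name is hypothetical):

// Same predicate as in constructSendMessageOptions above:
const pickModel = (preferred: unknown, fallback: string): string =>
  typeof preferred === "string" && preferred !== "" ? preferred : fallback;

pickModel("claude-haiku-4-5", "fallback-model"); // valid string → kept
pickModel("", "fallback-model");                 // empty string → fallback
pickModel(42, "fallback-model");                 // corrupted storage → fallback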

src/utils/messages/sendOptions.ts (3 additions, 3 deletions)

@@ -4,13 +4,13 @@ import {
   getModeKey,
   USE_1M_CONTEXT_KEY,
 } from "@/constants/storage";
-import { defaultModel } from "@/utils/ai/models";
 import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
 import { readPersistedState } from "@/hooks/usePersistedState";
 import type { SendMessageOptions } from "@/types/ipc";
 import type { UIMode } from "@/types/mode";
 import type { ThinkingLevel } from "@/types/thinking";
 import { enforceThinkingPolicy } from "@/utils/thinking/policy";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 /**
  * Get send options from localStorage
@@ -20,8 +20,8 @@ import { enforceThinkingPolicy } from "@/utils/thinking/policy";
  * This ensures DRY - single source of truth for option extraction.
  */
 export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptions {
-  // Read model preference (workspace-specific)
-  const model = readPersistedState<string>(getModelKey(workspaceId), defaultModel);
+  // Read model preference (workspace-specific), fallback to most recent from LRU
+  const model = readPersistedState<string>(getModelKey(workspaceId), getDefaultModelFromLRU());

   // Read thinking level (workspace-specific)
   const thinkingLevel = readPersistedState<ThinkingLevel>(
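
getSendOptionsFromStorage can call getDefaultModelFromLRU() because both rely on the non-hook readPersistedState. Its implementation is not part of this commit; a plausible sketch, purely an assumption, would be:

// Hypothetical shape of readPersistedState (not from this diff).
function readPersistedState<T>(key: string, fallback: T): T {
  try {
    const raw = localStorage.getItem(key);
    return raw === null ? fallback : (JSON.parse(raw) as T);
  } catch {
    return fallback; // corrupted JSON behaves as if the key were unset
  }
}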
