4 changes: 2 additions & 2 deletions scripts/update_models.ts
@@ -2,12 +2,12 @@

 /**
  * Downloads the latest model prices and context window data from LiteLLM
- * and saves it to src/utils/models.json
+ * and saves it to src/utils/tokens/models.json
  */

 const LITELLM_URL =
   "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
-const OUTPUT_PATH = "src/utils/models.json";
+const OUTPUT_PATH = "src/utils/tokens/models.json";

 async function updateModels() {
   console.log(`Fetching model data from ${LITELLM_URL}...`);
9 changes: 6 additions & 3 deletions src/components/ChatInput.tsx
@@ -350,7 +350,7 @@ export const ChatInput: React.FC<ChatInputProps> = ({
   const inputRef = useRef<HTMLTextAreaElement>(null);
   const modelSelectorRef = useRef<ModelSelectorRef>(null);
   const [mode, setMode] = useMode();
-  const { recentModels } = useModelLRU();
+  const { recentModels, addModel } = useModelLRU();
   const commandListId = useId();

   // Get current send message options from shared hook (must be at component top level)
@@ -359,8 +359,11 @@ export const ChatInput: React.FC<ChatInputProps> = ({
   const preferredModel = sendMessageOptions.model;
   // Setter for model - updates localStorage directly so useSendMessageOptions picks it up
   const setPreferredModel = useCallback(
-    (model: string) => updatePersistedState(getModelKey(workspaceId), model),
-    [workspaceId]
+    (model: string) => {
+      addModel(model); // Update LRU
+      updatePersistedState(getModelKey(workspaceId), model); // Update workspace-specific
+    },
+    [workspaceId, addModel]
   );

   const focusMessageInput = useCallback(() => {
5 changes: 3 additions & 2 deletions src/debug/agentSessionCli.ts
@@ -22,7 +22,7 @@ import {
   type SendMessageOptions,
   type WorkspaceChatMessage,
 } from "@/types/ipc";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";
 import { ensureProvidersConfig } from "@/utils/providers/ensureProvidersConfig";
 import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
 import { extractAssistantText, extractReasoning, extractToolCalls } from "@/debug/chatExtractors";
@@ -184,7 +184,8 @@ async function main(): Promise<void> {
     throw new Error("Message must be provided via --message or stdin");
   }

-  const model = values.model && values.model.trim().length > 0 ? values.model.trim() : defaultModel;
+  const model =
+    values.model && values.model.trim().length > 0 ? values.model.trim() : getDefaultModelFromLRU();
   const timeoutMs = parseTimeout(values.timeout);
   const thinkingLevel = parseThinkingLevel(values["thinking-level"]);
   const initialMode = parseMode(values.mode);
4 changes: 2 additions & 2 deletions src/debug/costs.ts
@@ -3,7 +3,7 @@ import * as path from "path";
 import { defaultConfig } from "@/config";
 import type { CmuxMessage } from "@/types/message";
 import { calculateTokenStats } from "@/utils/tokens/tokenStatsCalculator";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 /**
  * Debug command to display cost/token statistics for a workspace
@@ -35,7 +35,7 @@ export function costsCommand(workspaceId: string) {

   // Detect model from first assistant message
   const firstAssistantMessage = messages.find((msg) => msg.role === "assistant");
-  const model = firstAssistantMessage?.metadata?.model ?? defaultModel;
+  const model = firstAssistantMessage?.metadata?.model ?? getDefaultModelFromLRU();

   // Calculate stats using shared logic (now synchronous)
   const stats = calculateTokenStats(messages, model);
4 changes: 2 additions & 2 deletions src/debug/send-message.ts
@@ -3,7 +3,7 @@ import * as path from "path";
 import { defaultConfig } from "@/config";
 import type { CmuxMessage } from "@/types/message";
 import type { SendMessageOptions } from "@/types/ipc";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 /**
  * Debug command to send a message to a workspace, optionally editing an existing message
@@ -103,7 +103,7 @@ export function sendMessageCommand(

   // Prepare options
   const options: SendMessageOptions = {
-    model: defaultModel,
+    model: getDefaultModelFromLRU(),
   };

   if (editMessageId) {
6 changes: 3 additions & 3 deletions src/hooks/useAIViewKeybinds.ts
@@ -6,7 +6,7 @@ import { updatePersistedState, readPersistedState } from "@/hooks/usePersistedSt
 import type { ThinkingLevel, ThinkingLevelOn } from "@/types/thinking";
 import { DEFAULT_THINKING_LEVEL } from "@/types/thinking";
 import { getThinkingPolicyForModel } from "@/utils/thinking/policy";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 interface UseAIViewKeybindsParams {
   workspaceId: string;
@@ -66,10 +66,10 @@ export function useAIViewKeybinds({
         e.preventDefault();

         // Get selected model from localStorage (what user sees in UI)
-        // Fall back to message history model, then to default model
+        // Fall back to message history model, then to most recent model from LRU
         // This matches the same logic as useSendMessageOptions
         const selectedModel = readPersistedState<string | null>(getModelKey(workspaceId), null);
-        const modelToUse = selectedModel ?? currentModel ?? defaultModel;
+        const modelToUse = selectedModel ?? currentModel ?? getDefaultModelFromLRU();

         // Storage key for remembering this model's last-used active thinking level
         const lastThinkingKey = getLastThinkingByModelKey(modelToUse);
29 changes: 18 additions & 11 deletions src/hooks/useModelLRU.ts
@@ -1,38 +1,45 @@
 import { useCallback, useEffect } from "react";
-import { usePersistedState } from "./usePersistedState";
+import { usePersistedState, readPersistedState } from "./usePersistedState";
 import { MODEL_ABBREVIATIONS } from "@/utils/slashCommands/registry";
+import { defaultModel } from "@/utils/ai/models";

 const MAX_LRU_SIZE = 8;
 const LRU_KEY = "model-lru";

 // Default models from abbreviations (for initial LRU population)
 const DEFAULT_MODELS = Object.values(MODEL_ABBREVIATIONS);

+/**
+ * Get the default model from LRU (non-hook version for use outside React)
+ * This is the ONLY place that reads from LRU outside of the hook.
+ *
+ * @returns The most recently used model, or defaultModel if LRU is empty
+ */
+export function getDefaultModelFromLRU(): string {
+  const lru = readPersistedState<string[]>(LRU_KEY, DEFAULT_MODELS.slice(0, MAX_LRU_SIZE));
+  return lru[0] ?? defaultModel;
+}
+
 /**
  * Hook to manage a Least Recently Used (LRU) cache of AI models.
  * Stores up to 8 recently used models in localStorage.
  * Initializes with default abbreviated models if empty.
  */
 export function useModelLRU() {
-  const [recentModels, setRecentModels] = usePersistedState<string[]>(LRU_KEY, []);
+  const [recentModels, setRecentModels] = usePersistedState<string[]>(
+    LRU_KEY,
+    DEFAULT_MODELS.slice(0, MAX_LRU_SIZE)
+  );

-  // Ensure default models are always present in the LRU (only once on mount)
+  // Merge any new defaults from MODEL_ABBREVIATIONS (only once on mount)
   useEffect(() => {
     setRecentModels((prev) => {
-      // If empty, just use defaults
-      if (prev.length === 0) {
-        return DEFAULT_MODELS.slice(0, MAX_LRU_SIZE);
-      }
-
-      // If we have some models, merge with defaults (keeping existing order, adding missing defaults at end)
       const merged = [...prev];
       for (const defaultModel of DEFAULT_MODELS) {
         if (!merged.includes(defaultModel)) {
           merged.push(defaultModel);
         }
       }

       // Limit to MAX_LRU_SIZE
       return merged.slice(0, MAX_LRU_SIZE);
     });
     // eslint-disable-next-line react-hooks/exhaustive-deps
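Note: ChatInput.tsx above now calls addModel from this hook, but addModel's body sits outside the diff. A minimal sketch of the LRU update it is expected to perform (an assumption, not code from the PR):

// Hypothetical sketch of addModel's effect: move the chosen model to the front and cap the list.
// Assumes the same MAX_LRU_SIZE = 8 defined above; the real implementation lives in useModelLRU.
function pushToFront(lru: string[], model: string, maxSize = MAX_LRU_SIZE): string[] {
  const rest = lru.filter((m) => m !== model); // drop any existing occurrence
  return [model, ...rest].slice(0, maxSize); // most recent first, oldest dropped
}

With that shape, recentModels[0] is always the last model the user picked, which is exactly what getDefaultModelFromLRU() returns.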
12 changes: 7 additions & 5 deletions src/hooks/useSendMessageOptions.ts
@@ -2,8 +2,8 @@ import { use1MContext } from "./use1MContext";
 import { useThinkingLevel } from "./useThinkingLevel";
 import { useMode } from "@/contexts/ModeContext";
 import { usePersistedState } from "./usePersistedState";
+import { useModelLRU } from "./useModelLRU";
 import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
-import { defaultModel } from "@/utils/ai/models";
 import { getModelKey } from "@/constants/storage";
 import type { SendMessageOptions } from "@/types/ipc";
 import type { UIMode } from "@/types/mode";
@@ -19,13 +19,14 @@ function constructSendMessageOptions(
   mode: UIMode,
   thinkingLevel: ThinkingLevel,
   preferredModel: string | null | undefined,
-  use1M: boolean
+  use1M: boolean,
+  fallbackModel: string
 ): SendMessageOptions {
   const additionalSystemInstructions = mode === "plan" ? PLAN_MODE_INSTRUCTION : undefined;

   // Ensure model is always a valid string (defensive against corrupted localStorage)
   const model =
-    typeof preferredModel === "string" && preferredModel ? preferredModel : defaultModel;
+    typeof preferredModel === "string" && preferredModel ? preferredModel : fallbackModel;

   // Enforce thinking policy at the UI boundary as well (e.g., gpt-5-pro → high only)
   const uiThinking = enforceThinkingPolicy(model, thinkingLevel);
@@ -58,13 +59,14 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptions {
   const [use1M] = use1MContext();
   const [thinkingLevel] = useThinkingLevel();
   const [mode] = useMode();
+  const { recentModels } = useModelLRU();
   const [preferredModel] = usePersistedState<string>(
     getModelKey(workspaceId),
-    defaultModel,
+    recentModels[0], // Most recently used model (LRU is never empty)
     { listener: true } // Listen for changes from ModelSelector and other sources
   );

-  return constructSendMessageOptions(mode, thinkingLevel, preferredModel, use1M);
+  return constructSendMessageOptions(mode, thinkingLevel, preferredModel, use1M, recentModels[0]);
 }

 /**
6 changes: 3 additions & 3 deletions src/utils/messages/sendOptions.ts
@@ -4,13 +4,13 @@ import {
   getModeKey,
   USE_1M_CONTEXT_KEY,
 } from "@/constants/storage";
-import { defaultModel } from "@/utils/ai/models";
 import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
 import { readPersistedState } from "@/hooks/usePersistedState";
 import type { SendMessageOptions } from "@/types/ipc";
 import type { UIMode } from "@/types/mode";
 import type { ThinkingLevel } from "@/types/thinking";
 import { enforceThinkingPolicy } from "@/utils/thinking/policy";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 /**
  * Get send options from localStorage
@@ -20,8 +20,8 @@ import { enforceThinkingPolicy } from "@/utils/thinking/policy";
  * This ensures DRY - single source of truth for option extraction.
  */
 export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptions {
-  // Read model preference (workspace-specific)
-  const model = readPersistedState<string>(getModelKey(workspaceId), defaultModel);
+  // Read model preference (workspace-specific), fallback to most recent from LRU
+  const model = readPersistedState<string>(getModelKey(workspaceId), getDefaultModelFromLRU());

   // Read thinking level (workspace-specific)
   const thinkingLevel = readPersistedState<ThinkingLevel>(
4 changes: 3 additions & 1 deletion src/utils/slashCommands/registry.ts
@@ -11,9 +11,11 @@ import type {
 import minimist from "minimist";

 // Model abbreviations for common models
+// Order matters: first model becomes the default for new chats
 export const MODEL_ABBREVIATIONS: Record<string, string> = {
-  opus: "anthropic:claude-opus-4-1",
   sonnet: "anthropic:claude-sonnet-4-5",
+  haiku: "anthropic:claude-haiku-4-5",
+  opus: "anthropic:claude-opus-4-1",
   "gpt-5": "openai:gpt-5",
   "gpt-5-pro": "openai:gpt-5-pro",
   codex: "openai:gpt-5-codex",
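Why the reordering matters, as a small worked example (inferred from the files in this diff, not a comment in the PR):

// Object.values preserves insertion order, so after this change:
const DEFAULT_MODELS = Object.values(MODEL_ABBREVIATIONS);
// DEFAULT_MODELS[0] === "anthropic:claude-sonnet-4-5"
//
// useModelLRU seeds the "model-lru" entry with DEFAULT_MODELS.slice(0, MAX_LRU_SIZE), and
// getDefaultModelFromLRU() returns lru[0], so a fresh install defaults every workspace to
// claude-sonnet-4-5 until the user picks a different model.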
16 changes: 16 additions & 0 deletions src/utils/tokens/models-extra.ts
@@ -39,4 +39,20 @@ export const modelsExtra: Record<string, ModelData> = {
     knowledge_cutoff: "2024-09-30",
     supported_endpoints: ["/v1/responses"],
   },
+
+  // Claude Haiku 4.5 - Released October 15, 2025
+  // $1/M input, $5/M output
+  "claude-haiku-4-5": {
+    max_input_tokens: 200000,
+    max_output_tokens: 8192,
+    input_cost_per_token: 0.000001, // $1 per million input tokens
+    output_cost_per_token: 0.000005, // $5 per million output tokens
+    cache_creation_input_token_cost: 0.00000125, // $1.25 per million tokens
+    cache_read_input_token_cost: 0.0000001, // $0.10 per million tokens
+    litellm_provider: "anthropic",
+    mode: "chat",
+    supports_function_calling: true,
+    supports_vision: true,
+    supports_response_schema: true,
+  },
 };
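A quick sanity check on the per-token units in the new entry (illustrative arithmetic, not part of the diff):

// input_cost_per_token is dollars per single token, so $1 per million input tokens => 0.000001.
// For a hypothetical request with 100,000 input tokens and 2,000 output tokens:
const cost = 100_000 * 0.000001 + 2_000 * 0.000005;
console.log(cost.toFixed(2)); // "0.11" -> roughly eleven cents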