diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index c76f5778be8fe..88e7bacdad4c8 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
index fed0cf712695f..107f054f368bf 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
@@ -1,4 +1,5 @@
+
+{#if toolCalls && toolCalls.length > 0}
+
+	{#each toolCalls as toolCall, index (toolCall.id ?? `${index}`)}
+
+	{/each}
+
+{:else if fallbackContent}
+
+
+
+
+
+
+			Tool calls
+
+
+
+
+
+				Toggle tool call content
+
+
+
+
+
+
+						{fallbackContent}
+
+
+
+
+
+{/if}
+
+
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageToolCallItem.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageToolCallItem.svelte
new file mode 100644
index 0000000000000..ec5e740d46ef8
--- /dev/null
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageToolCallItem.svelte
@@ -0,0 +1,104 @@
+
+
+
+
+
+
+
+			{headerLabel}
+
+
+
+
+
+				Toggle tool call payload
+
+
+
+
+
+
+						{formattedPayload}
+
+
+
+
+
+
+
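For orientation, a hedged sketch (not part of the patch) of the serialized `toolCalls` string that `formattedPayload` above pretty-prints; the id and arguments are invented:

```ts
// One merged tool call as stored on a message's `toolCalls` field; the
// string is produced by JSON.stringify in ChatService (services/chat.ts below).
const exampleToolCalls = JSON.stringify([
	{
		id: 'call_1', // hypothetical id
		type: 'function',
		function: { name: 'get_weather', arguments: '{"city":"Paris"}' }
	}
]);
console.log(JSON.parse(exampleToolCalls));
```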
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
index bf17633095242..dd77298d6fc9e 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
@@ -214,6 +214,11 @@
 					label: 'Show raw LLM output',
 					type: 'checkbox'
 				},
+				{
+					key: 'showToolCalls',
+					label: 'Show tool call chunks',
+					type: 'checkbox'
+				},
 				{
 					key: 'custom',
 					label: 'Custom JSON',
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
index 5976e5dd03d7b..adf9f880ae670 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
@@ -13,6 +13,7 @@
 		updateConversationName
 	} from '$lib/stores/chat.svelte';
 	import ChatSidebarActions from './ChatSidebarActions.svelte';
+	import ModelSelector from './ModelSelector.svelte';
 
 	const sidebar = Sidebar.useSidebar();
 
@@ -110,6 +111,8 @@
 
 				llama.cpp
 
+
+
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte
new file mode 100644
index 0000000000000..ca74610b4a179
--- /dev/null
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte
@@ -0,0 +1,99 @@
+
+
+{#if loading && options.length === 0 && !isMounted}
+
+
+		Loading models…
+
+{:else if options.length === 0}
+
+		No models available.
+
+{:else}
+	{@const selectedOption = getDisplayOption()}
+
+
+
+
+				{selectedOption?.name || 'Select model'}
+
+			{#if updating}
+
+			{/if}
+
+
+
+		{#each options as option (option.id)}
+
+				{option.name}
+
+				{#if option.description}
+					{option.description}
+				{/if}
+
+		{/each}
+
+
+{/if}
+
+{#if error}
+
+		{error}
+
+{/if}
diff --git a/tools/server/webui/src/lib/components/app/index.ts b/tools/server/webui/src/lib/components/app/index.ts
index 4c2cbdebe16eb..cf7c6ca75e935 100644
--- a/tools/server/webui/src/lib/components/app/index.ts
+++ b/tools/server/webui/src/lib/components/app/index.ts
@@ -14,6 +14,7 @@ export { default as ChatFormFileInputInvisible } from './chat/ChatForm/ChatFormF
 export { default as ChatMessage } from './chat/ChatMessages/ChatMessage.svelte';
 export { default as ChatMessages } from './chat/ChatMessages/ChatMessages.svelte';
 export { default as ChatMessageThinkingBlock } from './chat/ChatMessages/ChatMessageThinkingBlock.svelte';
+export { default as ChatMessageToolCallBlock } from './chat/ChatMessages/ChatMessageToolCallBlock.svelte';
 export { default as MessageBranchingControls } from './chat/ChatMessages/ChatMessageBranchingControls.svelte';
 export { default as ChatProcessingInfo } from './chat/ChatProcessingInfo.svelte';
 
@@ -30,6 +31,7 @@ export { default as ParameterSourceIndicator } from './chat/ChatSettings/Paramet
 export { default as ChatSidebar } from './chat/ChatSidebar/ChatSidebar.svelte';
 export { default as ChatSidebarConversationItem } from './chat/ChatSidebar/ChatSidebarConversationItem.svelte';
 export { default as ChatSidebarSearch } from './chat/ChatSidebar/ChatSidebarSearch.svelte';
+export { default as ChatSidebarModelSelector } from './chat/ChatSidebar/ModelSelector.svelte';
 
 export { default as ChatErrorDialog } from './dialogs/ChatErrorDialog.svelte';
 export { default as EmptyFileAlertDialog } from './dialogs/EmptyFileAlertDialog.svelte';
diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts
index 154ec888ce2dc..c2e0ce047e205 100644
--- a/tools/server/webui/src/lib/constants/settings-config.ts
+++ b/tools/server/webui/src/lib/constants/settings-config.ts
@@ -6,6 +6,7 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean> = {
 	theme: 'system',
 	showTokensPerSecond: false,
 	showThoughtInProgress: false,
+	showToolCalls: false,
 	disableReasoningFormat: false,
 	keepStatsVisible: false,
 	askForTitleConfirmation: false,
@@ -78,6 +79,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
 	custom: 'Custom JSON parameters to send to the API. <br />Must be valid JSON format.',
 	showTokensPerSecond: 'Display generation speed in tokens per second during streaming.',
 	showThoughtInProgress: 'Expand thought process by default when generating messages.',
+	showToolCalls:
+		'Display streamed tool call payloads from Harmony-compatible delta.tool_calls data inside assistant messages.',
 	disableReasoningFormat:
 		'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
 	keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index 37e60b85b5a6a..15f7080ecd2bb 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -1,5 +1,8 @@
 import { config } from '$lib/stores/settings.svelte';
+import { selectedModelName } from '$lib/stores/models.svelte';
+import type { ApiChatCompletionToolCall, ApiChatCompletionToolCallDelta } from '$lib/types/api';
 import { slotsService } from './slots';
+
 /**
  * ChatService - Low-level API communication layer for llama.cpp server interactions
  *
@@ -50,6 +53,9 @@
 			onChunk,
 			onComplete,
 			onError,
+			onReasoningChunk,
+			onToolCallChunk,
+			onModel,
 			// Generation parameters
 			temperature,
 			max_tokens,
@@ -118,6 +124,11 @@
 			stream
 		};
 
+		const activeModel = selectedModelName();
+		if (activeModel) {
+			requestBody.model = activeModel;
+		}
+
 		requestBody.reasoning_format = currentConfig.disableReasoningFormat ? 'none' : 'auto';
 
 		if (temperature !== undefined) requestBody.temperature = temperature;
@@ -164,14 +175,16 @@
 		try {
 			const apiKey = currentConfig.apiKey?.toString().trim();
 
+			const headers = {
+				'Content-Type': 'application/json',
+				...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
+			};
+			const requestPayload = JSON.stringify(requestBody);
+
 			const response = await fetch(`./v1/chat/completions`, {
 				method: 'POST',
-				headers: {
-					'Content-Type': 'application/json',
-					...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
-				},
-				body: JSON.stringify(requestBody),
+				headers,
+				body: requestPayload,
 				signal: this.abortController.signal
 			});
 
@@ -190,11 +203,13 @@
 					onChunk,
 					onComplete,
 					onError,
-					options.onReasoningChunk
+					onReasoningChunk,
+					onToolCallChunk,
+					onModel
 				);
-			} else {
-				return this.handleNonStreamResponse(response, onComplete, onError);
 			}
+
+			return this.handleNonStreamResponse(response, onComplete, onError, onToolCallChunk, onModel);
 		} catch (error) {
 			if (error instanceof Error && error.name === 'AbortError') {
 				console.log('Chat completion request was aborted');
@@ -248,11 +263,59 @@
 		onComplete?: (
 			response: string,
 			reasoningContent?: string,
-			timings?: ChatMessageTimings
+			timings?: ChatMessageTimings,
+			toolCalls?: string
 		) => void,
 		onError?: (error: Error) => void,
-		onReasoningChunk?: (chunk: string) => void
+		onReasoningChunk?: (chunk: string) => void,
+		onToolCallChunk?: (chunk: string) => void,
+		onModel?: (model: string) => void
 	): Promise<void> {
+		let aggregatedContent = '';
+		let fullReasoningContent = '';
+		let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
+		let hasReceivedData = false;
+		let toolCallIndexOffset = 0;
+		let hasOpenToolCallBatch = false;
+		let lastTimings: ChatMessageTimings | undefined;
+		let streamFinished = false;
+		let modelEmitted = false;
+
+		const finalizeOpenToolCallBatch = () => {
+			if (!hasOpenToolCallBatch) {
+				return;
+			}
+
+			toolCallIndexOffset = aggregatedToolCalls.length;
+			hasOpenToolCallBatch = false;
+		};
+
+		const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
+			if (!toolCalls || toolCalls.length === 0) {
+				return;
+			}
+
+			aggregatedToolCalls = this.mergeToolCallDeltas(
+				aggregatedToolCalls,
+				toolCalls,
+				toolCallIndexOffset
+			);
+
+			if (aggregatedToolCalls.length === 0) {
+				return;
+			}
+
+			hasOpenToolCallBatch = true;
+
+			const serializedToolCalls = JSON.stringify(aggregatedToolCalls);
+
+			if (!serializedToolCalls) {
+				return;
+			}
+
+			hasReceivedData = true;
+			onToolCallChunk?.(serializedToolCalls);
+		};
+
 		const reader = response.body?.getReader();
 
 		if (!reader) {
 			throw new Error('Response body is not readable');
 		}
 
 		const decoder = new TextDecoder();
-		let aggregatedContent = '';
-		let fullReasoningContent = '';
-		let hasReceivedData = false;
-		let lastTimings: ChatMessageTimings | undefined;
-		let streamFinished = false;
 
 		try {
 			let chunk = '';
 			while (true) {
 				const { done, value } = await reader.read();
 				if (done) break;
 
 				chunk += decoder.decode(value, { stream: true });
 
 				const lines = chunk.split('\n');
-				chunk = lines.pop() || ''; // Save incomplete line for next read
+				chunk = lines.pop() || '';
 
 				for (const line of lines) {
-					if (line.startsWith('data: ')) {
-						const data = line.slice(6);
-						if (data === '[DONE]') {
-							streamFinished = true;
-							continue;
+					if (!line.startsWith('data: ')) {
+						continue;
+					}
+
+					const data = line.slice(6);
+					if (data === '[DONE]') {
+						streamFinished = true;
+						continue;
+					}
+
+					try {
+						const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
+
+						const chunkModel = this.extractModelName(parsed);
+						if (chunkModel && !modelEmitted) {
+							modelEmitted = true;
+							onModel?.(chunkModel);
+						}
+
+						const content = parsed.choices[0]?.delta?.content;
+						const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
+						const toolCalls = parsed.choices[0]?.delta?.tool_calls;
+						const timings = parsed.timings;
+						const promptProgress = parsed.prompt_progress;
+
+						if (timings || promptProgress) {
+							this.updateProcessingState(timings, promptProgress);
 						}
 
-						try {
-							const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
-
-							const content = parsed.choices[0]?.delta?.content;
-							const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
-							const timings = parsed.timings;
-							const promptProgress = parsed.prompt_progress;
-
-							if (timings || promptProgress) {
-								this.updateProcessingState(timings, promptProgress);
-
-								// Store the latest timing data
-								if (timings) {
-									lastTimings = timings;
-								}
-							}
-
-							if (content) {
-								hasReceivedData = true;
-								aggregatedContent += content;
-								onChunk?.(content);
-							}
-
-							if (reasoningContent) {
-								hasReceivedData = true;
-								fullReasoningContent += reasoningContent;
-								onReasoningChunk?.(reasoningContent);
-							}
-						} catch (e) {
-							console.error('Error parsing JSON chunk:', e);
+						if (timings) {
+							lastTimings = timings;
 						}
+
+						if (content) {
+							finalizeOpenToolCallBatch();
+							hasReceivedData = true;
+							aggregatedContent += content;
+							onChunk?.(content);
+						}
+
+						if (reasoningContent) {
+							finalizeOpenToolCallBatch();
+							hasReceivedData = true;
+							fullReasoningContent += reasoningContent;
+							onReasoningChunk?.(reasoningContent);
+						}
+
+						processToolCallDelta(toolCalls);
+					} catch (e) {
+						console.error('Error parsing JSON chunk:', e);
 					}
 				}
 			}
 
 			if (streamFinished) {
-				if (!hasReceivedData && aggregatedContent.length === 0) {
+				if (
+					!hasReceivedData &&
+					aggregatedContent.length === 0 &&
+					aggregatedToolCalls.length === 0
+				) {
 					const noResponseError = new Error('No response received from server. Please try again.');
 					throw noResponseError;
 				}
 
-				onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
+				const finalToolCalls =
+					aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;
+
+				onComplete?.(
+					aggregatedContent,
+					fullReasoningContent || undefined,
+					lastTimings,
+					finalToolCalls
+				);
 			}
 		} catch (error) {
 			const err = error instanceof Error ? error : new Error('Stream error');
@@ -338,6 +420,54 @@
 		}
 	}
 
+	private mergeToolCallDeltas(
+		existing: ApiChatCompletionToolCall[],
+		deltas: ApiChatCompletionToolCallDelta[],
+		indexOffset = 0
+	): ApiChatCompletionToolCall[] {
+		const result = existing.map((call) => ({
+			...call,
+			function: call.function ? { ...call.function } : undefined
+		}));
+
+		for (const delta of deltas) {
+			const index =
+				typeof delta.index === 'number' && delta.index >= 0
+					? delta.index + indexOffset
+					: result.length;
+
+			while (result.length <= index) {
+				result.push({ function: undefined });
+			}
+
+			const target = result[index]!;
+
+			if (delta.id) {
+				target.id = delta.id;
+			}
+
+			if (delta.type) {
+				target.type = delta.type;
+			}
+
+			if (delta.function) {
+				const fn = target.function ? { ...target.function } : {};
+
+				if (delta.function.name) {
+					fn.name = delta.function.name;
+				}
+
+				if (delta.function.arguments) {
+					fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
+				}
+
+				target.function = fn;
+			}
+		}
+
+		return result;
+	}
+
 	/**
 	 * Handles non-streaming response from the chat completion API.
 	 * Parses the JSON response and extracts the generated content.
@@ -353,9 +483,12 @@
 		onComplete?: (
 			response: string,
 			reasoningContent?: string,
-			timings?: ChatMessageTimings
+			timings?: ChatMessageTimings,
+			toolCalls?: string
 		) => void,
-		onError?: (error: Error) => void
+		onError?: (error: Error) => void,
+		onToolCallChunk?: (chunk: string) => void,
+		onModel?: (model: string) => void
 	): Promise<string> {
 		try {
 			const responseText = await response.text();
 
 			if (!responseText) {
 				throw new Error('Empty response body');
 			}
 
 			const data: ApiChatCompletionResponse = JSON.parse(responseText);
+			const responseModel = this.extractModelName(data);
+			if (responseModel) {
+				onModel?.(responseModel);
+			}
+
 			const content = data.choices[0]?.message?.content || '';
 			const reasoningContent = data.choices[0]?.message?.reasoning_content;
+			const toolCalls = data.choices[0]?.message?.tool_calls;
 
 			if (reasoningContent) {
 				console.log('Full reasoning content:', reasoningContent);
 			}
 
-			if (!content.trim()) {
+			let serializedToolCalls: string | undefined;
+
+			if (toolCalls && toolCalls.length > 0) {
+				const mergedToolCalls = this.mergeToolCallDeltas([], toolCalls);
+
+				if (mergedToolCalls.length > 0) {
+					serializedToolCalls = JSON.stringify(mergedToolCalls);
+					onToolCallChunk?.(serializedToolCalls);
+				}
+			}
+
+			if (!content.trim() && !serializedToolCalls) {
 				const noResponseError = new Error('No response received from server. Please try again.');
 				throw noResponseError;
 			}
 
-			onComplete?.(content, reasoningContent);
+			onComplete?.(content, reasoningContent, undefined, serializedToolCalls);
 
 			return content;
 		} catch (error) {
@@ -588,6 +738,69 @@
 		}
 	}
 
+	private extractModelName(data: unknown): string | undefined {
+		if (!data || typeof data !== 'object') {
+			return undefined;
+		}
+
+		const record = data as Record<string, unknown>;
+		const normalize = (value: unknown): string | undefined => {
+			if (typeof value !== 'string') {
+				return undefined;
+			}
+
+			const trimmed = value.trim();
+
+			return trimmed.length > 0 ? trimmed : undefined;
+		};
+
+		const rootModel = normalize(record['model']);
+		if (rootModel) {
+			return rootModel;
+		}
+
+		const choices = record['choices'];
+		if (!Array.isArray(choices) || choices.length === 0) {
+			return undefined;
+		}
+
+		const firstChoice = choices[0] as Record<string, unknown> | undefined;
+		if (!firstChoice) {
+			return undefined;
+		}
+
+		const choiceModel = normalize(firstChoice['model']);
+		if (choiceModel) {
+			return choiceModel;
+		}
+
+		const delta = firstChoice['delta'] as Record<string, unknown> | undefined;
+		if (delta) {
+			const deltaModel = normalize(delta['model']);
+			if (deltaModel) {
+				return deltaModel;
+			}
+		}
+
+		const message = firstChoice['message'] as Record<string, unknown> | undefined;
+		if (message) {
+			const messageModel = normalize(message['model']);
+			if (messageModel) {
+				return messageModel;
+			}
+		}
+
+		const metadata = firstChoice['metadata'] as Record<string, unknown> | undefined;
+		if (metadata) {
+			const metadataModel = normalize(metadata['model']);
+			if (metadataModel) {
+				return metadataModel;
+			}
+		}
+
+		return undefined;
+	}
+
 	private updateProcessingState(
 		timings?: ChatMessageTimings,
 		promptProgress?: ChatMessagePromptProgress
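As a reading aid (not part of the patch), here is roughly how streamed `delta.tool_calls` fragments accumulate into one call; the logic mirrors `mergeToolCallDeltas` above and every value is invented:

```ts
import type { ApiChatCompletionToolCall, ApiChatCompletionToolCallDelta } from '$lib/types/api';

// Three stream chunks for the same call index: the header arrives first,
// then the JSON arguments in fragments.
const chunks: ApiChatCompletionToolCallDelta[] = [
	{ index: 0, id: 'call_1', type: 'function', function: { name: 'get_weather' } },
	{ index: 0, function: { arguments: '{"city":' } },
	{ index: 0, function: { arguments: '"Paris"}' } }
];

const merged: ApiChatCompletionToolCall[] = [];
for (const delta of chunks) {
	const index = delta.index ?? merged.length;
	while (merged.length <= index) merged.push({ function: undefined });

	const target = merged[index];
	if (delta.id) target.id = delta.id;
	if (delta.type) target.type = delta.type;
	if (delta.function) {
		const fn = target.function ?? {};
		if (delta.function.name) fn.name = delta.function.name;
		if (delta.function.arguments) fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
		target.function = fn;
	}
}

console.log(merged[0].function?.arguments); // → '{"city":"Paris"}'
```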
diff --git a/tools/server/webui/src/lib/services/models.ts b/tools/server/webui/src/lib/services/models.ts
new file mode 100644
index 0000000000000..1c7fa3b45631c
--- /dev/null
+++ b/tools/server/webui/src/lib/services/models.ts
@@ -0,0 +1,22 @@
+import { base } from '$app/paths';
+import { config } from '$lib/stores/settings.svelte';
+import type { ApiModelListResponse } from '$lib/types/api';
+
+export class ModelsService {
+	static async list(): Promise<ApiModelListResponse> {
+		const currentConfig = config();
+		const apiKey = currentConfig.apiKey?.toString().trim();
+
+		const response = await fetch(`${base}/v1/models`, {
+			headers: {
+				...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
+			}
+		});
+
+		if (!response.ok) {
+			throw new Error(`Failed to fetch model list (status ${response.status})`);
+		}
+
+		return response.json() as Promise<ApiModelListResponse>;
+	}
+}
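A minimal usage sketch for the new service (illustrative only, assuming an async context and a llama-server instance serving `/v1/models` at the app base path):

```ts
import { ModelsService } from '$lib/services/models';

// Fetch /v1/models and log what the sidebar selector would show.
const list = await ModelsService.list();
for (const entry of list.data) {
	console.log(entry.id, '(owned by', entry.owned_by + ')');
}
```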
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index 5b77abb4cb21c..2100a08148742 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -1,6 +1,5 @@
 import { DatabaseStore } from '$lib/stores/database';
 import { chatService, slotsService } from '$lib/services';
-import { serverStore } from '$lib/stores/server.svelte';
 import { config } from '$lib/stores/settings.svelte';
 import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
 import { browser } from '$app/environment';
@@ -186,6 +185,7 @@
 				type,
 				timestamp: Date.now(),
 				thinking: '',
+				toolCalls: '',
 				children: [],
 				extra: extras
 			},
@@ -300,30 +300,31 @@
 	): Promise<void> {
 		let streamedContent = '';
 		let streamedReasoningContent = '';
-		let modelCaptured = false;
+		let streamedToolCallContent = '';
+		let resolvedModel: string | null = null;
+		let modelPersisted = false;
 
-		const captureModelIfNeeded = (updateDbImmediately = true): string | undefined => {
-			if (!modelCaptured) {
-				const currentModelName = serverStore.modelName;
+		const recordModel = (modelName: string, persistImmediately = true): void => {
+			const trimmedModel = modelName.trim();
 
-				if (currentModelName) {
-					if (updateDbImmediately) {
-						DatabaseStore.updateMessage(assistantMessage.id, { model: currentModelName }).catch(
-							console.error
-						);
-					}
+			if (!trimmedModel || trimmedModel === resolvedModel) {
+				return;
+			}
 
-					const messageIndex = this.findMessageIndex(assistantMessage.id);
+			resolvedModel = trimmedModel;
 
-					this.updateMessageAtIndex(messageIndex, { model: currentModelName });
-					modelCaptured = true;
+			const messageIndex = this.findMessageIndex(assistantMessage.id);
 
-					return currentModelName;
-				}
+			this.updateMessageAtIndex(messageIndex, { model: trimmedModel });
+
+			if (persistImmediately && !modelPersisted) {
+				modelPersisted = true;
+				DatabaseStore.updateMessage(assistantMessage.id, { model: trimmedModel }).catch((error) => {
+					console.error('Failed to persist model name:', error);
+					modelPersisted = false;
+				});
 			}
-			return undefined;
 		};
-
 		slotsService.startStreaming();
 
 		await chatService.sendMessage(allMessages, {
@@ -333,7 +334,6 @@
 				streamedContent += chunk;
 				this.currentResponse = streamedContent;
-				captureModelIfNeeded();
 
 				const messageIndex = this.findMessageIndex(assistantMessage.id);
 				this.updateMessageAtIndex(messageIndex, {
 					content: streamedContent
 				});
 			},
 
@@ -343,42 +343,64 @@
 			onReasoningChunk: (reasoningChunk: string) => {
 				streamedReasoningContent += reasoningChunk;
-				captureModelIfNeeded();
-
 				const messageIndex = this.findMessageIndex(assistantMessage.id);
 
 				this.updateMessageAtIndex(messageIndex, { thinking: streamedReasoningContent });
 			},
 
+			onToolCallChunk: (toolCallChunk: string) => {
+				const chunk = toolCallChunk.trim();
+
+				if (!chunk) {
+					return;
+				}
+
+				streamedToolCallContent = chunk;
+
+				const messageIndex = this.findMessageIndex(assistantMessage.id);
+
+				this.updateMessageAtIndex(messageIndex, { toolCalls: streamedToolCallContent });
+			},
+
+			onModel: (modelName: string) => {
+				recordModel(modelName);
+			},
+
 			onComplete: async (
 				finalContent?: string,
 				reasoningContent?: string,
-				timings?: ChatMessageTimings
+				timings?: ChatMessageTimings,
+				toolCallContent?: string
 			) => {
 				slotsService.stopStreaming();
 
 				const updateData: {
 					content: string;
 					thinking: string;
+					toolCalls: string;
 					timings?: ChatMessageTimings;
 					model?: string;
 				} = {
 					content: finalContent || streamedContent,
 					thinking: reasoningContent || streamedReasoningContent,
+					toolCalls: toolCallContent || streamedToolCallContent,
 					timings: timings
 				};
 
-				const capturedModel = captureModelIfNeeded(false);
-
-				if (capturedModel) {
-					updateData.model = capturedModel;
+				if (resolvedModel && !modelPersisted) {
+					updateData.model = resolvedModel;
+					modelPersisted = true;
 				}
 
 				await DatabaseStore.updateMessage(assistantMessage.id, updateData);
 
 				const messageIndex = this.findMessageIndex(assistantMessage.id);
-				const localUpdateData: { timings?: ChatMessageTimings; model?: string } = {
+				const localUpdateData: {
+					timings?: ChatMessageTimings;
+					model?: string;
+					toolCalls?: string;
+				} = {
 					timings: timings
 				};
 
@@ -386,6 +408,10 @@
 					localUpdateData.model = updateData.model;
 				}
 
+				if (updateData.toolCalls !== undefined) {
+					localUpdateData.toolCalls = updateData.toolCalls;
+				}
+
 				this.updateMessageAtIndex(messageIndex, localUpdateData);
 
 				await DatabaseStore.updateCurrentNode(this.activeConversation!.id, assistantMessage.id);
@@ -491,6 +517,7 @@
 				content: '',
 				timestamp: Date.now(),
 				thinking: '',
+				toolCalls: '',
 				children: []
 			},
 			parentId || null
diff --git a/tools/server/webui/src/lib/stores/database.ts b/tools/server/webui/src/lib/stores/database.ts
index 6394c5b7eda74..bef55d0ceb792 100644
--- a/tools/server/webui/src/lib/stores/database.ts
+++ b/tools/server/webui/src/lib/stores/database.ts
@@ -154,6 +154,7 @@
 			content: '',
 			parent: null,
 			thinking: '',
+			toolCalls: '',
 			children: []
 		};
diff --git a/tools/server/webui/src/lib/stores/models.svelte.ts b/tools/server/webui/src/lib/stores/models.svelte.ts
new file mode 100644
index 0000000000000..967346fcc964a
--- /dev/null
+++ b/tools/server/webui/src/lib/stores/models.svelte.ts
@@ -0,0 +1,223 @@
+import { browser } from '$app/environment';
+import { ModelsService } from '$lib/services/models';
+import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api';
+
+export interface ModelOption {
+	id: string;
+	name: string;
+	model: string;
+	description?: string;
+	capabilities: string[];
+	details?: ApiModelDetails['details'];
+	meta?: ApiModelDataEntry['meta'];
+}
+
+type PersistedModelSelection = {
+	id: string;
+	model: string;
+};
+
+const STORAGE_KEY = 'llama.cpp:selectedModel';
+
+class ModelsStore {
+	private _models = $state<ModelOption[]>([]);
+	private _loading = $state(false);
+	private _updating = $state(false);
+	private _error = $state<string | null>(null);
+	private _selectedModelId = $state<string | null>(null);
+	private _selectedModelName = $state<string | null>(null);
+
+	constructor() {
+		const persisted = this.readPersistedSelection();
+		if (persisted) {
+			this._selectedModelId = persisted.id;
+			this._selectedModelName = persisted.model;
+		}
+	}
+
+	get models(): ModelOption[] {
+		return this._models;
+	}
+
+	get loading(): boolean {
+		return this._loading;
+	}
+
+	get updating(): boolean {
+		return this._updating;
+	}
+
+	get error(): string | null {
+		return this._error;
+	}
+
+	get selectedModelId(): string | null {
+		return this._selectedModelId;
+	}
+
+	get selectedModelName(): string | null {
+		return this._selectedModelName;
+	}
+
+	get selectedModel(): ModelOption | null {
+		if (!this._selectedModelId) {
+			return null;
+		}
+
+		return this._models.find((model) => model.id === this._selectedModelId) ?? null;
+	}
+
+	async fetch(force = false): Promise<void> {
+		if (this._loading) return;
+		if (this._models.length > 0 && !force) return;
+
+		this._loading = true;
+		this._error = null;
+
+		try {
+			const response = await ModelsService.list();
+
+			const models: ModelOption[] = response.data.map((item, index) => {
+				const details = response.models?.[index];
+				const rawCapabilities = Array.isArray(details?.capabilities)
+					? [...(details?.capabilities ?? [])]
+					: [];
+				const displayNameSource =
+					details?.name && details.name.trim().length > 0 ? details.name : item.id;
+				const displayName = this.toDisplayName(displayNameSource);
+
+				return {
+					id: item.id,
+					name: displayName,
+					model: details?.model || item.id,
+					description: details?.description,
+					capabilities: rawCapabilities.filter((value): value is string => Boolean(value)),
+					details: details?.details,
+					meta: item.meta ?? null
+				} satisfies ModelOption;
+			});
+
+			this._models = models;
+
+			const persisted = this.readPersistedSelection();
+			let nextSelectionId = this._selectedModelId ?? persisted?.id ?? null;
+			let nextSelectionName = this._selectedModelName ?? persisted?.model ?? null;
+
+			if (nextSelectionId) {
+				const match = models.find((model) => model.id === nextSelectionId);
+				if (match) {
+					nextSelectionId = match.id;
+					nextSelectionName = match.model;
+				} else if (models[0]) {
+					nextSelectionId = models[0].id;
+					nextSelectionName = models[0].model;
+				} else {
+					nextSelectionId = null;
+					nextSelectionName = null;
+				}
+			} else if (models[0]) {
+				nextSelectionId = models[0].id;
+				nextSelectionName = models[0].model;
+			}
+
+			this._selectedModelId = nextSelectionId;
+			this._selectedModelName = nextSelectionName;
+			this.persistSelection(
+				nextSelectionId && nextSelectionName
+					? { id: nextSelectionId, model: nextSelectionName }
+					: null
+			);
+		} catch (error) {
+			this._models = [];
+			this._error = error instanceof Error ? error.message : 'Failed to load models';
+			throw error;
+		} finally {
+			this._loading = false;
+		}
+	}
+
+	async select(modelId: string): Promise<void> {
+		if (!modelId || this._updating) {
+			return;
+		}
+
+		if (this._selectedModelId === modelId) {
+			return;
+		}
+
+		const option = this._models.find((model) => model.id === modelId);
+		if (!option) {
+			throw new Error('Selected model is not available');
+		}
+
+		this._updating = true;
+		this._error = null;
+
+		try {
+			this._selectedModelId = option.id;
+			this._selectedModelName = option.model;
+			this.persistSelection({ id: option.id, model: option.model });
+		} finally {
+			this._updating = false;
+		}
+	}
+
+	private toDisplayName(id: string): string {
+		const segments = id.split(/\\|\//);
+		const candidate = segments.pop();
+
+		return candidate && candidate.trim().length > 0 ? candidate : id;
+	}
+
+	private readPersistedSelection(): PersistedModelSelection | null {
+		if (!browser) {
+			return null;
+		}
+
+		try {
+			const raw = localStorage.getItem(STORAGE_KEY);
+			if (!raw) {
+				return null;
+			}
+
+			const parsed = JSON.parse(raw);
+			if (parsed && typeof parsed.id === 'string') {
+				const id = parsed.id;
+				const model =
+					typeof parsed.model === 'string' && parsed.model.length > 0 ? parsed.model : id;
+
+				return { id, model };
+			}
+		} catch (error) {
+			console.warn('Failed to read model selection from localStorage:', error);
+		}
+
+		return null;
+	}
+
+	private persistSelection(selection: PersistedModelSelection | null): void {
+		if (!browser) {
+			return;
+		}
+
+		try {
+			if (selection) {
+				localStorage.setItem(STORAGE_KEY, JSON.stringify(selection));
+			} else {
+				localStorage.removeItem(STORAGE_KEY);
+			}
+		} catch (error) {
+			console.warn('Failed to persist model selection to localStorage:', error);
+		}
+	}
+}
+
+export const modelsStore = new ModelsStore();
+
+export const modelOptions = () => modelsStore.models;
+export const modelsLoading = () => modelsStore.loading;
+export const modelsUpdating = () => modelsStore.updating;
+export const modelsError = () => modelsStore.error;
+export const selectedModelId = () => modelsStore.selectedModelId;
+export const selectedModelName = () => modelsStore.selectedModelName;
+export const selectedModelOption = () => modelsStore.selectedModel;
+
+export const fetchModels = modelsStore.fetch.bind(modelsStore);
+export const selectModel = modelsStore.select.bind(modelsStore);
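A hedged sketch of how a component might drive the new store (the model id below is invented):

```ts
import { fetchModels, selectModel, selectedModelName, modelsError } from '$lib/stores/models.svelte';

// Populate the store (a no-op if already loaded unless force = true),
// pick a model, and read the name that chat requests will send as `model`.
// The selection is persisted to localStorage under 'llama.cpp:selectedModel'.
await fetchModels();
await selectModel('ggml-org/gpt-oss-20b'); // hypothetical model id
console.log(selectedModelName() ?? modelsError());
```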
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index d0e60a6c13706..2d2a8e5bf61db 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -36,6 +36,41 @@
 	timestamp?: number;
 }
 
+export interface ApiModelDataEntry {
+	id: string;
+	object: string;
+	created: number;
+	owned_by: string;
+	meta?: Record<string, unknown> | null;
+}
+
+export interface ApiModelDetails {
+	name: string;
+	model: string;
+	modified_at?: string;
+	size?: string | number;
+	digest?: string;
+	type?: string;
+	description?: string;
+	tags?: string[];
+	capabilities?: string[];
+	parameters?: string;
+	details?: {
+		parent_model?: string;
+		format?: string;
+		family?: string;
+		families?: string[];
+		parameter_size?: string;
+		quantization_level?: string;
+	};
+}
+
+export interface ApiModelListResponse {
+	object: string;
+	data: ApiModelDataEntry[];
+	models?: ApiModelDetails[];
+}
+
 export interface ApiLlamaCppServerProps {
 	default_generation_settings: {
 		id: number;
@@ -120,6 +155,7 @@
 		content: string | ApiChatMessageContentPart[];
 	}>;
 	stream?: boolean;
+	model?: string;
 	// Reasoning parameters
 	reasoning_format?: string;
 	// Generation parameters
@@ -149,11 +185,32 @@
 	custom?: Record<string, unknown>;
 }
 
+export interface ApiChatCompletionToolCallFunctionDelta {
+	name?: string;
+	arguments?: string;
+}
+
+export interface ApiChatCompletionToolCallDelta {
+	index?: number;
+	id?: string;
+	type?: string;
+	function?: ApiChatCompletionToolCallFunctionDelta;
+}
+
+export interface ApiChatCompletionToolCall extends ApiChatCompletionToolCallDelta {
+	function?: ApiChatCompletionToolCallFunctionDelta & { arguments?: string };
+}
+
 export interface ApiChatCompletionStreamChunk {
+	model?: string;
 	choices: Array<{
+		model?: string;
+		metadata?: { model?: string };
 		delta: {
 			content?: string;
 			reasoning_content?: string;
+			model?: string;
+			tool_calls?: ApiChatCompletionToolCallDelta[];
 		};
 	}>;
 	timings?: {
@@ -167,10 +224,15 @@
 }
 
 export interface ApiChatCompletionResponse {
+	model?: string;
 	choices: Array<{
+		model?: string;
+		metadata?: { model?: string };
 		message: {
 			content: string;
 			reasoning_content?: string;
+			model?: string;
+			tool_calls?: ApiChatCompletionToolCallDelta[];
 		};
 	}>;
 }
diff --git a/tools/server/webui/src/lib/types/database.d.ts b/tools/server/webui/src/lib/types/database.d.ts
index 7f6b76ba271cc..7b489c164905e 100644
--- a/tools/server/webui/src/lib/types/database.d.ts
+++ b/tools/server/webui/src/lib/types/database.d.ts
@@ -49,6 +49,7 @@ export interface DatabaseMessage {
 	content: string;
 	parent: string;
 	thinking: string;
+	toolCalls?: string;
 	children: string[];
 	extra?: DatabaseMessageExtra[];
 	timings?: ChatMessageTimings;
diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts
index 4311f779ad841..c5398c0834ada 100644
--- a/tools/server/webui/src/lib/types/settings.d.ts
+++ b/tools/server/webui/src/lib/types/settings.d.ts
@@ -41,7 +41,14 @@ export interface SettingsChatServiceOptions {
 	// Callbacks
 	onChunk?: (chunk: string) => void;
 	onReasoningChunk?: (chunk: string) => void;
-	onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
+	onToolCallChunk?: (chunk: string) => void;
+	onModel?: (model: string) => void;
+	onComplete?: (
+		response: string,
+		reasoningContent?: string,
+		timings?: ChatMessageTimings,
+		toolCalls?: string
+	) => void;
 	onError?: (error: Error) => void;
}
diff --git a/tools/server/webui/src/routes/+layout.svelte b/tools/server/webui/src/routes/+layout.svelte
index 8912f642ceffc..737e3609d5cc8 100644
--- a/tools/server/webui/src/routes/+layout.svelte
+++ b/tools/server/webui/src/routes/+layout.svelte
@@ -148,7 +148,7 @@
 
-
+
 
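Taken together, a plausible caller of the extended `SettingsChatServiceOptions` callbacks looks like this (sketch only; the conversation array and logging are placeholders):

```ts
import { chatService } from '$lib/services';

// Placeholder conversation; the concrete message type is whatever
// chatService.sendMessage expects in the webui (elided here).
const messages: Parameters<typeof chatService.sendMessage>[0] = [];

await chatService.sendMessage(messages, {
	onChunk: (text) => console.log('content:', text),
	onReasoningChunk: (text) => console.log('thinking:', text),
	onToolCallChunk: (json) => console.log('tool calls so far:', JSON.parse(json)),
	onModel: (model) => console.log('served by model:', model),
	onComplete: (content, reasoning, timings, toolCalls) => {
		if (toolCalls) console.log('final tool calls:', JSON.parse(toolCalls));
	},
	onError: (error) => console.error(error)
});
```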