diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index c76f5778be8fe..88e7bacdad4c8 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
index fed0cf712695f..107f054f368bf 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
@@ -1,4 +1,5 @@
+<!-- script-block change elided from this excerpt -->
+{#if toolCalls && toolCalls.length > 0}
+	{#each toolCalls as toolCall, index (toolCall.id ?? `${index}`)}
+		<!-- per-call item markup elided from this excerpt -->
+	{/each}
+{:else if fallbackContent}
+	<!-- collapsible fallback block; only its labels survive in this excerpt -->
+	Tool calls
+	Toggle tool call content
+{/if}
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageToolCallItem.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageToolCallItem.svelte
new file mode 100644
index 0000000000000..ec5e740d46ef8
--- /dev/null
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageToolCallItem.svelte
@@ -0,0 +1,104 @@
+<!-- New collapsible tool-call item; the script block and most markup were
+     lost in extraction. Only the header label and toggle text survive: -->
+	{headerLabel}
+	Toggle tool call payload
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
index bf17633095242..dd77298d6fc9e 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsDialog.svelte
@@ -214,6 +214,11 @@
label: 'Show raw LLM output',
type: 'checkbox'
},
+ {
+ key: 'showToolCalls',
+ label: 'Show tool call chunks',
+ type: 'checkbox'
+ },
{
key: 'custom',
label: 'Custom JSON',
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
index 5976e5dd03d7b..adf9f880ae670 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
@@ -13,6 +13,7 @@
updateConversationName
} from '$lib/stores/chat.svelte';
import ChatSidebarActions from './ChatSidebarActions.svelte';
+ import ModelSelector from './ModelSelector.svelte';
const sidebar = Sidebar.useSidebar();
@@ -110,6 +111,8 @@
llama.cpp
+
+				<ModelSelector />
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte
new file mode 100644
index 0000000000000..ca74610b4a179
--- /dev/null
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte
@@ -0,0 +1,99 @@
+<!-- script block elided from this excerpt -->
+{#if loading && options.length === 0 && !isMounted}
+	<!-- loading indicator markup elided -->
+	Loading models…
+{:else if options.length === 0}
+	No models available.
+{:else}
+	{@const selectedOption = getDisplayOption()}
+	<!-- select trigger markup elided -->
+	{selectedOption?.name || 'Select model'}
+	{#if updating}
+		<!-- updating indicator elided -->
+	{/if}
+	{#each options as option (option.id)}
+		{option.name}
+		{#if option.description}
+			{option.description}
+		{/if}
+	{/each}
+{/if}
+
+{#if error}
+	{error}
+{/if}
diff --git a/tools/server/webui/src/lib/components/app/index.ts b/tools/server/webui/src/lib/components/app/index.ts
index 4c2cbdebe16eb..cf7c6ca75e935 100644
--- a/tools/server/webui/src/lib/components/app/index.ts
+++ b/tools/server/webui/src/lib/components/app/index.ts
@@ -14,6 +14,7 @@ export { default as ChatFormFileInputInvisible } from './chat/ChatForm/ChatFormF
export { default as ChatMessage } from './chat/ChatMessages/ChatMessage.svelte';
export { default as ChatMessages } from './chat/ChatMessages/ChatMessages.svelte';
export { default as ChatMessageThinkingBlock } from './chat/ChatMessages/ChatMessageThinkingBlock.svelte';
+export { default as ChatMessageToolCallBlock } from './chat/ChatMessages/ChatMessageToolCallBlock.svelte';
export { default as MessageBranchingControls } from './chat/ChatMessages/ChatMessageBranchingControls.svelte';
export { default as ChatProcessingInfo } from './chat/ChatProcessingInfo.svelte';
@@ -30,6 +31,7 @@ export { default as ParameterSourceIndicator } from './chat/ChatSettings/Paramet
export { default as ChatSidebar } from './chat/ChatSidebar/ChatSidebar.svelte';
export { default as ChatSidebarConversationItem } from './chat/ChatSidebar/ChatSidebarConversationItem.svelte';
export { default as ChatSidebarSearch } from './chat/ChatSidebar/ChatSidebarSearch.svelte';
+export { default as ChatSidebarModelSelector } from './chat/ChatSidebar/ModelSelector.svelte';
export { default as ChatErrorDialog } from './dialogs/ChatErrorDialog.svelte';
export { default as EmptyFileAlertDialog } from './dialogs/EmptyFileAlertDialog.svelte';
diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts
index 154ec888ce2dc..c2e0ce047e205 100644
--- a/tools/server/webui/src/lib/constants/settings-config.ts
+++ b/tools/server/webui/src/lib/constants/settings-config.ts
@@ -6,6 +6,7 @@ export const SETTING_CONFIG_DEFAULT: Record =
theme: 'system',
showTokensPerSecond: false,
showThoughtInProgress: false,
+ showToolCalls: false,
disableReasoningFormat: false,
keepStatsVisible: false,
askForTitleConfirmation: false,
@@ -78,6 +79,8 @@ export const SETTING_CONFIG_INFO: Record = {
custom: 'Custom JSON parameters to send to the API. Must be valid JSON format.',
showTokensPerSecond: 'Display generation speed in tokens per second during streaming.',
showThoughtInProgress: 'Expand thought process by default when generating messages.',
+ showToolCalls:
+ 'Display streamed tool call payloads from Harmony-compatible delta.tool_calls data inside assistant messages.',
disableReasoningFormat:
'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
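
For reference, the payload this setting surfaces looks roughly like the following. This is a hedged sketch: the field values are invented, but the shape matches the `ApiChatCompletionToolCallDelta` type added to `api.d.ts` later in this diff.

```ts
// One illustrative SSE chunk carrying a partial tool call. Values are
// hypothetical; only the shape is taken from the types in this PR.
const sampleChunk = {
	model: 'example-model', // hypothetical id
	choices: [
		{
			delta: {
				tool_calls: [
					{
						index: 0,
						id: 'call_0',
						type: 'function',
						// `arguments` streams in string fragments and is concatenated client-side
						function: { name: 'get_weather', arguments: '{"city":"Par' }
					}
				]
			}
		}
	]
};
```
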
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index 37e60b85b5a6a..15f7080ecd2bb 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -1,5 +1,8 @@
import { config } from '$lib/stores/settings.svelte';
+import { selectedModelName } from '$lib/stores/models.svelte';
+import type { ApiChatCompletionToolCall, ApiChatCompletionToolCallDelta } from '$lib/types/api';
import { slotsService } from './slots';
+
/**
* ChatService - Low-level API communication layer for llama.cpp server interactions
*
@@ -50,6 +53,9 @@ export class ChatService {
onChunk,
onComplete,
onError,
+ onReasoningChunk,
+ onToolCallChunk,
+ onModel,
// Generation parameters
temperature,
max_tokens,
@@ -118,6 +124,11 @@ export class ChatService {
stream
};
+ const activeModel = selectedModelName();
+ if (activeModel) {
+ requestBody.model = activeModel;
+ }
+
requestBody.reasoning_format = currentConfig.disableReasoningFormat ? 'none' : 'auto';
if (temperature !== undefined) requestBody.temperature = temperature;
@@ -164,14 +175,16 @@ export class ChatService {
try {
const apiKey = currentConfig.apiKey?.toString().trim();
+ const headers = {
+ 'Content-Type': 'application/json',
+ ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
+ };
+ const requestPayload = JSON.stringify(requestBody);
const response = await fetch(`./v1/chat/completions`, {
method: 'POST',
- headers: {
- 'Content-Type': 'application/json',
- ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
- },
- body: JSON.stringify(requestBody),
+ headers,
+ body: requestPayload,
signal: this.abortController.signal
});
@@ -190,11 +203,13 @@ export class ChatService {
onChunk,
onComplete,
onError,
- options.onReasoningChunk
+ onReasoningChunk,
+ onToolCallChunk,
+ onModel
);
- } else {
- return this.handleNonStreamResponse(response, onComplete, onError);
}
+
+ return this.handleNonStreamResponse(response, onComplete, onError, onToolCallChunk, onModel);
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
console.log('Chat completion request was aborted');
@@ -248,11 +263,59 @@ export class ChatService {
onComplete?: (
response: string,
reasoningContent?: string,
- timings?: ChatMessageTimings
+ timings?: ChatMessageTimings,
+ toolCalls?: string
) => void,
onError?: (error: Error) => void,
- onReasoningChunk?: (chunk: string) => void
+ onReasoningChunk?: (chunk: string) => void,
+ onToolCallChunk?: (chunk: string) => void,
+ onModel?: (model: string) => void
+ ): Promise<string> {
+ let aggregatedContent = '';
+ let fullReasoningContent = '';
+ let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
+ let hasReceivedData = false;
+ let toolCallIndexOffset = 0;
+ let hasOpenToolCallBatch = false;
+ let lastTimings: ChatMessageTimings | undefined;
+ let streamFinished = false;
+ let modelEmitted = false;
+
+ const finalizeOpenToolCallBatch = () => {
+ if (!hasOpenToolCallBatch) {
+ return;
+ }
+
+ toolCallIndexOffset = aggregatedToolCalls.length;
+ hasOpenToolCallBatch = false;
+ };
+
+ const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
+ if (!toolCalls || toolCalls.length === 0) {
+ return;
+ }
+
+ aggregatedToolCalls = this.mergeToolCallDeltas(
+ aggregatedToolCalls,
+ toolCalls,
+ toolCallIndexOffset
+ );
+ if (aggregatedToolCalls.length === 0) {
+ return;
+ }
+
+ hasOpenToolCallBatch = true;
+
+ const serializedToolCalls = JSON.stringify(aggregatedToolCalls);
+
+ if (!serializedToolCalls) {
+ return;
+ }
+
+ hasReceivedData = true;
+ onToolCallChunk?.(serializedToolCalls);
+ };
+
const reader = response.body?.getReader();
if (!reader) {
@@ -260,11 +323,6 @@ export class ChatService {
}
const decoder = new TextDecoder();
- let aggregatedContent = '';
- let fullReasoningContent = '';
- let hasReceivedData = false;
- let lastTimings: ChatMessageTimings | undefined;
- let streamFinished = false;
try {
let chunk = '';
@@ -274,58 +332,82 @@ export class ChatService {
chunk += decoder.decode(value, { stream: true });
const lines = chunk.split('\n');
- chunk = lines.pop() || ''; // Save incomplete line for next read
+ chunk = lines.pop() || '';
for (const line of lines) {
- if (line.startsWith('data: ')) {
- const data = line.slice(6);
- if (data === '[DONE]') {
- streamFinished = true;
- continue;
+ if (!line.startsWith('data: ')) {
+ continue;
+ }
+
+ const data = line.slice(6);
+ if (data === '[DONE]') {
+ streamFinished = true;
+ continue;
+ }
+
+ try {
+ const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
+
+ const chunkModel = this.extractModelName(parsed);
+ if (chunkModel && !modelEmitted) {
+ modelEmitted = true;
+ onModel?.(chunkModel);
+ }
+
+ const content = parsed.choices[0]?.delta?.content;
+ const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
+ const toolCalls = parsed.choices[0]?.delta?.tool_calls;
+ const timings = parsed.timings;
+ const promptProgress = parsed.prompt_progress;
+
+ if (timings || promptProgress) {
+ this.updateProcessingState(timings, promptProgress);
}
- try {
- const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
-
- const content = parsed.choices[0]?.delta?.content;
- const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
- const timings = parsed.timings;
- const promptProgress = parsed.prompt_progress;
-
- if (timings || promptProgress) {
- this.updateProcessingState(timings, promptProgress);
-
- // Store the latest timing data
- if (timings) {
- lastTimings = timings;
- }
- }
-
- if (content) {
- hasReceivedData = true;
- aggregatedContent += content;
- onChunk?.(content);
- }
-
- if (reasoningContent) {
- hasReceivedData = true;
- fullReasoningContent += reasoningContent;
- onReasoningChunk?.(reasoningContent);
- }
- } catch (e) {
- console.error('Error parsing JSON chunk:', e);
+ if (timings) {
+ lastTimings = timings;
}
+
+ if (content) {
+ finalizeOpenToolCallBatch();
+ hasReceivedData = true;
+ aggregatedContent += content;
+ onChunk?.(content);
+ }
+
+ if (reasoningContent) {
+ finalizeOpenToolCallBatch();
+ hasReceivedData = true;
+ fullReasoningContent += reasoningContent;
+ onReasoningChunk?.(reasoningContent);
+ }
+
+ processToolCallDelta(toolCalls);
+ } catch (e) {
+ console.error('Error parsing JSON chunk:', e);
}
}
}
if (streamFinished) {
- if (!hasReceivedData && aggregatedContent.length === 0) {
+ if (
+ !hasReceivedData &&
+ aggregatedContent.length === 0 &&
+ aggregatedToolCalls.length === 0
+ ) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
- onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
+ const finalToolCalls =
+ aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;
+
+ onComplete?.(
+ aggregatedContent,
+ fullReasoningContent || undefined,
+ lastTimings,
+ finalToolCalls
+ );
}
} catch (error) {
const err = error instanceof Error ? error : new Error('Stream error');
@@ -338,6 +420,54 @@ export class ChatService {
}
}
+ private mergeToolCallDeltas(
+ existing: ApiChatCompletionToolCall[],
+ deltas: ApiChatCompletionToolCallDelta[],
+ indexOffset = 0
+ ): ApiChatCompletionToolCall[] {
+ const result = existing.map((call) => ({
+ ...call,
+ function: call.function ? { ...call.function } : undefined
+ }));
+
+ for (const delta of deltas) {
+ const index =
+ typeof delta.index === 'number' && delta.index >= 0
+ ? delta.index + indexOffset
+ : result.length;
+
+ while (result.length <= index) {
+ result.push({ function: undefined });
+ }
+
+ const target = result[index]!;
+
+ if (delta.id) {
+ target.id = delta.id;
+ }
+
+ if (delta.type) {
+ target.type = delta.type;
+ }
+
+ if (delta.function) {
+ const fn = target.function ? { ...target.function } : {};
+
+ if (delta.function.name) {
+ fn.name = delta.function.name;
+ }
+
+ if (delta.function.arguments) {
+ fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
+ }
+
+ target.function = fn;
+ }
+ }
+
+ return result;
+ }
+
/**
* Handles non-streaming response from the chat completion API.
* Parses the JSON response and extracts the generated content.
@@ -353,9 +483,12 @@ export class ChatService {
onComplete?: (
response: string,
reasoningContent?: string,
- timings?: ChatMessageTimings
+ timings?: ChatMessageTimings,
+ toolCalls?: string
) => void,
- onError?: (error: Error) => void
+ onError?: (error: Error) => void,
+ onToolCallChunk?: (chunk: string) => void,
+ onModel?: (model: string) => void
+ ): Promise<string> {
try {
const responseText = await response.text();
@@ -366,19 +499,36 @@ export class ChatService {
}
const data: ApiChatCompletionResponse = JSON.parse(responseText);
+ const responseModel = this.extractModelName(data);
+ if (responseModel) {
+ onModel?.(responseModel);
+ }
+
const content = data.choices[0]?.message?.content || '';
const reasoningContent = data.choices[0]?.message?.reasoning_content;
+ const toolCalls = data.choices[0]?.message?.tool_calls;
if (reasoningContent) {
console.log('Full reasoning content:', reasoningContent);
}
- if (!content.trim()) {
+ let serializedToolCalls: string | undefined;
+
+ if (toolCalls && toolCalls.length > 0) {
+ const mergedToolCalls = this.mergeToolCallDeltas([], toolCalls);
+
+ if (mergedToolCalls.length > 0) {
+ serializedToolCalls = JSON.stringify(mergedToolCalls);
+ onToolCallChunk?.(serializedToolCalls);
+ }
+ }
+
+ if (!content.trim() && !serializedToolCalls) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
- onComplete?.(content, reasoningContent);
+ onComplete?.(content, reasoningContent, undefined, serializedToolCalls);
return content;
} catch (error) {
@@ -588,6 +738,69 @@ export class ChatService {
}
}
+ private extractModelName(data: unknown): string | undefined {
+ if (!data || typeof data !== 'object') {
+ return undefined;
+ }
+
+ const record = data as Record<string, unknown>;
+ const normalize = (value: unknown): string | undefined => {
+ if (typeof value !== 'string') {
+ return undefined;
+ }
+
+ const trimmed = value.trim();
+
+ return trimmed.length > 0 ? trimmed : undefined;
+ };
+
+ const rootModel = normalize(record['model']);
+ if (rootModel) {
+ return rootModel;
+ }
+
+ const choices = record['choices'];
+ if (!Array.isArray(choices) || choices.length === 0) {
+ return undefined;
+ }
+
+ const firstChoice = choices[0] as Record<string, unknown> | undefined;
+ if (!firstChoice) {
+ return undefined;
+ }
+
+ const choiceModel = normalize(firstChoice['model']);
+ if (choiceModel) {
+ return choiceModel;
+ }
+
+ const delta = firstChoice['delta'] as Record<string, unknown> | undefined;
+ if (delta) {
+ const deltaModel = normalize(delta['model']);
+ if (deltaModel) {
+ return deltaModel;
+ }
+ }
+
+ const message = firstChoice['message'] as Record<string, unknown> | undefined;
+ if (message) {
+ const messageModel = normalize(message['model']);
+ if (messageModel) {
+ return messageModel;
+ }
+ }
+
+ const metadata = firstChoice['metadata'] as Record<string, unknown> | undefined;
+ if (metadata) {
+ const metadataModel = normalize(metadata['model']);
+ if (metadataModel) {
+ return metadataModel;
+ }
+ }
+
+ return undefined;
+ }
+
private updateProcessingState(
timings?: ChatMessageTimings,
promptProgress?: ChatMessagePromptProgress
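
To make the accumulation behavior concrete, here is a standalone restatement of the `mergeToolCallDeltas` logic from the method above. The types, sample data, and function name are illustrative, not part of the PR.

```ts
type FnDelta = { name?: string; arguments?: string };
type ToolCallDelta = { index?: number; id?: string; type?: string; function?: FnDelta };

function merge(existing: ToolCallDelta[], deltas: ToolCallDelta[]): ToolCallDelta[] {
	const result = existing.map((c) => ({ ...c, function: c.function ? { ...c.function } : undefined }));
	for (const d of deltas) {
		const i = typeof d.index === 'number' && d.index >= 0 ? d.index : result.length;
		while (result.length <= i) result.push({ function: undefined });
		const target = result[i]!;
		if (d.id) target.id = d.id;
		if (d.type) target.type = d.type;
		if (d.function) {
			const fn = target.function ? { ...target.function } : {};
			if (d.function.name) fn.name = d.function.name;
			// Argument fragments are concatenated, never replaced:
			if (d.function.arguments) fn.arguments = (fn.arguments ?? '') + d.function.arguments;
			target.function = fn;
		}
	}
	return result;
}

// Two chunks target the same call (index 0); the second only extends `arguments`.
let calls = merge([], [
	{ index: 0, id: 'call_0', type: 'function', function: { name: 'get_weather', arguments: '{"city":' } }
]);
calls = merge(calls, [{ index: 0, function: { arguments: '"Paris"}' } }]);
console.log(JSON.stringify(calls));
// → one call: id "call_0", name "get_weather", arguments '{"city":"Paris"}'
```

The `indexOffset` parameter (omitted in this sketch) is what lets `finalizeOpenToolCallBatch` start a fresh batch once regular content or reasoning interleaves with tool-call deltas.
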
diff --git a/tools/server/webui/src/lib/services/models.ts b/tools/server/webui/src/lib/services/models.ts
new file mode 100644
index 0000000000000..1c7fa3b45631c
--- /dev/null
+++ b/tools/server/webui/src/lib/services/models.ts
@@ -0,0 +1,22 @@
+import { base } from '$app/paths';
+import { config } from '$lib/stores/settings.svelte';
+import type { ApiModelListResponse } from '$lib/types/api';
+
+export class ModelsService {
+ static async list(): Promise<ApiModelListResponse> {
+ const currentConfig = config();
+ const apiKey = currentConfig.apiKey?.toString().trim();
+
+ const response = await fetch(`${base}/v1/models`, {
+ headers: {
+ ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
+ }
+ });
+
+ if (!response.ok) {
+ throw new Error(`Failed to fetch model list (status ${response.status})`);
+ }
+
+ return response.json() as Promise<ApiModelListResponse>;
+ }
+}
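
A minimal usage sketch for the new service. The call site is hypothetical; `list()` and the import path come from the file above.

```ts
import { ModelsService } from '$lib/services/models';

// Hypothetical call site: fetch the list once and log what the server reports.
async function logModels(): Promise<void> {
	const res = await ModelsService.list();
	console.log(res.data.map((m) => m.id)); // OpenAI-compatible entries
	console.log(res.models?.map((d) => d.name)); // optional detailed entries, when present
}
```
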
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index 5b77abb4cb21c..2100a08148742 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -1,6 +1,5 @@
import { DatabaseStore } from '$lib/stores/database';
import { chatService, slotsService } from '$lib/services';
-import { serverStore } from '$lib/stores/server.svelte';
import { config } from '$lib/stores/settings.svelte';
import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
import { browser } from '$app/environment';
@@ -186,6 +185,7 @@ class ChatStore {
type,
timestamp: Date.now(),
thinking: '',
+ toolCalls: '',
children: [],
extra: extras
},
@@ -300,30 +300,31 @@ class ChatStore {
): Promise<void> {
let streamedContent = '';
let streamedReasoningContent = '';
- let modelCaptured = false;
+ let streamedToolCallContent = '';
+ let resolvedModel: string | null = null;
+ let modelPersisted = false;
- const captureModelIfNeeded = (updateDbImmediately = true): string | undefined => {
- if (!modelCaptured) {
- const currentModelName = serverStore.modelName;
+ const recordModel = (modelName: string, persistImmediately = true): void => {
+ const trimmedModel = modelName.trim();
- if (currentModelName) {
- if (updateDbImmediately) {
- DatabaseStore.updateMessage(assistantMessage.id, { model: currentModelName }).catch(
- console.error
- );
- }
+ if (!trimmedModel || trimmedModel === resolvedModel) {
+ return;
+ }
- const messageIndex = this.findMessageIndex(assistantMessage.id);
+ resolvedModel = trimmedModel;
- this.updateMessageAtIndex(messageIndex, { model: currentModelName });
- modelCaptured = true;
+ const messageIndex = this.findMessageIndex(assistantMessage.id);
- return currentModelName;
- }
+ this.updateMessageAtIndex(messageIndex, { model: trimmedModel });
+
+ if (persistImmediately && !modelPersisted) {
+ modelPersisted = true;
+ DatabaseStore.updateMessage(assistantMessage.id, { model: trimmedModel }).catch((error) => {
+ console.error('Failed to persist model name:', error);
+ modelPersisted = false;
+ });
}
- return undefined;
};
-
slotsService.startStreaming();
await chatService.sendMessage(allMessages, {
@@ -333,7 +334,6 @@ class ChatStore {
streamedContent += chunk;
this.currentResponse = streamedContent;
- captureModelIfNeeded();
const messageIndex = this.findMessageIndex(assistantMessage.id);
this.updateMessageAtIndex(messageIndex, {
content: streamedContent
@@ -343,42 +343,64 @@ class ChatStore {
onReasoningChunk: (reasoningChunk: string) => {
streamedReasoningContent += reasoningChunk;
- captureModelIfNeeded();
-
const messageIndex = this.findMessageIndex(assistantMessage.id);
this.updateMessageAtIndex(messageIndex, { thinking: streamedReasoningContent });
},
+ onToolCallChunk: (toolCallChunk: string) => {
+ const chunk = toolCallChunk.trim();
+
+ if (!chunk) {
+ return;
+ }
+
+ streamedToolCallContent = chunk;
+
+ const messageIndex = this.findMessageIndex(assistantMessage.id);
+
+ this.updateMessageAtIndex(messageIndex, { toolCalls: streamedToolCallContent });
+ },
+
+ onModel: (modelName: string) => {
+ recordModel(modelName);
+ },
+
onComplete: async (
finalContent?: string,
reasoningContent?: string,
- timings?: ChatMessageTimings
+ timings?: ChatMessageTimings,
+ toolCallContent?: string
) => {
slotsService.stopStreaming();
const updateData: {
content: string;
thinking: string;
+ toolCalls: string;
timings?: ChatMessageTimings;
model?: string;
} = {
content: finalContent || streamedContent,
thinking: reasoningContent || streamedReasoningContent,
+ toolCalls: toolCallContent || streamedToolCallContent,
timings: timings
};
- const capturedModel = captureModelIfNeeded(false);
-
- if (capturedModel) {
- updateData.model = capturedModel;
+ if (resolvedModel && !modelPersisted) {
+ updateData.model = resolvedModel;
+ modelPersisted = true;
}
await DatabaseStore.updateMessage(assistantMessage.id, updateData);
const messageIndex = this.findMessageIndex(assistantMessage.id);
- const localUpdateData: { timings?: ChatMessageTimings; model?: string } = {
+ const localUpdateData: {
+ timings?: ChatMessageTimings;
+ model?: string;
+ toolCalls?: string;
+ } = {
timings: timings
};
@@ -386,6 +408,10 @@ class ChatStore {
localUpdateData.model = updateData.model;
}
+ if (updateData.toolCalls !== undefined) {
+ localUpdateData.toolCalls = updateData.toolCalls;
+ }
+
this.updateMessageAtIndex(messageIndex, localUpdateData);
await DatabaseStore.updateCurrentNode(this.activeConversation!.id, assistantMessage.id);
@@ -491,6 +517,7 @@ class ChatStore {
content: '',
timestamp: Date.now(),
thinking: '',
+ toolCalls: '',
children: []
},
parentId || null
diff --git a/tools/server/webui/src/lib/stores/database.ts b/tools/server/webui/src/lib/stores/database.ts
index 6394c5b7eda74..bef55d0ceb792 100644
--- a/tools/server/webui/src/lib/stores/database.ts
+++ b/tools/server/webui/src/lib/stores/database.ts
@@ -154,6 +154,7 @@ export class DatabaseStore {
content: '',
parent: null,
thinking: '',
+ toolCalls: '',
children: []
};
diff --git a/tools/server/webui/src/lib/stores/models.svelte.ts b/tools/server/webui/src/lib/stores/models.svelte.ts
new file mode 100644
index 0000000000000..967346fcc964a
--- /dev/null
+++ b/tools/server/webui/src/lib/stores/models.svelte.ts
@@ -0,0 +1,223 @@
+import { browser } from '$app/environment';
+import { ModelsService } from '$lib/services/models';
+import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api';
+
+export interface ModelOption {
+ id: string;
+ name: string;
+ model: string;
+ description?: string;
+ capabilities: string[];
+ details?: ApiModelDetails['details'];
+ meta?: ApiModelDataEntry['meta'];
+}
+
+type PersistedModelSelection = {
+ id: string;
+ model: string;
+};
+
+const STORAGE_KEY = 'llama.cpp:selectedModel';
+
+class ModelsStore {
+ private _models = $state<ModelOption[]>([]);
+ private _loading = $state(false);
+ private _updating = $state(false);
+ private _error = $state<string | null>(null);
+ private _selectedModelId = $state<string | null>(null);
+ private _selectedModelName = $state<string | null>(null);
+
+ constructor() {
+ const persisted = this.readPersistedSelection();
+ if (persisted) {
+ this._selectedModelId = persisted.id;
+ this._selectedModelName = persisted.model;
+ }
+ }
+
+ get models(): ModelOption[] {
+ return this._models;
+ }
+
+ get loading(): boolean {
+ return this._loading;
+ }
+
+ get updating(): boolean {
+ return this._updating;
+ }
+
+ get error(): string | null {
+ return this._error;
+ }
+
+ get selectedModelId(): string | null {
+ return this._selectedModelId;
+ }
+
+ get selectedModelName(): string | null {
+ return this._selectedModelName;
+ }
+
+ get selectedModel(): ModelOption | null {
+ if (!this._selectedModelId) {
+ return null;
+ }
+
+ return this._models.find((model) => model.id === this._selectedModelId) ?? null;
+ }
+
+ async fetch(force = false): Promise<void> {
+ if (this._loading) return;
+ if (this._models.length > 0 && !force) return;
+
+ this._loading = true;
+ this._error = null;
+
+ try {
+ const response = await ModelsService.list();
+
+ const models: ModelOption[] = response.data.map((item, index) => {
+ const details = response.models?.[index];
+ const rawCapabilities = Array.isArray(details?.capabilities)
+ ? [...(details?.capabilities ?? [])]
+ : [];
+ const displayNameSource =
+ details?.name && details.name.trim().length > 0 ? details.name : item.id;
+ const displayName = this.toDisplayName(displayNameSource);
+
+ return {
+ id: item.id,
+ name: displayName,
+ model: details?.model || item.id,
+ description: details?.description,
+ capabilities: rawCapabilities.filter((value): value is string => Boolean(value)),
+ details: details?.details,
+ meta: item.meta ?? null
+ } satisfies ModelOption;
+ });
+
+ this._models = models;
+
+ const persisted = this.readPersistedSelection();
+ let nextSelectionId = this._selectedModelId ?? persisted?.id ?? null;
+ let nextSelectionName = this._selectedModelName ?? persisted?.model ?? null;
+ if (nextSelectionId) {
+ const match = models.find((model) => model.id === nextSelectionId);
+ if (match) {
+ nextSelectionId = match.id;
+ nextSelectionName = match.model;
+ } else if (models[0]) {
+ nextSelectionId = models[0].id;
+ nextSelectionName = models[0].model;
+ } else {
+ nextSelectionId = null;
+ nextSelectionName = null;
+ }
+ } else if (models[0]) {
+ nextSelectionId = models[0].id;
+ nextSelectionName = models[0].model;
+ }
+
+ this._selectedModelId = nextSelectionId;
+ this._selectedModelName = nextSelectionName;
+ this.persistSelection(
+ nextSelectionId && nextSelectionName
+ ? { id: nextSelectionId, model: nextSelectionName }
+ : null
+ );
+ } catch (error) {
+ this._models = [];
+ this._error = error instanceof Error ? error.message : 'Failed to load models';
+ throw error;
+ } finally {
+ this._loading = false;
+ }
+ }
+
+ async select(modelId: string): Promise<void> {
+ if (!modelId || this._updating) {
+ return;
+ }
+
+ if (this._selectedModelId === modelId) {
+ return;
+ }
+
+ const option = this._models.find((model) => model.id === modelId);
+ if (!option) {
+ throw new Error('Selected model is not available');
+ }
+
+ this._updating = true;
+ this._error = null;
+
+ try {
+ this._selectedModelId = option.id;
+ this._selectedModelName = option.model;
+ this.persistSelection({ id: option.id, model: option.model });
+ } finally {
+ this._updating = false;
+ }
+ }
+
+ private toDisplayName(id: string): string {
+ const segments = id.split(/\\|\//);
+ const candidate = segments.pop();
+ return candidate && candidate.trim().length > 0 ? candidate : id;
+ }
+
+ private readPersistedSelection(): PersistedModelSelection | null {
+ if (!browser) {
+ return null;
+ }
+
+ try {
+ const raw = localStorage.getItem(STORAGE_KEY);
+ if (!raw) {
+ return null;
+ }
+
+ const parsed = JSON.parse(raw);
+ if (parsed && typeof parsed.id === 'string') {
+ const id = parsed.id;
+ const model =
+ typeof parsed.model === 'string' && parsed.model.length > 0 ? parsed.model : id;
+ return { id, model };
+ }
+ } catch (error) {
+ console.warn('Failed to read model selection from localStorage:', error);
+ }
+
+ return null;
+ }
+
+ private persistSelection(selection: PersistedModelSelection | null): void {
+ if (!browser) {
+ return;
+ }
+
+ try {
+ if (selection) {
+ localStorage.setItem(STORAGE_KEY, JSON.stringify(selection));
+ } else {
+ localStorage.removeItem(STORAGE_KEY);
+ }
+ } catch (error) {
+ console.warn('Failed to persist model selection to localStorage:', error);
+ }
+ }
+}
+
+export const modelsStore = new ModelsStore();
+
+export const modelOptions = () => modelsStore.models;
+export const modelsLoading = () => modelsStore.loading;
+export const modelsUpdating = () => modelsStore.updating;
+export const modelsError = () => modelsStore.error;
+export const selectedModelId = () => modelsStore.selectedModelId;
+export const selectedModelName = () => modelsStore.selectedModelName;
+export const selectedModelOption = () => modelsStore.selectedModel;
+
+export const fetchModels = modelsStore.fetch.bind(modelsStore);
+export const selectModel = modelsStore.select.bind(modelsStore);
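
A hedged sketch of how a component might drive the store through its function-style exports. The helper flow is hypothetical; the exports and the storage key come from the file above.

```ts
import {
	fetchModels,
	modelOptions,
	selectModel,
	selectedModelName
} from '$lib/stores/models.svelte';

async function pickFirstModel(): Promise<void> {
	await fetchModels(); // no-op when already populated; pass true to force a refresh
	const first = modelOptions()[0];
	if (first) {
		await selectModel(first.id); // persists { id, model } under 'llama.cpp:selectedModel'
	}
	console.log(selectedModelName()); // the value ChatService sends as `model`
}
```
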
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index d0e60a6c13706..2d2a8e5bf61db 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -36,6 +36,41 @@ export interface ApiChatMessageData {
timestamp?: number;
}
+export interface ApiModelDataEntry {
+ id: string;
+ object: string;
+ created: number;
+ owned_by: string;
+ meta?: Record<string, unknown> | null;
+}
+
+export interface ApiModelDetails {
+ name: string;
+ model: string;
+ modified_at?: string;
+ size?: string | number;
+ digest?: string;
+ type?: string;
+ description?: string;
+ tags?: string[];
+ capabilities?: string[];
+ parameters?: string;
+ details?: {
+ parent_model?: string;
+ format?: string;
+ family?: string;
+ families?: string[];
+ parameter_size?: string;
+ quantization_level?: string;
+ };
+}
+
+export interface ApiModelListResponse {
+ object: string;
+ data: ApiModelDataEntry[];
+ models?: ApiModelDetails[];
+}
+
export interface ApiLlamaCppServerProps {
default_generation_settings: {
id: number;
@@ -120,6 +155,7 @@ export interface ApiChatCompletionRequest {
content: string | ApiChatMessageContentPart[];
}>;
stream?: boolean;
+ model?: string;
// Reasoning parameters
reasoning_format?: string;
// Generation parameters
@@ -149,11 +185,32 @@ export interface ApiChatCompletionRequest {
custom?: Record<string, unknown>;
}
+export interface ApiChatCompletionToolCallFunctionDelta {
+ name?: string;
+ arguments?: string;
+}
+
+export interface ApiChatCompletionToolCallDelta {
+ index?: number;
+ id?: string;
+ type?: string;
+ function?: ApiChatCompletionToolCallFunctionDelta;
+}
+
+export interface ApiChatCompletionToolCall extends ApiChatCompletionToolCallDelta {
+ function?: ApiChatCompletionToolCallFunctionDelta & { arguments?: string };
+}
+
export interface ApiChatCompletionStreamChunk {
+ model?: string;
choices: Array<{
+ model?: string;
+ metadata?: { model?: string };
delta: {
content?: string;
reasoning_content?: string;
+ model?: string;
+ tool_calls?: ApiChatCompletionToolCallDelta[];
};
}>;
timings?: {
@@ -167,10 +224,15 @@ export interface ApiChatCompletionStreamChunk {
}
export interface ApiChatCompletionResponse {
+ model?: string;
choices: Array<{
+ model?: string;
+ metadata?: { model?: string };
message: {
content: string;
reasoning_content?: string;
+ model?: string;
+ tool_calls?: ApiChatCompletionToolCallDelta[];
};
}>;
}
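
An illustrative chunk that type-checks against the declarations above; it also shows why `extractModelName` in `chat.ts` probes the root, then the choice, then `delta`/`message`, then `metadata`, in that order. All values here are invented.

```ts
import type { ApiChatCompletionStreamChunk } from '$lib/types/api';

const chunk: ApiChatCompletionStreamChunk = {
	model: 'example-model', // root-level name wins when present
	choices: [
		{
			metadata: { model: 'example-model' }, // lowest-priority fallback position
			delta: {
				content: 'Hello',
				tool_calls: [{ index: 0, function: { arguments: '{}' } }]
			}
		}
	]
};
```
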
diff --git a/tools/server/webui/src/lib/types/database.d.ts b/tools/server/webui/src/lib/types/database.d.ts
index 7f6b76ba271cc..7b489c164905e 100644
--- a/tools/server/webui/src/lib/types/database.d.ts
+++ b/tools/server/webui/src/lib/types/database.d.ts
@@ -49,6 +49,7 @@ export interface DatabaseMessage {
content: string;
parent: string;
thinking: string;
+ toolCalls?: string;
children: string[];
extra?: DatabaseMessageExtra[];
timings?: ChatMessageTimings;
diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts
index 4311f779ad841..c5398c0834ada 100644
--- a/tools/server/webui/src/lib/types/settings.d.ts
+++ b/tools/server/webui/src/lib/types/settings.d.ts
@@ -41,7 +41,14 @@ export interface SettingsChatServiceOptions {
// Callbacks
onChunk?: (chunk: string) => void;
onReasoningChunk?: (chunk: string) => void;
- onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
+ onToolCallChunk?: (chunk: string) => void;
+ onModel?: (model: string) => void;
+ onComplete?: (
+ response: string,
+ reasoningContent?: string,
+ timings?: ChatMessageTimings,
+ toolCalls?: string
+ ) => void;
onError?: (error: Error) => void;
}
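
A sketch of a caller supplying the widened callback set. The handler bodies are placeholders, not part of this PR; `chatService` is the existing export from `$lib/services`, and the message type is borrowed from its signature rather than assumed.

```ts
import { chatService } from '$lib/services';

async function run(messages: Parameters<typeof chatService.sendMessage>[0]): Promise<void> {
	await chatService.sendMessage(messages, {
		onChunk: (text) => console.log('content:', text),
		onReasoningChunk: (text) => console.log('thinking:', text),
		onToolCallChunk: (json) => console.log('tool calls:', json), // JSON-serialized ApiChatCompletionToolCall[]
		onModel: (name) => console.log('model:', name),
		onComplete: (response, reasoning, timings, toolCalls) =>
			console.log('done', { response, reasoning, timings, toolCalls }),
		onError: console.error
	});
}
```
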
diff --git a/tools/server/webui/src/routes/+layout.svelte b/tools/server/webui/src/routes/+layout.svelte
index 8912f642ceffc..737e3609d5cc8 100644
--- a/tools/server/webui/src/routes/+layout.svelte
+++ b/tools/server/webui/src/routes/+layout.svelte
@@ -148,7 +148,7 @@
-
+