Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/lib/server/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ type ExtraConfigKeys =
| "METRICS_ENABLED"
| "METRICS_PORT"
| "MCP_SERVERS"
| "MCP_FORWARD_HF_USER_TOKEN";
| "MCP_FORWARD_HF_USER_TOKEN"
| "MCP_TOOL_TIMEOUT_MS";

/**
 * `ConfigManager` intersected with a mapped type that exposes every key of
 * `ConfigKey` and `ExtraConfigKeys` as a property typed `string`.
 * Values are typed as strings only; any numeric or boolean interpretation is
 * left to the reader of the property.
 */
type ConfigProxy = ConfigManager & { [K in ConfigKey | ExtraConfigKeys]: string };

Expand Down
3 changes: 2 additions & 1 deletion src/lib/server/mcp/httpClient.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import { Client } from "@modelcontextprotocol/sdk/client";
import { getClient } from "./clientPool";
import { config } from "$lib/server/config";

/**
 * Describes a single MCP server entry.
 * NOTE(review): the field meanings below are inferred from their names —
 * confirm against the code that consumes this interface.
 */
export interface McpServerConfig {
// Name of the server entry (presumably used for display or lookup).
name: string;
// Address of the server (presumably the HTTP endpoint the client connects to).
url: string;
// Optional string-to-string header map (presumably sent with requests to `url`).
headers?: Record<string, string>;
}

const DEFAULT_TIMEOUT_MS = 30_000;
/**
 * Default timeout in milliseconds.
 *
 * Read from `config.MCP_TOOL_TIMEOUT_MS` when that value parses to a positive,
 * finite number; otherwise 30 seconds is used. Missing, empty, non-numeric,
 * zero, negative and infinite values all fall back to the default, so a
 * mistyped setting cannot produce a timeout that fires immediately or overflows.
 */
const DEFAULT_TIMEOUT_MS = (() => {
  const configured = Number(config.MCP_TOOL_TIMEOUT_MS);
  return Number.isFinite(configured) && configured > 0 ? configured : 30_000;
})();

export type McpToolTextResponse = {
text: string;
Expand Down
25 changes: 24 additions & 1 deletion src/lib/server/textGeneration/mcp/fileRefs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,15 @@ const IMAGE_REF_KIND: RefKind = {
toDataUrl: (payload) => `data:${payload.mime};base64,${payload.base64}`,
};

const DEFAULT_REF_KINDS: RefKind[] = [IMAGE_REF_KIND];
/** Bare subtype labels accepted as audio, mapped to the full media type used in data URLs. */
const AUDIO_MIME_ALIASES: ReadonlyMap<string, string> = new Map([
  ["mp3", "audio/mpeg"],
  ["wav", "audio/wav"],
  ["x-wav", "audio/x-wav"],
]);

/**
 * Ref kind for audio attachments, using the "audio" ref prefix.
 *
 * - `matches` accepts any string starting with "audio/" as well as the bare
 *   labels "mp3", "wav" and "x-wav"; non-string input never matches.
 * - `toDataUrl` builds a base64 data URL from the payload. A bare label is
 *   expanded to its full `audio/*` media type first, because `data:mp3;base64,…`
 *   is not a valid media type and consumers would not recognize it as audio.
 *   Any other mime value is emitted unchanged.
 */
const AUDIO_REF_KIND: RefKind = {
  prefix: "audio",
  matches: (mime) =>
    typeof mime === "string" && (mime.startsWith("audio/") || AUDIO_MIME_ALIASES.has(mime)),
  toDataUrl: (payload) => {
    const rawMime = payload.mime;
    // Only string mimes can be aliases; anything else is passed through untouched.
    const mime =
      typeof rawMime === "string" ? (AUDIO_MIME_ALIASES.get(rawMime) ?? rawMime) : rawMime;
    return `data:${mime};base64,${payload.base64}`;
  },
};

const DEFAULT_REF_KINDS: RefKind[] = [IMAGE_REF_KIND, AUDIO_REF_KIND];

/**
* Build a resolver that maps short ref strings (e.g. "image_1", "image_2") to the
Expand Down Expand Up @@ -76,6 +84,10 @@ export function buildImageRefResolver(messages: EndpointMessage[]): FileRefResol
return buildFileRefResolver(messages, [IMAGE_REF_KIND]);
}

/**
 * Build a file-ref resolver that only considers audio refs.
 *
 * Delegates to `buildFileRefResolver` with `AUDIO_REF_KIND` as the sole ref
 * kind and returns whatever it returns, including `undefined`.
 */
export function buildAudioRefResolver(messages: EndpointMessage[]): FileRefResolver | undefined {
  const audioOnlyKinds = [AUDIO_REF_KIND];
  return buildFileRefResolver(messages, audioOnlyKinds);
}

type FieldRule = {
keys: string[];
action: "attachPayload" | "replaceWithDataUrl";
Expand All @@ -95,6 +107,17 @@ const DEFAULT_FIELD_RULES: FieldRule[] = [
action: "replaceWithDataUrl",
allowedPrefixes: ["image"],
},
{
keys: ["audio_ref"],
action: "attachPayload",
attachKey: "audio",
allowedPrefixes: ["audio"],
},
{
keys: ["input_audio", "audio", "audio_url"],
action: "replaceWithDataUrl",
allowedPrefixes: ["audio"],
},
];

/**
Expand Down
4 changes: 3 additions & 1 deletion src/lib/server/textGeneration/mcp/routerResolution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export interface RouterResolutionInput {
messages: EndpointMessage[];
conversationId: string;
hasImageInput: boolean;
hasAudioInput?: boolean;
locals: App.Locals | undefined;
}

Expand All @@ -33,6 +34,7 @@ export async function resolveRouterTarget({
messages,
conversationId,
hasImageInput,
hasAudioInput,
locals,
}: RouterResolutionInput): Promise<RouterResolutionResult> {
let targetModel = model;
Expand All @@ -48,7 +50,7 @@ export async function resolveRouterTarget({
const mod = await import("../../models");
const allModels = mod.models as ProcessedModel[];

if (hasImageInput) {
if (hasImageInput || hasAudioInput) {
const multimodalCandidate = findConfiguredMultimodalModel(allModels);
if (!multimodalCandidate) {
runMcp = false;
Expand Down
27 changes: 25 additions & 2 deletions src/lib/server/textGeneration/mcp/runMcpFlow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import { executeToolCalls, type NormalizedToolCall } from "./toolInvocation";
import { drainPool } from "$lib/server/mcp/clientPool";
import type { TextGenerationContext } from "../types";
import { hasAuthHeader, isStrictHfMcpLogin, hasNonEmptyToken } from "$lib/server/mcp/hf";
import { buildImageRefResolver } from "./fileRefs";
import { buildImageRefResolver, buildAudioRefResolver } from "./fileRefs";
import { prepareMessagesWithFiles } from "$lib/server/textGeneration/utils/prepareFiles";
import { makeImageProcessor } from "$lib/server/endpoints/images";

Expand Down Expand Up @@ -202,7 +202,11 @@ export async function* runMcpFlow({
// If anything goes wrong reading the flag, proceed (previous behavior)
}

const resolveFileRef = buildImageRefResolver(messages);
// Build the image and audio resolvers once, so that a ref lookup does not
// reconstruct both of them on every call.
const imageResolver = buildImageRefResolver(messages);
const audioResolver = buildAudioRefResolver(messages);
// Image refs are tried first; the audio resolver is consulted only when the
// image resolver is absent or yields null/undefined for this ref.
const resolveFileRef = (ref: string) => imageResolver?.(ref) ?? audioResolver?.(ref);
const imageProcessor = makeImageProcessor({
supportedMimeTypes: ["image/png", "image/jpeg"],
preferredMimeType: "image/jpeg",
Expand All @@ -217,11 +221,18 @@ export async function* runMcpFlow({
)
);

// True when at least one message carries a file whose mime is a string
// starting with "audio/". Messages without `files` count as having none.
const hasAudioInput = messages.some((message) => {
  const attachments = message.files ?? [];
  return attachments.some((attachment) => {
    const mime = attachment?.mime;
    return typeof mime === "string" && mime.startsWith("audio/");
  });
});

const { runMcp, targetModel, candidateModelId, resolvedRoute } = await resolveRouterTarget({
model,
messages,
conversationId: conv._id.toString(),
hasImageInput,
hasAudioInput,
locals,
});

Expand Down Expand Up @@ -353,6 +364,18 @@ export async function* runMcpFlow({
if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
return value;
}

// Handle Gradio Audio objects: {path, url, orig_name, mime_type, size, is_stream, meta}
if (
typeof value === "object" &&
value !== null &&
!Array.isArray(value) &&
"url" in value &&
typeof (value as { url?: unknown }).url === "string"
) {
return (value as { url: string }).url;
}

return undefined;
};

Expand Down
3 changes: 2 additions & 1 deletion src/lib/server/textGeneration/mcp/toolInvocation.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { randomUUID } from "crypto";
import { logger } from "../../logger";
import { config } from "$lib/server/config";
import type { MessageUpdate } from "$lib/types/MessageUpdate";
import { MessageToolUpdateType, MessageUpdateType } from "$lib/types/MessageUpdate";
import { ToolResultStatus } from "$lib/types/Tool";
Expand Down Expand Up @@ -69,7 +70,7 @@ export async function* executeToolCalls({
toPrimitive,
processToolOutput,
abortSignal,
toolTimeoutMs = 30_000,
toolTimeoutMs = Number(config.MCP_TOOL_TIMEOUT_MS) || 30_000,
}: ExecuteToolCallsParams): AsyncGenerator<ToolExecutionEvent, void, undefined> {
const toolMessages: ChatCompletionMessageParam[] = [];
const toolRuns: ToolRun[] = [];
Expand Down
Loading