From acb46314a35903263e10fce1c51b34d69dc64b51 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Wed, 21 May 2025 17:27:23 +0200 Subject: [PATCH 1/9] [Tiny Agents] Expose a OpenAPI-compatible Web server --- packages/mcp-client/src/Agent.ts | 26 ++-- packages/tiny-agents/package.json | 3 +- packages/tiny-agents/src/cli.ts | 16 +-- packages/tiny-agents/src/lib/mainCliLoop.ts | 5 +- packages/tiny-agents/src/lib/webServer.ts | 126 ++++++++++++++++++++ 5 files changed, 157 insertions(+), 19 deletions(-) create mode 100644 packages/tiny-agents/src/lib/webServer.ts diff --git a/packages/mcp-client/src/Agent.ts b/packages/mcp-client/src/Agent.ts index 823769a00e..63a9e42b14 100644 --- a/packages/mcp-client/src/Agent.ts +++ b/packages/mcp-client/src/Agent.ts @@ -46,6 +46,7 @@ const exitLoopTools = [taskCompletionTool, askQuestionTool]; export class Agent extends McpClient { private readonly servers: (ServerConfig | StdioServerParameters)[]; + public readonly prompt: string; protected messages: ChatCompletionInputMessage[]; constructor({ @@ -73,10 +74,11 @@ export class Agent extends McpClient { super(provider ? { provider, endpointUrl, model, apiKey } : { provider, endpointUrl, model, apiKey }); /// ^This shenanigan is just here to please an overzealous TS type-checker. this.servers = servers; + this.prompt = prompt ?? DEFAULT_SYSTEM_PROMPT; this.messages = [ { role: "system", - content: prompt ?? DEFAULT_SYSTEM_PROMPT, + content: this.prompt, }, ]; } @@ -86,19 +88,27 @@ export class Agent extends McpClient { } async *run( - input: string, + input: string | ChatCompletionInputMessage[], opts: { abortSignal?: AbortSignal } = {} ): AsyncGenerator { - this.messages.push({ - role: "user", - content: input, - }); + let messages: ChatCompletionInputMessage[]; + if (typeof input === "string") { + /// Use internal array of messages + this.messages.push({ + role: "user", + content: input, + }); + messages = this.messages; + } else { + /// Use the passed messages directly + messages = input; + } let numOfTurns = 0; let nextTurnShouldCallTools = true; while (true) { try { - yield* this.processSingleTurnWithTools(this.messages, { + yield* this.processSingleTurnWithTools(messages, { exitLoopTools, exitIfFirstChunkNoTool: numOfTurns > 0 && nextTurnShouldCallTools, abortSignal: opts.abortSignal, @@ -111,7 +121,7 @@ export class Agent extends McpClient { } numOfTurns++; // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const currentLast = this.messages.at(-1)!; + const currentLast = messages.at(-1)!; debug("current role", currentLast.role); if ( currentLast.role === "tool" && diff --git a/packages/tiny-agents/package.json b/packages/tiny-agents/package.json index 39df6c0135..e258a9cd10 100644 --- a/packages/tiny-agents/package.json +++ b/packages/tiny-agents/package.json @@ -34,7 +34,8 @@ "prepare": "pnpm run build", "test": "vitest run", "check": "tsc", - "cli": "tsx src/cli.ts" + "cli": "tsx src/cli.ts", + "cli:watch": "tsx watch src/cli.ts" }, "files": [ "src", diff --git a/packages/tiny-agents/src/cli.ts b/packages/tiny-agents/src/cli.ts index 0722b394f0..8592000dee 100644 --- a/packages/tiny-agents/src/cli.ts +++ b/packages/tiny-agents/src/cli.ts @@ -7,8 +7,10 @@ import { PROVIDERS_OR_POLICIES } from "@huggingface/inference"; import { Agent } from "@huggingface/mcp-client"; import { version as packageVersion } from "../package.json"; import { ServerConfigSchema } from "./lib/types"; -import { debug, error } from "./lib/utils"; +import { ANSI, debug, error } from "./lib/utils"; import { mainCliLoop } from "./lib/mainCliLoop"; +import { startServer } from "./lib/webServer"; +import { stdout } from "node:process"; const USAGE_HELP = ` Usage: @@ -154,13 +156,13 @@ async function main() { } ); - if (command === "serve") { - error(`Serve is not implemented yet, coming soon!`); - process.exit(1); + debug(agent); + await agent.loadTools(); + + if (command === "run") { + mainCliLoop(agent); } else { - debug(agent); - // main loop from mcp-client/cli.ts - await mainCliLoop(agent); + startServer(agent); } } diff --git a/packages/tiny-agents/src/lib/mainCliLoop.ts b/packages/tiny-agents/src/lib/mainCliLoop.ts index cdc20056ce..0cd35d3aad 100644 --- a/packages/tiny-agents/src/lib/mainCliLoop.ts +++ b/packages/tiny-agents/src/lib/mainCliLoop.ts @@ -5,7 +5,8 @@ import type { ChatCompletionStreamOutput } from "@huggingface/tasks"; import type { Agent } from "../index"; /** - * From mcp-client/cli.ts + * From mcp-client/cli.ts, + * minus the agent.loadTools() done upstream. */ export async function mainCliLoop(agent: Agent): Promise { const rl = readline.createInterface({ input: stdin, output: stdout }); @@ -40,8 +41,6 @@ export async function mainCliLoop(agent: Agent): Promise { throw err; }); - await agent.loadTools(); - stdout.write(ANSI.BLUE); stdout.write(`Agent loaded with ${agent.availableTools.length} tools:\n`); stdout.write(agent.availableTools.map((t) => `- ${t.function.name}`).join("\n")); diff --git a/packages/tiny-agents/src/lib/webServer.ts b/packages/tiny-agents/src/lib/webServer.ts new file mode 100644 index 0000000000..5bc1de5c38 --- /dev/null +++ b/packages/tiny-agents/src/lib/webServer.ts @@ -0,0 +1,126 @@ +import type { IncomingMessage } from "node:http"; +import { createServer, ServerResponse } from "node:http"; +import type { AddressInfo } from "node:net"; +import { z } from "zod"; +import type { Agent } from "../index"; +import { ANSI } from "./utils"; +import { stdout } from "node:process"; +import type { ChatCompletionStreamOutput } from "@huggingface/tasks"; + +const REQUEST_ID_HEADER = "X-Request-Id"; +// Generate a simple UUID v4 without dependencies +const generateUUID = () => { + return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => { + const r = (Math.random() * 16) | 0; + const v = c === "x" ? r : (r & 0x3) | 0x8; + return v.toString(16); + }); +}; +const ChatCompletionInputSchema = z.object({ + messages: z.array( + z.object({ + role: z.enum(["user", "assistant"]), + content: z.string(), + }) + ), + /// Only allow stream: true + stream: z.literal(true), +}); +function getJsonBody(req: IncomingMessage) { + return new Promise((resolve, reject) => { + let data = ""; + req.on("data", (chunk) => (data += chunk)); + req.on("end", () => { + try { + resolve(JSON.parse(data)); + } catch (e) { + reject(e); + } + }); + req.on("error", reject); + }); +} +class ServerResp extends ServerResponse { + error(statusCode: number, reason: string) { + this.writeHead(statusCode).end(JSON.stringify({ error: reason })); + } +} + +export function startServer(agent: Agent): void { + const server = createServer({ ServerResponse: ServerResp }, async (req, res) => { + res.setHeader(REQUEST_ID_HEADER, generateUUID()); + res.setHeader("Content-Type", "application/json"); + if (req.method === "POST" && req.url === "/v1/chat/completions") { + let body: unknown; + let requestBody: z.infer; + try { + body = await getJsonBody(req); + } catch { + return res.error(404, "Invalid JSON"); + } + try { + requestBody = ChatCompletionInputSchema.parse(body); + } catch (err) { + if (err instanceof z.ZodError) { + return res.error(404, "Invalid ChatCompletionInput body \n" + JSON.stringify(err)); + } + return res.error(404, "Invalid ChatCompletionInput body"); + } + /// Ok, from now on we will send a SSE (Server-Sent Events) response. + res.setHeaders( + new Headers({ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + Connection: "keep-alive", + }) + ); + + /// Prepend the agent's prompt + const messages = [ + { + role: "system", + content: agent.prompt, + }, + ...requestBody.messages, + ]; + + for await (const chunk of agent.run(messages)) { + if ("choices" in chunk) { + res.write(`data: ${JSON.stringify(chunk)}\n\n`); + } else { + /// Tool call info + /// /!\ We format it as a regular chunk! + const chunkToolcallInfo = { + choices: [ + { + index: 0, + delta: { + role: "assistant", + content: + "" + `Tool[${chunk.name}] ${chunk.tool_call_id}\n` + chunk.content + "", + }, + }, + ], + created: Math.floor(Date.now() / 1000), + id: chunk.tool_call_id, + model: "", + system_fingerprint: "", + } satisfies ChatCompletionStreamOutput; + + res.write(`data: ${JSON.stringify(chunkToolcallInfo)}\n\n`); + } + } + res.end(); + } else { + res.error(404, "Route or method not found, try POST /v1/chat/completions"); + } + }); + server.listen(process.env.PORT ? parseInt(process.env.PORT) : 9_999, () => { + stdout.write(ANSI.BLUE); + stdout.write(`Agent loaded with ${agent.availableTools.length} tools:\n`); + stdout.write(agent.availableTools.map((t) => `- ${t.function.name}`).join("\n")); + stdout.write(ANSI.RESET); + stdout.write("\n"); + console.log(ANSI.GRAY + `listening on http://localhost:${(server.address() as AddressInfo).port}` + ANSI.RESET); + }); +} From 7460a2bcdc4754c98ea79ae374baf075fb1b4509 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Wed, 21 May 2025 17:41:49 +0200 Subject: [PATCH 2/9] tool_call => tool_call_info to not conflict with internal tool calls of model --- packages/tiny-agents/src/lib/webServer.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/tiny-agents/src/lib/webServer.ts b/packages/tiny-agents/src/lib/webServer.ts index 5bc1de5c38..6de018849e 100644 --- a/packages/tiny-agents/src/lib/webServer.ts +++ b/packages/tiny-agents/src/lib/webServer.ts @@ -97,7 +97,10 @@ export function startServer(agent: Agent): void { delta: { role: "assistant", content: - "" + `Tool[${chunk.name}] ${chunk.tool_call_id}\n` + chunk.content + "", + "" + + `Tool[${chunk.name}] ${chunk.tool_call_id}\n` + + chunk.content + + "", }, }, ], From e8e5fa6f54cf217b35edb60ef4691e1ba0871fa6 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Wed, 21 May 2025 17:43:20 +0200 Subject: [PATCH 3/9] Example of use --- packages/tiny-agents/src/example.ts | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 packages/tiny-agents/src/example.ts diff --git a/packages/tiny-agents/src/example.ts b/packages/tiny-agents/src/example.ts new file mode 100644 index 0000000000..1dd044124c --- /dev/null +++ b/packages/tiny-agents/src/example.ts @@ -0,0 +1,18 @@ +import { chatCompletionStream } from "@huggingface/inference"; + +async function main() { + const endpointUrl = `http://localhost:9999/v1/chat/completions`; + // launch "tiny-agents serve" before running this + + for await (const chunk of chatCompletionStream({ + endpointUrl, + model: "", + messages: [{ role: "user", content: "What are the top 5 trending models on Hugging Face?" }], + })) { + console.log(chunk.choices[0]?.delta.content); + } +} + +if (require.main === module) { + main(); +} From ae601590166e69618eadaa15caa929f6ea1f32c5 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Wed, 21 May 2025 19:19:47 +0200 Subject: [PATCH 4/9] review from @coyotte508 --- packages/tiny-agents/src/cli.ts | 2 +- packages/tiny-agents/src/lib/webServer.ts | 11 ++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/packages/tiny-agents/src/cli.ts b/packages/tiny-agents/src/cli.ts index 8592000dee..69a9e99dc0 100644 --- a/packages/tiny-agents/src/cli.ts +++ b/packages/tiny-agents/src/cli.ts @@ -7,7 +7,7 @@ import { PROVIDERS_OR_POLICIES } from "@huggingface/inference"; import { Agent } from "@huggingface/mcp-client"; import { version as packageVersion } from "../package.json"; import { ServerConfigSchema } from "./lib/types"; -import { ANSI, debug, error } from "./lib/utils"; +import { debug, error } from "./lib/utils"; import { mainCliLoop } from "./lib/mainCliLoop"; import { startServer } from "./lib/webServer"; import { stdout } from "node:process"; diff --git a/packages/tiny-agents/src/lib/webServer.ts b/packages/tiny-agents/src/lib/webServer.ts index 6de018849e..6b08242a1b 100644 --- a/packages/tiny-agents/src/lib/webServer.ts +++ b/packages/tiny-agents/src/lib/webServer.ts @@ -8,14 +8,7 @@ import { stdout } from "node:process"; import type { ChatCompletionStreamOutput } from "@huggingface/tasks"; const REQUEST_ID_HEADER = "X-Request-Id"; -// Generate a simple UUID v4 without dependencies -const generateUUID = () => { - return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (c) => { - const r = (Math.random() * 16) | 0; - const v = c === "x" ? r : (r & 0x3) | 0x8; - return v.toString(16); - }); -}; + const ChatCompletionInputSchema = z.object({ messages: z.array( z.object({ @@ -48,7 +41,7 @@ class ServerResp extends ServerResponse { export function startServer(agent: Agent): void { const server = createServer({ ServerResponse: ServerResp }, async (req, res) => { - res.setHeader(REQUEST_ID_HEADER, generateUUID()); + res.setHeader(REQUEST_ID_HEADER, crypto.randomUUID()); res.setHeader("Content-Type", "application/json"); if (req.method === "POST" && req.url === "/v1/chat/completions") { let body: unknown; From 6ce116242a763537b8d0e8df043dc5fb076b9d82 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Thu, 22 May 2025 12:25:05 +0200 Subject: [PATCH 5/9] Update packages/tiny-agents/src/lib/webServer.ts Co-authored-by: Mishig --- packages/tiny-agents/src/lib/webServer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tiny-agents/src/lib/webServer.ts b/packages/tiny-agents/src/lib/webServer.ts index 6b08242a1b..8e4833765a 100644 --- a/packages/tiny-agents/src/lib/webServer.ts +++ b/packages/tiny-agents/src/lib/webServer.ts @@ -49,7 +49,7 @@ export function startServer(agent: Agent): void { try { body = await getJsonBody(req); } catch { - return res.error(404, "Invalid JSON"); + return res.error(400, "Invalid JSON"); } try { requestBody = ChatCompletionInputSchema.parse(body); From a0a865f1c16ce678751deadd890fe575ecdd0035 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Thu, 22 May 2025 12:28:32 +0200 Subject: [PATCH 6/9] Update packages/tiny-agents/src/cli.ts Co-authored-by: Mishig --- packages/tiny-agents/src/cli.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/tiny-agents/src/cli.ts b/packages/tiny-agents/src/cli.ts index 69a9e99dc0..dba0dda16b 100644 --- a/packages/tiny-agents/src/cli.ts +++ b/packages/tiny-agents/src/cli.ts @@ -10,7 +10,6 @@ import { ServerConfigSchema } from "./lib/types"; import { debug, error } from "./lib/utils"; import { mainCliLoop } from "./lib/mainCliLoop"; import { startServer } from "./lib/webServer"; -import { stdout } from "node:process"; const USAGE_HELP = ` Usage: From c5fbd547be0233ef8cb2a4c3218a4098d67670a4 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Thu, 22 May 2025 12:30:29 +0200 Subject: [PATCH 7/9] 404 => 400 @mishig25 --- packages/tiny-agents/src/lib/webServer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/tiny-agents/src/lib/webServer.ts b/packages/tiny-agents/src/lib/webServer.ts index 8e4833765a..4d27028240 100644 --- a/packages/tiny-agents/src/lib/webServer.ts +++ b/packages/tiny-agents/src/lib/webServer.ts @@ -55,9 +55,9 @@ export function startServer(agent: Agent): void { requestBody = ChatCompletionInputSchema.parse(body); } catch (err) { if (err instanceof z.ZodError) { - return res.error(404, "Invalid ChatCompletionInput body \n" + JSON.stringify(err)); + return res.error(400, "Invalid ChatCompletionInput body \n" + JSON.stringify(err)); } - return res.error(404, "Invalid ChatCompletionInput body"); + return res.error(400, "Invalid ChatCompletionInput body"); } /// Ok, from now on we will send a SSE (Server-Sent Events) response. res.setHeaders( From 1dfaae2e371eb48f429ab7dbdf67eea72188b8d2 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Thu, 22 May 2025 20:51:14 +0200 Subject: [PATCH 8/9] Ok, just output a `role: "tool"` chunk --- packages/tiny-agents/src/lib/webServer.ts | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/packages/tiny-agents/src/lib/webServer.ts b/packages/tiny-agents/src/lib/webServer.ts index 4d27028240..af2f3036f4 100644 --- a/packages/tiny-agents/src/lib/webServer.ts +++ b/packages/tiny-agents/src/lib/webServer.ts @@ -82,18 +82,14 @@ export function startServer(agent: Agent): void { res.write(`data: ${JSON.stringify(chunk)}\n\n`); } else { /// Tool call info - /// /!\ We format it as a regular chunk! + /// /!\ We format it as a regular chunk of role = "tool" const chunkToolcallInfo = { choices: [ { index: 0, delta: { - role: "assistant", - content: - "" + - `Tool[${chunk.name}] ${chunk.tool_call_id}\n` + - chunk.content + - "", + role: "tool", + content: `Tool[${chunk.name}] ${chunk.tool_call_id}\n` + chunk.content, }, }, ], From cd50de4403a6768fb538fbb1e64e273a73941671 Mon Sep 17 00:00:00 2001 From: Julien Chaumond Date: Fri, 23 May 2025 12:42:55 +0200 Subject: [PATCH 9/9] Update packages/tiny-agents/src/lib/webServer.ts Co-authored-by: Nathan Sarrazin --- packages/tiny-agents/src/lib/webServer.ts | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/packages/tiny-agents/src/lib/webServer.ts b/packages/tiny-agents/src/lib/webServer.ts index af2f3036f4..21cf17a53c 100644 --- a/packages/tiny-agents/src/lib/webServer.ts +++ b/packages/tiny-agents/src/lib/webServer.ts @@ -13,7 +13,23 @@ const ChatCompletionInputSchema = z.object({ messages: z.array( z.object({ role: z.enum(["user", "assistant"]), - content: z.string(), + content: z.string().or( + z.array( + z + .object({ + type: z.literal("text"), + text: z.string(), + }) + .or( + z.object({ + type: z.literal("image_url"), + image_url: z.object({ + url: z.string(), + }), + }) + ) + ) + ), }) ), /// Only allow stream: true