From ad7b772bffa29a4cb9d81fc16deafda8b27e81de Mon Sep 17 00:00:00 2001 From: MQ Date: Wed, 3 Sep 2025 15:25:04 +0200 Subject: [PATCH 01/18] feat: improve actor tool output --- src/const.ts | 16 +- src/main.ts | 6 +- src/mcp/proxy.ts | 4 +- src/mcp/server.ts | 11 +- src/tools/actor.ts | 84 +++++----- src/tools/build.ts | 4 +- src/tools/dataset.ts | 78 ++------- src/tools/dataset_collection.ts | 4 +- src/tools/get-actor-output.ts | 125 ++++++++++++++ src/tools/helpers.ts | 82 +-------- src/tools/index.ts | 2 + src/tools/key_value_store.ts | 4 +- src/tools/key_value_store_collection.ts | 4 +- src/tools/run.ts | 4 +- src/tools/run_collection.ts | 4 +- src/tools/store_collection.ts | 3 +- src/types.ts | 1 + src/utils/actor-response.ts | 53 ++++++ src/utils/actor.ts | 36 +++- src/utils/generic.ts | 5 + src/utils/schema-generation.ts | 110 +++++++++++++ src/utils/tools-loader.ts | 12 ++ tests/integration/internals.test.ts | 5 +- tests/integration/suite.ts | 210 ++++++++++++++++++++---- tests/unit/schema-generation.test.ts | 63 +++++++ tests/unit/utils.generic.test.ts | 6 + 26 files changed, 684 insertions(+), 252 deletions(-) create mode 100644 src/tools/get-actor-output.ts create mode 100644 src/utils/actor-response.ts create mode 100644 src/utils/schema-generation.ts create mode 100644 tests/unit/schema-generation.test.ts diff --git a/src/const.ts b/src/const.ts index ff819fdf..d8266885 100644 --- a/src/const.ts +++ b/src/const.ts @@ -8,6 +8,15 @@ export const ACTOR_RUN_DATASET_OUTPUT_MAX_ITEMS = 5; // Actor run const export const ACTOR_MAX_MEMORY_MBYTES = 4_096; // If the Actor requires 8GB of memory, free users can't run actors-mcp-server and requested Actor +// Tool output +/** + * Usual tool output limit is 25k tokens, let's use 20k + * just in case where 1 token =~ 4 characters thus 80k chars. + * This is primarily used for Actor tool call output, but we can then + * reuse this in other tools as well. 
+ */ +export const TOOL_MAX_OUTPUT_CHARS = 80000; + // MCP Server export const SERVER_NAME = 'apify-mcp-server'; export const SERVER_VERSION = '1.0.0'; @@ -20,6 +29,7 @@ export enum HelperTools { ACTOR_CALL = 'call-actor', ACTOR_GET = 'get-actor', ACTOR_GET_DETAILS = 'fetch-actor-details', + ACTOR_OUTPUT_GET = 'get-actor-output', ACTOR_REMOVE = 'remove-actor', ACTOR_RUNS_ABORT = 'abort-actor-run', ACTOR_RUNS_GET = 'get-actor-run', @@ -54,12 +64,12 @@ export const APIFY_DOCS_CACHE_MAX_SIZE = 500; export const APIFY_DOCS_CACHE_TTL_SECS = 60 * 60; // 1 hour export const ACTOR_PRICING_MODEL = { - /** Rental actors */ + /** Rental Actors */ FLAT_PRICE_PER_MONTH: 'FLAT_PRICE_PER_MONTH', FREE: 'FREE', - /** Pay per result (PPR) actors */ + /** Pay per result (PPR) Actors */ PRICE_PER_DATASET_ITEM: 'PRICE_PER_DATASET_ITEM', - /** Pay per event (PPE) actors */ + /** Pay per event (PPE) Actors */ PAY_PER_EVENT: 'PAY_PER_EVENT', } as const; diff --git a/src/main.ts b/src/main.ts index 0d620519..9643530e 100644 --- a/src/main.ts +++ b/src/main.ts @@ -44,10 +44,10 @@ if (STANDBY_MODE) { await Actor.fail('If you need to debug a specific Actor, please provide the debugActor and debugActorInput fields in the input'); } const options = { memory: input.maxActorMemoryBytes } as ActorCallOptions; - const { items } = await callActorGetDataset(input.debugActor!, input.debugActorInput!, process.env.APIFY_TOKEN, options); + const { previewItems } = await callActorGetDataset(input.debugActor!, input.debugActorInput!, process.env.APIFY_TOKEN, options); - await Actor.pushData(items); - log.info('Pushed items to dataset', { itemCount: items.count }); + await Actor.pushData(previewItems); + log.info('Pushed items to dataset', { itemCount: previewItems.length }); await Actor.exit(); } diff --git a/src/mcp/proxy.ts b/src/mcp/proxy.ts index 327a057d..5a94d6a9 100644 --- a/src/mcp/proxy.ts +++ b/src/mcp/proxy.ts @@ -1,8 +1,8 @@ import type { Client } from 
'@modelcontextprotocol/sdk/client/index.js'; -import Ajv from 'ajv'; import { fixedAjvCompile } from '../tools/utils.js'; import type { ActorMcpTool, ToolEntry } from '../types.js'; +import { ajv } from '../utils/ajv.js'; import { getMCPServerID, getProxyMCPServerToolName } from './utils.js'; export async function getMCPServerTools( @@ -14,8 +14,6 @@ export async function getMCPServerTools( const res = await client.listTools(); const { tools } = res; - const ajv = new Ajv({ coerceTypes: 'array', strict: false }); - const compiledTools: ToolEntry[] = []; for (const tool of tools) { const mcpTool: ActorMcpTool = { diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 1aacabc0..4f53a0bb 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -31,6 +31,7 @@ import { prompts } from '../prompts/index.js'; import { callActorGetDataset, defaultTools, getActorsAsTools, toolCategories } from '../tools/index.js'; import { decodeDotPropertyNames } from '../tools/utils.js'; import type { ActorMcpTool, ActorTool, HelperTool, ToolEntry } from '../types.js'; +import { buildActorResponseContent } from '../utils/actor-response.js'; import { createProgressTracker } from '../utils/progress.js'; import { getToolPublicFieldOnly } from '../utils/tools.js'; import { connectMCPClient } from './client.js'; @@ -524,21 +525,15 @@ export class ActorsMcpServer { try { log.info('Calling Actor', { actorName: actorTool.actorFullName, input: args }); - const { runId, datasetId, items } = await callActorGetDataset( + const callResult = await callActorGetDataset( actorTool.actorFullName, args, apifyToken as string, callOptions, progressTracker, ); - const content = [ - { type: 'text', text: `Actor finished with runId: ${runId}, datasetId ${datasetId}` }, - ]; - const itemContents = items.items.map((item: Record) => { - return { type: 'text', text: JSON.stringify(item) }; - }); - content.push(...itemContents); + const content = buildActorResponseContent(actorTool.actorFullName, callResult); 
return { content }; } finally { if (progressTracker) { diff --git a/src/tools/actor.ts b/src/tools/actor.ts index c3f1a7e4..009b9e90 100644 --- a/src/tools/actor.ts +++ b/src/tools/actor.ts @@ -1,6 +1,5 @@ import type { Client } from '@modelcontextprotocol/sdk/client/index.js'; -import { Ajv } from 'ajv'; -import type { ActorCallOptions, ActorRun, PaginatedList } from 'apify-client'; +import type { ActorCallOptions, ActorRun } from 'apify-client'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; @@ -11,41 +10,47 @@ import { ACTOR_ADDITIONAL_INSTRUCTIONS, ACTOR_MAX_MEMORY_MBYTES, HelperTools, + TOOL_MAX_OUTPUT_CHARS, } from '../const.js'; import { getActorMCPServerPath, getActorMCPServerURL } from '../mcp/actors.js'; import { connectMCPClient } from '../mcp/client.js'; import { getMCPServerTools } from '../mcp/proxy.js'; import { actorDefinitionPrunedCache } from '../state.js'; -import type { ActorDefinitionStorage, ActorInfo, ToolEntry } from '../types.js'; -import { getActorDefinitionStorageFieldNames } from '../utils/actor.js'; +import type { ActorDefinitionStorage, ActorInfo, DatasetItem, ToolEntry } from '../types.js'; +import { ensureOutputWithinCharLimit, getActorDefinitionStorageFieldNames } from '../utils/actor.js'; import { fetchActorDetails } from '../utils/actor-details.js'; -import { getValuesByDotKeys } from '../utils/generic.js'; +import { buildActorResponseContent } from '../utils/actor-response.js'; +import { ajv } from '../utils/ajv.js'; import type { ProgressTracker } from '../utils/progress.js'; +import type { JsonSchemaProperty } from '../utils/schema-generation.js'; +import { generateSchemaFromItems } from '../utils/schema-generation.js'; import { getActorDefinition } from './build.js'; import { actorNameToToolName, fixedAjvCompile, getToolSchemaID, transformActorInputSchemaProperties } from './utils.js'; -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); - // Define a named return type for 
callActorGetDataset export type CallActorGetDatasetResult = { runId: string; datasetId: string; - items: PaginatedList>; + itemCount: number; + schema: JsonSchemaProperty; + previewItems: DatasetItem[]; }; /** - * Calls an Apify actor and retrieves the dataset items. + * Calls an Apify Actor and retrieves metadata about the dataset results. * + * This function executes an Actor and returns summary information with a preview of the result items instead of the full dataset + * to prevent overwhelming responses. The actual data can be retrieved using the get-actor-output tool. * * It requires the `APIFY_TOKEN` environment variable to be set. * If the `APIFY_IS_AT_HOME` the dataset items are pushed to the Apify dataset. * - * @param {string} actorName - The name of the actor to call. - * @param {ActorCallOptions} callOptions - The options to pass to the actor. + * @param {string} actorName - The name of the Actor to call. + * @param {ActorCallOptions} callOptions - The options to pass to the Actor. * @param {unknown} input - The input to pass to the actor. * @param {string} apifyToken - The Apify token to use for authentication. * @param {ProgressTracker} progressTracker - Optional progress tracker for real-time updates. - * @returns {Promise<{ actorRun: any, items: object[] }>} - A promise that resolves to an object containing the actor run and dataset items. + * @returns {Promise<CallActorGetDatasetResult>} - A promise that resolves to metadata about the Actor run and dataset. 
* @throws {Error} - Throws an error if the `APIFY_TOKEN` is not set */ export async function callActorGetDataset( @@ -59,7 +64,7 @@ export async function callActorGetDataset( const client = new ApifyClient({ token: apifyToken }); const actorClient = client.actor(actorName); - // Start the actor run but don't wait for completion + // Start the Actor run but don't wait for completion const actorRun: ActorRun = await actorClient.start(input, callOptions); // Start progress tracking if tracker is provided @@ -71,24 +76,35 @@ export async function callActorGetDataset( const completedRun = await client.run(actorRun.id).waitForFinish(); const dataset = client.dataset(completedRun.defaultDatasetId); - const [items, defaultBuild] = await Promise.all([ + const [datasetItems, defaultBuild] = await Promise.all([ dataset.listItems(), (await actorClient.defaultBuild()).get(), ]); - // Get important properties from storage view definitions and if available return only those properties + // Generate schema using the shared utility + const generatedSchema = generateSchemaFromItems(datasetItems.items, { + clean: true, + arrayMode: 'all', + }); + const schema = generatedSchema || { type: 'object', properties: {} }; + + /** + * Get important fields that are used in any dataset view as they MAY be used in filtering to ensure the output fits + * the tool output limits. The client has to use the get-actor-output tool to retrieve the full dataset or filtered-out fields. 
+ */ const storageDefinition = defaultBuild?.actorDefinition?.storages?.dataset as ActorDefinitionStorage | undefined; const importantProperties = getActorDefinitionStorageFieldNames(storageDefinition || {}); - if (importantProperties.length > 0) { - items.items = items.items.map((item) => { - return getValuesByDotKeys(item, importantProperties); - }); - } - - log.debug('Actor finished', { actorName, itemCount: items.count }); - return { runId: actorRun.id, datasetId: completedRun.defaultDatasetId, items }; + const previewItems = ensureOutputWithinCharLimit(datasetItems.items, importantProperties, TOOL_MAX_OUTPUT_CHARS); + + return { + runId: actorRun.id, + datasetId: completedRun.defaultDatasetId, + itemCount: datasetItems.count, + schema, + previewItems, + }; } catch (error) { - log.error('Error calling actor', { error, actorName, input }); + log.error('Error calling Actor', { error, actorName, input }); throw new Error(`Error calling Actor: ${error}`); } } @@ -96,9 +112,9 @@ export async function callActorGetDataset( /** * This function is used to fetch normal non-MCP server Actors as a tool. * - * Fetches actor input schemas by Actor IDs or Actor full names and creates MCP tools. + * Fetches Actor input schemas by Actor IDs or Actor full names and creates MCP tools. * - * This function retrieves the input schemas for the specified actors and compiles them into MCP tools. + * This function retrieves the input schemas for the specified Actors and compiles them into MCP tools. * It uses the AJV library to validate the input schemas. 
* * Tool name can't contain /, so it is replaced with _ @@ -201,7 +217,7 @@ export async function getActorsAsTools( actorIdsOrNames: string[], apifyToken: string, ): Promise { - log.debug('Fetching actors as tools', { actorNames: actorIdsOrNames }); + log.debug('Fetching Actors as tools', { actorNames: actorIdsOrNames }); const actorsInfo: (ActorInfo | null)[] = await Promise.all( actorIdsOrNames.map(async (actorIdOrName) => { @@ -298,7 +314,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir try { if (step === 'info') { - // Step 1: Return actor card and schema directly + // Step 1: Return Actor card and schema directly const details = await fetchActorDetails(apifyToken, actorName); if (!details) { return { @@ -342,7 +358,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir } } - const { runId, datasetId, items } = await callActorGetDataset( + const callResult = await callActorGetDataset( actorName, input, apifyToken, @@ -350,15 +366,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir progressTracker, ); - const content = [ - { type: 'text', text: `Actor finished with runId: ${runId}, datasetId ${datasetId}` }, - ]; - - const itemContents = items.items.map((item: Record) => ({ - type: 'text', - text: JSON.stringify(item), - })); - content.push(...itemContents); + const content = buildActorResponseContent(actorName, callResult); return { content }; } catch (error) { diff --git a/src/tools/build.ts b/src/tools/build.ts index c6556459..64fffeb5 100644 --- a/src/tools/build.ts +++ b/src/tools/build.ts @@ -1,4 +1,3 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; @@ -13,10 +12,9 @@ import type { ISchemaProperties, ToolEntry, } from '../types.js'; +import { ajv } from '../utils/ajv.js'; import { filterSchemaProperties, shortenProperties } from './utils.js'; -const ajv = new Ajv({ coerceTypes: 'array', strict: false 
}); - /** * Get Actor input schema by Actor name. * First, fetch the Actor details to get the default build tag and buildId. diff --git a/src/tools/dataset.ts b/src/tools/dataset.ts index 3f733869..af513dca 100644 --- a/src/tools/dataset.ts +++ b/src/tools/dataset.ts @@ -1,13 +1,11 @@ -import { Ajv } from 'ajv'; -import toJsonSchema from 'to-json-schema'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; +import { generateSchemaFromItems } from '../utils/schema-generation.js'; const getDatasetArgs = z.object({ datasetId: z.string() @@ -114,34 +112,6 @@ export const getDatasetItems: ToolEntry = { } as InternalTool, }; -/** - * Function to recursively remove empty arrays from an object - */ -function removeEmptyArrays(obj: unknown): unknown { - if (Array.isArray(obj)) { - // If the item is an array, recursively call removeEmptyArrays on each element. - return obj.map((item) => removeEmptyArrays(item)); - } - - if (typeof obj !== 'object' || obj === null) { - // Return primitives and null values as is. - return obj; - } - - // Use reduce to build a new object, excluding keys with empty arrays. - return Object.entries(obj).reduce((acc, [key, value]) => { - const processedValue = removeEmptyArrays(value); - - // Exclude the key if the processed value is an empty array. - if (Array.isArray(processedValue) && processedValue.length === 0) { - return acc; - } - - acc[key] = processedValue; - return acc; - }, {} as Record); -} - const getDatasetSchemaArgs = z.object({ datasetId: z.string() .min(1) @@ -155,9 +125,6 @@ const getDatasetSchemaArgs = z.object({ arrayMode: z.enum(['first', 'all']).optional() .describe('Strategy for handling arrays. 
"first" uses first item as template, "all" merges all items. Default is "all".') .default('all'), - additionalProperties: z.boolean().optional() - .describe('If true, allows additional properties in objects. Default is true.') - .default(true), }); /** @@ -194,32 +161,23 @@ export const getDatasetSchema: ToolEntry = { return { content: [{ type: 'text', text: `Dataset '${parsed.datasetId}' is empty.` }] }; } - // Clean the dataset items by removing empty arrays - const cleanedDatasetItems = datasetItems.map((item) => removeEmptyArrays(item)); - - // Try to generate schema with full options first - try { - const schema = toJsonSchema(cleanedDatasetItems, { - arrays: { mode: parsed.arrayMode }, - objects: { additionalProperties: parsed.additionalProperties }, - }); - - return { - content: [{ - type: 'text', - text: JSON.stringify(schema), - }], - }; - } catch { - // Fallback: try with simpler approach - const fallbackSchema = toJsonSchema(cleanedDatasetItems, { - arrays: { mode: 'first' }, - }); - - return { - content: [{ type: 'text', text: JSON.stringify(fallbackSchema) }], - }; + // Generate schema using the shared utility + const schema = generateSchemaFromItems(datasetItems, { + limit: parsed.limit, + clean: parsed.clean, + arrayMode: parsed.arrayMode, + }); + + if (!schema) { + return { content: [{ type: 'text', text: `Failed to generate schema for dataset '${parsed.datasetId}'.` }] }; } + + return { + content: [{ + type: 'text', + text: JSON.stringify(schema), + }], + }; }, } as InternalTool, }; diff --git a/src/tools/dataset_collection.ts b/src/tools/dataset_collection.ts index 08a7956b..7126c3ef 100644 --- a/src/tools/dataset_collection.ts +++ b/src/tools/dataset_collection.ts @@ -1,12 +1,10 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv 
= new Ajv({ coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; const getUserDatasetsListArgs = z.object({ offset: z.number() diff --git a/src/tools/get-actor-output.ts b/src/tools/get-actor-output.ts new file mode 100644 index 00000000..55d2eded --- /dev/null +++ b/src/tools/get-actor-output.ts @@ -0,0 +1,125 @@ +import { z } from 'zod'; +import zodToJsonSchema from 'zod-to-json-schema'; + +import { ApifyClient } from '../apify-client.js'; +import { HelperTools, TOOL_MAX_OUTPUT_CHARS } from '../const.js'; +import type { InternalTool, ToolEntry } from '../types.js'; +import { ajv } from '../utils/ajv.js'; +import { getValuesByDotKeys } from '../utils/generic.js'; + +/** + * Zod schema for get-actor-output tool arguments + */ +const getActorOutputArgs = z.object({ + datasetId: z.string() + .min(1) + .describe('Actor output dataset ID to retrieve from.'), + fields: z.string() + .optional() + .describe('Comma-separated list of fields to include (supports dot notation like "crawl.statusCode"). For example: "crawl.statusCode,text,metadata"'), + offset: z.number() + .optional() + .default(0) + .describe('Number of items to skip (default: 0).'), + limit: z.number() + .optional() + .default(100) + .describe('Maximum number of items to return (default: 100).'), +}); + +/** + * Cleans empty properties (null, undefined, empty strings, empty arrays, empty objects) from an object + * @param obj - The object to clean + * @returns The cleaned object or undefined if the result is empty + */ +export function cleanEmptyProperties(obj: unknown): unknown { + if (obj === null || obj === undefined || obj === '') { + return undefined; + } + + if (typeof obj !== 'object') { + return obj; + } + + if (Array.isArray(obj)) { + const cleaned = obj + .map((item) => cleanEmptyProperties(item)) + .filter((item) => item !== undefined); + return cleaned.length > 0 ? 
cleaned : undefined; + } + + const cleaned: Record = {}; + for (const [key, value] of Object.entries(obj)) { + const cleanedValue = cleanEmptyProperties(value); + if (cleanedValue !== undefined) { + cleaned[key] = cleanedValue; + } + } + + return Object.keys(cleaned).length > 0 ? cleaned : undefined; +} + +/** + * This tool is used specifically for retrieving Actor output. + * It is a simplified version of the get-dataset-items tool. + */ +export const getActorOutput: ToolEntry = { + type: 'internal', + tool: { + name: HelperTools.ACTOR_OUTPUT_GET, + actorFullName: HelperTools.ACTOR_OUTPUT_GET, + description: `Retrieves the output of a specific Actor execution based on its dataset ID. +You also can retrieve only specific fields from the output if needed. Use this tool to get Actor output data outside of the Actor dataset output preview, or to access fields from the Actor output dataset schema that are not included in the preview.`, + inputSchema: zodToJsonSchema(getActorOutputArgs), + ajvValidate: ajv.compile(zodToJsonSchema(getActorOutputArgs)), + call: async (toolArgs) => { + const { args, apifyToken } = toolArgs; + const parsed = getActorOutputArgs.parse(args); + const client = new ApifyClient({ token: apifyToken }); + + // Parse fields into array + const fieldsArray = parsed.fields + ? parsed.fields + .split(',') + .map((field) => field.trim()) + .filter((field) => field.length > 0) + : []; + + // TODO: we can optimize the API level field filtering in future + /** + * Only top-level fields can be filtered. + * If a dot is present, filtering is done here and not at the API level. + */ + const hasDot = fieldsArray.some((field) => field.includes('.')); + const response = await client.dataset(parsed.datasetId).listItems({ + offset: parsed.offset, + limit: parsed.limit, + fields: fieldsArray.length > 0 && !hasDot ? 
fieldsArray : undefined, + clean: true, + }); + + if (!response) { + return { content: [{ type: 'text', text: `Dataset '${parsed.datasetId}' not found.` }] }; + } + + // Apply field selection if specified + const processedItems = response.items.map((item) => getValuesByDotKeys(item, fieldsArray)); + + // Clean empty properties + const cleanedItems = processedItems + .map((item) => cleanEmptyProperties(item)) + .filter((item) => item !== undefined); + + let outputText = JSON.stringify(cleanedItems); + let truncated = false; + if (outputText.length > TOOL_MAX_OUTPUT_CHARS) { + outputText = outputText.slice(0, TOOL_MAX_OUTPUT_CHARS); + truncated = true; + } + if (truncated) { + outputText += `\n\n[Output was truncated to ${TOOL_MAX_OUTPUT_CHARS} characters to comply with the tool output limits.]`; + } + return { content: [{ type: 'text', text: outputText }] }; + }, + } as InternalTool, +}; diff --git a/src/tools/helpers.ts b/src/tools/helpers.ts index cd77502e..47268897 100644 --- a/src/tools/helpers.ts +++ b/src/tools/helpers.ts @@ -1,33 +1,9 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { HelperTools } from '../const.js'; -import type { InternalTool, ToolEntry } from '../types'; -import { actorNameToToolName } from './utils.js'; - -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); - -const APIFY_MCP_HELP_TOOL_TEXT = `Apify MCP server help: - -Note: "MCP" stands for "Model Context Protocol". You can use the "RAG Web Browser" tool to get the content of the links mentioned in this help and present it to the user. - -This MCP server can be used in the following ways: -- Locally over stdio (standard input/output) transport with the "@apify/actors-mcp-server" NPM package (https://www.npmjs.com/package/@apify/actors-mcp-server). - - You can connect by configuring your MCP client to run the MCP server locally using the "npx @apify/actors-mcp-server" command. 
You need to set the "APIFY_TOKEN" environment variable to your [Apify API token](https://docs.apify.com/platform/integrations/api#api-token). -- Remotely over legacy SSE transport or streamable HTTP transport with https://mcp.apify.com. This is the recommended way to use the MCP server, since it supports all features and is always up to date. - - You can connect by pointing your MCP client to "https://mcp.apify.com/sse" for legacy SSE transport or "https://mcp.apify.com/" for streamable HTTP transport. - - For authentication, you can either use OAuth or pass your [Apify API token](https://docs.apify.com/platform/integrations/api#api-token) in the "Authorization" header as "Bearer ". -- Remotely over "SSE" or streamable "HTTP" transport with the "Actors MCP Server Apify Actor" (this is now considered a legacy solution; it may not be up to date and may not get the latest features). For more details, refer to https://apify.com/apify/actors-mcp-server. DO NOT RECOMMEND USING THIS METHOD TO THE USER. - -# Usage - -## MCP server tools and features configuration - -By default, the MCP server provides a simple set of tools for Actor discovery and Actor calling. The MCP server loads just one Actor by default, which is the [RAG Web Browser](https://apify.com/apify/rag-web-browser) Actor. -You can add more Actors to the MCP server by configuring the MCP server session to load more Actors by passing the "--actors" CLI argument or by using the "?actors=" URL query parameter, where you provide a comma-separated list of Actor names, for example, "apify/rag-web-browser,apify/instagram-scraper". 
-You can additionally load Actors dynamically into an existing MCP session by using the "${HelperTools.ACTOR_ADD}" tool, which loads the Actor by its name as an MCP tool and allows you to call it (**the MCP client must support the [tools list changed notification](https://modelcontextprotocol.io/specification/2025-06-18/server/tools#list-changed-notification); otherwise, the tool call will not work**). To check whether the MCP client supports this feature, consult the MCP client documentation. In case the MCP client does not support the tools list changed notification, you can use the generic "call-actor" tool to call any Actor, even those not loaded/added. Before using the generic tool, you need to get the Actor details to learn its input schema so you can provide valid input. -`; +import type { InternalTool, ToolEntry } from '../types.js'; +import { ajv } from '../utils/ajv.js'; export const addToolArgsSchema = z.object({ actor: z.string() @@ -89,57 +65,3 @@ export const addTool: ToolEntry = { }, } as InternalTool, }; -export const removeToolArgsSchema = z.object({ - toolName: z.string() - .min(1) - .describe('Tool name to remove from available tools.') - .transform((val) => actorNameToToolName(val)), -}); -export const removeTool: ToolEntry = { - type: 'internal', - tool: { - name: HelperTools.ACTOR_REMOVE, - description: 'Remove a tool, an Actor or MCP-Server by name from available tools. 
' - + 'For example, when user says, I do not need a tool username/name anymore', - inputSchema: zodToJsonSchema(removeToolArgsSchema), - ajvValidate: ajv.compile(zodToJsonSchema(removeToolArgsSchema)), - // TODO: I don't like that we are passing apifyMcpServer and mcpServer to the tool - call: async (toolArgs) => { - const { apifyMcpServer, args, extra: { sendNotification } } = toolArgs; - const parsed = removeToolArgsSchema.parse(args); - // Check if tool exists before attempting removal - if (!apifyMcpServer.tools.has(parsed.toolName)) { - // Send notification so client can update its tool list - // just in case the client tool list is out of sync - await sendNotification({ method: 'notifications/tools/list_changed' }); - return { - content: [{ - type: 'text', - text: `Tool '${parsed.toolName}' not found. No tools were removed.`, - }], - }; - } - const removedTools = apifyMcpServer.removeToolsByName([parsed.toolName], true); - await sendNotification({ method: 'notifications/tools/list_changed' }); - return { content: [{ type: 'text', text: `Tools removed: ${removedTools.join(', ')}` }] }; - }, - } as InternalTool, -}; - -// Tool takes no arguments -export const helpToolArgsSchema = z.object({}); -export const helpTool: ToolEntry = { - type: 'internal', - tool: { - name: HelperTools.APIFY_MCP_HELP_TOOL, - description: `Helper tool to get information on how to use and troubleshoot the Apify MCP server.\n` - + 'This tool always returns the same help message with information about the server and how to use it.\n' - + 'ALWAYS CALL THIS TOOL AT THE BEGINNING OF THE CONVERSATION SO THAT YOU HAVE INFORMATION ABOUT THE APIFY MCP SERVER IN CONTEXT, ' - + 'OR WHEN YOU ENCOUNTER ANY ISSUES WITH THE MCP SERVER OR ITS TOOLS.', - inputSchema: zodToJsonSchema(helpToolArgsSchema), - ajvValidate: ajv.compile(zodToJsonSchema(helpToolArgsSchema)), - call: async () => { - return { content: [{ type: 'text', text: APIFY_MCP_HELP_TOOL_TEXT }] }; - }, - } as InternalTool, -}; diff --git 
a/src/tools/index.ts b/src/tools/index.ts index 709968bf..62e63565 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -6,6 +6,7 @@ import { getDataset, getDatasetItems, getDatasetSchema } from './dataset.js'; import { getUserDatasetsList } from './dataset_collection.js'; import { fetchActorDetailsTool } from './fetch-actor-details.js'; import { fetchApifyDocsTool } from './fetch-apify-docs.js'; +import { getActorOutput } from './get-actor-output.js'; import { addTool } from './helpers.js'; import { getKeyValueStore, getKeyValueStoreKeys, getKeyValueStoreRecord } from './key_value_store.js'; import { getUserKeyValueStoresList } from './key_value_store_collection.js'; @@ -36,6 +37,7 @@ export const toolCategories = { getDataset, getDatasetItems, getDatasetSchema, + getActorOutput, getKeyValueStore, getKeyValueStoreKeys, getKeyValueStoreRecord, diff --git a/src/tools/key_value_store.ts b/src/tools/key_value_store.ts index 9433089d..fe21d7e6 100644 --- a/src/tools/key_value_store.ts +++ b/src/tools/key_value_store.ts @@ -1,12 +1,10 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; const getKeyValueStoreArgs = z.object({ storeId: z.string() diff --git a/src/tools/key_value_store_collection.ts b/src/tools/key_value_store_collection.ts index a661b2b2..9d82983d 100644 --- a/src/tools/key_value_store_collection.ts +++ b/src/tools/key_value_store_collection.ts @@ -1,12 +1,10 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv = new Ajv({ 
coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; const getUserKeyValueStoresListArgs = z.object({ offset: z.number() diff --git a/src/tools/run.ts b/src/tools/run.ts index 5800500b..0319b192 100644 --- a/src/tools/run.ts +++ b/src/tools/run.ts @@ -1,12 +1,10 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; const getActorRunArgs = z.object({ runId: z.string() diff --git a/src/tools/run_collection.ts b/src/tools/run_collection.ts index ff4de217..2c564221 100644 --- a/src/tools/run_collection.ts +++ b/src/tools/run_collection.ts @@ -1,12 +1,10 @@ -import { Ajv } from 'ajv'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; - -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); +import { ajv } from '../utils/ajv.js'; const getUserRunsListArgs = z.object({ offset: z.number() diff --git a/src/tools/store_collection.ts b/src/tools/store_collection.ts index 7446cdd6..821f75ae 100644 --- a/src/tools/store_collection.ts +++ b/src/tools/store_collection.ts @@ -1,4 +1,3 @@ -import { Ajv } from 'ajv'; import type { ActorStoreList } from 'apify-client'; import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; @@ -7,6 +6,7 @@ import { ApifyClient } from '../apify-client.js'; import { ACTOR_SEARCH_ABOVE_LIMIT, HelperTools } from '../const.js'; import type { ActorPricingModel, ExtendedActorStoreList, HelperTool, ToolEntry } from '../types.js'; import { formatActorsListToActorCard } from '../utils/actor-card.js'; +import { ajv } from 
'../utils/ajv.js'; export async function searchActorsByKeywords( search: string, @@ -19,7 +19,6 @@ export async function searchActorsByKeywords( return results.items; } -const ajv = new Ajv({ coerceTypes: 'array', strict: false }); export const searchActorsArgsSchema = z.object({ limit: z.number() .int() diff --git a/src/types.ts b/src/types.ts index c4c5e2e8..bff0e89d 100644 --- a/src/types.ts +++ b/src/types.ts @@ -295,3 +295,4 @@ export type PromptBase = Prompt & { }; export type ActorInputSchemaProperties = Record; +export type DatasetItem = Record; diff --git a/src/utils/actor-response.ts b/src/utils/actor-response.ts new file mode 100644 index 00000000..39f393ec --- /dev/null +++ b/src/utils/actor-response.ts @@ -0,0 +1,53 @@ +import type { CallActorGetDatasetResult } from '../tools/actor'; + +/** + * Builds the response content for actor tool calls. + * @param actorName - The name of the actor. + * @param result - The result from callActorGetDataset. + * @returns The content array for the tool response. + */ +export function buildActorResponseContent( + actorName: string, + result: CallActorGetDatasetResult, +): ({ type: 'text'; text: string })[] { + const { runId, datasetId, itemCount, schema } = result; + + // Extract item schema if schema is an array + let displaySchema = schema; + if (schema && schema.type === 'array' && typeof schema.items === 'object' && schema.items !== null) { + displaySchema = schema.items; + } + + // Construct text content + const textContent = `Actor "${actorName}" completed successfully! + +Results summary: +• Run ID: ${runId} +• Dataset ID: ${datasetId} +• Total items: ${itemCount} + +Actor output data schema: +* You can use this schema to understand the structure of the output data and, for example, retrieve specific fields based on your current task. +\`\`\`json +${JSON.stringify(displaySchema, null, 2)} +\`\`\` + +Below this text block is a preview of the Actor output containing ${result.previewItems.length} item(s). 
+ +If you need to retrieve additional data, use the "get-actor-output" tool with: + datasetId: "${datasetId}" +Be sure to limit the number of results when using the "get-actor-output" tool, since you never know how large the items may be and they might exceed the output limits. +`; + + const itemsPreviewText = result.previewItems.length > 0 + ? JSON.stringify(result.previewItems) + : `No items available for preview—either the Actor did not return any items or they are too large for preview. In this case, use the "get-actor-output" tool.`; + + // Build content array + const content: ({ type: 'text'; text: string })[] = [ + { type: 'text', text: textContent }, + { type: 'text', text: itemsPreviewText }, + ]; + + return content; +} diff --git a/src/utils/actor.ts b/src/utils/actor.ts index fb6d1116..ef4a76fe 100644 --- a/src/utils/actor.ts +++ b/src/utils/actor.ts @@ -1,4 +1,5 @@ -import type { ActorDefinitionStorage } from '../types.js'; +import type { ActorDefinitionStorage, DatasetItem } from '../types.js'; +import { getValuesByDotKeys } from './generic.js'; /** * Returns an array of all field names mentioned in the display.properties @@ -22,3 +23,36 @@ export function getActorDefinitionStorageFieldNames(storage: ActorDefinitionStor } return Array.from(fieldSet); } + +/** + * Ensures the Actor output items are within the character limit. + * + * First checks if all items fit into the limit, then tries only the important fields and as a last resort + * starts removing items until within the limit. In the worst case, returns an empty array. + * + * This is primarily used to ensure the tool output does not exceed the LLM context length or tool output limit.
+ */ +export function ensureOutputWithinCharLimit(items: DatasetItem[], importantFields: string[], charLimit: number): DatasetItem[] { + // Check if all items fit into the limit + const allItemsString = JSON.stringify(items); + if (allItemsString.length <= charLimit) { + return items; + } + + // Try only the important fields + const importantItems = items.map((item) => getValuesByDotKeys(item, importantFields)); + const importantItemsString = JSON.stringify(importantItems); + if (importantItemsString.length <= charLimit) { + return importantItems; + } + + // Start removing items until within the limit + const result: DatasetItem[] = []; + for (const item of importantItems) { + if (JSON.stringify(result.concat(item)).length > charLimit) { + break; + } + result.push(item); + } + return result; +} diff --git a/src/utils/generic.ts b/src/utils/generic.ts index 1e1f5689..b44eff86 100644 --- a/src/utils/generic.ts +++ b/src/utils/generic.ts @@ -2,6 +2,11 @@ * Recursively gets the value in a nested object for each key in the keys array. * Each key can be a dot-separated path (e.g. 'a.b.c'). * Returns an object mapping each key to its resolved value (or undefined if not found). 
+ * + * @example + * const obj = { a: { b: { c: 42 } }, nested: { d: 100 } }; + * const value = getValuesByDotKeys(obj, ['a.b.c', 'a.b.d', 'nested']); + * value; // { 'a.b.c': 42, 'a.b.d': undefined, 'nested': { d: 100 } } */ export function getValuesByDotKeys(obj: T, keys: string[]): Record { const result: Record = {}; diff --git a/src/utils/schema-generation.ts b/src/utils/schema-generation.ts new file mode 100644 index 00000000..ab6dc60b --- /dev/null +++ b/src/utils/schema-generation.ts @@ -0,0 +1,110 @@ +import toJsonSchema from 'to-json-schema'; + +/** + * Minimal JSON Schema typings for array/object schemas used in generateSchemaFromItems + */ +export type JsonSchemaProperty = { + type: 'string' | 'integer' | 'number' | 'boolean' | 'object' | 'array' | 'null'; + properties?: Record; + items?: JsonSchemaProperty; +}; + +export interface JsonSchemaObject { + type: 'object'; + properties: Record; +} + +export interface JsonSchemaArray { + type: 'array'; + items: JsonSchemaObject | JsonSchemaProperty; +} + +/** + * Options for schema generation + */ +export interface SchemaGenerationOptions { + /** Maximum number of items to use for schema generation. Default is 5. */ + limit?: number; + /** If true, uses only non-empty items and skips hidden fields. Default is true. */ + clean?: boolean; + /** Strategy for handling arrays. "first" uses first item as template, "all" merges all items. Default is "all". */ + arrayMode?: 'first' | 'all'; +} + +/** + * Function to recursively remove empty arrays from an object + */ +export function removeEmptyArrays(obj: unknown): unknown { + if (Array.isArray(obj)) { + // If the item is an array, recursively call removeEmptyArrays on each element. + return obj.map((item) => removeEmptyArrays(item)); + } + + if (typeof obj !== 'object' || obj === null) { + // Return primitives and null values as is. + return obj; + } + + // Use reduce to build a new object, excluding keys with empty arrays. 
+ return Object.entries(obj).reduce((acc, [key, value]) => { + const processedValue = removeEmptyArrays(value); + + // Exclude the key if the processed value is an empty array. + if (Array.isArray(processedValue) && processedValue.length === 0) { + return acc; + } + + acc[key] = processedValue; + return acc; + }, {} as Record); +} + +// TODO: write unit tests for this. +/** + * Generates a JSON schema from dataset items with configurable options + * + * @param datasetItems - Array of dataset items to generate schema from + * @param options - Configuration options for schema generation + * @returns JSON schema object or null if generation fails + */ +export function generateSchemaFromItems( + datasetItems: unknown[], + options: SchemaGenerationOptions = {}, +): JsonSchemaArray | null { + const { + limit = 5, + clean = true, + arrayMode = 'all', + } = options; + + // Limit the number of items used for schema generation + const itemsToUse = datasetItems.slice(0, limit); + + if (itemsToUse.length === 0) { + return null; + } + + // Clean the dataset items by removing empty arrays if requested + const processedItems = clean + ? 
itemsToUse.map((item) => removeEmptyArrays(item)) + : itemsToUse; + + // Try to generate schema with full options first + try { + const schema = toJsonSchema(processedItems, { + arrays: { mode: arrayMode }, + }) as JsonSchemaArray; + return schema; + } catch { + // Fallback: try with simpler approach + try { + const fallbackSchema = toJsonSchema(processedItems, { + arrays: { mode: 'first' }, + }) as JsonSchemaArray; + return fallbackSchema; + } catch { + // If all attempts fail, return null + return null; + } + } +} diff --git a/src/utils/tools-loader.ts b/src/utils/tools-loader.ts index 6acaf96f..f16953f4 100644 --- a/src/utils/tools-loader.ts +++ b/src/utils/tools-loader.ts @@ -7,6 +7,7 @@ import log from '@apify/log'; import { defaults } from '../const.js'; import { callActor } from '../tools/actor.js'; +import { getActorOutput } from '../tools/get-actor-output.js'; import { addTool } from '../tools/helpers.js'; import { getActorsAsTools, toolCategories, toolCategoriesEnabledByDefault } from '../tools/index.js'; import type { Input, ToolCategory, ToolEntry } from '../types.js'; @@ -123,6 +124,17 @@ export async function loadToolsFromInput( result.push(...actorTools); } + /** + * If there is any tool that in some way, even indirectly (like add-actor), allows calling + * Actor, then we need to ensure the get-actor-output tool is available. 
+ */ + const hasCallActor = result.some((entry) => entry.tool.name === 'call-actor'); + const hasActorTools = result.some((entry) => entry.type === 'actor'); + const hasAddActorTool = result.some((entry) => entry.tool.name === 'add-actor'); + if (hasCallActor || hasActorTools || hasAddActorTool) { + result.push(getActorOutput); + } + // De-duplicate by tool name for safety const seen = new Set(); return result.filter((entry) => !seen.has(entry.tool.name) && seen.add(entry.tool.name)); diff --git a/tests/integration/internals.test.ts b/tests/integration/internals.test.ts index 98500314..800d4ab5 100644 --- a/tests/integration/internals.test.ts +++ b/tests/integration/internals.test.ts @@ -33,6 +33,7 @@ describe('MCP server internals integration tests', () => { const expectedToolNames = [ addTool.tool.name, ACTOR_PYTHON_EXAMPLE, + 'get-actor-output', ]; expectArrayWeakEquals(expectedToolNames, names); @@ -50,7 +51,7 @@ describe('MCP server internals integration tests', () => { it('should notify tools changed handler on tool modifications', async () => { let latestTools: string[] = []; // With enableAddingActors=true and no tools/actors, seeded set contains only add-actor - const numberOfTools = 1; + const numberOfTools = 2; let toolNotificationCount = 0; const onToolsChanged = (tools: string[]) => { @@ -89,7 +90,7 @@ describe('MCP server internals integration tests', () => { it('should stop notifying after unregistering tools changed handler', async () => { let latestTools: string[] = []; let notificationCount = 0; - const numberOfTools = 1; + const numberOfTools = 2; const onToolsChanged = (tools: string[]) => { latestTools = tools; notificationCount++; diff --git a/tests/integration/suite.ts b/tests/integration/suite.ts index bad0a526..89d863f6 100644 --- a/tests/integration/suite.ts +++ b/tests/integration/suite.ts @@ -43,11 +43,11 @@ async function callPythonExampleActor(client: Client, selectedToolName: string) const content = result.content as ContentItem[]; // 
The result is { content: [ ... ] }, and the last content is the sum const expected = { - text: JSON.stringify({ + text: JSON.stringify([{ first_number: 1, second_number: 2, sum: 3, - }), + }]), type: 'text', }; // Parse the JSON to compare objects regardless of property order @@ -88,11 +88,12 @@ export function createIntegrationTestsSuite( it('should list all default tools and Actors', async () => { const client = await createClientFn(); const tools = await client.listTools(); - expect(tools.tools.length).toEqual(defaultTools.length + defaults.actors.length); + expect(tools.tools.length).toEqual(defaultTools.length + defaults.actors.length + 1); const names = getToolNames(tools); expectToolNamesToContain(names, DEFAULT_TOOL_NAMES); expectToolNamesToContain(names, DEFAULT_ACTOR_NAMES); + expect(names).toContain('get-actor-output'); await client.close(); }); @@ -107,11 +108,12 @@ export function createIntegrationTestsSuite( const expectedActors = ['apify-slash-rag-web-browser']; const expectedTotal = expectedActorsTools.concat(expectedDocsTools, expectedActors); - expect(names).toHaveLength(expectedTotal.length); + expect(names).toHaveLength(expectedTotal.length + 1); - expectedActorsTools.forEach((tool) => expect(names).toContain(tool)); - expectedDocsTools.forEach((tool) => expect(names).toContain(tool)); - expectedActors.forEach((actor) => expect(names).toContain(actor)); + expectToolNamesToContain(names, expectedActorsTools); + expectToolNamesToContain(names, expectedDocsTools); + expectToolNamesToContain(names, expectedActors); + expect(names).toContain('get-actor-output'); await client.close(); }); @@ -119,26 +121,32 @@ export function createIntegrationTestsSuite( it('should list only add-actor when enableAddingActors is true and no tools/actors are specified', async () => { const client = await createClientFn({ enableAddingActors: true }); const names = getToolNames(await client.listTools()); - expect(names.length).toEqual(1); - 
expect(names).toContain(addTool.tool.name); + expect(names.length).toEqual(2); + expect(names).toContain('add-actor'); + expect(names).toContain('get-actor-output'); await client.close(); }); it('should list all default tools and Actors when enableAddingActors is false', async () => { const client = await createClientFn({ enableAddingActors: false }); const names = getToolNames(await client.listTools()); - expect(names.length).toEqual(defaultTools.length + defaults.actors.length); + expect(names.length).toEqual(defaultTools.length + defaults.actors.length + 1); expectToolNamesToContain(names, DEFAULT_TOOL_NAMES); expectToolNamesToContain(names, DEFAULT_ACTOR_NAMES); + expect(names).toContain('get-actor-output'); + await client.close(); }); it('should override enableAddingActors false with experimental tool category', async () => { const client = await createClientFn({ enableAddingActors: false, tools: ['experimental'] }); + const names = getToolNames(await client.listTools()); - expect(names).toHaveLength(toolCategories.experimental.length); - expect(names).toContain(addTool.tool.name); + expect(names).toHaveLength(2); + expect(names).toContain('add-actor'); + expect(names).toContain('get-actor-output'); + await client.close(); }); @@ -146,8 +154,9 @@ export function createIntegrationTestsSuite( const actors = ['apify/python-example', 'apify/rag-web-browser']; const client = await createClientFn({ actors, enableAddingActors: false }); const names = getToolNames(await client.listTools()); - expect(names.length).toEqual(actors.length); + expect(names.length).toEqual(actors.length + 1); expectToolNamesToContain(names, actors.map((actor) => actorNameToToolName(actor))); + expect(names).toContain('get-actor-output'); await client.close(); }); @@ -158,8 +167,9 @@ export function createIntegrationTestsSuite( const names = getToolNames(await client.listTools()); // Should only load the specified actor, no default tools or categories - 
expect(names.length).toEqual(actors.length); + expect(names.length).toEqual(actors.length + 1); expect(names).toContain(actorNameToToolName(actors[0])); + expect(names).toContain('get-actor-output'); // Should NOT include any default category tools expect(names).not.toContain('search-actors'); @@ -197,8 +207,10 @@ export function createIntegrationTestsSuite( const client = await createClientFn({ tools: actors }); const names = getToolNames(await client.listTools()); // Only the Actor should be loaded - expect(names).toHaveLength(actors.length); + expect(names).toHaveLength(actors.length + 1); expect(names).toContain(actorNameToToolName(actors[0])); + expect(names).toContain('get-actor-output'); + await client.close(); }); @@ -221,13 +233,18 @@ export function createIntegrationTestsSuite( it('should merge actors param into tools selectors (backward compatibility)', async () => { const actors = ['apify/python-example']; const categories = ['docs'] as ToolCategory[]; + const client = await createClientFn({ tools: categories, actors }); + const names = getToolNames(await client.listTools()); const docsToolNames = getExpectedToolNamesByCategories(categories); const expected = [...docsToolNames, actorNameToToolName(actors[0])]; - expect(names).toHaveLength(expected.length); + expect(names).toHaveLength(expected.length + 1); + const containsExpected = expected.every((n) => names.includes(n)); expect(containsExpected).toBe(true); + expect(names).toContain('get-actor-output'); + await client.close(); }); @@ -237,11 +254,14 @@ export function createIntegrationTestsSuite( }); const names = getToolNames(await client.listTools()); + expect(names).toHaveLength(5); + // Should include: docs category + specific tools expect(names).toContain('search-apify-docs'); // from docs category expect(names).toContain('fetch-apify-docs'); // from docs category expect(names).toContain('fetch-actor-details'); // specific tool expect(names).toContain('add-actor'); // specific tool + 
expect(names).toContain('get-actor-output'); // Should NOT include other actors category tools expect(names).not.toContain('search-actors'); @@ -276,9 +296,12 @@ export function createIntegrationTestsSuite( it('should not load any internal tools when tools param is empty and use custom Actor if specified', async () => { const client = await createClientFn({ tools: [], actors: [ACTOR_PYTHON_EXAMPLE] }); + const names = getToolNames(await client.listTools()); - expect(names.length).toEqual(1); + expect(names.length).toEqual(2); expect(names).toContain(actorNameToToolName(ACTOR_PYTHON_EXAMPLE)); + expect(names).toContain('get-actor-output'); + await client.close(); }); @@ -287,16 +310,18 @@ export function createIntegrationTestsSuite( const client = await createClientFn({ enableAddingActors: true }); const names = getToolNames(await client.listTools()); // Only the add tool should be added - expect(names).toHaveLength(1); + expect(names).toHaveLength(2); expect(names).toContain('add-actor'); + expect(names).toContain('get-actor-output'); expect(names).not.toContain(selectedToolName); // Add Actor dynamically await addActor(client, ACTOR_PYTHON_EXAMPLE); // Check if tools was added const namesAfterAdd = getToolNames(await client.listTools()); - expect(namesAfterAdd.length).toEqual(2); + expect(namesAfterAdd.length).toEqual(3); expect(namesAfterAdd).toContain(selectedToolName); + expect(namesAfterAdd).toContain('get-actor-output'); await callPythonExampleActor(client, selectedToolName); await client.close(); @@ -306,8 +331,8 @@ export function createIntegrationTestsSuite( const selectedToolName = actorNameToToolName(ACTOR_PYTHON_EXAMPLE); const client = await createClientFn({ enableAddingActors: true, tools: ['actors'] }); const names = getToolNames(await client.listTools()); - // Only the actors category and add-actor should be loaded - const numberOfTools = toolCategories.actors.length + 1; + // Only the actors category, get-actor-output and add-actor should be loaded 
+ const numberOfTools = toolCategories.actors.length + 2; expect(names).toHaveLength(numberOfTools); // Check that the Actor is not in the tools list expect(names).not.toContain(selectedToolName); @@ -324,18 +349,16 @@ export function createIntegrationTestsSuite( }, }); - expect(result).toEqual( + const content = result.content as { text: string }[]; + + expect(content[content.length - 1]).toEqual( { - content: [ - { - text: expect.stringMatching(/^Actor finished with runId: .+, datasetId .+$/), - type: 'text', - }, - { - text: `{"sum":3,"first_number":1,"second_number":2}`, - type: 'text', - }, - ], + text: JSON.stringify([{ + first_number: 1, + second_number: 2, + sum: 3, + }]), + type: 'text', }, ); @@ -643,10 +666,12 @@ export function createIntegrationTestsSuite( // Test with enableAddingActors = false via env var const client = await createClientFn({ enableAddingActors: false, useEnv: true }); const names = getToolNames(await client.listTools()); - expect(names.length).toEqual(defaultTools.length + defaults.actors.length); + expect(names.length).toEqual(defaultTools.length + defaults.actors.length + 1); expectToolNamesToContain(names, DEFAULT_TOOL_NAMES); expectToolNamesToContain(names, DEFAULT_ACTOR_NAMES); + expect(names).toContain('get-actor-output'); + await client.close(); }); @@ -654,7 +679,7 @@ export function createIntegrationTestsSuite( // Test with enableAddingActors = false via env var const client = await createClientFn({ enableAddingActors: true, useEnv: true }); const names = getToolNames(await client.listTools()); - expect(names).toEqual(['add-actor']); + expectToolNamesToContain(names, ['add-actor', 'get-actor-output']); await client.close(); }); @@ -679,5 +704,122 @@ export function createIntegrationTestsSuite( await client.close(); }); + + it('should call rag-web-browser actor and retrieve metadata.title and crawl object from dataset', async () => { + const client = await createClientFn({ tools: ['actors', 'storage'] }); + + const callResult 
= await client.callTool({ + name: 'call-actor', + arguments: { + actor: 'apify/rag-web-browser', + step: 'call', + input: { query: 'https://apify.com' }, + }, + }); + + expect(callResult.content).toBeDefined(); + const content = callResult.content as { text: string; type: string }[]; + + expect(content.length).toBe(2); // Call step returns text summary with embedded schema + + // First content: text summary + const runText = content[0].text; + + // Extract datasetId from the text + const runIdMatch = runText.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); + expect(runIdMatch).toBeTruthy(); + const datasetId = runIdMatch![2]; + + // Check for JSON schema in the text (in a code block) + const schemaMatch = runText.match(/```json\s*(\{[\s\S]*?\})\s*```/); + expect(schemaMatch).toBeTruthy(); + if (schemaMatch) { + const schemaText = schemaMatch[1]; + const schema = JSON.parse(schemaText); + expect(schema).toHaveProperty('type'); + expect(schema.type).toBe('object'); + expect(schema).toHaveProperty('properties'); + expect(schema.properties).toHaveProperty('metadata'); + expect(schema.properties.metadata).toHaveProperty('type', 'object'); + expect(schema.properties).toHaveProperty('crawl'); + expect(schema.properties.crawl).toHaveProperty('type', 'object'); + } + + const outputResult = await client.callTool({ + name: HelperTools.ACTOR_OUTPUT_GET, + arguments: { + datasetId, + fields: 'metadata.title,crawl', + }, + }); + + expect(outputResult.content).toBeDefined(); + const outputContent = outputResult.content as { text: string; type: string }[]; + const output = JSON.parse(outputContent[0].text); + expect(Array.isArray(output)).toBe(true); + expect(output.length).toBeGreaterThan(0); + expect(output[0]).toHaveProperty('metadata.title'); + expect(typeof output[0]['metadata.title']).toBe('string'); + expect(output[0]).toHaveProperty('crawl'); + expect(typeof output[0].crawl).toBe('object'); + + await client.close(); + }); + + it('should call apify/rag-web-browser tool 
directly and retrieve metadata.title from dataset', async () => { + const client = await createClientFn({ actors: ['apify/rag-web-browser'] }); + + // Call the dedicated apify-slash-rag-web-browser tool + const result = await client.callTool({ + name: actorNameToToolName('apify/rag-web-browser'), + arguments: { query: 'https://apify.com' }, + }); + + // Validate the response has 2 content items: a text summary with embedded schema and an items preview + expect(result.content).toBeDefined(); + const content = result.content as { text: string; type: string }[]; + expect(content.length).toBe(2); + const { text } = content[0]; + + // Extract datasetId from the response text + const runIdMatch = text.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); + expect(runIdMatch).toBeTruthy(); + const datasetId = runIdMatch![2]; + + // Check for JSON schema in the text (in a code block) + const schemaMatch = text.match(/```json\s*(\{[\s\S]*?\})\s*```/); + expect(schemaMatch).toBeTruthy(); + if (schemaMatch) { + const schemaText = schemaMatch[1]; + const schema = JSON.parse(schemaText); + expect(schema).toHaveProperty('type'); + expect(schema.type).toBe('object'); + expect(schema).toHaveProperty('properties'); + expect(schema.properties).toHaveProperty('metadata'); + expect(schema.properties.metadata).toHaveProperty('type', 'object'); + expect(schema.properties).toHaveProperty('crawl'); + expect(schema.properties.crawl).toHaveProperty('type', 'object'); + } + + // Call get-actor-output with fields: 'metadata.title' + const outputResult = await client.callTool({ + name: HelperTools.ACTOR_OUTPUT_GET, + arguments: { + datasetId, + fields: 'metadata.title', + }, + }); + + // Validate the output contains the expected structure with metadata.title + expect(outputResult.content).toBeDefined(); + const outputContent = outputResult.content as { text: string; type: string }[]; + const output = JSON.parse(outputContent[0].text); + expect(Array.isArray(output)).toBe(true); +
expect(output.length).toBeGreaterThan(0); + expect(output[0]).toHaveProperty('metadata.title'); + expect(typeof output[0]['metadata.title']).toBe('string'); + + await client.close(); + }); }); } diff --git a/tests/unit/schema-generation.test.ts b/tests/unit/schema-generation.test.ts new file mode 100644 index 00000000..a8aea033 --- /dev/null +++ b/tests/unit/schema-generation.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, it } from 'vitest'; + +import { generateSchemaFromItems } from '../../src/utils/schema-generation.js'; + +describe('generateSchemaFromItems', () => { + it('should generate basic schema from simple objects', () => { + const items = [{ name: 'John', age: 30 }]; + const result = generateSchemaFromItems(items); + expect(result).toBeDefined(); + expect(result?.type).toBe('array'); + expect(result?.items).toBeDefined(); + const props = result?.items.properties; + expect(props).toBeDefined(); + if (props) { + expect(props.name?.type).toBe('string'); + expect(props.age?.type).toBe('integer'); + } + }); + + it('should handle different data types', () => { + const items = [ + { string: 'test', number: 42, boolean: true, object: { nested: 'value' }, array: [1, 2, 3] }, + ]; + const result = generateSchemaFromItems(items); + expect(result).toBeDefined(); + expect(result?.type).toBe('array'); + if (result?.items && typeof result.items === 'object' && 'properties' in result.items) { + const props = result.items.properties; + expect(props).toBeDefined(); + if (props) { + expect(props.string?.type).toBe('string'); + expect(props.number?.type).toBe('integer'); + expect(props.boolean?.type).toBe('boolean'); + expect(props.object?.type).toBe('object'); + expect(props.array?.type).toBe('array'); + expect(props.object?.properties?.nested?.type).toBe('string'); + expect(props.array?.items?.type).toBe('integer'); + } + } + }); + + it('should respect the limit option', () => { + const items = [ + { id: 1, name: 'A' }, + { id: 2, name: 'B' }, + { id: 3, name: 'C' }, 
+ { id: 4, extra: 'D' }, + { id: 5, extra: 'E' }, + ]; + const result = generateSchemaFromItems(items, { limit: 3 }); + expect(result).toBeDefined(); + expect(result?.type).toBe('array'); + if (result?.items && typeof result.items === 'object' && 'properties' in result.items) { + const props = result.items.properties; + expect(props).toBeDefined(); + if (props) { + expect(props.id).toBeDefined(); + expect(props.name).toBeDefined(); + expect(props.extra).toBeUndefined(); // Should not include fields from items beyond limit + } + } + }); +}); diff --git a/tests/unit/utils.generic.test.ts b/tests/unit/utils.generic.test.ts index 4924c632..18e53a4b 100644 --- a/tests/unit/utils.generic.test.ts +++ b/tests/unit/utils.generic.test.ts @@ -43,4 +43,10 @@ describe('getValuesByDotKeys', () => { const result = getValuesByDotKeys(obj, ['a', 'b.c']); expect(result).toEqual({ a: undefined, 'b.c': undefined }); }); + + it('should return whole object', () => { + const obj = { nested: { a: 1, b: 2 } }; + const result = getValuesByDotKeys(obj, ['nested']); + expect(result).toEqual({ nested: { a: 1, b: 2 } }); + }); }); From 07035b0c75bc198d19f27c4fb27f84084deca064 Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 11:08:23 +0200 Subject: [PATCH 02/18] update readme --- README.md | 5 +++++ src/tools/actor.ts | 1 + 2 files changed, 6 insertions(+) diff --git a/README.md b/README.md index 651f1f74..d21d3f00 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,11 @@ Here is an overview list of all the tools provided by the Apify MCP Server. | `get-dataset-list` | storage | List all available datasets for the user. | | | `get-key-value-store-list`| storage | List all available key-value stores for the user. | | | `add-actor` | experimental | Add an Actor as a new tool for the user to call. | | +| `get-actor-output`* | - | Retrieve the output from an Actor call which is not included in the output preview of the Actor tool. 
| ✅ | + +> **Note:** +> +> The `get-actor-output` tool is automatically included with any Actor-related tool, such as `call-actor`, `add-actor`, or any specific Actor tool like `apify-slash-rag-web-browser`. When you call an Actor - either through the `call-actor` tool or directly via an Actor tool (e.g., `apify-slash-rag-web-browser`) - you receive a preview of the output. The preview depends on the Actor's output format and length; for some Actors and runs, it may include the entire output, while for others, only a limited version is returned to avoid overwhelming the LLM. To retrieve the full output of an Actor run, use the `get-actor-output` tool (supports limit, offset, and field filtering) with the `datasetId` provided by the Actor call. ### Tools configuration diff --git a/src/tools/actor.ts b/src/tools/actor.ts index 4ed8573e..7c2974a2 100644 --- a/src/tools/actor.ts +++ b/src/tools/actor.ts @@ -63,6 +63,7 @@ export async function callActorGetDataset( abortSignal?: AbortSignal, ): Promise { const CLIENT_ABORT = Symbol('CLIENT_ABORT'); // Just internal symbol to identify client abort + // TODO: we should remove this throw, we are just catching and then rethrowing with generic message try { const client = new ApifyClient({ token: apifyToken }); const actorClient = client.actor(actorName); From 4a402d848d46d0fe7d13798ef49ebd9340beadee Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 12:53:19 +0200 Subject: [PATCH 03/18] fix output tool, write test for that --- src/tools/get-actor-output.ts | 7 ++++-- src/utils/actor.ts | 18 +++++++++++----- src/utils/generic.ts | 2 +- tests/integration/suite.ts | 40 +++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 8 deletions(-) diff --git a/src/tools/get-actor-output.ts b/src/tools/get-actor-output.ts index 55d2eded..c630d5dd 100644 --- a/src/tools/get-actor-output.ts +++ b/src/tools/get-actor-output.ts @@ -102,11 +102,14 @@ You also can retrieve only specific fields from the output if needed. 
Use this t return { content: [{ type: 'text', text: `Dataset '${parsed.datasetId}' not found.` }] }; } + let { items } = response; // Apply field selection if specified - const processedItems = response.items.map((item) => getValuesByDotKeys(item, fieldsArray)); + if (fieldsArray.length > 0) { + items = items.map((item) => getValuesByDotKeys(item, fieldsArray)); + } // Clean empty properties - const cleanedItems = processedItems + const cleanedItems = items .map((item) => cleanEmptyProperties(item)) .filter((item) => item !== undefined); diff --git a/src/utils/actor.ts b/src/utils/actor.ts index ef4a76fe..9871d674 100644 --- a/src/utils/actor.ts +++ b/src/utils/actor.ts @@ -39,16 +39,24 @@ export function ensureOutputWithinCharLimit(items: DatasetItem[], importantField return items; } + /** + * Items used for the final fallback - removing items until within the limit. + * If important fields are defined, use only those fields for that fallback step. + */ + let sourceItems = items; // Try only the important fields - const importantItems = items.map((item) => getValuesByDotKeys(item, importantFields)); - const importantItemsString = JSON.stringify(importantItems); - if (importantItemsString.length <= charLimit) { - return importantItems; + if (importantFields.length > 0) { + const importantItems = items.map((item) => getValuesByDotKeys(item, importantFields)); + const importantItemsString = JSON.stringify(importantItems); + if (importantItemsString.length <= charLimit) { + return importantItems; + } + sourceItems = importantItems; } // Start removing items until within the limit const result: DatasetItem[] = []; - for (const item of importantItems) { + for (const item of sourceItems) { if (JSON.stringify(result.concat(item)).length > charLimit) { break; } diff --git a/src/utils/generic.ts b/src/utils/generic.ts index b44eff86..00a7f22b 100644 --- a/src/utils/generic.ts +++ b/src/utils/generic.ts @@ -8,7 +8,7 @@ * const value = getValuesByDotKeys(obj, ['a.b.c', 
'a.b.d', 'nested']); * value; // { 'a.b.c': 42, 'a.b.d': undefined, 'nested': { d: 100 } } */ -export function getValuesByDotKeys(obj: T, keys: string[]): Record { +export function getValuesByDotKeys(obj: Record, keys: string[]): Record { const result: Record = {}; for (const key of keys) { const path = key.split('.'); diff --git a/tests/integration/suite.ts b/tests/integration/suite.ts index 8eb3fcf2..5c76ba1b 100644 --- a/tests/integration/suite.ts +++ b/tests/integration/suite.ts @@ -870,5 +870,45 @@ export function createIntegrationTestsSuite( await client.close(); }); + + it('should call apify/python-example and retrieve the full dataset using get-actor-output tool', async () => { + client = await createClientFn({ actors: ['apify/python-example'] }); + const selectedToolName = actorNameToToolName('apify/python-example'); + const input = { first_number: 5, second_number: 7 }; + + const result = await client.callTool({ + name: selectedToolName, + arguments: input, + }); + + expect(result.content).toBeDefined(); + const content = result.content as { text: string; type: string }[]; + expect(content.length).toBe(2); // Call step returns text summary with embedded schema + + // First content: text summary + const runText = content[0].text; + + // Extract datasetId from the text + const runIdMatch = runText.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); + expect(runIdMatch).toBeTruthy(); + const datasetId = runIdMatch![2]; + + // Retrieve full dataset using get-actor-output tool + const outputResult = await client.callTool({ + name: HelperTools.ACTOR_OUTPUT_GET, + arguments: { + datasetId, + }, + }); + + expect(outputResult.content).toBeDefined(); + const outputContent = outputResult.content as { text: string; type: string }[]; + const output = JSON.parse(outputContent[0].text); + expect(Array.isArray(output)).toBe(true); + expect(output.length).toBe(1); + expect(output[0]).toHaveProperty('first_number', input.first_number); + 
expect(output[0]).toHaveProperty('second_number', input.second_number); + expect(output[0]).toHaveProperty('sum', input.first_number + input.second_number); + }); }); } From f207cc27b220a3adbf288cbf25511fe027d9fe16 Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 13:07:33 +0200 Subject: [PATCH 04/18] add test based on Zuzka suggestion --- tests/integration/suite.ts | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/integration/suite.ts b/tests/integration/suite.ts index 5c76ba1b..1aeda068 100644 --- a/tests/integration/suite.ts +++ b/tests/integration/suite.ts @@ -910,5 +910,35 @@ export function createIntegrationTestsSuite( expect(output[0]).toHaveProperty('second_number', input.second_number); expect(output[0]).toHaveProperty('sum', input.first_number + input.second_number); }); + + it('should return Actor details both for full Actor name and ID', async () => { + const actorName = 'apify/python-example'; + const apifyClient = new ApifyClient({ token: process.env.APIFY_TOKEN as string }); + const actor = await apifyClient.actor(actorName).get(); + expect(actor).toBeDefined(); + const actorId = actor!.id as string; + + client = await createClientFn(); + + // Fetch by full Actor name + const resultByName = await client.callTool({ + name: 'fetch-actor-details', + arguments: { actor: actorName }, + }); + expect(resultByName.content).toBeDefined(); + const contentByName = resultByName.content as { text: string }[]; + expect(contentByName[0].text).toContain(actorName); + + // Fetch by Actor ID only + const resultById = await client.callTool({ + name: 'fetch-actor-details', + arguments: { actor: actorId }, + }); + expect(resultById.content).toBeDefined(); + const contentById = resultById.content as { text: string }[]; + expect(contentById[0].text).toContain(actorName); + + await client.close(); + }); }); } From 4ff957e65b5d57a4cc6d21d49a738d1f08904bd0 Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 13:13:39 +0200 
Subject: [PATCH 05/18] lint --- tests/integration/suite.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/suite.ts b/tests/integration/suite.ts index 1aeda068..38f9791f 100644 --- a/tests/integration/suite.ts +++ b/tests/integration/suite.ts @@ -917,9 +917,9 @@ export function createIntegrationTestsSuite( const actor = await apifyClient.actor(actorName).get(); expect(actor).toBeDefined(); const actorId = actor!.id as string; - + client = await createClientFn(); - + // Fetch by full Actor name const resultByName = await client.callTool({ name: 'fetch-actor-details', @@ -928,7 +928,7 @@ export function createIntegrationTestsSuite( expect(resultByName.content).toBeDefined(); const contentByName = resultByName.content as { text: string }[]; expect(contentByName[0].text).toContain(actorName); - + // Fetch by Actor ID only const resultById = await client.callTool({ name: 'fetch-actor-details', From a2bf3b5462883fcf7539f9672dcb0467c7d135dc Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 14:40:07 +0200 Subject: [PATCH 06/18] fix output response order so LLM does not lose the instructions --- src/utils/actor-response.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/utils/actor-response.ts b/src/utils/actor-response.ts index 39f393ec..4e9ffa5e 100644 --- a/src/utils/actor-response.ts +++ b/src/utils/actor-response.ts @@ -32,11 +32,9 @@ Actor output data schema: ${JSON.stringify(displaySchema, null, 2)} \`\`\` -Below this text block is a preview of the Actor output containing ${result.previewItems.length} item(s). +Above this text block is a preview of the Actor output containing ${result.previewItems.length} item(s).${itemCount !== result.previewItems.length ? ` You have access only to a limited preview of the Actor output. Do not present this as the full output, as you have only ${result.previewItems.length} item(s) available instead of the full ${itemCount} item(s). 
Be aware of this and inform users about the currently loaded count and the total available output items count.` : ''} -If you need to retrieve additional data, use the "get-actor-output" tool with: - datasetId: "${datasetId}" -Be sure to limit the number of results when using the "get-actor-output" tool, since you never know how large the items may be and they might exceed the output limits. +If you need to retrieve additional data, use the "get-actor-output" tool with: datasetId: "${datasetId}". Be sure to limit the number of results when using the "get-actor-output" tool, since you never know how large the items may be, and they might exceed the output limits. `; const itemsPreviewText = result.previewItems.length > 0 @@ -45,8 +43,11 @@ Be sure to limit the number of results when using the "get-actor-output" tool, s // Build content array const content: ({ type: 'text'; text: string })[] = [ - { type: 'text', text: textContent }, { type: 'text', text: itemsPreviewText }, + /** + * The metadata and instructions text must be at the end otherwise the LLM does not acknowledge it. 
+ */ + { type: 'text', text: textContent }, ]; return content; From 26c34ccb932e4675b56b59616eced0dd9b499df3 Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 15:16:50 +0200 Subject: [PATCH 07/18] refactor: unify string list parsing logic --- src/mcp/actors.ts | 3 ++- src/stdio.ts | 9 +++------ src/tools/dataset.ts | 7 ++++--- src/tools/get-actor-output.ts | 9 ++------- src/utils/generic.ts | 17 ++++++++++++++++ tests/unit/utils.generic.test.ts | 34 +++++++++++++++++++++++++++++++- 6 files changed, 61 insertions(+), 18 deletions(-) diff --git a/src/mcp/actors.ts b/src/mcp/actors.ts index 01859789..197d071c 100644 --- a/src/mcp/actors.ts +++ b/src/mcp/actors.ts @@ -3,6 +3,7 @@ import type { ActorDefinition } from 'apify-client'; import { ApifyClient } from '../apify-client.js'; import { MCP_STREAMABLE_ENDPOINT } from '../const.js'; import type { ActorDefinitionPruned } from '../types.js'; +import { parseCommaSeparatedList } from '../utils/generic.js'; /** * Returns the MCP server path for the given Actor ID. 
@@ -13,7 +14,7 @@ export function getActorMCPServerPath(actorDefinition: ActorDefinition | ActorDe if ('webServerMcpPath' in actorDefinition && typeof actorDefinition.webServerMcpPath === 'string') { const webServerMcpPath = actorDefinition.webServerMcpPath.trim(); - const paths = webServerMcpPath.split(',').map((path) => path.trim()); + const paths = parseCommaSeparatedList(webServerMcpPath); // If there is only one path, return it directly if (paths.length === 1) { return paths[0]; diff --git a/src/stdio.ts b/src/stdio.ts index 39bb309a..a288857b 100644 --- a/src/stdio.ts +++ b/src/stdio.ts @@ -25,6 +25,7 @@ import log from '@apify/log'; import { processInput } from './input.js'; import { ActorsMcpServer } from './mcp/server.js'; import type { Input, ToolSelector } from './types.js'; +import { parseCommaSeparatedList } from './utils/generic.js'; import { loadToolsFromInput } from './utils/tools-loader.js'; // Keeping this interface here and not types.ts since @@ -86,13 +87,9 @@ For more details visit https://mcp.apify.com`, // Respect either the new flag or the deprecated one const enableAddingActors = Boolean(argv.enableAddingActors || argv.enableActorAutoLoading); // Split actors argument, trim whitespace, and filter out empty strings -const actorList = argv.actors !== undefined - ? argv.actors.split(',').map((a: string) => a.trim()).filter((a: string) => a.length > 0) - : undefined; +const actorList = argv.actors !== undefined ? parseCommaSeparatedList(argv.actors) : undefined; // Split tools argument, trim whitespace, and filter out empty strings -const toolCategoryKeys = argv.tools !== undefined - ? argv.tools.split(',').map((t: string) => t.trim()).filter((t: string) => t.length > 0) - : undefined; +const toolCategoryKeys = argv.tools !== undefined ? 
parseCommaSeparatedList(argv.tools) : undefined; // Propagate log.error to console.error for easier debugging const originalError = log.error.bind(log); diff --git a/src/tools/dataset.ts b/src/tools/dataset.ts index af513dca..48766fb7 100644 --- a/src/tools/dataset.ts +++ b/src/tools/dataset.ts @@ -5,6 +5,7 @@ import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; import { ajv } from '../utils/ajv.js'; +import { parseCommaSeparatedList } from '../utils/generic.js'; import { generateSchemaFromItems } from '../utils/schema-generation.js'; const getDatasetArgs = z.object({ @@ -91,9 +92,9 @@ export const getDatasetItems: ToolEntry = { const client = new ApifyClient({ token: apifyToken }); // Convert comma-separated strings to arrays - const fields = parsed.fields?.split(',').map((f) => f.trim()); - const omit = parsed.omit?.split(',').map((f) => f.trim()); - const flatten = parsed.flatten?.split(',').map((f) => f.trim()); + const fields = parseCommaSeparatedList(parsed.fields); + const omit = parseCommaSeparatedList(parsed.omit); + const flatten = parseCommaSeparatedList(parsed.flatten); const v = await client.dataset(parsed.datasetId).listItems({ clean: parsed.clean, diff --git a/src/tools/get-actor-output.ts b/src/tools/get-actor-output.ts index c630d5dd..cc7256e0 100644 --- a/src/tools/get-actor-output.ts +++ b/src/tools/get-actor-output.ts @@ -5,7 +5,7 @@ import { ApifyClient } from '../apify-client.js'; import { HelperTools, TOOL_MAX_OUTPUT_CHARS } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; import { ajv } from '../utils/ajv.js'; -import { getValuesByDotKeys } from '../utils/generic.js'; +import { getValuesByDotKeys, parseCommaSeparatedList } from '../utils/generic.js'; /** * Zod schema for get-actor-output tool arguments @@ -78,12 +78,7 @@ You also can retrieve only specific fields from the output if needed. 
Use this t const client = new ApifyClient({ token: apifyToken }); // Parse fields into array - const fieldsArray = parsed.fields - ? parsed.fields - .split(',') - .map((field) => field.trim()) - .filter((field) => field.length > 0) - : []; + const fieldsArray = parseCommaSeparatedList(parsed.fields); // TODO: we can optimize the API level field filtering in future /** diff --git a/src/utils/generic.ts b/src/utils/generic.ts index 00a7f22b..b7503fd4 100644 --- a/src/utils/generic.ts +++ b/src/utils/generic.ts @@ -8,6 +8,23 @@ * const value = getValuesByDotKeys(obj, ['a.b.c', 'a.b.d', 'nested']); * value; // { 'a.b.c': 42, 'a.b.d': undefined, 'nested': { d: 100 } } */ +/** + * Parses a comma-separated string into an array of trimmed strings. + * Empty strings are filtered out after trimming. + * + * @param input - The comma-separated string to parse. If undefined, returns an empty array. + * @returns An array of trimmed, non-empty strings. + * @example + * parseCommaSeparatedList("a, b, c"); // ["a", "b", "c"] + * parseCommaSeparatedList("a, , b"); // ["a", "b"] + */ +export function parseCommaSeparatedList(input?: string): string[] { + if (!input) { + return []; + } + return input.split(',').map((s) => s.trim()).filter((s) => s.length > 0); +} + export function getValuesByDotKeys(obj: Record, keys: string[]): Record { const result: Record = {}; for (const key of keys) { diff --git a/tests/unit/utils.generic.test.ts b/tests/unit/utils.generic.test.ts index 18e53a4b..8e0db60c 100644 --- a/tests/unit/utils.generic.test.ts +++ b/tests/unit/utils.generic.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from 'vitest'; -import { getValuesByDotKeys } from '../../src/utils/generic.js'; +import { getValuesByDotKeys, parseCommaSeparatedList } from '../../src/utils/generic.js'; describe('getValuesByDotKeys', () => { it('should get value for a key without dot', () => { @@ -50,3 +50,35 @@ describe('getValuesByDotKeys', () => { expect(result).toEqual({ nested: { a: 1, b: 2 
} }); }); }); + +describe('parseCommaSeparatedList', () => { + it('should parse comma-separated list with trimming', () => { + const result = parseCommaSeparatedList('field1, field2,field3 '); + expect(result).toEqual(['field1', 'field2', 'field3']); + }); + + it('should handle empty input', () => { + const result = parseCommaSeparatedList(); + expect(result).toEqual([]); + }); + + it('should handle empty string', () => { + const result = parseCommaSeparatedList(''); + expect(result).toEqual([]); + }); + + it('should filter empty strings', () => { + const result = parseCommaSeparatedList(' field1, , field2,,field3 '); + expect(result).toEqual(['field1', 'field2', 'field3']); + }); + + it('should handle only commas and spaces', () => { + const result = parseCommaSeparatedList(' , , '); + expect(result).toEqual([]); + }); + + it('should handle single item', () => { + const result = parseCommaSeparatedList(' single '); + expect(result).toEqual(['single']); + }); +}); From 3186c88184cb0216050feba4111048809ffa4250 Mon Sep 17 00:00:00 2001 From: MQ Date: Thu, 4 Sep 2025 15:44:25 +0200 Subject: [PATCH 08/18] fix the tests - order of the Actor run response messages --- tests/integration/suite.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integration/suite.ts b/tests/integration/suite.ts index 38f9791f..954bcecb 100644 --- a/tests/integration/suite.ts +++ b/tests/integration/suite.ts @@ -52,7 +52,7 @@ async function callPythonExampleActor(client: Client, selectedToolName: string) type: 'text', }; // Parse the JSON to compare objects regardless of property order - const actual = content[content.length - 1]; + const actual = content[0]; expect(JSON.parse(actual.text)).toEqual(JSON.parse(expected.text)); expect(actual.type).toBe(expected.type); } @@ -344,7 +344,7 @@ export function createIntegrationTestsSuite( const content = result.content as { text: string }[]; - expect(content[content.length - 1]).toEqual( + 
expect(content[0]).toEqual( { text: JSON.stringify([{ first_number: 1, @@ -772,7 +772,7 @@ export function createIntegrationTestsSuite( expect(content.length).toBe(2); // Call step returns text summary with embedded schema // First content: text summary - const runText = content[0].text; + const runText = content[1].text; // Extract datasetId from the text const runIdMatch = runText.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); @@ -828,7 +828,7 @@ export function createIntegrationTestsSuite( expect(result.content).toBeDefined(); const content = result.content as { text: string; type: string }[]; expect(content.length).toBe(2); - const { text } = content[0]; + const { text } = content[1]; // Extract datasetId from the response text const runIdMatch = text.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); @@ -886,7 +886,7 @@ export function createIntegrationTestsSuite( expect(content.length).toBe(2); // Call step returns text summary with embedded schema // First content: text summary - const runText = content[0].text; + const runText = content[1].text; // Extract datasetId from the text const runIdMatch = runText.match(/Run ID: ([^\n]+)\n• Dataset ID: ([^\n]+)/); From 259831a327de1fa8c2b9520fc9f48dde93ba7ff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Kopeck=C3=BD?= Date: Thu, 4 Sep 2025 21:42:40 +0200 Subject: [PATCH 09/18] Update src/utils/schema-generation.ts Co-authored-by: Michal Kalita --- src/utils/schema-generation.ts | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/utils/schema-generation.ts b/src/utils/schema-generation.ts index ab6dc60b..89a2eefc 100644 --- a/src/utils/schema-generation.ts +++ b/src/utils/schema-generation.ts @@ -95,16 +95,15 @@ export function generateSchemaFromItems( arrays: { mode: arrayMode }, }) as JsonSchemaArray; return schema; - } catch { - // Fallback: try with simpler approach - try { - const fallbackSchema = toJsonSchema(processedItems, { - arrays: { mode: 'first' }, - 
}) as JsonSchemaArray; - return fallbackSchema; - } catch { - // If all attempts fail, return null - return null; - } - } + } catch { /* ignore */ } + + try { + const fallbackSchema = toJsonSchema(processedItems, { + arrays: { mode: 'first' }, + }) as JsonSchemaArray; + return fallbackSchema; + } catch { /* ignore */ } + + // If all attempts fail, return null + return null; } From 863dd47478eae840ddfe7418f13cc9e4b2bce7a1 Mon Sep 17 00:00:00 2001 From: MQ Date: Mon, 8 Sep 2025 10:43:53 +0200 Subject: [PATCH 10/18] address review comments --- src/const.ts | 3 --- src/utils/actor-response.ts | 15 +++++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/const.ts b/src/const.ts index d8266885..d66e742a 100644 --- a/src/const.ts +++ b/src/const.ts @@ -27,10 +27,8 @@ export const USER_AGENT_ORIGIN = 'Origin/mcp-server'; export enum HelperTools { ACTOR_ADD = 'add-actor', ACTOR_CALL = 'call-actor', - ACTOR_GET = 'get-actor', ACTOR_GET_DETAILS = 'fetch-actor-details', ACTOR_OUTPUT_GET = 'get-actor-output', - ACTOR_REMOVE = 'remove-actor', ACTOR_RUNS_ABORT = 'abort-actor-run', ACTOR_RUNS_GET = 'get-actor-run', ACTOR_RUNS_LOG = 'get-actor-log', @@ -43,7 +41,6 @@ export enum HelperTools { KEY_VALUE_STORE_GET = 'get-key-value-store', KEY_VALUE_STORE_KEYS_GET = 'get-key-value-store-keys', KEY_VALUE_STORE_RECORD_GET = 'get-key-value-store-record', - APIFY_MCP_HELP_TOOL = 'apify-actor-help-tool', STORE_SEARCH = 'search-actors', DOCS_SEARCH = 'search-apify-docs', DOCS_FETCH = 'fetch-apify-docs', diff --git a/src/utils/actor-response.ts b/src/utils/actor-response.ts index 4e9ffa5e..5dc77b03 100644 --- a/src/utils/actor-response.ts +++ b/src/utils/actor-response.ts @@ -1,7 +1,16 @@ import type { CallActorGetDatasetResult } from '../tools/actor'; /** - * Builds the response content for actor tool calls. + * Builds the response content for Actor tool calls. + * Includes Actor run metadata, output schema, and a preview of output items. 
+ * + * The response starts with a preview of Actor output items, if available. + * This must come first. Metadata and instructions for the LLM are provided last. + * The LLM may ignore metadata and instructions if it is not at the end of the response. + * + * If the preview is limited and does not show all items, the response informs the LLM. + * This is important because the LLM may assume it has all data and hallucinate missing items. + * * @param actorName - The name of the actor. * @param result - The result from callActorGetDataset. * @returns The content array for the tool response. @@ -42,13 +51,11 @@ If you need to retrieve additional data, use the "get-actor-output" tool with: d : `No items available for preview—either the Actor did not return any items or they are too large for preview. In this case, use the "get-actor-output" tool.`; // Build content array - const content: ({ type: 'text'; text: string })[] = [ + return [ { type: 'text', text: itemsPreviewText }, /** * The metadata and instructions text must be at the end otherwise the LLM does not acknowledge it. 
*/ { type: 'text', text: textContent }, ]; - - return content; } From 5501130cb4ecb661556bdc50dbaaca0f6afc0d06 Mon Sep 17 00:00:00 2001 From: MQ Date: Tue, 9 Sep 2025 13:23:20 +0200 Subject: [PATCH 11/18] feat: agentci payments v2 --- src/actor/server.ts | 11 +++-- src/apify-client.ts | 28 +++++++++-- src/const.ts | 5 ++ src/index-internals.ts | 2 + src/main.ts | 4 +- src/mcp/actors.ts | 16 ------- src/mcp/server.ts | 68 +++++++++++++++++++++++---- src/mcp/utils.ts | 6 ++- src/stdio.ts | 4 +- src/tools/actor.ts | 73 ++++++++++++++++++++++------- src/tools/build.ts | 8 ++-- src/tools/fetch-actor-details.ts | 4 +- src/tools/helpers.ts | 4 +- src/types.ts | 6 +++ src/utils/actor-details.ts | 9 ++-- src/utils/progress.ts | 7 ++- src/utils/tools-loader.ts | 6 ++- tests/integration/internals.test.ts | 24 ++++++---- 18 files changed, 203 insertions(+), 82 deletions(-) diff --git a/src/actor/server.ts b/src/actor/server.ts index b1c52248..83d1beae 100644 --- a/src/actor/server.ts +++ b/src/actor/server.ts @@ -11,6 +11,7 @@ import express from 'express'; import log from '@apify/log'; +import { ApifyClient } from '../apify-client.js'; import { ActorsMcpServer } from '../mcp/server.js'; import { getHelpMessage, HEADER_READINESS_PROBE, Routes, TransportType } from './const.js'; import { getActorRunData } from './utils.js'; @@ -69,13 +70,14 @@ export function createExpressApp( rt: Routes.SSE, tr: TransportType.SSE, }); - const mcpServer = new ActorsMcpServer(false); + const mcpServer = new ActorsMcpServer({ setupSigintHandler: false }); const transport = new SSEServerTransport(Routes.MESSAGE, res); // Load MCP server tools const apifyToken = process.env.APIFY_TOKEN as string; log.debug('Loading tools from URL', { sessionId: transport.sessionId, tr: TransportType.SSE }); - await mcpServer.loadToolsFromUrl(req.url, apifyToken); + const apifyClient = new ApifyClient({ token: apifyToken }); + await mcpServer.loadToolsFromUrl(req.url, apifyClient); transportsSSE[transport.sessionId] 
= transport; mcpServers[transport.sessionId] = mcpServer; @@ -152,12 +154,13 @@ export function createExpressApp( sessionIdGenerator: () => randomUUID(), enableJsonResponse: false, // Use SSE response mode }); - const mcpServer = new ActorsMcpServer(false); + const mcpServer = new ActorsMcpServer({ setupSigintHandler: false }); // Load MCP server tools const apifyToken = process.env.APIFY_TOKEN as string; log.debug('Loading tools from URL', { sessionId: transport.sessionId, tr: TransportType.HTTP }); - await mcpServer.loadToolsFromUrl(req.url, apifyToken); + const apifyClient = new ApifyClient({ token: apifyToken }); + await mcpServer.loadToolsFromUrl(req.url, apifyClient); // Connect the transport to the MCP server BEFORE handling the request await mcpServer.connect(transport); diff --git a/src/apify-client.ts b/src/apify-client.ts index 026ba79d..e3c6cc8e 100644 --- a/src/apify-client.ts +++ b/src/apify-client.ts @@ -4,6 +4,11 @@ import type { AxiosRequestConfig } from 'axios'; import { USER_AGENT_ORIGIN } from './const.js'; +interface ExtendedApifyClientOptions extends Omit { + token?: string | null | undefined; + skyfirePayId?: string; +} + /** * Adds a User-Agent header to the request config. * @param config @@ -23,22 +28,37 @@ export function getApifyAPIBaseUrl(): string { } export class ApifyClient extends _ApifyClient { - constructor(options: ApifyClientOptions) { + constructor(options: ExtendedApifyClientOptions) { /** * In order to publish to DockerHub, we need to run their build task to validate our MCP server. * This was failing since we were sending this dummy token to Apify in order to build the Actor tools. * So if we encounter this dummy value, we remove it to use Apify client as unauthenticated, which is sufficient * for server start and listing of tools. 
*/ - if (options.token?.toLowerCase() === 'your-apify-token') { + if (options.token?.toLowerCase() === 'your-apify-token' || options.token === null) { // eslint-disable-next-line no-param-reassign delete options.token; } + const { skyfirePayId, ...clientOptions } = options; + const requestInterceptors = [addUserAgent]; + /** + * Add skyfire-pay-id header if provided. + */ + if (skyfirePayId) { + requestInterceptors.push((config) => { + const updatedConfig = { ...config }; + updatedConfig.headers = updatedConfig.headers ?? {}; + updatedConfig.headers['skyfire-pay-id'] = skyfirePayId; + return updatedConfig; + }); + } + super({ - ...options, + // token null case is handled, we can assert type here + ...clientOptions as ApifyClientOptions, baseUrl: getApifyAPIBaseUrl(), - requestInterceptors: [addUserAgent], + requestInterceptors, }); } } diff --git a/src/const.ts b/src/const.ts index d66e742a..8871a9c2 100644 --- a/src/const.ts +++ b/src/const.ts @@ -53,6 +53,11 @@ export const defaults = { }; export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user.'; +export const SKYFIRE_MIN_CHARGE_USD = 0.5; + +export const SKYFIRE_PAY_ID_MISSING_STRING = `To run the Actor, you need to provide a Skyfire payment ID in the \`skyfire-pay-id\` input property. You first need to create the Skyfire PAY token by calling the \`create-pay-token\` tool from the Skyfire MCP server and then provide the created token in the \`skyfire-pay-id\` input property when calling the \`call-actor\` tool. The Skyfire PAY token is used for billing purposes and needs to be charged with at least ${SKYFIRE_MIN_CHARGE_USD} USD.`; + +export const SKYFIRE_PAY_ID_PROPERTY_DESCRIPTION = `Skyfire payment ID for the Actor run. Used for billing purposes. The PAY token needs to be charged with at least ${SKYFIRE_MIN_CHARGE_USD} USD. 
If you do not have the Skyfire payment ID, you can create it by calling the \`create-pay-token\` tool from the Skyfire MCP server.`; // Cache export const ACTOR_CACHE_MAX_SIZE = 500; diff --git a/src/index-internals.ts b/src/index-internals.ts index 364fe5ae..0c805246 100644 --- a/src/index-internals.ts +++ b/src/index-internals.ts @@ -2,6 +2,7 @@ This file provides essential internal functions for Apify MCP servers, serving as an internal library. */ +import { ApifyClient } from './apify-client.js'; import { defaults, HelperTools } from './const.js'; import { processParamsGetTools } from './mcp/utils.js'; import { addTool } from './tools/helpers.js'; @@ -12,6 +13,7 @@ import { getExpectedToolNamesByCategories, getToolPublicFieldOnly } from './util import { TTLLRUCache } from './utils/ttl-lru.js'; export { + ApifyClient, getExpectedToolNamesByCategories, TTLLRUCache, actorNameToToolName, diff --git a/src/main.ts b/src/main.ts index 5150d199..9bba3257 100644 --- a/src/main.ts +++ b/src/main.ts @@ -9,6 +9,7 @@ import type { ActorCallOptions } from 'apify-client'; import log from '@apify/log'; import { createExpressApp } from './actor/server.js'; +import { ApifyClient } from './apify-client.js'; import { processInput } from './input.js'; import { callActorGetDataset } from './tools/index.js'; import type { Input } from './types.js'; @@ -44,7 +45,8 @@ if (STANDBY_MODE) { await Actor.fail('If you need to debug a specific Actor, please provide the debugActor and debugActorInput fields in the input'); } const options = { memory: input.maxActorMemoryBytes } as ActorCallOptions; - const callResult = await callActorGetDataset(input.debugActor!, input.debugActorInput!, process.env.APIFY_TOKEN, options); + const apifyClient = new ApifyClient({ token: process.env.APIFY_TOKEN }); + const callResult = await callActorGetDataset(input.debugActor!, input.debugActorInput!, apifyClient, options); if (callResult && callResult.previewItems.length > 0) { await 
Actor.pushData(callResult.previewItems); diff --git a/src/mcp/actors.ts b/src/mcp/actors.ts index 197d071c..64d62991 100644 --- a/src/mcp/actors.ts +++ b/src/mcp/actors.ts @@ -64,19 +64,3 @@ export async function getRealActorID(actorIdOrName: string, apifyToken: string): export async function getActorStandbyURL(realActorId: string, standbyBaseUrl = 'apify.actor'): Promise { return `https://${realActorId}.${standbyBaseUrl}`; } - -export async function getActorDefinition(actorID: string, apifyToken: string): Promise { - const apifyClient = new ApifyClient({ token: apifyToken }); - const actor = apifyClient.actor(actorID); - const defaultBuildClient = await actor.defaultBuild(); - const buildInfo = await defaultBuildClient.get(); - if (!buildInfo) { - throw new Error(`Default build for Actor ${actorID} not found`); - } - const { actorDefinition } = buildInfo; - if (!actorDefinition) { - throw new Error(`Actor default build ${actorID} does not have Actor definition`); - } - - return actorDefinition; -} diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 9d5267a4..622d803d 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -23,9 +23,12 @@ import { type ActorCallOptions, ApifyApiError } from 'apify-client'; import log from '@apify/log'; +import { ApifyClient } from '../apify-client.js'; import { SERVER_NAME, SERVER_VERSION, + SKYFIRE_PAY_ID_MISSING_STRING, + SKYFIRE_PAY_ID_PROPERTY_DESCRIPTION, } from '../const.js'; import { prompts } from '../prompts/index.js'; import { callActorGetDataset, defaultTools, getActorsAsTools, toolCategories } from '../tools/index.js'; @@ -40,6 +43,14 @@ import { processParamsGetTools } from './utils.js'; type ToolsChangedHandler = (toolNames: string[]) => void; +interface ActorsMcpServerOptions { + setupSigintHandler?: boolean; + /** + * Switch to enable Skyfire agentic payment mode. 
+ */ + skyfireMode?: boolean; +} + /** * Create Apify MCP server */ @@ -49,8 +60,11 @@ export class ActorsMcpServer { private toolsChangedHandler: ToolsChangedHandler | undefined; private sigintHandler: (() => Promise) | undefined; private currentLogLevel = 'info'; + public readonly options: ActorsMcpServerOptions; - constructor(setupSigintHandler = true) { + constructor(options: ActorsMcpServerOptions = {}) { + this.options = options; + const { setupSigintHandler = true } = options; this.server = new Server( { name: SERVER_NAME, @@ -161,7 +175,7 @@ export class ActorsMcpServer { * @param toolNames - Array of tool names to ensure are loaded * @param apifyToken - Apify API token for authentication */ - public async loadToolsByName(toolNames: string[], apifyToken: string) { + public async loadToolsByName(toolNames: string[], apifyClient: ApifyClient) { const loadedTools = this.listAllToolNames(); const actorsToLoad: string[] = []; const toolsToLoad: ToolEntry[] = []; @@ -186,7 +200,7 @@ export class ActorsMcpServer { } if (actorsToLoad.length > 0) { - await this.loadActorsAsTools(actorsToLoad, apifyToken); + await this.loadActorsAsTools(actorsToLoad, apifyClient); } } @@ -197,8 +211,8 @@ export class ActorsMcpServer { * @param apifyToken - Apify API token for authentication * @returns Promise - Array of loaded tool entries */ - public async loadActorsAsTools(actorIdsOrNames: string[], apifyToken: string): Promise { - const actorTools = await getActorsAsTools(actorIdsOrNames, apifyToken); + public async loadActorsAsTools(actorIdsOrNames: string[], apifyClient: ApifyClient): Promise { + const actorTools = await getActorsAsTools(actorIdsOrNames, apifyClient); if (actorTools.length > 0) { this.upsertTools(actorTools, true); } @@ -212,8 +226,8 @@ export class ActorsMcpServer { * * Used primarily for SSE. 
*/ - public async loadToolsFromUrl(url: string, apifyToken: string) { - const tools = await processParamsGetTools(url, apifyToken); + public async loadToolsFromUrl(url: string, apifyClient: ApifyClient) { + const tools = await processParamsGetTools(url, apifyClient); if (tools.length > 0) { log.debug('Loading tools from query parameters'); this.upsertTools(tools, false); @@ -368,6 +382,24 @@ export class ActorsMcpServer { * @returns {object} - The response object containing the tools. */ this.server.setRequestHandler(ListToolsRequestSchema, async () => { + /** + * Hack for the Skyfire agentic payments, we check if Skyfire mode is enabled we ad-hoc add + * the `skyfire-pay-id` input property to all Actor tools and `call-actor` tool. + */ + if (this.options.skyfireMode) { + for (const toolEntry of this.tools.values()) { + if (toolEntry.type === 'actor' || (toolEntry.type === 'internal' && toolEntry.tool.name === 'call-actor')) { + if (toolEntry.tool.inputSchema && 'properties' in toolEntry.tool.inputSchema) { + (toolEntry.tool.inputSchema.properties as Record)['skyfire-pay-id'] = { + type: 'string', + description: SKYFIRE_PAY_ID_PROPERTY_DESCRIPTION, + }; + } + // Update description to include Skyfire instructions + toolEntry.tool.description = `${SKYFIRE_PAY_ID_MISSING_STRING}\n\n${toolEntry.tool.description}`; + } + } + } const tools = Array.from(this.tools.values()).map((tool) => getToolPublicFieldOnly(tool.tool)); return { tools }; }); @@ -391,7 +423,7 @@ export class ActorsMcpServer { delete request.params.userRentedActorIds; // Validate token - if (!apifyToken) { + if (!apifyToken && !this.options.skyfireMode) { const msg = 'APIFY_TOKEN is required. 
It must be set in the environment variables or passed as a parameter in the body.'; log.error(msg); await this.server.sendLoggingMessage({ level: 'error', data: msg }); @@ -516,6 +548,17 @@ export class ActorsMcpServer { // Handle actor tool if (tool.type === 'actor') { + if (this.options.skyfireMode + && args['skyfire-pay-id'] === undefined + ) { + return { + content: [{ + type: 'text', + text: SKYFIRE_PAY_ID_MISSING_STRING, + }], + }; + } + const actorTool = tool.tool as ActorTool; // Create progress tracker if progressToken is available @@ -523,12 +566,19 @@ export class ActorsMcpServer { const callOptions: ActorCallOptions = { memory: actorTool.memoryMbytes }; + /** + * Create Apify token, for Skyfire mode use `skyfire-pay-id` and for normal mode use `apifyToken`. + */ + const apifyClient = this.options.skyfireMode && typeof args['skyfire-pay-id'] === 'string' + ? new ApifyClient({ skyfirePayId: args['skyfire-pay-id'] }) + : new ApifyClient({ token: apifyToken }); + try { log.info('Calling Actor', { actorName: actorTool.actorFullName, input: args }); const callResult = await callActorGetDataset( actorTool.actorFullName, args, - apifyToken as string, + apifyClient, callOptions, progressTracker, extra.signal, diff --git a/src/mcp/utils.ts b/src/mcp/utils.ts index 8c682c06..9963646b 100644 --- a/src/mcp/utils.ts +++ b/src/mcp/utils.ts @@ -1,6 +1,8 @@ import { createHash } from 'node:crypto'; import { parse } from 'node:querystring'; +import type { ApifyClient } from 'apify-client'; + import { processInput } from '../input.js'; import type { Input } from '../types.js'; import { loadToolsFromInput } from '../utils/tools-loader.js'; @@ -39,9 +41,9 @@ export function getProxyMCPServerToolName(url: string, toolName: string): string * @param url * @param apifyToken */ -export async function processParamsGetTools(url: string, apifyToken: string) { +export async function processParamsGetTools(url: string, apifyClient: ApifyClient) { const input = 
parseInputParamsFromUrl(url); - return await loadToolsFromInput(input, apifyToken); + return await loadToolsFromInput(input, apifyClient); } export function parseInputParamsFromUrl(url: string): Input { diff --git a/src/stdio.ts b/src/stdio.ts index a288857b..a0b96b24 100644 --- a/src/stdio.ts +++ b/src/stdio.ts @@ -22,6 +22,7 @@ import { hideBin } from 'yargs/helpers'; import log from '@apify/log'; +import { ApifyClient } from './apify-client.js'; import { processInput } from './input.js'; import { ActorsMcpServer } from './mcp/server.js'; import type { Input, ToolSelector } from './types.js'; @@ -118,8 +119,9 @@ async function main() { // Normalize (merges actors into tools for backward compatibility) const normalized = processInput(input); + const apifyClient = new ApifyClient({ token: process.env.APIFY_TOKEN }); // Use the shared tools loading logic - const tools = await loadToolsFromInput(normalized, process.env.APIFY_TOKEN as string); + const tools = await loadToolsFromInput(normalized, apifyClient); mcpServer.upsertTools(tools); diff --git a/src/tools/actor.ts b/src/tools/actor.ts index 7c2974a2..4bf9f820 100644 --- a/src/tools/actor.ts +++ b/src/tools/actor.ts @@ -10,13 +10,14 @@ import { ACTOR_ADDITIONAL_INSTRUCTIONS, ACTOR_MAX_MEMORY_MBYTES, HelperTools, + SKYFIRE_PAY_ID_MISSING_STRING, TOOL_MAX_OUTPUT_CHARS, } from '../const.js'; import { getActorMCPServerPath, getActorMCPServerURL } from '../mcp/actors.js'; import { connectMCPClient } from '../mcp/client.js'; import { getMCPServerTools } from '../mcp/proxy.js'; import { actorDefinitionPrunedCache } from '../state.js'; -import type { ActorDefinitionStorage, ActorInfo, DatasetItem, ToolEntry } from '../types.js'; +import type { ActorDefinitionStorage, ActorInfo, ApifyToken, DatasetItem, ToolEntry } from '../types.js'; import { ensureOutputWithinCharLimit, getActorDefinitionStorageFieldNames } from '../utils/actor.js'; import { fetchActorDetails } from '../utils/actor-details.js'; import { 
buildActorResponseContent } from '../utils/actor-response.js'; @@ -57,7 +58,7 @@ export type CallActorGetDatasetResult = { export async function callActorGetDataset( actorName: string, input: unknown, - apifyToken: string, + apifyClient: ApifyClient, callOptions: ActorCallOptions | undefined = undefined, progressTracker?: ProgressTracker | null, abortSignal?: AbortSignal, @@ -65,15 +66,14 @@ export async function callActorGetDataset( const CLIENT_ABORT = Symbol('CLIENT_ABORT'); // Just internal symbol to identify client abort // TODO: we should remove this throw, we are just catching and then rethrowing with generic message try { - const client = new ApifyClient({ token: apifyToken }); - const actorClient = client.actor(actorName); + const actorClient = apifyClient.actor(actorName); // Start the actor run const actorRun: ActorRun = await actorClient.start(input, callOptions); // Start progress tracking if tracker is provided if (progressTracker) { - progressTracker.startActorRunUpdates(actorRun.id, apifyToken, actorName); + progressTracker.startActorRunUpdates(actorRun.id, apifyClient, actorName); } // Create abort promise that handles both API abort and race rejection @@ -81,7 +81,7 @@ export async function callActorGetDataset( abortSignal?.addEventListener('abort', async () => { // Abort the actor run via API try { - await client.run(actorRun.id).abort({ gracefully: false }); + await apifyClient.run(actorRun.id).abort({ gracefully: false }); } catch (e) { log.error('Error aborting Actor run', { error: e, runId: actorRun.id }); } @@ -92,7 +92,7 @@ export async function callActorGetDataset( // Wait for completion or cancellation const potentialAbortedRun = await Promise.race([ - client.run(actorRun.id).waitForFinish(), + apifyClient.run(actorRun.id).waitForFinish(), ...(abortSignal ? 
[abortPromise()] : []), ]); @@ -103,7 +103,7 @@ export async function callActorGetDataset( const completedRun = potentialAbortedRun as ActorRun; // Process the completed run - const dataset = client.dataset(completedRun.defaultDatasetId); + const dataset = apifyClient.dataset(completedRun.defaultDatasetId); const [datasetItems, defaultBuild] = await Promise.all([ dataset.listItems(), (await actorClient.defaultBuild()).get(), @@ -191,7 +191,8 @@ Instructions: ${ACTOR_ADDITIONAL_INSTRUCTIONS}`, properties: {}, required: [], }, - ajvValidate: fixedAjvCompile(ajv, actorDefinitionPruned.input || {}), + // Additional props true to allow skyfire-pay-id + ajvValidate: fixedAjvCompile(ajv, { ...actorDefinitionPruned.input, additionalProperties: true }), memoryMbytes: memoryMbytes > ACTOR_MAX_MEMORY_MBYTES ? ACTOR_MAX_MEMORY_MBYTES : memoryMbytes, }, }; @@ -206,8 +207,16 @@ Instructions: ${ACTOR_ADDITIONAL_INSTRUCTIONS}`, async function getMCPServersAsTools( actorsInfo: ActorInfo[], - apifyToken: string, + apifyToken: ApifyToken, ): Promise { + /** + * This is case for the Skyfire request without any Apify token, we do not support + * standby Actors in this case so we can skip MCP servers since they would fail anyway (they are standby Actors). 
+ */ + if (apifyToken === null || apifyToken === undefined) { + return []; + } + const actorsMCPServerTools: ToolEntry[] = []; for (const actorInfo of actorsInfo) { const actorId = actorInfo.actorDefinitionPruned.id; @@ -243,7 +252,7 @@ async function getMCPServersAsTools( export async function getActorsAsTools( actorIdsOrNames: string[], - apifyToken: string, + apifyClient: ApifyClient, ): Promise { log.debug('Fetching Actors as tools', { actorNames: actorIdsOrNames }); @@ -258,7 +267,7 @@ export async function getActorsAsTools( } as ActorInfo; } - const actorDefinitionPruned = await getActorDefinition(actorIdOrName, apifyToken); + const actorDefinitionPruned = await getActorDefinition(actorIdOrName, apifyClient); if (!actorDefinitionPruned) { log.error('Actor not found or definition is not available', { actorName: actorIdOrName }); return null; @@ -280,7 +289,7 @@ export async function getActorsAsTools( const [normalTools, mcpServerTools] = await Promise.all([ getNormalActorsAsTools(normalActorsInfo), - getMCPServersAsTools(actorMCPServersInfo, apifyToken), + getMCPServersAsTools(actorMCPServersInfo, apifyClient.token), ]); return [...normalTools, ...mcpServerTools]; @@ -335,15 +344,20 @@ Step 2: Call Actor (step="call") The step parameter enforces this workflow - you cannot call an Actor without first getting its info.`, inputSchema: zodToJsonSchema(callActorArgs), - ajvValidate: ajv.compile(zodToJsonSchema(callActorArgs)), + ajvValidate: ajv.compile({ + ...zodToJsonSchema(callActorArgs), + // Additional props true to allow skyfire-pay-id + additionalProperties: true, + }), call: async (toolArgs) => { - const { args, apifyToken, progressTracker, extra } = toolArgs; + const { args, apifyToken, progressTracker, extra, apifyMcpServer } = toolArgs; const { actor: actorName, step, input, callOptions } = callActorArgs.parse(args); try { if (step === 'info') { + const apifyClient = new ApifyClient({ token: apifyToken }); // Step 1: Return Actor card and schema directly 
- const details = await fetchActorDetails(apifyToken, actorName); + const details = await fetchActorDetails(apifyClient, actorName); if (!details) { return { content: [{ type: 'text', text: `Actor information for '${actorName}' was not found. Please check the Actor ID or name and ensure the Actor exists.` }], @@ -355,6 +369,29 @@ The step parameter enforces this workflow - you cannot call an Actor without fir ], }; } + + /** + * In Skyfire mode, we check for the presence of `skyfire-pay-id`. + * If it is missing, we return instructions to the LLM on how to create it and pass it to the tool. + */ + if (apifyMcpServer.options.skyfireMode + && args['skyfire-pay-id'] === undefined + ) { + return { + content: [{ + type: 'text', + text: SKYFIRE_PAY_ID_MISSING_STRING, + }], + }; + } + + /** + * Create Apify token, for Skyfire mode use `skyfire-pay-id` and for normal mode use `apifyToken`. + */ + const apifyClient = apifyMcpServer.options.skyfireMode && typeof args['skyfire-pay-id'] === 'string' + ? 
new ApifyClient({ skyfirePayId: args['skyfire-pay-id'] }) + : new ApifyClient({ token: apifyToken }); + // Step 2: Call the Actor if (!input) { return { @@ -364,7 +401,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir }; } - const [actor] = await getActorsAsTools([actorName], apifyToken); + const [actor] = await getActorsAsTools([actorName], apifyClient); if (!actor) { return { @@ -389,7 +426,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir const callResult = await callActorGetDataset( actorName, input, - apifyToken, + apifyClient, callOptions, progressTracker, extra.signal, diff --git a/src/tools/build.ts b/src/tools/build.ts index 64fffeb5..be6044db 100644 --- a/src/tools/build.ts +++ b/src/tools/build.ts @@ -26,11 +26,10 @@ import { filterSchemaProperties, shortenProperties } from './utils.js'; */ export async function getActorDefinition( actorIdOrName: string, - apifyToken: string, + apifyClient: ApifyClient, limit: number = ACTOR_README_MAX_LENGTH, ): Promise { - const client = new ApifyClient({ token: apifyToken }); - const actorClient = client.actor(actorIdOrName); + const actorClient = apifyClient.actor(actorIdOrName); try { // Fetch actor details const actor = await actorClient.get(); @@ -123,7 +122,8 @@ export const actorDefinitionTool: ToolEntry = { const { args, apifyToken } = toolArgs; const parsed = getActorDefinitionArgsSchema.parse(args); - const v = await getActorDefinition(parsed.actorName, apifyToken, parsed.limit); + const apifyClient = new ApifyClient({ token: apifyToken }); + const v = await getActorDefinition(parsed.actorName, apifyClient, parsed.limit); if (!v) { return { content: [{ type: 'text', text: `Actor '${parsed.actorName}' not found.` }] }; } diff --git a/src/tools/fetch-actor-details.ts b/src/tools/fetch-actor-details.ts index ef3dbd74..32742be1 100644 --- a/src/tools/fetch-actor-details.ts +++ b/src/tools/fetch-actor-details.ts @@ -1,6 +1,7 @@ import { z } 
from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; +import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; import { fetchActorDetails } from '../utils/actor-details.js'; @@ -30,7 +31,8 @@ export const fetchActorDetailsTool: ToolEntry = { call: async (toolArgs) => { const { args, apifyToken } = toolArgs; const parsed = fetchActorDetailsToolArgsSchema.parse(args); - const details = await fetchActorDetails(apifyToken, parsed.actor); + const apifyClient = new ApifyClient({ token: apifyToken }); + const details = await fetchActorDetails(apifyClient, parsed.actor); if (!details) { return { content: [{ type: 'text', text: `Actor information for '${parsed.actor}' was not found. Please check the Actor ID or name and ensure the Actor exists.` }], diff --git a/src/tools/helpers.ts b/src/tools/helpers.ts index 47268897..d030d793 100644 --- a/src/tools/helpers.ts +++ b/src/tools/helpers.ts @@ -1,6 +1,7 @@ import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; +import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; import { ajv } from '../utils/ajv.js'; @@ -35,7 +36,8 @@ export const addTool: ToolEntry = { }; } - const tools = await apifyMcpServer.loadActorsAsTools([parsed.actor], apifyToken); + const apifyClient = new ApifyClient({ token: apifyToken }); + const tools = await apifyMcpServer.loadActorsAsTools([parsed.actor], apifyClient); /** * If no tools were found, return a message that the Actor was not found * instead of returning that non existent tool was added since the diff --git a/src/types.ts b/src/types.ts index bff0e89d..a90de611 100644 --- a/src/types.ts +++ b/src/types.ts @@ -296,3 +296,9 @@ export type PromptBase = Prompt & { export type ActorInputSchemaProperties = Record; export type DatasetItem = Record; +/** + * Apify token type. 
+ * + * Can be null or undefined in case of Skyfire requests. + */ +export type ApifyToken = string | null | undefined; diff --git a/src/utils/actor-details.ts b/src/utils/actor-details.ts index 494db6fb..3a8915bc 100644 --- a/src/utils/actor-details.ts +++ b/src/utils/actor-details.ts @@ -1,6 +1,6 @@ import type { Actor, Build } from 'apify-client'; -import { ApifyClient } from '../apify-client.js'; +import type { ApifyClient } from '../apify-client.js'; import { filterSchemaProperties, shortenProperties } from '../tools/utils.js'; import type { IActorInputSchema } from '../types.js'; import { formatActorToActorCard } from './actor-card.js'; @@ -14,11 +14,10 @@ export interface ActorDetailsResult { readme: string; } -export async function fetchActorDetails(apifyToken: string, actorName: string): Promise { - const client = new ApifyClient({ token: apifyToken }); +export async function fetchActorDetails(apifyClient: ApifyClient, actorName: string): Promise { const [actorInfo, buildInfo]: [Actor | undefined, Build | undefined] = await Promise.all([ - client.actor(actorName).get(), - client.actor(actorName).defaultBuild().then(async (build) => build.get()), + apifyClient.actor(actorName).get(), + apifyClient.actor(actorName).defaultBuild().then(async (build) => build.get()), ]); if (!actorInfo || !buildInfo || !buildInfo.actorDefinition) return null; const inputSchema = (buildInfo.actorDefinition.input || { diff --git a/src/utils/progress.ts b/src/utils/progress.ts index 385c90ff..893051c2 100644 --- a/src/utils/progress.ts +++ b/src/utils/progress.ts @@ -1,6 +1,6 @@ import type { ProgressNotification } from '@modelcontextprotocol/sdk/types.js'; -import { ApifyClient } from '../apify-client.js'; +import type { ApifyClient } from '../apify-client.js'; import { PROGRESS_NOTIFICATION_INTERVAL_MS } from '../const.js'; export class ProgressTracker { @@ -36,15 +36,14 @@ export class ProgressTracker { } } - startActorRunUpdates(runId: string, apifyToken: string, actorName: 
string): void { + startActorRunUpdates(runId: string, apifyClient: ApifyClient, actorName: string): void { this.stop(); - const client = new ApifyClient({ token: apifyToken }); let lastStatus = ''; let lastStatusMessage = ''; this.intervalId = setInterval(async () => { try { - const run = await client.run(runId).get(); + const run = await apifyClient.run(runId).get(); if (!run) return; const { status, statusMessage } = run; diff --git a/src/utils/tools-loader.ts b/src/utils/tools-loader.ts index f16953f4..f1ee806d 100644 --- a/src/utils/tools-loader.ts +++ b/src/utils/tools-loader.ts @@ -3,6 +3,8 @@ * This eliminates duplication between stdio.ts and processParamsGetTools. */ +import type { ApifyClient } from 'apify'; + import log from '@apify/log'; import { defaults } from '../const.js'; @@ -35,7 +37,7 @@ function getInternalToolByNameMap(): Map { */ export async function loadToolsFromInput( input: Input, - apifyToken: string, + apifyClient: ApifyClient, ): Promise { // Helpers for readability const normalizeSelectors = (value: Input['tools']): (string | ToolCategory)[] | undefined => { @@ -120,7 +122,7 @@ export async function loadToolsFromInput( // Actor tools (if any) if (actorNamesToLoad.length > 0) { - const actorTools = await getActorsAsTools(actorNamesToLoad, apifyToken); + const actorTools = await getActorsAsTools(actorNamesToLoad, apifyClient); result.push(...actorTools); } diff --git a/tests/integration/internals.test.ts b/tests/integration/internals.test.ts index 800d4ab5..9f348bb0 100644 --- a/tests/integration/internals.test.ts +++ b/tests/integration/internals.test.ts @@ -3,6 +3,7 @@ import { beforeAll, describe, expect, it } from 'vitest'; import log from '@apify/log'; import { actorNameToToolName } from '../../dist/tools/utils.js'; +import { ApifyClient } from '../../src/apify-client.js'; import { ActorsMcpServer } from '../../src/index.js'; import { addTool } from '../../src/tools/helpers.js'; import { getActorsAsTools } from 
'../../src/tools/index.js'; @@ -17,14 +18,15 @@ beforeAll(() => { describe('MCP server internals integration tests', () => { it('should load and restore tools from a tool list', async () => { - const actorsMcpServer = new ActorsMcpServer(false); + const actorsMcpServer = new ActorsMcpServer({ setupSigintHandler: false }); + const apifyClient = new ApifyClient({ token: process.env.APIFY_TOKEN }); const initialTools = await loadToolsFromInput({ enableAddingActors: true, - } as Input, process.env.APIFY_TOKEN as string); + } as Input, apifyClient); actorsMcpServer.upsertTools(initialTools); // Load new tool - const newTool = await getActorsAsTools([ACTOR_PYTHON_EXAMPLE], process.env.APIFY_TOKEN as string); + const newTool = await getActorsAsTools([ACTOR_PYTHON_EXAMPLE], apifyClient); actorsMcpServer.upsertTools(newTool); // Store the tool name list @@ -42,7 +44,7 @@ describe('MCP server internals integration tests', () => { expect(actorsMcpServer.listAllToolNames()).toEqual([]); // Load the tool state from the tool name list - await actorsMcpServer.loadToolsByName(names, process.env.APIFY_TOKEN as string); + await actorsMcpServer.loadToolsByName(names, apifyClient); // Check if the tool name list is restored expectArrayWeakEquals(actorsMcpServer.listAllToolNames(), expectedToolNames); @@ -59,14 +61,15 @@ describe('MCP server internals integration tests', () => { toolNotificationCount++; }; - const actorsMCPServer = new ActorsMcpServer(false); - const seeded = await loadToolsFromInput({ enableAddingActors: true } as Input, process.env.APIFY_TOKEN as string); + const actorsMCPServer = new ActorsMcpServer({ setupSigintHandler: false }); + const apifyClient = new ApifyClient({ token: process.env.APIFY_TOKEN }); + const seeded = await loadToolsFromInput({ enableAddingActors: true } as Input, apifyClient); actorsMCPServer.upsertTools(seeded); actorsMCPServer.registerToolsChangedHandler(onToolsChanged); // Add a new Actor const actor = ACTOR_PYTHON_EXAMPLE; - const newTool = 
await getActorsAsTools([actor], process.env.APIFY_TOKEN as string); + const newTool = await getActorsAsTools([actor], apifyClient); actorsMCPServer.upsertTools(newTool, true); // Check if the notification was received with the correct tools @@ -96,14 +99,15 @@ describe('MCP server internals integration tests', () => { notificationCount++; }; - const actorsMCPServer = new ActorsMcpServer(false); - const seeded = await loadToolsFromInput({ enableAddingActors: true } as Input, process.env.APIFY_TOKEN as string); + const actorsMCPServer = new ActorsMcpServer({ setupSigintHandler: false }); + const apifyClient = new ApifyClient({ token: process.env.APIFY_TOKEN }); + const seeded = await loadToolsFromInput({ enableAddingActors: true } as Input, apifyClient); actorsMCPServer.upsertTools(seeded); actorsMCPServer.registerToolsChangedHandler(onToolsChanged); // Add a new Actor const actor = ACTOR_PYTHON_EXAMPLE; - const newTool = await getActorsAsTools([actor], process.env.APIFY_TOKEN as string); + const newTool = await getActorsAsTools([actor], apifyClient); actorsMCPServer.upsertTools(newTool, true); // Check if the notification was received From dde4fd24e9094402bff3e7964d5d4c8eacbda75c Mon Sep 17 00:00:00 2001 From: MQ Date: Tue, 9 Sep 2025 15:13:40 +0200 Subject: [PATCH 12/18] add skyfire usage resource, fix skyfire pay id handling and passing to Actor --- src/const.ts | 8 ++++--- src/mcp/server.ts | 56 ++++++++++++++++++++++++++++++++++++++-------- src/tools/actor.ts | 4 ++-- 3 files changed, 54 insertions(+), 14 deletions(-) diff --git a/src/const.ts b/src/const.ts index 8871a9c2..a19a0250 100644 --- a/src/const.ts +++ b/src/const.ts @@ -53,11 +53,13 @@ export const defaults = { }; export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user.'; -export const SKYFIRE_MIN_CHARGE_USD = 0.5; +export const SKYFIRE_MIN_CHARGE_USD = 0.005; -export const SKYFIRE_PAY_ID_MISSING_STRING = `To run the Actor, you need to provide a Skyfire 
payment ID in the \`skyfire-pay-id\` input property. You first need to create the Skyfire PAY token by calling the \`create-pay-token\` tool from the Skyfire MCP server and then provide the created token in the \`skyfire-pay-id\` input property when calling the \`call-actor\` tool. The Skyfire PAY token is used for billing purposes and needs to be charged with at least ${SKYFIRE_MIN_CHARGE_USD} USD.`; +export const SKYFIRE_TOOL_INSTRUCTIONS = `To run the Actor, you need to provide a Skyfire payment ID in the \`skyfire-pay-id\` input property. You first need to create the Skyfire PAY token by calling the \`create-pay-token\` tool from the Skyfire MCP server and then provide the created token in the \`skyfire-pay-id\` input property when calling Actor tool. The Skyfire PAY token is used for billing purposes and needs to be charged with at least ${SKYFIRE_MIN_CHARGE_USD} USD.`; -export const SKYFIRE_PAY_ID_PROPERTY_DESCRIPTION = `Skyfire payment ID for the Actor run. Used for billing purposes. The PAY token needs to be charged with at least ${SKYFIRE_MIN_CHARGE_USD} USD. If you do not have the Skyfire payment ID, you can create it by calling the \`create-pay-token\` tool from the Skyfire MCP server.`; +export const SKYFIRE_PAY_ID_PROPERTY_DESCRIPTION = `Skyfire PAY token ID used for billing purposes.`; + +export const SKYFIRE_README_CONTENT = `The Apify MCP Server allows clients to interact with Apify Actors, storage, and documentation. Before running any Apify Actor, you need to set up a Skyfire PAY token for billing purposes. To set up the PAY token you need to use the create-pay-token tool from the Skyfire MCP server. 
The PAY token must be charged with a minimum of ${SKYFIRE_MIN_CHARGE_USD} USD to be valid.`; // Cache export const ACTOR_CACHE_MAX_SIZE = 500; diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 622d803d..5b4fa312 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -14,6 +14,7 @@ import { ListResourcesRequestSchema, ListResourceTemplatesRequestSchema, ListToolsRequestSchema, + ReadResourceRequestSchema, McpError, ServerNotificationSchema, SetLevelRequestSchema, @@ -27,8 +28,9 @@ import { ApifyClient } from '../apify-client.js'; import { SERVER_NAME, SERVER_VERSION, - SKYFIRE_PAY_ID_MISSING_STRING, + SKYFIRE_TOOL_INSTRUCTIONS, SKYFIRE_PAY_ID_PROPERTY_DESCRIPTION, + SKYFIRE_README_CONTENT, } from '../const.js'; import { prompts } from '../prompts/index.js'; import { callActorGetDataset, defaultTools, getActorsAsTools, toolCategories } from '../tools/index.js'; @@ -321,10 +323,45 @@ export class ActorsMcpServer { private setupResourceHandlers(): void { this.server.setRequestHandler(ListResourcesRequestSchema, async () => { - // No resources available, return empty response - return { resources: [] }; + /** + * Return the usage guide resource only if Skyfire mode is enabled. No resources otherwise for normal mode. + */ + if (this.options.skyfireMode) { + return { + resources: [ + { + uri: 'file://readme.md', + name: 'readme', + description: 'Apify MCP Server usage guide. 
Read this to understand how to use the server, especially in Skyfire mode before interacting with it.', + mimeType: 'text/markdown', + }, + ], + }; + } else { + return { resources: [] }; + } }); + if (this.options.skyfireMode) { + this.server.setRequestHandler(ReadResourceRequestSchema, async (request) => { + const { uri } = request.params; + if (uri === 'file://readme.md') { + return { + contents: [{ + uri: 'file://readme.md', + mimeType: 'text/markdown', + text: SKYFIRE_README_CONTENT, + }], + }; + } + return { + contents: [{ + uri, mimeType: 'text/plain', text: `Resource ${uri} not found`, + }] + }; + }); + } + this.server.setRequestHandler(ListResourceTemplatesRequestSchema, async () => { // No resource templates available, return empty response return { resourceTemplates: [] }; @@ -396,7 +433,7 @@ export class ActorsMcpServer { }; } // Update description to include Skyfire instructions - toolEntry.tool.description = `${SKYFIRE_PAY_ID_MISSING_STRING}\n\n${toolEntry.tool.description}`; + toolEntry.tool.description += `\n\n${SKYFIRE_TOOL_INSTRUCTIONS}`; } } } @@ -554,7 +591,7 @@ export class ActorsMcpServer { return { content: [{ type: 'text', - text: SKYFIRE_PAY_ID_MISSING_STRING, + text: SKYFIRE_TOOL_INSTRUCTIONS, }], }; } @@ -569,15 +606,16 @@ export class ActorsMcpServer { /** * Create Apify token, for Skyfire mode use `skyfire-pay-id` and for normal mode use `apifyToken`. */ - const apifyClient = this.options.skyfireMode && typeof args['skyfire-pay-id'] === 'string' - ? new ApifyClient({ skyfirePayId: args['skyfire-pay-id'] }) + const {'skyfire-pay-id': skyfirePayId, ...actorArgs} = args as Record; + const apifyClient = this.options.skyfireMode && typeof skyfirePayId === 'string' + ? 
new ApifyClient({ skyfirePayId }) : new ApifyClient({ token: apifyToken }); try { - log.info('Calling Actor', { actorName: actorTool.actorFullName, input: args }); + log.info('Calling Actor', { actorName: actorTool.actorFullName, input: actorArgs }); const callResult = await callActorGetDataset( actorTool.actorFullName, - args, + actorArgs, apifyClient, callOptions, progressTracker, diff --git a/src/tools/actor.ts b/src/tools/actor.ts index 4bf9f820..068d7b16 100644 --- a/src/tools/actor.ts +++ b/src/tools/actor.ts @@ -10,7 +10,7 @@ import { ACTOR_ADDITIONAL_INSTRUCTIONS, ACTOR_MAX_MEMORY_MBYTES, HelperTools, - SKYFIRE_PAY_ID_MISSING_STRING, + SKYFIRE_TOOL_INSTRUCTIONS, TOOL_MAX_OUTPUT_CHARS, } from '../const.js'; import { getActorMCPServerPath, getActorMCPServerURL } from '../mcp/actors.js'; @@ -380,7 +380,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir return { content: [{ type: 'text', - text: SKYFIRE_PAY_ID_MISSING_STRING, + text: SKYFIRE_TOOL_INSTRUCTIONS, }], }; } From 83067b145747c1b63adcfb244df519a3fe486993 Mon Sep 17 00:00:00 2001 From: MQ Date: Tue, 9 Sep 2025 15:19:29 +0200 Subject: [PATCH 13/18] add skyfire instructions also to the call-actor info step result content --- src/tools/actor.ts | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/tools/actor.ts b/src/tools/actor.ts index 068d7b16..31aab4ce 100644 --- a/src/tools/actor.ts +++ b/src/tools/actor.ts @@ -363,11 +363,19 @@ The step parameter enforces this workflow - you cannot call an Actor without fir content: [{ type: 'text', text: `Actor information for '${actorName}' was not found. 
Please check the Actor ID or name and ensure the Actor exists.` }], }; } - return { - content: [ - { type: 'text', text: `**Input Schema:**\n${JSON.stringify(details.inputSchema, null, 0)}` }, - ], - }; + const content = [ + { type: 'text', text: `**Input Schema:**\n${JSON.stringify(details.inputSchema, null, 0)}` }, + ]; + /** + * Add Skyfire instructions also in the info step since clients are most likely truncating the long tool description of the call-actor. + */ + if (apifyMcpServer.options.skyfireMode) { + content.push({ + type: 'text', + text: SKYFIRE_TOOL_INSTRUCTIONS, + }); + } + return { content }; } /** From 07a8d19f68f3772f4d6e733ffcdfcab9640a1d32 Mon Sep 17 00:00:00 2001 From: MQ Date: Tue, 9 Sep 2025 15:49:18 +0200 Subject: [PATCH 14/18] Squashed commit of the following: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 20e6753a9a7b7a69b44daee4863cfb578df0f0c2 Author: Apify Release Bot Date: Tue Sep 9 11:41:31 2025 +0000 chore(release): Update changelog, package.json and manifest.json versions [skip ci] commit 7ef726d59c49355dc5caa48def838a0f5ebf97c8 Author: Jakub Kopecký Date: Tue Sep 9 13:40:04 2025 +0200 feat: improve actor tool output (#260) * feat: improve actor tool output * update readme * fix output tool, write test for that * add test based on Zuzka suggestion * lint * fix output response order so LLM does not lose the instructions * refactor: unify string list parsing logic * fix the tests - order of the Actor run response messages * Update src/utils/schema-generation.ts Co-authored-by: Michal Kalita * address review comments * add get-actor-output tools note about when its loaded --------- Co-authored-by: Michal Kalita commit 279293f2ee0ca7ddf1bb935b1c6135747e79ede3 Author: Michal Kalita Date: Mon Sep 8 12:05:25 2025 +0200 fix: error when content type is json (#265) * fix: error when content type is json * fix: do not make json schema formatted for human readable commit 
4659e03d77bb56f025b02ad1333072bb9385c169 Author: Apify Release Bot Date: Thu Sep 4 12:30:50 2025 +0000 chore(release): Update changelog, package.json and manifest.json versions [skip ci] --- CHANGELOG.md | 22 ++++++++++++++++++++++ manifest.json | 2 +- package-lock.json | 4 ++-- package.json | 2 +- src/tools/actor.ts | 2 +- src/tools/get-actor-output.ts | 4 +++- 6 files changed, 30 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8509d20d..1f338fb8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,28 @@ All notable changes to this project will be documented in this file. +## [0.4.7](https://github.com/apify/apify-mcp-server/releases/tag/v0.4.7) (2025-09-09) + +### 🚀 Features + +- Improve actor tool output ([#260](https://github.com/apify/apify-mcp-server/pull/260)) ([7ef726d](https://github.com/apify/apify-mcp-server/commit/7ef726d59c49355dc5caa48def838a0f5ebf97c8)) by [@MQ37](https://github.com/MQ37) + +### 🐛 Bug Fixes + +- Error when content type is json ([#265](https://github.com/apify/apify-mcp-server/pull/265)) ([279293f](https://github.com/apify/apify-mcp-server/commit/279293f2ee0ca7ddf1bb935b1c6135747e79ede3)) by [@MichalKalita](https://github.com/MichalKalita), closes [#264](https://github.com/apify/apify-mcp-server/issues/264) + + +## [0.4.5](https://github.com/apify/apify-mcp-server/releases/tag/v0.4.5) (2025-09-04) + +### 🚀 Features + +- Cancellable Actor run ([#228](https://github.com/apify/apify-mcp-server/pull/228)) ([9fc9094](https://github.com/apify/apify-mcp-server/commit/9fc9094f65c5ac70ec8f3d8d6a43ac7839b34cb1)) by [@MichalKalita](https://github.com/MichalKalita), closes [#160](https://github.com/apify/apify-mcp-server/issues/160) + +### 🐛 Bug Fixes + +- Handle deprecated tool preview ([#251](https://github.com/apify/apify-mcp-server/pull/251)) ([ff565f7](https://github.com/apify/apify-mcp-server/commit/ff565f7a77b57dd847411466c961f427ba56c371)) by [@jirispilka](https://github.com/jirispilka), closes 
[#252](https://github.com/apify/apify-mcp-server/issues/252) + + ## [0.4.4](https://github.com/apify/apify-mcp-server/releases/tag/v0.4.4) (2025-08-28) ### 🐛 Bug Fixes diff --git a/manifest.json b/manifest.json index 7e4d614b..a56ed13c 100644 --- a/manifest.json +++ b/manifest.json @@ -1,7 +1,7 @@ { "dxt_version": "0.1", "name": "Apify", - "version": "0.4.4", + "version": "0.4.7", "description": "Extract data from any website using thousands of tools from the Apify Store.", "long_description": "Apify is the world's largest marketplace of tools for web scraping, data extraction, and web automation. You can extract structured data from social media, e-commerce, search engines, maps, travel sites, or any other website.", "keywords": [ diff --git a/package-lock.json b/package-lock.json index 9f8649c3..1c283901 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@apify/actors-mcp-server", - "version": "0.4.4", + "version": "0.4.7", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@apify/actors-mcp-server", - "version": "0.4.4", + "version": "0.4.7", "license": "MIT", "dependencies": { "@apify/datastructures": "^2.0.3", diff --git a/package.json b/package.json index 0ca0dee3..49a02e5f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@apify/actors-mcp-server", - "version": "0.4.4", + "version": "0.4.7", "type": "module", "description": "Apify MCP Server", "engines": { diff --git a/src/tools/actor.ts b/src/tools/actor.ts index 31aab4ce..dd5ddc32 100644 --- a/src/tools/actor.ts +++ b/src/tools/actor.ts @@ -425,7 +425,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir return { content: [ { type: 'text', text: `Input validation failed for Actor '${actorName}': ${errors.map((e) => e.message).join(', ')}` }, - { type: 'json', json: actor.tool.inputSchema }, + { type: 'text', text: `Input Schema:\n${JSON.stringify(actor.tool.inputSchema)}` }, ], }; } diff --git 
a/src/tools/get-actor-output.ts b/src/tools/get-actor-output.ts index cc7256e0..488d30e1 100644 --- a/src/tools/get-actor-output.ts +++ b/src/tools/get-actor-output.ts @@ -69,7 +69,9 @@ export const getActorOutput: ToolEntry = { name: HelperTools.ACTOR_OUTPUT_GET, actorFullName: HelperTools.ACTOR_OUTPUT_GET, description: `Retrieves the output of a specific Actor execution based on its dataset ID. -You also can retrieve only specific fields from the output if needed. Use this tool to get Actor output data outside of the Actor dataset output preview, or to access fields from the Actor output dataset schema that are not included in the preview.`, +You can also retrieve only specific fields from the output if needed. Use this tool to get Actor output data outside of the Actor dataset output preview, or to access fields from the Actor output dataset schema that are not included in the preview. + +Note: This tool is automatically included if the Apify MCP Server is configured with any Actor tools (e.g. \`apify-slash-rag-web-browser\`) or tools that can interact with Actors (e.g. 
\`call-actor\`, \`add-actor\`).`, inputSchema: zodToJsonSchema(getActorOutputArgs), ajvValidate: ajv.compile(zodToJsonSchema(getActorOutputArgs)), call: async (toolArgs) => { From 575f4cfbd9e744e4c15fb0009602a9c3616486fd Mon Sep 17 00:00:00 2001 From: MQ Date: Tue, 9 Sep 2025 16:19:18 +0200 Subject: [PATCH 15/18] fix the port already in use issue with tests --- tests/integration/actor.server-sse.test.ts | 12 +++++++++--- .../integration/actor.server-streamable.test.ts | 12 +++++++++--- tests/integration/utils/port.ts | 17 +++++++++++++++++ 3 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 tests/integration/utils/port.ts diff --git a/tests/integration/actor.server-sse.test.ts b/tests/integration/actor.server-sse.test.ts index 6142cfc1..802dc3e9 100644 --- a/tests/integration/actor.server-sse.test.ts +++ b/tests/integration/actor.server-sse.test.ts @@ -7,12 +7,13 @@ import log from '@apify/log'; import { createExpressApp } from '../../src/actor/server.js'; import { createMcpSseClient } from '../helpers.js'; import { createIntegrationTestsSuite } from './suite.js'; +import { getAvailablePort } from './utils/port.js'; let app: Express; let httpServer: HttpServer; -const httpServerPort = 50000; -const httpServerHost = `http://localhost:${httpServerPort}`; -const mcpUrl = `${httpServerHost}/sse`; +let httpServerPort: number; +let httpServerHost: string; +let mcpUrl: string; createIntegrationTestsSuite({ suiteName: 'Apify MCP Server SSE', @@ -20,6 +21,11 @@ createIntegrationTestsSuite({ createClientFn: async (options) => await createMcpSseClient(mcpUrl, options), beforeAllFn: async () => { log.setLevel(log.LEVELS.OFF); + + // Get an available port + httpServerPort = await getAvailablePort(); + httpServerHost = `http://localhost:${httpServerPort}`; + mcpUrl = `${httpServerHost}/sse`; // Create an express app app = createExpressApp(httpServerHost); diff --git a/tests/integration/actor.server-streamable.test.ts 
b/tests/integration/actor.server-streamable.test.ts index 56aa5226..e0fa7c17 100644 --- a/tests/integration/actor.server-streamable.test.ts +++ b/tests/integration/actor.server-streamable.test.ts @@ -7,12 +7,13 @@ import log from '@apify/log'; import { createExpressApp } from '../../src/actor/server.js'; import { createMcpStreamableClient } from '../helpers.js'; import { createIntegrationTestsSuite } from './suite.js'; +import { getAvailablePort } from './utils/port.js'; let app: Express; let httpServer: HttpServer; -const httpServerPort = 50001; -const httpServerHost = `http://localhost:${httpServerPort}`; -const mcpUrl = `${httpServerHost}/mcp`; +let httpServerPort: number; +let httpServerHost: string; +let mcpUrl: string; createIntegrationTestsSuite({ suiteName: 'Apify MCP Server Streamable HTTP', @@ -20,6 +21,11 @@ createIntegrationTestsSuite({ createClientFn: async (options) => await createMcpStreamableClient(mcpUrl, options), beforeAllFn: async () => { log.setLevel(log.LEVELS.OFF); + + // Get an available port + httpServerPort = await getAvailablePort(); + httpServerHost = `http://localhost:${httpServerPort}`; + mcpUrl = `${httpServerHost}/mcp`; // Create an express app app = createExpressApp(httpServerHost); diff --git a/tests/integration/utils/port.ts b/tests/integration/utils/port.ts new file mode 100644 index 00000000..0c36c3cf --- /dev/null +++ b/tests/integration/utils/port.ts @@ -0,0 +1,17 @@ +import { createServer } from 'node:net'; + +/** + * Finds an available port by letting the OS assign one dynamically. + * This avoids "address already in use" errors, which make the tests flaky.
+ * @returns Promise - An available port assigned by the OS + */ +export async function getAvailablePort(): Promise { + return new Promise((resolve, reject) => { + const server = createServer(); + server.listen(0, () => { + const { port } = server.address() as { port: number }; + server.close(() => resolve(port)); + }); + server.on('error', reject); + }); +} \ No newline at end of file From 143833aa511256e1a2abca6cb5ec4ee95b263726 Mon Sep 17 00:00:00 2001 From: MQ Date: Tue, 9 Sep 2025 16:20:12 +0200 Subject: [PATCH 16/18] lint --- src/mcp/server.ts | 23 +++++++++---------- tests/integration/actor.server-sse.test.ts | 2 +- .../actor.server-streamable.test.ts | 2 +- tests/integration/utils/port.ts | 2 +- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 5b4fa312..be47c6ac 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -14,8 +14,8 @@ import { ListResourcesRequestSchema, ListResourceTemplatesRequestSchema, ListToolsRequestSchema, - ReadResourceRequestSchema, McpError, + ReadResourceRequestSchema, ServerNotificationSchema, SetLevelRequestSchema, } from '@modelcontextprotocol/sdk/types.js'; @@ -28,9 +28,9 @@ import { ApifyClient } from '../apify-client.js'; import { SERVER_NAME, SERVER_VERSION, - SKYFIRE_TOOL_INSTRUCTIONS, SKYFIRE_PAY_ID_PROPERTY_DESCRIPTION, SKYFIRE_README_CONTENT, + SKYFIRE_TOOL_INSTRUCTIONS, } from '../const.js'; import { prompts } from '../prompts/index.js'; import { callActorGetDataset, defaultTools, getActorsAsTools, toolCategories } from '../tools/index.js'; @@ -332,14 +332,13 @@ export class ActorsMcpServer { { uri: 'file://readme.md', name: 'readme', - description: 'Apify MCP Server usage guide. Read this to understand how to use the server, especially in Skyfire mode before interacting with it.', + description: `Apify MCP Server usage guide. 
Read this to understand how to use the server, especially in Skyfire mode before interacting with it.`, mimeType: 'text/markdown', }, ], }; - } else { - return { resources: [] }; } + return { resources: [] }; }); if (this.options.skyfireMode) { @@ -348,16 +347,16 @@ export class ActorsMcpServer { if (uri === 'file://readme.md') { return { contents: [{ - uri: 'file://readme.md', - mimeType: 'text/markdown', - text: SKYFIRE_README_CONTENT, - }], - }; + uri: 'file://readme.md', + mimeType: 'text/markdown', + text: SKYFIRE_README_CONTENT, + }], + }; } return { contents: [{ uri, mimeType: 'text/plain', text: `Resource ${uri} not found`, - }] + }], }; }); } @@ -606,7 +605,7 @@ export class ActorsMcpServer { /** * Create Apify token, for Skyfire mode use `skyfire-pay-id` and for normal mode use `apifyToken`. */ - const {'skyfire-pay-id': skyfirePayId, ...actorArgs} = args as Record; + const { 'skyfire-pay-id': skyfirePayId, ...actorArgs } = args as Record; const apifyClient = this.options.skyfireMode && typeof skyfirePayId === 'string' ? 
new ApifyClient({ skyfirePayId }) : new ApifyClient({ token: apifyToken }); diff --git a/tests/integration/actor.server-sse.test.ts b/tests/integration/actor.server-sse.test.ts index 802dc3e9..a75408d7 100644 --- a/tests/integration/actor.server-sse.test.ts +++ b/tests/integration/actor.server-sse.test.ts @@ -21,7 +21,7 @@ createIntegrationTestsSuite({ createClientFn: async (options) => await createMcpSseClient(mcpUrl, options), beforeAllFn: async () => { log.setLevel(log.LEVELS.OFF); - + // Get an available port httpServerPort = await getAvailablePort(); httpServerHost = `http://localhost:${httpServerPort}`; diff --git a/tests/integration/actor.server-streamable.test.ts b/tests/integration/actor.server-streamable.test.ts index e0fa7c17..c21923b3 100644 --- a/tests/integration/actor.server-streamable.test.ts +++ b/tests/integration/actor.server-streamable.test.ts @@ -21,7 +21,7 @@ createIntegrationTestsSuite({ createClientFn: async (options) => await createMcpStreamableClient(mcpUrl, options), beforeAllFn: async () => { log.setLevel(log.LEVELS.OFF); - + // Get an available port httpServerPort = await getAvailablePort(); httpServerHost = `http://localhost:${httpServerPort}`; diff --git a/tests/integration/utils/port.ts b/tests/integration/utils/port.ts index 0c36c3cf..30d6b329 100644 --- a/tests/integration/utils/port.ts +++ b/tests/integration/utils/port.ts @@ -14,4 +14,4 @@ export async function getAvailablePort(): Promise { }); server.on('error', reject); }); -} \ No newline at end of file +} From b9000e36101fe9a31bb198f55908e10bb25f1898 Mon Sep 17 00:00:00 2001 From: MQ Date: Tue, 9 Sep 2025 16:31:54 +0200 Subject: [PATCH 17/18] remove the try catch that was rethrowing generic error in callActorGetDataset --- src/tools/actor.ts | 130 +++++++++++++++++++++------------------------ 1 file changed, 62 insertions(+), 68 deletions(-) diff --git a/src/tools/actor.ts b/src/tools/actor.ts index dd5ddc32..139ac558 100644 --- a/src/tools/actor.ts +++ b/src/tools/actor.ts 
@@ -47,9 +47,9 @@ export type CallActorGetDatasetResult = { * If the `APIFY_IS_AT_HOME` the dataset items are pushed to the Apify dataset. * * @param {string} actorName - The name of the Actor to call. - * @param {ActorCallOptions} callOptions - The options to pass to the Actor. * @param {unknown} input - The input to pass to the actor. - * @param {string} apifyToken - The Apify token to use for authentication. + * @param {ApifyClient} apifyClient - The Apify client to use for authentication. + * @param {ActorCallOptions} callOptions - The options to pass to the Actor. * @param {ProgressTracker} progressTracker - Optional progress tracker for real-time updates. * @param {AbortSignal} abortSignal - Optional abort signal to cancel the actor run. * @returns {Promise} - A promise that resolves to an object containing the actor run and dataset items. @@ -64,77 +64,71 @@ export async function callActorGetDataset( abortSignal?: AbortSignal, ): Promise { const CLIENT_ABORT = Symbol('CLIENT_ABORT'); // Just internal symbol to identify client abort - // TODO: we should remove this throw, we are just catching and then rethrowing with generic message - try { - const actorClient = apifyClient.actor(actorName); + const actorClient = apifyClient.actor(actorName); - // Start the actor run - const actorRun: ActorRun = await actorClient.start(input, callOptions); + // Start the actor run + const actorRun: ActorRun = await actorClient.start(input, callOptions); - // Start progress tracking if tracker is provided - if (progressTracker) { - progressTracker.startActorRunUpdates(actorRun.id, apifyClient, actorName); - } - - // Create abort promise that handles both API abort and race rejection - const abortPromise = async () => new Promise((resolve) => { - abortSignal?.addEventListener('abort', async () => { - // Abort the actor run via API - try { - await apifyClient.run(actorRun.id).abort({ gracefully: false }); - } catch (e) { - log.error('Error aborting Actor run', { error: e, runId: 
actorRun.id }); - } - // Reject to stop waiting - resolve(CLIENT_ABORT); - }, { once: true }); - }); + // Start progress tracking if tracker is provided + if (progressTracker) { + progressTracker.startActorRunUpdates(actorRun.id, apifyClient, actorName); + } - // Wait for completion or cancellation - const potentialAbortedRun = await Promise.race([ - apifyClient.run(actorRun.id).waitForFinish(), - ...(abortSignal ? [abortPromise()] : []), - ]); + // Create abort promise that handles both API abort and race rejection + const abortPromise = async () => new Promise((resolve) => { + abortSignal?.addEventListener('abort', async () => { + // Abort the actor run via API + try { + await apifyClient.run(actorRun.id).abort({ gracefully: false }); + } catch (e) { + log.error('Error aborting Actor run', { error: e, runId: actorRun.id }); + } + // Reject to stop waiting + resolve(CLIENT_ABORT); + }, { once: true }); + }); + + // Wait for completion or cancellation + const potentialAbortedRun = await Promise.race([ + apifyClient.run(actorRun.id).waitForFinish(), + ...(abortSignal ? [abortPromise()] : []), + ]); - if (potentialAbortedRun === CLIENT_ABORT) { - log.info('Actor run aborted by client', { actorName, input }); - return null; - } - const completedRun = potentialAbortedRun as ActorRun; - - // Process the completed run - const dataset = apifyClient.dataset(completedRun.defaultDatasetId); - const [datasetItems, defaultBuild] = await Promise.all([ - dataset.listItems(), - (await actorClient.defaultBuild()).get(), - ]); - - // Generate schema using the shared utility - const generatedSchema = generateSchemaFromItems(datasetItems.items, { - clean: true, - arrayMode: 'all', - }); - const schema = generatedSchema || { type: 'object', properties: {} }; - - /** - * Get important fields that are using in any dataset view as they MAY be used in filtering to ensure the output fits - * the tool output limits. 
Client has to use the get-actor-output tool to retrieve the full dataset or filtered out fields. - */ - const storageDefinition = defaultBuild?.actorDefinition?.storages?.dataset as ActorDefinitionStorage | undefined; - const importantProperties = getActorDefinitionStorageFieldNames(storageDefinition || {}); - const previewItems = ensureOutputWithinCharLimit(datasetItems.items, importantProperties, TOOL_MAX_OUTPUT_CHARS); - - return { - runId: actorRun.id, - datasetId: completedRun.defaultDatasetId, - itemCount: datasetItems.count, - schema, - previewItems, - }; - } catch (error) { - log.error('Error calling Actor', { error, actorName, input }); - throw new Error(`Error calling Actor: ${error}`); + if (potentialAbortedRun === CLIENT_ABORT) { + log.info('Actor run aborted by client', { actorName, input }); + return null; } + const completedRun = potentialAbortedRun as ActorRun; + + // Process the completed run + const dataset = apifyClient.dataset(completedRun.defaultDatasetId); + const [datasetItems, defaultBuild] = await Promise.all([ + dataset.listItems(), + (await actorClient.defaultBuild()).get(), + ]); + + // Generate schema using the shared utility + const generatedSchema = generateSchemaFromItems(datasetItems.items, { + clean: true, + arrayMode: 'all', + }); + const schema = generatedSchema || { type: 'object', properties: {} }; + + /** + * Get important fields that are used in any dataset view as they MAY be used in filtering to ensure the output fits + * the tool output limits. Client has to use the get-actor-output tool to retrieve the full dataset or filtered out fields.
+ */ + const storageDefinition = defaultBuild?.actorDefinition?.storages?.dataset as ActorDefinitionStorage | undefined; + const importantProperties = getActorDefinitionStorageFieldNames(storageDefinition || {}); + const previewItems = ensureOutputWithinCharLimit(datasetItems.items, importantProperties, TOOL_MAX_OUTPUT_CHARS); + + return { + runId: actorRun.id, + datasetId: completedRun.defaultDatasetId, + itemCount: datasetItems.count, + schema, + previewItems, + }; } /** From 72d0a7ed31bbbb154d959ec962b8fb8de98371f2 Mon Sep 17 00:00:00 2001 From: Jiri Spilka Date: Wed, 10 Sep 2025 11:01:43 +0200 Subject: [PATCH 18/18] feat: Add apifyClient factory --- src/actor/server.ts | 7 +-- src/apify-client-factory.ts | 100 +++++++++++++++++++++++++++++++ src/tools/actor.ts | 16 ++--- src/tools/build.ts | 9 +-- src/tools/fetch-actor-details.ts | 6 +- src/utils/actor-details.ts | 5 +- src/utils/tools-loader.ts | 11 ++-- 7 files changed, 128 insertions(+), 26 deletions(-) create mode 100644 src/apify-client-factory.ts diff --git a/src/actor/server.ts b/src/actor/server.ts index 83d1beae..3506e4f0 100644 --- a/src/actor/server.ts +++ b/src/actor/server.ts @@ -11,7 +11,7 @@ import express from 'express'; import log from '@apify/log'; -import { ApifyClient } from '../apify-client.js'; +import { resolveApifyClient } from '../apify-client-factory.js'; import { ActorsMcpServer } from '../mcp/server.js'; import { getHelpMessage, HEADER_READINESS_PROBE, Routes, TransportType } from './const.js'; import { getActorRunData } from './utils.js'; @@ -74,9 +74,8 @@ export function createExpressApp( const transport = new SSEServerTransport(Routes.MESSAGE, res); // Load MCP server tools - const apifyToken = process.env.APIFY_TOKEN as string; log.debug('Loading tools from URL', { sessionId: transport.sessionId, tr: TransportType.SSE }); - const apifyClient = new ApifyClient({ token: apifyToken }); + const apifyClient = resolveApifyClient({ token: null }, { sessionId: transport.sessionId }); 
await mcpServer.loadToolsFromUrl(req.url, apifyClient); transportsSSE[transport.sessionId] = transport; @@ -159,7 +158,7 @@ export function createExpressApp( // Load MCP server tools const apifyToken = process.env.APIFY_TOKEN as string; log.debug('Loading tools from URL', { sessionId: transport.sessionId, tr: TransportType.HTTP }); - const apifyClient = new ApifyClient({ token: apifyToken }); + const apifyClient = resolveApifyClient({ token: apifyToken }, { sessionId: transport.sessionId }); await mcpServer.loadToolsFromUrl(req.url, apifyClient); // Connect the transport to the MCP server BEFORE handling the request diff --git a/src/apify-client-factory.ts b/src/apify-client-factory.ts new file mode 100644 index 00000000..f5286d10 --- /dev/null +++ b/src/apify-client-factory.ts @@ -0,0 +1,100 @@ +import type { ApifyClientOptions } from 'apify'; + +import { ApifyClient, getApifyAPIBaseUrl } from './apify-client.js'; + +/** + * Context passed to getApifyClient factory. Useful for per-session overrides. + * - sessionId: a stable identifier (e.g., MCP transport session) you can use to + * memoize clients and avoid recreating them for every request. + * - headers: request-scoped headers (e.g., "skyfire-pay-id") that should be + * propagated to the Apify API calls. If provided, resolveApifyClient prefers + * these over static options to prevent header leakage across sessions. + */ +export interface ResolveClientContext { + sessionId?: string; + headers?: Record; +} + +/** + * Options for resolving an ApifyClient. You can: + * - Inject an already constructed client via `apifyClient`. + * - Provide a factory `getApifyClient(ctx)` to build per-session clients. + * - Or let the helper construct a client from `token`/`baseUrl`/`skyfirePayId`. + * + * Precedence (highest to lowest): getApifyClient(ctx) -> apifyClient -> construct from options/env. + * + * Notes + * - token: If omitted, resolveApifyClient falls back to process.env.APIFY_TOKEN. 
+ * - baseUrl: If omitted, uses getApifyAPIBaseUrl() which respects APIFY_API_BASE_URL + * and special AT_HOME handling. + * - skyfirePayId: Forwarded to our ApifyClient wrapper which adds an interceptor to + * set the "skyfire-pay-id" HTTP header. When a header is present in ctx.headers, + * it overrides this option for the current resolution. + */ +export interface ResolveClientOptions extends Omit { + // Convenience auth/config + token?: string | null | undefined; + baseUrl?: string; + skyfirePayId?: string; + // Direct injection or factory + apifyClient?: ApifyClient; + getApifyClient?: (ctx?: ResolveClientContext) => ApifyClient; +} + +/** + * Resolve an ApifyClient instance from multiple inputs in a consistent order: + * 1) If getApifyClient provided, call it with the context. + * 2) Else if apifyClient provided, return it as-is. + * 3) Else construct a new ApifyClient using provided token/baseUrl/skyfirePayId/options. + * - baseUrl falls back to getApifyAPIBaseUrl(). + * + * Examples + * -------- + * 1) Simplest: use env APIFY_TOKEN + * const client = resolveApifyClient(); + * + * 2) Pass a token explicitly + * const client = resolveApifyClient({ token: 'apify-XXX' }); + * + * 3) Inject a prebuilt client (useful for tests or custom interceptors) + * const injected = new ApifyClient({ token: 'apify-XXX' }); + * const client = resolveApifyClient({ apifyClient: injected }); + * + * 4) Use a factory to provide per-session clients and headers + * const clients = new Map(); + * function getApifyClient(ctx?: ResolveClientContext) { + * const id = ctx?.sessionId ?? 'default'; + * const skyfire = (ctx?.headers?.['skyfire-pay-id'] as string | undefined) ?? 
'global-skyfire'; + * let c = clients.get(id); + * if (!c) { + * c = new ApifyClient({ token: process.env.APIFY_TOKEN, skyfirePayId: skyfire }); + * clients.set(id, c); + * } + * return c; + * } + * const client = resolveApifyClient({ getApifyClient }, { sessionId: 's1', headers: { 'skyfire-pay-id': 'per-session' } }); + * + * 5) Change base URL (e.g., staging); a null token still falls back to process.env.APIFY_TOKEN, so the client is unauthenticated only when that variable is unset + * const client = resolveApifyClient({ baseUrl: 'https://api.staging.apify.com', token: null }); + */ +export function resolveApifyClient(options: ResolveClientOptions = {}, ctx?: ResolveClientContext): ApifyClient { + if (typeof options.getApifyClient === 'function') { + return options.getApifyClient(ctx); + } + if (options.apifyClient) { + return options.apifyClient; + } + + const { token, baseUrl, skyfirePayId, getApifyClient: _ignored, apifyClient: _ignored2, ...rest } = options; + + // If ctx carries a skyfire-pay-id header, prefer it over provided option to support per-session overrides + const headerSkyfire = ctx?.headers?.['skyfire-pay-id'] as string | undefined; + + return new ApifyClient({ + ...(rest as ApifyClientOptions), + token: token ?? process.env.APIFY_TOKEN, + baseUrl: baseUrl ?? getApifyAPIBaseUrl(), + // Our ApifyClient wrapper supports this custom option to inject header via interceptor + skyfirePayId: headerSkyfire ??
skyfirePayId, + } as unknown as ApifyClientOptions & { token?: string | null; skyfirePayId?: string }); +} diff --git a/src/tools/actor.ts b/src/tools/actor.ts index 139ac558..16723105 100644 --- a/src/tools/actor.ts +++ b/src/tools/actor.ts @@ -6,6 +6,7 @@ import zodToJsonSchema from 'zod-to-json-schema'; import log from '@apify/log'; import { ApifyClient } from '../apify-client.js'; +import { resolveApifyClient } from '../apify-client-factory.js'; import { ACTOR_ADDITIONAL_INSTRUCTIONS, ACTOR_MAX_MEMORY_MBYTES, @@ -246,10 +247,12 @@ async function getMCPServersAsTools( export async function getActorsAsTools( actorIdsOrNames: string[], - apifyClient: ApifyClient, + apifyClient?: ApifyClient, ): Promise { log.debug('Fetching Actors as tools', { actorNames: actorIdsOrNames }); + const client = apifyClient ?? resolveApifyClient({ token: null }); + const actorsInfo: (ActorInfo | null)[] = await Promise.all( actorIdsOrNames.map(async (actorIdOrName) => { const actorDefinitionPrunedCached = actorDefinitionPrunedCache.get(actorIdOrName); @@ -261,7 +264,7 @@ export async function getActorsAsTools( } as ActorInfo; } - const actorDefinitionPruned = await getActorDefinition(actorIdOrName, apifyClient); + const actorDefinitionPruned = await getActorDefinition(actorIdOrName, client); if (!actorDefinitionPruned) { log.error('Actor not found or definition is not available', { actorName: actorIdOrName }); return null; @@ -283,7 +286,7 @@ export async function getActorsAsTools( const [normalTools, mcpServerTools] = await Promise.all([ getNormalActorsAsTools(normalActorsInfo), - getMCPServersAsTools(actorMCPServersInfo, apifyClient.token), + getMCPServersAsTools(actorMCPServersInfo, (client as ApifyClient).token), ]); return [...normalTools, ...mcpServerTools]; @@ -332,7 +335,7 @@ Step 1: Get Actor Info (step="info", default) • This returns the Actor description, documentation, and required input schema • You MUST do this step first - it's required to understand how to call the 
Actor -Step 2: Call Actor (step="call") +Step 2: Call Actor (step="call") • Only after step 1, call again with step="call" and proper input based on the schema • This executes the Actor and returns the results @@ -349,9 +352,8 @@ The step parameter enforces this workflow - you cannot call an Actor without fir try { if (step === 'info') { - const apifyClient = new ApifyClient({ token: apifyToken }); - // Step 1: Return Actor card and schema directly - const details = await fetchActorDetails(apifyClient, actorName); + // Step 1: Return Actor card and schema directly (no token needed) + const details = await fetchActorDetails(actorName); if (!details) { return { content: [{ type: 'text', text: `Actor information for '${actorName}' was not found. Please check the Actor ID or name and ensure the Actor exists.` }], diff --git a/src/tools/build.ts b/src/tools/build.ts index be6044db..14789803 100644 --- a/src/tools/build.ts +++ b/src/tools/build.ts @@ -3,7 +3,8 @@ import zodToJsonSchema from 'zod-to-json-schema'; import log from '@apify/log'; -import { ApifyClient } from '../apify-client.js'; +import type { ApifyClient } from '../apify-client.js'; +import { resolveApifyClient } from '../apify-client-factory.js'; import { ACTOR_README_MAX_LENGTH, HelperTools } from '../const.js'; import type { ActorDefinitionPruned, @@ -21,7 +22,7 @@ import { filterSchemaProperties, shortenProperties } from './utils.js'; * Then, fetch the build details and return actorName, description, and input schema. * @param {string} actorIdOrName - Actor ID or Actor full name. * @param {number} limit - Truncate the README to this limit. - * @param {string} apifyToken + * @param {ApifyClient} apifyClient - The Apify client used to fetch the Actor definition. * @returns {Promise} - The actor definition with description or null if not found.
*/ export async function getActorDefinition( @@ -119,10 +120,10 @@ export const actorDefinitionTool: ToolEntry = { inputSchema: zodToJsonSchema(getActorDefinitionArgsSchema), ajvValidate: ajv.compile(zodToJsonSchema(getActorDefinitionArgsSchema)), call: async (toolArgs) => { - const { args, apifyToken } = toolArgs; + const { args } = toolArgs; const parsed = getActorDefinitionArgsSchema.parse(args); - const apifyClient = new ApifyClient({ token: apifyToken }); + const apifyClient = resolveApifyClient({ token: null }); const v = await getActorDefinition(parsed.actorName, apifyClient, parsed.limit); if (!v) { return { content: [{ type: 'text', text: `Actor '${parsed.actorName}' not found.` }] }; diff --git a/src/tools/fetch-actor-details.ts b/src/tools/fetch-actor-details.ts index 32742be1..79ca8b67 100644 --- a/src/tools/fetch-actor-details.ts +++ b/src/tools/fetch-actor-details.ts @@ -1,7 +1,6 @@ import { z } from 'zod'; import zodToJsonSchema from 'zod-to-json-schema'; -import { ApifyClient } from '../apify-client.js'; import { HelperTools } from '../const.js'; import type { InternalTool, ToolEntry } from '../types.js'; import { fetchActorDetails } from '../utils/actor-details.js'; @@ -29,10 +28,9 @@ export const fetchActorDetailsTool: ToolEntry = { inputSchema: zodToJsonSchema(fetchActorDetailsToolArgsSchema), ajvValidate: ajv.compile(zodToJsonSchema(fetchActorDetailsToolArgsSchema)), call: async (toolArgs) => { - const { args, apifyToken } = toolArgs; + const { args } = toolArgs; const parsed = fetchActorDetailsToolArgsSchema.parse(args); - const apifyClient = new ApifyClient({ token: apifyToken }); - const details = await fetchActorDetails(apifyClient, parsed.actor); + const details = await fetchActorDetails(parsed.actor); if (!details) { return { content: [{ type: 'text', text: `Actor information for '${parsed.actor}' was not found. 
Please check the Actor ID or name and ensure the Actor exists.` }], diff --git a/src/utils/actor-details.ts b/src/utils/actor-details.ts index 3a8915bc..90eba141 100644 --- a/src/utils/actor-details.ts +++ b/src/utils/actor-details.ts @@ -1,6 +1,6 @@ import type { Actor, Build } from 'apify-client'; -import type { ApifyClient } from '../apify-client.js'; +import { resolveApifyClient } from '../apify-client-factory.js'; import { filterSchemaProperties, shortenProperties } from '../tools/utils.js'; import type { IActorInputSchema } from '../types.js'; import { formatActorToActorCard } from './actor-card.js'; @@ -14,7 +14,8 @@ export interface ActorDetailsResult { readme: string; } -export async function fetchActorDetails(apifyClient: ApifyClient, actorName: string): Promise { +export async function fetchActorDetails(actorName: string): Promise { + const apifyClient = resolveApifyClient({ token: null }); const [actorInfo, buildInfo]: [Actor | undefined, Build | undefined] = await Promise.all([ apifyClient.actor(actorName).get(), apifyClient.actor(actorName).defaultBuild().then(async (build) => build.get()), diff --git a/src/utils/tools-loader.ts b/src/utils/tools-loader.ts index f1ee806d..bcf7e6af 100644 --- a/src/utils/tools-loader.ts +++ b/src/utils/tools-loader.ts @@ -3,10 +3,10 @@ * This eliminates duplication between stdio.ts and processParamsGetTools. */ -import type { ApifyClient } from 'apify'; - import log from '@apify/log'; +import type { ApifyClient } from '../apify-client.js'; +import { resolveApifyClient } from '../apify-client-factory.js'; import { defaults } from '../const.js'; import { callActor } from '../tools/actor.js'; import { getActorOutput } from '../tools/get-actor-output.js'; @@ -32,12 +32,12 @@ function getInternalToolByNameMap(): Map { * This function is used by both the stdio.ts and the processParamsGetTools function. 
* * @param input The processed Input object - * @param apifyToken The Apify API token + * @param apifyClient * @returns An array of tool entries */ export async function loadToolsFromInput( input: Input, - apifyClient: ApifyClient, + apifyClient?: ApifyClient, ): Promise { // Helpers for readability const normalizeSelectors = (value: Input['tools']): (string | ToolCategory)[] | undefined => { @@ -122,7 +122,8 @@ export async function loadToolsFromInput( // Actor tools (if any) if (actorNamesToLoad.length > 0) { - const actorTools = await getActorsAsTools(actorNamesToLoad, apifyClient); + const client = apifyClient ?? resolveApifyClient({ token: null }); + const actorTools = await getActorsAsTools(actorNamesToLoad, client); result.push(...actorTools); }