Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions src/const.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,7 @@ export const ACTOR_OUTPUT_MAX_CHARS_PER_ITEM = 5_000;
export const ACTOR_OUTPUT_TRUNCATED_MESSAGE = `Output was truncated because it will not fit into context.`
+ `There is no reason to call this tool again! You can use ${HelperTools.DATASET_GET_ITEMS} tool to get more items from the dataset.`;

export const ACTOR_ADDITIONAL_INSTRUCTIONS = `Never call/execute tool/Actor unless confirmed by the user.
Workflow: When an Actor runs, it processes data and stores results in Apify storage,
Datasets (for structured/tabular data) and Key-Value Store (for various data types like JSON, images, HTML).
Each Actor run produces a dataset ID and key-value store ID for accessing the results.
By default, the number of items returned from an Actor run is limited to ${ACTOR_RUN_DATASET_OUTPUT_MAX_ITEMS}.
You can always use ${HelperTools.DATASET_GET_ITEMS} tool to get more items from the dataset.
Actor run input is always stored in the key-value store, recordKey: INPUT.`;
export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user.';

export const ACTOR_CACHE_MAX_SIZE = 500;
export const ACTOR_CACHE_TTL_SECS = 30 * 60; // 30 minutes
Expand Down
4 changes: 2 additions & 2 deletions src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ if (STANDBY_MODE) {
await Actor.fail('If you need to debug a specific Actor, please provide the debugActor and debugActorInput fields in the input');
}
const options = { memory: input.maxActorMemoryBytes } as ActorCallOptions;
const { datasetInfo, items } = await callActorGetDataset(input.debugActor!, input.debugActorInput!, process.env.APIFY_TOKEN, options);
const { items } = await callActorGetDataset(input.debugActor!, input.debugActorInput!, process.env.APIFY_TOKEN, options);

await Actor.pushData(items);
log.info(`Pushed ${datasetInfo?.itemCount} items to the dataset`);
log.info(`Pushed ${items.count} items to the dataset`);
await Actor.exit();
}

Expand Down
25 changes: 9 additions & 16 deletions src/mcp/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ import { type ActorCallOptions, ApifyApiError } from 'apify-client';
import log from '@apify/log';

import {
ACTOR_OUTPUT_MAX_CHARS_PER_ITEM,
ACTOR_OUTPUT_TRUNCATED_MESSAGE,
defaults,
SERVER_NAME,
SERVER_VERSION,
Expand Down Expand Up @@ -468,25 +466,20 @@ export class ActorsMcpServer {
const actorTool = tool.tool as ActorTool;

const callOptions: ActorCallOptions = { memory: actorTool.memoryMbytes };
const { actorRun, datasetInfo, items } = await callActorGetDataset(
const { items } = await callActorGetDataset(
actorTool.actorFullName,
args,
apifyToken as string,
callOptions,
);
const content = [
{ type: 'text', text: `Actor finished with run information: ${JSON.stringify(actorRun)}` },
{ type: 'text', text: `Dataset information: ${JSON.stringify(datasetInfo)}` },
];

const itemContents = items.items.map((item: Record<string, unknown>) => {
const text = JSON.stringify(item).slice(0, ACTOR_OUTPUT_MAX_CHARS_PER_ITEM);
return text.length === ACTOR_OUTPUT_MAX_CHARS_PER_ITEM
? { type: 'text', text: `${text} ... ${ACTOR_OUTPUT_TRUNCATED_MESSAGE}` }
: { type: 'text', text };
});
content.push(...itemContents);
return { content };
return {
content: items.items.map((item: Record<string, unknown>) => {
return {
type: 'text',
text: JSON.stringify(item),
};
}),
};
}
} catch (error) {
if (error instanceof ApifyApiError) {
Expand Down
52 changes: 39 additions & 13 deletions src/tools/actor.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { Ajv } from 'ajv';
import type { ActorCallOptions, ActorRun, Dataset, PaginatedList } from 'apify-client';
import type { ActorCallOptions, ActorRun, PaginatedList } from 'apify-client';
import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema';

Expand All @@ -10,14 +10,15 @@ import { ApifyClient } from '../apify-client.js';
import {
ACTOR_ADDITIONAL_INSTRUCTIONS,
ACTOR_MAX_MEMORY_MBYTES,
ACTOR_RUN_DATASET_OUTPUT_MAX_ITEMS,
HelperTools,
} from '../const.js';
import { getActorMCPServerPath, getActorMCPServerURL } from '../mcp/actors.js';
import { connectMCPClient } from '../mcp/client.js';
import { getMCPServerTools } from '../mcp/proxy.js';
import { actorDefinitionPrunedCache } from '../state.js';
import type { ActorInfo, InternalTool, ToolEntry } from '../types.js';
import type { ActorDefinitionStorage, ActorInfo, InternalTool, ToolEntry } from '../types.js';
import { getActorDefinitionStorageFieldNames } from '../utils/actor.js';
import { getValuesByDotKeys } from '../utils/generic.js';
import { getActorDefinition } from './build.js';
import {
actorNameToToolName,
Expand All @@ -34,8 +35,6 @@ const ajv = new Ajv({ coerceTypes: 'array', strict: false });

// Define a named return type for callActorGetDataset
export type CallActorGetDatasetResult = {
actorRun: ActorRun;
datasetInfo: Dataset | undefined;
items: PaginatedList<Record<string, unknown>>;
};

Expand All @@ -50,7 +49,6 @@ export type CallActorGetDatasetResult = {
* @param {ActorCallOptions} callOptions - The options to pass to the actor.
* @param {unknown} input - The input to pass to the actor.
* @param {string} apifyToken - The Apify token to use for authentication.
* @param {number} limit - The maximum number of items to retrieve from the dataset.
* @returns {Promise<CallActorGetDatasetResult>} - A promise that resolves to an object containing the dataset items.
* @throws {Error} - Throws an error if the `APIFY_TOKEN` is not set
*/
Expand All @@ -59,7 +57,6 @@ export async function callActorGetDataset(
input: unknown,
apifyToken: string,
callOptions: ActorCallOptions | undefined = undefined,
limit = ACTOR_RUN_DATASET_OUTPUT_MAX_ITEMS,
): Promise<CallActorGetDatasetResult> {
try {
log.info(`Calling Actor ${actorName} with input: ${JSON.stringify(input)}`);
Expand All @@ -69,13 +66,24 @@ export async function callActorGetDataset(

const actorRun: ActorRun = await actorClient.call(input, callOptions);
const dataset = client.dataset(actorRun.defaultDatasetId);
const [datasetInfo, items] = await Promise.all([
dataset.get(),
dataset.listItems({ limit }),
// const dataset = client.dataset('Ehtn0Y4wIKviFT2WB');
const [items, defaultBuild] = await Promise.all([
dataset.listItems(),
(await actorClient.defaultBuild()).get(),
]);
log.info(`Actor ${actorName} finished with ${datasetInfo?.itemCount} items`);

return { actorRun, datasetInfo, items };
// Get important properties from storage view definitions and if available return only those properties
const storageDefinition = defaultBuild?.actorDefinition?.storages?.dataset as ActorDefinitionStorage | undefined;
const importantProperties = getActorDefinitionStorageFieldNames(storageDefinition || {});
if (importantProperties.length > 0) {
items.items = items.items.map((item) => {
return getValuesByDotKeys(item, importantProperties);
});
}

log.info(`Actor ${actorName} finished with ${items.count} items`);

return { items };
} catch (error) {
log.error(`Error calling actor: ${error}. Actor: ${actorName}, input: ${JSON.stringify(input)}`);
throw new Error(`Error calling Actor: ${error}`);
Expand Down Expand Up @@ -115,6 +123,18 @@ export async function getNormalActorsAsTools(
if (actorDefinitionPruned) {
const schemaID = getToolSchemaID(actorDefinitionPruned.actorFullName);
if (actorDefinitionPruned.input && 'properties' in actorDefinitionPruned.input && actorDefinitionPruned.input) {
// Filter non-required properties except integers if `required` is defined in the input schema and not empty.
const { required } = actorDefinitionPruned.input;
if (Array.isArray(required) && required.length > 0) {
actorDefinitionPruned.input.properties = Object.fromEntries(
Object.entries(actorDefinitionPruned.input.properties)
// Keep all integer properties, as these include
// properties related to output item counts that users
// might want to change if they need more results than the default limit.
.filter(([key, value]) => required.includes(key) || value.type === 'integer'),
);
}

actorDefinitionPruned.input.properties = markInputPropertiesAsRequired(actorDefinitionPruned.input);
actorDefinitionPruned.input.properties = buildNestedProperties(actorDefinitionPruned.input.properties);
actorDefinitionPruned.input.properties = filterSchemaProperties(actorDefinitionPruned.input.properties);
Expand All @@ -132,7 +152,13 @@ export async function getNormalActorsAsTools(
name: actorNameToToolName(actorDefinitionPruned.actorFullName),
actorFullName: actorDefinitionPruned.actorFullName,
description: `${actorDefinitionPruned.description} Instructions: ${ACTOR_ADDITIONAL_INSTRUCTIONS}`,
inputSchema: actorDefinitionPruned.input || {},
inputSchema: actorDefinitionPruned.input
// So Actor without input schema works - MCP client expects JSON schema valid output
|| {
type: 'object',
properties: {},
required: [],
},
ajvValidate: fixedAjvCompile(ajv, actorDefinitionPruned.input || {}),
memoryMbytes: memoryMbytes > ACTOR_MAX_MEMORY_MBYTES ? ACTOR_MAX_MEMORY_MBYTES : memoryMbytes,
},
Expand Down
118 changes: 118 additions & 0 deletions src/tools/get-actor-details.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema';

import { ApifyClient } from '../apify-client.js';
import { HelperTools } from '../const.js';
import type { ExtendedPricingInfo, IActorInputSchema, InternalTool, ToolEntry } from '../types.js';
import { ajv } from '../utils/ajv.js';
import { getCurrentPricingInfo, pricingInfoToString } from '../utils/pricing-info.js';
import { filterSchemaProperties, shortenProperties } from './utils.js';

/**
 * Schema for the single `actor` argument: a non-empty string holding either
 * the Actor ID or its full "username/name" identifier.
 */
const actorIdentifierSchema = z
    .string()
    .min(1)
    .describe('Actor ID or full name in the format "username/name", e.g., "apify/rag-web-browser".');

/**
 * Zod schema for the arguments accepted by the get-actor-details tool.
 * It is converted to JSON schema for the tool definition and reused to parse
 * the incoming arguments inside the tool's `call` handler.
 */
const getActorDetailsToolArgsSchema = z.object({ actor: actorIdentifierSchema });

/** Number of users of an Actor, broken down by time window. */
interface IActorUserCounts {
    allTime: number;
    last7Days: number;
    last30Days: number;
    last90Days: number;
}

/** Usage statistics reported for an Actor. */
interface IActorUsageStatistics {
    totalUsers: IActorUserCounts;
    /** Number of failed runs in the last 30 days; a string is used for the 'unknown' case. */
    failedRunsInLast30Days: number | string;
}

/**
 * Shape of the payload that the get-actor-details tool serializes to JSON
 * and returns as its single text content item.
 */
interface IGetActorDetailsToolResult {
    // Identity
    id: string;
    actorFullName: string;

    // Status and timestamps (timestamps are ISO 8601 strings)
    isPublic: boolean;
    isDeprecated: boolean;
    createdAt: string;
    modifiedAt: string;

    // Descriptive information
    categories?: string[];
    description: string;
    readme: string;

    // Input schema of the Actor's default build
    inputSchema: IActorInputSchema;

    // Pricing info is converted into a string representation
    pricingInfo: string;

    usageStatistics: IActorUsageStatistics;
}

/**
 * Safely reads `publicActorRunStats30Days.FAILED` from an Actor stats object.
 *
 * The field is looked up defensively: a key that is present but holds `null`,
 * `undefined` or a primitive would make the `in` operator throw a TypeError,
 * so every level is checked to be a non-null object first.
 *
 * @param stats - The `stats` object of the Actor as returned by the API.
 * @returns The number of failed runs, or `'unknown'` when it is not available.
 */
function getFailedRunsInLast30Days(stats: unknown): number | 'unknown' {
    if (typeof stats !== 'object' || stats === null || !('publicActorRunStats30Days' in stats)) {
        return 'unknown';
    }
    const runStats = (stats as { publicActorRunStats30Days: unknown }).publicActorRunStats30Days;
    if (typeof runStats !== 'object' || runStats === null || !('FAILED' in runStats)) {
        return 'unknown';
    }
    const failed = (runStats as { FAILED: unknown }).FAILED;
    return typeof failed === 'number' ? failed : 'unknown';
}

/**
 * Internal tool that returns details about a single Actor.
 *
 * The handler fetches the Actor record and its default build in parallel and
 * returns one text content item holding a JSON-serialized
 * `IGetActorDetailsToolResult`. When the Actor, its build, or the build's
 * Actor definition is missing, a human-readable "not found" message is
 * returned instead.
 */
export const getActorDetailsTool: ToolEntry = {
    type: 'internal',
    tool: {
        name: HelperTools.ACTOR_GET_DETAILS,
        description: `Retrieve information about an Actor by its ID or full name.
The Actor name is always composed of "username/name", for example, "apify/rag-web-browser".
This tool returns information about the Actor, including whether it is public or deprecated, when it was created or modified, the categories in which the Actor is listed, a description, a README (the Actor's documentation), the input schema, and usage statistics—such as how many users are using it and the number of failed runs of the Actor.
For example, use this tool when a user wants to know more about a specific Actor or wants to use optional or advanced parameters of the Actor that are not listed in the default Actor tool input schema - so you know the details and how to pass them.`,
        inputSchema: zodToJsonSchema(getActorDetailsToolArgsSchema),
        ajvValidate: ajv.compile(zodToJsonSchema(getActorDetailsToolArgsSchema)),
        call: async (toolArgs) => {
            const { args, apifyToken } = toolArgs;

            const parsed = getActorDetailsToolArgsSchema.parse(args);
            const client = new ApifyClient({ token: apifyToken });
            const actorClient = client.actor(parsed.actor);

            // Fetch the Actor record and its default build concurrently.
            const [actorInfo, buildInfo] = await Promise.all([
                actorClient.get(),
                actorClient.defaultBuild().then(async (build) => build.get()),
            ]);

            if (!actorInfo || !buildInfo || !buildInfo.actorDefinition) {
                return {
                    content: [{ type: 'text', text: `Actor information for '${parsed.actor}' was not found. Please check the Actor ID or name and ensure the Actor exists.` }],
                };
            }

            // Fall back to an empty object schema when the build defines no input schema.
            const inputSchema = (buildInfo.actorDefinition.input || {
                type: 'object',
                properties: {},
            }) as IActorInputSchema;
            inputSchema.properties = filterSchemaProperties(inputSchema.properties);
            inputSchema.properties = shortenProperties(inputSchema.properties);

            const currentPricingInfo = getCurrentPricingInfo(actorInfo.pricingInfos || [], new Date());

            const result: IGetActorDetailsToolResult = {
                id: actorInfo.id,
                actorFullName: `${actorInfo.username}/${actorInfo.name}`,

                isPublic: actorInfo.isPublic,
                isDeprecated: actorInfo.isDeprecated ?? false,
                createdAt: actorInfo.createdAt.toISOString(),
                modifiedAt: actorInfo.modifiedAt.toISOString(),

                categories: actorInfo.categories,
                // `||` is intentional: an empty description/README also gets the placeholder.
                description: actorInfo.description || 'No description provided.',
                readme: buildInfo.actorDefinition.readme || 'No README provided.',

                inputSchema,

                pricingInfo: pricingInfoToString(currentPricingInfo as (ExtendedPricingInfo | null)),

                usageStatistics: {
                    totalUsers: {
                        allTime: actorInfo.stats.totalUsers,
                        last7Days: actorInfo.stats.totalUsers7Days,
                        last30Days: actorInfo.stats.totalUsers30Days,
                        last90Days: actorInfo.stats.totalUsers90Days,
                    },
                    failedRunsInLast30Days: getFailedRunsInLast30Days(actorInfo.stats),
                },
            };
            return {
                content: [{
                    type: 'text',
                    text: JSON.stringify(result),
                }],
            };
        },
    } as InternalTool,
};
Loading