diff --git a/README.md b/README.md index 737f3990..94a97fd9 100644 --- a/README.md +++ b/README.md @@ -92,10 +92,8 @@ You can refer to the specific Actor's documentation for a list of available argu ### Helper tools One of the powerful features of MCP with Apify is dynamic actor tooling – the ability for an AI agent to find new tools (Actors) as needed and incorporate them. Here are some special MCP operations and how Apify MCP Server supports them: -- Actor discovery and management: Search for Actors (`search-actors`), view details (`get-actor-details`), and dynamically add or remove tools (`add-actor`, `remove-actor`). -- Actor execution and monitoring: Start Actor runs, fetch run results (`get-actor-run`), logs (`get-actor-log`), and abort runs (`abort-actor-run`). -- Dataset access: List datasets, retrieve dataset info and items (`get-dataset`, `get-dataset-list`, `get-dataset-items`). -- Key-value store access: List key-value stores, view keys, and retrieve records (`get-key-value-store-list`, `get-key-value-store`, `get-key-value-store-keys`, `get-key-value-store-record`). +- Actor discovery and management: Search for Actors (`search-actors`), view details (`get-actor-details`), and dynamically add them (`add-actor`). +- Apify documentation: Search Apify documentation (`search-apify-docs`) and fetch specific documents (`fetch-apify-docs`). - Built-in help tool: A static helper (`apify-actor-help-tool`) that returns usage info for the Apify MCP Server. 
## Prompt & Resources diff --git a/package-lock.json b/package-lock.json index 991414f7..2396a1d6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,10 +12,13 @@ "@apify/datastructures": "^2.0.3", "@apify/log": "^2.5.16", "@modelcontextprotocol/sdk": "^1.13.2", + "@types/turndown": "^5.0.5", "ajv": "^8.17.1", + "algoliasearch": "^5.31.0", "apify": "^3.4.2", "apify-client": "^2.12.6", "express": "^4.21.2", + "turndown": "^7.2.0", "yargs": "^17.7.2", "zod": "^3.24.1", "zod-to-json-schema": "^3.24.1" @@ -43,6 +46,186 @@ "node": ">=18.0.0" } }, + "node_modules/@algolia/client-abtesting": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/@algolia/client-abtesting/-/client-abtesting-5.31.0.tgz", + "integrity": "sha512-J+wZq5uotbisEsbKmXv79dsENI/AW6IZWIvfTqebE6QcH/S2yGDeNh6b4qa4koJ1eQx7+wKkLMfZ+nOZpBWclA==", + "license": "MIT", + "dependencies": { + "@algolia/client-common": "5.31.0", + "@algolia/requester-browser-xhr": "5.31.0", + "@algolia/requester-fetch": "5.31.0", + "@algolia/requester-node-http": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/client-analytics": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/@algolia/client-analytics/-/client-analytics-5.31.0.tgz", + "integrity": "sha512-zxz9ooi6HsMG7gS7xCG9NkUlWkpwMT/oYr8+cojchB98pEmn3OqHA7KaY1w8GKqKXNM4MiQD15N2/aZhDa9b9g==", + "license": "MIT", + "dependencies": { + "@algolia/client-common": "5.31.0", + "@algolia/requester-browser-xhr": "5.31.0", + "@algolia/requester-fetch": "5.31.0", + "@algolia/requester-node-http": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/client-common": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-5.31.0.tgz", + "integrity": "sha512-lO6oZLEPiCgtUcUHIFyfrRvcS8iB3Je1LqW3c04anjrCO7dqhkccXHC/5XuH0fIW4l7V5AtbPS2tpJGtRp1NJw==", + "license": "MIT", + "engines": { + "node": ">= 14.0.0" + } + }, + 
"node_modules/@algolia/client-insights": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/@algolia/client-insights/-/client-insights-5.31.0.tgz", + "integrity": "sha512-gwWTW4CMM6pov3aJv2a+Ex4v7fWG9wtey43qWBq5rABk3p3uYYFkzfylrht18rcq1zA99Wxo8UEireExHuzs2w==", + "license": "MIT", + "dependencies": { + "@algolia/client-common": "5.31.0", + "@algolia/requester-browser-xhr": "5.31.0", + "@algolia/requester-fetch": "5.31.0", + "@algolia/requester-node-http": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/client-personalization": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/@algolia/client-personalization/-/client-personalization-5.31.0.tgz", + "integrity": "sha512-3G8ZpoLCgrcuILTQGVU9WXxUmK4R8uUmAiU31Qqd/pkta/9J8DHQjNh+Fs/i27ls2YxQq36GqXvVM2eoQFmFJw==", + "license": "MIT", + "dependencies": { + "@algolia/client-common": "5.31.0", + "@algolia/requester-browser-xhr": "5.31.0", + "@algolia/requester-fetch": "5.31.0", + "@algolia/requester-node-http": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/client-query-suggestions": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/@algolia/client-query-suggestions/-/client-query-suggestions-5.31.0.tgz", + "integrity": "sha512-+YIHy+n+x2/DqRdnrPv2Eck2pbZ4Q5Lu1mWpwOUZ2u2XG6JVQx0goePomtYl8evsDGspDRZJPpGD+CFJboe0gQ==", + "license": "MIT", + "dependencies": { + "@algolia/client-common": "5.31.0", + "@algolia/requester-browser-xhr": "5.31.0", + "@algolia/requester-fetch": "5.31.0", + "@algolia/requester-node-http": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/client-search": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-5.31.0.tgz", + "integrity": "sha512-2I79ICkuTqbXeK5RGSmzCN1Uj86NghWxaWt41lIcFk1OXuUWhyXTxC2fN5M8ASRBf/qWSeXr6AzL8jb3opya3g==", + "license": "MIT", + "dependencies": { + 
"@algolia/client-common": "5.31.0", + "@algolia/requester-browser-xhr": "5.31.0", + "@algolia/requester-fetch": "5.31.0", + "@algolia/requester-node-http": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/ingestion": { + "version": "1.31.0", + "resolved": "https://registry.npmjs.org/@algolia/ingestion/-/ingestion-1.31.0.tgz", + "integrity": "sha512-HiBWdO7ztzgFoR+SnbHq0iBQtDUusRZPSVMkPIR/MNbNJrH/OhrCsxk6Y7dUvQAIjypKmFl38raf1XEKz9fdUA==", + "license": "MIT", + "dependencies": { + "@algolia/client-common": "5.31.0", + "@algolia/requester-browser-xhr": "5.31.0", + "@algolia/requester-fetch": "5.31.0", + "@algolia/requester-node-http": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/monitoring": { + "version": "1.31.0", + "resolved": "https://registry.npmjs.org/@algolia/monitoring/-/monitoring-1.31.0.tgz", + "integrity": "sha512-ifrQ3BMg7Z4EGBPouUINd7xVU2ySTrJ2FtuAoiRHaZ7rT1Kp56JW40kuHiCvmDI4ZBaIzrQuGxWYKUZ29QWR6g==", + "license": "MIT", + "dependencies": { + "@algolia/client-common": "5.31.0", + "@algolia/requester-browser-xhr": "5.31.0", + "@algolia/requester-fetch": "5.31.0", + "@algolia/requester-node-http": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/recommend": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/@algolia/recommend/-/recommend-5.31.0.tgz", + "integrity": "sha512-dA94TKQ9FiZ8E1BlpfAMVKC3XimhDBjNFLPR3w5eRgSXymJbbK93xr/LrhyCWHbJPxtUcJvaO+Xg0pFKP+HZvw==", + "license": "MIT", + "dependencies": { + "@algolia/client-common": "5.31.0", + "@algolia/requester-browser-xhr": "5.31.0", + "@algolia/requester-fetch": "5.31.0", + "@algolia/requester-node-http": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/requester-browser-xhr": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/@algolia/requester-browser-xhr/-/requester-browser-xhr-5.31.0.tgz", + "integrity": 
"sha512-akbqE63Scw3dttQatKhjiHdFXpqihCCpcAciIHpdebw3/zWfb+e/Tkf6tDv/05AGcG5BHC365dp8LIl9+NchSA==", + "license": "MIT", + "dependencies": { + "@algolia/client-common": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/requester-fetch": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/@algolia/requester-fetch/-/requester-fetch-5.31.0.tgz", + "integrity": "sha512-qYOEOCIqXvbVKNTabgKmPFltpNxB1U38hhrMEbypyOc/X9zjdxnVi/dqZ+jKsYY4X7MSQTtowLK4AR++OdMD/g==", + "license": "MIT", + "dependencies": { + "@algolia/client-common": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/requester-node-http": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/@algolia/requester-node-http/-/requester-node-http-5.31.0.tgz", + "integrity": "sha512-eq8uTVUc/E7YIOqTVfXgGQ3ZSsAWqZZHy5ntuwm6WxnvdcAyhyzRo0sncX1zWFkFpNGvJ8xyONDWq/Ef2e31Tg==", + "license": "MIT", + "dependencies": { + "@algolia/client-common": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, "node_modules/@anthropic-ai/sdk": { "version": "0.33.1", "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.33.1.tgz", @@ -1065,6 +1248,12 @@ "dev": true, "license": "MIT" }, + "node_modules/@mixmark-io/domino": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz", + "integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==", + "license": "BSD-2-Clause" + }, "node_modules/@modelcontextprotocol/sdk": { "version": "1.13.2", "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.13.2.tgz", @@ -1950,6 +2139,12 @@ "@types/send": "*" } }, + "node_modules/@types/turndown": { + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/@types/turndown/-/turndown-5.0.5.tgz", + "integrity": "sha512-TL2IgGgc7B5j78rIccBtlYAnkuv8nUQqhQc+DSYV5j9Be9XOcm/SKOVRuA47xAVI3680Tk9B1d8flK2GWT2+4w==", + "license": 
"MIT" + }, "node_modules/@types/yargs": { "version": "17.0.33", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.33.tgz", @@ -2449,6 +2644,30 @@ "url": "https://github.com/sponsors/epoberezkin" } }, + "node_modules/algoliasearch": { + "version": "5.31.0", + "resolved": "https://registry.npmjs.org/algoliasearch/-/algoliasearch-5.31.0.tgz", + "integrity": "sha512-LBpwGyNPOcprdu1OnRtgaWeKLjnDR3T+vp64WRiQEgHYACIXgU+djAvj88m3OQc+6MfWbw7rKUjXtdRMLfU7Aw==", + "license": "MIT", + "dependencies": { + "@algolia/client-abtesting": "5.31.0", + "@algolia/client-analytics": "5.31.0", + "@algolia/client-common": "5.31.0", + "@algolia/client-insights": "5.31.0", + "@algolia/client-personalization": "5.31.0", + "@algolia/client-query-suggestions": "5.31.0", + "@algolia/client-search": "5.31.0", + "@algolia/ingestion": "1.31.0", + "@algolia/monitoring": "1.31.0", + "@algolia/recommend": "5.31.0", + "@algolia/requester-browser-xhr": "5.31.0", + "@algolia/requester-fetch": "5.31.0", + "@algolia/requester-node-http": "5.31.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, "node_modules/ansi-colors": { "version": "4.1.3", "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.3.tgz", @@ -7538,6 +7757,15 @@ "fsevents": "~2.3.3" } }, + "node_modules/turndown": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.0.tgz", + "integrity": "sha512-eCZGBN4nNNqM9Owkv9HAtWRYfLA4h909E/WGAWWBpmB275ehNhZyk87/Tpvjbp0jjNl9XwCsbe6bm6CqFsgD+A==", + "license": "MIT", + "dependencies": { + "@mixmark-io/domino": "^2.2.0" + } + }, "node_modules/type-check": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", diff --git a/package.json b/package.json index 70dacb64..59fc89f4 100644 --- a/package.json +++ b/package.json @@ -33,10 +33,13 @@ "@apify/datastructures": "^2.0.3", "@apify/log": "^2.5.16", "@modelcontextprotocol/sdk": "^1.13.2", + "@types/turndown": "^5.0.5", "ajv": "^8.17.1", + 
"algoliasearch": "^5.31.0", "apify": "^3.4.2", "apify-client": "^2.12.6", "express": "^4.21.2", + "turndown": "^7.2.0", "yargs": "^17.7.2", "zod": "^3.24.1", "zod-to-json-schema": "^3.24.1" diff --git a/src/const.ts b/src/const.ts index 859943b1..b198653d 100644 --- a/src/const.ts +++ b/src/const.ts @@ -34,6 +34,8 @@ export enum HelperTools { KEY_VALUE_STORE_RECORD_GET = 'get-key-value-store-record', APIFY_MCP_HELP_TOOL = 'apify-actor-help-tool', STORE_SEARCH = 'search-actors', + DOCS_SEARCH = 'search-apify-docs', + DOCS_FETCH = 'fetch-apify-docs', } export const defaults = { @@ -49,8 +51,11 @@ export const ACTOR_OUTPUT_TRUNCATED_MESSAGE = `Output was truncated because it w export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user.'; +// Cache export const ACTOR_CACHE_MAX_SIZE = 500; export const ACTOR_CACHE_TTL_SECS = 30 * 60; // 30 minutes +export const APIFY_DOCS_CACHE_MAX_SIZE = 500; +export const APIFY_DOCS_CACHE_TTL_SECS = 60 * 60; // 1 hour export const ACTOR_PRICING_MODEL = { /** Rental actors */ @@ -69,3 +74,9 @@ export const ACTOR_PRICING_MODEL = { export const ACTOR_SEARCH_ABOVE_LIMIT = 50; export const MCP_STREAMABLE_ENDPOINT = '/mcp'; + +export const ALGOLIA = { + appId: 'N8EOCSBQGH', + apiKey: 'e97714a64e2b4b8b8fe0b01cd8592870', // search only (public) API key + indexName: 'test_test_apify_sdk', +}; diff --git a/src/state.ts b/src/state.ts index 1eff0864..5fbd1601 100644 --- a/src/state.ts +++ b/src/state.ts @@ -1,5 +1,8 @@ -import { ACTOR_CACHE_MAX_SIZE, ACTOR_CACHE_TTL_SECS } from './const.js'; -import type { ActorDefinitionPruned } from './types.js'; +import { ACTOR_CACHE_MAX_SIZE, ACTOR_CACHE_TTL_SECS, APIFY_DOCS_CACHE_MAX_SIZE, APIFY_DOCS_CACHE_TTL_SECS } from './const.js'; +import type { ActorDefinitionPruned, ApifyDocsSearchResult } from './types.js'; import { TTLLRUCache } from './utils/ttl-lru.js'; export const actorDefinitionPrunedCache = new TTLLRUCache<ActorDefinitionPruned>(ACTOR_CACHE_MAX_SIZE, 
ACTOR_CACHE_TTL_SECS); +export const searchApifyDocsCache = new TTLLRUCache<ApifyDocsSearchResult[]>(APIFY_DOCS_CACHE_MAX_SIZE, APIFY_DOCS_CACHE_TTL_SECS); // keyed by the trimmed search query +/** Stores processed Markdown content, keyed by the page URL without its fragment */ +export const fetchApifyDocsCache = new TTLLRUCache<string>(APIFY_DOCS_CACHE_MAX_SIZE, APIFY_DOCS_CACHE_TTL_SECS); diff --git a/src/tools/fetch-apify-docs.ts b/src/tools/fetch-apify-docs.ts new file mode 100644 index 00000000..96520b21 --- /dev/null +++ b/src/tools/fetch-apify-docs.ts @@ -0,0 +1,81 @@ +import { z } from 'zod'; +import zodToJsonSchema from 'zod-to-json-schema'; + +import log from '@apify/log'; + +import { HelperTools } from '../const.js'; +import { fetchApifyDocsCache } from '../state.js'; +import type { InternalTool, ToolEntry } from '../types.js'; +import { ajv } from '../utils/ajv.js'; +import { htmlToMarkdown } from '../utils/html-to-md.js'; + +const fetchApifyDocsToolArgsSchema = z.object({ + url: z.string() + .min(1) + .describe(`URL of the Apify documentation page to fetch. This should be the full URL, including the protocol (e.g., https://docs.apify.com/).`), +}); + +export const fetchApifyDocsTool: ToolEntry = { + type: 'internal', + tool: { + name: HelperTools.DOCS_FETCH, + description: `Apify documentation fetch tool. 
This tool allows you to fetch the full content of an Apify documentation page by its URL.`, + args: fetchApifyDocsToolArgsSchema, + inputSchema: zodToJsonSchema(fetchApifyDocsToolArgsSchema), + ajvValidate: ajv.compile(zodToJsonSchema(fetchApifyDocsToolArgsSchema)), + call: async (toolArgs) => { + const { args } = toolArgs; + + const parsed = fetchApifyDocsToolArgsSchema.parse(args); + const url = parsed.url.trim(); + const urlWithoutFragment = url.split('#')[0]; + + // Only allow the https://docs.apify.com origin: the host must end right after ".com" so look-alikes such as https://docs.apify.com.evil.example or https://docs.apify.com@evil.example are rejected + if (!/^https:\/\/docs\.apify\.com(?:[/?#]|$)/.test(url)) { + return { + content: [{ + type: 'text', + text: `Only URLs starting with https://docs.apify.com are allowed.`, + }], + }; + } + + // Cache URL without fragment to avoid fetching the same page multiple times + let markdown = fetchApifyDocsCache.get(urlWithoutFragment); + // If the content is not cached, fetch it from the URL + if (!markdown) { + try { + const response = await fetch(url); + if (!response.ok) { + return { + content: [{ + type: 'text', + text: `Failed to fetch the documentation page at ${url}. Status: ${response.status} ${response.statusText}`, + }], + }; + } + const html = await response.text(); + markdown = htmlToMarkdown(html); + // Cache the processed Markdown content + // Use the URL without fragment as the key to avoid caching same page with different fragments + fetchApifyDocsCache.set(urlWithoutFragment, markdown); + } catch (error) { + log.error(`Failed to fetch the documentation page at ${url}.`, { error }); + return { + content: [{ + type: 'text', + text: `Failed to fetch the documentation page at ${url}. 
Please check the URL and try again.`, + }], + }; + } + } + + return { + content: [{ + type: 'text', + text: `Fetched content from ${url}:\n\n${markdown}`, + }], + }; + }, + } as InternalTool, +}; diff --git a/src/tools/index.ts b/src/tools/index.ts index bebc3cbe..f2523e97 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -1,7 +1,9 @@ // Import specific tools that are being used import { callActor, callActorGetDataset, getActorsAsTools } from './actor.js'; +import { fetchApifyDocsTool } from './fetch-apify-docs.js'; import { getActorDetailsTool } from './get-actor-details.js'; import { addTool, helpTool } from './helpers.js'; +import { searchApifyDocsTool } from './search-apify-docs.js'; import { searchActors } from './store_collection.js'; export const defaultTools = [ @@ -22,6 +24,8 @@ export const defaultTools = [ getActorDetailsTool, helpTool, searchActors, + searchApifyDocsTool, + fetchApifyDocsTool, ]; export const addRemoveTools = [ diff --git a/src/tools/search-apify-docs.ts b/src/tools/search-apify-docs.ts new file mode 100644 index 00000000..750e935d --- /dev/null +++ b/src/tools/search-apify-docs.ts @@ -0,0 +1,70 @@ +import { z } from 'zod'; +import zodToJsonSchema from 'zod-to-json-schema'; + +import { HelperTools } from '../const.js'; +import type { InternalTool, ToolEntry } from '../types.js'; +import { ajv } from '../utils/ajv.js'; +import { searchApifyDocsCached } from '../utils/apify-docs.js'; + +const searchApifyDocsToolArgsSchema = z.object({ + query: z.string() + .min(1) + .describe( + `Algolia full-text search query to find relevant documentation pages. +Use only keywords, do not use full sentences or questions. +For example, "standby actor" will return documentation pages that contain the words "standby" and "actor".`, + ), + limit: z.number().int().min(1) // a zero or negative limit would turn the slice end into a from-the-end index + .optional() + .default(5) + .describe(`Maximum number of search results to return. Defaults to 5. 
+You can increase this limit if you need more results, but keep in mind that the search results are limited to the most relevant pages.`), + offset: z.number().int().min(0) // a negative offset would make Array.prototype.slice count from the end + .optional() + .default(0) + .describe(`Offset for the search results. Defaults to 0. +Use this to paginate through the search results. For example, if you want to get the next 5 results, set the offset to 5 and limit to 5.`), +}); + +export const searchApifyDocsTool: ToolEntry = { + type: 'internal', + tool: { + name: HelperTools.DOCS_SEARCH, + description: `Apify documentation search tool. This tool allows you to search the Apify documentation using Algolia's full-text search. +You can use it to find relevant documentation pages based on keywords. The results will include the URL of the documentation page, a fragment identifier (if available), and a limited piece of content that matches the search query. You can then fetch the full content of the document using the ${HelperTools.DOCS_FETCH} tool by providing the URL. +Use this tool when a user asks for help with Apify documentation or when you need to find relevant documentation pages based on keywords. For example, when a user wants to build an Apify Actor, you can search "How to build Actors" to find relevant guidance.`, + args: searchApifyDocsToolArgsSchema, + inputSchema: zodToJsonSchema(searchApifyDocsToolArgsSchema), + ajvValidate: ajv.compile(zodToJsonSchema(searchApifyDocsToolArgsSchema)), + call: async (toolArgs) => { + const { args } = toolArgs; + + const parsed = searchApifyDocsToolArgsSchema.parse(args); + const query = parsed.query.trim(); + + const resultsRaw = await searchApifyDocsCached(query); + const results = resultsRaw.slice(parsed.offset, parsed.offset + parsed.limit); + + if (results.length === 0) { + return { + content: [{ + type: 'text', + text: `No results found for the query "${query}" with limit ${parsed.limit} and offset ${parsed.offset}. 
Try a different query or adjust the limit and offset.`, + }], + }; + } + + const textContent = `You can use the Apify docs fetch tool to retrieve the full content of a document by its URL. The document fragment refers to the section of the content containing the relevant part for the search result item. +Search results for "${query}": + +${results.map((result) => `- Document URL: ${result.url}${result.fragment ? `\n Document fragment: ${result.fragment}` : ''} + Content: ${result.content}`).join('\n\n')}`; + return { + content: [{ + type: 'text', + text: textContent, + }], + }; + }, + } as InternalTool, +}; diff --git a/src/types.ts b/src/types.ts index c06c6cc4..d9c64dd0 100644 --- a/src/types.ts +++ b/src/types.ts @@ -258,3 +258,12 @@ export type ActorDefinitionStorage = { } >; }; + +export interface ApifyDocsSearchResult { + /** URL of the documentation page */ + url: string; + /** Fragment identifier, e.g. "document-heading-1" so LLM knows what section to use when fetching whole document */ + fragment?: string; + /** Piece of content that matches the search query from Algolia */ + content: string; +} diff --git a/src/utils/apify-docs.ts b/src/utils/apify-docs.ts new file mode 100644 index 00000000..8189872e --- /dev/null +++ b/src/utils/apify-docs.ts @@ -0,0 +1,95 @@ +/** + * Utilities for searching Apify documentation using Algolia. + * + * Provides a function to query the Apify docs via Algolia's search API and return structured results. + * + * @module utils/apify-docs + */ +import { algoliasearch } from 'algoliasearch'; + +import { ALGOLIA } from '../const.js'; +import { searchApifyDocsCache } from '../state.js'; +import type { ApifyDocsSearchResult } from '../types.js'; + +/** + * Algolia search client instance, configured with Apify's Algolia credentials. + */ +const client = algoliasearch(ALGOLIA.appId, ALGOLIA.apiKey); + +/** + * Represents a single search hit from Algolia's response. 
+ */ +interface AlgoliaResultHit { + url_without_anchor?: string; + anchor?: string; + content?: string; +} + +/** + * Represents a single Algolia search result containing hits. + */ +interface AlgoliaResult { + hits?: AlgoliaResultHit[]; +} + +/** + * Searches the Apify documentation using Algolia and returns relevant results. + * + * @param {string} query - The search query string. + * @returns {Promise<ApifyDocsSearchResult[]>} Array of search results with URL, optional fragment, and content. + */ +export async function searchApifyDocs(query: string): Promise<ApifyDocsSearchResult[]> { + const response = await client.search({ + requests: [ + { + indexName: ALGOLIA.indexName, + query: query.trim(), + filters: 'version:latest', + }, + ], + }); + // So we can access the results without TypeScript errors + const results = response.results as unknown as AlgoliaResult[]; + + const searchResults: ApifyDocsSearchResult[] = []; + for (const result of results) { + if (result.hits && result.hits.length > 0) { + for (const hit of result.hits) { + // Check the fields, just in case + if (!hit.url_without_anchor || !hit.content) { + continue; // Skip hits with missing fields + } + searchResults.push({ + url: hit.url_without_anchor, + fragment: hit.anchor + ? hit.anchor + : undefined, + content: hit.content, + }); + } + } + } + + return searchResults; +} + +/** + * Searches the Apify documentation using Algolia and caches the results. + * + * If the query has been previously searched, it returns cached results. + * Otherwise, it performs a new search and caches the results for future use. + * + * @param {string} query - The search query string. + * @returns {Promise<ApifyDocsSearchResult[]>} Array of search results with URL, optional fragment, and content. 
+ */ +export async function searchApifyDocsCached(query: string): Promise<ApifyDocsSearchResult[]> { + const normalizedQuery = query.trim(); + const cachedResults = searchApifyDocsCache.get(normalizedQuery); + if (cachedResults) { + return cachedResults; + } + + const results = await searchApifyDocs(normalizedQuery); + searchApifyDocsCache.set(normalizedQuery, results); + return results; +} diff --git a/src/utils/html-to-md.ts b/src/utils/html-to-md.ts new file mode 100644 index 00000000..6c10f9a0 --- /dev/null +++ b/src/utils/html-to-md.ts @@ -0,0 +1,39 @@ +import TurndownService from 'turndown'; + +const turndown = new TurndownService(); + +// Remove non-visible elements +turndown.remove('script'); +turndown.remove('style'); +turndown.remove('noscript'); + +// Remove multimedia elements +turndown.remove('svg'); +turndown.remove('img'); +turndown.remove('figure'); +turndown.remove('video'); +turndown.remove('audio'); +turndown.remove('picture'); + +// Remove interactive elements +turndown.remove('canvas'); +turndown.remove('button'); +turndown.remove('select'); +turndown.remove('input'); + +// Remove embedded +turndown.remove('iframe'); +turndown.remove('embed'); +turndown.remove('object'); + +// Remove navigation and footer elements +turndown.remove('aside'); +turndown.remove('nav'); +turndown.remove('footer'); + +/** + * Converts HTML content to Markdown format using Turndown. 
+ */ +export function htmlToMarkdown(html: string): string { + return turndown.turndown(html); +} diff --git a/tests/integration/suite.ts b/tests/integration/suite.ts index a5b69a8b..5fffe577 100644 --- a/tests/integration/suite.ts +++ b/tests/integration/suite.ts @@ -344,6 +344,50 @@ export function createIntegrationTestsSuite( await client.close(); }); + it('should search Apify documentation', async () => { + const client = await createClientFn(); + const toolName = HelperTools.DOCS_SEARCH; + + const query = 'standby actor'; + const result = await client.callTool({ + name: toolName, + arguments: { + query, + limit: 5, + offset: 0, + }, + }); + + expect(result.content).toBeDefined(); + const content = result.content as { text: string }[]; + expect(content.length).toBeGreaterThan(0); + // At least one result should contain the standby actor docs URL + const standbyDocUrl = 'https://docs.apify.com/platform/actors/running/standby'; + expect(content.some((item) => item.text.includes(standbyDocUrl))).toBe(true); + + await client.close(); + }); + + it('should fetch Apify documentation page', async () => { + const client = await createClientFn(); + const toolName = HelperTools.DOCS_FETCH; + + const documentUrl = 'https://docs.apify.com/academy/getting-started/creating-actors'; + const result = await client.callTool({ + name: toolName, + arguments: { + url: documentUrl, + }, + }); + + expect(result.content).toBeDefined(); + const content = result.content as { text: string }[]; + expect(content.length).toBeGreaterThan(0); + expect(content[0].text).toContain(documentUrl); + + await client.close(); + }); + // Session termination is only possible for streamable HTTP transport. it.runIf(options.transport === 'streamable-http')('should successfully terminate streamable session', async () => { const client = await createClientFn();