diff --git a/README.md b/README.md index ccd329e..5821fd7 100644 --- a/README.md +++ b/README.md @@ -121,8 +121,7 @@ The `/search` GET HTTP endpoint accepts the following query parameters: | `removeCookieWarnings` | boolean | `true` | If enabled, removes cookie consent dialogs to improve text extraction accuracy. This might increase latency. | | `removeElementsCssSelector` | string | `see input` | A CSS selector matching HTML elements that will be removed from the DOM, before converting it to text, Markdown, or saving as HTML. This is useful to skip irrelevant page content. The value must be a valid CSS selector as accepted by the `document.querySelectorAll()` function. \n\nBy default, the Actor removes common navigation elements, headers, footers, modals, scripts, and inline image. You can disable the removal by setting this value to some non-existent CSS selector like `dummy_keep_everything`. | | `debugMode` | boolean | `false` | If enabled, the Actor will store debugging information in the dataset's debug field. | - - +Standby mode also supports the rest of the input parameters [described on the Actor page](https://apify.com/apify/rag-web-browser/input-schema). Object parameters like `proxyConfiguration` should be passed as URL-encoded JSON strings. ## 🔌 Integration with LLMs diff --git a/src/search.ts b/src/search.ts index 306f7f4..5942928 100644 --- a/src/search.ts +++ b/src/search.ts @@ -9,7 +9,6 @@ import { createResponsePromise } from './responses.js'; import { Input, Output, ContentScraperSettings, ContentCrawlerOptions } from './types.js'; import { addTimeMeasureEvent, - checkAndRemoveExtraParams, createRequest, createSearchRequest, interpretAsUrl, @@ -100,7 +99,6 @@ export async function handleSearchRequest(request: IncomingMessage, response: Se try { const params = parseParameters(request.url?.slice(Routes.SEARCH.length) ?? 
''); log.info(`Received query parameters: ${JSON.stringify(params)}`); - checkAndRemoveExtraParams(params); const results = await runSearchProcess(params); diff --git a/src/utils.ts b/src/utils.ts index 4028e28..b47819a 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -3,23 +3,44 @@ import { parse, ParsedUrlQuery } from 'querystring'; import { defaults } from './const.js'; import { OrganicResult, ContentScraperSettings, TimeMeasure, ContentCrawlerUserData, SearchCrawlerUserData } from './types.js'; +import inputSchema from '../.actor/input_schema.json' with { type: 'json' }; export function parseParameters(url: string): ParsedUrlQuery { - return parse(url.slice(1)); -} + const params = parse(url.slice(1)); -/** - * Check whether the query parameters are valid (do not support extra parameters) - */ -export function checkAndRemoveExtraParams(params: ParsedUrlQuery) { - const keys = Object.keys(defaults); - keys.push('token', '?token'); // token is a special parameter - for (const key of Object.keys(params)) { - if (!keys.includes(key)) { + type SupportedParamKey = keyof typeof defaults; + + const parsedValidatedParams = {} as Record<SupportedParamKey, unknown>; + for (const [key, value] of Object.entries(params)) { + // Skip the key if it is neither a supported parameter (a key of `defaults`) nor the Apify API token + if (key !== 'token' && !Object.keys(defaults).includes(key)) { log.warning(`Unknown parameter: ${key}. 
Supported parameters: ${Object.keys(defaults).join(', ')}`); - delete params[key]; + continue; + } + const typedKey = key as SupportedParamKey; + // Schema keys are a subset of SupportedParams, so we can safely cast + type SchemaKey = keyof typeof inputSchema.properties; + + // Parse non-primitive (object/array) parameters according to the input schema, because querystring doesn't parse objects + if ( + !!inputSchema.properties[typedKey as SchemaKey] + && ['object', 'array'].includes(inputSchema.properties[typedKey as SchemaKey].type) + && typeof value === 'string' + ) { + try { + parsedValidatedParams[typedKey] = JSON.parse(value); + log.debug(`Parsed parameter ${key} from string: ${value} to object`, parsedValidatedParams[typedKey] as object); + } catch (e) { + log.warning(`Failed to parse parameter ${key}, it must be valid JSON. Skipping it: ${e}`); + } + } else { + parsedValidatedParams[typedKey] = value; } } + + // TODO: We should unify the parameter types into a single source; + // currently we have ParsedUrlQuery, Input and SupportedParams + return parsedValidatedParams as ParsedUrlQuery; } export function randomId() {