From 3cc8e5ae4e1ff33995a5f352161692e683e9acd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Ad=C3=A1mek?=
Date: Wed, 17 May 2023 14:44:10 +0200
Subject: [PATCH] feat(utilities): add new helpers for "run on apify" button

---
 packages/utilities/src/index.ts            |  1 +
 packages/utilities/src/url_params_utils.ts | 30 ++++++++++++++
 test/url-params-utils.test.ts              | 48 ++++++++++++++++++++++
 3 files changed, 79 insertions(+)
 create mode 100644 packages/utilities/src/url_params_utils.ts
 create mode 100644 test/url-params-utils.test.ts

diff --git a/packages/utilities/src/index.ts b/packages/utilities/src/index.ts
index 30e336aa8..1001a13ad 100644
--- a/packages/utilities/src/index.ts
+++ b/packages/utilities/src/index.ts
@@ -6,3 +6,4 @@ export * from './parse_jsonl_stream';
 export * from './streams_utilities';
 export * from './webhook_payload_template';
 export * from './crypto';
+export * from './url_params_utils';
diff --git a/packages/utilities/src/url_params_utils.ts b/packages/utilities/src/url_params_utils.ts
new file mode 100644
index 000000000..80d8c032a
--- /dev/null
+++ b/packages/utilities/src/url_params_utils.ts
@@ -0,0 +1,30 @@
+/**
+ * Encodes object (e.g. input for actor) to a string hash.
+ */
+export function encodeInput<T>(input: T) {
+    const data = JSON.stringify(input);
+    const buffer = Buffer.from(data, 'utf8');
+
+    return buffer.toString('base64url');
+}
+
+/**
+ * Decodes a string hash produced via `encodeInput` back into the original object.
+ */
+export function decodeInput(urlHash: string) {
+    const buffer = Buffer.from(urlHash, 'base64url');
+    const decoded = buffer.toString('utf8');
+
+    return JSON.parse(decoded);
+}
+
+/**
+ * Extract import statements from the code.
+ */
+export function separateImports(code: string): { code: string; imports: string } {
+    const lines = code.split('\n');
+    return {
+        code: lines.filter((line) => !line.trim().startsWith('import')).join('\n'),
+        imports: lines.filter((line) => line.trim().startsWith('import')).join('\n'),
+    };
+}
diff --git a/test/url-params-utils.test.ts b/test/url-params-utils.test.ts
new file mode 100644
index 000000000..4e0ef6655
--- /dev/null
+++ b/test/url-params-utils.test.ts
@@ -0,0 +1,48 @@
+import { encodeInput, decodeInput, separateImports } from '@apify/utilities';
+
+const input = {
+    code: `import { PlaywrightCrawler, Dataset } from 'crawlee';
+
+// PlaywrightCrawler crawls the web using a headless
+// browser controlled by the Playwright library.
+const crawler = new PlaywrightCrawler({
+    // Use the requestHandler to process each of the crawled pages.
+    async requestHandler({ request, page, enqueueLinks, log }) {
+        const title = await page.title();
+        log.info(\`Title of \${request.loadedUrl} is '\${title}'\`);
+
+        // Save results as JSON to ./storage/datasets/default
+        await Dataset.pushData({ title, url: request.loadedUrl });
+
+        // Extract links from the current page
+        // and add them to the crawling queue.
+        await enqueueLinks();
+    },
+    // Uncomment this option to see the browser window.
+    // headless: false,
+});
+
+// Add first URL to the queue and start the crawl.
+await crawler.run(['https://crawlee.dev']);`,
+};
+
+test('encode/decode', async () => {
+    const hash = encodeInput(input);
+    const decoded = decodeInput(hash);
+
+    expect(input).toEqual(decoded);
+});
+
+test('import extraction', async () => {
+    const { code, imports } = separateImports(input.code);
+    const codeLines = code.split('\n');
+    const importLines = imports.split('\n');
+
+    for (const line of codeLines) {
+        expect(line).not.toMatch(/^import/);
+    }
+
+    for (const line of importLines) {
+        expect(line).toMatch(/^import/);
+    }
+});