import { timingSafeEqual, createHmac } from 'node:crypto';

/**
 * Property names used inside the serialized `meta` object of a code hash.
 */
export enum CodeHashMetaKey {
    VERSION = 'v',
    USER = 'u',
}

/**
 * Allows hashing of an Actor input together with some metadata into a shareable link for the "Run on Apify" button.
 * Uses a common secret for checking the signatures.
 *
 * The hash consists of 3 parts separated by a dot, as in `ABC.DEF.GHI`, each being a base64url encoded string:
 *  - `meta` object with the `version` and `user` properties.
 *  - `data` data object (the one that gets encoded)
 *  - `signature` used for verification of the URL hash, computed from the `meta` and `data` sections
 */
export class CodeHashManager {
    static readonly SECTION_SEPARATOR = '.';
    static readonly VERSION = 1;

    constructor(private readonly secret: string) {}

    /**
     * Encodes object (e.g. input for actor) to a string hash and uses the `secret` to sign the hash.
     *
     * @param data JSON-serializable value to embed in the hash.
     * @param user Identifier stored in the `meta` section.
     * @returns The `meta.data.signature` string, every section base64url encoded.
     */
    encode<T>(data: T, user: string) {
        const meta = {
            [CodeHashMetaKey.USER]: user,
            [CodeHashMetaKey.VERSION]: CodeHashManager.VERSION,
        };
        const metaBase64 = this.toBase64(JSON.stringify(meta));
        const inputBase64 = this.toBase64(JSON.stringify(data));
        // The signature covers the encoded sections exactly as they appear in the hash.
        const dataToSign = [metaBase64, inputBase64].join(CodeHashManager.SECTION_SEPARATOR);
        const signatureBase64 = this.toBase64(this.generateSignature(dataToSign));

        return [metaBase64, inputBase64, signatureBase64].join(CodeHashManager.SECTION_SEPARATOR);
    }

    /**
     * Decodes a hash produced by `encode` and checks its signature against the `secret`.
     *
     * A signature mismatch does NOT throw; it is reported through `meta.validSignature`,
     * so callers must check that flag before trusting `data`.
     *
     * @param urlHash String in the `meta.data.signature` format.
     * @returns The decoded `data` plus `meta` with `user`, `version` and `validSignature`.
     * @throws Error when the hash does not consist of exactly 3 sections.
     * @throws SyntaxError when the `meta` or `data` section does not decode to valid JSON.
     */
    decode(urlHash: string) {
        const sections = urlHash.split(CodeHashManager.SECTION_SEPARATOR);
        const [metaBase64, inputBase64, signatureBase64] = sections;
        if (
            sections.length !== 3
            || metaBase64 === undefined
            || inputBase64 === undefined
            || signatureBase64 === undefined
        ) {
            throw new Error(
                `Invalid hash: expected 3 sections separated by "${CodeHashManager.SECTION_SEPARATOR}", got ${sections.length}`,
            );
        }

        const dataToSign = [metaBase64, inputBase64].join(CodeHashManager.SECTION_SEPARATOR);
        const meta = JSON.parse(this.fromBase64(metaBase64).toString());
        const data = JSON.parse(this.fromBase64(inputBase64).toString());
        const signature = this.fromBase64(signatureBase64);
        const expectedSignature = this.generateSignature(dataToSign);
        // `timingSafeEqual` throws when the buffers differ in byte length, so a truncated or
        // padded signature must be rejected up front. The digest length is public knowledge,
        // therefore the length comparison leaks nothing.
        const validSignature = signature.length === expectedSignature.length
            && timingSafeEqual(signature, expectedSignature);

        return {
            data,
            meta: {
                user: meta[CodeHashMetaKey.USER],
                version: meta[CodeHashMetaKey.VERSION],
                validSignature,
            },
        };
    }

    /** Encodes a string or raw bytes as base64url. */
    private toBase64(data: string | Buffer) {
        return Buffer.from(data).toString('base64url');
    }

    /** Decodes a base64url string into raw bytes. */
    private fromBase64(encoded: string) {
        return Buffer.from(encoded, 'base64url');
    }

    /** Computes the HMAC-SHA256 of `data` keyed with the `secret`. */
    private generateSignature(data: string) {
        return createHmac('sha256', this.secret).update(data).digest();
    }
}
b/packages/utilities/src/url_params_utils.ts index 90f7015ba..e2839197a 100644 --- a/packages/utilities/src/url_params_utils.ts +++ b/packages/utilities/src/url_params_utils.ts @@ -1,47 +1,3 @@ -function base64urlToBase64(input: string) { - // Replace non-url compatible chars with base64 standard chars - input = input - .replace(/-/g, '+') - .replace(/_/g, '/'); - - // Pad out with standard base64 required padding characters - const pad = input.length % 4; - if (pad) { - if (pad === 1) { - throw new Error('InvalidLengthError: Input base64url string is the wrong length to determine padding'); - } - input += new Array(5 - pad).join('='); - } - - return input; -} - -function base64ToBase64Url(input: string) { - return input.replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/m, ''); -} - -/** - * Encodes object (e.g. input for actor) to a string hash. - */ -export function encodeInput(input: T) { - const data = JSON.stringify(input); - const buffer = Buffer.from(data, 'utf8'); - - const base64 = buffer.toString('base64'); - return base64ToBase64Url(base64); -} - -/** - * Decodes a string hash produced via `encodeInput` back into the original object. - */ -export function decodeInput(urlHash: string) { - const base64 = base64urlToBase64(urlHash); - const buffer = Buffer.from(base64, 'base64'); - const decoded = buffer.toString('utf8'); - - return JSON.parse(decoded); -} - /** * Extract import statements from the code. */ diff --git a/test/code_hash_manager.test.ts b/test/code_hash_manager.test.ts new file mode 100644 index 000000000..7adb0df51 --- /dev/null +++ b/test/code_hash_manager.test.ts @@ -0,0 +1,59 @@ +import { CodeHashManager } from '@apify/utilities'; + +const secret = 'abcd'; +const input = { + code: `import { PlaywrightCrawler, Dataset } from 'crawlee'; + +// PlaywrightCrawler crawls the web using a headless +// browser controlled by the Playwright library. 
+const crawler = new PlaywrightCrawler({ +// Use the requestHandler to process each of the crawled pages. +async requestHandler({ request, page, enqueueLinks, log }) { + const title = await page.title(); + log.info(\`Title of \${request.loadedUrl} is '\${title}'\`); + + // Save results as JSON to ./storage/datasets/default + await Dataset.pushData({ title, url: request.loadedUrl }); + + // Extract links from the current page + // and add them to the crawling queue. + await enqueueLinks(); +}, +// Uncomment this option to see the browser window. +// headless: false, +}); + +// Add first URL to the queue and start the crawl. +await crawler.run(['https://crawlee.dev']);`, +}; + +const manager = new CodeHashManager(secret); + +test('encode/decode', async () => { + const hash = manager.encode(input, '123'); + const { data, meta } = manager.decode(hash); + + expect(typeof hash).toBe('string'); + expect(hash.split(CodeHashManager.SECTION_SEPARATOR)).toHaveLength(3); + expect(input).toEqual(data); + expect(meta).toEqual({ + version: 1, + user: '123', + validSignature: true, + }); +}); + +test('encode without secret', async () => { + const manager2 = new CodeHashManager(''); + const hash = manager2.encode(input, '123'); + const { data, meta } = manager.decode(hash); + + expect(typeof hash).toBe('string'); + expect(hash.split(CodeHashManager.SECTION_SEPARATOR)).toHaveLength(3); + expect(input).toEqual(data); + expect(meta).toEqual({ + version: 1, + user: '123', + validSignature: false, + }); +}); diff --git a/test/url-params-utils.test.ts b/test/url_params_utils.test.ts similarity index 68% rename from test/url-params-utils.test.ts rename to test/url_params_utils.test.ts index e1147e675..2e11bc75e 100644 --- a/test/url-params-utils.test.ts +++ b/test/url_params_utils.test.ts @@ -1,11 +1,4 @@ -import { encodeInput, decodeInput, separateImports } from '@apify/utilities'; - -export function base64UrlEncode(input: T) { - const data = JSON.stringify(input); - const buffer = 
Buffer.from(data, 'utf8'); - - return buffer.toString('base64url'); -} +import { separateImports } from '@apify/utilities'; const input = { code: `import { PlaywrightCrawler, Dataset } from 'crawlee'; @@ -33,18 +26,6 @@ async requestHandler({ request, page, enqueueLinks, log }) { await crawler.run(['https://crawlee.dev']);`, }; -test('encode/decode', async () => { - const hash = encodeInput(input); - const decoded = decodeInput(hash); - - expect(input).toEqual(decoded); -}); - -test('encode uses base64url', () => { - const hash = encodeInput(input); - expect(hash).toEqual(base64UrlEncode(input)); -}); - test('import extraction', async () => { const { code, imports } = separateImports(input.code); const codeLines = code.split('\n');