Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions packages/utilities/src/code_hash_manager.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { timingSafeEqual, createHmac } from 'node:crypto';

/**
 * Property names used in the serialized `meta` section of the hash.
 * The single-letter values are the keys that end up in the encoded JSON.
 */
export enum CodeHashMetaKey {
// Key under which the hash format version is stored.
VERSION = 'v',
// Key under which the user passed to `encode` is stored.
USER = 'u',
}

/**
 * Allows hashing of an Actor input together with some metadata into a shareable link for the "Run on Apify" button.
 * Uses a common secret for checking the signatures.
 *
 * The hash consists of 3 parts separated by a dot, as in `ABC.DEF.GHI`, each being a base64url encoded string:
 *  - `meta` object with the `version` and `user` properties.
 *  - `data` data object (the one that gets encoded)
 *  - `signature` used for verification of the URL hash, computed from the `meta` and `data` sections
 *    (HMAC-SHA256 over the string `ABC.DEF`, keyed with the secret)
 */
export class CodeHashManager {
    static readonly SECTION_SEPARATOR = '.';
    static readonly VERSION = 1;

    /**
     * @param secret key used for the HMAC signature; it must be the same for encoding and decoding,
     *   otherwise `decode` reports `validSignature: false`.
     */
    constructor(private readonly secret: string) {}

    /**
     * Encodes object (e.g. input for actor) to a string hash and uses the `secret` to sign the hash.
     *
     * @param data JSON-serializable object to embed in the hash
     * @param user value stored in the `meta` section under the user key
     * @returns the hash in the form `meta.data.signature`
     */
    encode<T extends object>(data: T, user: string) {
        const meta = {
            [CodeHashMetaKey.USER]: user,
            [CodeHashMetaKey.VERSION]: CodeHashManager.VERSION,
        };
        const metaBase64 = this.toBase64(JSON.stringify(meta));
        const inputBase64 = this.toBase64(JSON.stringify(data));
        // The signature covers the encoded `meta` and `data` sections exactly as they appear in the hash.
        const dataToSign = [metaBase64, inputBase64].join(CodeHashManager.SECTION_SEPARATOR);
        const signatureBase64 = this.toBase64(this.generateSignature(dataToSign));

        return [metaBase64, inputBase64, signatureBase64].join(CodeHashManager.SECTION_SEPARATOR);
    }

    /**
     * Decodes a hash produced by `encode` and checks its signature against the `secret`.
     *
     * The decoded `data` is returned even when the signature does not match; callers must
     * check `meta.validSignature` before trusting the content.
     *
     * @param urlHash hash in the form `meta.data.signature`
     * @returns the decoded `data` plus `meta` with `user`, `version` and `validSignature`
     * @throws Error when the hash has fewer than 3 sections
     * @throws SyntaxError when the `meta` or `data` section does not contain valid JSON
     */
    decode(urlHash: string) {
        const parts = urlHash.split(CodeHashManager.SECTION_SEPARATOR);
        if (parts.length < 3) {
            throw new Error(
                `Invalid hash: expected 3 sections separated by "${CodeHashManager.SECTION_SEPARATOR}", got ${parts.length}`,
            );
        }

        const [metaBase64, dataBase64, signatureBase64] = parts;
        const dataToSign = [metaBase64, dataBase64].join(CodeHashManager.SECTION_SEPARATOR);
        const meta = JSON.parse(this.fromBase64(metaBase64).toString());
        const data = JSON.parse(this.fromBase64(dataBase64).toString());
        const signature = this.fromBase64(signatureBase64);
        const expectedSignature = this.generateSignature(dataToSign);
        // `timingSafeEqual` throws a RangeError for inputs of different byte length, so a truncated or
        // padded signature must be rejected up front. The digest length is public, so this check leaks nothing.
        const validSignature = signature.length === expectedSignature.length
            && timingSafeEqual(signature, expectedSignature);

        return {
            data,
            meta: {
                user: meta[CodeHashMetaKey.USER],
                version: meta[CodeHashMetaKey.VERSION],
                validSignature,
            },
        };
    }

    /** Encodes a string or Buffer as a base64url string. */
    private toBase64(data: string | Buffer) {
        return Buffer.from(data).toString('base64url');
    }

    /** Decodes a base64url string into a Buffer. */
    private fromBase64(encoded: string) {
        return Buffer.from(encoded, 'base64url');
    }

    /** Computes the raw HMAC-SHA256 digest of `data`, keyed with the `secret`. */
    private generateSignature(data: string) {
        return createHmac('sha256', this.secret).update(data).digest();
    }
}
1 change: 1 addition & 0 deletions packages/utilities/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ export * from './streams_utilities';
export * from './webhook_payload_template';
export * from './crypto';
export * from './url_params_utils';
export * from './code_hash_manager';
44 changes: 0 additions & 44 deletions packages/utilities/src/url_params_utils.ts
Original file line number Diff line number Diff line change
@@ -1,47 +1,3 @@
/**
 * Converts a base64url string to standard base64: maps `-` to `+` and `_` to `/`,
 * then appends `=` until the length is a multiple of 4.
 *
 * @throws Error when the length modulo 4 is 1, which no valid base64 string can have
 */
function base64urlToBase64(input: string) {
    const standard = input.split('-').join('+').split('_').join('/');
    const remainder = standard.length % 4;

    // Already aligned to a 4-character boundary, nothing to pad.
    if (remainder === 0) {
        return standard;
    }

    if (remainder === 1) {
        throw new Error('InvalidLengthError: Input base64url string is the wrong length to determine padding');
    }

    return standard + '='.repeat(4 - remainder);
}

/**
 * Converts standard base64 to base64url: maps `+` to `-` and `/` to `_`,
 * then removes the trailing `=` padding.
 */
function base64ToBase64Url(input: string) {
    const urlSafe = input.split('+').join('-').split('/').join('_');

    return urlSafe.replace(/=+$/m, '');
}

/**
 * Serializes an object (e.g. input for actor) to JSON and returns it as a base64url string hash.
 */
export function encodeInput<T extends object>(input: T) {
    const json = JSON.stringify(input);
    const standardBase64 = Buffer.from(json, 'utf8').toString('base64');

    return base64ToBase64Url(standardBase64);
}

/**
 * Reverses `encodeInput`: turns the base64url string hash back into the object it was created from.
 */
export function decodeInput(urlHash: string) {
    const bytes = Buffer.from(base64urlToBase64(urlHash), 'base64');

    return JSON.parse(bytes.toString('utf8'));
}

/**
* Extract import statements from the code.
*/
Expand Down
59 changes: 59 additions & 0 deletions test/code_hash_manager.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import { CodeHashManager } from '@apify/utilities';

// Secret shared by the `manager` instance below for signing and verifying hashes.
const secret = 'abcd';
// Sample input: an object whose `code` property holds a multi-line Crawlee script,
// including escaped backticks and `${}` placeholders.
const input = {
code: `import { PlaywrightCrawler, Dataset } from 'crawlee';

// PlaywrightCrawler crawls the web using a headless
// browser controlled by the Playwright library.
const crawler = new PlaywrightCrawler({
// Use the requestHandler to process each of the crawled pages.
async requestHandler({ request, page, enqueueLinks, log }) {
const title = await page.title();
log.info(\`Title of \${request.loadedUrl} is '\${title}'\`);

// Save results as JSON to ./storage/datasets/default
await Dataset.pushData({ title, url: request.loadedUrl });

// Extract links from the current page
// and add them to the crawling queue.
await enqueueLinks();
},
// Uncomment this option to see the browser window.
// headless: false,
});

// Add first URL to the queue and start the crawl.
await crawler.run(['https://crawlee.dev']);`,
};

// Manager under test, constructed with `secret`; both tests below decode with it.
const manager = new CodeHashManager(secret);

// Round trip with a single manager: the decoded data matches the input and the signature is valid.
test('encode/decode', async () => {
    const encoded = manager.encode(input, '123');
    const decoded = manager.decode(encoded);
    const sections = encoded.split(CodeHashManager.SECTION_SEPARATOR);

    expect(typeof encoded).toBe('string');
    expect(sections).toHaveLength(3);
    expect(input).toEqual(decoded.data);
    expect(decoded.meta).toEqual({
        version: 1,
        user: '123',
        validSignature: true,
    });
});

// A hash signed with an empty secret and decoded by `manager` (secret 'abcd') still yields
// the data and metadata, but is reported with `validSignature: false`.
test('encode without secret', async () => {
    const unsignedManager = new CodeHashManager('');
    const encoded = unsignedManager.encode(input, '123');
    const decoded = manager.decode(encoded);
    const sections = encoded.split(CodeHashManager.SECTION_SEPARATOR);

    expect(typeof encoded).toBe('string');
    expect(sections).toHaveLength(3);
    expect(input).toEqual(decoded.data);
    expect(decoded.meta).toEqual({
        version: 1,
        user: '123',
        validSignature: false,
    });
});
21 changes: 1 addition & 20 deletions test/url-params-utils.test.ts → test/url_params_utils.test.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
import { encodeInput, decodeInput, separateImports } from '@apify/utilities';

/**
 * Reference encoder for the tests: JSON-serializes `input` and encodes it with
 * Node's built-in `base64url` Buffer encoding.
 */
export function base64UrlEncode<T extends object>(input: T) {
    const json = JSON.stringify(input);

    return Buffer.from(json, 'utf8').toString('base64url');
}
import { separateImports } from '@apify/utilities';

const input = {
code: `import { PlaywrightCrawler, Dataset } from 'crawlee';
Expand Down Expand Up @@ -33,18 +26,6 @@ async requestHandler({ request, page, enqueueLinks, log }) {
await crawler.run(['https://crawlee.dev']);`,
};

// Round trip: decoding an encoded input gives back an equal object.
test('encode/decode', async () => {
    const roundTripped = decodeInput(encodeInput(input));

    expect(input).toEqual(roundTripped);
});

// The hand-rolled encoder must produce the same string as the `base64UrlEncode` reference helper.
test('encode uses base64url', () => {
    const expected = base64UrlEncode(input);

    expect(encodeInput(input)).toEqual(expected);
});

test('import extraction', async () => {
const { code, imports } = separateImports(input.code);
const codeLines = code.split('\n');
Expand Down