/**
 * The only part of a browser page the snapshotter relies on: a way to read the current HTML.
 */
interface BrowserPage {
    content: () => Promise<string>;
}

/**
 * File names of the artifacts stored for one error.
 * A field is left undefined when the corresponding artifact was not saved.
 */
export interface SnapshotResult {
    screenshotFileName?: string;
    htmlFileName?: string;
}

/**
 * ErrorSnapshotter class is used to capture a screenshot of the page and a snapshot of the HTML when an error occurs during web crawling.
 *
 * This functionality is opt-in, and can be enabled via the crawler options:
 *
 * ```ts
 * const crawler = new BasicCrawler({
 *     // ...
 *     statisticsOptions: {
 *         saveErrorSnapshots: true,
 *     },
 * });
 * ```
 */
export class ErrorSnapshotter {
    static readonly MAX_ERROR_CHARACTERS = 30;
    static readonly MAX_HASH_LENGTH = 30;
    static readonly MAX_FILENAME_LENGTH = 250;
    static readonly BASE_MESSAGE = 'An error occurred';
    static readonly SNAPSHOT_PREFIX = 'ERROR_SNAPSHOT';

    /**
     * Capture a snapshot of the error context.
     *
     * This is best-effort: any failure while capturing is swallowed and an empty result is returned,
     * so that taking a snapshot can never mask the error being reported.
     *
     * @param error The error that triggered the snapshot; used only to derive the file name.
     * @param context The crawling context; `page`, `body` and `getKeyValueStore()` are read from it.
     * @returns The file names that were saved, or an empty object when nothing could be saved.
     */
    async captureSnapshot(error: ErrnoException, context: CrawlingContext): Promise<SnapshotResult> {
        try {
            const page = context?.page as BrowserPage | undefined;
            const body = context?.body;

            const keyValueStore = await context?.getKeyValueStore();
            // If the key-value store is not available, or the body and page are not available, return empty filenames
            if (!keyValueStore || (!body && !page)) {
                return {};
            }

            const fileName = this.generateFilename(error);

            let screenshotFileName: string | undefined;
            let htmlFileName: string | undefined;

            if (page) {
                // The generic context type does not declare `saveSnapshot`; a missing function is handled
                // inside `contextCaptureSnapshot`, which turns the resulting failure into `undefined`.
                const capturedFiles = await this.contextCaptureSnapshot(
                    context as unknown as BrowserCrawlingContext,
                    fileName,
                );

                if (capturedFiles) {
                    screenshotFileName = capturedFiles.screenshotFileName;
                    htmlFileName = capturedFiles.htmlFileName;
                }

                // If the snapshot for browsers failed to capture the HTML, try to capture it from the page content
                if (!htmlFileName) {
                    const html = await page.content();
                    htmlFileName = html ? await this.saveHTMLSnapshot(html, keyValueStore, fileName) : undefined;
                }
            } else if (typeof body === 'string') {
                // Non-browser contexts: only a string body is stored as HTML.
                htmlFileName = await this.saveHTMLSnapshot(body, keyValueStore, fileName);
            }

            return {
                screenshotFileName,
                htmlFileName,
            };
        } catch {
            return {};
        }
    }

    /**
     * Captures a snapshot of the current page using the context.saveSnapshot function.
     * This function is applicable for browser contexts only.
     *
     * @returns The screenshot and HTML file names derived from `fileName`,
     * or `undefined` when `context.saveSnapshot` throws.
     */
    async contextCaptureSnapshot(context: BrowserCrawlingContext, fileName: string): Promise<SnapshotResult | undefined> {
        try {
            await context.saveSnapshot({ key: fileName });
            return {
                screenshotFileName: `${fileName}.jpg`,
                htmlFileName: `${fileName}.html`,
            };
        } catch {
            return undefined;
        }
    }

    /**
     * Save the HTML snapshot of the page, and return the fileName with the extension.
     *
     * @returns `${fileName}.html`, or `undefined` when writing to the key-value store fails.
     */
    async saveHTMLSnapshot(html: string, keyValueStore: KeyValueStore, fileName: string): Promise<string | undefined> {
        try {
            await keyValueStore.setValue(fileName, html, { contentType: 'text/html' });
            return `${fileName}.html`;
        } catch {
            return undefined;
        }
    }

    /**
     * Generate a unique fileName for each error snapshot.
     *
     * The name has the shape `<SNAPSHOT_PREFIX>_<hash>_<message>`, where `<hash>` is a truncated SHA-1 of the
     * stack trace (or of the message when there is no stack) and `<message>` is the beginning of the error
     * message. Every run of non-word characters is replaced with a single dash.
     */
    generateFilename(error: ErrnoException): string {
        const { SNAPSHOT_PREFIX, BASE_MESSAGE, MAX_HASH_LENGTH, MAX_ERROR_CHARACTERS, MAX_FILENAME_LENGTH } = ErrorSnapshotter;

        // Create a hash of the error stack trace (hex digits only, so it needs no sanitizing)
        const errorStackHash = crypto
            .createHash('sha1')
            .update(error.stack || error.message || '')
            .digest('hex')
            .slice(0, MAX_HASH_LENGTH);

        /**
         * Remove non-word characters from the start and end of a string.
         */
        const sanitizeString = (str: string): string => str.replace(/^\W+|\W+$/g, '');

        // A message made only of non-word characters (e.g. "!!!") sanitizes to an empty string, which would
        // leave a dangling "_" at the end of the name; fall back to the base message in that case.
        const errorMessagePrefix = sanitizeString((error.message || BASE_MESSAGE).slice(0, MAX_ERROR_CHARACTERS).trim())
            || sanitizeString(BASE_MESSAGE);

        // Generate fileName and remove disallowed characters
        return `${SNAPSHOT_PREFIX}_${errorStackHash}_${errorMessagePrefix}`
            .replace(/\W+/g, '-') // Replace non-word characters with a dash
            .slice(0, MAX_FILENAME_LENGTH);
    }
}
syscall?: string | undefined; +export interface ErrnoException extends Error { + errno?: number; + code?: string | number; + path?: string; + syscall?: string; cause?: any; } @@ -18,6 +21,7 @@ export interface ErrorTrackerOptions { showFullStack: boolean; showErrorMessage: boolean; showFullMessage: boolean; + saveErrorSnapshots: boolean; } const extractPathFromStackTraceLine = (line: string) => { @@ -283,6 +287,8 @@ export class ErrorTracker { total: number; + errorSnapshotter?: ErrorSnapshotter; + constructor(options: Partial = {}) { this.#options = { showErrorCode: true, @@ -291,16 +297,19 @@ export class ErrorTracker { showFullStack: false, showErrorMessage: true, showFullMessage: false, + saveErrorSnapshots: false, ...options, }; + if (this.#options.saveErrorSnapshots) { + this.errorSnapshotter = new ErrorSnapshotter(); + } + this.result = Object.create(null); this.total = 0; } - add(error: ErrnoException) { - this.total++; - + private updateGroup(error: ErrnoException) { let group = this.result; if (this.#options.showStackTrace) { @@ -321,11 +330,38 @@ export class ErrorTracker { increaseCount(group as { count: number }); + return group; + } + + add(error: ErrnoException) { + this.total++; + + this.updateGroup(error); + if (typeof error.cause === 'object' && error.cause !== null) { this.add(error.cause); } } + /** + * This method is async, because it captures a snapshot of the error context. + * We added this new method to avoid breaking changes. 
+ */ + async addAsync(error: ErrnoException, context?: CrawlingContext) { + this.total++; + + const group = this.updateGroup(error); + + // Capture a snapshot (screenshot and HTML) on the first occurrence of an error + if (group.count === 1 && context) { + await this.captureSnapshot(group, error, context).catch(() => { }); + } + + if (typeof error.cause === 'object' && error.cause !== null) { + await this.addAsync(error.cause); + } + } + getUniqueErrorCount() { let count = 0; @@ -366,6 +402,17 @@ export class ErrorTracker { return result.sort((a, b) => b[0] - a[0]).slice(0, count); } + async captureSnapshot(storage: Record, error: ErrnoException, context: CrawlingContext) { + if (!this.errorSnapshotter) { + return; + } + + const { screenshotFileName, htmlFileName } = await this.errorSnapshotter.captureSnapshot(error, context); + + storage.firstErrorScreenshot = screenshotFileName; + storage.firstErrorHtml = htmlFileName; + } + reset() { // This actually safe, since we Object.create(null) so no prototype pollution can happen. 
// eslint-disable-next-line no-restricted-syntax, guard-for-in diff --git a/packages/core/src/crawlers/index.ts b/packages/core/src/crawlers/index.ts index 8f206d4e6043..77a83511e413 100644 --- a/packages/core/src/crawlers/index.ts +++ b/packages/core/src/crawlers/index.ts @@ -2,3 +2,5 @@ export * from './crawler_commons'; export * from './crawler_extension'; export * from './crawler_utils'; export * from './statistics'; +export * from './error_tracker'; +export * from './error_snapshotter'; diff --git a/packages/core/src/crawlers/statistics.ts b/packages/core/src/crawlers/statistics.ts index 345b2859dc12..2f346ca6a202 100644 --- a/packages/core/src/crawlers/statistics.ts +++ b/packages/core/src/crawlers/statistics.ts @@ -1,6 +1,6 @@ -import { ErrorTracker } from '@crawlee/utils'; import ow from 'ow'; +import { ErrorTracker } from './error_tracker'; import { Configuration } from '../configuration'; import type { EventManager } from '../events/event_manager'; import { EventType } from '../events/event_manager'; @@ -66,12 +66,12 @@ export class Statistics { /** * An error tracker for final retry errors. */ - errorTracker = new ErrorTracker(errorTrackerConfig); + errorTracker: ErrorTracker; /** * An error tracker for retry errors prior to the final retry. */ - errorTrackerRetry = new ErrorTracker(errorTrackerConfig); + errorTrackerRetry: ErrorTracker; /** * Statistic instance id. 
@@ -115,6 +115,7 @@ export class Statistics { keyValueStore: ow.optional.object, config: ow.optional.object, persistenceOptions: ow.optional.object, + saveErrorSnapshots: ow.optional.boolean, })); const { @@ -125,8 +126,11 @@ export class Statistics { persistenceOptions = { enable: true, }, + saveErrorSnapshots = false, } = options; + this.errorTracker = new ErrorTracker({ ...errorTrackerConfig, saveErrorSnapshots }); + this.errorTrackerRetry = new ErrorTracker({ ...errorTrackerConfig, saveErrorSnapshots }); this.logIntervalMillis = logIntervalSecs * 1000; this.logMessage = logMessage; this.keyValueStore = keyValueStore; @@ -444,6 +448,12 @@ export interface StatisticsOptions { * Control how and when to persist the statistics. */ persistenceOptions?: PersistenceOptions; + + /** + * Save HTML snapshot (and a screenshot if possible) when an error occurs. + * @default false + */ + saveErrorSnapshots?: boolean; } /** diff --git a/packages/utils/src/index.ts b/packages/utils/src/index.ts index 7ed721e9ec42..9ba820c5ab78 100644 --- a/packages/utils/src/index.ts +++ b/packages/utils/src/index.ts @@ -7,7 +7,6 @@ export * from './internals/memory-info'; export * from './internals/debug'; export * as social from './internals/social'; export * from './internals/typedefs'; -export * from './internals/error_tracker'; export * from './internals/open_graph_parser'; export * from './internals/gotScraping'; export * from './internals/robots'; diff --git a/packages/utils/test/non-error-objects-working.test.ts b/packages/utils/test/non-error-objects-working.test.ts index 167f1b44b212..c7adfbfbb511 100644 --- a/packages/utils/test/non-error-objects-working.test.ts +++ b/packages/utils/test/non-error-objects-working.test.ts @@ -1,4 +1,4 @@ -import { ErrorTracker } from '../src/internals/error_tracker'; +import { ErrorTracker } from '../../core/src/crawlers/error_tracker'; describe('ErrorTracker', () => { test('processing a non-error error should not crash', () => { diff --git 
a/test/core/error_tracker.test.ts b/test/core/error_tracker.test.ts index d51dec7e8a00..f3ae1a9afd5e 100644 --- a/test/core/error_tracker.test.ts +++ b/test/core/error_tracker.test.ts @@ -1,7 +1,5 @@ /* eslint-disable no-multi-spaces */ -import exp from 'node:constants'; - -import { ErrorTracker } from '../../packages/utils/src/internals/error_tracker'; +import { ErrorTracker } from '../../packages/core/src/crawlers/error_tracker'; const random = () => Math.random().toString(36).slice(2); diff --git a/test/e2e/cheerio-error-snapshot/actor/.actor/actor.json b/test/e2e/cheerio-error-snapshot/actor/.actor/actor.json new file mode 100644 index 000000000000..13d855466bf0 --- /dev/null +++ b/test/e2e/cheerio-error-snapshot/actor/.actor/actor.json @@ -0,0 +1,7 @@ +{ + "actorSpecification": 1, + "name": "test-cheerio-error-snapshot", + "version": "0.0", + "buildTag": "latest", + "env": null +} diff --git a/test/e2e/cheerio-error-snapshot/actor/.gitignore b/test/e2e/cheerio-error-snapshot/actor/.gitignore new file mode 100644 index 000000000000..ced7cbfc582d --- /dev/null +++ b/test/e2e/cheerio-error-snapshot/actor/.gitignore @@ -0,0 +1,7 @@ +.idea +.DS_Store +node_modules +package-lock.json +apify_storage +crawlee_storage +storage diff --git a/test/e2e/cheerio-error-snapshot/actor/Dockerfile b/test/e2e/cheerio-error-snapshot/actor/Dockerfile new file mode 100644 index 000000000000..36afd80b9648 --- /dev/null +++ b/test/e2e/cheerio-error-snapshot/actor/Dockerfile @@ -0,0 +1,16 @@ +FROM apify/actor-node:20-beta + +COPY packages ./packages +COPY package*.json ./ + +RUN npm --quiet set progress=false \ + && npm install --only=prod --no-optional --no-audit \ + && npm update --no-audit \ + && echo "Installed NPM packages:" \ + && (npm list --only=prod --no-optional --all || true) \ + && echo "Node.js version:" \ + && node --version \ + && echo "NPM version:" \ + && npm --version + +COPY . 
import { CheerioCrawler } from '@crawlee/cheerio';
import { sleep } from '@crawlee/utils';
import { Actor } from 'apify';

const mainOptions = {
    exit: Actor.isAtHome(),
    storage: process.env.STORAGE_IMPLEMENTATION === 'LOCAL' ? new (await import('@apify/storage-local')).ApifyStorageLocal() : undefined,
};

// One request is enqueued per label; each label triggers a different kind of failure.
const LABELS = {
    TIMEOUT: 'TIMEOUT',
    TYPE_ERROR: 'TYPE_ERROR',
    ERROR_OPENING_PAGE: 'ERROR_OPENING_PAGE',
    POST_NAVIGATION_ERROR: 'POST_NAVIGATION_ERROR',
};

// Pre Navigation errors snapshots will not be saved as we don't get the response in the context
await Actor.main(async () => {
    const crawler = new CheerioCrawler({
        requestHandlerTimeoutSecs: 2,
        maxRequestRetries: 0,
        // Error snapshots are opt-in (the option defaults to false), so the test has to enable them.
        statisticsOptions: {
            saveErrorSnapshots: true,
        },
        async requestHandler({ $, request, log }) {
            const { userData: { label } } = request;

            if (label === LABELS.TIMEOUT) {
                log.error('Timeout error');
                // Sleep well past requestHandlerTimeoutSecs to force a handler timeout.
                await sleep(20_000);
            } else if (label === LABELS.TYPE_ERROR) {
                log.error('TypeError: $(...).error is not a function');
                $().error();
            } else if (label === LABELS.ERROR_OPENING_PAGE) {
                log.error('Error opening page');
                throw new Error('An error occurred while opening the page');
            }
        },
        postNavigationHooks: [async ({ request, log }) => {
            const { userData: { label } } = request;

            // Post navigation errors snapshots are not saved as we don't get the body in the context
            if (label === LABELS.POST_NAVIGATION_ERROR) {
                log.error('Post navigation error');
                throw new Error('Unable to navigate to the requested post');
            }
        }],
    });

    await crawler.run(Object.values(LABELS).map((label) => ({ url: 'https://example.com', userData: { label }, uniqueKey: label })));
}, mainOptions);
import { initialize, getActorTestDir, runActor, expect, hasNestedKey } from '../tools.mjs';

const testActorDirname = getActorTestDir(import.meta.url);
await initialize(testActorDirname);

const { stats, defaultKeyValueStoreItems } = await runActor(testActorDirname);

// All requests should fail to test the error snapshots
await expect(stats.requestsFailed === 4, 'All requests failed');

// The error tracker records the HTML snapshot of a group under the `firstErrorHtml` key.
const totalErrorHtmlFiles = Object.values(stats.errors)
    .filter((error) => hasNestedKey(error, 'firstErrorHtml'))
    .length;

// Count of error HTML files stored in the stats to make sure they are saved
await expect(totalErrorHtmlFiles === 3, 'Number of HTML error files in stats should be 3');
// Count of error HTML files stored in the Key-Value store
await expect(defaultKeyValueStoreItems.length === 3, 'Number of HTML error files in KV store should be 3');
import { PuppeteerCrawler } from '@crawlee/puppeteer';
import { sleep } from '@crawlee/utils';
import { Actor } from 'apify';

const mainOptions = {
    exit: Actor.isAtHome(),
    storage: process.env.STORAGE_IMPLEMENTATION === 'LOCAL' ? new (await import('@apify/storage-local')).ApifyStorageLocal() : undefined,
};

// One request is enqueued per label; each label triggers a different kind of failure.
const LABELS = {
    TIMEOUT: 'TIMEOUT',
    TYPE_ERROR: 'TYPE_ERROR',
    ERROR_OPENING_PAGE: 'ERROR_OPENING_PAGE',
    POST_NAVIGATION_ERROR: 'POST_NAVIGATION_ERROR',
};

// Pre Navigation errors snapshots will not be saved as we don't get the response in the context
await Actor.main(async () => {
    const crawler = new PuppeteerCrawler({
        requestHandlerTimeoutSecs: 15,
        maxRequestRetries: 0,
        // Error snapshots are opt-in (the option defaults to false), so the test has to enable them.
        statisticsOptions: {
            saveErrorSnapshots: true,
        },
        async requestHandler({ request, log, page }) {
            const { userData: { label } } = request;

            if (label === LABELS.TIMEOUT) {
                log.error('Timeout error');
                // Sleep well past requestHandlerTimeoutSecs to force a handler timeout.
                await sleep(30_000);
            } else if (label === LABELS.TYPE_ERROR) {
                log.error('TypeError: page.error is not a function');
                page.error();
            } else if (label === LABELS.ERROR_OPENING_PAGE) {
                log.error('Error opening page');
                throw new Error('An error occurred while opening the page');
            }
        },
        postNavigationHooks: [async ({ request, log }) => {
            const { userData: { label } } = request;

            if (label === LABELS.POST_NAVIGATION_ERROR) {
                log.error('Post navigation error');
                throw new Error('Unable to navigate to the requested post');
            }
        }],
    });

    await crawler.run(Object.values(LABELS).map((label) => ({ url: 'https://example.com', userData: { label }, uniqueKey: label })));
}, mainOptions);
import { initialize, getActorTestDir, runActor, expect, hasNestedKey } from '../tools.mjs';

const testActorDirname = getActorTestDir(import.meta.url);
await initialize(testActorDirname);

const { stats, defaultKeyValueStoreItems } = await runActor(testActorDirname);

// All requests should fail to test the error snapshots
await expect(stats.requestsFailed === 4, 'All requests failed');

// The error tracker records the snapshot files of a group under the
// `firstErrorHtml` and `firstErrorScreenshot` keys.
let totalErrorHtmlFiles = 0;
let totalErrorScreenshotFiles = 0;
for (const error of Object.values(stats.errors)) {
    if (hasNestedKey(error, 'firstErrorHtml')) {
        totalErrorHtmlFiles++;
    }

    if (hasNestedKey(error, 'firstErrorScreenshot')) {
        totalErrorScreenshotFiles++;
    }
}

// Count of error HTML files stored in the stats to make sure they are saved
await expect(totalErrorHtmlFiles === 4, 'Number of HTML error files in stats should be 4');
// Count of error Screenshot files stored in the stats to make sure they are saved
await expect(totalErrorScreenshotFiles === 4, 'Number of screenshots error files in stats should be 4');
// Count of error HTML files and screenshot files stored in the Key-Value store
await expect(defaultKeyValueStoreItems.length === 8, 'Number of HTML and screenshot error snapshots in KV store should be 8');
/**
 * Tells whether `keyName` is an own enumerable key of `obj` or of any object or array nested inside it.
 * Only the presence of the key matters, not its value. Non-object inputs (and `null`) never match.
 *
 * @param {any} obj the object to search
 * @param {string} keyName the key to search for
 * @returns {boolean}
 */
export function hasNestedKey(obj, keyName) {
    const isSearchable = obj !== null && typeof obj === 'object';
    if (!isSearchable) {
        return false;
    }

    // A key matches either directly or somewhere below its value; the recursive call
    // rejects primitives and null by itself.
    return Object.keys(obj).some((name) => name === keyName || hasNestedKey(obj[name], keyName));
}