Skip to content

Commit

Permalink
feat: hideInternalConsole in JSDOMCrawler (#1707)
Browse files Browse the repository at this point in the history
Brings `JSDOMCrawler`'s logs closer to `PlaywrightCrawler`/`PuppeteerCrawler` -
when set to `true`, the `hideInternalConsole` toggle turns off
JSDOM's calls to `console.log`, hiding the output of "page" script execution from
the user.

Co-authored-by: Martin Adámek <banan23@gmail.com>
  • Loading branch information
barjin and B4nan committed Dec 7, 2022
1 parent 11b9565 commit 8975f90
Showing 1 changed file with 35 additions and 1 deletion.
36 changes: 35 additions & 1 deletion packages/jsdom-crawler/src/internals/jsdom-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { HttpCrawler, enqueueLinks, Router, resolveBaseUrlForEnqueueLinksFilteri
import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types';
import { concatStreamToBuffer } from '@apify/utilities';
import type { DOMWindow } from 'jsdom';
import { JSDOM, ResourceLoader } from 'jsdom';
import { JSDOM, ResourceLoader, VirtualConsole } from 'jsdom';
import type { IncomingMessage } from 'http';

export type JSDOMErrorHandler<
Expand All @@ -29,6 +29,10 @@ export interface JSDOMCrawlerOptions<
* Download and run scripts.
*/
runScripts?: boolean;
/**
* Suppress the logs from JSDOM internal console.
*/
hideInternalConsole?: boolean;
}

export type JSDOMHook<
Expand Down Expand Up @@ -133,19 +137,48 @@ export class JSDOMCrawler extends HttpCrawler<JSDOMCrawlingContext> {
/**
 * Option shape for `JSDOMCrawler`: every entry of `HttpCrawler.optionsShape` plus the two
 * JSDOM-specific flags, each declared as an optional boolean.
 */
protected static override optionsShape = {
...HttpCrawler.optionsShape,
runScripts: ow.optional.boolean,
hideInternalConsole: ow.optional.boolean,
};

/** Whether JSDOM is asked to run page scripts; taken from the `runScripts` option (defaults to `false`). */
protected runScripts: boolean;
/** When `true`, the virtual console built by `getVirtualConsole()` is not forwarded to the global `console`. */
protected hideInternalConsole: boolean;
/** Cached `VirtualConsole`; stays `null` until the first `getVirtualConsole()` call creates it. */
protected virtualConsole: VirtualConsole | null = null;

/**
 * Creates the crawler.
 *
 * The JSDOM-specific flags (`runScripts`, `hideInternalConsole`) are stripped from `options`
 * and kept on the instance; all remaining options are passed to the `HttpCrawler` constructor.
 *
 * @param options Crawler options; both JSDOM flags default to `false` when left undefined.
 * @param config Optional configuration passed through to the parent constructor.
 */
constructor(options: JSDOMCrawlerOptions = {}, config?: Configuration) {
    // Separate the JSDOM-only settings from what the parent class understands.
    const { runScripts, hideInternalConsole, ...httpOptions } = options;

    super(httpOptions, config);

    // Fall back to `false` only for `undefined`, exactly like a destructuring default would.
    this.runScripts = runScripts === undefined ? false : runScripts;
    this.hideInternalConsole = hideInternalConsole === undefined ? false : hideInternalConsole;
}

/**
 * Gives access to the `VirtualConsole` this crawler hands to JSDOM, e.g. to subscribe to
 * the messages produced inside the emulated page.
 *
 * The instance is created on the first call and reused afterwards. Unless the
 * `hideInternalConsole` option is `true`, it is wired to the global `console` at creation time.
 * With `hideInternalConsole` enabled nothing is printed by default, but listeners attached
 * to the returned object still receive the messages.
 *
 * **Example usage:**
 * ```javascript
 * const virtualConsole = crawler.getVirtualConsole();
 * virtualConsole.on('error', (e) => {
 *     log.error(e);
 * });
 * ```
 */
getVirtualConsole() {
    const existing = this.virtualConsole;
    if (existing) {
        return existing;
    }

    const created = new VirtualConsole();
    // Store the instance before wiring it up, so the cached field is set first.
    this.virtualConsole = created;
    if (!this.hideInternalConsole) {
        created.sendTo(console);
    }
    return created;
}

protected override async _cleanupContext(context: JSDOMCrawlingContext) {
Expand All @@ -160,6 +193,7 @@ export class JSDOMCrawler extends HttpCrawler<JSDOMCrawlingContext> {
contentType: isXml ? 'text/xml' : 'text/html',
runScripts: this.runScripts ? 'dangerously' : undefined,
resources,
virtualConsole: this.getVirtualConsole(),
});

if (this.runScripts) {
Expand Down

0 comments on commit 8975f90

Please sign in to comment.