Skip to content

Commit 788913e

Browse files
authored
feat: update @apify/scraper-tools (#37)
needed to add playwright scraper
1 parent 38c9bc2 commit 788913e

File tree

10 files changed

+1355
-373
lines changed

10 files changed

+1355
-373
lines changed

package-lock.json

Lines changed: 1307 additions & 339 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@
6363
"@typescript-eslint/eslint-plugin": "5.33.1",
6464
"@typescript-eslint/parser": "5.33.1",
6565
"commitlint": "^17.0.3",
66-
"crawlee": "^3.0.2",
66+
"crawlee": "^3.0.3",
6767
"eslint": "^8.19.0",
6868
"fs-extra": "^10.1.0",
6969
"gen-esm-wrapper": "^1.1.3",

packages/actor-scraper/cheerio-scraper/package.json

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,8 @@
77
"type": "module",
88
"dependencies": {
99
"@apify/scraper-tools": "^1.0.0",
10-
"@crawlee/cheerio": "^3.0.0",
11-
"apify": "^3.0.0"
10+
"@crawlee/cheerio": "^3.0.3",
11+
"apify": "^3.0.2"
1212
},
1313
"devDependencies": {
1414
"markdown-toc": "^1.2.0"

packages/actor-scraper/puppeteer-scraper/package.json

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,8 @@
77
"type": "module",
88
"dependencies": {
99
"@apify/scraper-tools": "^1.0.0",
10-
"@crawlee/puppeteer": "^3.0.0",
11-
"apify": "^3.0.0",
10+
"@crawlee/puppeteer": "^3.0.3",
11+
"apify": "^3.0.2",
1212
"puppeteer": "*"
1313
},
1414
"devDependencies": {

packages/actor-scraper/web-scraper/package.json

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,8 @@
77
"type": "module",
88
"dependencies": {
99
"@apify/scraper-tools": "^1.0.0",
10-
"apify": "^3.0.0",
11-
"@crawlee/puppeteer": "^3.0.0",
10+
"apify": "^3.0.2",
11+
"@crawlee/puppeteer": "^3.0.3",
1212
"content-type": "^1.0.4",
1313
"devtools-server": "^0.0.2",
1414
"puppeteer": "*"

packages/actor-scraper/web-scraper/src/internals/consts.ts

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -67,11 +67,9 @@ export const enum BreakpointLocation {
6767

6868
declare global {
6969
// eslint-disable-next-line vars-on-top, no-var
70-
var window: Window;
70+
var window: Window & typeof globalThis;
7171
// eslint-disable-next-line vars-on-top, no-var
72-
var document: {
73-
readyState: string;
74-
};
72+
var document: Document;
7573

7674
interface Window {
7775
[K: string]: any;

packages/apify/package.json

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -57,9 +57,9 @@
5757
"@apify/consts": "^2.0.0",
5858
"@apify/log": "^2.1.0",
5959
"@apify/utilities": "^2.1.1",
60-
"@crawlee/core": "^3.0.2",
61-
"@crawlee/types": "^3.0.2",
62-
"@crawlee/utils": "^3.0.2",
60+
"@crawlee/core": "^3.0.3",
61+
"@crawlee/types": "^3.0.3",
62+
"@crawlee/utils": "^3.0.3",
6363
"semver": "^7.3.7",
6464
"apify-client": "^2.6.0",
6565
"ow": "^0.28.1",

packages/scraper-tools/package.json

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -38,22 +38,28 @@
3838
"ajv": "^6.12.6",
3939
"content-type": "^1.0.4"
4040
},
41-
"peerDependencies": {
42-
"apify": "^3.0.0",
43-
"@crawlee/core": "^3.0.0",
44-
"@crawlee/utils": "^3.0.0",
45-
"@crawlee/puppeteer": "^3.0.0",
46-
"@crawlee/types": "^3.0.0"
47-
},
4841
"devDependencies": {
49-
"apify": "^3.0.0",
50-
"@crawlee/core": "^3.0.0",
51-
"@crawlee/utils": "^3.0.0",
52-
"@crawlee/puppeteer": "^3.0.0"
42+
"apify": "^3.0.2",
43+
"@crawlee/core": "^3.0.3",
44+
"@crawlee/types": "^3.0.3",
45+
"@crawlee/utils": "^3.0.3",
46+
"@crawlee/puppeteer": "^3.0.3",
47+
"@crawlee/playwright": "^3.0.3"
48+
},
49+
"peerDependencies": {
50+
"apify": "^3.0.2",
51+
"@crawlee/core": "^3.0.3",
52+
"@crawlee/types": "^3.0.3",
53+
"@crawlee/utils": "^3.0.3",
54+
"@crawlee/puppeteer": "^3.0.3",
55+
"@crawlee/playwright": "^3.0.3"
5356
},
5457
"peerDependenciesMeta": {
5558
"@crawlee/puppeteer": {
5659
"optional": true
60+
},
61+
"@crawlee/playwright": {
62+
"optional": true
5763
}
5864
}
5965
}

packages/scraper-tools/src/browser_tools.ts

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,17 @@
11
import { Actor } from 'apify';
22
import log from '@apify/log';
3-
import type { Page } from 'puppeteer';
4-
import { inspect } from 'util';
3+
import type { CommonPage } from '@crawlee/browser-pool';
4+
import { inspect } from 'node:util';
55
import { RESOURCE_LOAD_ERROR_MESSAGE, SNAPSHOT } from './consts';
66
import { createRandomHash } from './tools';
77

8+
export interface Page extends CommonPage {
9+
exposeFunction(name: string, callback: () => unknown): Promise<void>;
10+
on(eventName: string, handler: (msg: any) => Promise<void>): unknown;
11+
content(): unknown;
12+
screenshot(): unknown;
13+
}
14+
815
/**
916
* Creates a string with an appended pageFunction to be evaluated in
1017
* the browser context and placed within the given namespace.
@@ -97,10 +104,10 @@ export interface DumpConsoleOptions {
97104
*
98105
* This is used instead of the "dumpio" launch option
99106
* to prevent cluttering the STDOUT with unnecessary
100-
* Chromium messages, usually internal errors, occuring in page.
107+
* Chromium messages, usually internal errors, occurring in page.
101108
*/
102109
export function dumpConsole(page: Page, options: DumpConsoleOptions = {}) {
103-
page.on('console', async (msg) => {
110+
page.on('console', async (msg: any) => {
104111
const msgType = msg.type();
105112

106113
if (msgType === 'error' && !options.logErrors) return;
@@ -116,12 +123,12 @@ export function dumpConsole(page: Page, options: DumpConsoleOptions = {}) {
116123
// Otherwise, just use the text immediately.
117124
let message;
118125
if (hasJSHandles) {
119-
const msgPromises = msg.args().map((jsh) => {
126+
const msgPromises = msg.args().map((jsh: any) => {
120127
return jsh.jsonValue()
121-
.catch((e) => log.exception(e, `Stringification of console.${msgType} in browser failed.`));
128+
.catch((e: Error) => log.exception(e, `Stringification of console.${msgType} in browser failed.`));
122129
});
123130
message = (await Promise.all(msgPromises))
124-
.map((m) => inspect(m))
131+
.map((m: string) => inspect(m))
125132
.join(' '); // console.log('a', 'b') produces 'a b'
126133
} else {
127134
message = msg.text();

packages/scraper-tools/src/context.ts

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@ import type {
88
RequestQueue,
99
RequestQueueOperationOptions,
1010
} from '@crawlee/core';
11-
import { puppeteerUtils } from '@crawlee/puppeteer';
1211
import type { Dictionary } from '@crawlee/utils';
1312
import log from '@apify/log';
1413
import type { MediaType } from 'content-type';
@@ -25,6 +24,8 @@ export interface CrawlerSetupOptions {
2524
requestQueue: RequestQueue;
2625
keyValueStore: KeyValueStore;
2726
customData: unknown;
27+
playwrightUtils?: unknown;
28+
puppeteerUtils?: unknown;
2829
}
2930

3031
export interface MapLike<K, V> extends Omit<Map<K, V>, 'values' | 'keys' | 'entries'| 'set'> {
@@ -64,7 +65,6 @@ class Context<Options extends ContextOptions = ContextOptions, ExtraFields = Opt
6465
readonly Actor = Actor;
6566
readonly Apify = Actor; // for back compatibility
6667
readonly log = log;
67-
readonly puppeteerUtils = puppeteerUtils;
6868
readonly input: any;
6969
readonly env: ApifyEnv;
7070
readonly customData: unknown;
@@ -82,6 +82,9 @@ class Context<Options extends ContextOptions = ContextOptions, ExtraFields = Opt
8282
skipLinks: false,
8383
};
8484

85+
this.playwrightUtils = crawlerSetup?.playwrightUtils;
86+
this.puppeteerUtils = crawlerSetup?.puppeteerUtils;
87+
8588
this.input = JSON.parse(crawlerSetup.rawInput);
8689
this.env = { ...crawlerSetup.env };
8790
this.customData = crawlerSetup.customData;

0 commit comments

Comments
 (0)