Skip to content

Commit

Permalink
feat: update @apify/scraper-tools (#37)
Browse files Browse the repository at this point in the history
needed to add playwright scraper
  • Loading branch information
AndreyBykov authored Aug 18, 2022
1 parent 38c9bc2 commit 788913e
Show file tree
Hide file tree
Showing 10 changed files with 1,355 additions and 373 deletions.
1,646 changes: 1,307 additions & 339 deletions package-lock.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
"@typescript-eslint/eslint-plugin": "5.33.1",
"@typescript-eslint/parser": "5.33.1",
"commitlint": "^17.0.3",
"crawlee": "^3.0.2",
"crawlee": "^3.0.3",
"eslint": "^8.19.0",
"fs-extra": "^10.1.0",
"gen-esm-wrapper": "^1.1.3",
Expand Down
4 changes: 2 additions & 2 deletions packages/actor-scraper/cheerio-scraper/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"type": "module",
"dependencies": {
"@apify/scraper-tools": "^1.0.0",
"@crawlee/cheerio": "^3.0.0",
"apify": "^3.0.0"
"@crawlee/cheerio": "^3.0.3",
"apify": "^3.0.2"
},
"devDependencies": {
"markdown-toc": "^1.2.0"
Expand Down
4 changes: 2 additions & 2 deletions packages/actor-scraper/puppeteer-scraper/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"type": "module",
"dependencies": {
"@apify/scraper-tools": "^1.0.0",
"@crawlee/puppeteer": "^3.0.0",
"apify": "^3.0.0",
"@crawlee/puppeteer": "^3.0.3",
"apify": "^3.0.2",
"puppeteer": "*"
},
"devDependencies": {
Expand Down
4 changes: 2 additions & 2 deletions packages/actor-scraper/web-scraper/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"type": "module",
"dependencies": {
"@apify/scraper-tools": "^1.0.0",
"apify": "^3.0.0",
"@crawlee/puppeteer": "^3.0.0",
"apify": "^3.0.2",
"@crawlee/puppeteer": "^3.0.3",
"content-type": "^1.0.4",
"devtools-server": "^0.0.2",
"puppeteer": "*"
Expand Down
6 changes: 2 additions & 4 deletions packages/actor-scraper/web-scraper/src/internals/consts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,9 @@ export const enum BreakpointLocation {

declare global {
// eslint-disable-next-line vars-on-top, no-var
var window: Window;
var window: Window & typeof globalThis;
// eslint-disable-next-line vars-on-top, no-var
var document: {
readyState: string;
};
var document: Document;

interface Window {
[K: string]: any;
Expand Down
6 changes: 3 additions & 3 deletions packages/apify/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@
"@apify/consts": "^2.0.0",
"@apify/log": "^2.1.0",
"@apify/utilities": "^2.1.1",
"@crawlee/core": "^3.0.2",
"@crawlee/types": "^3.0.2",
"@crawlee/utils": "^3.0.2",
"@crawlee/core": "^3.0.3",
"@crawlee/types": "^3.0.3",
"@crawlee/utils": "^3.0.3",
"semver": "^7.3.7",
"apify-client": "^2.6.0",
"ow": "^0.28.1",
Expand Down
28 changes: 17 additions & 11 deletions packages/scraper-tools/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,28 @@
"ajv": "^6.12.6",
"content-type": "^1.0.4"
},
"peerDependencies": {
"apify": "^3.0.0",
"@crawlee/core": "^3.0.0",
"@crawlee/utils": "^3.0.0",
"@crawlee/puppeteer": "^3.0.0",
"@crawlee/types": "^3.0.0"
},
"devDependencies": {
"apify": "^3.0.0",
"@crawlee/core": "^3.0.0",
"@crawlee/utils": "^3.0.0",
"@crawlee/puppeteer": "^3.0.0"
"apify": "^3.0.2",
"@crawlee/core": "^3.0.3",
"@crawlee/types": "^3.0.3",
"@crawlee/utils": "^3.0.3",
"@crawlee/puppeteer": "^3.0.3",
"@crawlee/playwright": "^3.0.3"
},
"peerDependencies": {
"apify": "^3.0.2",
"@crawlee/core": "^3.0.3",
"@crawlee/types": "^3.0.3",
"@crawlee/utils": "^3.0.3",
"@crawlee/puppeteer": "^3.0.3",
"@crawlee/playwright": "^3.0.3"
},
"peerDependenciesMeta": {
"@crawlee/puppeteer": {
"optional": true
},
"@crawlee/playwright": {
"optional": true
}
}
}
21 changes: 14 additions & 7 deletions packages/scraper-tools/src/browser_tools.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
import { Actor } from 'apify';
import log from '@apify/log';
import type { Page } from 'puppeteer';
import { inspect } from 'util';
import type { CommonPage } from '@crawlee/browser-pool';
import { inspect } from 'node:util';
import { RESOURCE_LOAD_ERROR_MESSAGE, SNAPSHOT } from './consts';
import { createRandomHash } from './tools';

/**
 * Structural page type accepted by the helpers in this module.
 *
 * Extends `CommonPage` from `@crawlee/browser-pool` with the additional
 * members declared below. Only the shape is described here; no concrete
 * browser library type is referenced by this interface.
 *
 * NOTE(review): presumably this exists so that both Puppeteer and Playwright
 * page objects can be passed to the same helpers — confirm against callers.
 */
export interface Page extends CommonPage {
/**
 * Registers `callback` under `name`; the returned promise resolves with no value.
 * NOTE(review): presumably makes the function callable from the page context,
 * as the same-named Puppeteer/Playwright method does — confirm.
 */
exposeFunction(name: string, callback: () => unknown): Promise<void>;
/**
 * Subscribes an async `handler` to the event called `eventName`.
 * The handler argument is typed `any`, so its shape is not checked here.
 */
on(eventName: string, handler: (msg: any) => Promise<void>): unknown;
/** Return type is `unknown`; callers must await/narrow the result themselves. */
content(): unknown;
/** Return type is `unknown`; callers must await/narrow the result themselves. */
screenshot(): unknown;
}

/**
* Creates a string with an appended pageFunction to be evaluated in
* the browser context and placed within the given namespace.
Expand Down Expand Up @@ -97,10 +104,10 @@ export interface DumpConsoleOptions {
*
* This is used instead of the "dumpio" launch option
* to prevent cluttering the STDOUT with unnecessary
* Chromium messages, usually internal errors, occuring in page.
* Chromium messages, usually internal errors, occurring in page.
*/
export function dumpConsole(page: Page, options: DumpConsoleOptions = {}) {
page.on('console', async (msg) => {
page.on('console', async (msg: any) => {
const msgType = msg.type();

if (msgType === 'error' && !options.logErrors) return;
Expand All @@ -116,12 +123,12 @@ export function dumpConsole(page: Page, options: DumpConsoleOptions = {}) {
// Otherwise, just use the text immediately.
let message;
if (hasJSHandles) {
const msgPromises = msg.args().map((jsh) => {
const msgPromises = msg.args().map((jsh: any) => {
return jsh.jsonValue()
.catch((e) => log.exception(e, `Stringification of console.${msgType} in browser failed.`));
.catch((e: Error) => log.exception(e, `Stringification of console.${msgType} in browser failed.`));
});
message = (await Promise.all(msgPromises))
.map((m) => inspect(m))
.map((m: string) => inspect(m))
.join(' '); // console.log('a', 'b') produces 'a b'
} else {
message = msg.text();
Expand Down
7 changes: 5 additions & 2 deletions packages/scraper-tools/src/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import type {
RequestQueue,
RequestQueueOperationOptions,
} from '@crawlee/core';
import { puppeteerUtils } from '@crawlee/puppeteer';
import type { Dictionary } from '@crawlee/utils';
import log from '@apify/log';
import type { MediaType } from 'content-type';
Expand All @@ -25,6 +24,8 @@ export interface CrawlerSetupOptions {
requestQueue: RequestQueue;
keyValueStore: KeyValueStore;
customData: unknown;
playwrightUtils?: unknown;
puppeteerUtils?: unknown;
}

export interface MapLike<K, V> extends Omit<Map<K, V>, 'values' | 'keys' | 'entries'| 'set'> {
Expand Down Expand Up @@ -64,7 +65,6 @@ class Context<Options extends ContextOptions = ContextOptions, ExtraFields = Opt
readonly Actor = Actor;
readonly Apify = Actor; // for back compatibility
readonly log = log;
readonly puppeteerUtils = puppeteerUtils;
readonly input: any;
readonly env: ApifyEnv;
readonly customData: unknown;
Expand All @@ -82,6 +82,9 @@ class Context<Options extends ContextOptions = ContextOptions, ExtraFields = Opt
skipLinks: false,
};

this.playwrightUtils = crawlerSetup?.playwrightUtils;
this.puppeteerUtils = crawlerSetup?.puppeteerUtils;

this.input = JSON.parse(crawlerSetup.rawInput);
this.env = { ...crawlerSetup.env };
this.customData = crawlerSetup.customData;
Expand Down

0 comments on commit 788913e

Please sign in to comment.