-
Notifications
You must be signed in to change notification settings - Fork 581
/
main.js
39 lines (34 loc) · 1.14 KB
/
main.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import { Actor, LogLevel, log as Logger } from 'apify';
import { CheerioCrawler, Dataset } from '@crawlee/cheerio';
// Options handed to Actor.main() further down in this file.
// `exit` mirrors Actor.isAtHome(); `storage` is only populated when the
// STORAGE_IMPLEMENTATION environment variable is exactly 'LOCAL'.
const shouldExit = Actor.isAtHome();

let localStorageClient;
if (process.env.STORAGE_IMPLEMENTATION === 'LOCAL') {
    // Loaded lazily so the package is only imported when it is actually requested.
    const { ApifyStorageLocal } = await import('@apify/storage-local');
    localStorageClient = new ApifyStorageLocal();
}

const mainOptions = {
    exit: shouldExit,
    // Stays `undefined` unless the LOCAL implementation was requested above.
    storage: localStorageClient,
};
// Entry point: builds a CheerioCrawler, runs it from a single seed URL inside
// Actor.main(), and stores each visited page's URL and title in the dataset.
await Actor.main(async () => {
    const startUrls = ['https://crawlee.dev/docs/quick-start'];

    // Child logger so the crawler's output carries its own prefix.
    const crawlerLog = Logger.child({
        prefix: 'CheerioCrawler',
        // level: LogLevel.DEBUG,
    });

    /**
     * Handles one crawled page: enqueues links matching the docs glob, logs
     * the request id, URL and title, then pushes `{ url, pageTitle }` to the dataset.
     *
     * @param {object} context - Crawling context supplied by the crawler.
     */
    async function requestHandler(context) {
        const { $, enqueueLinks, request, log } = context;

        await enqueueLinks({ globs: ['https://crawlee.dev/docs/**'] });

        const { url } = request;
        const title = $('title').first().text();
        log.info(`REQUEST ID: ${request.id} URL: ${url} TITLE: ${title}`);

        await Dataset.pushData({ url, pageTitle: title });
    }

    const crawler = new CheerioCrawler({
        requestHandler,
        // Opts into the crawler's `requestLocking` experiment.
        experiments: { requestLocking: true },
        log: crawlerLog,
    });

    try {
        await crawler.run(startUrls);
    } catch (error) {
        // NOTE(review): a failed run is only printed here and not rethrown, so
        // this callback still resolves normally — confirm that is intended.
        console.error(error);
    }
}, mainOptions);