-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape.js
68 lines (59 loc) · 2.05 KB
/
scrape.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env node
const fs = require("fs");
const cheerio = require('cheerio');
const playwright = require('playwright');
(async () => {
// Launch a visible (non-headless) Chromium window with a fresh context and page.
const browser = await playwright.chromium.launch({ headless: false });
const context = await browser.newContext();
const page = await context.newPage();

// Restore a previously exported session from export.json when the file exists.
// NOTE(review): assumes the file holds a JSON array of cookie objects in the
// shape context.addCookies() accepts — confirm against how it is exported.
try {
  const cookies = JSON.parse(fs.readFileSync('export.json', 'utf8'));
  await context.addCookies(cookies);
} catch (e) {
  // Only a missing cookie file is an expected condition. Malformed JSON or
  // rejected cookies are real failures and must not be hidden.
  if (e.code !== 'ENOENT') {
    throw e;
  }
  console.log('no cookies');
}

// Navigate outside the try block: the page must be loaded whether or not
// cookies were found, otherwise the clicks below run against a blank page,
// and a navigation failure should surface instead of being swallowed.
await page.goto('https://journey.cloud/timeline');

// Trigger the first "load more" click and give the page time to react.
const LOAD_MORE_SELECTOR = '#cardFooterLoaded button';
await page.click(LOAD_MORE_SELECTOR);
await page.waitForTimeout(5000);

// Wait for the modal's close button to appear, then dismiss the modal.
// NOTE(review): presumably the modal is opened by the click above — confirm.
const CLOSE_MODAL_SELECTOR = '.sellModal .close';
await page.waitForSelector(CLOSE_MODAL_SELECTOR);
await page.click(CLOSE_MODAL_SELECTOR);
/**
 * Snapshot the current page HTML, extract every timeline card and write the
 * result to ./data.json as pretty-printed JSON.
 *
 * Each entry has the shape { text, time, metadata }, all trimmed strings.
 *
 * @returns {Promise<void>} Resolves once data.json has been written; rejects
 *   if the page content cannot be read, a card cannot be processed, or the
 *   file cannot be written.
 */
async function parse() {
  const CARD_SELECTOR = '.timelinex-card.share';
  const $page = cheerio.load(await page.content());
  const result = [];

  // A plain (non-async) callback: nothing in here awaits, and an async
  // callback would turn any exception into an unhandled rejection instead of
  // failing parse() itself.
  $page(CARD_SELECTOR).each((_index, element) => {
    const $card = $page(element);
    const $cardText = $card.find('.cardText');

    // text() drops <br> elements, so turn them into newlines first.
    // https://github.com/cheeriojs/cheerio/issues/839
    $cardText.find('br').replaceWith('\n');

    result.push({
      text: $cardText.text().trim(),
      time: $card.find('.time > div:nth-child(1)').text().trim(),
      metadata: $card.find('.time .metadata').text().trim(),
    });
  });

  fs.writeFileSync('./data.json', JSON.stringify(result, null, 2), 'utf-8');
}
/**
 * Keep clicking the "load more" button until it is no longer on the page.
 *
 * After every click the function waits 10 seconds before looking for the
 * button again. If any step fails while paging, the error is logged and
 * paging stops; the function still resolves, so the caller can go on to save
 * whatever has been loaded so far.
 *
 * Implemented as a loop rather than self-recursion so that a long timeline
 * does not build an ever-deeper chain of pending calls.
 *
 * @returns {Promise<void>}
 */
async function loadMore() {
  let loadMoreButton = await page.$(LOAD_MORE_SELECTOR);

  while (loadMoreButton) {
    try {
      await loadMoreButton.scrollIntoViewIfNeeded();
      await page.click(LOAD_MORE_SELECTOR);
      await page.waitForTimeout(10000);
      loadMoreButton = await page.$(LOAD_MORE_SELECTOR);
    } catch (e) {
      // Best effort: report why paging stopped instead of hiding it.
      console.warn(`loading more entries failed: ${e && e.message ? e.message : e}`);
      return;
    }
  }

  console.log('scrolled to end');
}
// Keep clicking the "load more" button until it is gone, then extract the
// cards from the page and write them to ./data.json.
await loadMore()
await parse();
// NOTE(review): closing the browser is disabled, so the launched browser is
// never shut down by this script — presumably left open on purpose for
// inspection; confirm before re-enabling.
// await browser.close();
})();