-
Notifications
You must be signed in to change notification settings - Fork 0
/
politifact.js
79 lines (67 loc) · 2.86 KB
/
politifact.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
const puppeteer = require('puppeteer');
const { Parser } = require("json2csv");
const fs = require('fs');
/**
 * Scrapes the Politifact homepage.
 *
 * - Saves every image response (jpg/png/svg/gif) seen while the page loads to
 *   ./images/image-<n>.<ext>.
 * - Collects the text of each `article.m-statement` (name, quote, description,
 *   footer) and each `.m-teaser` (title, meta) element.
 * - Logs the collected rows and writes them to ./output.csv.
 *
 * The browser is always closed, even when a step fails; any failure is logged
 * and reported through a non-zero process exit code.
 */
(async () => {
  const IMAGE_DIR = './images';
  // Applied to the URL *path* only, so query strings / fragments
  // (e.g. "photo.jpg?w=300") do not hide the extension.
  const IMAGE_PATH_PATTERN = /\.(jpg|png|svg|gif)$/;
  // Placeholder used when an expected element is missing from the page.
  const MISSING_TEXT = '-';

  /**
   * Returns the image extension of `url`, or null when it is not an image URL.
   * @param {string} url - Response URL.
   * @returns {string|null} One of jpg/png/svg/gif, or null.
   */
  const getImageExtension = (url) => {
    let pathname;
    try {
      ({ pathname } = new URL(url));
    } catch (urlError) {
      // Not a parseable URL: treat as "not an image".
      return null;
    }
    const match = IMAGE_PATH_PATTERN.exec(pathname);
    return match ? match[1] : null;
  };

  /**
   * Reads the trimmed text content of the first descendant matching `selector`.
   * @param {object} parent - Puppeteer element handle to search within.
   * @param {string} selector - CSS selector of the descendant.
   * @returns {Promise<string>} Trimmed text, or MISSING_TEXT if nothing matches.
   */
  const readText = async (parent, selector) => {
    const handle = await parent.$(selector);
    if (!handle) {
      return MISSING_TEXT;
    }
    return handle.evaluate((element) => element.textContent.trim());
  };

  const browser = await puppeteer.launch({ headless: false });
  try {
    // writeFileSync does not create missing directories.
    fs.mkdirSync(IMAGE_DIR, { recursive: true });

    const page = await browser.newPage();

    let counter = 0;
    page.on('response', async (response) => {
      const extension = getImageExtension(response.url());
      if (extension === null) {
        return;
      }
      try {
        const buffer = await response.buffer();
        // No encoding argument: it is ignored when the data is a Buffer.
        fs.writeFileSync(`${IMAGE_DIR}/image-${counter}.${extension}`, buffer);
        counter += 1;
      } catch (err) {
        // The body may be unavailable (redirects, page navigated away or
        // closed). Image capture is best-effort; never fail the scrape for it.
        console.warn(`Skipping image ${response.url()}: ${err.message}`);
      }
    });

    // Set the screen size before navigating so the page is laid out once at
    // the intended size instead of being resized after load.
    await page.setViewport({ width: 1080, height: 1024 });

    // Navigate to the Politifact homepage
    await page.goto('https://www.politifact.com/');

    // Wait for the articles and teasers to be present
    await page.waitForSelector('article.m-statement');
    await page.waitForSelector('.m-teaser');

    const articles = await page.$$('article.m-statement');
    const listItems = await page.$$('.m-teaser');

    const data = [];

    for (const article of articles) {
      data.push({
        Names: await readText(article, '.m-statement__name'),
        Title: await readText(article, '.m-statement__quote'),
        Description: await readText(article, '.m-statement__desc'),
        Footer: await readText(article, '.m-statement__footer'),
      });
    }

    // Teasers only contribute a title and a footer; the remaining CSV columns
    // are left unset for these rows.
    for (const item of listItems) {
      data.push({
        Title: await readText(item, '.m-teaser__title'),
        Footer: await readText(item, '.m-teaser__meta'),
      });
    }

    console.log(data);

    const fields = ["Names", "Title", "Description", "Footer"];
    const json2csvParser = new Parser({ fields });
    const csv = json2csvParser.parse(data);
    fs.writeFileSync("./output.csv", csv);
  } finally {
    // Always release the browser process, even if scraping failed.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});