forked from words/wiktionary
/
index.js
39 lines (35 loc) · 1.14 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
const get = require('got')
const URL = require('url')
const flat = require('flat')
const {get: getProp} = require('lodash')
const cheerio = require('cheerio')
/**
 * Look up a title on Wiktionary and return its extract both as returned by
 * the API and as condensed, single-line text.
 *
 * Issues an `action=query&prop=extracts` request against
 * `https://<locale>.wiktionary.org/w/api.php`. Errors raised by the HTTP
 * request are not caught here and propagate to the caller.
 *
 * @param {string} query - Title to look up; sent as the `titles` parameter.
 * @param {string} [locale='en'] - Subdomain of wiktionary.org to query.
 * @returns {Promise<?{query: string, html: string, text: string}>} The query
 *   as given, the extract value from the response, and its cleaned-up text;
 *   `null` when no `extract` field is present anywhere in the response.
 */
async function lookup (query, locale = 'en') {
  // Example request:
  // https://en.wiktionary.org/w/api.php?action=query&prop=extracts&titles=pomology&format=json
  const apiParams = {
    action: 'query',
    format: 'json',
    prop: 'extracts',
    titles: query
  }
  const endpoint = URL.format({
    protocol: 'https',
    hostname: `${locale}.wiktionary.org`,
    pathname: '/w/api.php',
    query: apiParams
  })

  const response = await get(endpoint, {json: true})
  const body = response.body

  // No assumption is made about where the extract sits in the response:
  // flatten it to dotted paths and take the first one ending in `.extract`.
  const flatPaths = Object.keys(flat(body))
  const extractPath = flatPaths.find(path => path.endsWith('.extract'))
  if (!extractPath) {
    return null // 404 word not found
  }
  const html = getProp(body, extractPath)

  // Rewrites run in this order. Note that a string pattern only replaces its
  // first occurrence; only the two regexes at the end are global.
  const rewrites = [
    ['English\n', ''],
    ['Noun\n', ''],
    ['Etymology\n', 'Etymology: '],
    ['Translation\n', 'Translation: '],
    ['Anagrams\n', 'Anagrams: '],
    [/\n+/gm, '\n'], // collapse runs of newlines into one
    [/\n/gm, '; '] // then join the remaining lines
  ]

  // Strip the markup, then trim surrounding whitespace and newlines.
  let text = cheerio.load(html).text().trim()
  for (const [pattern, replacement] of rewrites) {
    text = text.replace(pattern, replacement)
  }

  return {query, html, text}
}
// CommonJS export: requiring this module yields the `lookup` function itself.
module.exports = lookup