-
Notifications
You must be signed in to change notification settings - Fork 51
/
date.js
75 lines (64 loc) · 2.63 KB
/
date.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
const chrono = require('chrono-node')
// Pattern for the ISO 8601 shapes accepted by `isIso`. The whole time part is
// optional, so a bare calendar date matches as well.
const isoformat = '^\\d{4}-\\d{2}-\\d{2}' + // Match YYYY-MM-DD
  '((T\\d{2}:\\d{2}(:\\d{2})?)' + // Match THH:mm with optional :ss
  '(\\.\\d{1,6})?' + // Match optional fraction, 1 to 6 digits
  '(Z|(\\+|-)\\d{2}:\\d{2})?)?$' // Optional time zone (Z, +hh:mm or -hh:mm)

// Compiled once at load time. The expression carries no `g`/`y` flag, so
// `test` keeps no state between calls and the instance is safe to share.
const isoMatcher = new RegExp(isoformat)

/**
 * Check whether a value is an ISO 8601 style date string.
 *
 * The value must be a string, match `isoformat`, and be accepted by
 * `Date.parse`; the last check rejects strings that have the right shape
 * but that the date parser cannot turn into a timestamp.
 *
 * @param {*} val
 * @return {Boolean}
 */
function isIso (val) {
  if (typeof val !== 'string') return false
  if (!isoMatcher.test(val)) return false
  return !Number.isNaN(Date.parse(val))
}
/**
 * Wrap a rule with validation and formatting logic.
 *
 * The returned function calls `rule` with `$` and tries to convert the
 * result to an ISO string via `Date.prototype.toISOString`, attempting in
 * order: the `isIso` check, the built-in `Date` parser, and finally
 * `chrono.parseDate`. If none of them produces a date, or the rule yields
 * nothing usable, the wrapped function returns `undefined`.
 *
 * @param {Function} rule - called with `$`; expected to return a string or a falsy value
 * @return {Function} wrapped - takes `$`, returns an ISO string or `undefined`
 */
function wrap (rule) {
  return function ($) {
    const raw = rule($)
    // nothing usable: a missing attribute/text, or a value that cannot be trimmed
    if (typeof raw !== 'string') return

    // remove whitespace for easier parsing
    const value = raw.trim()
    // blank text cannot hold a date, so skip the parsers entirely
    if (!value) return

    // restringify iso dates, because sometimes they are truncated
    if (isIso(value)) return new Date(value).toISOString()

    // try to parse with the built-in date parser
    const native = new Date(value)
    if (!Number.isNaN(native.getTime())) return native.toISOString()

    // try to parse a complex date string
    const parsed = chrono.parseDate(value)
    if (parsed) return parsed.toISOString()
  }
}
/**
* Rules.
*/
module.exports = [
wrap($ => $('meta[property="article:published_time"]').attr('content')),
wrap($ => $('meta[name="dc.date"]').attr('content')),
wrap($ => $('meta[name="dc.date.issued"]').attr('content')),
wrap($ => $('meta[name="dc.date.created"]').attr('content')),
wrap($ => $('meta[name="date"]').attr('content')),
wrap($ => $('meta[name="dcterms.date"]').attr('content')),
wrap($ => $('[itemprop="datePublished"]').attr('content')),
wrap($ => $('time[itemprop*="pubdate"]').attr('datetime')),
wrap($ => $('[property*="dc:date"]').attr('content')),
wrap($ => $('[property*="dc:created"]').attr('content')),
wrap($ => $('time[datetime][pubdate]').attr('datetime')),
wrap($ => $('meta[name="sailthru.date"]').attr('content')),
wrap($ => $('meta[property="book:release_date"]').attr('content')),
wrap($ => $('time[datetime]').attr('datetime')),
wrap($ => $('[class*="byline"]').text()),
wrap($ => $('[class*="dateline"]').text()),
wrap($ => $('[id*="date"]').text()),
wrap($ => $('[class*="date"]').text()),
wrap($ => $('[id*="publish"]').text()),
wrap($ => $('[class*="publish"]').text()),
wrap($ => $('[id*="post-timestamp"]').text()),
wrap($ => $('[class*="post-timestamp"]').text()),
wrap($ => $('[id*="post-meta"]').text()),
wrap($ => $('[class*="post-meta"]').text()),
wrap($ => $('[id*="metadata"]').text()),
wrap($ => $('[class*="metadata"]').text()),
wrap($ => $('[id*="time"]').text()),
wrap($ => $('[class*="time"]').text())
]