-
Notifications
You must be signed in to change notification settings - Fork 24
/
extractors.fn.js
82 lines (73 loc) · 2.53 KB
/
extractors.fn.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
const chrono = require('chrono-node')
const humanname = require('humanname')
const addressit = require('addressit')
// const phoneUtil = require('google-libphonenumber').PhoneNumberUtil.getInstance()
/**
 * Extract one kind of information from a piece of free text.
 *
 * Supported `extractor` values (aliases in parentheses):
 *  - "phone" ("telephone"): phone-number-like digit groups (regex based)
 *  - "numbers" ("nb"): runs of digits
 *  - "website": URL-like tokens (regex based)
 *  - "email" ("mail"): e-mail addresses (regex based)
 *  - "address" ("add"): whatever `addressit(data)` returns
 *  - "date" ("d"): `chrono.casual.parseDate(data)` rendered with
 *    `toString()`, or "" when nothing could be parsed
 *  - "fullName": the whole object returned by `humanname.parse`
 *  - "firstName" ("prenom"), "lastName" ("nom"), "initials", "suffix",
 *    "salutation": the matching property of that object
 * Any other extractor returns `data` unchanged.
 *
 * Return value of the regex based extractors:
 *  - phone / numbers / website: the first match, or every match as an array
 *    when `multiple` is true; "" when nothing matches.
 *  - email: the first match or "" when `multiple` is false. When `multiple`
 *    is true: the array of matches, the bare string when there is exactly one
 *    match, or `data` itself when nothing matches.
 *
 * @param {string} data - Text to search. For the regex based extractors
 *   null/undefined is treated as "" and other values are stringified.
 * @param {string} extractor - Name of the information to extract.
 * @param {boolean} [multiple=false] - Return every match instead of the first.
 * @returns {*} See above; the shape depends on `extractor` and `multiple`.
 */
let getData = function (data, extractor, multiple = false) {
  const emailRegex = /([a-zA-Z0-9._-]{1,30}@[a-zA-Z0-9._-]{2,15}\.[a-zA-Z0-9._-]{2,15})/gmi
  const phoneRegex = /\+?\(?\d*\)? ?\(?\d+\)?\d*([\s./-]\d{2,})+/gmi
  // The URL itself is capture group 1. The non-capturing prefix accepts either
  // a separator character or the start of the input/line, so that a URL at
  // the very beginning of the text is matched in full.
  const websiteRegex = /(?:^|[\s\W])((https?:\/\/)?(www\.)?[-a-zA-Z0-9:%._\+~#=]{2,256}\.[a-z]{2,6}\b[-a-zA-Z0-9@:%_\+.~#?&/=]*)/gmi

  // Only the regex branches use the coerced text; the library based branches
  // still receive `data` exactly as it was passed in.
  const text = data == null ? "" : String(data)

  // Shared "first match / all matches / empty string" policy.
  const pick = function (matches) {
    if (!matches || matches.length === 0) {
      return ""
    }
    return multiple ? matches : matches[0]
  }

  switch (extractor) {
    case "phone":
    case "telephone":
      return pick(text.match(phoneRegex))

    case "numbers":
    case "nb":
      return pick(text.match(/\d+/gm))

    case "website": {
      // Collect capture group 1 of every match: unlike the full match it never
      // contains the separator character that precedes the URL.
      const websites = []
      let found = websiteRegex.exec(text)
      while (found !== null) {
        websites.push(found[1])
        found = websiteRegex.exec(text)
      }
      return pick(websites)
    }

    case "address":
    case "add":
      return addressit(data)

    case "email":
    case "mail": {
      const emails = text.match(emailRegex)
      if (!multiple) {
        return emails !== null ? emails[0] : ""
      }
      if (emails === null) {
        // Historical contract of this branch: hand the input back untouched.
        return data
      }
      // A lone match is unwrapped to a plain string.
      return emails.length === 1 ? emails[0] : emails
    }

    case "date":
    case "d": {
      const date = chrono.casual.parseDate(data)
      return date ? date.toString() : ""
    }

    case "fullName":
    case "firstName":
    case "prenom":
    case "lastName":
    case "nom":
    case "initials":
    case "suffix":
    case "salutation": {
      // NOTE(review): `filterData` is not defined or required in the visible
      // part of this file; presumably it is provided globally by the host
      // application. Confirm before relying on the name extractors.
      // The input is compacted ("cmp") before being parsed.
      const parsed = humanname.parse(filterData(data, "cmp"))
      const fieldByExtractor = {
        firstName: "firstName",
        prenom: "firstName",
        lastName: "lastName",
        nom: "lastName",
        initials: "initials",
        suffix: "suffix",
        salutation: "salutation"
      }
      const field = fieldByExtractor[extractor]
      // "fullName" has no entry in the table: return the whole parsed object.
      return field ? parsed[field] : parsed
    }

    default:
      return data
  }
}
// Public API of this module: only the extractor entry point is exported.
module.exports = { getData: getData }