-
Notifications
You must be signed in to change notification settings - Fork 2
/
index.js
142 lines (135 loc) · 4.07 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// require modules
const Nightmare = require('nightmare'); // Electron-based headless browser used for scraping
const config = require('../../config'); // project config; proxy host/port/credentials live here
// NOTE(review): this shadows the native global Promise with the 'promise' package;
// presumably kept for legacy Node compatibility — confirm before removing.
const Promise = require('promise');
// set proxy details
/* --> Uncomment for Proxy Setup <--
let proxy = 'http://'+ config.host + ':' + config.port;
*/
// construct Nightmare instance
/* --> Uncomment for Proxy Setup <--
const nightmare = Nightmare({
show: false,
switches: {
'proxy-server': proxy
}
});
*/
// --> Comment for Proxy Setup : Line 20-24
// Single shared instance used by both scrapers below.
const nightmare = Nightmare({
show: true // Set it to false if no GUI is required
});
/**
 * Scrapes wikipedia using nightmare for predefined queries and given search string.
 *
 * @param {String} searchString String to search for on wikipedia
 * @return {Promise<Array>} Promise resolving to an array containing the scraped
 *                          infobox rows (innerText of each row), rejecting on failure
 */
const scrapWiki = (searchString) => {
  // Rows of the article's right-hand infobox table.
  let selector = '#mw-content-text table.infobox tr';
  /* --> Uncomment for Proxy Setup <--
  nightmare.authentication(config.proxyUsername, config.proxyPassword) // comment this to disable proxy authentication
  */
  // The nightmare chain is already a thenable, so return it directly instead of
  // wrapping it in `new Promise(...)` (explicit-construction anti-pattern):
  // `return result` / `throw err` replace the manual resolve/reject.
  return nightmare
    .goto('https://en.wikipedia.org')
    .wait('#searchInput')
    .type('#searchInput', searchString)
    .click('#searchButton')
    .wait('#content')
    .evaluate((selector) => {
      // Runs inside the page context: collect matching rows.
      let nodeList = (document.querySelectorAll(selector))
      // map array from the obtained NodeList
      let arr = [].slice.call(nodeList).map(nodeList => nodeList.innerText)
      return arr;
    }, selector)
    .end()
    .then((result) => {
      console.log('[Scrapper] \tInfo collected from wikipedia');
      return result;
    })
    .catch((err) => {
      console.log('[Scrapper] \tCan not collect info from wikipedia');
      throw err; // preserve the rejection for callers
    });
};
/**
 * Scrapes emedexpert using nightmare for predefined query.
 *
 * @return {Promise<Array>} Promise resolving to an array of the listtable cell
 *                          texts scraped from the conditions page, rejecting on failure
 */
const scrapEMed = () => {
  // Every cell of the conditions/medications list table.
  let selector = 'table.listtable tr td';
  /* --> Uncomment for Proxy Setup <--
  nightmare.authentication(config.proxyUsername, config.proxyPassword) // comment this to disable proxy authentication
  */
  // Return the nightmare chain directly rather than wrapping the already-thenable
  // chain in `new Promise(...)` (explicit-construction anti-pattern).
  return nightmare
    .goto('https://www.emedexpert.com/lists/conditions.shtml')
    .wait('#fb-root')
    .wait(3000) // extra settle time for late-loading content
    .evaluate((selector) => {
      // Runs inside the page context: collect matching cells.
      let nodeList = document.querySelectorAll(selector)
      // map array from the obtained NodeList
      let arr = [].slice.call(nodeList).map(nodeList => nodeList.innerText)
      return arr
    }, selector)
    .end()
    .then((result) => {
      console.log('[Scrapper] \tInfo collected from emedexpert');
      return result;
    })
    .catch((err) => {
      console.log('[Scrapper] \tCan not collect info from emedexpert');
      throw err; // preserve the rejection for callers
    });
};
/**
 * Extract the required info out of the array scraped from wikipedia by
 * prefix-matching each keyword against the scraped rows (case-insensitive).
 *
 * @param {Array} keywords Array of strings to match in 'array'
 * @param {Array} array Array of scraped data
 * @return {Array} Array of extracted strings, positioned to mirror 'keywords'
 *                 (entries stay empty for keywords with no matching row)
 */
const extractFromWiki = (keywords, array) => {
  console.log('[Scrapper] \tExtracting info from wikipedia');
  const data = [];
  for (const entry of array) {
    keywords.forEach((keyword, j) => {
      if (entry.toLowerCase().startsWith(keyword.toLowerCase())) {
        // Drop the keyword prefix, then strip digits (footnote refs),
        // newlines, literal "\n" sequences, and square brackets.
        const remainder = entry.slice(keyword.length).trim();
        data[j] = remainder.replace(/\\n|\d|\n|[[\]]/g, '');
      }
    });
  }
  console.log('[Scrapper] \tInfo extracted');
  return data;
};
/**
 * Extract the required info out of the array scraped from emedexpert by
 * prefix-matching the condition name (case-insensitive). The scraped cells
 * pair each condition with its medications in the following element.
 *
 * @param {String} condition Condition name to match in 'array'
 * @param {Array} array Array of scraped data
 * @return {String} Medications for the matched condition, or a fallback
 *                  advisory message when no usable match is found
 */
const extractFromEMed = (condition, array) => {
  console.log('[Scrapper] \tExtracting info from emedexpert');
  // Bound at length - 1: the medication lives at i + 1, so a match on the
  // very last cell has no medication after it. The original loop ran to
  // length and returned `undefined` in that case instead of the fallback.
  for (let i = 0; i < array.length - 1; i++) {
    if (array[i].toLowerCase().startsWith(condition.toLowerCase())) {
      console.log('[Scrapper] \tInfo extracted');
      return array[i + 1];
    }
  }
  console.log('[Scrapper] \tInfo extracted');
  return 'Please consult physician. Could not find medications.';
};
// Public API: the two scraping entry points and their extraction helpers.
module.exports = {
scrapWiki,
scrapEMed,
extractFromWiki,
extractFromEMed
};