/
lib.js
60 lines (54 loc) · 1.72 KB
/
lib.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
var _ = require('lodash');
var Promise = require('bluebird');
var java = require('java');
var utils = require('./utils');
// Have Bluebird add promise-returning `*Async` variants to the node-java
// bridge object; the rest of this module relies on them
// (e.g. `java.newInstanceAsync`).
Promise.promisifyAll(java);
// Logger created through the project's utils module and used by processText.
// NOTE(review): 'pipeline' is presumably the logger's name/label — confirm in ./utils.
var logger = utils.logging.newConsoleLogger('pipeline');
/**
 * Registers the JVM flag and the jar files this module needs on the shared
 * `java` bridge object.
 *
 * Appends one entry to `java.options` and three entries to `java.classpath`,
 * in the order listed below. Returns nothing.
 *
 * NOTE(review): node-java is expected to read these settings when the JVM is
 * first created, so this presumably has to run before any Java object is
 * instantiated — confirm against the node-java documentation.
 */
var setupGlobalOptions = function() {
  // Flags handed to the JVM (here: a 4 GB maximum heap).
  var jvmFlags = ['-Xmx4g'];
  // Jars appended to the classpath, relative to the working directory.
  var jarPaths = [
    './java-lib/stanford-corenlp-3.5.2.jar',
    './java-lib/stanford-corenlp-3.5.2-models.jar',
    './java-lib/xom.jar'
  ];

  jvmFlags.forEach(function(flag) {
    java.options.push(flag);
  });
  jarPaths.forEach(function(jarPath) {
    java.classpath.push(jarPath);
  });
};
// Apply the JVM flags / classpath before the first Java object is requested.
setupGlobalOptions();

/**
 * Promise for the shared, promisified StanfordCoreNLP pipeline instance.
 *
 * It is created once at module load: a java.util.Properties object is built,
 * its 'annotators' property is set to 'tokenize,ssplit,pos', and the
 * properties are then passed to the StanfordCoreNLP constructor. Consumers
 * call `.then` on this promise to obtain the pipeline.
 */
var pipeline = java.newInstanceAsync('java.util.Properties')
  .then(Promise.promisifyAll)
  .then(function(props) {
    // Constructs the CoreNLP pipeline from the configured properties and
    // gives the resulting Java object `*Async` method variants.
    var createCoreNlp = function() {
      var pending = java.newInstanceAsync(
        'edu.stanford.nlp.pipeline.StanfordCoreNLP',
        props
      );
      return pending.then(Promise.promisifyAll);
    };

    // The pipeline must only be built after the annotators have been set.
    var annotatorsSet = props.setPropertyAsync('annotators', 'tokenize,ssplit,pos');
    return annotatorsSet.then(createCoreNlp);
  });
/**
 * Runs `text` through the shared CoreNLP pipeline and returns the tagged
 * tokens of the first sentence.
 *
 * The annotation is serialised to JSON via the pipeline's `jsonPrint` into a
 * fresh java.io.StringWriter, parsed, and each token is reduced to a tuple.
 *
 * NOTE(review): only `sentences[0]` is read, so tokens of any further
 * sentences are dropped even though the pipeline is configured with the
 * `ssplit` annotator — confirm this is intended.
 *
 * @param {string} text - The text to annotate.
 * @returns {Promise<Array>} Resolves to a list of
 *   `[word, pos, [characterOffsetBegin, characterOffsetEnd]]` tuples, or to an
 *   empty array when the output contains no sentences. Rejects if any Java
 *   call fails or the produced output is not valid JSON.
 */
var processText = function(text) {
  return java.newInstanceAsync('java.io.StringWriter')
    .then(Promise.promisifyAll)
    .then(function(stringWriter) {
      // `corenlp` is the resolved pipeline object; it is named differently
      // from the module-level `pipeline` promise to avoid shadowing it.
      return pipeline.then(function(corenlp) {
        logger.info('processing text:', text);
        return corenlp.processAsync(text)
          .then(function(annotation) {
            return corenlp.jsonPrintAsync(annotation, stringWriter);
          })
          .then(function() {
            return stringWriter.toStringAsync();
          });
      });
    })
    .then(JSON.parse)
    .then(function(result) {
      if (result.sentences.length === 0) {
        return [];
      }
      return _.map(result.sentences[0].tokens, function(token) {
        return [
          token.word,
          token.pos,
          [
            // Explicit radix so the offsets are always read as decimal.
            parseInt(token.characterOffsetBegin, 10),
            parseInt(token.characterOffsetEnd, 10)
          ]
        ];
      });
    });
};
// Expose processText on this module's exports so requiring modules can call it.
exports.processText = processText;