/
NeuralWSDDecodeUFSAC.java
59 lines (52 loc) · 2.41 KB
/
NeuralWSDDecodeUFSAC.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import getalp.wsd.common.wordnet.WordnetHelper;
import getalp.wsd.method.neural.NeuralDisambiguator;
import getalp.wsd.ufsac.core.Sentence;
import getalp.wsd.ufsac.core.Word;
import getalp.wsd.ufsac.streaming.modifier.StreamingCorpusModifierSentence;
import getalp.wsd.ufsac.streaming.reader.StreamingCorpusReaderSentence;
import getalp.wsd.ufsac.streaming.writer.StreamingCorpusWriterSentence;
import getalp.wsd.ufsac.utils.CorpusPOSTaggerAndLemmatizer;
import getalp.wsd.utils.ArgumentParser;
import getalp.wsd.utils.WordnetUtils;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.List;
public class NeuralWSDDecodeUFSAC
{
public static void main(String[] args) throws Exception
{
ArgumentParser parser = new ArgumentParser();
parser.addArgument("python_path");
parser.addArgument("data_path");
parser.addArgumentList("weights");
parser.addArgument("input");
parser.addArgument("output");
parser.addArgument("lowercase", "true");
parser.addArgument("sense_reduction", "true");
if (!parser.parse(args)) return;
String pythonPath = parser.getArgValue("python_path");
String dataPath = parser.getArgValue("data_path");
List<String> weights = parser.getArgValueList("weights");
String inputPath = parser.getArgValue("input");
String outputPath = parser.getArgValue("output");
boolean lowercase = parser.getArgValueBoolean("lowercase");
boolean senseReduction = parser.getArgValueBoolean("sense_reduction");
CorpusPOSTaggerAndLemmatizer tagger = new CorpusPOSTaggerAndLemmatizer();
NeuralDisambiguator disambiguator = new NeuralDisambiguator(pythonPath, dataPath, weights);
disambiguator.lowercaseWords = lowercase;
if (senseReduction) disambiguator.reducedOutputVocabulary = WordnetUtils.getReducedSynsetKeysWithHypernyms3(WordnetHelper.wn30());
else disambiguator.reducedOutputVocabulary = null;
StreamingCorpusModifierSentence modifier = new StreamingCorpusModifierSentence()
{
public void modifySentence(Sentence sentence)
{
tagger.tag(sentence.getWords());
disambiguator.disambiguate(sentence, "wsd");
}
};
modifier.load(inputPath, outputPath);
disambiguator.close();
}
}