From 59a258a2332cea84a383f70d862e8c83c37e180f Mon Sep 17 00:00:00 2001 From: smarthi Date: Fri, 23 Dec 2016 20:52:40 -0500 Subject: [PATCH] OPENNLP-871: Clean up code base for release --- .../brat/BratNameSampleStreamFactory.java | 5 +- .../ConstitParseSampleStream.java | 6 +- .../formats/muc/DocumentSplitterStream.java | 2 +- .../muc/Muc6NameSampleStreamFactory.java | 10 +-- .../tools/formats/muc/MucElementNames.java | 2 +- .../formats/muc/MucNameContentHandler.java | 12 +-- .../formats/muc/MucNameSampleStream.java | 2 +- .../opennlp/tools/formats/muc/SgmlParser.java | 2 +- .../ontonotes/DocumentToLineStream.java | 2 +- .../ontonotes/OntoNotesNameSampleStream.java | 8 +- .../OntoNotesNameSampleStreamFactory.java | 20 ++--- .../OntoNotesParseSampleStreamFactory.java | 18 ++--- .../lemmatizer/DictionaryLemmatizer.java | 5 +- .../opennlp/tools/ml/maxent/GISTrainer.java | 14 ++-- .../tools/ml/model/GenericModelReader.java | 25 +++--- .../ml/model/OnePassRealValueDataIndexer.java | 6 +- .../BinaryNaiveBayesModelWriter.java | 2 +- .../ml/naivebayes/NaiveBayesTrainer.java | 2 +- .../BinaryPerceptronModelWriter.java | 2 +- .../ml/perceptron/PerceptronModelWriter.java | 8 +- .../ml/perceptron/PerceptronTrainer.java | 43 ++++------- .../PlainTextPerceptronModelWriter.java | 6 +- .../SimplePerceptronSequenceTrainer.java | 47 ++++++----- .../opennlp/tools/namefind/NameFinderME.java | 14 ++-- .../java/opennlp/tools/ngram/NGramModel.java | 11 +-- .../tools/parser/AbstractBottomUpParser.java | 6 +- .../parser/AbstractContextGenerator.java | 1 - .../tools/parser/ChunkContextGenerator.java | 2 +- .../treeinsert/AttachContextGenerator.java | 10 +-- .../treeinsert/BuildContextGenerator.java | 2 +- .../treeinsert/CheckContextGenerator.java | 2 +- .../tools/parser/treeinsert/Parser.java | 34 +++----- .../parser/treeinsert/ParserEventStream.java | 77 +++++++++---------- .../java/opennlp/tools/postag/POSModel.java | 8 +- .../opennlp/tools/postag/POSTaggerME.java | 15 ++-- .../opennlp/tools/util/BaseToolFactory.java | 6 +- .../tools/eval/OntoNotes4NameFinderEval.java | 22 ++---- .../tools/eval/OntoNotes4ParserEval.java | 22 ++---- .../tools/eval/OntoNotes4PosTaggerEval.java | 22 ++---- .../tools/postag/POSTaggerFactoryTest.java | 28 ++++--- 40 files changed, 219 insertions(+), 312 deletions(-) diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java index b7a8c4147..669c6996c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java @@ -18,10 +18,7 @@ package opennlp.tools.formats.brat; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStream; - import opennlp.tools.cmdline.ArgumentParser; import opennlp.tools.cmdline.ArgumentParser.OptionalParameter; import opennlp.tools.cmdline.ArgumentParser.ParameterDescription; @@ -75,7 +72,7 @@ protected BratNameSampleStreamFactory() { * Checks that non of the passed values are null. * * @param objects - * @return + * @return true or false */ private boolean notNull(Object... objects) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStream.java index e908cb11f..1964131d9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStream.java @@ -36,7 +36,7 @@ public class ConstitParseSampleStream extends FilterObjectStream private SAXParser saxParser; - private List parses = new ArrayList(); + private List parses = new ArrayList<>(); protected ConstitParseSampleStream(ObjectStream samples) { super(samples); @@ -57,12 +57,12 @@ public Parse read() throws IOException { if (xmlbytes != null) { - List producedParses = new ArrayList(); + List producedParses = new ArrayList<>(); try { saxParser.parse(new ByteArrayInputStream(xmlbytes), new ConstitDocumentHandler(producedParses)); } catch (SAXException e) { //TODO update after Java6 upgrade - throw (IOException) new IOException(e.getMessage()).initCause(e); + throw new IOException(e.getMessage(), e); } parses.addAll(producedParses); diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/DocumentSplitterStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/DocumentSplitterStream.java index 257505d06..9ac951475 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/DocumentSplitterStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/DocumentSplitterStream.java @@ -30,7 +30,7 @@ class DocumentSplitterStream extends FilterObjectStream { private static final String DOC_START_ELEMENT = ""; private static final String DOC_END_ELEMENT = ""; - private List docs = new ArrayList(); + private List docs = new ArrayList<>(); DocumentSplitterStream(ObjectStream samples) { super(samples); diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java index b76613c5c..496253b4d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java @@ -18,9 +18,7 @@ package opennlp.tools.formats.muc; import java.io.File; -import java.io.FileFilter; import java.nio.charset.Charset; - import opennlp.tools.cmdline.ArgumentParser; import opennlp.tools.cmdline.ArgumentParser.ParameterDescription; import opennlp.tools.cmdline.StreamFactoryRegistry; @@ -55,12 +53,8 @@ public ObjectStream create(String[] args) { Tokenizer tokenizer = new TokenizerME(tokenizerModel); ObjectStream mucDocStream = new FileToStringSampleStream( - new DirectorySampleStream(params.getData(), new FileFilter() { - - public boolean accept(File file) { - return StringUtil.toLowerCase(file.getName()).endsWith(".sgm"); - } - }, false), Charset.forName("UTF-8")); + new DirectorySampleStream(params.getData(), + file -> StringUtil.toLowerCase(file.getName()).endsWith(".sgm"), false), Charset.forName("UTF-8")); return new MucNameSampleStream(tokenizer, mucDocStream); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucElementNames.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucElementNames.java index 35b499d5a..6574fd041 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucElementNames.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucElementNames.java @@ -32,7 +32,7 @@ class MucElementNames { static final Set CONTENT_ELEMENTS; static { - Set contentElementNames = new HashSet(); + Set contentElementNames = new HashSet<>(); contentElementNames.add(MucElementNames.HEADLINE_ELEMENT); contentElementNames.add(MucElementNames.DATELINE_ELEMENT); contentElementNames.add(MucElementNames.DD_ELEMENT); diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java index 73002510f..4d6d3a471 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameContentHandler.java @@ -43,7 +43,7 @@ public class MucNameContentHandler extends SgmlParser.ContentHandler { private static final Set EXPECTED_TYPES; static { - Set types = new HashSet(); + Set types = new HashSet<>(); types.add("PERSON"); types.add("ORGANIZATION"); @@ -55,7 +55,7 @@ public class MucNameContentHandler extends SgmlParser.ContentHandler { EXPECTED_TYPES = Collections.unmodifiableSet(types); - Set nameElements = new HashSet(); + Set nameElements = new HashSet<>(); nameElements.add(ENTITY_ELEMENT_NAME); nameElements.add(TIME_ELEMENT_NAME); nameElements.add(NUM_ELEMENT_NAME); @@ -65,12 +65,12 @@ public class MucNameContentHandler extends SgmlParser.ContentHandler { private final Tokenizer tokenizer; private final List storedSamples; - boolean isInsideContentElement = false; - private final List text = new ArrayList(); + private boolean isInsideContentElement = false; + private final List text = new ArrayList<>(); private boolean isClearAdaptiveData = false; - private final Stack incompleteNames = new Stack(); + private final Stack incompleteNames = new Stack<>(); - private List names = new ArrayList(); + private List names = new ArrayList<>(); public MucNameContentHandler(Tokenizer tokenizer, List storedSamples) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameSampleStream.java index 530302dad..281df5d6c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/MucNameSampleStream.java @@ -31,7 +31,7 @@ public class MucNameSampleStream extends FilterObjectStream private final Tokenizer tokenizer; - private List storedSamples = new ArrayList(); + private List storedSamples = new ArrayList<>(); protected MucNameSampleStream(Tokenizer tokenizer, ObjectStream samples) { super(samples); diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/SgmlParser.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/SgmlParser.java index fd18f6fe9..e85e99503 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/SgmlParser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/SgmlParser.java @@ -75,7 +75,7 @@ private static Map getAttributes(CharSequence tagChars) { // value chars // " <- end - Map attributes = new HashMap(); + Map attributes = new HashMap<>(); StringBuilder key = new StringBuilder(); StringBuilder value = new StringBuilder(); diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/DocumentToLineStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/DocumentToLineStream.java index 4cab6eabb..eb8b3e23a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/DocumentToLineStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/DocumentToLineStream.java @@ -41,7 +41,7 @@ protected List read(String sample) throws IOException { // documents must be empty line terminated if (!lines.get(lines.size() - 1).trim().isEmpty()) { - lines = new ArrayList(lines); + lines = new ArrayList<>(lines); lines.add(""); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java index 770a6984d..744e134ab 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStream.java @@ -42,12 +42,12 @@ public class OntoNotesNameSampleStream extends private final Map tokenConversionMap; - private List nameSamples = new LinkedList(); + private List nameSamples = new LinkedList<>(); public OntoNotesNameSampleStream(ObjectStream samples) { super(samples); - Map tokenConversionMap = new HashMap(); + Map tokenConversionMap = new HashMap<>(); tokenConversionMap.put("-LRB-", "("); tokenConversionMap.put("-RRB-", ")"); tokenConversionMap.put("-LSB-", "["); @@ -107,8 +107,8 @@ public NameSample read() throws IOException { String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(line); - List entities = new LinkedList(); - List cleanedTokens = new ArrayList(tokens.length); + List entities = new LinkedList<>(); + List cleanedTokens = new ArrayList<>(tokens.length); int tokenIndex = 0; int entityBeginIndex = -1; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java index 88b142472..c0dfdbdfd 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java @@ -18,9 +18,7 @@ package opennlp.tools.formats.ontonotes; import java.io.File; -import java.io.FileFilter; import java.nio.charset.Charset; - import opennlp.tools.cmdline.ArgumentParser; import opennlp.tools.cmdline.StreamFactoryRegistry; import opennlp.tools.formats.AbstractSampleStreamFactory; @@ -41,19 +39,15 @@ public ObjectStream create(String[] args) { OntoNotesFormatParameters params = ArgumentParser.parse(args, OntoNotesFormatParameters.class); ObjectStream documentStream = new DirectorySampleStream(new File( - params.getOntoNotesDir()), new FileFilter() { - - public boolean accept(File file) { - if (file.isFile()) { - return file.getName().endsWith(".name"); - } + params.getOntoNotesDir()), file -> { + if (file.isFile()) { + return file.getName().endsWith(".name"); + } - return file.isDirectory(); - } - }, true); + return file.isDirectory(); + }, true); - return new OntoNotesNameSampleStream(new FileToStringSampleStream( - documentStream, Charset.forName("UTF-8"))); + return new OntoNotesNameSampleStream(new FileToStringSampleStream(documentStream, Charset.forName("UTF-8"))); } public static void registerFactory() { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java index e77edcf6b..a99bc4e05 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java @@ -18,9 +18,7 @@ package opennlp.tools.formats.ontonotes; import java.io.File; -import java.io.FileFilter; import java.nio.charset.Charset; - import opennlp.tools.cmdline.ArgumentParser; import opennlp.tools.cmdline.StreamFactoryRegistry; import opennlp.tools.formats.AbstractSampleStreamFactory; @@ -31,7 +29,6 @@ public class OntoNotesParseSampleStreamFactory extends AbstractSampleStreamFactory { - protected OntoNotesParseSampleStreamFactory() { super(OntoNotesFormatParameters.class); } @@ -41,16 +38,13 @@ public ObjectStream create(String[] args) { OntoNotesFormatParameters params = ArgumentParser.parse(args, OntoNotesFormatParameters.class); ObjectStream documentStream = new DirectorySampleStream(new File( - params.getOntoNotesDir()), new FileFilter() { - - public boolean accept(File file) { - if (file.isFile()) { - return file.getName().endsWith(".parse"); - } + params.getOntoNotesDir()), file -> { + if (file.isFile()) { + return file.getName().endsWith(".parse"); + } - return file.isDirectory(); - } - }, true); + return file.isDirectory(); + }, true); // We need file to line here ... and that is probably best doen with the plain text stream // lets copy it over here, refactor it, and then at some point we replace the current version diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java index 683b97c31..6d8b51de6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java @@ -25,6 +25,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Map; /** * Lemmatize by simple dictionary lookup into a hashmap built from a file @@ -36,7 +37,7 @@ public class DictionaryLemmatizer implements Lemmatizer { /** * The hashmap containing the dictionary. */ - private final HashMap, String> dictMap; + private final Map, String> dictMap; /** * Construct a hashmap from the input tab separated dictionary. @@ -66,7 +67,7 @@ public DictionaryLemmatizer(final InputStream dictionary) { * * @return dictMap the Map */ - public HashMap, String> getDictMap() { + public Map, String> getDictMap() { return this.dictMap; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java index 05a54242c..7f087b0b9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java @@ -20,16 +20,12 @@ package opennlp.tools.ml.maxent; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.Future; - import opennlp.tools.ml.model.DataIndexer; import opennlp.tools.ml.model.EvalParameters; import opennlp.tools.ml.model.Event; @@ -256,7 +252,7 @@ public GISModel trainModel(int iterations, DataIndexer di, Prior modelPrior, int modelExpects = new MutableContext[threads][]; - /************** Incorporate all of the needed info ******************/ + /* Incorporate all of the needed info *****/ display("Incorporating indexed data for training... \n"); contexts = di.getContexts(); values = di.getValues(); @@ -387,7 +383,7 @@ else if (useSimpleSmoothing) { display("...done.\n"); - /***************** Find the parameters ************************/ + /* Find the parameters *****/ if (threads == 1) display("Computing model parameters ...\n"); else @@ -395,7 +391,7 @@ else if (useSimpleSmoothing) { findParameters(iterations, correctionConstant); - /*************** Create and return the model ******************/ + /* Create and return the model ****/ // To be compatible with old models the correction constant is always 1 return new GISModel(params, predLabels, outcomeLabels, 1, evalParams.getCorrectionParam()); @@ -405,7 +401,7 @@ else if (useSimpleSmoothing) { private void findParameters(int iterations, double correctionConstant) { int threads=modelExpects.length; ExecutorService executor = Executors.newFixedThreadPool(threads); - CompletionService completionService=new ExecutorCompletionService(executor); + CompletionService completionService = new ExecutorCompletionService<>(executor); double prevLL = 0.0; double currLL; display("Performing " + iterations + " iterations.\n"); @@ -571,7 +567,7 @@ private double nextIteration(double correctionConstant, CompletionService eventsToCompare,boolean sort) t } protected List index(LinkedList events, Map predicateIndex) { - Map omap = new HashMap(); + Map omap = new HashMap<>(); int numEvents = events.size(); int outcomeCount = 0; - List eventsToCompare = new ArrayList(numEvents); - List indexedContext = new ArrayList(); + List eventsToCompare = new ArrayList<>(numEvents); + List indexedContext = new ArrayList<>(); for (int eventIndex=0; eventIndex> computeOutcomePatterns(ComparablePredicate[] sorted) { ComparablePredicate cp = sorted[0]; - List> outcomePatterns = new ArrayList>(); - List newGroup = new ArrayList(); + List> outcomePatterns = new ArrayList<>(); + List newGroup = new ArrayList<>(); for (ComparablePredicate predicate : sorted) { if (cp.compareTo(predicate) == 0) { newGroup.add(predicate); } else { cp = predicate; outcomePatterns.add(newGroup); - newGroup = new ArrayList(); + newGroup = new ArrayList<>(); newGroup.add(predicate); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java index d90d856ab..68883fa35 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronTrainer.java @@ -86,12 +86,7 @@ public PerceptronTrainer() { public boolean isValid() { String algorithmName = getAlgorithm(); - - if (algorithmName != null && !(PERCEPTRON_VALUE.equals(algorithmName))) { - return false; - } - - return true; + return !(algorithmName != null && !(PERCEPTRON_VALUE.equals(algorithmName))); } public boolean isSortAndMerge() { @@ -217,7 +212,7 @@ public AbstractModel trainModel(int iterations, DataIndexer di, int cutoff, bool display("...done.\n"); - /*************** Create and return the model ******************/ + /* Create and return the model *************/ return new PerceptronModel(finalParameters, predLabels, outcomeLabels); } @@ -229,7 +224,7 @@ private MutableContext[] findParameters (int iterations, boolean useAverage) { for (int oi = 0; oi < numOutcomes; oi++) allOutcomesPattern[oi] = oi; - /** Stores the estimated parameter value of each predicate during iteration. */ + /* Stores the estimated parameter value of each predicate during iteration. */ MutableContext[] params = new MutableContext[numPreds]; for (int pi = 0; pi < numPreds; pi++) { params[pi] = new MutableContext(allOutcomesPattern,new double[numOutcomes]); @@ -239,7 +234,7 @@ private MutableContext[] findParameters (int iterations, boolean useAverage) { EvalParameters evalParams = new EvalParameters(params,numOutcomes); - /** Stores the sum of parameter values of each predicate over many iterations. */ + /* Stores the sum of parameter values of each predicate over many iterations. */ MutableContext[] summedParams = new MutableContext[numPreds]; if (useAverage) { for (int pi = 0; pi < numPreds; pi++) { @@ -273,7 +268,7 @@ private MutableContext[] findParameters (int iterations, boolean useAverage) { for (int ei = 0; ei < numUniqueEvents; ei++) { int targetOutcome = outcomeList[ei]; - for (int ni=0; ni(); + pmap = new HashMap<>(); for (int i = 0; i < predLabels.length; i++) { pmap.put(predLabels[i], i); @@ -139,7 +136,7 @@ public AbstractModel trainModel(int iterations, SequenceStream sequenceStream, i this.iterations = iterations; outcomeLabels = di.getOutcomeLabels(); - omap = new HashMap(); + omap = new HashMap<>(); for (int oli=0;oli[] featureCounts = new Map[numOutcomes]; + List> featureCounts = new ArrayList<>(numOutcomes); for (int oi=0;oi(); + featureCounts.add(new HashMap<>()); } PerceptronModel model = new PerceptronModel(params,predLabels,pmap,outcomeLabels); @@ -249,7 +246,7 @@ public void nextIteration(int iteration) throws IOException { } if (update) { for (int oi=0;oi { + if (resources != null) { + return resources.get(key); } + return null; }); } else { featureGenerator = null; @@ -440,7 +436,7 @@ static String extractNameType(String outcome) { */ public static Span[] dropOverlappingSpans(Span spans[]) { - List sortedSpans = new ArrayList(spans.length); + List sortedSpans = new ArrayList<>(spans.length); Collections.addAll(sortedSpans, spans); Collections.sort(sortedSpans); diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java index 0e597e068..6d912da4e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java @@ -25,12 +25,10 @@ import java.util.Iterator; import java.util.Map; import java.util.NoSuchElementException; - import opennlp.tools.dictionary.Dictionary; import opennlp.tools.dictionary.serializer.Attributes; import opennlp.tools.dictionary.serializer.DictionarySerializer; import opennlp.tools.dictionary.serializer.Entry; -import opennlp.tools.dictionary.serializer.EntryInserter; import opennlp.tools.util.InvalidFormatException; import opennlp.tools.util.StringList; import opennlp.tools.util.StringUtil; @@ -57,11 +55,9 @@ public NGramModel() { * * @param in the serialized model stream * @throws IOException - * @throws InvalidFormatException */ - public NGramModel(InputStream in) throws IOException, InvalidFormatException { - DictionarySerializer.create(in, new EntryInserter() { - public void insert(Entry entry) throws InvalidFormatException { + public NGramModel(InputStream in) throws IOException { + DictionarySerializer.create(in, entry -> { int count; String countValueString = null; @@ -82,8 +78,7 @@ public void insert(Entry entry) throws InvalidFormatException { add(entry.getTokens()); setCount(entry.getTokens(), count); - } - }); + }); } /** diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java index ff4bea807..cc8eab58b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java @@ -349,7 +349,7 @@ else if (numParses == 1){ return new Parse[] {completeParses.first()}; } else { - List topParses = new ArrayList(numParses); + List topParses = new ArrayList<>(numParses); while(!completeParses.isEmpty() && topParses.size() < numParses) { Parse tp = completeParses.extract(); topParses.add(tp); @@ -460,9 +460,6 @@ protected Parse[] advanceTags(final Parse p) { words[i] = children[i].getCoveredText(); } Sequence[] ts = tagger.topKSequences(words); -// if (ts.length == 0) { -// System.err.println("no tag sequence"); -// } Parse[] newParses = new Parse[ts.length]; for (int i = 0; i < ts.length; i++) { String[] tags = ts[i].getOutcomes().toArray(new String[words.length]); @@ -475,7 +472,6 @@ protected Parse[] advanceTags(final Parse p) { double prob = probs[j]; newParses[i].insert(new Parse(word.getText(), word.getSpan(), tags[j], prob,j)); newParses[i].addProb(Math.log(prob)); - //newParses[i].show(); } } return newParses; diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java index 09d3b0fb3..85de098c2 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java @@ -118,7 +118,6 @@ protected void cons2(List features, Cons c0, Cons c1, Collection if (punct1s != null) { for (Iterator pi = punct1s.iterator();pi.hasNext();) { Parse p = pi.next(); -// String punct = punct(p,c1.index); String punctbo = punctbo(p,c1.index <= 0 ? c1.index -1 : c1.index); //punctbo(1); diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java index d3d830320..b3de9b2b1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java @@ -115,7 +115,7 @@ public String[] getContext(int i, String[] words, String[] tags, String[] preds) String cacheKey = i +t_2+t1+t0+t1+t2+p_2+p_1; if (contextsCache!= null) { if (wordsKey == words) { - String[] contexts = (String[]) contextsCache.get(cacheKey); + String[] contexts = contextsCache.get(cacheKey); if (contexts != null) { return contexts; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java index 9bce4cc61..93b23ad7c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java @@ -60,8 +60,7 @@ private boolean containsPunct(Collection puncts, String punct){ * @return A set of contextual features about this attachment. */ public String[] getContext(Parse[] constituents, int index, List rightFrontier, int rfi) { - List features = new ArrayList(100); - int nodeDistance = rfi; + List features = new ArrayList<>(100); Parse fn = rightFrontier.get(rfi); Parse fp = null; if (rfi+1 < rightFrontier.size()) { @@ -147,9 +146,9 @@ public String[] getContext(Parse[] constituents, int index, List rightFro */ int headDistance = (p0.getHeadIndex()-fn.getHeadIndex()); features.add("hd="+headDistance); - features.add("nd="+nodeDistance); + features.add("nd="+ rfi); - features.add("nd="+p0.getType()+"."+nodeDistance); + features.add("nd="+p0.getType()+"."+ rfi); features.add("hd="+p0.getType()+"."+headDistance); //features.add("fs="+rightFrontier.size()); //paired punct features @@ -158,9 +157,6 @@ public String[] getContext(Parse[] constituents, int index, List rightFro features.add("quotematch");//? not generating feature correctly } - else { - //features.add("noquotematch"); - } } return features.toArray(new String[features.size()]); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java index 867717179..589a1dc88 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java @@ -115,7 +115,7 @@ public String[] getContext(Parse[] constituents, int index) { Cons c1 = new Cons(consp1,consbop1,1,true); Cons c2 = new Cons(consp2,consbop2,2,true); - List features = new ArrayList(); + List features = new ArrayList<>(); features.add("default"); //unigrams diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java index 694d65dc0..de6824d0d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java @@ -42,7 +42,7 @@ public String[] getContext(Object arg0) { } public String[] getContext(Parse parent, Parse[] constituents, int index, boolean trimFrontier) { - List features = new ArrayList(100); + List features = new ArrayList<>(100); //default features.add("default"); Parse[] children = Parser.collapsePunctuation(parent.getChildren(),punctSet); diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java index e2c62d7fe..85d6cf958 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java @@ -140,7 +140,7 @@ private Parser(MaxentModel buildModel, MaxentModel attachModel, MaxentModel chec * @return The right frontier of the specified parse tree. */ public static List getRightFrontier(Parse root,Set punctSet) { - List rf = new LinkedList(); + List rf = new LinkedList<>(); Parse top; if (AbstractBottomUpParser.TOP_NODE.equals(root.getType()) || AbstractBottomUpParser.INC_NODE.equals(root.getType())) { @@ -154,7 +154,7 @@ public static List getRightFrontier(Parse root,Set punctSet) { Parse[] kids = top.getChildren(); top = kids[kids.length-1]; } - return new ArrayList(rf); + return new ArrayList<>(rf); } private void setBuilt(Parse p) { @@ -193,22 +193,12 @@ private void setIncomplete(Parse p) { private boolean isBuilt(Parse p) { String l = p.getLabel(); - if (l == null) { - return false; - } - else { - return l.startsWith(Parser.BUILT); - } + return l != null && l.startsWith(Parser.BUILT); } private boolean isComplete(Parse p) { String l = p.getLabel(); - if (l == null) { - return false; - } - else { - return l.endsWith(Parser.COMPLETE); - } + return l != null && l.endsWith(Parser.COMPLETE); } @Override @@ -226,9 +216,9 @@ protected Parse[] advanceChunks(Parse p, double minChunkScore) { @Override protected Parse[] advanceParses(Parse p, double probMass) { double q = 1 - probMass; - /** The index of the node which will be labeled in this iteration of advancing the parse. */ + /* The index of the node which will be labeled in this iteration of advancing the parse. */ int advanceNodeIndex; - /** The node which will be labeled in this iteration of advancing the parse. */ + /* The node which will be labeled in this iteration of advancing the parse. */ Parse advanceNode=null; Parse[] originalChildren = p.getChildren(); Parse[] children = collapsePunctuation(originalChildren,punctSet); @@ -254,7 +244,7 @@ else if (numNodes == 1) { //put sentence initial and final punct in top node } int originalZeroIndex = mapParseIndex(0,children,originalChildren); int originalAdvanceIndex = mapParseIndex(advanceNodeIndex,children,originalChildren); - List newParsesList = new ArrayList(); + List newParsesList = new ArrayList<>(); //call build model buildModel.eval(buildContextGenerator.getContext(children, advanceNodeIndex), bprobs); double doneProb = bprobs[doneIndex]; @@ -262,7 +252,7 @@ else if (numNodes == 1) { //put sentence initial and final punct in top node if (1-doneProb > q) { double bprobSum = 0; while (bprobSum < probMass) { - /** The largest unadvanced labeling. */ + /* The largest unadvanced labeling. */ int max = 0; for (int pi = 1; pi < bprobs.length; pi++) { //for each build outcome if (bprobs[pi] > bprobs[max]) { @@ -434,7 +424,7 @@ public static ParserModel train(String languageCode, ObjectStream parseSamples, HeadRules rules, TrainingParameters mlParams) throws IOException { - Map manifestInfoEntries = new HashMap(); + Map manifestInfoEntries = new HashMap<>(); System.err.println("Building dictionary"); Dictionary mdict = buildDictionary(parseSamples, rules, mlParams); @@ -457,7 +447,7 @@ public static ParserModel train(String languageCode, System.err.println("Training builder"); ObjectStream bes = new ParserEventStream(parseSamples, rules, ParserEventTypeEnum.BUILD, mdict); - Map buildReportMap = new HashMap(); + Map buildReportMap = new HashMap<>(); EventTrainer buildTrainer = TrainerFactory.getEventTrainer(mlParams.getSettings("build"), buildReportMap); MaxentModel buildModel = buildTrainer.train(bes); @@ -469,7 +459,7 @@ public static ParserModel train(String languageCode, System.err.println("Training checker"); ObjectStream kes = new ParserEventStream(parseSamples, rules, ParserEventTypeEnum.CHECK); - Map checkReportMap = new HashMap(); + Map checkReportMap = new HashMap<>(); EventTrainer checkTrainer = TrainerFactory.getEventTrainer(mlParams.getSettings("check"), checkReportMap); MaxentModel checkModel = checkTrainer.train(kes); @@ -481,7 +471,7 @@ public static ParserModel train(String languageCode, System.err.println("Training attacher"); ObjectStream attachEvents = new ParserEventStream(parseSamples, rules, ParserEventTypeEnum.ATTACH); - Map attachReportMap = new HashMap(); + Map attachReportMap = new HashMap<>(); EventTrainer attachTrainer = TrainerFactory.getEventTrainer(mlParams.getSettings("attach"), attachReportMap); MaxentModel attachModel = attachTrainer.train(attachEvents); opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest(manifestInfoEntries, attachReportMap, "attach"); diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java index c693ef689..5949414b5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java @@ -25,7 +25,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - import opennlp.tools.cmdline.SystemInputStreamFactory; import opennlp.tools.dictionary.Dictionary; import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader; @@ -37,7 +36,6 @@ import opennlp.tools.parser.Parse; import opennlp.tools.parser.ParseSampleStream; import opennlp.tools.parser.ParserEventTypeEnum; -import opennlp.tools.util.InvalidFormatException; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.Span; @@ -73,7 +71,7 @@ public ParserEventStream(ObjectStream d, HeadRules rules, ParserEventType * @return a set of parent nodes. */ private Map getNonAdjoinedParent(Parse node) { - Map parents = new HashMap(); + Map parents = new HashMap<>(); Parse parent = node.getParent(); int index = indexOf(node,parent); parents.put(parent, index); @@ -130,11 +128,11 @@ protected boolean lastChild(Parse child, Parse parent) { @Override protected void addParseEvents(List parseEvents, Parse[] chunks) { - /** Frontier nodes built from node in a completed parse. Specifically, + /* Frontier nodes built from node in a completed parse. Specifically, * they have all their children regardless of the stage of parsing.*/ - List rightFrontier = new ArrayList(); - List builtNodes = new ArrayList(); - /** Nodes which characterize what the parse looks like to the parser as its being built. + List rightFrontier = new ArrayList<>(); + List builtNodes = new ArrayList<>(); + /* Nodes which characterize what the parse looks like to the parser as its being built. * Specifically, these nodes don't have all their children attached like the parents of * the chunk nodes do.*/ Parse[] currentChunks = new Parse[chunks.length]; @@ -200,7 +198,7 @@ protected void addParseEvents(List parseEvents, Parse[] chunks) { } //attach node String attachType = null; - /** Node selected for attachment. */ + /* Node selected for attachment. */ Parse attachNode = null; int attachNodeIndex = -1; if (ci == 0){ @@ -208,7 +206,7 @@ protected void addParseEvents(List parseEvents, Parse[] chunks) { top.insert(currentChunks[ci]); } else { - /** Right frontier consisting of partially-built nodes based on current state of the parse.*/ + /* Right frontier consisting of partially-built nodes based on current state of the parse.*/ List currentRightFrontier = Parser.getRightFrontier(currentChunks[0],punctSet); if (currentRightFrontier.size() != rightFrontier.size()) { System.err.println("fontiers mis-aligned: "+currentRightFrontier.size()+" != "+rightFrontier.size()+" "+currentRightFrontier+" "+rightFrontier); @@ -333,7 +331,7 @@ else if (Parser.ATTACH_SISTER.equals(attachType)) { } } - public static void main(String[] args) throws java.io.IOException, InvalidFormatException { + public static void main(String[] args) throws java.io.IOException { if (args.length == 0) { System.err.println("Usage ParserEventStream -[tag|chunk|build|attach] [-fun] [-dict dictionary] [-model model] head_rules < parses"); System.exit(1); @@ -345,35 +343,36 @@ public static void main(String[] args) throws java.io.IOException, InvalidFormat AbstractModel model = null; while (ai < args.length && args[ai].startsWith("-")) { - if (args[ai].equals("-build")) { - etype = ParserEventTypeEnum.BUILD; - } - else if (args[ai].equals("-attach")) { - etype = ParserEventTypeEnum.ATTACH; - } - else if (args[ai].equals("-chunk")) { - etype = ParserEventTypeEnum.CHUNK; - } - else if (args[ai].equals("-check")) { - etype = ParserEventTypeEnum.CHECK; - } - else if (args[ai].equals("-tag")) { - etype = ParserEventTypeEnum.TAG; - } - else if (args[ai].equals("-fun")) { - fun = true; - } - else if (args[ai].equals("-dict")) { - ai++; - dict = new Dictionary(new FileInputStream(args[ai])); - } - else if (args[ai].equals("-model")) { - ai++; - model = (new SuffixSensitiveGISModelReader(new File(args[ai]))).getModel(); - } - else { - System.err.println("Invalid option " + args[ai]); - System.exit(1); + switch (args[ai]) { + case "-build": + etype = ParserEventTypeEnum.BUILD; + break; + case "-attach": + etype = ParserEventTypeEnum.ATTACH; + break; + case "-chunk": + etype = ParserEventTypeEnum.CHUNK; + break; + case "-check": + etype = ParserEventTypeEnum.CHECK; + break; + case "-tag": + etype = ParserEventTypeEnum.TAG; + break; + case "-fun": + fun = true; + break; + case "-dict": + ai++; + dict = new Dictionary(new FileInputStream(args[ai])); + break; + case "-model": + ai++; + model = (new SuffixSensitiveGISModelReader(new File(args[ai]))).getModel(); + break; + default: + System.err.println("Invalid option " + args[ai]); + System.exit(1); } ai++; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java index 34b9f7937..21522a5ce 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java @@ -43,7 +43,7 @@ public final class POSModel extends BaseModel { private static final String COMPONENT_NAME = "POSTaggerME"; - public static final String POS_MODEL_ENTRY_NAME = "pos.model"; + static final String POS_MODEL_ENTRY_NAME = "pos.model"; public POSModel(String languageCode, SequenceClassificationModel posModel, Map manifestInfoEntries, POSTaggerFactory posFactory) { @@ -78,15 +78,15 @@ public POSModel(String languageCode, MaxentModel posModel, int beamSize, checkArtifactMap(); } - public POSModel(InputStream in) throws IOException, InvalidFormatException { + public POSModel(InputStream in) throws IOException { super(COMPONENT_NAME, in); } - public POSModel(File modelFile) throws IOException, InvalidFormatException { + public POSModel(File modelFile) throws IOException { super(COMPONENT_NAME, modelFile); } - public POSModel(URL modelURL) throws IOException, InvalidFormatException { + public POSModel(URL modelURL) throws IOException { super(COMPONENT_NAME, modelURL); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java index e4c1c1b8e..366a91a83 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java @@ -24,9 +24,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.StringTokenizer; import java.util.concurrent.atomic.AtomicInteger; - import opennlp.tools.dictionary.Dictionary; import opennlp.tools.ml.BeamSearch; import opennlp.tools.ml.EventModelSequenceTrainer; @@ -45,7 +43,6 @@ import opennlp.tools.util.StringUtil; import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.featuregen.StringPattern; -import opennlp.tools.util.model.ModelType; /** * A part-of-speech tagger that uses maximum entropy. Tries to predict whether @@ -117,7 +114,7 @@ public POSTaggerME(POSModel model) { this.model = model.getPosSequenceModel(); } else { - this.model = new opennlp.tools.ml.BeamSearch(beamSize, + this.model = new opennlp.tools.ml.BeamSearch<>(beamSize, model.getPosModel(), 0); } @@ -127,7 +124,7 @@ public POSTaggerME(POSModel model) { * Retrieves an array of all possible part-of-speech tags from the * tagger. * - * @return + * @return String[] */ public String[] getAllPosTags() { return model.getOutcomes(); @@ -237,7 +234,7 @@ public static POSModel train(String languageCode, POSContextGenerator contextGenerator = posFactory.getPOSContextGenerator(); - Map manifestInfoEntries = new HashMap(); + Map manifestInfoEntries = new HashMap<>(); TrainerType trainerType = TrainerFactory.getTrainerType(trainParams.getSettings()); @@ -302,7 +299,7 @@ public static void populatePOSDictionary(ObjectStream samples, // the data structure will store the word, the tag, and the number of // occurrences - Map> newEntries = new HashMap>(); + Map> newEntries = new HashMap<>(); POSSample sample; while ((sample = samples.read()) != null) { String[] words = sample.getSentence(); @@ -319,7 +316,7 @@ public static void populatePOSDictionary(ObjectStream samples, } if (!newEntries.containsKey(word)) { - newEntries.put(word, new HashMap()); + newEntries.put(word, new HashMap<>()); } String[] dictTags = dict.getTags(word); @@ -346,7 +343,7 @@ public static void populatePOSDictionary(ObjectStream samples, // add it to the dictionary for (Entry> wordEntry : newEntries .entrySet()) { - List tagsForWord = new ArrayList(); + List tagsForWord = new ArrayList<>(); for (Entry entry : wordEntry.getValue().entrySet()) { if (entry.getValue().get() >= cutoff) { tagsForWord.add(entry.getKey()); diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java index 873c2c4da..499aa8f70 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java @@ -63,7 +63,7 @@ protected void init(ArtifactProvider artifactProvider) { */ @SuppressWarnings("rawtypes") public Map createArtifactSerializersMap() { - return new HashMap(); + return new HashMap<>(); } /** @@ -75,7 +75,7 @@ public Map createArtifactSerializersMap() { * populated by sub-classes. */ public Map createArtifactMap() { - return new HashMap(); + return new HashMap<>(); } /** @@ -84,7 +84,7 @@ public Map createArtifactMap() { * @return the manifest entries to added to the model manifest */ public Map createManifestEntries() { - return new HashMap(); + return new HashMap<>(); } /** diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java index f81cd3655..737fae3e0 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java @@ -18,13 +18,8 @@ package opennlp.tools.eval; import java.io.File; -import java.io.FileFilter; import java.io.IOException; import java.nio.charset.Charset; - -import org.junit.Assert; -import org.junit.Test; - import opennlp.tools.formats.DirectorySampleStream; import opennlp.tools.formats.convert.FileToStringSampleStream; import opennlp.tools.formats.ontonotes.OntoNotesNameSampleStream; @@ -35,6 +30,8 @@ import opennlp.tools.util.ObjectStream; import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.model.ModelUtil; +import org.junit.Assert; +import org.junit.Test; public class OntoNotes4NameFinderEval { @@ -42,16 +39,13 @@ private static void crossEval(TrainingParameters params, String type, double exp throws IOException { ObjectStream documentStream = new DirectorySampleStream(new File( - EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), new FileFilter() { - - public boolean accept(File file) { - if (file.isFile()) { - return file.getName().endsWith(".name"); - } + EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), file -> { + if (file.isFile()) { + return file.getName().endsWith(".name"); + } - return file.isDirectory(); - } - }, true); + return file.isDirectory(); + }, true); ObjectStream samples = new OntoNotesNameSampleStream(new FileToStringSampleStream( documentStream, Charset.forName("UTF-8"))); diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java index 84185e04b..087ab7ef4 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java @@ -18,15 +18,10 @@ package opennlp.tools.eval; import java.io.File; -import java.io.FileFilter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.Charset; - -import org.junit.Assert; -import org.junit.Test; - import opennlp.tools.formats.DirectorySampleStream; import opennlp.tools.formats.convert.FileToStringSampleStream; import opennlp.tools.formats.ontonotes.DocumentToLineStream; @@ -38,6 +33,8 @@ import opennlp.tools.util.ObjectStream; import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.model.ModelUtil; +import org.junit.Assert; +import org.junit.Test; public class OntoNotes4ParserEval { @@ -45,16 +42,13 @@ private static void crossEval(TrainingParameters params, HeadRules rules, double throws IOException { ObjectStream documentStream = new DirectorySampleStream(new File( - EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), new FileFilter() { - - public boolean accept(File file) { - if (file.isFile()) { - return file.getName().endsWith(".parse"); - } + EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), file -> { + if (file.isFile()) { + return file.getName().endsWith(".parse"); + } - return file.isDirectory(); - } - }, true); + return file.isDirectory(); + }, true); OntoNotesParseSampleStream samples = new OntoNotesParseSampleStream( new DocumentToLineStream(new FileToStringSampleStream( diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java index ca1676abc..fb258367a 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java @@ -18,13 +18,8 @@ package opennlp.tools.eval; import java.io.File; -import java.io.FileFilter; import java.io.IOException; import java.nio.charset.Charset; - -import org.junit.Assert; -import org.junit.Test; - import opennlp.tools.formats.DirectorySampleStream; import opennlp.tools.formats.convert.FileToStringSampleStream; import opennlp.tools.formats.convert.ParseToPOSSampleStream; @@ -35,6 +30,8 @@ import opennlp.tools.util.ObjectStream; import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.model.ModelUtil; +import org.junit.Assert; +import org.junit.Test; public class OntoNotes4PosTaggerEval { @@ -42,16 +39,13 @@ private static void crossEval(TrainingParameters params, double expectedScore) throws IOException { ObjectStream documentStream = new DirectorySampleStream(new File( - EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), new FileFilter() { - - public boolean accept(File file) { - if (file.isFile()) { - return file.getName().endsWith(".parse"); - } + EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), file -> { + if (file.isFile()) { + return file.getName().endsWith(".parse"); + } - return file.isDirectory(); - } - }, true); + return file.isDirectory(); + }, true); ParseToPOSSampleStream samples = new ParseToPOSSampleStream(new OntoNotesParseSampleStream( new DocumentToLineStream( diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java index 2f721249b..ec7656596 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java @@ -18,14 +18,10 @@ package opennlp.tools.postag; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; - -import org.junit.Test; - import opennlp.tools.dictionary.Dictionary; import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.postag.DummyPOSTaggerFactory.DummyPOSContextGenerator; @@ -37,7 +33,9 @@ import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.TrainingParameters; -import opennlp.tools.util.model.ModelType; +import org.junit.Test; + +import static org.junit.Assert.assertTrue; /** * Tests for the {@link POSTaggerFactory} class. @@ -53,7 +51,7 @@ private static ObjectStream createSampleStream() return new WordTagSampleStream(new PlainTextByLineStream(in, UTF_8)); } - static POSModel trainPOSModel(ModelType type, POSTaggerFactory factory) + private static POSModel trainPOSModel(POSTaggerFactory factory) throws IOException { return POSTaggerME.train("en", createSampleStream(), TrainingParameters.defaultParams(), factory); @@ -66,8 +64,8 @@ public void testPOSTaggerWithCustomFactory() throws IOException { .getResourceAsStream("TagDictionaryCaseSensitive.xml"))); Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0); - POSModel posModel = trainPOSModel(ModelType.MAXENT, - new DummyPOSTaggerFactory(dic, posDict)); + POSModel posModel = trainPOSModel( + new DummyPOSTaggerFactory(dic, posDict)); POSTaggerFactory factory = posModel.getFactory(); assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary); @@ -84,7 +82,7 @@ public void testPOSTaggerWithCustomFactory() throws IOException { assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary); assertTrue(factory.getPOSContextGenerator() instanceof DummyPOSContextGenerator); assertTrue(factory.getSequenceValidator() instanceof DummyPOSSequenceValidator); - assertTrue(factory.getDictionary() instanceof Dictionary); + assertTrue(factory.getDictionary() != null); } @Test @@ -93,14 +91,14 @@ public void testPOSTaggerWithDefaultFactory() throws IOException { .getResourceAsStream("TagDictionaryCaseSensitive.xml")); Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0); - POSModel posModel = trainPOSModel(ModelType.MAXENT, - new POSTaggerFactory(dic, posDict)); + POSModel posModel = trainPOSModel( + new POSTaggerFactory(dic, posDict)); POSTaggerFactory factory = posModel.getFactory(); assertTrue(factory.getTagDictionary() instanceof POSDictionary); - assertTrue(factory.getPOSContextGenerator() instanceof POSContextGenerator); + assertTrue(factory.getPOSContextGenerator() != null); assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator); - assertTrue(factory.getDictionary() instanceof Dictionary); + assertTrue(factory.getDictionary() != null); ByteArrayOutputStream out = new ByteArrayOutputStream(); posModel.serialize(out); @@ -110,9 +108,9 @@ public void testPOSTaggerWithDefaultFactory() throws IOException { factory = fromSerialized.getFactory(); assertTrue(factory.getTagDictionary() instanceof POSDictionary); - assertTrue(factory.getPOSContextGenerator() instanceof POSContextGenerator); + assertTrue(factory.getPOSContextGenerator() != null); assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator); - assertTrue(factory.getDictionary() instanceof Dictionary); + assertTrue(factory.getDictionary() != null); } @Test(expected = InvalidFormatException.class)