From 4283df79dab8558ddacc6cfbb0e66b7f965e3a2e Mon Sep 17 00:00:00 2001 From: smarthi Date: Wed, 21 Dec 2016 12:58:26 -0500 Subject: [PATCH] OPENNLP-871: Cleanup for Java 8 --- .../tokenize/TokenizerCrossValidator.java | 3 +- .../opennlp/tools/tokenize/TokenizerME.java | 22 +++++------- .../opennlp/tools/util/ObjectStreamUtils.java | 2 ++ .../tools/tokenize/TokenizerFactoryTest.java | 25 ++++++------- .../opennlp/uima/chunker/ChunkerTrainer.java | 12 +++---- .../doccat/AbstractDocumentCategorizer.java | 4 +-- .../uima/doccat/DocumentCategorizer.java | 3 +- .../doccat/DocumentCategorizerTrainer.java | 10 ++---- .../uima/namefind/AbstractNameFinder.java | 6 ++-- .../uima/namefind/NameFinderTrainer.java | 24 ++++--------- .../opennlp/uima/normalizer/Normalizer.java | 5 +-- .../uima/normalizer/StringDictionary.java | 6 ++-- .../main/java/opennlp/uima/parser/Parser.java | 4 +-- .../java/opennlp/uima/postag/POSTagger.java | 4 +-- .../opennlp/uima/postag/POSTaggerTrainer.java | 17 +++++---- .../sentdetect/SentenceDetectorTrainer.java | 10 +++--- .../uima/tokenize/TokenizerTrainer.java | 35 +++++++++---------- .../uima/util/ContainingConstraint.java | 11 ++---- .../java/opennlp/uima/util/OpennlpUtil.java | 4 +-- .../main/java/opennlp/uima/util/UimaUtil.java | 2 +- 20 files changed, 84 insertions(+), 125 deletions(-) diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java index 7a85d6aa4..811165cf5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java @@ -86,8 +86,7 @@ public TokenizerCrossValidator(String language, */ public void evaluate(ObjectStream samples, int nFolds) throws IOException { - CrossValidationPartitioner partitioner = - new CrossValidationPartitioner(samples, nFolds); + CrossValidationPartitioner partitioner = new CrossValidationPartitioner<>(samples, nFolds); while (partitioner.hasNext()) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java index 3d03943e6..5412c2822 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java @@ -18,7 +18,6 @@ package opennlp.tools.tokenize; import java.io.IOException; -import java.io.ObjectStreamException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -26,7 +25,6 @@ import java.util.Map; import java.util.Set; import java.util.regex.Pattern; - import opennlp.tools.dictionary.Dictionary; import opennlp.tools.ml.EventTrainer; import opennlp.tools.ml.TrainerFactory; @@ -127,8 +125,8 @@ public TokenizerME(TokenizerModel model) { this.model = model.getMaxentModel(); this.useAlphaNumericOptimization = factory.isUseAlphaNumericOptmization(); - newTokens = new ArrayList(); - tokProbs = new ArrayList(50); + newTokens = new ArrayList<>(); + tokProbs = new ArrayList<>(50); } /** @@ -145,13 +143,13 @@ public TokenizerME(TokenizerModel model, Factory factory) { this.model = model.getMaxentModel(); useAlphaNumericOptimization = model.useAlphaNumericOptimization(); - newTokens = new ArrayList(); - tokProbs = new ArrayList(50); + newTokens = new ArrayList<>(); + tokProbs = new ArrayList<>(50); } private static Set getAbbreviations(Dictionary abbreviations) { if(abbreviations == null) { - return Collections.emptySet(); + return Collections.emptySet(); } return abbreviations.asStringSet(); } @@ -241,7 +239,7 @@ else if (useAlphaNumericOptimization() && alphanumeric.matcher(tok).matches()) { public static TokenizerModel train(ObjectStream samples, TokenizerFactory factory, TrainingParameters mlParams) throws IOException { - Map manifestInfoEntries = new HashMap(); + Map manifestInfoEntries = new HashMap<>(); ObjectStream eventStream = new TokSpanEventStream(samples, factory.isUseAlphaNumericOptmization(), @@ -305,7 +303,7 @@ public static TokenizerModel train(String languageCode, throws IOException { Factory factory = new Factory(); - Map manifestInfoEntries = new HashMap(); + Map manifestInfoEntries = new HashMap<>(); ObjectStream eventStream = new TokSpanEventStream(samples, useAlphaNumericOptimization, factory.getAlphanumeric(languageCode), @@ -334,16 +332,12 @@ public static TokenizerModel train(String languageCode, * @throws IOException it throws an {@link IOException} if an {@link IOException} * is thrown during IO operations on a temp file which is * - * @throws ObjectStreamException if reading from the {@link ObjectStream} fails - * created during training. - * - * * @deprecated Use * {@link #train(ObjectStream, TokenizerFactory, TrainingParameters)} * and pass in a {@link TokenizerFactory} */ public static TokenizerModel train(String languageCode, ObjectStream samples, - boolean useAlphaNumericOptimization) throws IOException, ObjectStreamException { + boolean useAlphaNumericOptimization) throws IOException { return train(languageCode, samples, useAlphaNumericOptimization, ModelUtil.createDefaultTrainingParameters()); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java b/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java index b9b1fe8ae..9ad98ddf5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java @@ -31,6 +31,7 @@ public class ObjectStreamUtils { * * @return the object stream over the array elements */ + @SafeVarargs public static ObjectStream createObjectStream(final T... array) { return new ObjectStream() { @@ -90,6 +91,7 @@ public void close() { * @param streams * @return */ + @SafeVarargs public static ObjectStream createObjectStream(final ObjectStream... streams) { for (ObjectStream stream : streams) { diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java index 96d19a68f..2fb3d401d 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java @@ -18,19 +18,12 @@ package opennlp.tools.tokenize; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.regex.Pattern; - -import org.junit.Test; - import opennlp.tools.dictionary.Dictionary; import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.tokenize.DummyTokenizerFactory.DummyContextGenerator; @@ -40,6 +33,12 @@ import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.TrainingParameters; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; /** * Tests for the {@link TokenizerFactory} class. @@ -56,11 +55,10 @@ private static ObjectStream createSampleStream() private static TokenizerModel train(TokenizerFactory factory) throws IOException { - return TokenizerME.train(createSampleStream(), - factory, TrainingParameters.defaultParams()); + return TokenizerME.train(createSampleStream(), factory, TrainingParameters.defaultParams()); } - static Dictionary loadAbbDictionary() throws IOException { + private static Dictionary loadAbbDictionary() throws IOException { InputStream in = TokenizerFactoryTest.class.getClassLoader() .getResourceAsStream("opennlp/tools/sentdetect/abb.xml"); @@ -76,7 +74,7 @@ public void testDefault() throws IOException { TokenizerModel model = train(new TokenizerFactory(lang, dic, false, null)); TokenizerFactory factory = model.getFactory(); - assertTrue(factory.getAbbreviationDictionary() instanceof Dictionary); + assertTrue(factory.getAbbreviationDictionary() != null); assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator); assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern() @@ -92,7 +90,7 @@ public void testDefault() throws IOException { TokenizerModel fromSerialized = new TokenizerModel(in); factory = fromSerialized.getFactory(); - assertTrue(factory.getAbbreviationDictionary() instanceof Dictionary); + assertTrue(factory.getAbbreviationDictionary() != null); assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator); assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern() @@ -130,8 +128,7 @@ public void testNullDict() throws IOException { assertNull(factory.getAbbreviationDictionary()); assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator); - assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern() - .pattern()); + assertEquals(Factory.DEFAULT_ALPHANUMERIC, factory.getAlphaNumericPattern().pattern()); assertEquals(lang, factory.getLanguageCode()); assertEquals(lang, model.getLanguage()); assertFalse(factory.isUseAlphaNumericOptmization()); diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java index d22879d7d..8c6232b79 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java @@ -66,7 +66,7 @@ */ public class ChunkerTrainer extends CasConsumer_ImplBase { - private List mChunkSamples = new ArrayList(); + private List mChunkSamples = new ArrayList<>(); private UimaContext mContext; @@ -82,8 +82,6 @@ public class ChunkerTrainer extends CasConsumer_ImplBase { private Feature mChunkTagFeature; - private Logger mLogger; - private String language; /** @@ -95,7 +93,7 @@ public void initialize() throws ResourceInitializationException { mContext = getUimaContext(); - mLogger = mContext.getLogger(); + Logger mLogger = mContext.getLogger(); if (mLogger.isLoggable(Level.INFO)) { mLogger.log(Level.INFO, "Initializing the OpenNLP Chunker Trainer."); @@ -183,9 +181,9 @@ private void processChunk(CAS tcas, AnnotationFS chunk) { Iterator tokenIterator = tcas.createFilteredIterator(tokenIndex.iterator(), containingConstraint); - List tokens = new ArrayList(); - List tags = new ArrayList();; - List chunkTags = new ArrayList();; + List tokens = new ArrayList<>(); + List tags = new ArrayList<>(); + List chunkTags = new ArrayList<>(); while (tokenIterator.hasNext()) { AnnotationFS tokenAnnotation = tokenIterator.next(); diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java index 5abfd76b0..d2591d597 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/AbstractDocumentCategorizer.java @@ -41,8 +41,6 @@ abstract class AbstractDocumentCategorizer extends CasAnnotator_ImplBase { private UimaContext context; - private Logger mLogger; - private opennlp.tools.doccat.DocumentCategorizer mCategorizer; private Type mTokenType; @@ -54,7 +52,7 @@ public void initialize(UimaContext context) this.context = context; - mLogger = context.getLogger(); + Logger mLogger = context.getLogger(); if (mLogger.isLoggable(Level.INFO)) { mLogger.log(Level.INFO, "Initializing the OpenNLP Categorizer."); diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java index 5cf337f3f..cb71a508c 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java @@ -56,8 +56,7 @@ public void typeSystemInit(TypeSystem typeSystem) protected void setBestCategory(CAS tcas, String bestCategory) { FSIndex categoryIndex = tcas.getAnnotationIndex(mCategoryType); - AnnotationFS categoryAnnotation = categoryIndex.size() > 0 ? - categoryIndex.iterator().next() : null; + AnnotationFS categoryAnnotation; if (categoryIndex.size() > 0) { categoryAnnotation = categoryIndex.iterator().next(); diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java index 31c639221..5b35a8204 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java @@ -56,13 +56,9 @@ public class DocumentCategorizerTrainer extends CasConsumer_ImplBase { private UimaContext mContext; - private Logger mLogger; - private String mModelName; - private List documentSamples = new ArrayList(); - - private Type mTokenType; + private List documentSamples = new ArrayList<>(); private Type mCategoryType; @@ -76,7 +72,7 @@ public void initialize() throws ResourceInitializationException { mContext = getUimaContext(); - mLogger = mContext.getLogger(); + Logger mLogger = mContext.getLogger(); if (mLogger.isLoggable(Level.INFO)) { mLogger.log(Level.INFO, "Initializing the OpenNLP Doccat Trainer."); @@ -95,7 +91,7 @@ public void typeSystemInit(TypeSystem typeSystem) String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext, UimaUtil.SENTENCE_TYPE_PARAMETER); - mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName); + Type mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName); String categoryTypeName = CasConsumerUtil.getRequiredStringParameter(mContext, "opennlp.uima.doccat.CategoryType"); diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java index c0bad5de9..dbc604cbc 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java +++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java @@ -123,7 +123,7 @@ public final void process(CAS cas) { final AnnotationComboIterator sentenceNameCombo = new AnnotationComboIterator(cas, mSentenceType, mNameType); - List removeAnnotations = new LinkedList(); + List removeAnnotations = new LinkedList<>(); for (AnnotationIteratorPair annotationIteratorPair : sentenceNameCombo) { for (AnnotationFS nameAnnotation : annotationIteratorPair.getSubIterator()) { removeAnnotations.add(nameAnnotation); @@ -140,9 +140,9 @@ public final void process(CAS cas) { for (AnnotationIteratorPair annotationIteratorPair : sentenceTokenCombo) { - final List sentenceTokenAnnotationList = new LinkedList(); + final List sentenceTokenAnnotationList = new LinkedList<>(); - final List sentenceTokenList = new LinkedList(); + final List sentenceTokenList = new LinkedList<>(); for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) { diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java index dcc0ddcbd..620758089 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java @@ -126,7 +126,7 @@ public final class NameFinderTrainer extends CasConsumer_ImplBase { // - Directly start indexing with a blocking sample stream, the indexer will then write everything // to disk or could store the events much more space efficient in memory - private List nameFinderSamples = new ArrayList(); + private List nameFinderSamples = new ArrayList<>(); private TrainingParameters trainingParams; /** @@ -220,7 +220,7 @@ public void typeSystemInit(TypeSystem typeSystem) * @return */ private static List iteratorToList(Iterator it) { - List list = new LinkedList(); + List list = new LinkedList<>(); while (it.hasNext()) { list.add(it.next()); @@ -231,19 +231,9 @@ private static List iteratorToList(Iterator it) { private static boolean isContaining(AnnotationFS annotation, AnnotationFS containtedAnnotation) { - boolean isStartContaining = annotation.getBegin() <= containtedAnnotation - .getBegin(); - if (!isStartContaining) { - return false; - } - - boolean isEndContaining = annotation.getEnd() >= containtedAnnotation - .getEnd(); - if (!isEndContaining) { - return false; - } + boolean isStartContaining = annotation.getBegin() <= containtedAnnotation.getBegin(); + return isStartContaining && annotation.getEnd() >= containtedAnnotation.getEnd(); - return true; } /** @@ -258,7 +248,7 @@ private static boolean isContaining(AnnotationFS annotation, */ private static Span[] createNames(List tokenList, List entityAnnotations) { - List nameList = new LinkedList(); + List nameList = new LinkedList<>(); AnnotationFS currentEntity = null; @@ -299,7 +289,7 @@ private static Span[] createNames(List tokenList, List(samples, samplesOut); + samples = new SampleTraceStream<>(samples, samplesOut); } Map resourceMap; diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java index 39150ace3..2118a0ae2 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java @@ -65,7 +65,7 @@ public class Normalizer extends CasAnnotator_ImplBase { private static final Set SUPPORTED_TYPES; static { - Set supportedTypes = new HashSet(); + Set supportedTypes = new HashSet<>(); supportedTypes.add(CAS.TYPE_NAME_STRING); supportedTypes.add(CAS.TYPE_NAME_BYTE); @@ -232,9 +232,6 @@ public void process(CAS tcas) { .getName())) { nameAnnotation .setDoubleValue(mStructureFeature, number.doubleValue()); - } else { - // assert false : mStructureFeature.getRange().getName() - // + " is not supported!"; } } } diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java index e55f4aeed..5631544e1 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java +++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java @@ -34,7 +34,7 @@ // lookup a string for given token list public class StringDictionary { - private Map entries = new HashMap(); + private Map entries = new HashMap<>(); public StringDictionary() { } @@ -44,10 +44,8 @@ public StringDictionary() { * * @param in * @throws IOException - * @throws InvalidFormatException */ - public StringDictionary(InputStream in) throws IOException, - InvalidFormatException { + public StringDictionary(InputStream in) throws IOException { DictionarySerializer.create(in, new EntryInserter() { public void insert(Entry entry) throws InvalidFormatException { String valueString = entry.getAttributes().getValue("value"); diff --git a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java index a3b3f0077..6e7ecda61 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java +++ b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java @@ -70,7 +70,7 @@ public class Parser extends CasAnnotator_ImplBase { private static class ParseConverter { - private Map mIndexMap = new HashMap(); + private Map mIndexMap = new HashMap<>(); private Parse mParseForTagger; @@ -271,7 +271,7 @@ protected void process(CAS cas, AnnotationFS sentenceAnnotation) { Iterator containingTokens = cas.createFilteredIterator( allTokens.iterator(), containingConstraint); - List tokenSpans = new LinkedList(); + List tokenSpans = new LinkedList<>(); while(containingTokens.hasNext()) { AnnotationFS token = containingTokens.next(); diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java index 2fdc47c22..358e82c25 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java +++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java @@ -164,9 +164,9 @@ public void process(CAS tcas) { for (AnnotationIteratorPair annotationIteratorPair : comboIterator) { - final List sentenceTokenAnnotationList = new LinkedList(); + final List sentenceTokenAnnotationList = new LinkedList<>(); - final List sentenceTokenList = new LinkedList(); + final List sentenceTokenList = new LinkedList<>(); for (AnnotationFS tokenAnnotation : annotationIteratorPair.getSubIterator()) { diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java index 23158b976..16e160585 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java @@ -17,26 +17,25 @@ package opennlp.uima.postag; -import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Iterator; import java.util.List; - import opennlp.tools.ml.maxent.GIS; -import opennlp.tools.postag.*; +import opennlp.tools.postag.POSDictionary; +import opennlp.tools.postag.POSModel; +import opennlp.tools.postag.POSSample; +import opennlp.tools.postag.POSTaggerFactory; +import opennlp.tools.postag.POSTaggerME; import opennlp.tools.util.ObjectStreamUtils; import opennlp.tools.util.TrainingParameters; -import opennlp.tools.util.model.ModelType; import opennlp.uima.util.AnnotatorUtil; import opennlp.uima.util.CasConsumerUtil; import opennlp.uima.util.ContainingConstraint; import opennlp.uima.util.OpennlpUtil; import opennlp.uima.util.UimaUtil; - import org.apache.uima.UimaContext; import org.apache.uima.cas.CAS; import org.apache.uima.cas.FSIndex; @@ -82,7 +81,7 @@ public class POSTaggerTrainer extends CasConsumer_ImplBase { private Logger mLogger; - private List mPOSSamples = new ArrayList(); + private List mPOSSamples = new ArrayList<>(); private String language; @@ -173,8 +172,8 @@ private void process(CAS tcas, AnnotationFS sentence) { ContainingConstraint containingConstraint = new ContainingConstraint(sentence); - List tokens = new ArrayList(); - List tags = new ArrayList(); + List tokens = new ArrayList<>(); + List tags = new ArrayList<>(); Iterator containingTokens = tcas.createFilteredIterator( allTokens.iterator(), containingConstraint); diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java index 8fa22d730..2c110bd2a 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java @@ -67,7 +67,7 @@ */ public final class SentenceDetectorTrainer extends CasConsumer_ImplBase { - private List sentenceSamples = new ArrayList(); + private List sentenceSamples = new ArrayList<>(); private Type mSentenceType; @@ -75,8 +75,6 @@ public final class SentenceDetectorTrainer extends CasConsumer_ImplBase { private String language = "en"; - private Logger mLogger; - private UimaContext mContext; private String eosChars; @@ -94,7 +92,7 @@ public void initialize() throws ResourceInitializationException { mContext = getUimaContext(); - mLogger = mContext.getLogger(); + Logger mLogger = mContext.getLogger(); if (mLogger.isLoggable(Level.INFO)) { mLogger.log(Level.INFO, "Initializing the OpenNLP SentenceDetector " + @@ -172,11 +170,11 @@ public void collectionProcessComplete(ProcessTrace trace) TrainingParameters mlParams = ModelUtil.createDefaultTrainingParameters(); ObjectStream samples = ObjectStreamUtils.createObjectStream(sentenceSamples); - Writer samplesOut = null; + Writer samplesOut; if (sampleTraceFile != null) { samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding); - samples = new SampleTraceStream(samples, samplesOut); + samples = new SampleTraceStream<>(samples, samplesOut); } SentenceModel sentenceModel = SentenceDetectorME.train(language, samples, diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java index ece9eca46..66d1dfaa0 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java @@ -29,19 +29,6 @@ import java.util.LinkedList; import java.util.List; -import org.apache.uima.UimaContext; -import org.apache.uima.cas.CAS; -import org.apache.uima.cas.FSIndex; -import org.apache.uima.cas.Type; -import org.apache.uima.cas.TypeSystem; -import org.apache.uima.cas.text.AnnotationFS; -import org.apache.uima.collection.CasConsumer_ImplBase; -import org.apache.uima.resource.ResourceInitializationException; -import org.apache.uima.resource.ResourceProcessException; -import org.apache.uima.util.Level; -import org.apache.uima.util.Logger; -import org.apache.uima.util.ProcessTrace; - import opennlp.tools.ml.maxent.GIS; import opennlp.tools.tokenize.TokenSample; import opennlp.tools.tokenize.TokenSampleStream; @@ -58,6 +45,18 @@ import opennlp.uima.util.OpennlpUtil; import opennlp.uima.util.SampleTraceStream; import opennlp.uima.util.UimaUtil; +import org.apache.uima.UimaContext; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.FSIndex; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.TypeSystem; +import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.collection.CasConsumer_ImplBase; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.resource.ResourceProcessException; +import org.apache.uima.util.Level; +import org.apache.uima.util.Logger; +import org.apache.uima.util.ProcessTrace; /** * OpenNLP Tokenizer trainer. @@ -80,10 +79,10 @@ */ public final class TokenizerTrainer extends CasConsumer_ImplBase { - public static final String IS_ALPHA_NUMERIC_OPTIMIZATION = + private static final String IS_ALPHA_NUMERIC_OPTIMIZATION = "opennlp.uima.tokenizer.IsAlphaNumericOptimization"; - private List tokenSamples = new ArrayList(); + private List tokenSamples = new ArrayList<>(); private UimaContext mContext; @@ -194,7 +193,7 @@ private void process(CAS tcas, AnnotationFS sentence) { Iterator containingTokens = tcas.createFilteredIterator( allTokens.iterator(), containingConstraint); - List openNLPSpans = new LinkedList(); + List openNLPSpans = new LinkedList<>(); while (containingTokens.hasNext()) { AnnotationFS tokenAnnotation = containingTokens.next(); @@ -231,7 +230,7 @@ public void collectionProcessComplete(ProcessTrace arg0) // if trace file // serialize events ... - Writer samplesOut = null; + Writer samplesOut; TokenizerModel tokenModel; if (additionalTrainingDataFile != null) { @@ -255,7 +254,7 @@ public void collectionProcessComplete(ProcessTrace arg0) if (sampleTraceFile != null) { samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding); - samples = new SampleTraceStream(samples, samplesOut); + samples = new SampleTraceStream<>(samples, samplesOut); } tokenModel = TokenizerME.train(language, samples, isSkipAlphaNumerics); diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java b/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java index 20c00edc8..57ae61274 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java @@ -30,8 +30,7 @@ public final class ContainingConstraint implements FSMatchConstraint { private static final long serialVersionUID = 1; - private Collection mContainingAnnotations = - new LinkedList(); + private Collection mContainingAnnotations = new LinkedList<>(); /** * Initializes a new instance. @@ -69,12 +68,8 @@ public boolean match(FeatureStructure featureStructure) { } private boolean isContaining(AnnotationFS annotation, AnnotationFS containing) { - if ((containing.getBegin() <= annotation.getBegin()) - && (containing.getEnd() >= annotation.getEnd())) { - return true; - } else { - return false; - } + return (containing.getBegin() <= annotation.getBegin()) + && (containing.getEnd() >= annotation.getEnd()); } } \ No newline at end of file diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java index 59e856694..23d0b3a5b 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/OpennlpUtil.java @@ -56,7 +56,7 @@ public static void serialize(BaseModel model, File modelFile) } } - public static final byte[] loadBytes(File inFile) throws IOException { + public static byte[] loadBytes(File inFile) throws IOException { ByteArrayOutputStream bytes = new ByteArrayOutputStream(); try (InputStream in = new FileInputStream(inFile)) { @@ -71,7 +71,7 @@ public static final byte[] loadBytes(File inFile) throws IOException { return bytes.toByteArray(); } - public static final TrainingParameters loadTrainingParams(String inFileValue, + public static TrainingParameters loadTrainingParams(String inFileValue, boolean isSequenceTrainingAllowed) throws ResourceInitializationException { TrainingParameters params; diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java index 610cdfd4b..5d4efc6d3 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java @@ -102,7 +102,7 @@ public static void removeAnnotations(CAS cas, Iterator containingTokens = cas.createFilteredIterator( allRemoveAnnotations.iterator(), containingConstraint); - Collection removeAnnotations = new LinkedList(); + Collection removeAnnotations = new LinkedList<>(); while (containingTokens.hasNext()) { removeAnnotations.add(containingTokens.next());