From 1ed2bd133428c6460d0ce5eddab48a5fe5ea2a98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Kottmann?= Date: Tue, 3 Jan 2017 17:45:12 +0100 Subject: [PATCH] Correct indentation and white spaces See issue OPENNLP-914 --- .../opennlp/bratann/NameFinderResource.java | 6 +- .../builder/MorfologikDictionayBuilder.java | 14 +- .../java/opennlp/morfologik/cmdline/CLI.java | 250 +++++++------- .../MorfologikDictionaryBuilderParams.java | 24 +- .../builder/XMLDictionaryToTableParams.java | 18 +- .../builder/XMLDictionaryToTableTool.java | 16 +- .../lemmatizer/MorfologikLemmatizer.java | 10 +- .../tagdict/MorfologikPOSTaggerFactory.java | 19 +- .../morfologik/util/MorfologikUtil.java | 4 +- .../builder/POSDictionayBuilderTest.java | 30 +- .../lemmatizer/MorfologikLemmatizerTest.java | 18 +- .../tagdict/POSTaggerFactoryTest.java | 6 +- .../opennlp/tools/chunker/ChunkSample.java | 18 +- .../chunker/ChunkSampleSequenceStream.java | 2 +- .../tools/chunker/ChunkSampleStream.java | 4 +- .../java/opennlp/tools/chunker/ChunkerME.java | 18 +- .../DefaultChunkerContextGenerator.java | 2 +- .../DefaultChunkerSequenceValidator.java | 4 +- .../tools/cmdline/AbstractConverterTool.java | 4 +- .../opennlp/tools/cmdline/ArgumentParser.java | 32 +- .../main/java/opennlp/tools/cmdline/CLI.java | 18 +- .../opennlp/tools/cmdline/CmdLineUtil.java | 22 +- .../tools/cmdline/EvaluationErrorPrinter.java | 4 +- .../tools/cmdline/GenerateManualTool.java | 6 +- .../tools/cmdline/PerformanceMonitor.java | 6 +- .../tools/cmdline/StreamFactoryRegistry.java | 2 +- .../tools/cmdline/TypedCmdLineTool.java | 2 +- .../ChunkerDetailedFMeasureListener.java | 2 +- .../cmdline/chunker/ChunkerEvaluatorTool.java | 4 +- .../tools/cmdline/chunker/ChunkerMETool.java | 1 + .../cmdline/chunker/ChunkerTrainerTool.java | 2 +- .../tools/cmdline/doccat/DoccatTool.java | 1 + .../cmdline/doccat/DoccatTrainerTool.java | 6 +- .../cmdline/lemmatizer/LemmatizerMETool.java | 1 + .../namefind/CensusDictionaryCreatorTool.java | 4 +- .../namefind/NameSampleCountersStream.java | 28 +- .../TokenNameFinderCrossValidatorTool.java | 2 +- .../TokenNameFinderEvaluatorTool.java | 2 +- .../cmdline/namefind/TokenNameFinderTool.java | 5 +- .../namefind/TokenNameFinderTrainerTool.java | 14 +- .../tools/cmdline/params/CVParams.java | 4 +- .../DetailedFMeasureEvaluatorParams.java | 2 +- .../tools/cmdline/params/EvaluatorParams.java | 2 +- .../cmdline/parser/BuildModelUpdaterTool.java | 20 +- .../cmdline/parser/CheckModelUpdaterTool.java | 22 +- .../cmdline/parser/ParserConverterTool.java | 1 + .../tools/cmdline/parser/ParserTool.java | 4 +- .../cmdline/parser/ParserTrainerTool.java | 10 +- .../tools/cmdline/postag/POSModelLoader.java | 2 +- .../tools/cmdline/postag/POSTaggerTool.java | 1 + .../cmdline/postag/POSTaggerTrainerTool.java | 2 +- .../SentenceDetectorEvaluatorTool.java | 2 +- .../SentenceDetectorTrainerTool.java | 2 +- .../tokenizer/CommandLineTokenizer.java | 1 + .../tokenizer/DictionaryDetokenizerTool.java | 16 +- .../tokenizer/SimpleTokenizerTool.java | 2 +- .../cmdline/tokenizer/TokenizerMETool.java | 2 +- .../tokenizer/TokenizerTrainerTool.java | 2 +- .../opennlp/tools/dictionary/Dictionary.java | 45 +-- .../serializer/DictionarySerializer.java | 248 +++++++------- .../opennlp/tools/doccat/DoccatFactory.java | 2 +- .../tools/doccat/DocumentCategorizer.java | 1 + .../DocumentCategorizerContextGenerator.java | 1 + .../doccat/DocumentCategorizerEvaluator.java | 3 +- .../DocumentCategorizerEventStream.java | 14 +- .../tools/doccat/DocumentCategorizerME.java | 9 +- .../opennlp/tools/doccat/DocumentSample.java | 3 +- .../tools/doccat/DocumentSampleStream.java | 2 +- .../opennlp/tools/entitylinker/BaseLink.java | 19 +- .../tools/entitylinker/EntityLinker.java | 1 + .../entitylinker/EntityLinkerFactory.java | 1 + .../entitylinker/EntityLinkerProperties.java | 1 + .../tools/entitylinker/LinkedSpan.java | 1 + .../formats/BioNLP2004NameSampleStream.java | 2 +- .../formats/ChunkerSampleStreamFactory.java | 1 + .../formats/Conll02NameSampleStream.java | 4 +- .../formats/Conll03NameSampleStream.java | 12 +- .../tools/formats/ConllXPOSSampleStream.java | 52 +-- .../tools/formats/DirectorySampleStream.java | 4 +- .../formats/DocumentSampleStreamFactory.java | 3 +- .../formats/EvalitaNameSampleStream.java | 6 +- .../formats/LeipzigDoccatSampleStream.java | 2 +- .../LeipzigDocumentSampleStreamFactory.java | 8 +- .../LemmatizerSampleStreamFactory.java | 1 + .../formats/NameFinderCensus90NameStream.java | 2 +- .../formats/NameSampleDataStreamFactory.java | 1 + .../formats/ParseSampleStreamFactory.java | 1 + .../formats/SentenceSampleStreamFactory.java | 2 +- .../formats/TokenSampleStreamFactory.java | 1 + .../formats/WordTagSampleStreamFactory.java | 3 +- .../tools/formats/ad/ADChunkSampleStream.java | 304 +++++++++--------- .../ad/ADChunkSampleStreamFactory.java | 6 +- .../tools/formats/ad/ADNameSampleStream.java | 96 +++--- .../formats/ad/ADNameSampleStreamFactory.java | 2 +- .../formats/ad/ADPOSSampleStreamFactory.java | 2 +- .../formats/ad/ADSentenceSampleStream.java | 6 +- .../ad/ADSentenceSampleStreamFactory.java | 2 +- .../tools/formats/ad/ADSentenceStream.java | 167 +++++----- .../formats/brat/AnnotationConfiguration.java | 26 +- .../tools/formats/brat/BratAnnotation.java | 2 +- .../formats/brat/BratAnnotationStream.java | 2 +- .../formats/brat/BratDocumentStream.java | 4 +- .../brat/BratNameSampleStreamFactory.java | 2 +- .../convert/ParseToPOSSampleStream.java | 2 +- .../ConstitDocumentHandler.java | 2 +- .../muc/Muc6NameSampleStreamFactory.java | 2 +- .../OntoNotesNameSampleStreamFactory.java | 3 +- .../ontonotes/OntoNotesParseSampleStream.java | 2 +- .../OntoNotesParseSampleStreamFactory.java | 3 +- .../tools/languagemodel/LanguageModel.java | 1 + .../languagemodel/NGramLanguageModel.java | 1 + .../DefaultLemmatizerContextGenerator.java | 14 +- .../DefaultLemmatizerSequenceValidator.java | 2 +- .../lemmatizer/DictionaryLemmatizer.java | 10 +- .../opennlp/tools/lemmatizer/LemmaSample.java | 33 +- .../lemmatizer/LemmaSampleEventStream.java | 3 +- .../lemmatizer/LemmaSampleSequenceStream.java | 3 +- .../tools/lemmatizer/LemmaSampleStream.java | 3 +- .../opennlp/tools/lemmatizer/Lemmatizer.java | 1 + .../LemmatizerContextGenerator.java | 1 + .../LemmatizerEvaluationMonitor.java | 1 + .../tools/lemmatizer/LemmatizerEvaluator.java | 3 +- .../tools/lemmatizer/LemmatizerFactory.java | 1 + .../tools/lemmatizer/LemmatizerME.java | 41 +-- .../tools/lemmatizer/LemmatizerModel.java | 173 +++++----- .../java/opennlp/tools/ml/BeamSearch.java | 8 +- .../java/opennlp/tools/ml/EventTrainer.java | 1 + .../java/opennlp/tools/ml/TrainerFactory.java | 10 +- .../ml/maxent/BasicContextGenerator.java | 4 +- .../opennlp/tools/ml/maxent/GISTrainer.java | 42 +-- .../opennlp/tools/ml/maxent/IntegerPool.java | 2 +- .../tools/ml/maxent/RealBasicEventStream.java | 2 +- .../ml/maxent/io/BinaryGISModelWriter.java | 2 +- .../ml/maxent/io/BinaryQNModelReader.java | 1 + .../ml/maxent/io/BinaryQNModelWriter.java | 1 + .../tools/ml/maxent/io/GISModelWriter.java | 26 +- .../ml/maxent/io/ObjectQNModelReader.java | 1 + .../ml/maxent/io/OldFormatGISModelReader.java | 2 +- .../ml/maxent/io/PlainTextGISModelWriter.java | 13 +- .../tools/ml/maxent/io/QNModelReader.java | 1 + .../tools/ml/maxent/io/QNModelWriter.java | 1 + .../io/SuffixSensitiveGISModelWriter.java | 15 +- .../ml/maxent/quasinewton/ArrayMath.java | 3 +- .../tools/ml/maxent/quasinewton/Function.java | 1 + .../ml/maxent/quasinewton/LineSearch.java | 24 +- .../maxent/quasinewton/NegLogLikelihood.java | 9 +- .../quasinewton/ParallelNegLogLikelihood.java | 13 +- .../ml/maxent/quasinewton/QNMinimizer.java | 17 +- .../tools/ml/maxent/quasinewton/QNModel.java | 19 +- .../ml/maxent/quasinewton/QNTrainer.java | 7 +- .../tools/ml/model/AbstractDataIndexer.java | 34 +- .../opennlp/tools/ml/model/AbstractModel.java | 75 +++-- .../tools/ml/model/AbstractModelReader.java | 62 ++-- .../tools/ml/model/ComparableEvent.java | 3 +- .../tools/ml/model/ComparablePredicate.java | 4 +- .../opennlp/tools/ml/model/DataIndexer.java | 4 +- .../tools/ml/model/EvalParameters.java | 4 +- .../java/opennlp/tools/ml/model/Event.java | 71 ++-- .../tools/ml/model/FileEventStream.java | 4 +- .../tools/ml/model/GenericModelReader.java | 2 +- .../tools/ml/model/HashSumEventStream.java | 4 +- .../tools/ml/model/IndexHashTable.java | 4 +- .../opennlp/tools/ml/model/MaxentModel.java | 2 +- .../tools/ml/model/MutableContext.java | 4 +- .../ml/model/OnePassRealValueDataIndexer.java | 8 +- .../tools/ml/model/TwoPassDataIndexer.java | 9 +- .../opennlp/tools/ml/model/UniformPrior.java | 4 +- .../naivebayes/NaiveBayesEvalParameters.java | 1 + .../ml/naivebayes/NaiveBayesModelReader.java | 2 +- .../BinaryPerceptronModelReader.java | 2 +- .../BinaryPerceptronModelWriter.java | 12 +- .../tools/ml/perceptron/PerceptronModel.java | 8 +- .../ml/perceptron/PerceptronModelReader.java | 86 ++--- .../ml/perceptron/PerceptronModelWriter.java | 212 ++++++------ .../ml/perceptron/PerceptronTrainer.java | 36 +-- .../PlainTextPerceptronModelReader.java | 2 +- .../PlainTextPerceptronModelWriter.java | 14 +- .../SimplePerceptronSequenceTrainer.java | 56 ++-- .../SuffixSensitivePerceptronModelWriter.java | 88 ++--- .../BilouNameFinderSequenceValidator.java | 6 +- .../java/opennlp/tools/namefind/BioCodec.java | 2 +- .../namefind/DefaultNameContextGenerator.java | 30 +- .../tools/namefind/NameFinderEventStream.java | 6 +- .../opennlp/tools/namefind/NameFinderME.java | 28 +- .../namefind/NameFinderSequenceValidator.java | 6 +- .../opennlp/tools/namefind/NameSample.java | 16 +- .../tools/namefind/NameSampleDataStream.java | 36 +-- .../namefind/NameSampleSequenceStream.java | 18 +- .../tools/namefind/RegexNameFinder.java | 13 +- .../namefind/RegexNameFinderFactory.java | 29 +- .../TokenNameFinderCrossValidator.java | 6 +- .../namefind/TokenNameFinderFactory.java | 29 +- .../tools/namefind/TokenNameFinderModel.java | 6 +- .../opennlp/tools/ngram/NGramGenerator.java | 16 +- .../java/opennlp/tools/ngram/NGramModel.java | 78 ++--- .../java/opennlp/tools/ngram/NGramUtils.java | 3 +- .../tools/parser/AbstractBottomUpParser.java | 34 +- .../parser/AbstractContextGenerator.java | 116 +++---- .../parser/AbstractParserEventStream.java | 4 +- .../tools/parser/ChunkContextGenerator.java | 48 +-- .../tools/parser/ChunkSampleStream.java | 2 +- .../main/java/opennlp/tools/parser/Cons.java | 1 + .../main/java/opennlp/tools/parser/Parse.java | 96 +++--- .../ParserChunkerSequenceValidator.java | 11 +- .../opennlp/tools/parser/ParserModel.java | 4 +- .../opennlp/tools/parser/PosSampleStream.java | 2 +- .../chunking/CheckContextGenerator.java | 4 +- .../opennlp/tools/parser/chunking/Parser.java | 8 +- .../parser/chunking/ParserEventStream.java | 18 +- .../tools/parser/lang/en/HeadRules.java | 23 +- .../lang/es/AncoraSpanishHeadRules.java | 28 +- .../treeinsert/AttachContextGenerator.java | 52 +-- .../treeinsert/BuildContextGenerator.java | 6 +- .../treeinsert/CheckContextGenerator.java | 10 +- .../tools/parser/treeinsert/Parser.java | 86 ++--- .../parser/treeinsert/ParserEventStream.java | 101 ++++-- .../postag/DefaultPOSContextGenerator.java | 7 +- .../opennlp/tools/postag/POSDictionary.java | 7 +- .../java/opennlp/tools/postag/POSModel.java | 2 +- .../java/opennlp/tools/postag/POSSample.java | 14 +- .../tools/postag/POSSampleEventStream.java | 2 +- .../tools/postag/POSSampleSequenceStream.java | 4 +- .../tools/postag/POSTaggerCrossValidator.java | 2 +- .../postag/POSTaggerEvaluationMonitor.java | 2 +- .../tools/postag/POSTaggerFactory.java | 6 +- .../opennlp/tools/postag/POSTaggerME.java | 8 +- .../sentdetect/DefaultSDContextGenerator.java | 4 +- .../sentdetect/EndOfSentenceScanner.java | 48 +-- .../tools/sentdetect/SDCrossValidator.java | 8 +- .../tools/sentdetect/SentenceDetectorME.java | 51 ++- .../tools/sentdetect/SentenceModel.java | 4 +- .../tools/sentdetect/SentenceSample.java | 4 +- .../tools/sentdetect/lang/Factory.java | 2 +- .../lang/th/SentenceContextGenerator.java | 30 +- .../DefaultTokenContextGenerator.java | 12 +- .../tokenize/DetokenizationDictionary.java | 7 +- .../tools/tokenize/DictionaryDetokenizer.java | 3 +- .../tools/tokenize/SimpleTokenizer.java | 6 +- .../tools/tokenize/TokSpanEventStream.java | 5 +- .../opennlp/tools/tokenize/TokenSample.java | 4 +- .../tools/tokenize/TokenSampleStream.java | 2 +- .../opennlp/tools/tokenize/Tokenizer.java | 32 +- .../tokenize/TokenizerCrossValidator.java | 18 +- .../opennlp/tools/tokenize/TokenizerME.java | 9 +- .../tools/tokenize/TokenizerModel.java | 4 +- .../tools/tokenize/WhitespaceTokenStream.java | 2 +- .../tools/tokenize/WhitespaceTokenizer.java | 2 +- .../opennlp/tools/tokenize/lang/Factory.java | 2 +- .../tokenize/lang/en/TokenSampleStream.java | 32 +- .../tools/util/AbstractEventStream.java | 4 +- .../opennlp/tools/util/BaseToolFactory.java | 6 +- .../java/opennlp/tools/util/HashList.java | 4 +- .../main/java/opennlp/tools/util/Heap.java | 10 +- .../tools/util/InputStreamFactory.java | 1 + .../java/opennlp/tools/util/ListHeap.java | 17 +- .../util/MarkableFileInputStreamFactory.java | 2 +- .../opennlp/tools/util/ObjectStreamUtils.java | 7 +- .../tools/util/PlainTextByLineStream.java | 14 +- .../tools/util/ReverseListIterator.java | 6 +- .../java/opennlp/tools/util/Sequence.java | 14 +- .../main/java/opennlp/tools/util/Span.java | 19 +- .../java/opennlp/tools/util/StringList.java | 2 +- .../java/opennlp/tools/util/StringUtil.java | 284 ++++++++-------- .../tools/util/TrainingParameters.java | 2 +- .../main/java/opennlp/tools/util/Version.java | 9 +- .../opennlp/tools/util/eval/Evaluator.java | 6 +- .../java/opennlp/tools/util/eval/Mean.java | 6 +- .../tools/util/ext/ExtensionLoader.java | 2 +- .../featuregen/AdaptiveFeatureGenerator.java | 4 +- .../AdditionalContextFeatureGenerator.java | 3 - .../BigramNameFeatureGenerator.java | 14 +- .../BrownBigramFeatureGenerator.java | 2 +- .../tools/util/featuregen/BrownCluster.java | 4 +- .../BrownTokenClassFeatureGenerator.java | 2 +- .../BrownTokenFeatureGenerator.java | 2 +- .../featuregen/CachedFeatureGenerator.java | 5 +- .../DictionaryFeatureGenerator.java | 1 + .../util/featuregen/GeneratorFactory.java | 32 +- .../util/featuregen/InSpanGenerator.java | 8 +- .../tools/util/featuregen/StringPattern.java | 28 +- .../TokenPatternFeatureGenerator.java | 88 ++--- .../featuregen/WindowFeatureGenerator.java | 4 +- .../WordClusterFeatureGenerator.java | 6 +- .../opennlp/tools/util/model/BaseModel.java | 20 +- .../util/model/DictionarySerializer.java | 2 +- .../util/model/GenericModelSerializer.java | 2 +- .../util/model/PropertiesSerializer.java | 2 +- .../tools/chunker/ChunkSampleStreamTest.java | 6 +- .../tools/chunker/ChunkSampleTest.java | 126 ++++---- .../ChunkerDetailedFMeasureListenerTest.java | 2 +- .../tools/chunker/ChunkerEvaluatorTest.java | 74 ++--- .../tools/chunker/DummyChunkSampleStream.java | 98 +++--- .../opennlp/tools/chunker/DummyChunker.java | 96 +++--- .../tools/cmdline/ArgumentParserTest.java | 6 +- .../java/opennlp/tools/cmdline/CLITest.java | 6 +- .../tools/dictionary/DictionaryTest.java | 10 +- .../tools/doccat/DoccatFactoryTest.java | 1 + .../doccat/DocumentCategorizerMETest.java | 14 +- .../doccat/DocumentCategorizerNBTest.java | 1 + .../tools/eval/OntoNotes4NameFinderEval.java | 3 +- .../tools/eval/OntoNotes4ParserEval.java | 3 +- .../tools/eval/OntoNotes4PosTaggerEval.java | 13 +- .../formats/Conll03NameSampleStreamTest.java | 8 +- .../LeipzigDoccatSampleStreamTest.java | 2 +- .../formats/ad/ADChunkSampleStreamTest.java | 2 +- .../formats/ad/ADParagraphStreamTest.java | 6 +- .../ConstitParseSampleStreamTest.java | 2 +- .../muc/DocumentSplitterStreamTest.java | 6 +- .../LanguageModelEvaluationTest.java | 1 + .../languagemodel/LanguageModelTestUtils.java | 1 + .../languagemodel/NgramLanguageModelTest.java | 1 + .../lemmatizer/LemmatizerEvaluatorTest.java | 2 +- .../java/opennlp/tools/ml/BeamSearchTest.java | 21 +- .../opennlp/tools/ml/PrepAttachDataUtil.java | 6 +- .../tools/ml/maxent/RealValueModelTest.java | 4 +- .../ml/maxent/quasinewton/LineSearchTest.java | 3 +- .../quasinewton/NegLogLikelihoodTest.java | 41 +-- .../maxent/quasinewton/QNMinimizerTest.java | 5 +- .../ml/maxent/quasinewton/QNTrainerTest.java | 85 ++--- .../NaiveBayesModelReadWriteTest.java | 1 + .../DictionaryNameFinderEvaluatorTest.java | 2 +- .../namefind/DictionaryNameFinderTest.java | 6 +- .../namefind/NameFinderEventStreamTest.java | 2 +- .../tools/namefind/NameFinderMETest.java | 42 +-- .../namefind/NameSampleDataStreamTest.java | 16 +- .../tools/namefind/NameSampleTest.java | 20 +- .../tools/namefind/RegexNameFinderTest.java | 3 +- .../opennlp/tools/ngram/NGramModelTest.java | 3 +- .../opennlp/tools/ngram/NGramUtilsTest.java | 1 + .../java/opennlp/tools/parser/ParseTest.java | 18 +- .../opennlp/tools/parser/ParserTestUtil.java | 54 ++-- .../tools/parser/chunking/ParserTest.java | 4 +- .../tools/postag/DummyPOSTaggerFactory.java | 2 +- .../tools/postag/POSDictionaryTest.java | 2 +- .../opennlp/tools/postag/POSSampleTest.java | 2 +- .../tools/postag/POSTaggerFactoryTest.java | 6 +- .../opennlp/tools/postag/POSTaggerMETest.java | 12 +- .../tools/postag/WordTagSampleStreamTest.java | 2 +- .../NewlineSentenceDetectorTest.java | 2 +- .../tools/sentdetect/SDEventStreamTest.java | 2 +- .../sentdetect/SentenceDetectorMETest.java | 2 +- .../tools/stemmer/PorterStemmerTest.java | 2 +- .../DetokenizationDictionaryTest.java | 2 +- .../tokenize/DictionaryDetokenizerTest.java | 7 +- .../tokenize/TokSpanEventStreamTest.java | 2 +- .../tools/tokenize/TokenSampleStreamTest.java | 6 +- .../tools/tokenize/TokenSampleTest.java | 8 +- .../tools/util/AbstractEventStreamTest.java | 6 +- .../java/opennlp/tools/util/ListHeapTest.java | 2 +- .../tools/util/MockInputStreamFactory.java | 4 +- .../tools/util/PlainTextByLineStreamTest.java | 2 +- .../opennlp/tools/util/StringUtilTest.java | 4 +- .../eval/CrossValidationPartitionerTest.java | 12 +- .../opennlp/tools/util/eval/FMeasureTest.java | 56 ++-- .../FeatureGenWithSerializerMapping.java | 2 +- .../util/featuregen/GeneratorFactoryTest.java | 20 +- .../java/opennlp/uima/chunker/Chunker.java | 24 +- .../opennlp/uima/chunker/ChunkerTrainer.java | 8 +- .../uima/doccat/DocumentCategorizer.java | 2 +- .../doccat/DocumentCategorizerTrainer.java | 4 +- .../uima/namefind/AbstractNameFinder.java | 17 +- .../opennlp/uima/namefind/NameFinder.java | 6 +- .../opennlp/uima/normalizer/Normalizer.java | 6 +- .../main/java/opennlp/uima/parser/Parser.java | 3 +- .../opennlp/uima/postag/POSTaggerTrainer.java | 2 +- .../uima/sentdetect/SentenceDetector.java | 2 +- .../sentdetect/SentenceDetectorTrainer.java | 8 +- .../uima/tokenize/SimpleTokenizer.java | 2 +- .../uima/tokenize/TokenizerTrainer.java | 4 +- .../uima/tokenize/WhitespaceTokenizer.java | 2 +- .../uima/util/AbstractModelResource.java | 24 +- .../uima/util/AnnotationIteratorPair.java | 3 + .../java/opennlp/uima/util/AnnotatorUtil.java | 25 +- .../opennlp/uima/util/CasConsumerUtil.java | 140 ++++---- .../main/java/opennlp/uima/util/UimaUtil.java | 2 +- 375 files changed, 3271 insertions(+), 3130 deletions(-) diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java index 39cec0e17..5c3361ac2 100644 --- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java +++ b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java @@ -72,9 +72,9 @@ public Map findNames(@QueryParam("model") String modelName, int indexCounter = 0; for (int i = 0; i < sentenceSpans.length; i++) { - + String sentenceText = sentenceSpans[i].getCoveredText(text).toString(); - + // offset of sentence gets lost here! Span tokenSpans[] = tokenizer .tokenizePos(sentenceText); @@ -85,7 +85,7 @@ public Map findNames(@QueryParam("model") String modelName, Span names[] = nameFinder.find(tokens); for (Span name : names) { - + int beginOffset = tokenSpans[name.getStart()].getStart() + sentenceSpans[i].getStart(); int endOffset = tokenSpans[name.getEnd() - 1].getEnd() diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java index 54cb95cda..8345c404e 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/builder/MorfologikDictionayBuilder.java @@ -34,7 +34,7 @@ public class MorfologikDictionayBuilder { /** * Helper to compile a morphological dictionary automaton. - * + * * @param input * The input file (base,inflected,tag). An associated metadata * (*.info) file must exist. @@ -49,7 +49,7 @@ public class MorfologikDictionayBuilder { * @param ignoreEmpty * Ignore empty lines in the input. * @return the dictionary path - * + * * @throws Exception */ public Path build(Path input, boolean overwrite, boolean validate, @@ -60,10 +60,10 @@ public Path build(Path input, boolean overwrite, boolean validate, acceptBom, acceptCr, ignoreEmpty); compiler.call(); - + Path metadataPath = DictionaryMetadata .getExpectedMetadataLocation(input); - + return metadataPath.resolveSibling( metadataPath.getFileName().toString().replaceAll( "\\." + DictionaryMetadata.METADATA_FILE_EXTENSION + "$", ".dict")); @@ -72,13 +72,13 @@ public Path build(Path input, boolean overwrite, boolean validate, /** * Helper to compile a morphological dictionary automaton using default * parameters. - * + * * @param input * The input file (base,inflected,tag). An associated metadata * (*.info) file must exist. - * + * * @return the dictionary path - * + * * @throws Exception */ public Path build(Path input) throws Exception { diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java index 5205739f7..f68bd625d 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java @@ -35,130 +35,128 @@ public final class CLI { - public static final String CMD = "opennlp-morfologik-addon"; - - private static Map toolLookupMap; - - static { - toolLookupMap = new LinkedHashMap<>(); - - List tools = new LinkedList<>(); - - tools.add(new MorfologikDictionaryBuilderTool()); - tools.add(new XMLDictionaryToTableTool()); - - for (CmdLineTool tool : tools) { - toolLookupMap.put(tool.getName(), tool); - } - - toolLookupMap = Collections.unmodifiableMap(toolLookupMap); - } - - /** - * @return a set which contains all tool names - */ - public static Set getToolNames() { - return toolLookupMap.keySet(); - } - - private static void usage() { - System.out.print("OpenNLP Morfologik Addon " - + Version.currentVersion().toString() + ". "); - System.out.println("Usage: " + CMD + " TOOL"); - System.out.println("where TOOL is one of:"); - - // distance of tool name from line start - int numberOfSpaces = -1; - for (String toolName : toolLookupMap.keySet()) { - if (toolName.length() > numberOfSpaces) { - numberOfSpaces = toolName.length(); - } - } - numberOfSpaces = numberOfSpaces + 4; - - for (CmdLineTool tool : toolLookupMap.values()) { - - System.out.print(" " + tool.getName()); - - for (int i = 0; i < Math.abs(tool.getName().length() - - numberOfSpaces); i++) { - System.out.print(" "); - } - - System.out.println(tool.getShortDescription()); - } - - System.out - .println("All tools print help when invoked with help parameter"); - System.out - .println("Example: opennlp-morfologik-addon POSDictionaryBuilder help"); - } - - - @SuppressWarnings("rawtypes") - public static void main(String[] args) { - - if (args.length == 0) { - usage(); - System.exit(0); - } - - String toolArguments[] = new String[args.length -1]; - System.arraycopy(args, 1, toolArguments, 0, toolArguments.length); - - String toolName = args[0]; - - //check for format - String formatName = StreamFactoryRegistry.DEFAULT_FORMAT; - int idx = toolName.indexOf("."); - if (-1 < idx) { - formatName = toolName.substring(idx + 1); - toolName = toolName.substring(0, idx); - } - CmdLineTool tool = toolLookupMap.get(toolName); - - try { - if (null == tool) { - throw new TerminateToolException(1, "Tool " + toolName + " is not found."); - } - - if ((0 == toolArguments.length && tool.hasParams()) || - 0 < toolArguments.length && "help".equals(toolArguments[0])) { - if (tool instanceof TypedCmdLineTool) { - System.out.println(((TypedCmdLineTool) tool).getHelp(formatName)); - } else if (tool instanceof BasicCmdLineTool) { - System.out.println(tool.getHelp()); - } - - System.exit(0); - } - - if (tool instanceof TypedCmdLineTool) { - ((TypedCmdLineTool) tool).run(formatName, toolArguments); - } else if (tool instanceof BasicCmdLineTool) { - if (-1 == idx) { - ((BasicCmdLineTool) tool).run(toolArguments); - } else { - throw new TerminateToolException(1, "Tool " + toolName + " does not support formats."); - } - } else { - throw new TerminateToolException(1, "Tool " + toolName + " is not supported."); - } - } - catch (TerminateToolException e) { - - if (e.getMessage() != null) { - System.err.println(e.getMessage()); - } - - if (e.getCause() != null) { - System.err.println(e.getCause().getMessage()); - e.getCause().printStackTrace(System.err); - } - - System.exit(e.getCode()); - } - } - - + public static final String CMD = "opennlp-morfologik-addon"; + + private static Map toolLookupMap; + + static { + toolLookupMap = new LinkedHashMap<>(); + + List tools = new LinkedList<>(); + + tools.add(new MorfologikDictionaryBuilderTool()); + tools.add(new XMLDictionaryToTableTool()); + + for (CmdLineTool tool : tools) { + toolLookupMap.put(tool.getName(), tool); + } + + toolLookupMap = Collections.unmodifiableMap(toolLookupMap); + } + + /** + * @return a set which contains all tool names + */ + public static Set getToolNames() { + return toolLookupMap.keySet(); + } + + private static void usage() { + System.out.print("OpenNLP Morfologik Addon " + + Version.currentVersion().toString() + ". "); + System.out.println("Usage: " + CMD + " TOOL"); + System.out.println("where TOOL is one of:"); + + // distance of tool name from line start + int numberOfSpaces = -1; + for (String toolName : toolLookupMap.keySet()) { + if (toolName.length() > numberOfSpaces) { + numberOfSpaces = toolName.length(); + } + } + numberOfSpaces = numberOfSpaces + 4; + + for (CmdLineTool tool : toolLookupMap.values()) { + + System.out.print(" " + tool.getName()); + + for (int i = 0; i < Math.abs(tool.getName().length() + - numberOfSpaces); i++) { + System.out.print(" "); + } + + System.out.println(tool.getShortDescription()); + } + + System.out + .println("All tools print help when invoked with help parameter"); + System.out + .println("Example: opennlp-morfologik-addon POSDictionaryBuilder help"); + } + + + @SuppressWarnings("rawtypes") + public static void main(String[] args) { + + if (args.length == 0) { + usage(); + System.exit(0); + } + + String toolArguments[] = new String[args.length - 1]; + System.arraycopy(args, 1, toolArguments, 0, toolArguments.length); + + String toolName = args[0]; + + //check for format + String formatName = StreamFactoryRegistry.DEFAULT_FORMAT; + int idx = toolName.indexOf("."); + if (-1 < idx) { + formatName = toolName.substring(idx + 1); + toolName = toolName.substring(0, idx); + } + CmdLineTool tool = toolLookupMap.get(toolName); + + try { + if (null == tool) { + throw new TerminateToolException(1, "Tool " + toolName + " is not found."); + } + + if ((0 == toolArguments.length && tool.hasParams()) || + 0 < toolArguments.length && "help".equals(toolArguments[0])) { + if (tool instanceof TypedCmdLineTool) { + System.out.println(((TypedCmdLineTool) tool).getHelp(formatName)); + } else if (tool instanceof BasicCmdLineTool) { + System.out.println(tool.getHelp()); + } + + System.exit(0); + } + + if (tool instanceof TypedCmdLineTool) { + ((TypedCmdLineTool) tool).run(formatName, toolArguments); + } else if (tool instanceof BasicCmdLineTool) { + if (-1 == idx) { + ((BasicCmdLineTool) tool).run(toolArguments); + } else { + throw new TerminateToolException(1, "Tool " + toolName + " does not support formats."); + } + } else { + throw new TerminateToolException(1, "Tool " + toolName + " is not supported."); + } + } + catch (TerminateToolException e) { + + if (e.getMessage() != null) { + System.err.println(e.getMessage()); + } + + if (e.getCause() != null) { + System.err.println(e.getCause().getMessage()); + e.getCause().printStackTrace(System.err); + } + + System.exit(e.getCode()); + } + } } diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java index 5ea2e4fcd..6d12c255a 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java @@ -30,28 +30,28 @@ interface MorfologikDictionaryBuilderParams extends EncodingParameter { @ParameterDescription(valueName = "in", description = "The input file (base,inflected,tag). An associated metadata (*.info) file must exist.") File getInputFile(); - + @ParameterDescription(valueName = "true|false", description = "Accept leading BOM bytes (UTF-8).") - @OptionalParameter(defaultValue="false") + @OptionalParameter(defaultValue = "false") Boolean getAcceptBOM(); - + @ParameterDescription(valueName = "true|false", description = "Accept CR bytes in input sequences (\r).") - @OptionalParameter(defaultValue="false") + @OptionalParameter(defaultValue = "false") Boolean getAcceptCR(); - + @ParameterDescription(valueName = "FSA5|CFSA2", description = "Automaton serialization format.") - @OptionalParameter(defaultValue="FSA5") + @OptionalParameter(defaultValue = "FSA5") String getFormat(); - + @ParameterDescription(valueName = "true|false", description = "Ignore empty lines in the input.") - @OptionalParameter(defaultValue="false") + @OptionalParameter(defaultValue = "false") Boolean getIgnoreEmpty(); - + @ParameterDescription(valueName = "true|false", description = "Overwrite the output file if it exists.") - @OptionalParameter(defaultValue="false") + @OptionalParameter(defaultValue = "false") Boolean getOverwrite(); - + @ParameterDescription(valueName = "true|false", description = "Validate input to make sure it makes sense.") - @OptionalParameter(defaultValue="false") + @OptionalParameter(defaultValue = "false") Boolean getValidate(); } diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java index 4ee8cd4c4..529544d65 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableParams.java @@ -31,15 +31,19 @@ interface XMLDictionaryToTableParams extends EncodingParameter { @ParameterDescription(valueName = "in", description = "OpenNLP XML Tag Dictionary.") File getInputFile(); - @ParameterDescription(valueName = "out", description = "Output for Morfologik (.info will be also created).") + @ParameterDescription(valueName = "out", description = "Output for Morfologik " + + "(.info will be also created).") File getOutputFile(); - @ParameterDescription(valueName = "char", description = "Columm separator (must be a single character)") - @OptionalParameter(defaultValue=",") + @ParameterDescription(valueName = "char", description = "Columm separator " + + "(must be a single character)") + @OptionalParameter(defaultValue = ",") String getSeparator(); - - @ParameterDescription(valueName = "value", description = " Type of lemma-inflected form encoding compression that precedes automaton construction. Allowed values: [suffix, infix, prefix, none].") - @OptionalParameter(defaultValue="prefix") + + @ParameterDescription(valueName = "value", description = " Type of lemma-inflected " + + "form encoding compression that precedes automaton construction. Allowed " + + "values: [suffix, infix, prefix, none].") + @OptionalParameter(defaultValue = "prefix") String getEncoder(); - + } diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java index f3108a4df..b9f800acd 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/XMLDictionaryToTableTool.java @@ -73,7 +73,7 @@ public void run(String[] args) { while (iterator.hasNext()) { String word = iterator.next(); for (String tag : tagDictionary.getTags(word)) { - if(valid(word,tag)) { + if (valid(word,tag)) { String entry = createEntry(word, tag); writer.write(entry); writer.newLine(); @@ -86,14 +86,14 @@ public void run(String[] args) { throw new TerminateToolException(-1, "Error while writing output: " + e.getMessage(), e); } - + Properties info = new Properties(); info.setProperty("fsa.dict.separator", SEPARATOR); info.setProperty("fsa.dict.encoding", params.getEncoding().name()); info.setProperty("fsa.dict.encoder", params.getEncoder()); - + Path metaPath = DictionaryMetadata.getExpectedMetadataLocation(dictOutFile.toPath()); - + try { info.store(Files.newOutputStream(metaPath), "Info file for FSA Morfologik dictionary."); } catch (IOException e) { @@ -101,23 +101,23 @@ public void run(String[] args) { + e.getMessage(), e); } System.out.println("Created metadata: " + dictOutFile.toPath()); - + } private boolean valid(String word, String tag) { - if(word.contains(SEPARATOR) || tag.contains(SEPARATOR)) { + if (word.contains(SEPARATOR) || tag.contains(SEPARATOR)) { System.out .println("Warn: invalid entry because contains separator - word: " + word + " tag: " + tag); return false; } - + return true; } private String createEntry(String word, String tag) { - return "" + SEPARATOR +// base + return "" + SEPARATOR + // base word + SEPARATOR + tag; } diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java index 489b6fcc9..04d1e9534 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java @@ -45,7 +45,7 @@ private List lemmatize(String word, String postag) { List dictMap = dictLookup.lookup(word.toLowerCase()); Set lemmas = new HashSet<>(); for (WordData wordData : dictMap) { - if(Objects.equals(postag, asString(wordData.getTag()))) { + if (Objects.equals(postag, asString(wordData.getTag()))) { lemmas.add(asString(wordData.getStem())); } } @@ -53,7 +53,7 @@ private List lemmatize(String word, String postag) { } private String asString(CharSequence tag) { - if(tag == null) + if (tag == null) return null; return tag.toString(); } @@ -62,8 +62,8 @@ private String asString(CharSequence tag) { public String[] lemmatize(String[] toks, String[] tags) { String[] lemmas = new String[toks.length]; for (int i = 0; i < toks.length; i++) { - List l = lemmatize(toks[i],tags[i]); - if(l.size() > 0) { + List l = lemmatize(toks[i],tags[i]); + if (l.size() > 0) { lemmas[i] = l.get(0); } else { lemmas[i] = null; @@ -71,7 +71,7 @@ public String[] lemmatize(String[] toks, String[] tags) { } return lemmas; } - + /** * Generates a lemma tags for the word and postag returning the result in list of possible lemmas. diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java index 9a6abc997..dee8cbc63 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java @@ -50,26 +50,25 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory { public MorfologikPOSTaggerFactory() { } - + public TagDictionary createTagDictionary(File dictionary) throws IOException { - - if(!dictionary.canRead()) { + + if (!dictionary.canRead()) { throw new FileNotFoundException("Could not read dictionary: " + dictionary.getAbsolutePath()); } - + Path dictionaryMeta = DictionaryMetadata.getExpectedMetadataLocation(dictionary.toPath()); - - if(dictionaryMeta == null || !dictionaryMeta.toFile().canRead()) { + + if (dictionaryMeta == null || !dictionaryMeta.toFile().canRead()) { throw new FileNotFoundException("Could not read dictionary metadata: " + dictionaryMeta.getFileName()); } - + this.dictData = Files.readAllBytes(dictionary.toPath()); this.dictInfo = Files.readAllBytes(dictionaryMeta); - + return createMorfologikDictionary(dictData, dictInfo); - } - + @Override protected void init(Dictionary ngramDictionary, TagDictionary posDictionary) { diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java index bd4d1a4fa..f0e35cdca 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/util/MorfologikUtil.java @@ -22,12 +22,12 @@ import morfologik.stemming.DictionaryMetadata; public class MorfologikUtil { - + public static File getExpectedPropertiesFile(File dictFile) { return DictionaryMetadata.getExpectedMetadataLocation(dictFile.toPath()) .toFile(); } - + public static File getExpectedPropertiesFile(String dictFile) { File f = new File(dictFile); return getExpectedPropertiesFile(f); diff --git a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java index d7d94e9a6..c3a93f624 100644 --- a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java +++ b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/builder/POSDictionayBuilderTest.java @@ -33,51 +33,51 @@ public class POSDictionayBuilderTest { @Test public void testBuildDictionary() throws Exception { - + Path output = createMorfologikDictionary(); MorfologikLemmatizer ml = new MorfologikLemmatizer(output); Assert.assertNotNull(ml); } - + public static Path createMorfologikDictionary() throws Exception { Path tabFilePath = File.createTempFile( POSDictionayBuilderTest.class.getName(), ".txt").toPath(); Path infoFilePath = DictionaryMetadata.getExpectedMetadataLocation(tabFilePath); - + Files.copy(POSDictionayBuilderTest.class.getResourceAsStream( "/dictionaryWithLemma.txt"), tabFilePath, StandardCopyOption.REPLACE_EXISTING); Files.copy(POSDictionayBuilderTest.class.getResourceAsStream( "/dictionaryWithLemma.info"), infoFilePath, StandardCopyOption.REPLACE_EXISTING); - + MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder(); - + return builder.build(tabFilePath); } - - + + public static void main(String[] args) throws Exception { - // Part 1: compile a FSA lemma dictionary - - // we need the tabular dictionary. It is mandatory to have info + // Part 1: compile a FSA lemma dictionary + + // we need the tabular dictionary. It is mandatory to have info // file with same name, but .info extension Path textLemmaDictionary = Paths.get("/Users/wcolen/git/opennlp/opennlp-morfologik-addon/src/test/resources/dictionaryWithLemma.txt"); - + // this will build a binary dictionary located in compiledLemmaDictionary Path compiledLemmaDictionary = new MorfologikDictionayBuilder() .build(textLemmaDictionary); - + // Part 2: load a MorfologikLemmatizer and use it MorfologikLemmatizer lemmatizer = new MorfologikLemmatizer(compiledLemmaDictionary); - + String[] toks = {"casa", "casa"}; String[] tags = {"NOUN", "V"}; - + String[] lemmas = lemmatizer.lemmatize(toks, tags); System.out.println(Arrays.toString(lemmas)); // outputs [casa, casar] - + } } diff --git a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java index c6a18fd9c..69035cc1c 100644 --- a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java +++ b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizerTest.java @@ -33,11 +33,11 @@ public class MorfologikLemmatizerTest { @Test public void testLemmatizeInsensitive() throws Exception { Lemmatizer dict = createDictionary(false); - - + + String[] toks = {"casa", "casa", "Casa"}; String[] tags = {"V", "NOUN", "PROP"}; - + String[] lemmas = dict.lemmatize(toks, tags); assertEquals("casar", lemmas[0]); @@ -47,21 +47,21 @@ public void testLemmatizeInsensitive() throws Exception { assertNull(lemmas[2]); } - + @Test public void testLemmatizeMultiLemma() throws Exception { MorfologikLemmatizer dict = createDictionary(false); - - + + String[] toks = {"foi"}; String[] tags = {"V"}; - + List> lemmas = dict.lemmatize(Arrays.asList(toks), Arrays.asList(tags)); - + assertTrue(lemmas.get(0).contains("ir")); assertTrue(lemmas.get(0).contains("ser")); - + } diff --git a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java index 354b34c72..e0082b862 100644 --- a/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java +++ b/opennlp-morfologik-addon/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java @@ -52,7 +52,7 @@ private static ObjectStream createSampleStream() MarkableFileInputStreamFactory sampleDataIn = new MarkableFileInputStreamFactory( new File(POSTaggerFactory.class.getResource("/AnnotatedSentences.txt") .getFile())); - + ObjectStream lineStream = null; try { @@ -84,7 +84,7 @@ public void testPOSTaggerWithCustomFactory() throws Exception { assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary); factory = null; - + ByteArrayOutputStream out = new ByteArrayOutputStream(); posModel.serialize(out); ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); @@ -93,7 +93,7 @@ public void testPOSTaggerWithCustomFactory() throws Exception { factory = fromSerialized.getFactory(); assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary); - + assertEquals(2, factory.getTagDictionary().getTags("casa").length); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java index c61b05126..78402a014 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java +++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java @@ -126,7 +126,7 @@ public static Span[] phrasesAsSpanList(String[] aSentence, String[] aTags, foundPhrase = true; } else if (pred.equals("I-" + startTag)) { // middle // do nothing - } else if (foundPhrase) {// end + } else if (foundPhrase) { // end phrases.add(new Span(startIndex, ci, startTag)); foundPhrase = false; startTag = ""; @@ -158,9 +158,9 @@ private static void validateArguments(int sentenceSize, int tagsSize, int predsS */ public String nicePrint() { - Span[] spans = getPhrasesAsSpanList(); + Span[] spans = getPhrasesAsSpanList(); - StringBuilder result = new StringBuilder(" "); + StringBuilder result = new StringBuilder(" "); for (int tokenIndex = 0; tokenIndex < sentence.size(); tokenIndex++) { for (int nameIndex = 0; nameIndex < spans.length; nameIndex++) { @@ -191,13 +191,13 @@ public String nicePrint() { @Override public String toString() { - StringBuilder chunkString = new StringBuilder(); + StringBuilder chunkString = new StringBuilder(); - for (int ci=0; ci < preds.size(); ci++) { - chunkString.append(sentence.get(ci)).append(" ").append(tags.get(ci)).append(" ").append(preds.get(ci)).append("\n"); - } - return chunkString.toString(); - } + for (int ci = 0; ci < preds.size(); ci++) { + chunkString.append(sentence.get(ci)).append(" ").append(tags.get(ci)).append(" ").append(preds.get(ci)).append("\n"); + } + return chunkString.toString(); + } @Override public boolean equals(Object obj) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java index 8935829a0..9898bd424 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java @@ -45,7 +45,7 @@ public Sequence read() throws IOException { String tags[] = sample.getTags(); Event[] events = new Event[sentence.length]; - for (int i=0; i < sentence.length; i++) { + for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java index 2f342a7fa..773dd195f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java @@ -48,10 +48,10 @@ public ChunkSample read() throws IOException { List tags = new ArrayList(); List preds = new ArrayList(); - for (String line = samples.read(); line !=null && !line.equals(""); line = samples.read()) { + for (String line = samples.read(); line != null && !line.equals(""); line = samples.read()) { String[] parts = line.split(" "); if (parts.length != 3) { - System.err.println("Skipping corrupt line: "+line); + System.err.println("Skipping corrupt line: " + line); } else { toks.add(parts[0]); diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java index 3ed4f9c2c..9ab8e1a42 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java @@ -64,7 +64,7 @@ public class ChunkerME implements Chunker { * is valid for the preceding sequence. This can be used to implement constraints * on what sequences are valid. * @deprecated Use {@link #ChunkerME(ChunkerModel, int)} instead - * and use the {@link ChunkerFactory} to configure the {@link SequenceValidator} and {@link ChunkerContextGenerator}. + * and use the {@link ChunkerFactory} to configure the {@link SequenceValidator} and {@link ChunkerContextGenerator}. */ @Deprecated private ChunkerME(ChunkerModel model, int beamSize, SequenceValidator sequenceValidator, @@ -94,8 +94,8 @@ private ChunkerME(ChunkerModel model, int beamSize, SequenceValidator se @Deprecated private ChunkerME(ChunkerModel model, int beamSize) { - contextGenerator = model.getFactory().getContextGenerator(); - sequenceValidator = model.getFactory().getSequenceValidator(); + contextGenerator = model.getFactory().getContextGenerator(); + sequenceValidator = model.getFactory().getSequenceValidator(); if (model.getChunkerSequenceModel() != null) { this.model = model.getChunkerSequenceModel(); @@ -148,12 +148,12 @@ public void probs(double[] probs) { bestSequence.getProbs(probs); } - /** - * Returns an array with the probabilities of the last decoded sequence. The - * sequence was determined based on the previous call to chunk. - * @return An array with the same number of probabilities as tokens were sent to chunk - * when it was last called. - */ + /** + * Returns an array with the probabilities of the last decoded sequence. The + * sequence was determined based on the previous call to chunk. + * @return An array with the same number of probabilities as tokens were sent to chunk + * when it was last called. + */ public double[] probs() { return bestSequence.getProbs(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java index 3bf4ba452..387994bdb 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java @@ -35,7 +35,7 @@ public String[] getContext(int index, String[] sequence, String[] priorDecisions } public String[] getContext(int i, String[] toks, String[] tags, String[] preds) { - // Words in a 5-word window + // Words in a 5-word window String w_2, w_1, w0, w1, w2; // Tags in a 5-word window diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java index ff7136a55..ce395eb17 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java @@ -19,7 +19,7 @@ import opennlp.tools.util.SequenceValidator; -public class DefaultChunkerSequenceValidator implements SequenceValidator{ +public class DefaultChunkerSequenceValidator implements SequenceValidator { private boolean validOutcome(String outcome, String prevOutcome) { if (outcome.startsWith("I-")) { @@ -41,7 +41,7 @@ private boolean validOutcome(String outcome, String prevOutcome) { protected boolean validOutcome(String outcome, String[] sequence) { String prevOutcome = null; if (sequence.length > 0) { - prevOutcome = sequence[sequence.length-1]; + prevOutcome = sequence[sequence.length - 1]; } return validOutcome(outcome,prevOutcome); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java index 9b4f23fe8..a6b81ea26 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractConverterTool.java @@ -42,7 +42,7 @@ protected AbstractConverterTool(Class sampleType) { public String getShortDescription() { Map> factories = StreamFactoryRegistry.getFactories(type); StringBuilder help = new StringBuilder(); - if (2 == factories.keySet().size()) {//opennlp + foreign + if (2 == factories.keySet().size()) { //opennlp + foreign for (String format : factories.keySet()) { if (!StreamFactoryRegistry.DEFAULT_FORMAT.equals(format)) { help.append(format); @@ -105,7 +105,7 @@ public void run(String format, String[] args) { try (ObjectStream sampleStream = streamFactory.create(formatArgs)) { Object sample; - while((sample = sampleStream.read()) != null) { + while ((sample = sampleStream.read()) != null) { System.out.println(sample.toString()); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java index ca9a65069..02d1a863e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java @@ -49,14 +49,12 @@ */ public class ArgumentParser { - public @Retention(RetentionPolicy.RUNTIME) - @interface OptionalParameter { + public @Retention(RetentionPolicy.RUNTIME) @interface OptionalParameter { String DEFAULT_CHARSET = "DEFAULT_CHARSET"; String defaultValue() default ""; } - public @Retention(RetentionPolicy.RUNTIME) - @interface ParameterDescription { + public @Retention(RetentionPolicy.RUNTIME) @interface ParameterDescription { String valueName(); String description() default ""; } @@ -112,7 +110,7 @@ private static class CharsetArgumentFactory implements ArgumentFactory { public Object parseArgument(Method method, String argName, String charsetName) { try { - if(OptionalParameter.DEFAULT_CHARSET.equals(charsetName)) { + if (OptionalParameter.DEFAULT_CHARSET.equals(charsetName)) { return Charset.defaultCharset(); } else if (Charset.isSupported(charsetName)) { return Charset.forName(charsetName); @@ -183,16 +181,16 @@ private static void checkProxyInterfaces(Class... proxyInterfaces) { // check that method has zero arguments if (method.getParameterTypes().length != 0) throw new IllegalArgumentException(method.getName() + " method must have zero parameters but has " + - method.getParameterTypes().length + "!"); + method.getParameterTypes().length + "!"); // check return types of interface Class returnType = method.getReturnType(); Set> compatibleReturnTypes = argumentFactories.keySet(); - if(!compatibleReturnTypes.contains(returnType)) - throw new IllegalArgumentException(method.getName() + " method must have compatible return type! Got " + - returnType + ", expected one of " + compatibleReturnTypes); + if (!compatibleReturnTypes.contains(returnType)) + throw new IllegalArgumentException(method.getName() + " method must have compatible return type! Got " + + returnType + ", expected one of " + compatibleReturnTypes); } } } @@ -220,7 +218,7 @@ private static String methodNameToParameter(String methodName) { public static String createUsage(Class argProxyInterface) { return createUsage(new Class[]{argProxyInterface}); } - + /** * Auxiliary class that holds information about an argument. This is used by the * GenerateManualTool, which creates a Docbook for the CLI automatically. @@ -230,7 +228,7 @@ static class Argument { private final String value; private final String description; private final boolean optional; - + public Argument(String argument, String value, String description, boolean optional) { super(); @@ -254,9 +252,9 @@ public String getDescription() { public boolean getOptional() { return optional; - } + } } - + /** @@ -289,16 +287,16 @@ public static List createArguments(Class... argProxyInterfaces) else { duplicateFilter.add(paramName); } - + boolean isOptional = false; if (optional != null) isOptional = true; - + Argument arg = new Argument(paramName.substring(1), desc.valueName(), desc.description(), isOptional); arguments.add(arg); - + } } } @@ -346,7 +344,7 @@ public static String createUsage(Class... argProxyInterfaces) { usage.append(paramName).append(' ').append(desc.valueName()); details.append('\t').append(paramName).append(' ').append(desc.valueName()).append('\n'); - if(desc.description().length() > 0) { + if (desc.description().length() > 0) { details.append("\t\t").append(desc.description()).append('\n'); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java index 7abb6d495..cb103d289 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java @@ -123,7 +123,7 @@ public final class CLI { tools.add(new POSTaggerEvaluatorTool()); tools.add(new POSTaggerCrossValidatorTool()); tools.add(new POSTaggerConverterTool()); - + //Lemmatizer tools.add(new LemmatizerMETool()); tools.add(new LemmatizerTrainerTool()); @@ -161,7 +161,7 @@ public final class CLI { public static Set getToolNames() { return toolLookupMap.keySet(); } - + /** * @return a read only map with tool names and instances */ @@ -205,7 +205,7 @@ public static void main(String[] args) { System.exit(0); } - String toolArguments[] = new String[args.length -1]; + String toolArguments[] = new String[args.length - 1]; System.arraycopy(args, 1, toolArguments, 0, toolArguments.length); String toolName = args[0]; @@ -226,13 +226,13 @@ public static void main(String[] args) { if ((0 == toolArguments.length && tool.hasParams()) || 0 < toolArguments.length && "help".equals(toolArguments[0])) { - if (tool instanceof TypedCmdLineTool) { - System.out.println(((TypedCmdLineTool) tool).getHelp(formatName)); - } else if (tool instanceof BasicCmdLineTool) { - System.out.println(tool.getHelp()); - } + if (tool instanceof TypedCmdLineTool) { + System.out.println(((TypedCmdLineTool) tool).getHelp(formatName)); + } else if (tool instanceof BasicCmdLineTool) { + System.out.println(tool.getHelp()); + } - System.exit(0); + System.exit(0); } if (tool instanceof TypedCmdLineTool) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java index 9587c6b8e..2d98688ed 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java @@ -43,7 +43,7 @@ */ public final class CmdLineUtil { - static final int IO_BUFFER_SIZE = 1024 * 1024; + static final int IO_BUFFER_SIZE = 1024 * 1024; private CmdLineUtil() { // not intended to be instantiated @@ -58,12 +58,12 @@ private CmdLineUtil() { * - accessibly
* * @param name the name which is used to refer to the file in an error message, it - * should start with a capital letter. + * should start with a capital letter. * * @param inFile the particular file to check to qualify an input file * * @throws TerminateToolException if test does not pass this exception is - * thrown and an error message is printed to the console. + * thrown and an error message is printed to the console. */ public static void checkInputFile(String name, File inFile) { @@ -135,7 +135,7 @@ else if (outFile.isFile()) { } else { isFailure = "The parent directory of the " + name + " file does not exist, " + - "please create it first!"; + "please create it first!"; } } @@ -223,12 +223,12 @@ public static int getParameterIndex(String param, String args[]) { */ public static String getParameter(String param, String args[]) { int i = getParameterIndex(param, args); - if (-1 < i) { - i++; - if (i < args.length) { - return args[i]; - } + if (-1 < i) { + i++; + if (i < args.length) { + return args[i]; } + } return null; } @@ -245,7 +245,7 @@ public static Integer getIntParameter(String param, String args[]) { try { if (value != null) - return Integer.parseInt(value); + return Integer.parseInt(value); } catch (NumberFormatException e) { } @@ -265,7 +265,7 @@ public static Double getDoubleParameter(String param, String args[]) { try { if (value != null) - return Double.parseDouble(value); + return Double.parseDouble(value); } catch (NumberFormatException e) { } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java index 51e11611b..f8a0d9124 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/EvaluationErrorPrinter.java @@ -106,8 +106,8 @@ protected void printError(String references[], String predictions[], // for others protected void printError(T referenceSample, T predictedSample) { - printSamples(referenceSample, predictedSample); - printStream.println(); + printSamples(referenceSample, predictedSample); + printStream.println(); } /** diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/GenerateManualTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/GenerateManualTool.java index 00074ea00..cf237c153 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/GenerateManualTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/GenerateManualTool.java @@ -82,7 +82,7 @@ private static String getUsage() { /** * Appends a group of tools, based on the tool package name - * + * * @param groupName * @param toolsMap * @param sb @@ -102,7 +102,7 @@ private static void appendToolGroup(String groupName, /** * Appends a tool - * + * * @param groupName * @param toolName * @param tool @@ -233,7 +233,7 @@ private static void appendCode(String help, StringBuilder sb) { /** * Prevents long lines. Lines are optimized for printing. - * + * * @param stringBlock * @return */ diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java index 76ecf455c..b0a396f43 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java @@ -37,7 +37,7 @@ public class PerformanceMonitor { private ScheduledExecutorService scheduler = - Executors.newScheduledThreadPool(1); + Executors.newScheduledThreadPool(1); private final String unit; @@ -128,7 +128,7 @@ public void run() { } }; - beeperHandle = scheduler.scheduleAtFixedRate(beeper, 1, 1, TimeUnit.SECONDS); + beeperHandle = scheduler.scheduleAtFixedRate(beeper, 1, 1, TimeUnit.SECONDS); } public void stopAndPrintFinalResult() { @@ -157,7 +157,7 @@ public void stopAndPrintFinalResult() { out.println(); out.println(); - out.printf("Average: %.1f " + unit +"/s %n", average); + out.printf("Average: %.1f " + unit + "/s %n", average); out.println("Total: " + counter + " " + unit); out.println("Runtime: " + timePassed / 1000d + "s"); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java index 2573e2e62..ef1e73efc 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java @@ -198,7 +198,7 @@ public static ObjectStreamFactory getFactory(Class sampleClass, try { return (ObjectStreamFactory) factoryClazz.newInstance(); } catch (InstantiationException | IllegalAccessException e) { - return null; + return null; } } catch (ClassNotFoundException e) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java index 458f05e41..9aaaa3b99 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/TypedCmdLineTool.java @@ -99,7 +99,7 @@ protected String getBasicHelp(Class... argProxyInterfaces) { formats.append(".").append(format).append("|"); } } - formatsHelp = "[" + formats.substring(0, formats.length() - 1)+ "] "; + formatsHelp = "[" + formats.substring(0, formats.length() - 1) + "] "; } return "Usage: " + CLI.CMD + " " + getName() + formatsHelp + diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerDetailedFMeasureListener.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerDetailedFMeasureListener.java index 9b54b9a0d..31404de05 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerDetailedFMeasureListener.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerDetailedFMeasureListener.java @@ -23,7 +23,7 @@ import opennlp.tools.util.Span; public class ChunkerDetailedFMeasureListener extends - DetailedFMeasureListener implements ChunkerEvaluationMonitor{ + DetailedFMeasureListener implements ChunkerEvaluationMonitor { @Override protected Span[] asSpanArray(ChunkSample sample) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java index 01324cc5f..eca9b4f22 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java @@ -56,10 +56,10 @@ public void run(String format, String[] args) { List> listeners = new LinkedList<>(); ChunkerDetailedFMeasureListener detailedFMeasureListener = null; - if(params.getMisclassified()) { + if (params.getMisclassified()) { listeners.add(new ChunkEvaluationErrorListener()); } - if(params.getDetailedF()) { + if (params.getDetailedF()) { detailedFMeasureListener = new ChunkerDetailedFMeasureListener(); listeners.add(detailedFMeasureListener); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java index b511a0b58..f47947305 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.cmdline.chunker; import java.io.File; diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java index aa35ef752..ceca0f66c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java @@ -53,7 +53,7 @@ public void run(String format, String[] args) { super.run(format, args); mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false); - if(mlParams == null) { + if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java index dbc6e6bb0..4ff985475 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.cmdline.doccat; import java.io.File; diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java index 91ee01976..abb885cb7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java @@ -56,7 +56,7 @@ public void run(String format, String[] args) { super.run(format, args); mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false); - if(mlParams == null) { + if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); } @@ -91,14 +91,14 @@ public void run(String format, String[] args) { } static Tokenizer createTokenizer(String tokenizer) { - if(tokenizer != null) { + if (tokenizer != null) { return ExtensionLoader.instantiateExtension(Tokenizer.class, tokenizer); } return WhitespaceTokenizer.INSTANCE; } static FeatureGenerator[] createFeatureGenerators(String featureGeneratorsNames) { - if(featureGeneratorsNames == null) { + if (featureGeneratorsNames == null) { return new FeatureGenerator[]{new BagOfWordsFeatureGenerator()}; } String[] classes = featureGeneratorsNames.split(","); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java index 966111a3a..13f28b20b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerMETool.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.cmdline.lemmatizer; import java.io.File; diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java index 8159ef0aa..604251045 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java @@ -54,7 +54,7 @@ interface Parameters { String getLang(); @ParameterDescription(valueName = "charsetName") - @OptionalParameter(defaultValue="UTF-8") + @OptionalParameter(defaultValue = "UTF-8") String getEncoding(); @ParameterDescription(valueName = "censusDict") @@ -78,7 +78,7 @@ public String getHelp() { * * @param sampleStream stream of samples. * @return a {@code Dictionary} class containing the name dictionary - * built from the input file. + * built from the input file. * @throws IOException IOException */ public static Dictionary createDictionary(ObjectStream sampleStream) throws IOException { diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java index e821bd68a..70674d687 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/NameSampleCountersStream.java @@ -35,53 +35,53 @@ public class NameSampleCountersStream private int sentenceCount; private int tokenCount; - + private Map nameCounters = new HashMap<>(); - + protected NameSampleCountersStream(ObjectStream samples) { super(samples); } @Override public NameSample read() throws IOException { - + NameSample sample = samples.read(); - + if (sample != null) { sentenceCount++; tokenCount += sample.getSentence().length; - + for (Span nameSpan : sample.getNames()) { Integer nameCounter = nameCounters.get(nameSpan.getType()); - + if (nameCounter == null) { nameCounter = 0; } - + nameCounters.put(nameSpan.getType(), nameCounter + 1); } } - + return sample; } - + @Override public void reset() throws IOException, UnsupportedOperationException { super.reset(); - + sentenceCount = 0; tokenCount = 0; nameCounters = new HashMap<>(); } - + public int getSentenceCount() { return sentenceCount; } - + public int getTokenCount() { return tokenCount; } - + public Map getNameCounters() { return Collections.unmodifiableMap(nameCounters); } @@ -90,7 +90,7 @@ public void printSummary() { System.out.println("Training data summary:"); System.out.println("#Sentences: " + getSentenceCount()); System.out.println("#Tokens: " + getTokenCount()); - + int totalNames = 0; for (Map.Entry counter : getNameCounters().entrySet()) { System.out.println("#" + counter.getKey() + " entities: " + counter.getValue()); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java index aa1e343c1..459a3e5e2 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java @@ -123,7 +123,7 @@ else if ("BILOU".equals(sequenceCodecImplName)) { System.out.println(); - if(detailedFListener == null) { + if (detailedFListener == null) { System.out.println(validator.getFMeasure()); } else { System.out.println(detailedFListener.toString()); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java index 8533bb5a7..194397d40 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderEvaluatorTool.java @@ -116,7 +116,7 @@ public void close() throws IOException { System.out.println(); - if(detailedFListener == null) { + if (detailedFListener == null) { System.out.println(evaluator.getFMeasure()); } else { System.out.println(detailedFListener.toString()); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java index 6e8b86f0a..92a846a38 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.cmdline.namefind; import java.io.File; @@ -59,8 +60,8 @@ public void run(String[] args) { nameFinders[i] = new NameFinderME(model); } -// ObjectStream untokenizedLineStream = -// new PlainTextByLineStream(new InputStreamReader(System.in)); + // ObjectStream untokenizedLineStream = + // new PlainTextByLineStream(new InputStreamReader(System.in)); ObjectStream untokenizedLineStream; PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent"); perfMon.start(); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java index b2ccfc5a7..f75f97642 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java @@ -60,7 +60,7 @@ public String getShortDescription() { } static byte[] openFeatureGeneratorBytes(String featureGenDescriptorFile) { - if(featureGenDescriptorFile != null) { + if (featureGenDescriptorFile != null) { return openFeatureGeneratorBytes(new File(featureGenDescriptorFile)); } return null; @@ -108,7 +108,7 @@ public static Map loadResources(File resourcePath, File featureG // TODO: Improve error handling! e.printStackTrace(); } - + try (InputStream inputStreamXML = CmdLineUtil.openInFile(featureGenDescriptor)) { elements = GeneratorFactory.getDescriptorElements(inputStreamXML); } catch (IOException e) { @@ -164,7 +164,7 @@ public void run(String format, String[] args) { super.run(format, args); mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true); - if(mlParams == null) { + if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); } @@ -207,7 +207,7 @@ else if ("BILOU".equals(sequenceCodecImplName)) { NameSampleCountersStream counters = new NameSampleCountersStream(sampleStream); sampleStream = counters; - + TokenNameFinderModel model; try { model = opennlp.tools.namefind.NameFinderME.train( @@ -225,12 +225,12 @@ else if ("BILOU".equals(sequenceCodecImplName)) { // sorry that this can fail } } - + System.out.println(); counters.printSummary(); System.out.println(); - + CmdLineUtil.writeModel("name finder", modelOutFile, model); - + } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java index 6cf30ea0e..2fe16d6ff 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/CVParams.java @@ -29,11 +29,11 @@ public interface CVParams { @ParameterDescription(valueName = "true|false", description = "if true will print false negatives and false positives.") - @OptionalParameter(defaultValue="false") + @OptionalParameter(defaultValue = "false") Boolean getMisclassified(); @ParameterDescription(valueName = "num", description = "number of folds, default is 10.") - @OptionalParameter(defaultValue="10") + @OptionalParameter(defaultValue = "10") Integer getFolds(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java index abd035929..4040e7974 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/DetailedFMeasureEvaluatorParams.java @@ -30,7 +30,7 @@ public interface DetailedFMeasureEvaluatorParams { @ParameterDescription(valueName = "true|false", description = "if true will print detailed FMeasure results.") - @OptionalParameter(defaultValue="false") + @OptionalParameter(defaultValue = "false") Boolean getDetailedF(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java index 29a32649c..3629ea22c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/params/EvaluatorParams.java @@ -34,7 +34,7 @@ public interface EvaluatorParams { @ParameterDescription(valueName = "true|false", description = "if true will print false negatives and false positives.") - @OptionalParameter(defaultValue="false") + @OptionalParameter(defaultValue = "false") Boolean getMisclassified(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java index fe9872575..ce30f3bf7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/BuildModelUpdaterTool.java @@ -40,19 +40,19 @@ protected ParserModel trainAndUpdate(ParserModel originalModel, ObjectStream parseSamples, ModelUpdaterParams parameters) throws IOException { - Dictionary mdict = ParserTrainerTool.buildDictionary(parseSamples, originalModel.getHeadRules(), 5); + Dictionary mdict = ParserTrainerTool.buildDictionary(parseSamples, originalModel.getHeadRules(), 5); - parseSamples.reset(); + parseSamples.reset(); - // TODO: training individual models should be in the chunking parser, not here - // Training build - System.out.println("Training builder"); - ObjectStream bes = new ParserEventStream(parseSamples, - originalModel.getHeadRules(), ParserEventTypeEnum.BUILD, mdict); - AbstractModel buildModel = Parser.train(bes, 100, 5); + // TODO: training individual models should be in the chunking parser, not here + // Training build + System.out.println("Training builder"); + ObjectStream bes = new ParserEventStream(parseSamples, + originalModel.getHeadRules(), ParserEventTypeEnum.BUILD, mdict); + AbstractModel buildModel = Parser.train(bes, 100, 5); - parseSamples.close(); + parseSamples.close(); - return originalModel.updateBuildModel(buildModel); + return originalModel.updateBuildModel(buildModel); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java index f0d75a851..1103e661f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/CheckModelUpdaterTool.java @@ -41,20 +41,20 @@ protected ParserModel trainAndUpdate(ParserModel originalModel, ObjectStream parseSamples, ModelUpdaterParams parameters) throws IOException { - Dictionary mdict = ParserTrainerTool.buildDictionary(parseSamples, originalModel.getHeadRules(), 5); + Dictionary mdict = ParserTrainerTool.buildDictionary(parseSamples, originalModel.getHeadRules(), 5); - parseSamples.reset(); + parseSamples.reset(); - // TODO: Maybe that should be part of the ChunkingParser ... - // Training build - System.out.println("Training check model"); - ObjectStream bes = new ParserEventStream(parseSamples, - originalModel.getHeadRules(), ParserEventTypeEnum.CHECK, mdict); - AbstractModel checkModel = Parser.train(bes, - 100, 5); + // TODO: Maybe that should be part of the ChunkingParser ... + // Training build + System.out.println("Training check model"); + ObjectStream bes = new ParserEventStream(parseSamples, + originalModel.getHeadRules(), ParserEventTypeEnum.CHECK, mdict); + AbstractModel checkModel = Parser.train(bes, + 100, 5); - parseSamples.close(); + parseSamples.close(); - return originalModel.updateCheckModel(checkModel); + return originalModel.updateCheckModel(checkModel); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserConverterTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserConverterTool.java index 182f28813..63b212353 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserConverterTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserConverterTool.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.cmdline.parser; import opennlp.tools.cmdline.AbstractConverterTool; diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java index dddaf94ab..690e3c06d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.cmdline.parser; import java.io.File; @@ -55,6 +56,7 @@ public String getHelp() { + "-k n: Show the top n parses. This will also display their log-probablities.\n" + "-tk tok_model: Use the specified tokenizer model to tokenize the sentences. Defaults to a WhitespaceTokenizer."; } + private static Pattern untokenizedParenPattern1 = Pattern.compile("([^ ])([({)}])"); private static Pattern untokenizedParenPattern2 = Pattern.compile("([({)}])([^ ])"); @@ -121,7 +123,7 @@ public void run(String[] args) { Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE; String tokenizerModelName = CmdLineUtil.getParameter( "-tk", args ); - if( tokenizerModelName != null ){ + if (tokenizerModelName != null ) { TokenizerModel tokenizerModel = new TokenizerModelLoader().load( new File( tokenizerModelName ) ); tokenizer = new TokenizerME( tokenizerModel ); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java index 10cf7c426..928ea2910 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java @@ -70,9 +70,9 @@ static Dictionary buildDictionary(ObjectStream parseSamples, HeadRules he static ParserType parseParserType(String typeAsString) { ParserType type = null; - if(typeAsString != null && typeAsString.length() > 0) { + if (typeAsString != null && typeAsString.length() > 0) { type = ParserType.parse(typeAsString); - if(type == null) { + if (type == null) { throw new TerminateToolException(1, "ParserType training parameter '" + typeAsString + "' is invalid!"); } @@ -140,7 +140,7 @@ public void run(String format, String[] args) { } } - if(mlParams == null) { + if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); } @@ -152,8 +152,8 @@ public void run(String format, String[] args) { HeadRules rules = creaeHeadRules(params); ParserType type = parseParserType(params.getParserType()); - if(params.getFun()){ - Parse.useFunctionTags(true); + if (params.getFun()) { + Parse.useFunctionTags(true); } if (ParserType.CHUNKING.equals(type)) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSModelLoader.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSModelLoader.java index 40632cf33..6e2a01093 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSModelLoader.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSModelLoader.java @@ -27,7 +27,7 @@ *

* Note: Do not use this class, internal use only! */ -public final class POSModelLoader extends ModelLoader{ +public final class POSModelLoader extends ModelLoader { public POSModelLoader() { super("POS Tagger"); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java index 2c4c661fc..61e322561 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.cmdline.postag; import java.io.File; diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java index c04b5efad..d19ea5ee7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java @@ -61,7 +61,7 @@ public void run(String format, String[] args) { "' is invalid!"); } - if(mlParams == null) { + if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); mlParams.put(TrainingParameters.ALGORITHM_PARAM, getModelType(params.getType()).toString()); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java index 666e8640e..b0d554f13 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java @@ -58,7 +58,7 @@ public void run(String format, String[] args) { System.out.print("Evaluating ... "); try { - evaluator.evaluate(sampleStream); + evaluator.evaluate(sampleStream); } catch (IOException e) { throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java index d468c287a..9e493b845 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java @@ -70,7 +70,7 @@ public void run(String format, String[] args) { } } - if(mlParams == null) { + if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java index 4c81d9356..8eb500471 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.cmdline.tokenizer; import java.io.IOException; diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java index 4c379b215..57176ae30 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java @@ -43,15 +43,15 @@ public void run(String[] args) { if (args.length != 1) { System.out.println(getHelp()); } else { - try { - Detokenizer detokenizer = new DictionaryDetokenizer( - new DetokenizationDictionaryLoader().load(new File(args[0]))); + try { + Detokenizer detokenizer = new DictionaryDetokenizer( + new DetokenizationDictionaryLoader().load(new File(args[0]))); - ObjectStream tokenizedLineStream = - new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()); + ObjectStream tokenizedLineStream = + new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()); - PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent"); - perfMon.start(); + PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent"); + perfMon.start(); String tokenizedLine; @@ -64,7 +64,7 @@ public void run(String[] args) { perfMon.incrementCounter(); } - perfMon.stopAndPrintFinalResult(); + perfMon.stopAndPrintFinalResult(); } catch (IOException e) { CmdLineUtil.handleStdinIoError(e); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/SimpleTokenizerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/SimpleTokenizerTool.java index 77ff79ec2..0c2680705 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/SimpleTokenizerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/SimpleTokenizerTool.java @@ -41,7 +41,7 @@ public void run(String[] args) { } else { CommandLineTokenizer tokenizer = - new CommandLineTokenizer(opennlp.tools.tokenize.SimpleTokenizer.INSTANCE); + new CommandLineTokenizer(opennlp.tools.tokenize.SimpleTokenizer.INSTANCE); tokenizer.process(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMETool.java index eff9272b3..27176a12a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMETool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerMETool.java @@ -41,7 +41,7 @@ public void run(String[] args) { TokenizerModel model = new TokenizerModelLoader().load(new File(args[0])); CommandLineTokenizer tokenizer = - new CommandLineTokenizer(new opennlp.tools.tokenize.TokenizerME(model)); + new CommandLineTokenizer(new opennlp.tools.tokenize.TokenizerME(model)); tokenizer.process(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java index 7ec58258f..ba619c3ad 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java @@ -73,7 +73,7 @@ public void run(String format, String[] args) { } } - if(mlParams == null) { + if (mlParams == null) { mlParams = ModelUtil.createDefaultTrainingParameters(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java index 496174175..7258b4bcd 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java +++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/Dictionary.java @@ -68,7 +68,7 @@ else if (obj instanceof StringListWrapper) { else { result = this.stringList.compareToIgnoreCase(other.getStringList()); } - } + } else { result = false; } @@ -139,9 +139,9 @@ public Dictionary(InputStream in, boolean caseSensitive) throws IOException { * @param tokens the new entry */ public void put(StringList tokens) { - entrySet.add(new StringListWrapper(tokens)); - minTokenCount = Math.min(minTokenCount, tokens.size()); - maxTokenCount = Math.max(maxTokenCount, tokens.size()); + entrySet.add(new StringListWrapper(tokens)); + minTokenCount = Math.min(minTokenCount, tokens.size()); + maxTokenCount = Math.max(maxTokenCount, tokens.size()); } /** @@ -149,7 +149,7 @@ public void put(StringList tokens) { * @return minimum token count in the dictionary */ public int getMinTokenCount() { - return minTokenCount; + return minTokenCount; } /** @@ -157,7 +157,7 @@ public int getMinTokenCount() { * @return maximum token count in the dictionary */ public int getMaxTokenCount() { - return maxTokenCount; + return maxTokenCount; } /** @@ -167,7 +167,7 @@ public int getMaxTokenCount() { * @return true if it contains the entry otherwise false */ public boolean contains(StringList tokens) { - return entrySet.contains(new StringListWrapper(tokens)); + return entrySet.contains(new StringListWrapper(tokens)); } /** @@ -176,7 +176,7 @@ public boolean contains(StringList tokens) { * @param tokens */ public void remove(StringList tokens) { - entrySet.remove(new StringListWrapper(tokens)); + entrySet.remove(new StringListWrapper(tokens)); } /** @@ -199,7 +199,8 @@ public StringList next() { public void remove() { entries.remove(); - }}; + } + }; } /** @@ -220,25 +221,25 @@ public int size() { public void serialize(OutputStream out) throws IOException { Iterator entryIterator = new Iterator() - { - private Iterator dictionaryIterator = Dictionary.this.iterator(); + { + private Iterator dictionaryIterator = Dictionary.this.iterator(); - public boolean hasNext() { - return dictionaryIterator.hasNext(); - } + public boolean hasNext() { + return dictionaryIterator.hasNext(); + } - public Entry next() { + public Entry next() { - StringList tokens = dictionaryIterator.next(); + StringList tokens = dictionaryIterator.next(); - return new Entry(tokens, new Attributes()); - } + return new Entry(tokens, new Attributes()); + } - public void remove() { - throw new UnsupportedOperationException(); - } + public void remove() { + throw new UnsupportedOperationException(); + } - }; + }; DictionarySerializer.serialize(out, entryIterator, isCaseSensitive); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java index a0b3dcb8e..13f8927ea 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java @@ -45,8 +45,8 @@ import org.xml.sax.helpers.XMLReaderFactory; /** - * This class is used by for reading and writing dictionaries of all kinds. - */ + * This class is used by for reading and writing dictionaries of all kinds. + */ public class DictionarySerializer { // TODO: should check for invalid format, make it save @@ -54,8 +54,8 @@ private static class DictionaryContenthandler implements ContentHandler { private EntryInserter mInserter; -// private boolean mIsInsideDictionaryElement; -// private boolean mIsInsideEntryElement; + // private boolean mIsInsideDictionaryElement; + // private boolean mIsInsideEntryElement; private boolean mIsInsideTokenElement; private boolean mIsCaseSensitiveDictionary; @@ -69,121 +69,122 @@ private DictionaryContenthandler(EntryInserter inserter) { mInserter = inserter; mIsCaseSensitiveDictionary = true; } + + /** + * Not implemented. + */ + public void processingInstruction(String target, String data) + throws SAXException { + } + /** * Not implemented. */ - public void processingInstruction(String target, String data) - throws SAXException { - } - - /** - * Not implemented. - */ - public void startDocument() throws SAXException { - } - - public void startElement(String uri, String localName, String qName, - org.xml.sax.Attributes atts) throws SAXException { - if (DICTIONARY_ELEMENT.equals(localName)) { - - mAttributes = new Attributes(); - - for (int i = 0; i < atts.getLength(); i++) { - mAttributes.setValue(atts.getLocalName(i), atts.getValue(i)); - } - /* get the attribute here ... */ - if (mAttributes.getValue(ATTRIBUTE_CASE_SENSITIVE) != null) { - mIsCaseSensitiveDictionary = Boolean.valueOf(mAttributes.getValue(ATTRIBUTE_CASE_SENSITIVE)); - } - mAttributes = null; - } - else if (ENTRY_ELEMENT.equals(localName)) { - - mAttributes = new Attributes(); - - for (int i = 0; i < atts.getLength(); i++) { - mAttributes.setValue(atts.getLocalName(i), atts.getValue(i)); - } - } - else if (TOKEN_ELEMENT.equals(localName)) { - mIsInsideTokenElement = true; - } - } - - public void characters(char[] ch, int start, int length) - throws SAXException { - if (mIsInsideTokenElement) { - token.append(ch, start, length); - } - } - - /** - * Creates the Profile object after processing is complete - * and switches mIsInsideNgramElement flag. - */ - public void endElement(String uri, String localName, String qName) - throws SAXException { - - if (TOKEN_ELEMENT.equals(localName)) { - mTokenList.add(token.toString().trim()); - token.setLength(0); - mIsInsideTokenElement = false; - } - else if (ENTRY_ELEMENT.equals(localName)) { - - String[] tokens = mTokenList.toArray( - new String[mTokenList.size()]); - - Entry entry = new Entry(new StringList(tokens), mAttributes); - - try { - mInserter.insert(entry); - } catch (InvalidFormatException e) { - throw new SAXException("Invalid dictionary format!", e); - } - - mTokenList.clear(); - mAttributes = null; - } - } - - /** - * Not implemented. - */ - public void endDocument() throws SAXException { - } - - /** - * Not implemented. - */ - public void endPrefixMapping(String prefix) throws SAXException { - } - - /** - * Not implemented. - */ - public void ignorableWhitespace(char[] ch, int start, int length) - throws SAXException { - } - - /** - * Not implemented. - */ - public void setDocumentLocator(Locator locator) { - } - - /** - * Not implemented. - */ - public void skippedEntity(String name) throws SAXException { - } - - /** - * Not implemented. - */ - public void startPrefixMapping(String prefix, String uri) - throws SAXException { - } + public void startDocument() throws SAXException { + } + + public void startElement(String uri, String localName, String qName, + org.xml.sax.Attributes atts) throws SAXException { + if (DICTIONARY_ELEMENT.equals(localName)) { + + mAttributes = new Attributes(); + + for (int i = 0; i < atts.getLength(); i++) { + mAttributes.setValue(atts.getLocalName(i), atts.getValue(i)); + } + /* get the attribute here ... */ + if (mAttributes.getValue(ATTRIBUTE_CASE_SENSITIVE) != null) { + mIsCaseSensitiveDictionary = Boolean.valueOf(mAttributes.getValue(ATTRIBUTE_CASE_SENSITIVE)); + } + mAttributes = null; + } + else if (ENTRY_ELEMENT.equals(localName)) { + + mAttributes = new Attributes(); + + for (int i = 0; i < atts.getLength(); i++) { + mAttributes.setValue(atts.getLocalName(i), atts.getValue(i)); + } + } + else if (TOKEN_ELEMENT.equals(localName)) { + mIsInsideTokenElement = true; + } + } + + public void characters(char[] ch, int start, int length) + throws SAXException { + if (mIsInsideTokenElement) { + token.append(ch, start, length); + } + } + + /** + * Creates the Profile object after processing is complete + * and switches mIsInsideNgramElement flag. + */ + public void endElement(String uri, String localName, String qName) + throws SAXException { + + if (TOKEN_ELEMENT.equals(localName)) { + mTokenList.add(token.toString().trim()); + token.setLength(0); + mIsInsideTokenElement = false; + } + else if (ENTRY_ELEMENT.equals(localName)) { + + String[] tokens = mTokenList.toArray( + new String[mTokenList.size()]); + + Entry entry = new Entry(new StringList(tokens), mAttributes); + + try { + mInserter.insert(entry); + } catch (InvalidFormatException e) { + throw new SAXException("Invalid dictionary format!", e); + } + + mTokenList.clear(); + mAttributes = null; + } + } + + /** + * Not implemented. + */ + public void endDocument() throws SAXException { + } + + /** + * Not implemented. + */ + public void endPrefixMapping(String prefix) throws SAXException { + } + + /** + * Not implemented. + */ + public void ignorableWhitespace(char[] ch, int start, int length) + throws SAXException { + } + + /** + * Not implemented. + */ + public void setDocumentLocator(Locator locator) { + } + + /** + * Not implemented. + */ + public void skippedEntity(String name) throws SAXException { + } + + /** + * Not implemented. + */ + public void startPrefixMapping(String prefix, String uri) + throws SAXException { + } } private static final String CHARSET = "UTF-8"; @@ -222,7 +223,7 @@ public static boolean create(InputStream in, EntryInserter inserter) } catch (SAXException e) { throw new InvalidFormatException("The profile data stream has " + - "an invalid format!", e); + "an invalid format!", e); } return profileContentHandler.mIsCaseSensitiveDictionary; } @@ -241,8 +242,8 @@ public static boolean create(InputStream in, EntryInserter inserter) */ @Deprecated public static void serialize(OutputStream out, Iterator entries) - throws IOException { - DictionarySerializer.serialize(out, entries, true); + throws IOException { + DictionarySerializer.serialize(out, entries, true); } /** @@ -259,8 +260,7 @@ public static void serialize(OutputStream out, Iterator entries) * @throws IOException If an I/O error occurs */ public static void serialize(OutputStream out, Iterator entries, - boolean casesensitive) - throws IOException { + boolean casesensitive) throws IOException { StreamResult streamResult = new StreamResult(out); SAXTransformerFactory tf = (SAXTransformerFactory) SAXTransformerFactory.newInstance(); @@ -285,7 +285,7 @@ public static void serialize(OutputStream out, Iterator entries, AttributesImpl dictionaryAttributes = new AttributesImpl(); dictionaryAttributes.addAttribute("", "", ATTRIBUTE_CASE_SENSITIVE, - "", String.valueOf(casesensitive)); + "", String.valueOf(casesensitive)); hd.startElement("", "", DICTIONARY_ELEMENT, dictionaryAttributes); while (entries.hasNext()) { @@ -304,7 +304,7 @@ public static void serialize(OutputStream out, Iterator entries, } private static void serializeEntry(TransformerHandler hd, Entry entry) - throws SAXException{ + throws SAXException { AttributesImpl entryAttributes = new AttributesImpl(); @@ -312,7 +312,7 @@ private static void serializeEntry(TransformerHandler hd, Entry entry) String key = it.next(); entryAttributes.addAttribute("", "", key, - "", entry.getAttributes().getValue(key)); + "", entry.getAttributes().getValue(key)); } hd.startElement("", "", ENTRY_ELEMENT, entryAttributes); diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java index b95671adc..76c817642 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java @@ -98,7 +98,7 @@ public void validateArtifactMap() throws InvalidFormatException { } public static DoccatFactory create(String subclassName, Tokenizer tokenizer, - FeatureGenerator[] featureGenerators) throws InvalidFormatException { + FeatureGenerator[] featureGenerators) throws InvalidFormatException { if (subclassName == null) { // will create the default factory return new DoccatFactory(tokenizer, featureGenerators); diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java index edd7b1385..31b97809c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.doccat; import java.util.Map; diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java index 737b9d1c9..b1da3e34a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.doccat; import java.util.Collection; diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java index ad5b49374..d2307e360 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.doccat; import opennlp.tools.tokenize.TokenSample; @@ -28,7 +29,7 @@ * @see DocumentCategorizer * @see DocumentSample */ -public class DocumentCategorizerEvaluator extends Evaluator{ +public class DocumentCategorizerEvaluator extends Evaluator { private DocumentCategorizer categorizer; diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java index 18084c0c2..77da8916f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.doccat; import java.util.Iterator; @@ -23,8 +24,8 @@ import opennlp.tools.util.ObjectStream; /** -* Iterator-like class for modeling document classification events. -*/ + * Iterator-like class for modeling document classification events. + */ public class DocumentCategorizerEventStream extends AbstractEventStream { private DocumentCategorizerContextGenerator mContextGenerator; @@ -40,7 +41,7 @@ public DocumentCategorizerEventStream(ObjectStream data, Feature super(data); mContextGenerator = - new DocumentCategorizerContextGenerator(featureGenerators); + new DocumentCategorizerContextGenerator(featureGenerators); } /** @@ -52,13 +53,13 @@ public DocumentCategorizerEventStream(ObjectStream samples) { super(samples); mContextGenerator = - new DocumentCategorizerContextGenerator(new BagOfWordsFeatureGenerator()); + new DocumentCategorizerContextGenerator(new BagOfWordsFeatureGenerator()); } @Override protected Iterator createEvents(final DocumentSample sample) { - return new Iterator(){ + return new Iterator() { private boolean isVirgin = true; @@ -76,6 +77,7 @@ public Event next() { public void remove() { throw new UnsupportedOperationException(); - }}; + } + }; } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java index d2ad651d7..cb6daeb09 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.doccat; import java.io.IOException; @@ -94,7 +95,7 @@ public double[] categorize(String text[]) { */ @Override public double[] categorize(String documentText, - Map extraInformation) { + Map extraInformation) { Tokenizer tokenizer = model.getFactory().getTokenizer(); return categorize(tokenizer.tokenize(documentText), extraInformation); } @@ -174,13 +175,13 @@ public String getAllResults(double results[]) { } public static DoccatModel train(String languageCode, ObjectStream samples, - TrainingParameters mlParams, DoccatFactory factory) - throws IOException { + TrainingParameters mlParams, DoccatFactory factory) + throws IOException { Map manifestInfoEntries = new HashMap<>(); EventTrainer trainer = TrainerFactory.getEventTrainer( - mlParams.getSettings(), manifestInfoEntries); + mlParams.getSettings(), manifestInfoEntries); MaxentModel model = trainer.train( new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators())); diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java index 47828ab22..0af00d5a0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java +++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.doccat; import java.util.ArrayList; @@ -52,7 +53,7 @@ public DocumentSample(String category, String text[], Map extraI this.category = category; this.text = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(text))); - if(extraInformation == null) { + if (extraInformation == null) { this.extraInformation = Collections.emptyMap(); } else { this.extraInformation = extraInformation; diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java index 93a070d01..e81f9f57f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java @@ -50,7 +50,7 @@ public DocumentSample read() throws IOException { if (tokens.length > 1) { String category = tokens[0]; String docTokens[] = new String[tokens.length - 1]; - System.arraycopy(tokens, 1, docTokens, 0, tokens.length -1); + System.arraycopy(tokens, 1, docTokens, 0, tokens.length - 1); sample = new DocumentSample(category, docTokens); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseLink.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseLink.java index 6e06bebaf..95c0d8831 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseLink.java +++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/BaseLink.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.entitylinker; import java.util.HashMap; @@ -40,17 +41,19 @@ public BaseLink(String itemParentID, String itemID, String itemName, String item this.itemName = itemName; this.itemType = itemType; } -/** - * Any parent ID for the linked item - * @return - */ + + /** + * Any parent ID for the linked item + * @return + */ public String getItemParentID() { return itemParentID; } -/** - * returns the parent ID of the linked item - * @param itemParentID - */ + + /** + * returns the parent ID of the linked item + * @param itemParentID + */ public void setItemParentID(String itemParentID) { this.itemParentID = itemParentID; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java index 64a53a4ee..caecd9751 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java +++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.entitylinker; import java.io.IOException; diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java index 5b62cb497..48ba354c2 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.entitylinker; import java.io.IOException; diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java index bd7b6f689..b0fbea134 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java +++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerProperties.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.entitylinker; import java.io.File; diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java index c44aec85c..3598a923e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java +++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.entitylinker; import java.util.ArrayList; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java index 14eb42e7c..4f62a3a50 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java @@ -133,7 +133,7 @@ public NameSample read() throws IOException { } beginIndex = i; - endIndex = i +1; + endIndex = i + 1; } else if (tag.startsWith("I-")) { endIndex++; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java index 75dbbfa9e..4ff8a4da4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.formats; import java.io.IOException; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java index efb37a062..f1986e730 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java @@ -46,7 +46,7 @@ *

* Note: Do not use this class, internal use only! */ -public class Conll02NameSampleStream implements ObjectStream{ +public class Conll02NameSampleStream implements ObjectStream { public enum LANGUAGE { NL, @@ -172,7 +172,7 @@ public NameSample read() throws IOException { } beginIndex = i; - endIndex = i +1; + endIndex = i + 1; } else if (tag.startsWith("I-")) { endIndex++; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java index a9e2e64bd..f2498b8a0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java @@ -33,7 +33,7 @@ /** * An import stream which can parse the CONLL03 data. */ -public class Conll03NameSampleStream implements ObjectStream{ +public class Conll03NameSampleStream implements ObjectStream { public enum LANGUAGE { EN, @@ -87,7 +87,7 @@ public NameSample read() throws IOException { String emptyLine = lineStream.read(); if (!StringUtil.isEmpty(emptyLine)) - throw new IOException("Empty line after -DOCSTART- not empty: '" + emptyLine +"'!"); + throw new IOException("Empty line after -DOCSTART- not empty: '" + emptyLine + "'!"); continue; } @@ -121,19 +121,19 @@ else if (LANGUAGE.DE.equals(lang) && fields.length == 5) { String tag = tags.get(i); if (tag.endsWith("PER") && - (types & Conll02NameSampleStream.GENERATE_PERSON_ENTITIES) == 0) + (types & Conll02NameSampleStream.GENERATE_PERSON_ENTITIES) == 0) tag = "O"; if (tag.endsWith("ORG") && - (types & Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES) == 0) + (types & Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES) == 0) tag = "O"; if (tag.endsWith("LOC") && - (types & Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES) == 0) + (types & Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES) == 0) tag = "O"; if (tag.endsWith("MISC") && - (types & Conll02NameSampleStream.GENERATE_MISC_ENTITIES) == 0) + (types & Conll02NameSampleStream.GENERATE_MISC_ENTITIES) == 0) tag = "O"; if (tag.equals("O")) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java index b3b88fc24..4ca637451 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java @@ -59,42 +59,42 @@ public POSSample read() throws IOException { // One paragraph contains a whole sentence and, the token // and tag will be read from the FORM and POSTAG field. - String paragraph = samples.read(); + String paragraph = samples.read(); - POSSample sample = null; + POSSample sample = null; - if (paragraph != null) { + if (paragraph != null) { - // paragraph get lines - BufferedReader reader = new BufferedReader(new StringReader(paragraph)); + // paragraph get lines + BufferedReader reader = new BufferedReader(new StringReader(paragraph)); - List tokens = new ArrayList<>(100); - List tags = new ArrayList<>(100); + List tokens = new ArrayList<>(100); + List tags = new ArrayList<>(100); - String line; - while ((line = reader.readLine()) != null) { + String line; + while ((line = reader.readLine()) != null) { - final int minNumberOfFields = 5; + final int minNumberOfFields = 5; - String parts[] = line.split("\t"); + String parts[] = line.split("\t"); - if (parts.length >= minNumberOfFields) { - tokens.add(parts[1]); - tags.add(parts[4]); - } - else { - throw new InvalidFormatException("Every non-empty line must have at least " + - minNumberOfFields + " fields: '" + line + "'!"); - } - } + if (parts.length >= minNumberOfFields) { + tokens.add(parts[1]); + tags.add(parts[4]); + } + else { + throw new InvalidFormatException("Every non-empty line must have at least " + + minNumberOfFields + " fields: '" + line + "'!"); + } + } - // just skip empty samples and read next sample - if (tokens.size() == 0) - sample = read(); + // just skip empty samples and read next sample + if (tokens.size() == 0) + sample = read(); - sample = new POSSample(tokens.toArray(new String[tokens.size()]), tags.toArray(new String[tags.size()])); - } + sample = new POSSample(tokens.toArray(new String[tokens.size()]), tags.toArray(new String[tags.size()])); + } - return sample; + return sample; } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java index 116ac888d..47f4b6799 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/DirectorySampleStream.java @@ -45,7 +45,7 @@ public class DirectorySampleStream implements ObjectStream { public DirectorySampleStream(File dirs[], FileFilter fileFilter, boolean recursive) { - this.fileFilter= fileFilter; + this.fileFilter = fileFilter; isRecursiveScan = recursive; List inputDirectoryList = new ArrayList(dirs.length); @@ -71,7 +71,7 @@ public DirectorySampleStream(File dir, FileFilter fileFilter, boolean recursive) public File read() throws IOException { - while(textFiles.isEmpty() && !directories.isEmpty()) { + while (textFiles.isEmpty() && !directories.isEmpty()) { File dir = directories.pop(); File files[]; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java index 5c591b2f4..3e2aa07a4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.formats; import java.io.IOException; @@ -50,7 +51,7 @@ public ObjectStream create(String[] args) { CmdLineUtil.checkInputFile("Data", params.getData()); InputStreamFactory sampleDataIn = CmdLineUtil.createInputStreamFactory(params.getData()); - ObjectStream lineStream=null; + ObjectStream lineStream = null; try { lineStream = new PlainTextByLineStream(sampleDataIn, params.getEncoding()); } catch (IOException ex) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java index 925a130d5..68a57fbec 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java @@ -55,7 +55,7 @@ *

* Note: Do not use this class, internal use only! */ -public class EvalitaNameSampleStream implements ObjectStream{ +public class EvalitaNameSampleStream implements ObjectStream { public enum LANGUAGE { IT @@ -132,7 +132,7 @@ public NameSample read() throws IOException { String emptyLine = lineStream.read(); if (!StringUtil.isEmpty(emptyLine)) - throw new IOException("Empty line after -DOCSTART- not empty: '" + emptyLine +"'!"); + throw new IOException("Empty line after -DOCSTART- not empty: '" + emptyLine + "'!"); continue; } @@ -185,7 +185,7 @@ public NameSample read() throws IOException { } beginIndex = i; - endIndex = i +1; + endIndex = i + 1; } else if (tag.startsWith("I-")) { endIndex++; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java index 0ac318a4f..a9c734ae1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java @@ -75,7 +75,7 @@ public LeipzigDoccatSampleStream(String language, int sentencesPerDocument, InputStreamFactory in) throws IOException { this(language, sentencesPerDocument, SimpleTokenizer.INSTANCE, in); } - + public DocumentSample read() throws IOException { int count = 0; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java index c5e5c265e..9c12927ae 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java @@ -56,16 +56,16 @@ public ObjectStream create(String[] args) { Parameters params = ArgumentParser.parse(args, Parameters.class); File sentencesFileDir = params.getSentencesDir(); - + File sentencesFiles[] = sentencesFileDir.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { return name.contains("sentences") && name.endsWith(".txt"); } }); - + @SuppressWarnings("unchecked") - ObjectStream sampleStreams[] = + ObjectStream sampleStreams[] = new ObjectStream[sentencesFiles.length]; for (int i = 0; i < sentencesFiles.length; i++) { @@ -77,7 +77,7 @@ public boolean accept(File dir, String name) { throw new TerminateToolException(-1, "IO error while opening sample data: " + e.getMessage(), e); } } - + return ObjectStreamUtils.createObjectStream(sampleStreams); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java index 54840dc2d..edd9a2da4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/LemmatizerSampleStreamFactory.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.formats; import java.io.IOException; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java index 7773e507e..0f6bb645c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java @@ -66,7 +66,7 @@ public NameFinderCensus90NameStream(ObjectStream lineStream) { * * @param in an InputStreamFactory for the input file. * @param encoding the Charset to apply to the input stream. - * @throws IOException + * @throws IOException */ public NameFinderCensus90NameStream(InputStreamFactory in, Charset encoding) throws IOException { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java index a95c8c6fb..e60723915 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.formats; import java.io.IOException; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java index abcaf319c..beaf4149a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.formats; import java.io.IOException; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java index 6ed70512d..fb5b35f7a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java @@ -53,7 +53,7 @@ public ObjectStream create(String[] args) { CmdLineUtil.checkInputFile("Data", params.getData()); InputStreamFactory sampleDataIn = CmdLineUtil.createInputStreamFactory(params.getData()); - ObjectStream lineStream=null; + ObjectStream lineStream = null; try { lineStream = new PlainTextByLineStream(sampleDataIn, params.getEncoding()); } catch (IOException ex) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java index 10338b399..64287f3f0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.formats; import java.io.IOException; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java index bfa152395..c52e55fee 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.formats; import java.io.IOException; @@ -38,7 +39,7 @@ public static interface Parameters extends BasicFormatParams { public static void registerFactory() { StreamFactoryRegistry.registerFactory(POSSample.class, - StreamFactoryRegistry.DEFAULT_FORMAT, new WordTagSampleStreamFactory(Parameters.class)); + StreamFactoryRegistry.DEFAULT_FORMAT, new WordTagSampleStreamFactory(Parameters.class)); } protected

WordTagSampleStreamFactory(Class

params) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java index 12faf4c54..e692b11c2 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java @@ -58,163 +58,163 @@ */ public class ADChunkSampleStream implements ObjectStream { - protected final ObjectStream adSentenceStream; - - private int start = -1; - private int end = -1; - - private int index = 0; - - public static final String OTHER = "O"; - - /** - * Creates a new {@link NameSample} stream from a line stream, i.e. - * {@link ObjectStream}<{@link String}>, that could be a - * {@link PlainTextByLineStream} object. - * - * @param lineStream - * a stream of lines as {@link String} - */ - public ADChunkSampleStream(ObjectStream lineStream) { - this.adSentenceStream = new ADSentenceStream(lineStream); - } - - public ADChunkSampleStream(InputStreamFactory in, String charsetName) throws IOException { - - try { - this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream( - in, charsetName)); - } catch (UnsupportedEncodingException e) { - // UTF-8 is available on all JVMs, will never happen - throw new IllegalStateException(e); + protected final ObjectStream adSentenceStream; + + private int start = -1; + private int end = -1; + + private int index = 0; + + public static final String OTHER = "O"; + + /** + * Creates a new {@link NameSample} stream from a line stream, i.e. + * {@link ObjectStream}<{@link String}>, that could be a + * {@link PlainTextByLineStream} object. + * + * @param lineStream + * a stream of lines as {@link String} + */ + public ADChunkSampleStream(ObjectStream lineStream) { + this.adSentenceStream = new ADSentenceStream(lineStream); + } + + public ADChunkSampleStream(InputStreamFactory in, String charsetName) throws IOException { + + try { + this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream( + in, charsetName)); + } catch (UnsupportedEncodingException e) { + // UTF-8 is available on all JVMs, will never happen + throw new IllegalStateException(e); + } + } + + public ChunkSample read() throws IOException { + + Sentence paragraph; + while ((paragraph = this.adSentenceStream.read()) != null) { + + if (end > -1 && index >= end) { + // leave + return null; + } + + if (start > -1 && index < start) { + index++; + // skip this one + } else { + Node root = paragraph.getRoot(); + List sentence = new ArrayList<>(); + List tags = new ArrayList<>(); + List target = new ArrayList<>(); + + processRoot(root, sentence, tags, target); + + if (sentence.size() > 0) { + index++; + return new ChunkSample(sentence, tags, target); + } + } + } + return null; + } - public ChunkSample read() throws IOException { - - Sentence paragraph; - while ((paragraph = this.adSentenceStream.read()) != null) { - - if (end > -1 && index >= end) { - // leave - return null; - } - - if (start > -1 && index < start) { - index++; - // skip this one - } else { - Node root = paragraph.getRoot(); - List sentence = new ArrayList<>(); - List tags = new ArrayList<>(); - List target = new ArrayList<>(); - - processRoot(root, sentence, tags, target); - - if (sentence.size() > 0) { - index++; - return new ChunkSample(sentence, tags, target); - } - - } - - } - return null; - } - - protected void processRoot(Node root, List sentence, List tags, - List target) { - if (root != null) { - TreeElement[] elements = root.getElements(); - for (int i = 0; i < elements.length; i++) { - if (elements[i].isLeaf()) { - processLeaf((Leaf) elements[i], false, OTHER, sentence, tags, target); - } else { - processNode((Node) elements[i], sentence, tags, target, null); - } - } - } - } - - private void processNode(Node node, List sentence, List tags, - List target, String inheritedTag) { + protected void processRoot(Node root, List sentence, List tags, + List target) { + if (root != null) { + TreeElement[] elements = root.getElements(); + for (int i = 0; i < elements.length; i++) { + if (elements[i].isLeaf()) { + processLeaf((Leaf) elements[i], false, OTHER, sentence, tags, target); + } else { + processNode((Node) elements[i], sentence, tags, target, null); + } + } + } + } + + private void processNode(Node node, List sentence, List tags, + List target, String inheritedTag) { String phraseTag = getChunkTag(node); boolean inherited = false; - if(phraseTag.equals(OTHER) && inheritedTag != null) { + if (phraseTag.equals(OTHER) && inheritedTag != null) { phraseTag = inheritedTag; inherited = true; } TreeElement[] elements = node.getElements(); for (int i = 0; i < elements.length; i++) { - if (elements[i].isLeaf()) { - boolean isIntermediate = false; - String tag = phraseTag; - Leaf leaf = (Leaf) elements[i]; - - String localChunk = getChunkTag(leaf); - if(localChunk != null && !tag.equals(localChunk)) { - tag = localChunk; - } - - if(isIntermediate(tags, target, tag) && (inherited || i > 0)) { - isIntermediate = true; - } - if(!isIncludePunctuations() && leaf.getFunctionalTag() == null && - ( - !( i + 1 < elements.length && elements[i+1].isLeaf() ) || - !( i > 0 && elements[i - 1].isLeaf() ) + if (elements[i].isLeaf()) { + boolean isIntermediate = false; + String tag = phraseTag; + Leaf leaf = (Leaf) elements[i]; + + String localChunk = getChunkTag(leaf); + if (localChunk != null && !tag.equals(localChunk)) { + tag = localChunk; + } + + if (isIntermediate(tags, target, tag) && (inherited || i > 0)) { + isIntermediate = true; + } + if (!isIncludePunctuations() && leaf.getFunctionalTag() == null && + ( + !( i + 1 < elements.length && elements[i + 1].isLeaf() ) || + !( i > 0 && elements[i - 1].isLeaf() ) ) - ){ - isIntermediate = false; - tag = OTHER; - } - processLeaf(leaf, isIntermediate, tag, sentence, - tags, target); - } else { - int before = target.size(); - processNode((Node) elements[i], sentence, tags, target, phraseTag); - - // if the child node was of a different type we should break the chunk sequence - for (int j = target.size() - 1; j >= before; j--) { - if(!target.get(j).endsWith("-" + phraseTag)) { - phraseTag = OTHER; - break; - } - } + ) { + isIntermediate = false; + tag = OTHER; } + processLeaf(leaf, isIntermediate, tag, sentence, + tags, target); + } else { + int before = target.size(); + processNode((Node) elements[i], sentence, tags, target, phraseTag); + + // if the child node was of a different type we should break the chunk sequence + for (int j = target.size() - 1; j >= before; j--) { + if (!target.get(j).endsWith("-" + phraseTag)) { + phraseTag = OTHER; + break; + } + } + } } -} + } protected void processLeaf(Leaf leaf, boolean isIntermediate, String phraseTag, - List sentence, List tags, List target) { - String chunkTag; - - if (leaf.getFunctionalTag() != null - && phraseTag.equals(OTHER)) { - phraseTag = getPhraseTagFromPosTag(leaf.getFunctionalTag()); - } - - if (!phraseTag.equals(OTHER)) { - if (isIntermediate) { - chunkTag = "I-" + phraseTag; - } else { - chunkTag = "B-" + phraseTag; - } - } else { - chunkTag = phraseTag; - } - - sentence.add(leaf.getLexeme()); - if (leaf.getSyntacticTag() == null) { - tags.add(leaf.getLexeme()); - } else { - tags.add(ADChunkSampleStream.convertFuncTag(leaf.getFunctionalTag(), false)); - } - target.add(chunkTag); - } + List sentence, List tags, List target) { + String chunkTag; + + if (leaf.getFunctionalTag() != null + && phraseTag.equals(OTHER)) { + phraseTag = getPhraseTagFromPosTag(leaf.getFunctionalTag()); + } + + if (!phraseTag.equals(OTHER)) { + if (isIntermediate) { + chunkTag = "I-" + phraseTag; + } else { + chunkTag = "B-" + phraseTag; + } + } else { + chunkTag = phraseTag; + } + + sentence.add(leaf.getLexeme()); + if (leaf.getSyntacticTag() == null) { + tags.add(leaf.getLexeme()); + } else { + tags.add(ADChunkSampleStream.convertFuncTag(leaf.getFunctionalTag(), false)); + } + target.add(chunkTag); + } protected String getPhraseTagFromPosTag(String functionalTag) { if (functionalTag.equals("v-fin")) { @@ -236,7 +236,7 @@ public static String convertFuncTag(String t, boolean useCGTags) { protected String getChunkTag(Leaf leaf) { String tag = leaf.getSyntacticTag(); - if("P".equals(tag)) { + if ("P".equals(tag)) { return "VP"; } return null; @@ -262,21 +262,21 @@ protected String getChunkTag(Node node) { return phraseTag; } - public void setStart(int aStart) { - this.start = aStart; - } + public void setStart(int aStart) { + this.start = aStart; + } - public void setEnd(int aEnd) { - this.end = aEnd; - } + public void setEnd(int aEnd) { + this.end = aEnd; + } - public void reset() throws IOException, UnsupportedOperationException { - adSentenceStream.reset(); - } + public void reset() throws IOException, UnsupportedOperationException { + adSentenceStream.reset(); + } - public void close() throws IOException { - adSentenceStream.close(); - } + public void close() throws IOException { + adSentenceStream.close(); + } protected boolean isIncludePunctuations() { return false; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java index 5ef5d8927..1f55fe4c9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java @@ -79,7 +79,7 @@ public ObjectStream create(String[] args) { InputStreamFactory sampleDataIn = CmdLineUtil.createInputStreamFactory(params.getData()); - ObjectStream lineStream=null; + ObjectStream lineStream = null; try { lineStream = new PlainTextByLineStream(sampleDataIn, params.getEncoding()); } catch (IOException ex) { @@ -88,11 +88,11 @@ public ObjectStream create(String[] args) { ADChunkSampleStream sampleStream = new ADChunkSampleStream(lineStream); - if(params.getStart() != null && params.getStart() > -1) { + if (params.getStart() != null && params.getStart() > -1) { sampleStream.setStart(params.getStart()); } - if(params.getEnd() != null && params.getEnd() > -1) { + if (params.getEnd() != null && params.getEnd() > -1) { sampleStream.setEnd(params.getEnd()); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java index 97b6ab938..1741999ca 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java @@ -211,7 +211,7 @@ public NameSample read() throws IOException { int currentTextID = getTextID(paragraph); boolean clearData = false; - if(currentTextID != textID) { + if (currentTextID != textID) { clearData = true; textID = currentTextID; } @@ -282,62 +282,62 @@ private void processLeaf(Leaf leaf, List sentence, leftContractionPart = null; } - String namedEntityTag = null; - int startOfNamedEntity = -1; + String namedEntityTag = null; + int startOfNamedEntity = -1; - String leafTag = leaf.getSecondaryTag(); - boolean expandLastNER = false; // used when we find a tag + String leafTag = leaf.getSecondaryTag(); + boolean expandLastNER = false; // used when we find a tag - if (leafTag != null) { - if (leafTag.contains("") && !alreadyAdded) { - String[] lexemes = underlinePattern.split(leaf.getLexeme()); - if(lexemes.length > 1) { - sentence.addAll(Arrays.asList(lexemes).subList(0, lexemes.length - 1)); - } - leftContractionPart = lexemes[lexemes.length - 1]; - return; + if (leafTag != null) { + if (leafTag.contains("") && !alreadyAdded) { + String[] lexemes = underlinePattern.split(leaf.getLexeme()); + if (lexemes.length > 1) { + sentence.addAll(Arrays.asList(lexemes).subList(0, lexemes.length - 1)); } - if (leafTag.contains("")) { - // this one an be part of the last name - expandLastNER = true; - } - namedEntityTag = getNER(leafTag); + leftContractionPart = lexemes[lexemes.length - 1]; + return; } - - if (namedEntityTag != null) { - startOfNamedEntity = sentence.size(); + if (leafTag.contains("")) { + // this one an be part of the last name + expandLastNER = true; } + namedEntityTag = getNER(leafTag); + } - if(!alreadyAdded) { - sentence.addAll(processLexeme(leaf.getLexeme())); - } + if (namedEntityTag != null) { + startOfNamedEntity = sentence.size(); + } - if (namedEntityTag != null) { - names - .add(new Span(startOfNamedEntity, sentence.size(), namedEntityTag)); - } + if (!alreadyAdded) { + sentence.addAll(processLexeme(leaf.getLexeme())); + } + + if (namedEntityTag != null) { + names + .add(new Span(startOfNamedEntity, sentence.size(), namedEntityTag)); + } - if (expandLastNER) { - // if the current leaf has the tag , it can be the continuation of - // a NER. - // we check if it is true, and expand the last NER - int lastIndex = names.size() - 1; - if (names.size() > 0) { - Span last = names.get(lastIndex); - if (last.getEnd() == sentence.size() - 1) { - names.set(lastIndex, new Span(last.getStart(), sentence.size(), - last.getType())); - } + if (expandLastNER) { + // if the current leaf has the tag , it can be the continuation of + // a NER. + // we check if it is true, and expand the last NER + int lastIndex = names.size() - 1; + if (names.size() > 0) { + Span last = names.get(lastIndex); + if (last.getEnd() == sentence.size() - 1) { + names.set(lastIndex, new Span(last.getStart(), sentence.size(), + last.getType())); } } - } + } + private List processLexeme(String lexemeStr) { List out = new ArrayList<>(); String[] parts = underlinePattern.split(lexemeStr); for (String tok : parts) { - if(tok.length() > 1 && !alphanumericPattern.matcher(tok).matches()) { + if (tok.length() > 1 && !alphanumericPattern.matcher(tok).matches()) { out.addAll(processTok(tok)); } else { out.add(tok); @@ -416,7 +416,7 @@ private void addIfNotEmpty(String firstTok, List out) { * @return the NER tag, or null if not a NER tag in Arvores Deitadas format */ private static String getNER(String tags) { - if(tags.contains("")) { + if (tags.contains("")) { return null; } String[] tag = tags.split("\\s+"); @@ -449,12 +449,12 @@ enum Type { private Pattern metaPattern; // works for Amazonia -// private static final Pattern meta1 = Pattern -// .compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*"); -// -// // works for selva cie -// private static final Pattern meta2 = Pattern -// .compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*"); + // private static final Pattern meta1 = Pattern + // .compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*"); + // + // // works for selva cie + // private static final Pattern meta2 = Pattern + // .compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*"); private int textIdMeta2 = -1; private String textMeta2 = ""; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java index b1bfb95d6..c93764ab5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java @@ -75,7 +75,7 @@ public ObjectStream create(String[] args) { InputStreamFactory sampleDataIn = CmdLineUtil.createInputStreamFactory(params.getData()); - ObjectStream lineStream=null; + ObjectStream lineStream = null; try { lineStream = new PlainTextByLineStream(sampleDataIn, params.getEncoding()); } catch (IOException ex) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java index 80453525b..a551635e7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java @@ -74,7 +74,7 @@ public ObjectStream create(String[] args) { InputStreamFactory sampleDataIn = CmdLineUtil.createInputStreamFactory(params.getData()); - ObjectStream lineStream=null; + ObjectStream lineStream = null; try { lineStream = new PlainTextByLineStream(sampleDataIn, params.getEncoding()); } catch (IOException ex) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java index db09bae3e..15e09fdc8 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java @@ -118,9 +118,11 @@ public SentenceSample read() throws IOException { } sent = this.adSentenceStream.read(); updateMeta(); - } while (isSamePara); + } + while (isSamePara); // break; // got one paragraph! - } while (isSameText); + } + while (isSameText); String doc; if (document.length() > 0) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java index 9e3cec785..4e2828fbe 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java @@ -72,7 +72,7 @@ public ObjectStream create(String[] args) { InputStreamFactory sampleDataIn = CmdLineUtil.createInputStreamFactory(params.getData()); - ObjectStream lineStream=null; + ObjectStream lineStream = null; try { lineStream = new PlainTextByLineStream(sampleDataIn, params.getEncoding()); } catch (IOException ex) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java index 549781609..2301be99c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java @@ -43,8 +43,7 @@ *

* Note: Do not use this class, internal use only! */ -public class ADSentenceStream extends - FilterObjectStream { +public class ADSentenceStream extends FilterObjectStream { public static class Sentence { @@ -70,13 +69,13 @@ public void setRoot(Node root) { this.root = root; } - public void setMetadata(String metadata) { - this.metadata = metadata; - } + public void setMetadata(String metadata) { + this.metadata = metadata; + } - public String getMetadata() { - return metadata; - } + public String getMetadata() { + return metadata; + } } @@ -92,7 +91,7 @@ public static class SentenceParser { private Pattern leafPattern = Pattern .compile("^([=-]*)([^:=]+):([^\\(\\s]+)\\([\"'](.+)[\"']\\s*((?:<.+>)*)\\s*([^\\)]+)?\\)\\s+(.+)"); private Pattern bizarreLeafPattern = Pattern - .compile("^([=-]*)([^:=]+=[^\\(\\s]+)\\(([\"'].+[\"'])?\\s*([^\\)]+)?\\)\\s+(.+)"); + .compile("^([=-]*)([^:=]+=[^\\(\\s]+)\\(([\"'].+[\"'])?\\s*([^\\)]+)?\\)\\s+(.+)"); private Pattern punctuationPattern = Pattern.compile("^(=*)(\\W+)$"); private String text,meta; @@ -111,34 +110,34 @@ public Sentence parse(String sentenceString, int para, boolean isTitle, boolean boolean useSameTextAndMeta = false; // to handle cases where there are diff sug of parse (&&) - // should find the source source - while (!line.startsWith("SOURCE")) { - if(line.equals("&&")) { - // same sentence again! - useSameTextAndMeta = true; - break; - } - line = reader.readLine(); - if (line == null) { - return null; - } + // should find the source source + while (!line.startsWith("SOURCE")) { + if (line.equals("&&")) { + // same sentence again! + useSameTextAndMeta = true; + break; + } + line = reader.readLine(); + if (line == null) { + return null; + } + } + if (!useSameTextAndMeta) { + // got source, get the metadata + String metaFromSource = line.substring(7); + line = reader.readLine(); + // we should have the plain sentence + // we remove the first token + int start = line.indexOf(" "); + text = line.substring(start + 1).trim(); + text = fixPunctuation(text); + String titleTag = ""; + if (isTitle) titleTag = " title"; + String boxTag = ""; + if (isBox) boxTag = " box"; + if (start > 0) { + meta = line.substring(0, start) + " p=" + para + titleTag + boxTag + metaFromSource; } - if(!useSameTextAndMeta) { - // got source, get the metadata - String metaFromSource = line.substring(7); - line = reader.readLine(); - // we should have the plain sentence - // we remove the first token - int start = line.indexOf(" "); - text = line.substring(start + 1).trim(); - text = fixPunctuation(text); - String titleTag = ""; - if(isTitle) titleTag = " title"; - String boxTag = ""; - if(isBox) boxTag = " box"; - if(start > 0) { - meta = line.substring(0, start) + " p=" + para + titleTag + boxTag + metaFromSource; - } } sentence.setText(text); sentence.setMetadata(meta); @@ -146,8 +145,8 @@ public Sentence parse(String sentenceString, int para, boolean isTitle, boolean // skip lines starting with ### line = reader.readLine(); - while(line != null && line.startsWith("###")) { - line = reader.readLine(); + while (line != null && line.startsWith("###")) { + line = reader.readLine(); } // got the root. Add it to the stack @@ -164,7 +163,7 @@ public Sentence parse(String sentenceString, int para, boolean isTitle, boolean while (line != null && line.length() != 0 && !line.startsWith("") && !line.equals("&&")) { TreeElement element = this.getElement(line); - if(element != null) { + if (element != null) { // The idea here is to keep a stack of nodes that are candidates for // parenting the following elements (nodes and leafs). @@ -175,7 +174,7 @@ public Sentence parse(String sentenceString, int para, boolean isTitle, boolean Node nephew = nodeStack.pop(); } - if( element.isLeaf() ) { + if (element.isLeaf() ) { // 2a) If the element is a leaf and there is no parent candidate, // add it as a daughter of the root. if (nodeStack.isEmpty()) { @@ -208,7 +207,7 @@ public Sentence parse(String sentenceString, int para, boolean isTitle, boolean // 3) Check if the element that is at the top of the stack is this // node parent, if yes add it as a son if (!nodeStack.isEmpty() && nodeStack.peek().getLevel() < element.getLevel()) { - nodeStack.peek().addElement(element); + nodeStack.peek().addElement(element); } else { System.err.println("should not happen!"); } @@ -289,12 +288,12 @@ public TreeElement getElement(String line) { } // process the bizarre cases - if(line.equals("_") || line.startsWith("].*")) { - return null; - } + if (lexeme.matches("\\w.*?[\\.<>].*")) { + return null; + } - Leaf leaf = new Leaf(); - leaf.setLevel(level + 1); - leaf.setSyntacticTag(""); - leaf.setMorphologicalTag(""); - leaf.setLexeme(lexeme); + Leaf leaf = new Leaf(); + leaf.setLevel(level + 1); + leaf.setSyntacticTag(""); + leaf.setMorphologicalTag(""); + leaf.setLexeme(lexeme); - return leaf; + return leaf; } } @@ -349,7 +348,9 @@ public abstract class TreeElement { private String morphologicalTag; private int level; - public boolean isLeaf() {return false;} + public boolean isLeaf() { + return false; + } public void setSyntacticTag(String syntacticTag) { this.syntacticTag = syntacticTag; @@ -416,13 +417,15 @@ public class Leaf extends TreeElement { private String functionalTag; @Override - public boolean isLeaf() {return true;} + public boolean isLeaf() { + return true; + } public void setFunctionalTag(String funcTag) { this.functionalTag = funcTag; } - public String getFunctionalTag(){ + public String getFunctionalTag() { return this.functionalTag; } @@ -443,7 +446,7 @@ public String getLexeme() { } private String emptyOrString(String value, String prefix, String suffix) { - if(value == null) return ""; + if (value == null) return ""; return prefix + value + suffix; } @@ -540,29 +543,29 @@ public Sentence read() throws IOException { if (line != null) { - if(sentenceStarted) { - if (sentEnd.matcher(line).matches() || extEnd.matcher(line).matches()) { - sentenceStarted = false; - } else if (!line.startsWith("A1")) { - sentence.append(line).append('\n'); - } + if (sentenceStarted) { + if (sentEnd.matcher(line).matches() || extEnd.matcher(line).matches()) { + sentenceStarted = false; + } else if (!line.startsWith("A1")) { + sentence.append(line).append('\n'); + } } else { - if (sentStart.matcher(line).matches()) { - sentenceStarted = true; - } else if(paraStart.matcher(line).matches()) { - paraID++; - } else if(titleStart.matcher(line).matches()) { - isTitle = true; - } else if(titleEnd.matcher(line).matches()) { - isTitle = false; - } else if(textStart.matcher(line).matches()) { - paraID = 0; - } else if(boxStart.matcher(line).matches()) { - isBox = true; - } else if(boxEnd.matcher(line).matches()) { - isBox = false; - } - } + if (sentStart.matcher(line).matches()) { + sentenceStarted = true; + } else if (paraStart.matcher(line).matches()) { + paraID++; + } else if (titleStart.matcher(line).matches()) { + isTitle = true; + } else if (titleEnd.matcher(line).matches()) { + isTitle = false; + } else if (textStart.matcher(line).matches()) { + paraID = 0; + } else if (boxStart.matcher(line).matches()) { + isBox = true; + } else if (boxEnd.matcher(line).matches()) { + isBox = false; + } + } if (!sentenceStarted && sentence.length() > 0) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java index 9ce5b3bee..cd03561fd 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/AnnotationConfiguration.java @@ -58,7 +58,7 @@ public static AnnotationConfiguration parse(InputStream in) throws IOException { String line; String sectionType = null; - while ((line = reader.readLine())!= null) { + while ((line = reader.readLine()) != null) { line = line.trim(); if (line.isEmpty()) { @@ -70,27 +70,27 @@ public static AnnotationConfiguration parse(InputStream in) throws IOException { String typeName = WhitespaceTokenizer.INSTANCE.tokenize(line)[0]; switch (sectionType) { - case "entities": - typeToClassMap.put(typeName, AnnotationConfiguration.ENTITY_TYPE); - break; + case "entities": + typeToClassMap.put(typeName, AnnotationConfiguration.ENTITY_TYPE); + break; - case "relations": - typeToClassMap.put(typeName, AnnotationConfiguration.RELATION_TYPE); - break; + case "relations": + typeToClassMap.put(typeName, AnnotationConfiguration.RELATION_TYPE); + break; - case "attributes": - typeToClassMap.put(typeName, AnnotationConfiguration.ATTRIBUTE_TYPE); - break; + case "attributes": + typeToClassMap.put(typeName, AnnotationConfiguration.ATTRIBUTE_TYPE); + break; - default: - break; + default: + break; } } } return new AnnotationConfiguration(typeToClassMap); } - + public static AnnotationConfiguration parse(File annConfigFile) throws IOException { try (InputStream in = new BufferedInputStream(new FileInputStream(annConfigFile))) { return parse(in); diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotation.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotation.java index 1e14b2635..1e7d20c40 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotation.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotation.java @@ -24,7 +24,7 @@ public abstract class BratAnnotation { protected BratAnnotation(String id, String type) { this.id = id; - this.type =type; + this.type = type; } public String getId() { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java index 125e66237..236ec5595 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java @@ -190,7 +190,7 @@ public BratAnnotation read() throws IOException { } } } - + return null; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocumentStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocumentStream.java index 4e702cc9e..67d11f97a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocumentStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratDocumentStream.java @@ -42,7 +42,7 @@ public class BratDocumentStream implements ObjectStream { * @param config the annotation.conf from the brat project as an Annotation Configuration object * @param bratCorpusDirectory the directory containing all the brat training data files * @param searchRecursive specifies if the corpus directory should be traversed recursively - * to find training data files. + * to find training data files. * @param fileFilter a custom file filter to filter out certain files or null to accept all files * * @throws IOException if reading from the brat directory fails in anyway @@ -52,7 +52,7 @@ public BratDocumentStream(AnnotationConfiguration config, File bratCorpusDirecto if (!bratCorpusDirectory.isDirectory()) { throw new IOException("Input corpus directory must be a directory " + - "according to File.isDirectory()!"); + "according to File.isDirectory()!"); } this.config = config; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java index 669c6996c..772fdcc1e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStreamFactory.java @@ -139,7 +139,7 @@ else if (params.getRuleBasedTokenizer() != null) { if ("simple".equals(tokenizerName)) { tokenizer = SimpleTokenizer.INSTANCE; } - else if("whitespace".equals(tokenizerName)) { + else if ("whitespace".equals(tokenizerName)) { tokenizer = WhitespaceTokenizer.INSTANCE; } else { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java index ad049e080..d5b66759e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java @@ -44,7 +44,7 @@ public POSSample read() throws IOException { List sentence = new ArrayList<>(); List tags = new ArrayList<>(); - for(Parse tagNode : parse.getTagNodes()) { + for (Parse tagNode : parse.getTagNodes()) { sentence.add(tagNode.getCoveredText()); tags.add(tagNode.getType()); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitDocumentHandler.java b/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitDocumentHandler.java index 2f3cf6aba..ed4f0be2d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitDocumentHandler.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitDocumentHandler.java @@ -167,7 +167,7 @@ public void endElement(String uri, String localName, String qName) String txt = text.toString(); int tokenIndex = -1; Parse p = new Parse(txt, new Span(0, txt.length()), AbstractBottomUpParser.TOP_NODE, 1,0); - for (int ci=0;ci < cons.size();ci++) { + for (int ci = 0; ci < cons.size(); ci++) { Constituent con = cons.get(ci); String type = con.getLabel(); if (!type.equals(AbstractBottomUpParser.TOP_NODE)) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java index 496253b4d..e5ef4a204 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java @@ -54,7 +54,7 @@ public ObjectStream create(String[] args) { ObjectStream mucDocStream = new FileToStringSampleStream( new DirectorySampleStream(params.getData(), - file -> StringUtil.toLowerCase(file.getName()).endsWith(".sgm"), false), Charset.forName("UTF-8")); + file -> StringUtil.toLowerCase(file.getName()).endsWith(".sgm"), false), Charset.forName("UTF-8")); return new MucNameSampleStream(tokenizer, mucDocStream); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java index c0dfdbdfd..f3ab66fc3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java @@ -39,7 +39,8 @@ public ObjectStream create(String[] args) { OntoNotesFormatParameters params = ArgumentParser.parse(args, OntoNotesFormatParameters.class); ObjectStream documentStream = new DirectorySampleStream(new File( - params.getOntoNotesDir()), file -> { + params.getOntoNotesDir()), + file -> { if (file.isFile()) { return file.getName().endsWith(".name"); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java index 1627ca53c..855f9121d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStream.java @@ -35,7 +35,7 @@ public Parse read() throws IOException { StringBuilder parseString = new StringBuilder(); - while(true) { + while (true) { String parse = samples.read(); if (parse != null) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java index a99bc4e05..140d8df01 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesParseSampleStreamFactory.java @@ -38,7 +38,8 @@ public ObjectStream create(String[] args) { OntoNotesFormatParameters params = ArgumentParser.parse(args, OntoNotesFormatParameters.class); ObjectStream documentStream = new DirectorySampleStream(new File( - params.getOntoNotesDir()), file -> { + params.getOntoNotesDir()), + file -> { if (file.isFile()) { return file.getName().endsWith(".parse"); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java index 7c3f83420..ffc6db97c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.languagemodel; import opennlp.tools.util.StringList; diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java index ff3039db8..47e9e7784 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.languagemodel; import java.io.IOException; diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java index 4d6d083a6..cba1c14f3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java @@ -24,12 +24,12 @@ /** * Simple feature generator for learning statistical lemmatizers. * Features based on Grzegorz Chrupała. 2008. Towards a Machine-Learning - * Architecture for Lexical Functional Grammar Parsing. PhD dissertation, - * Dublin City University + * Architecture for Lexical Functional Grammar Parsing. PhD dissertation, + * Dublin City University * @version 2016-02-15 */ public class DefaultLemmatizerContextGenerator implements LemmatizerContextGenerator { - + private static final int PREFIX_LENGTH = 5; private static final int SUFFIX_LENGTH = 7; @@ -54,7 +54,7 @@ protected static String[] getSuffixes(String lex) { } return suffs; } - + public String[] getContext(int index, String[] sequence, String[] priorDecisions, Object[] additionalContext) { return getContext(index, sequence, (String[]) additionalContext[0], priorDecisions); } @@ -79,13 +79,13 @@ public String[] getContext(int index, String[] toks, String[] tags, String[] pre t0 = "t0=" + tags[index]; List features = new ArrayList<>(); - + features.add(w0); features.add(t0); features.add(p_1); features.add(p_1 + t0); features.add(p_1 + w0); - + // do some basic suffix analysis String[] suffs = getSuffixes(lex); for (int i = 0; i < suffs.length; i++) { @@ -108,7 +108,7 @@ public String[] getContext(int index, String[] toks, String[] tags, String[] pre if (hasNum.matcher(lex).find()) { features.add("d"); } - + return features.toArray(new String[features.size()]); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java index 866dbc4c6..8a697ebd6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java @@ -19,7 +19,7 @@ import opennlp.tools.util.SequenceValidator; -public class DefaultLemmatizerSequenceValidator implements SequenceValidator{ +public class DefaultLemmatizerSequenceValidator implements SequenceValidator { //TODO implement this public boolean validSequence(int i, String[] sequence, String[] s, String outcome) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java index 6d8b51de6..015b300b0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java @@ -41,9 +41,9 @@ public class DictionaryLemmatizer implements Lemmatizer { /** * Construct a hashmap from the input tab separated dictionary. - * + * * The input file should have, for each line, word\tablemma\tabpostag - * + * * @param dictionary * the input dictionary via inputstream */ @@ -64,7 +64,7 @@ public DictionaryLemmatizer(final InputStream dictionary) { /** * Get the Map containing the dictionary. - * + * * @return dictMap the Map */ public Map, String> getDictMap() { @@ -73,7 +73,7 @@ public Map, String> getDictMap() { /** * Get the dictionary keys (word and postag). - * + * * @param word * the surface form word * @param postag @@ -89,7 +89,7 @@ private List getDictKeys(final String word, final String postag) { public String[] lemmatize(final String[] tokens, final String[] postags) { List lemmas = new ArrayList<>(); for (int i = 0; i < tokens.length; i++) { - lemmas.add(this.apply(tokens[i], postags[i])); + lemmas.add(this.apply(tokens[i], postags[i])); } return lemmas.toArray(new String[lemmas.size()]); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java index a19adb44a..b77e19f8b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java @@ -30,16 +30,16 @@ public class LemmaSample { private List tokens; private List tags; - + private final List lemmas; - /** - * Represents one lemma sample. - * @param tokens the token - * @param tags the postags - * @param lemmas the lemmas - */ -public LemmaSample(String[] tokens, String[] tags, String[] lemmas) { + /** + * Represents one lemma sample. + * @param tokens the token + * @param tags the postags + * @param lemmas the lemmas + */ + public LemmaSample(String[] tokens, String[] tags, String[] lemmas) { validateArguments(tokens.length, tags.length, lemmas.length); @@ -47,7 +47,7 @@ public LemmaSample(String[] tokens, String[] tags, String[] lemmas) { this.tags = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(tags))); this.lemmas = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(lemmas))); } - + /** * Lemma Sample constructor. * @param tokens the tokens @@ -70,7 +70,7 @@ public String[] getTokens() { public String[] getTags() { return tags.toArray(new String[tags.size()]); } - + public String[] getLemmas() { return lemmas.toArray(new String[lemmas.size()]); } @@ -87,14 +87,13 @@ private void validateArguments(int tokensSize, int tagsSize, int lemmasSize) thr @Override public String toString() { + StringBuilder lemmaString = new StringBuilder(); - StringBuilder lemmaString = new StringBuilder(); - - for (int ci = 0; ci < lemmas.size(); ci++) { - lemmaString.append(tokens.get(ci)).append("\t").append(tags.get(ci)).append("\t").append(lemmas.get(ci)).append("\n"); - } - return lemmaString.toString(); - } + for (int ci = 0; ci < lemmas.size(); ci++) { + lemmaString.append(tokens.get(ci)).append("\t").append(tags.get(ci)).append("\t").append(lemmas.get(ci)).append("\n"); + } + return lemmaString.toString(); + } @Override public boolean equals(Object obj) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java index 1a46f4aaf..591597354 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.lemmatizer; import java.util.ArrayList; @@ -41,7 +42,7 @@ public LemmaSampleEventStream(ObjectStream d, LemmatizerContextGene super(d); this.contextGenerator = cg; } - + protected Iterator createEvents(LemmaSample sample) { if (sample != null) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java index 0940dc61d..70565389d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.lemmatizer; import java.io.IOException; @@ -45,7 +46,7 @@ public Sequence read() throws IOException { String preds[] = sample.getLemmas(); Event[] events = new Event[sentence.length]; - for (int i=0; i < sentence.length; i++) { + for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags String[] context = contextGenerator.getContext(i, sentence, tags, preds); diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java index 9a2a6f098..0a133c380 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.lemmatizer; import java.io.IOException; @@ -56,7 +57,7 @@ public LemmaSample read() throws IOException { } if (toks.size() > 0) { return new LemmaSample(toks.toArray(new String[toks.size()]), tags.toArray(new String[tags.size()]), - preds.toArray(new String[preds.size()])); + preds.toArray(new String[preds.size()])); } else { return null; diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java index d09a8a37f..ddcaa6a2c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.lemmatizer; /** diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java index 951334299..6dc1e6e2d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.lemmatizer; import opennlp.tools.util.BeamSearchContextGenerator; diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluationMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluationMonitor.java index 4d07d2c52..559934069 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluationMonitor.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluationMonitor.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.lemmatizer; import opennlp.tools.util.eval.EvaluationMonitor; diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java index 686e0d768..4a64a378b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.lemmatizer; import opennlp.tools.util.eval.Evaluator; @@ -57,7 +58,7 @@ protected LemmaSample processSample(LemmaSample reference) { String[] predictedLemmas = lemmatizer.lemmatize(reference.getTokens(), reference.getTags()); String[] referenceLemmas = reference.getLemmas(); - + for (int i = 0; i < referenceLemmas.length; i++) { if (referenceLemmas[i].equals(predictedLemmas[i])) { wordAccuracy.add(1); diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java index 8415e5778..0effba278 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.lemmatizer; import opennlp.tools.util.BaseToolFactory; diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java index 6d97b5f43..2ec5691d5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.lemmatizer; import java.io.IOException; @@ -40,21 +41,21 @@ /** * A probabilistic lemmatizer. Tries to predict the induced permutation class * for each word depending on its surrounding context. Based on - * Grzegorz Chrupała. 2008. Towards a Machine-Learning Architecture - * for Lexical Functional Grammar Parsing. PhD dissertation, Dublin City University. + * Grzegorz Chrupała. 2008. Towards a Machine-Learning Architecture + * for Lexical Functional Grammar Parsing. PhD dissertation, Dublin City University. * http://grzegorz.chrupala.me/papers/phd-single.pdf */ public class LemmatizerME implements Lemmatizer { - + public static final int DEFAULT_BEAM_SIZE = 3; protected int beamSize; private Sequence bestSequence; - + private SequenceClassificationModel model; - + private LemmatizerContextGenerator contextGenerator; private SequenceValidator sequenceValidator; - + /** * Initializes the current instance with the provided model * and the default beam size of 3. @@ -62,14 +63,14 @@ public class LemmatizerME implements Lemmatizer { * @param model the model */ public LemmatizerME(LemmatizerModel model) { - + LemmatizerFactory factory = model.getFactory(); int defaultBeamSize = LemmatizerME.DEFAULT_BEAM_SIZE; String beamSizeString = model.getManifestProperty(BeamSearch.BEAM_SIZE_PARAMETER); if (beamSizeString != null) { defaultBeamSize = Integer.parseInt(beamSizeString); } - + contextGenerator = factory.getContextGenerator(); beamSize = defaultBeamSize; @@ -83,13 +84,13 @@ public LemmatizerME(LemmatizerModel model) { (MaxentModel) model.getLemmatizerSequenceModel(), 0); } } - -public String[] lemmatize(String[] toks, String[] tags) { + + public String[] lemmatize(String[] toks, String[] tags) { bestSequence = model.bestSequence(toks, new Object[] {tags}, contextGenerator, sequenceValidator); List c = bestSequence.getOutcomes(); return c.toArray(new String[c.size()]); } - + /** * Decodes the lemma from the word and the induced lemma class. * @param toks the array of tokens @@ -108,7 +109,7 @@ public String[] decodeLemmas(String[] toks, String[] preds) { } return lemmas.toArray(new String[lemmas.size()]); } - + public Sequence[] topKSequences(String[] sentence, String[] tags) { return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, new Object[] { tags }, contextGenerator, sequenceValidator); @@ -130,16 +131,16 @@ public void probs(double[] probs) { bestSequence.getProbs(probs); } - /** - * Returns an array with the probabilities of the last decoded sequence. The - * sequence was determined based on the previous call to chunk. - * @return An array with the same number of probabilities as tokens were sent to chunk - * when it was last called. - */ + /** + * Returns an array with the probabilities of the last decoded sequence. The + * sequence was determined based on the previous call to chunk. + * @return An array with the same number of probabilities as tokens were sent to chunk + * when it was last called. + */ public double[] probs() { return bestSequence.getProbs(); } - + public static LemmatizerModel train(String languageCode, ObjectStream samples, TrainingParameters trainParams, LemmatizerFactory posFactory) throws IOException { @@ -192,7 +193,7 @@ else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) { return new LemmatizerModel(languageCode, seqLemmatizerModel, manifestInfoEntries, posFactory); } } - + public Sequence[] topKLemmaClasses(String[] sentence, String[] tags) { return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, new Object[] { tags }, contextGenerator, sequenceValidator); diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java index 0cac9c2d4..2f5f6ef27 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.lemmatizer; import java.io.File; @@ -32,92 +33,92 @@ import opennlp.tools.util.model.BaseModel; /** -* The {@link LemmatizerModel} is the model used -* by a learnable {@link Lemmatizer}. -* -* @see LemmatizerME -*/ + * The {@link LemmatizerModel} is the model used + * by a learnable {@link Lemmatizer}. + * + * @see LemmatizerME + */ public class LemmatizerModel extends BaseModel { - private static final String COMPONENT_NAME = "StatisticalLemmatizer"; - private static final String LEMMATIZER_MODEL_ENTRY_NAME = "lemmatizer.model"; - - public LemmatizerModel(String languageCode, SequenceClassificationModel lemmatizerModel, - Map manifestInfoEntries, LemmatizerFactory factory) { - super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory); - artifactMap.put(LEMMATIZER_MODEL_ENTRY_NAME, lemmatizerModel); - checkArtifactMap(); - } - - public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel, - Map manifestInfoEntries, LemmatizerFactory factory) { - this(languageCode, lemmatizerModel, LemmatizerME.DEFAULT_BEAM_SIZE, manifestInfoEntries, factory); - } - - public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel, int beamSize, - Map manifestInfoEntries, LemmatizerFactory factory) { - super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory); - artifactMap.put(LEMMATIZER_MODEL_ENTRY_NAME, lemmatizerModel); - - Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY); - manifest.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize)); - checkArtifactMap(); - } - - public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel, LemmatizerFactory factory) { - this(languageCode, lemmatizerModel, null, factory); - } - - public LemmatizerModel(InputStream in) throws IOException, InvalidFormatException { - super(COMPONENT_NAME, in); - } - - public LemmatizerModel(File modelFile) throws IOException, InvalidFormatException { - super(COMPONENT_NAME, modelFile); - } - - public LemmatizerModel(URL modelURL) throws IOException, InvalidFormatException { - super(COMPONENT_NAME, modelURL); - } - - @Override - protected void validateArtifactMap() throws InvalidFormatException { - super.validateArtifactMap(); - - if (!(artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME) instanceof AbstractModel)) { - throw new InvalidFormatException("Lemmatizer model is incomplete!"); - } - } - - public SequenceClassificationModel getLemmatizerSequenceModel() { - - Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY); - - if (artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME) instanceof MaxentModel) { - String beamSizeString = manifest.getProperty(BeamSearch.BEAM_SIZE_PARAMETER); - - int beamSize = LemmatizerME.DEFAULT_BEAM_SIZE; - if (beamSizeString != null) { - beamSize = Integer.parseInt(beamSizeString); - } - - return new BeamSearch<>(beamSize, (MaxentModel) artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME)); - } - else if (artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) { - return (SequenceClassificationModel) artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME); - } - else { - return null; - } - } - - @Override - protected Class getDefaultFactory() { - return LemmatizerFactory.class; - } - - - public LemmatizerFactory getFactory() { - return (LemmatizerFactory) this.toolFactory; - } + private static final String COMPONENT_NAME = "StatisticalLemmatizer"; + private static final String LEMMATIZER_MODEL_ENTRY_NAME = "lemmatizer.model"; + + public LemmatizerModel(String languageCode, SequenceClassificationModel lemmatizerModel, + Map manifestInfoEntries, LemmatizerFactory factory) { + super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory); + artifactMap.put(LEMMATIZER_MODEL_ENTRY_NAME, lemmatizerModel); + checkArtifactMap(); + } + + public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel, + Map manifestInfoEntries, LemmatizerFactory factory) { + this(languageCode, lemmatizerModel, LemmatizerME.DEFAULT_BEAM_SIZE, manifestInfoEntries, factory); + } + + public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel, int beamSize, + Map manifestInfoEntries, LemmatizerFactory factory) { + super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory); + artifactMap.put(LEMMATIZER_MODEL_ENTRY_NAME, lemmatizerModel); + + Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY); + manifest.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize)); + checkArtifactMap(); + } + + public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel, LemmatizerFactory factory) { + this(languageCode, lemmatizerModel, null, factory); + } + + public LemmatizerModel(InputStream in) throws IOException, InvalidFormatException { + super(COMPONENT_NAME, in); + } + + public LemmatizerModel(File modelFile) throws IOException, InvalidFormatException { + super(COMPONENT_NAME, modelFile); + } + + public LemmatizerModel(URL modelURL) throws IOException, InvalidFormatException { + super(COMPONENT_NAME, modelURL); + } + + @Override + protected void validateArtifactMap() throws InvalidFormatException { + super.validateArtifactMap(); + + if (!(artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME) instanceof AbstractModel)) { + throw new InvalidFormatException("Lemmatizer model is incomplete!"); + } + } + + public SequenceClassificationModel getLemmatizerSequenceModel() { + + Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY); + + if (artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME) instanceof MaxentModel) { + String beamSizeString = manifest.getProperty(BeamSearch.BEAM_SIZE_PARAMETER); + + int beamSize = LemmatizerME.DEFAULT_BEAM_SIZE; + if (beamSizeString != null) { + beamSize = Integer.parseInt(beamSizeString); + } + + return new BeamSearch<>(beamSize, (MaxentModel) artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME)); + } + else if (artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel) { + return (SequenceClassificationModel) artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME); + } + else { + return null; + } + } + + @Override + protected Class getDefaultFactory() { + return LemmatizerFactory.class; + } + + + public LemmatizerFactory getFactory() { + return (LemmatizerFactory) this.toolFactory; + } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java index 209d4af96..e55cb5212 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java @@ -117,21 +117,21 @@ public Sequence[] bestSequences(int numSequences, T[] sequence, Arrays.sort(temp_scores); - double min = temp_scores[Math.max(0,scores.length-size)]; + double min = temp_scores[Math.max(0,scores.length - size)]; for (int p = 0; p < scores.length; p++) { if (scores[p] < min) continue; //only advance first "size" outcomes String out = model.getOutcome(p); - if (validator.validSequence(i, sequence, outcomes, out)) { + if (validator.validSequence(i, sequence, outcomes, out)) { Sequence ns = new Sequence(top, out, scores[p]); if (ns.getScore() > minSequenceScore) { next.add(ns); } - } + } } - if (next.size() == 0) {//if no advanced sequences, advance all valid + if (next.size() == 0) { //if no advanced sequences, advance all valid for (int p = 0; p < scores.length; p++) { String out = model.getOutcome(p); if (validator.validSequence(i, sequence, outcomes, out)) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/EventTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/EventTrainer.java index 4bf8c0ab3..a35dcc2cf 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/EventTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/EventTrainer.java @@ -29,6 +29,7 @@ public interface EventTrainer { public static final String EVENT_VALUE = "Event"; public void init(Map trainParams, Map reportMap); + public MaxentModel train(ObjectStream events) throws IOException; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java index 207f830e7..5c09efab6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/TrainerFactory.java @@ -59,7 +59,7 @@ public enum TrainerType { * @param trainParams * @return the trainer type or null if type couldn't be determined. */ - public static TrainerType getTrainerType(Map trainParams){ + public static TrainerType getTrainerType(Map trainParams) { String alogrithmValue = trainParams.get(AbstractTrainer.ALGORITHM_PARAM); @@ -70,7 +70,7 @@ public static TrainerType getTrainerType(Map trainParams){ Class trainerClass = BUILTIN_TRAINERS.get(alogrithmValue); - if(trainerClass != null) { + if (trainerClass != null) { if (EventTrainer.class.isAssignableFrom(trainerClass)) { return TrainerType.EVENT_MODEL_TRAINER; @@ -115,7 +115,7 @@ public static SequenceTrainer getSequenceModelTrainer(Map trainP if (trainerType != null) { if (BUILTIN_TRAINERS.containsKey(trainerType)) { - SequenceTrainer trainer = TrainerFactory. createBuiltinTrainer( + SequenceTrainer trainer = TrainerFactory.createBuiltinTrainer( BUILTIN_TRAINERS.get(trainerType)); trainer.init(trainParams, reportMap); return trainer; @@ -135,7 +135,7 @@ public static EventModelSequenceTrainer getEventModelSequenceTrainer(Map createBuiltinTrainer( + EventModelSequenceTrainer trainer = TrainerFactory.createBuiltinTrainer( BUILTIN_TRAINERS.get(trainerType)); trainer.init(trainParams, reportMap); return trainer; @@ -162,7 +162,7 @@ public static EventTrainer getEventTrainer(Map trainParams, } else { if (BUILTIN_TRAINERS.containsKey(trainerType)) { - EventTrainer trainer = TrainerFactory. createBuiltinTrainer( + EventTrainer trainer = TrainerFactory.createBuiltinTrainer( BUILTIN_TRAINERS.get(trainerType)); trainer.init(trainParams, reportMap); return trainer; diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/BasicContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/BasicContextGenerator.java index 471fe6327..f29ca99d6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/BasicContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/BasicContextGenerator.java @@ -33,9 +33,9 @@ public class BasicContextGenerator implements ContextGenerator { private String separator = " "; - public BasicContextGenerator () {} + public BasicContextGenerator() {} - public BasicContextGenerator (String sep) { + public BasicContextGenerator(String sep) { separator = sep; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java index 7f087b0b9..0cfbc3d2d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java @@ -273,8 +273,8 @@ public GISModel trainModel(int iterations, DataIndexer di, Prior modelPrior, int } else { float cl = values[ci][0]; - for (int vi=1;vi correctionConstant) { @@ -301,7 +301,7 @@ public GISModel trainModel(int iterations, DataIndexer di, Prior modelPrior, int for (int ti = 0; ti < numUniqueEvents; ti++) { for (int j = 0; j < contexts[ti].length; j++) { if (values != null && values[ti] != null) { - predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti]*values[ti][j]; + predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti] * values[ti][j]; } else { predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti]; @@ -323,7 +323,7 @@ public GISModel trainModel(int iterations, DataIndexer di, Prior modelPrior, int // implementation, this is cancelled out when we compute the next // iteration of a parameter, making the extra divisions wasteful. params = new MutableContext[numPreds]; - for (int i = 0; i< modelExpects.length; i++) + for (int i = 0; i < modelExpects.length; i++) modelExpects[i] = new MutableContext[numPreds]; observedExpects = new MutableContext[numPreds]; @@ -334,7 +334,7 @@ public GISModel trainModel(int iterations, DataIndexer di, Prior modelPrior, int evalParams = new EvalParameters(params,0,1,numOutcomes); int[] activeOutcomes = new int[numOutcomes]; int[] outcomePattern; - int[] allOutcomesPattern= new int[numOutcomes]; + int[] allOutcomesPattern = new int[numOutcomes]; for (int oi = 0; oi < numOutcomes; oi++) { allOutcomesPattern[oi] = oi; } @@ -361,17 +361,17 @@ public GISModel trainModel(int iterations, DataIndexer di, Prior modelPrior, int } } params[pi] = new MutableContext(outcomePattern,new double[numActiveOutcomes]); - for (int i = 0; i< modelExpects.length; i++) + for (int i = 0; i < modelExpects.length; i++) modelExpects[i][pi] = new MutableContext(outcomePattern,new double[numActiveOutcomes]); observedExpects[pi] = new MutableContext(outcomePattern,new double[numActiveOutcomes]); - for (int aoi=0;aoi 0) { - observedExpects[pi].setParameter(aoi, predCount[pi][oi]); + observedExpects[pi].setParameter(aoi, predCount[pi][oi]); } else if (useSimpleSmoothing) { observedExpects[pi].setParameter(aoi,smoothingObservation); @@ -387,7 +387,7 @@ else if (useSimpleSmoothing) { if (threads == 1) display("Computing model parameters ...\n"); else - display("Computing model parameters in " + threads +" threads...\n"); + display("Computing model parameters in " + threads + " threads...\n"); findParameters(iterations, correctionConstant); @@ -399,9 +399,9 @@ else if (useSimpleSmoothing) { /* Estimate and return the model parameters. */ private void findParameters(int iterations, double correctionConstant) { - int threads=modelExpects.length; - ExecutorService executor = Executors.newFixedThreadPool(threads); - CompletionService completionService = new ExecutorCompletionService<>(executor); + int threads = modelExpects.length; + ExecutorService executor = Executors.newFixedThreadPool(threads); + CompletionService completionService = new ExecutorCompletionService<>(executor); double prevLL = 0.0; double currLL; display("Performing " + iterations + " iterations.\n"); @@ -496,7 +496,7 @@ public ModelExpactationComputeTask call() { int pi = contexts[ei][j]; if (predicateCounts[pi] >= cutoff) { int[] activeOutcomes = modelExpects[threadIndex][pi].getOutcomes(); - for (int aoi=0;aoi> compressOutcomes(ComparablePredicate[] sorted) { List> outcomePatterns = new ArrayList<>(); - if(sorted.length > 0) { - ComparablePredicate cp = sorted[0]; - List newGroup = new ArrayList<>(); - for (int i = 0; i < sorted.length; i++) { - if (cp.compareTo(sorted[i]) == 0) { - newGroup.add(sorted[i]); - } else { - cp = sorted[i]; - outcomePatterns.add(newGroup); - newGroup = new ArrayList<>(); - newGroup.add(sorted[i]); - } + if (sorted.length > 0) { + ComparablePredicate cp = sorted[0]; + List newGroup = new ArrayList<>(); + for (int i = 0; i < sorted.length; i++) { + if (cp.compareTo(sorted[i]) == 0) { + newGroup.add(sorted[i]); + } else { + cp = sorted[i]; + outcomePatterns.add(newGroup); + newGroup = new ArrayList<>(); + newGroup.add(sorted[i]); } - outcomePatterns.add(newGroup); + } + outcomePatterns.add(newGroup); } return outcomePatterns; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/ObjectQNModelReader.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/ObjectQNModelReader.java index 945b4a0b4..0cd572d81 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/ObjectQNModelReader.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/ObjectQNModelReader.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.io; import java.io.ObjectInputStream; diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/OldFormatGISModelReader.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/OldFormatGISModelReader.java index 831569261..8e243d474 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/OldFormatGISModelReader.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/OldFormatGISModelReader.java @@ -35,7 +35,7 @@ * which stores the parameters. */ public class OldFormatGISModelReader extends PlainTextGISModelReader { - private DataInputStream paramsInput; + private DataInputStream paramsInput; /** * Constructor which takes the name of the model without any suffixes, such as diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/PlainTextGISModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/PlainTextGISModelWriter.java index af94871e0..bde68e167 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/PlainTextGISModelWriter.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/PlainTextGISModelWriter.java @@ -42,8 +42,7 @@ public class PlainTextGISModelWriter extends GISModelWriter { * @param model The GISModel which is to be persisted. * @param f The File in which the model is to be persisted. */ - public PlainTextGISModelWriter (AbstractModel model, File f) - throws IOException { + public PlainTextGISModelWriter(AbstractModel model, File f) throws IOException { super(model); if (f.getName().endsWith(".gz")) { @@ -62,27 +61,27 @@ public PlainTextGISModelWriter (AbstractModel model, File f) * @param model The GISModel which is to be persisted. * @param bw The BufferedWriter which will be used to persist the model. */ - public PlainTextGISModelWriter (AbstractModel model, BufferedWriter bw) { + public PlainTextGISModelWriter(AbstractModel model, BufferedWriter bw) { super(model); output = bw; } - public void writeUTF (String s) throws java.io.IOException { + public void writeUTF(String s) throws java.io.IOException { output.write(s); output.newLine(); } - public void writeInt (int i) throws java.io.IOException { + public void writeInt(int i) throws java.io.IOException { output.write(Integer.toString(i)); output.newLine(); } - public void writeDouble (double d) throws java.io.IOException { + public void writeDouble(double d) throws java.io.IOException { output.write(Double.toString(d)); output.newLine(); } - public void close () throws java.io.IOException { + public void close() throws java.io.IOException { output.flush(); output.close(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/QNModelReader.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/QNModelReader.java index a60872049..a17f1abff 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/QNModelReader.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/QNModelReader.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.io; import java.io.File; diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/QNModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/QNModelWriter.java index 68e4cd6be..77d92a3c4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/QNModelWriter.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/QNModelWriter.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.io; import java.io.IOException; diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/SuffixSensitiveGISModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/SuffixSensitiveGISModelWriter.java index 56b064cf5..de8514473 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/SuffixSensitiveGISModelWriter.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/io/SuffixSensitiveGISModelWriter.java @@ -51,10 +51,9 @@ public class SuffixSensitiveGISModelWriter extends GISModelWriter { * @param model The GISModel which is to be persisted. * @param f The File in which the model is to be stored. */ - public SuffixSensitiveGISModelWriter (AbstractModel model, File f) - throws IOException { + public SuffixSensitiveGISModelWriter(AbstractModel model, File f) throws IOException { - super (model); + super(model); OutputStream output; String filename = f.getName(); @@ -62,7 +61,7 @@ public SuffixSensitiveGISModelWriter (AbstractModel model, File f) // handle the zipped/not zipped distinction if (filename.endsWith(".gz")) { output = new GZIPOutputStream(new FileOutputStream(f)); - filename = filename.substring(0,filename.length()-3); + filename = filename.substring(0,filename.length() - 3); } else { output = new DataOutputStream(new FileOutputStream(f)); @@ -81,19 +80,19 @@ public SuffixSensitiveGISModelWriter (AbstractModel model, File f) } } - public void writeUTF (String s) throws java.io.IOException { + public void writeUTF(String s) throws java.io.IOException { suffixAppropriateWriter.writeUTF(s); } - public void writeInt (int i) throws java.io.IOException { + public void writeInt(int i) throws java.io.IOException { suffixAppropriateWriter.writeInt(i); } - public void writeDouble (double d) throws java.io.IOException { + public void writeDouble(double d) throws java.io.IOException { suffixAppropriateWriter.writeDouble(d); } - public void close () throws java.io.IOException { + public void close() throws java.io.IOException { suffixAppropriateWriter.close(); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ArrayMath.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ArrayMath.java index dd4e03d62..5cd70e6a4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ArrayMath.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ArrayMath.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.quasinewton; import java.util.List; @@ -86,7 +87,7 @@ public static double max(double[] x) { * Find index of maximum element in the vector x * @param x input vector * @return index of the maximum element. Index of the first - * maximum element is returned if multiple maximums are found. + * maximum element is returned if multiple maximums are found. */ public static int maxIdx(double[] x) { if (x == null || x.length == 0) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java index 110620076..c7ac53fcb 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/Function.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.quasinewton; /** diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java index 80c839bab..6ce817b84 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/LineSearch.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.quasinewton; /** @@ -101,7 +102,7 @@ public static void doConstrainedLineSearch(Function function, // New sign vector for (int i = 0; i < dimension; i++) { - signX[i] = x[i] == 0? -pseudoGradAtX[i] : x[i]; + signX[i] = x[i] == 0 ? -pseudoGradAtX[i] : x[i]; } while (true) { @@ -176,7 +177,7 @@ public LineSearchResult( int fctEvalCount) { setAll(stepSize, valueAtCurr, valueAtNext, gradAtCurr, gradAtNext, - currPoint, nextPoint, fctEvalCount); + currPoint, nextPoint, fctEvalCount); } /** @@ -195,7 +196,7 @@ public LineSearchResult( int fctEvalCount) { setAll(stepSize, valueAtCurr, valueAtNext, gradAtCurr, gradAtNext, - pseudoGradAtNext, currPoint, nextPoint, signVector, fctEvalCount); + pseudoGradAtNext, currPoint, nextPoint, signVector, fctEvalCount); } /** @@ -249,6 +250,7 @@ public double getFuncChangeRate() { public double getStepSize() { return stepSize; } + public void setStepSize(double stepSize) { this.stepSize = stepSize; } @@ -256,6 +258,7 @@ public void setStepSize(double stepSize) { public double getValueAtCurr() { return valueAtCurr; } + public void setValueAtCurr(double valueAtCurr) { this.valueAtCurr = valueAtCurr; } @@ -263,6 +266,7 @@ public void setValueAtCurr(double valueAtCurr) { public double getValueAtNext() { return valueAtNext; } + public void setValueAtNext(double valueAtNext) { this.valueAtNext = valueAtNext; } @@ -270,6 +274,7 @@ public void setValueAtNext(double valueAtNext) { public double[] getGradAtCurr() { return gradAtCurr; } + public void setGradAtCurr(double[] gradAtCurr) { this.gradAtCurr = gradAtCurr; } @@ -277,6 +282,7 @@ public void setGradAtCurr(double[] gradAtCurr) { public double[] getGradAtNext() { return gradAtNext; } + public void setGradAtNext(double[] gradAtNext) { this.gradAtNext = gradAtNext; } @@ -284,6 +290,7 @@ public void setGradAtNext(double[] gradAtNext) { public double[] getPseudoGradAtNext() { return pseudoGradAtNext; } + public void setPseudoGradAtNext(double[] pseudoGradAtNext) { this.pseudoGradAtNext = pseudoGradAtNext; } @@ -291,6 +298,7 @@ public void setPseudoGradAtNext(double[] pseudoGradAtNext) { public double[] getCurrPoint() { return currPoint; } + public void setCurrPoint(double[] currPoint) { this.currPoint = currPoint; } @@ -298,6 +306,7 @@ public void setCurrPoint(double[] currPoint) { public double[] getNextPoint() { return nextPoint; } + public void setNextPoint(double[] nextPoint) { this.nextPoint = nextPoint; } @@ -305,6 +314,7 @@ public void setNextPoint(double[] nextPoint) { public double[] getSignVector() { return signVector; } + public void setSignVector(double[] signVector) { this.signVector = signVector; } @@ -312,12 +322,13 @@ public void setSignVector(double[] signVector) { public int getFctEvalCount() { return fctEvalCount; } + public void setFctEvalCount(int fctEvalCount) { this.fctEvalCount = fctEvalCount; } /** - * Initial linear search object + * Initial linear search object. */ public static LineSearchResult getInitialObject( double valueAtX, @@ -328,7 +339,7 @@ public static LineSearchResult getInitialObject( } /** - * Initial linear search object for L1-regularization + * Initial linear search object for L1-regularization. */ public static LineSearchResult getInitialObjectForL1( double valueAtX, @@ -345,8 +356,7 @@ public static LineSearchResult getInitialObject( double[] pseudoGradAtX, double[] x, double[] signX, - int fctEvalCount) - { + int fctEvalCount) { return new LineSearchResult(0.0, 0.0, valueAtX, new double[x.length], gradAtX, pseudoGradAtX, new double[x.length], x, signX, fctEvalCount); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java index dbe8bafda..1c20369f1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/NegLogLikelihood.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.quasinewton; import java.util.Arrays; @@ -94,7 +95,7 @@ public double valueAt(double[] x) { tempSums[oi] = 0; for (ai = 0; ai < contexts[ci].length; ai++) { vectorIndex = indexOf(oi, contexts[ci][ai]); - predValue = values != null? values[ci][ai] : 1.0; + predValue = values != null ? values[ci][ai] : 1.0; tempSums[oi] += predValue * x[vectorIndex]; } } @@ -129,7 +130,7 @@ public double[] gradientAt(double[] x) { expectation[oi] = 0; for (ai = 0; ai < contexts[ci].length; ai++) { vectorIndex = indexOf(oi, contexts[ci][ai]); - predValue = values != null? values[ci][ai] : 1.0; + predValue = values != null ? values[ci][ai] : 1.0; expectation[oi] += predValue * x[vectorIndex]; } } @@ -141,10 +142,10 @@ public double[] gradientAt(double[] x) { } for (oi = 0; oi < numOutcomes; oi++) { - empirical = outcomeList[ci] == oi? 1 : 0; + empirical = outcomeList[ci] == oi ? 1 : 0; for (ai = 0; ai < contexts[ci].length; ai++) { vectorIndex = indexOf(oi, contexts[ci][ai]); - predValue = values != null? values[ci][ai] : 1.0; + predValue = values != null ? values[ci][ai] : 1.0; gradient[vectorIndex] += predValue * (expectation[oi] - empirical) * numTimesEventsSeen[ci]; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java index bbe8d1b18..5ae7c715f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/ParallelNegLogLikelihood.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.quasinewton; import java.lang.reflect.Constructor; @@ -117,10 +118,10 @@ private void computeInParallel(double[] x, Class taskClas for (int i = 0; i < threads; i++) { if (i != threads - 1) futures.add(executor.submit( - cons.newInstance(this, i, i*taskSize, taskSize, x))); + cons.newInstance(this, i, i * taskSize, taskSize, x))); else futures.add(executor.submit( - cons.newInstance(this, i, i*taskSize, taskSize + leftOver, x))); + cons.newInstance(this, i, i * taskSize, taskSize + leftOver, x))); } for (Future future: futures) @@ -179,7 +180,7 @@ public NegLLComputeTask call() { tempSums[oi] = 0; for (ai = 0; ai < contexts[ci].length; ai++) { vectorIndex = indexOf(oi, contexts[ci][ai]); - predValue = values != null? values[ci][ai] : 1.0; + predValue = values != null ? values[ci][ai] : 1.0; tempSums[oi] += predValue * x[vectorIndex]; } } @@ -221,7 +222,7 @@ public GradientComputeTask call() { expectation[oi] = 0; for (ai = 0; ai < contexts[ci].length; ai++) { vectorIndex = indexOf(oi, contexts[ci][ai]); - predValue = values != null? values[ci][ai] : 1.0; + predValue = values != null ? values[ci][ai] : 1.0; expectation[oi] += predValue * x[vectorIndex]; } } @@ -233,10 +234,10 @@ public GradientComputeTask call() { } for (oi = 0; oi < numOutcomes; oi++) { - empirical = outcomeList[ci] == oi? 1 : 0; + empirical = outcomeList[ci] == oi ? 1 : 0; for (ai = 0; ai < contexts[ci].length; ai++) { vectorIndex = indexOf(oi, contexts[ci][ai]); - predValue = values != null? values[ci][ai] : 1.0; + predValue = values != null ? values[ci][ai] : 1.0; gradientThread[threadIndex][vectorIndex] += predValue * (expectation[oi] - empirical) * numTimesEventsSeen[ci]; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java index 821faed71..f957a02e2 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizer.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.quasinewton; import opennlp.tools.ml.maxent.quasinewton.LineSearch.LineSearchResult; @@ -165,6 +166,7 @@ public QNMinimizer(double l1Cost, double l2Cost, int iterations, public Evaluator getEvaluator() { return evaluator; } + public void setEvaluator(Evaluator evaluator) { this.evaluator = evaluator; } @@ -218,9 +220,9 @@ public double[] minimize(Function function) { long startTime = System.currentTimeMillis(); // Initial step size for the 1st iteration - double initialStepSize = l1Cost > 0? + double initialStepSize = l1Cost > 0 ? ArrayMath.invL2norm(lsr.getPseudoGradAtNext()) : - ArrayMath.invL2norm(lsr.getGradAtNext()); + ArrayMath.invL2norm(lsr.getGradAtNext()); for (int iter = 1; iter <= iterations; iter++) { // Find direction @@ -260,12 +262,11 @@ else if (iter < 100) display(iter + ": "); if (evaluator != null) { - display("\t" + lsr.getValueAtNext() - + "\t" + lsr.getFuncChangeRate() - + "\t" + evaluator.evaluate(lsr.getNextPoint()) + "\n"); + display("\t" + lsr.getValueAtNext() + "\t" + lsr.getFuncChangeRate() + + "\t" + evaluator.evaluate(lsr.getNextPoint()) + "\n"); } else { display("\t " + lsr.getValueAtNext() + - "\t" + lsr.getFuncChangeRate() + "\n"); + "\t" + lsr.getFuncChangeRate() + "\n"); } } if (isConverged(lsr)) @@ -370,13 +371,13 @@ private boolean isConverged(LineSearchResult lsr) { if (lsr.getFuncChangeRate() < CONVERGE_TOLERANCE) { if (verbose) display("Function change rate is smaller than the threshold " - + CONVERGE_TOLERANCE + ".\nTraining will stop.\n\n"); + + CONVERGE_TOLERANCE + ".\nTraining will stop.\n\n"); return true; } // Check gradient's norm using the criteria: ||g(x)|| / max(1, ||x||) < threshold double xNorm = Math.max(1, ArrayMath.l2norm(lsr.getNextPoint())); - double gradNorm = l1Cost > 0? + double gradNorm = l1Cost > 0 ? ArrayMath.l2norm(lsr.getPseudoGradAtNext()) : ArrayMath.l2norm(lsr.getGradAtNext()); if (gradNorm / xNorm < REL_GRAD_NORM_TOL) { if (verbose) diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java index 40256c9e4..2725793c3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNModel.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.quasinewton; import opennlp.tools.ml.model.AbstractModel; @@ -24,7 +25,7 @@ public class QNModel extends AbstractModel { public QNModel(Context[] params, String[] predLabels, String[] outcomeNames) { - super(params, predLabels, outcomeNames); + super(params, predLabels, outcomeNames); this.modelType = ModelType.MaxentQn; } @@ -45,7 +46,7 @@ public double[] eval(String[] context, double[] probs) { } public double[] eval(String[] context, float[] values) { - return eval(context, values, new double[evalParams.getNumOutcomes()]); + return eval(context, values, new double[evalParams.getNumOutcomes()]); } /** @@ -81,7 +82,7 @@ private double[] eval(String[] context, float[] values, double[] probs) { double logSumExp = ArrayMath.logSumOfExps(probs); for (int oi = 0; oi < outcomeNames.length; oi++) { - probs[oi] = Math.exp(probs[oi] - logSumExp); + probs[oi] = Math.exp(probs[oi] - logSumExp); } return probs; } @@ -109,7 +110,7 @@ public static double[] eval(int[] context, float[] values, double[] probs, for (int i = 0; i < context.length; i++) { int predIdx = context[i]; - double predValue = values != null? values[i] : 1.0; + double predValue = values != null ? values[i] : 1.0; for (int oi = 0; oi < nOutcomes; oi++) { probs[oi] += predValue * parameters[oi * nPredLabels + predIdx]; } @@ -142,7 +143,7 @@ public boolean equals(Object obj) { for (String pred : pmap.keySet()) { pmapArray[pmap.get(pred)] = pred; } - + for (int i = 0; i < this.pmap.size(); i++) { if (i != objModel.pmap.get(pmapArray[i])) return false; @@ -156,15 +157,15 @@ public boolean equals(Object obj) { if (this.evalParams.getParams()[i].getOutcomes().length != contextComparing[i].getOutcomes().length) return false; for (int j = 0; i < this.evalParams.getParams()[i].getOutcomes().length; i++) { - if (this.evalParams.getParams()[i].getOutcomes()[j] != contextComparing[i].getOutcomes()[j]) - return false; + if (this.evalParams.getParams()[i].getOutcomes()[j] != contextComparing[i].getOutcomes()[j]) + return false; } if (this.evalParams.getParams()[i].getParameters().length != contextComparing[i].getParameters().length) return false; for (int j = 0; i < this.evalParams.getParams()[i].getParameters().length; i++) { - if (this.evalParams.getParams()[i].getParameters()[j] != contextComparing[i].getParameters()[j]) - return false; + if (this.evalParams.getParams()[i].getParameters()[j] != contextComparing[i].getParameters()[j]) + return false; } } return true; diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java index dbcc33133..b40a00b07 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/quasinewton/QNTrainer.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.quasinewton; import java.io.IOException; @@ -84,8 +85,8 @@ public QNTrainer(int m, boolean verbose) { // For testing purpose public QNTrainer(int m, int maxFctEval, boolean verbose) { this.verbose = verbose; - this.m = m < 0? M_DEFAULT: m; - this.maxFctEval = maxFctEval < 0? MAX_FCT_EVAL_DEFAULT: maxFctEval; + this.m = m < 0 ? M_DEFAULT : m; + this.maxFctEval = maxFctEval < 0 ? MAX_FCT_EVAL_DEFAULT : maxFctEval; this.threads = THREADS_DEFAULT; this.l1Cost = L1COST_DEFAULT; this.l2Cost = L2COST_DEFAULT; @@ -223,7 +224,7 @@ public double evaluate(double[] parameters) { for (int ei = 0; ei < contexts.length; ei++) { int[] context = contexts[ei]; - float[] value = values == null? null: values[ei]; + float[] value = values == null ? null : values[ei]; double[] probs = new double[nOutcomes]; QNModel.eval(context, value, probs, nOutcomes, nPredLabels, parameters); diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractDataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractDataIndexer.java index 8abceccf4..caebbd766 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractDataIndexer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractDataIndexer.java @@ -89,7 +89,7 @@ protected int sortAndMerge(List eventsToCompare, boolean sort) if (sort && eventsToCompare.size() > 0) { Collections.sort(eventsToCompare); - + ComparableEvent ce = eventsToCompare.get(0); for (int i = 1; i < numEvents; i++) { ComparableEvent ce2 = eventsToCompare.get(i); @@ -108,11 +108,11 @@ protected int sortAndMerge(List eventsToCompare, boolean sort) else { numUniqueEvents = eventsToCompare.size(); } - - if(numUniqueEvents == 0) { + + if (numUniqueEvents == 0) { throw new InsufficientTrainingDataException("Insufficient training data to create model."); } - + if (sort) System.out.println("done. Reduced " + numEvents + " events to " + numUniqueEvents + "."); contexts = new int[numUniqueEvents][]; @@ -144,19 +144,19 @@ public int getNumEvents() { * @param counter The predicate counters. * @param cutoff The cutoff which determines whether a predicate is included. */ - protected static void update(String[] ec, Set predicateSet, Map counter, int cutoff) { - for (String s : ec) { - Integer i = counter.get(s); - if (i == null) { - counter.put(s, 1); - } - else { - counter.put(s, i + 1); - } - if (!predicateSet.contains(s) && counter.get(s) >= cutoff) { - predicateSet.add(s); - } - } + protected static void update(String[] ec, Set predicateSet, Map counter, int cutoff) { + for (String s : ec) { + Integer i = counter.get(s); + if (i == null) { + counter.put(s, 1); + } + else { + counter.put(s, i + 1); + } + if (!predicateSet.contains(s) && counter.get(s) >= cutoff) { + predicateSet.add(s); + } + } } /** diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java index 3c03c223b..3c10fadcd 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java @@ -34,7 +34,12 @@ public abstract class AbstractModel implements MaxentModel { /** Prior distribution for this model. */ protected Prior prior; - public enum ModelType {Maxent,Perceptron,MaxentQn,NaiveBayes} + public enum ModelType { + Maxent, + Perceptron, + MaxentQn, + NaiveBayes + } /** The type of the model. */ protected ModelType modelType; @@ -55,13 +60,13 @@ public AbstractModel(Context[] params, String[] predLabels, String[] outcomeName this.evalParams = new EvalParameters(params,correctionParam,correctionConstant,outcomeNames.length); } - private void init(String[] predLabels, String[] outcomeNames){ + private void init(String[] predLabels, String[] outcomeNames) { this.pmap = new HashMap(predLabels.length); - + for (int i = 0; i < predLabels.length; i++) { pmap.put(predLabels[i], i); } - + this.outcomeNames = outcomeNames; } @@ -75,13 +80,13 @@ private void init(String[] predLabels, String[] outcomeNames){ * @return The name of the most likely outcome. */ public final String getBestOutcome(double[] ocs) { - int best = 0; - for (int i = 1; i ocs[best]) best = i; - return outcomeNames[best]; + int best = 0; + for (int i = 1; i < ocs.length; i++) + if (ocs[i] > ocs[best]) best = i; + return outcomeNames[best]; } - public ModelType getModelType(){ + public ModelType getModelType() { return modelType; } @@ -98,18 +103,18 @@ public ModelType getModelType(){ * for each one. */ public final String getAllOutcomes(double[] ocs) { - if (ocs.length != outcomeNames.length) { - return "The double array sent as a parameter to GISModel.getAllOutcomes() must not have been produced by this model."; - } - else { - DecimalFormat df = new DecimalFormat("0.0000"); - StringBuilder sb = new StringBuilder(ocs.length * 2); - sb.append(outcomeNames[0]).append("[").append(df.format(ocs[0])).append("]"); - for (int i = 1; iindex 2: java.lang.String[] containing the names of the outcomes, * stored in the index of the array which represents their - * unique ids in the model. + * unique ids in the model. *

  • index 3: java.lang.Integer containing the value of the models * correction constant *
  • index 4: java.lang.Double containing the value of the models @@ -164,12 +169,12 @@ public int getNumOutcomes() { * @return An Object[] with the values as described above. */ public final Object[] getDataStructures() { - Object[] data = new Object[5]; - data[0] = evalParams.getParams(); - data[1] = pmap; - data[2] = outcomeNames; - data[3] = (int) evalParams.getCorrectionConstant(); - data[4] = evalParams.getCorrectionParam(); - return data; + Object[] data = new Object[5]; + data[0] = evalParams.getParams(); + data[1] = pmap; + data[2] = outcomeNames; + data[3] = (int) evalParams.getCorrectionConstant(); + data[4] = evalParams.getCorrectionParam(); + return data; } } \ No newline at end of file diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java index 6c26214eb..bb44d8b69 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java @@ -41,7 +41,7 @@ public AbstractModelReader(File f) throws IOException { // handle the zipped/not zipped distinction if (filename.endsWith(".gz")) { input = new GZIPInputStream(new FileInputStream(f)); - filename = filename.substring(0,filename.length()-3); + filename = filename.substring(0,filename.length() - 3); } else { input = new FileInputStream(f); @@ -92,56 +92,56 @@ public AbstractModel getModel() throws IOException { public abstract AbstractModel constructModel() throws java.io.IOException; protected String[] getOutcomes() throws java.io.IOException { - int numOutcomes = readInt(); - String[] outcomeLabels = new String[numOutcomes]; - for (int i=0; i { public int outcome; public int[] predIndexes; - public int seen = 1; // the number of times this event - // has been seen. + public int seen = 1; // the number of times this event has been seen. public float[] values; diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/ComparablePredicate.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/ComparablePredicate.java index 7483a262a..b08b5742e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/ComparablePredicate.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/ComparablePredicate.java @@ -36,10 +36,10 @@ public ComparablePredicate(String n, int[] ocs, double[] ps) { } public int compareTo(ComparablePredicate cp) { - int smallerLength = outcomes.length > cp.outcomes.length? + int smallerLength = outcomes.length > cp.outcomes.length ? cp.outcomes.length : outcomes.length; - for (int i=0; i cp.outcomes[i]) return 1; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/DataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/DataIndexer.java index 35680ac9f..4bb259ef9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/DataIndexer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/DataIndexer.java @@ -25,7 +25,7 @@ public interface DataIndexer { /** * Returns the array of predicates seen in each event. * @return a 2-D array whose first dimension is the event index and array this refers to contains - * the contexts for that event. + * the contexts for that event. */ public int[][] getContexts(); @@ -44,7 +44,7 @@ public interface DataIndexer { /** * Returns an array of predicate/context names. * @return an array of predicate/context names indexed by context index. These indices are the - * value of the array returned by getContexts. + * value of the array returned by getContexts. */ public String[] getPredLabels(); diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java index 08dce82b3..6290e40e0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java @@ -19,13 +19,13 @@ package opennlp.tools.ml.model; - /** +/** * This class encapsulates the varibales used in producing probabilities from a model * and facilitaes passing these variables to the eval method. */ public class EvalParameters { - /** Mapping between outcomes and paramater values for each context. + /** Mapping between outcomes and paramater values for each context. * The integer representation of the context can be found using pmap.*/ private Context[] params; /** The number of outcomes being predicted. */ diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/Event.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/Event.java index 00389951a..9327e1636 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/Event.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/Event.java @@ -25,49 +25,48 @@ * contextual predicates and an outcome. */ public class Event { - private String outcome; - private String[] context; - private float[] values; + private String outcome; + private String[] context; + private float[] values; - public Event(String outcome, String[] context) { - this(outcome,context,null); - } + public Event(String outcome, String[] context) { + this(outcome,context,null); + } - public Event(String outcome, String[] context, float[] values) { - this.outcome = outcome; - this.context = context; - this.values = values; - } + public Event(String outcome, String[] context, float[] values) { + this.outcome = outcome; + this.context = context; + this.values = values; + } - public String getOutcome() { - return outcome; - } + public String getOutcome() { + return outcome; + } - public String[] getContext() { - return context; - } + public String[] getContext() { + return context; + } - public float[] getValues() { - return values; - } + public float[] getValues() { + return values; + } - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(outcome).append(" ["); - if (context.length > 0) { - sb.append(context[0]); - if (values != null) { - sb.append("=").append(values[0]); - } + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(outcome).append(" ["); + if (context.length > 0) { + sb.append(context[0]); + if (values != null) { + sb.append("=").append(values[0]); } - for (int ci=1;ci eventStream) { digest = MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException e) { // should never happen, does all java runtimes have md5 ?! - throw new IllegalStateException(e); + throw new IllegalStateException(e); } } @@ -63,7 +63,7 @@ public Event read() throws IOException { * * @return the hash sum * @throws IllegalStateException if the stream is not consumed completely, - * completely means that hasNext() returns false + * completely means that hasNext() returns false */ public BigInteger calculateHashSum() { return new BigInteger(1, digest.digest()); diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/IndexHashTable.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/IndexHashTable.java index 584979117..42a9b6f20 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/IndexHashTable.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/IndexHashTable.java @@ -33,8 +33,8 @@ * The table is thread safe and can concurrently accessed by multiple threads, * thread safety is achieved through immutability. Though its not strictly immutable * which means, that the table must still be safely published to other threads. - * - * @deprecated use java.util.HashMap instead + * + * @deprecated use java.util.HashMap instead */ @Deprecated public class IndexHashTable { diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java index a5fd18402..d24404f8f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/MaxentModel.java @@ -98,7 +98,7 @@ public interface MaxentModel { * @param outcome the String name of the outcome for which the * index is desired * @return the index if the given outcome label exists for this - * model, -1 if it does not. + * model, -1 if it does not. **/ int getIndex(String outcome); diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/MutableContext.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/MutableContext.java index 36e2cf1f3..675925214 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/MutableContext.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/MutableContext.java @@ -45,7 +45,7 @@ public MutableContext(int[] outcomePattern, double[] parameters) { * @param value The value to be assigned. */ public void setParameter(int outcomeIndex, double value) { - parameters[outcomeIndex]=value; + parameters[outcomeIndex] = value; } /** @@ -55,7 +55,7 @@ public void setParameter(int outcomeIndex, double value) { * @param value The value to be added. */ public void updateParameter(int outcomeIndex, double value) { - parameters[outcomeIndex]+=value; + parameters[outcomeIndex] += value; } public boolean contains(int outcome) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java index 272f60867..4b8f946a9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java @@ -80,7 +80,7 @@ protected List index(LinkedList events, Map eventsToCompare = new ArrayList<>(numEvents); List indexedContext = new ArrayList<>(); - for (int eventIndex=0; eventIndex index(LinkedList events, Map 0) { int[] cons = new int[indexedContext.size()]; - for (int ci=0;ci eventStream) throws IOException { public TwoPassDataIndexer(ObjectStream eventStream, int cutoff) throws IOException { this(eventStream,cutoff,true); } + /** * Two argument constructor for DataIndexer. * @@ -102,7 +103,7 @@ public TwoPassDataIndexer(ObjectStream eventStream, int cutoff, boolean s sortAndMerge(eventsToCompare,sort); System.out.println("Done indexing."); } - catch(IOException e) { + catch (IOException e) { System.err.println(e); } } @@ -133,7 +134,7 @@ private int computeEventCounts(ObjectStream eventStream, Writer eventStor } predCounts = new int[predicateSet.size()]; int index = 0; - for (Iterator pi=predicateSet.iterator();pi.hasNext();index++) { + for (Iterator pi = predicateSet.iterator(); pi.hasNext(); index++) { String predicate = pi.next(); predCounts[index] = counter.get(predicate); predicatesInOut.put(predicate,index); @@ -173,7 +174,7 @@ private List index(int numEvents, ObjectStream es, Map 0) { int[] cons = new int[indexedContext.size()]; - for (int ci=0;ciPerceptron (model type identifier) - *
    1. # of parameters (int) - *
    2. # of outcomes (int) - *
    * list of outcome names (String) - *
    3. # of different types of outcome patterns (int) - *
    * list of (int int[]) - *
    [# of predicates for which outcome pattern is true] [outcome pattern] - *
    4. # of predicates (int) - *
    * list of predicate names (String) - * - *

    If you are creating a reader for a format which won't work with this - * (perhaps a database or xml file), override this method and ignore the - * other methods provided in this abstract class. - * - * @return The PerceptronModel stored in the format and location specified to - * this PerceptronModelReader (usually via its the constructor). - */ - public AbstractModel constructModel() throws IOException { - String[] outcomeLabels = getOutcomes(); - int[][] outcomePatterns = getOutcomePatterns(); - String[] predLabels = getPredicates(); - Context[] params = getParameters(outcomePatterns); + /** + * Retrieve a model from disk. It assumes that models are saved in the + * following sequence: + * + *
    Perceptron (model type identifier) + *
    1. # of parameters (int) + *
    2. # of outcomes (int) + *
    * list of outcome names (String) + *
    3. # of different types of outcome patterns (int) + *
    * list of (int int[]) + *
    [# of predicates for which outcome pattern is true] [outcome pattern] + *
    4. # of predicates (int) + *
    * list of predicate names (String) + * + *

    If you are creating a reader for a format which won't work with this + * (perhaps a database or xml file), override this method and ignore the + * other methods provided in this abstract class. + * + * @return The PerceptronModel stored in the format and location specified to + * this PerceptronModelReader (usually via its the constructor). + */ + public AbstractModel constructModel() throws IOException { + String[] outcomeLabels = getOutcomes(); + int[][] outcomePatterns = getOutcomePatterns(); + String[] predLabels = getPredicates(); + Context[] params = getParameters(outcomePatterns); - return new PerceptronModel(params, - predLabels, - outcomeLabels); - } + return new PerceptronModel(params, + predLabels, + outcomeLabels); + } - public void checkModelType() throws java.io.IOException { - String modelType = readUTF(); - if (!modelType.equals("Perceptron")) - System.out.println("Error: attempting to load a "+modelType+ - " model as a Perceptron model."+ - " You should expect problems."); - } + public void checkModelType() throws java.io.IOException { + String modelType = readUTF(); + if (!modelType.equals("Perceptron")) + System.out.println("Error: attempting to load a " + modelType + + " model as a Perceptron model." + + " You should expect problems."); + } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModelWriter.java index e958c3c66..fcab1f4b1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModelWriter.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModelWriter.java @@ -37,129 +37,129 @@ * */ public abstract class PerceptronModelWriter extends AbstractModelWriter { - protected Context[] PARAMS; - protected String[] OUTCOME_LABELS; - protected String[] PRED_LABELS; - private int numOutcomes; + protected Context[] PARAMS; + protected String[] OUTCOME_LABELS; + protected String[] PRED_LABELS; + private int numOutcomes; - public PerceptronModelWriter (AbstractModel model) { + public PerceptronModelWriter(AbstractModel model) { - Object[] data = model.getDataStructures(); - this.numOutcomes = model.getNumOutcomes(); - PARAMS = (Context[]) data[0]; + Object[] data = model.getDataStructures(); + this.numOutcomes = model.getNumOutcomes(); + PARAMS = (Context[]) data[0]; - @SuppressWarnings("unchecked") - Map pmap = (Map) data[1]; - OUTCOME_LABELS = (String[])data[2]; + @SuppressWarnings("unchecked") + Map pmap = (Map) data[1]; + OUTCOME_LABELS = (String[])data[2]; - PRED_LABELS = new String[pmap.size()]; - for (String pred : pmap.keySet()) { - PRED_LABELS[pmap.get(pred)] = pred; - } + PRED_LABELS = new String[pmap.size()]; + for (String pred : pmap.keySet()) { + PRED_LABELS[pmap.get(pred)] = pred; } - - protected ComparablePredicate[] sortValues () { - ComparablePredicate[] sortPreds; - ComparablePredicate[] tmpPreds = new ComparablePredicate[PARAMS.length]; - int[] tmpOutcomes = new int[numOutcomes]; - double[] tmpParams = new double[numOutcomes]; - int numPreds = 0; - //remove parameters with 0 weight and predicates with no parameters - for (int pid=0; pid> computeOutcomePatterns(ComparablePredicate[] sorted) { - ComparablePredicate cp = sorted[0]; - List> outcomePatterns = new ArrayList<>(); - List newGroup = new ArrayList<>(); - for (ComparablePredicate predicate : sorted) { - if (cp.compareTo(predicate) == 0) { - newGroup.add(predicate); - } else { - cp = predicate; - outcomePatterns.add(newGroup); - newGroup = new ArrayList<>(); - newGroup.add(predicate); - } + for (int pi = 0; pi < numParams; pi++) { + activeOutcomes[pi] = tmpOutcomes[pi]; + activeParams[pi] = tmpParams[pi]; + } + if (numParams != 0) { + tmpPreds[numPreds] = new ComparablePredicate(PRED_LABELS[pid],activeOutcomes,activeParams); + numPreds++; } - outcomePatterns.add(newGroup); - System.err.println(outcomePatterns.size()+" outcome patterns"); - return outcomePatterns; } - - /** - * Writes the model to disk, using the writeX() methods - * provided by extending classes. - * - *

    If you wish to create a PerceptronModelWriter which uses a different - * structure, it will be necessary to override the persist method in - * addition to implementing the writeX() methods. - */ - public void persist() throws IOException { - - // the type of model (Perceptron) - writeUTF("Perceptron"); - - // the mapping from outcomes to their integer indexes - writeInt(OUTCOME_LABELS.length); - - for (String label : OUTCOME_LABELS) { - writeUTF(label); + System.err.println("Compressed " + PARAMS.length + " parameters to " + numPreds); + sortPreds = new ComparablePredicate[numPreds]; + System.arraycopy(tmpPreds, 0, sortPreds, 0, numPreds); + Arrays.sort(sortPreds); + return sortPreds; + } + + + protected List> computeOutcomePatterns(ComparablePredicate[] sorted) { + ComparablePredicate cp = sorted[0]; + List> outcomePatterns = new ArrayList<>(); + List newGroup = new ArrayList<>(); + for (ComparablePredicate predicate : sorted) { + if (cp.compareTo(predicate) == 0) { + newGroup.add(predicate); + } else { + cp = predicate; + outcomePatterns.add(newGroup); + newGroup = new ArrayList<>(); + newGroup.add(predicate); } + } + outcomePatterns.add(newGroup); + System.err.println(outcomePatterns.size() + " outcome patterns"); + return outcomePatterns; + } + + /** + * Writes the model to disk, using the writeX() methods + * provided by extending classes. + * + *

    If you wish to create a PerceptronModelWriter which uses a different + * structure, it will be necessary to override the persist method in + * addition to implementing the writeX() methods. + */ + public void persist() throws IOException { + + // the type of model (Perceptron) + writeUTF("Perceptron"); + + // the mapping from outcomes to their integer indexes + writeInt(OUTCOME_LABELS.length); + + for (String label : OUTCOME_LABELS) { + writeUTF(label); + } - // the mapping from predicates to the outcomes they contributed to. - // The sorting is done so that we actually can write this out more - // compactly than as the entire list. - ComparablePredicate[] sorted = sortValues(); - List> compressed = computeOutcomePatterns(sorted); + // the mapping from predicates to the outcomes they contributed to. + // The sorting is done so that we actually can write this out more + // compactly than as the entire list. + ComparablePredicate[] sorted = sortValues(); + List> compressed = computeOutcomePatterns(sorted); - writeInt(compressed.size()); + writeInt(compressed.size()); - for (List a : compressed) { - writeUTF(a.size() + a.get(0).toString()); - } + for (List a : compressed) { + writeUTF(a.size() + a.get(0).toString()); + } - // the mapping from predicate names to their integer indexes - writeInt(sorted.length); + // the mapping from predicate names to their integer indexes + writeInt(sorted.length); - for (ComparablePredicate s : sorted) { - writeUTF(s.name); - } + for (ComparablePredicate s : sorted) { + writeUTF(s.name); + } - // write out the parameters - for (int i=0; i values[max]) @@ -387,13 +387,13 @@ private int maxIndex (double[] values) { return max; } - private void display (String s) { + private void display(String s) { if (printMessages) System.out.print(s); } - private void displayIteration (int i) { - if (i > 10 && (i%10) != 0) + private void displayIteration(int i) { + if (i > 10 && (i % 10) != 0) return; if (i < 10) @@ -406,9 +406,9 @@ else if (i < 100) // See whether a number is a perfect square. Inefficient, but fine // for our purposes. - private static boolean isPerfectSquare (int n) { - int root = (int)Math.sqrt(n); - return root*root == n; + private static boolean isPerfectSquare(int n) { + int root = (int) Math.sqrt(n); + return root * root == n; } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelReader.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelReader.java index c3a7c55ed..a9f37880a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelReader.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelReader.java @@ -44,7 +44,7 @@ public PlainTextPerceptronModelReader(BufferedReader br) { * * @param f The File in which the model is stored. */ - public PlainTextPerceptronModelReader (File f) throws IOException { + public PlainTextPerceptronModelReader(File f) throws IOException { super(f); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelWriter.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelWriter.java index ffde6c61c..4f65f550e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelWriter.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PlainTextPerceptronModelWriter.java @@ -42,8 +42,8 @@ public class PlainTextPerceptronModelWriter extends PerceptronModelWriter { * @param model The PerceptronModel which is to be persisted. * @param f The File in which the model is to be persisted. */ - public PlainTextPerceptronModelWriter (AbstractModel model, File f) - throws IOException { + public PlainTextPerceptronModelWriter(AbstractModel model, File f) + throws IOException { super(model); if (f.getName().endsWith(".gz")) { @@ -62,27 +62,27 @@ public PlainTextPerceptronModelWriter (AbstractModel model, File f) * @param model The PerceptronModel which is to be persisted. * @param bw The BufferedWriter which will be used to persist the model. */ - public PlainTextPerceptronModelWriter (AbstractModel model, BufferedWriter bw) { + public PlainTextPerceptronModelWriter(AbstractModel model, BufferedWriter bw) { super(model); output = bw; } - public void writeUTF (String s) throws java.io.IOException { + public void writeUTF(String s) throws java.io.IOException { output.write(s); output.newLine(); } - public void writeInt (int i) throws java.io.IOException { + public void writeInt(int i) throws java.io.IOException { output.write(Integer.toString(i)); output.newLine(); } - public void writeDouble (double d) throws java.io.IOException { + public void writeDouble(double d) throws java.io.IOException { output.write(Double.toString(d)); output.newLine(); } - public void close () throws java.io.IOException { + public void close() throws java.io.IOException { output.flush(); output.close(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java index f14ebf1b5..abbd49439 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java @@ -96,7 +96,7 @@ public boolean isValid() { String algorithmName = getAlgorithm(); return !(algorithmName != null - && !(PERCEPTRON_SEQUENCE_VALUE.equals(algorithmName))); + && !(PERCEPTRON_SEQUENCE_VALUE.equals(algorithmName))); } public AbstractModel doTrain(SequenceStream events) throws IOException { @@ -129,7 +129,7 @@ public AbstractModel trainModel(int iterations, SequenceStream sequenceStream, i for (int i = 0; i < predLabels.length; i++) { pmap.put(predLabels[i], i); } - + display("Incorporating indexed data for training... \n"); this.useAverage = useAverage; numEvents = di.getNumEvents(); @@ -137,7 +137,7 @@ public AbstractModel trainModel(int iterations, SequenceStream sequenceStream, i this.iterations = iterations; outcomeLabels = di.getOutcomeLabels(); omap = new HashMap<>(); - for (int oli=0;oli> featureCounts = new ArrayList<>(numOutcomes); - for (int oi=0;oi()); } PerceptronModel model = new PerceptronModel(params,predLabels,pmap,outcomeLabels); @@ -235,7 +235,7 @@ public void nextIteration(int iteration) throws IOException { Event[] taggerEvents = sequenceStream.updateContext(sequence, model); Event[] events = sequence.getEvents(); boolean update = false; - for (int ei=0;ei "+averageParams[pi].getParameters()[oi]); @@ -319,41 +319,41 @@ public void nextIteration(int iteration) throws IOException { si++; } //finish average computation - double totIterations = (double) iterations*si; - if (useAverage && iteration == iterations-1) { + double totIterations = (double) iterations * si; + if (useAverage && iteration == iterations - 1) { for (int pi = 0; pi < numPreds; pi++) { double[] predParams = averageParams[pi].getParameters(); - for (int oi = 0;oi "+averageParams[pi].getParameters()[oi]); } } } } - display(". ("+numCorrect+"/"+numEvents+") "+((double) numCorrect / numEvents) + "\n"); + display(". (" + numCorrect + "/" + numEvents + ") " + ((double) numCorrect / numEvents) + "\n"); } private void trainingStats(MutableContext[] params) throws IOException { int numCorrect = 0; - int oei=0; + int oei = 0; sequenceStream.reset(); Sequence sequence; while ((sequence = sequenceStream.read()) != null) { Event[] taggerEvents = sequenceStream.updateContext(sequence, new PerceptronModel(params,predLabels,pmap,outcomeLabels)); - for (int ei=0;ei */ public class SuffixSensitivePerceptronModelWriter extends PerceptronModelWriter { - private final AbstractModelWriter suffixAppropriateWriter; + private final AbstractModelWriter suffixAppropriateWriter; - /** - * Constructor which takes a GISModel and a File and invokes the - * GISModelWriter appropriate for the suffix. - * - * @param model The GISModel which is to be persisted. - * @param f The File in which the model is to be stored. - */ - public SuffixSensitivePerceptronModelWriter (AbstractModel model, File f) - throws IOException { + /** + * Constructor which takes a GISModel and a File and invokes the + * GISModelWriter appropriate for the suffix. + * + * @param model The GISModel which is to be persisted. + * @param f The File in which the model is to be stored. + */ + public SuffixSensitivePerceptronModelWriter(AbstractModel model, File f) + throws IOException { - super (model); + super(model); - OutputStream output; - String filename = f.getName(); + OutputStream output; + String filename = f.getName(); - // handle the zipped/not zipped distinction - if (filename.endsWith(".gz")) { - output = new GZIPOutputStream(new FileOutputStream(f)); - filename = filename.substring(0,filename.length()-3); - } - else { - output = new DataOutputStream(new FileOutputStream(f)); - } - - // handle the different formats - if (filename.endsWith(".bin")) { - suffixAppropriateWriter = - new BinaryPerceptronModelWriter(model, - new DataOutputStream(output)); - } - else { // default is ".txt" - suffixAppropriateWriter = - new PlainTextPerceptronModelWriter(model, - new BufferedWriter(new OutputStreamWriter(output))); - } + // handle the zipped/not zipped distinction + if (filename.endsWith(".gz")) { + output = new GZIPOutputStream(new FileOutputStream(f)); + filename = filename.substring(0,filename.length() - 3); } - - public void writeUTF (String s) throws java.io.IOException { - suffixAppropriateWriter.writeUTF(s); + else { + output = new DataOutputStream(new FileOutputStream(f)); } - public void writeInt (int i) throws java.io.IOException { - suffixAppropriateWriter.writeInt(i); + // handle the different formats + if (filename.endsWith(".bin")) { + suffixAppropriateWriter = + new BinaryPerceptronModelWriter(model, + new DataOutputStream(output)); } - - public void writeDouble (double d) throws java.io.IOException { - suffixAppropriateWriter.writeDouble(d); + else { // default is ".txt" + suffixAppropriateWriter = + new PlainTextPerceptronModelWriter(model, + new BufferedWriter(new OutputStreamWriter(output))); } + } - public void close () throws java.io.IOException { - suffixAppropriateWriter.close(); - } + public void writeUTF(String s) throws java.io.IOException { + suffixAppropriateWriter.writeUTF(s); + } + + public void writeInt(int i) throws java.io.IOException { + suffixAppropriateWriter.writeInt(i); + } + + public void writeDouble(double d) throws java.io.IOException { + suffixAppropriateWriter.writeDouble(d); + } + + public void close() throws java.io.IOException { + suffixAppropriateWriter.close(); + } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java index 77aa253e4..177b93827 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java @@ -39,9 +39,9 @@ public boolean validSequence(int i, String[] inputSequence, // if it is continue, we have to check if previous match was of the same type String previousNameType = NameFinderME.extractNameType(outcomesSequence[li]); String nameType = NameFinderME.extractNameType(outcome); - if( previousNameType != null || nameType != null ) { - if( nameType != null ) { - if( nameType.equals(previousNameType) ){ + if (previousNameType != null || nameType != null) { + if (nameType != null) { + if (nameType.equals(previousNameType)) { return true; } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java index 150208129..916a5311c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BioCodec.java @@ -35,7 +35,7 @@ public class BioCodec implements SequenceCodec { static String extractNameType(String outcome) { Matcher matcher = typedOutcomePattern.matcher(outcome); - if(matcher.matches()) { + if (matcher.matches()) { return matcher.group(1); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java index 8dcfe1dec..28e10c9b6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java @@ -40,11 +40,11 @@ public class DefaultNameContextGenerator implements NameContextGenerator { @Deprecated private static AdaptiveFeatureGenerator windowFeatures = new CachedFeatureGenerator( - new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2), - new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2), - new OutcomePriorFeatureGenerator(), - new PreviousMapFeatureGenerator(), - new BigramNameFeatureGenerator()); + new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2), + new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2), + new OutcomePriorFeatureGenerator(), + new PreviousMapFeatureGenerator(), + new BigramNameFeatureGenerator()); /** * Creates a name context generator. @@ -73,21 +73,21 @@ public DefaultNameContextGenerator(AdaptiveFeatureGenerator... featureGenerators } public void addFeatureGenerator(AdaptiveFeatureGenerator generator) { - AdaptiveFeatureGenerator generators[] = featureGenerators; + AdaptiveFeatureGenerator generators[] = featureGenerators; - featureGenerators = new AdaptiveFeatureGenerator[featureGenerators.length + 1]; + featureGenerators = new AdaptiveFeatureGenerator[featureGenerators.length + 1]; - System.arraycopy(generators, 0, featureGenerators, 0, generators.length); + System.arraycopy(generators, 0, featureGenerators, 0, generators.length); - featureGenerators[featureGenerators.length - 1] = generator; + featureGenerators[featureGenerators.length - 1] = generator; } public void updateAdaptiveData(String[] tokens, String[] outcomes) { if (tokens != null && outcomes != null && tokens.length != outcomes.length) { - throw new IllegalArgumentException( - "The tokens and outcome arrays MUST have the same size!"); - } + throw new IllegalArgumentException( + "The tokens and outcome arrays MUST have the same size!"); + } for (AdaptiveFeatureGenerator featureGenerator : featureGenerators) { featureGenerator.updateAdaptiveData(tokens, outcomes); @@ -122,12 +122,12 @@ public String[] getContext(int index, String[] tokens, String[] preds, Object[] // TODO: These should be moved out here in its own feature generator! if (preds != null) { - if (index > 1){ - ppo = preds[index-2]; + if (index > 1) { + ppo = preds[index - 2]; } if (index > 0) { - po = preds[index-1]; + po = preds[index - 1]; } features.add("po=" + po); features.add("pow=" + po + "," + tokens[index]); diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java index 65b01c58a..aadfcc530 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java @@ -125,7 +125,7 @@ protected Iterator createEvents(NameSample sample) { } String outcomes[] = codec.encode(sample.getNames(), sample.getSentence().length); -// String outcomes[] = generateOutcomes(sample.getNames(), type, sample.getSentence().length); + // String outcomes[] = generateOutcomes(sample.getNames(), type, sample.getSentence().length); additionalContextFeatureGenerator.setCurrentContext(sample.getAdditionalContext()); String[] tokens = new String[sample.getSentence().length]; @@ -145,9 +145,9 @@ protected Iterator createEvents(NameSample sample) { */ public static String[][] additionalContext(String[] tokens, Map prevMap) { String[][] ac = new String[tokens.length][1]; - for (int ti=0;ti { - if (resources != null) { - return resources.get(key); - } - return null; - }); + generatorDescriptor), key -> { + if (resources != null) { + return resources.get(key); + } + return null; + }); } else { featureGenerator = null; } @@ -113,10 +114,9 @@ public Span[] find(String[] tokens) { * Generates name tags for the given sequence, typically a sentence, returning * token spans for any identified names. * - * @param tokens an array of the tokens or words of the sequence, typically a - * sentence. + * @param tokens an array of the tokens or words of the sequence, typically a sentence. * @param additionalContext features which are based on context outside of the - * sentence but which should also be used. + * sentence but which should also be used. * * @return an array of spans for each of the names identified. */ @@ -151,7 +151,7 @@ public void clearAdaptiveData() { * number of tokens in the previous call to chunk. * * @param probs An array used to hold the probabilities of the last decoded - * sequence. + * sequence. */ public void probs(double[] probs) { bestSequence.getProbs(probs); @@ -162,7 +162,7 @@ public void probs(double[] probs) { * sequence was determined based on the previous call to chunk. * * @return An array with the same number of probabilities as tokens were sent - * to chunk when it was last called. + * to chunk when it was last called. */ public double[] probs() { return bestSequence.getProbs(); @@ -175,12 +175,12 @@ public double[] probs() { * @return */ private Span[] setProbs(Span[] spans) { - double[] probs = probs(spans); - if (probs != null) { + double[] probs = probs(spans); + if (probs != null) { for (int i = 0; i < probs.length; i++) { double prob = probs[i]; - spans[i]= new Span(spans[i], prob); + spans[i] = new Span(spans[i], prob); } } return spans; diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java index 92c1cdd0c..cccf1ac11 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderSequenceValidator.java @@ -39,9 +39,9 @@ public boolean validSequence(int i, String[] inputSequence, // if it is continue, we have to check if previous match was of the same type String previousNameType = NameFinderME.extractNameType(outcomesSequence[li]); String nameType = NameFinderME.extractNameType(outcome); - if( previousNameType != null || nameType != null ) { - if( nameType != null ) { - if( nameType.equals(previousNameType) ){ + if (previousNameType != null || nameType != null ) { + if (nameType != null ) { + if (nameType.equals(previousNameType)) { return true; } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java index e1dc80198..bedb970de 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSample.java @@ -182,10 +182,10 @@ private static String errorTokenWithContext(String sentence[], int index) { // two token before if (index > 1) - errorString.append(sentence[index -2]).append(" "); + errorString.append(sentence[index - 2]).append(" "); if (index > 0) - errorString.append(sentence[index -1]).append(" "); + errorString.append(sentence[index - 1]).append(" "); // token itself errorString.append("###"); @@ -210,9 +210,9 @@ public static NameSample parse(String taggedTokens, } public static NameSample parse(String taggedTokens, String defaultType, - boolean isClearAdaptiveData) + boolean isClearAdaptiveData) throws IOException { // TODO: Should throw another exception, and then convert it into an IOException in the stream - throws IOException { + String[] parts = WhitespaceTokenizer.INSTANCE.tokenize(taggedTokens); List tokenList = new ArrayList<>(parts.length); @@ -229,15 +229,15 @@ public static NameSample parse(String taggedTokens, String defaultType, for (int pi = 0; pi < parts.length; pi++) { Matcher startMatcher = START_TAG_PATTERN.matcher(parts[pi]); if (startMatcher.matches()) { - if(catchingName) { + if (catchingName) { throw new IOException("Found unexpected annotation" + " while handling a name sequence: " + errorTokenWithContext(parts, pi)); } catchingName = true; startIndex = wordIndex; String nameTypeFromSample = startMatcher.group(2); - if(nameTypeFromSample != null) { - if(nameTypeFromSample.length() == 0) { + if (nameTypeFromSample != null) { + if (nameTypeFromSample.length() == 0) { throw new IOException("Missing a name type: " + errorTokenWithContext(parts, pi)); } nameType = nameTypeFromSample; @@ -245,7 +245,7 @@ public static NameSample parse(String taggedTokens, String defaultType, } else if (parts[pi].equals(NameSampleDataStream.END_TAG)) { - if(!catchingName) { + if (!catchingName) { throw new IOException("Found unexpected annotation: " + errorTokenWithContext(parts, pi)); } catchingName = false; diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleDataStream.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleDataStream.java index 08cd46b32..3c4cd68cd 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleDataStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleDataStream.java @@ -40,23 +40,23 @@ public NameSampleDataStream(ObjectStream in) { } public NameSample read() throws IOException { - String token = samples.read(); - - boolean isClearAdaptiveData = false; - - // An empty line indicates the begin of a new article - // for which the adaptive data in the feature generators - // must be cleared - while (token != null && token.trim().length() == 0) { - isClearAdaptiveData = true; - token = samples.read(); - } - - if (token != null) { - return NameSample.parse(token, isClearAdaptiveData); - } - else { - return null; - } + String token = samples.read(); + + boolean isClearAdaptiveData = false; + + // An empty line indicates the begin of a new article + // for which the adaptive data in the feature generators + // must be cleared + while (token != null && token.trim().length() == 0) { + isClearAdaptiveData = true; + token = samples.read(); + } + + if (token != null) { + return NameSample.parse(token, isClearAdaptiveData); + } + else { + return null; + } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java index 22c855068..5ac519e26 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java @@ -15,7 +15,7 @@ * limitations under the License. */ - package opennlp.tools.namefind; +package opennlp.tools.namefind; import java.io.IOException; import java.util.Collections; @@ -40,12 +40,12 @@ public NameSampleSequenceStream(ObjectStream psi) throws IOException } public NameSampleSequenceStream(ObjectStream psi, AdaptiveFeatureGenerator featureGen) - throws IOException { + throws IOException { this(psi, new DefaultNameContextGenerator(featureGen), true); } public NameSampleSequenceStream(ObjectStream psi, AdaptiveFeatureGenerator featureGen, boolean useOutcomes) - throws IOException { + throws IOException { this(psi, new DefaultNameContextGenerator(featureGen), useOutcomes); } @@ -61,7 +61,7 @@ public NameSampleSequenceStream(ObjectStream psi, NameContextGenerat public NameSampleSequenceStream(ObjectStream psi, NameContextGenerator pcg, boolean useOutcomes, SequenceCodec seqCodec) - throws IOException { + throws IOException { this.psi = psi; this.useOutcomes = useOutcomes; this.pcg = pcg; @@ -88,7 +88,7 @@ public Sequence read() throws IOException { String tags[] = seqCodec.encode(sample.getNames(), sentence.length); Event[] events = new Event[sentence.length]; - for (int i=0; i < sentence.length; i++) { + for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags @@ -103,10 +103,10 @@ public Sequence read() throws IOException { events[i] = new Event(tags[i], context); } return new Sequence<>(events,sample); - } - else { - return null; - } + } + else { + return null; + } } @Override diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java index 111719cf0..7ac85460d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.namefind; import java.util.Collection; @@ -96,9 +97,9 @@ public Span[] find(String tokens[]) { while (matcher.find()) { Integer tokenStartIndex = - sentencePosTokenMap.get(matcher.start()); + sentencePosTokenMap.get(matcher.start()); Integer tokenEndIndex = - sentencePosTokenMap.get(matcher.end()); + sentencePosTokenMap.get(matcher.end()); if (tokenStartIndex != null && tokenEndIndex != null) { Span annotation = new Span(tokenStartIndex, tokenEndIndex, entry.getKey()); @@ -113,9 +114,9 @@ public Span[] find(String tokens[]) { while (matcher.find()) { Integer tokenStartIndex = - sentencePosTokenMap.get(matcher.start()); + sentencePosTokenMap.get(matcher.start()); Integer tokenEndIndex = - sentencePosTokenMap.get(matcher.end()); + sentencePosTokenMap.get(matcher.end()); if (tokenStartIndex != null && tokenEndIndex != null) { Span annotation = new Span(tokenStartIndex, tokenEndIndex, sType); @@ -127,7 +128,7 @@ public Span[] find(String tokens[]) { return annotations.toArray( - new Span[annotations.size()]); + new Span[annotations.size()]); } /** @@ -172,7 +173,7 @@ private Span[] getAnnotations(String text) { } return annotations.toArray( - new Span[annotations.size()]); + new Span[annotations.size()]); } @Override diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java index 261321bd4..0b6962479 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.namefind; import java.util.HashMap; @@ -75,11 +76,11 @@ public static void main(String[] args) { String text = "my email is opennlp@gmail.com and my phone num is 123-234-5678 and i like https://www.google.com and I visited MGRS 11sku528111 AKA 11S KU 528 111 and DMS 45N 123W AKA +45.1234, -123.12 AKA 45.1234N 123.12W AKA 45 30 N 50 30 W"; String[] tokens = text.split(" "); RegexNameFinder regexNameFinder = RegexNameFinderFactory.getDefaultRegexNameFinders( - DEFAULT_REGEX_NAME_FINDER.DEGREES_MIN_SEC_LAT_LON, - DEFAULT_REGEX_NAME_FINDER.EMAIL, - DEFAULT_REGEX_NAME_FINDER.MGRS, - DEFAULT_REGEX_NAME_FINDER.USA_PHONE_NUM, - DEFAULT_REGEX_NAME_FINDER.URL); + DEFAULT_REGEX_NAME_FINDER.DEGREES_MIN_SEC_LAT_LON, + DEFAULT_REGEX_NAME_FINDER.EMAIL, + DEFAULT_REGEX_NAME_FINDER.MGRS, + DEFAULT_REGEX_NAME_FINDER.USA_PHONE_NUM, + DEFAULT_REGEX_NAME_FINDER.URL); Span[] find = regexNameFinder.find(tokens); @@ -145,15 +146,15 @@ public String getType() { public Map getRegexMap() { Pattern[] p = new Pattern[1]; p[0] = Pattern.compile("\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)" - + "(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" - + "|mil|biz|info|mobi|name|aero|jobs|museum" - + "|travel|[a-z]{2}))(:[\\d]{1,5})?" - + "(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" - + "((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" - + "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" - + "(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" - + "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" - + "(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b", Pattern.CASE_INSENSITIVE); + + "(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" + + "|mil|biz|info|mobi|name|aero|jobs|museum" + + "|travel|[a-z]{2}))(:[\\d]{1,5})?" + + "(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" + + "((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" + + "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" + + "(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" + + "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" + + "(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b", Pattern.CASE_INSENSITIVE); Map regexMap = new HashMap<>(); regexMap.put(getType(), p); return regexMap; diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java index b4ff4e1be..38eba4474 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java @@ -105,7 +105,7 @@ public void reset() throws IOException, UnsupportedOperationException { /** * Splits DocumentSample into NameSamples. */ - private class DocumentToNameSampleStream extends FilterObjectStream{ + private class DocumentToNameSampleStream extends FilterObjectStream { protected DocumentToNameSampleStream(ObjectStream samples) { super(samples); @@ -215,11 +215,11 @@ public void evaluate(ObjectStream samples, int nFolds) TokenNameFinderModel model; if (factory != null) { model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), - params, factory); + params, factory); } else { model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), - params, TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, new BioCodec())); + params, TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, new BioCodec())); } // do testing diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java index 55f1ab694..ae2998b90 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java @@ -59,7 +59,7 @@ public TokenNameFinderFactory() { } public TokenNameFinderFactory(byte[] featureGeneratorBytes, final Map resources, - SequenceCodec seqCodec) { + SequenceCodec seqCodec) { init(featureGeneratorBytes, resources, seqCodec); } @@ -70,15 +70,15 @@ void init(byte[] featureGeneratorBytes, final Map resources, Seq } private static byte[] loadDefaultFeatureGeneratorBytes() { - + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); try (InputStream in = TokenNameFinderFactory.class.getResourceAsStream( "/opennlp/tools/namefind/ner-default-features.xml")) { - + if (in == null) { throw new IllegalStateException("Classpath must contain ner-default-features.xml file!"); } - + byte buf[] = new byte[1024]; int len; while ((len = in.read(buf)) > 0) { @@ -88,10 +88,10 @@ private static byte[] loadDefaultFeatureGeneratorBytes() { catch (IOException e) { throw new IllegalStateException("Failed reading from ner-default-features.xml file on classpath!"); } - + return bytes.toByteArray(); } - + protected SequenceCodec getSequenceCodec() { return seqCodec; } @@ -105,8 +105,7 @@ protected byte[] getFeatureGenerator() { } public static TokenNameFinderFactory create(String subclassName, byte[] featureGeneratorBytes, final Map resources, - SequenceCodec seqCodec) - throws InvalidFormatException { + SequenceCodec seqCodec) throws InvalidFormatException { TokenNameFinderFactory theFactory; if (subclassName == null) { // will create the default factory @@ -150,12 +149,12 @@ public NameContextGenerator createContextGenerator() { if (featureGenerator == null) { featureGenerator = new CachedFeatureGenerator( - new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2), - new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2), - new OutcomePriorFeatureGenerator(), - new PreviousMapFeatureGenerator(), - new BigramNameFeatureGenerator(), - new SentenceFeatureGenerator(true, false)); + new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2), + new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2), + new OutcomePriorFeatureGenerator(), + new PreviousMapFeatureGenerator(), + new BigramNameFeatureGenerator(), + new SentenceFeatureGenerator(true, false)); } return new DefaultNameContextGenerator(featureGenerator); @@ -176,7 +175,7 @@ public AdaptiveFeatureGenerator createFeatureGenerators() { featureGeneratorBytes = artifactProvider.getArtifact( TokenNameFinderModel.GENERATOR_DESCRIPTOR_ENTRY_NAME); } - + if (featureGeneratorBytes == null) { featureGeneratorBytes = loadDefaultFeatureGeneratorBytes(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java index ea2db5014..c78f28cfc 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java @@ -232,8 +232,8 @@ protected void validateArtifactMap() throws InvalidFormatException { super.validateArtifactMap(); if (!(artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof MaxentModel) && - !(artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel)) { - throw new InvalidFormatException("Token Name Finder model is incomplete!"); - } + !(artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof SequenceClassificationModel)) { + throw new InvalidFormatException("Token Name Finder model is incomplete!"); + } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java index f001ba258..0ee1d8d9b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.ngram; import java.util.ArrayList; @@ -50,13 +51,14 @@ public static List generate(List input, int n, String separator) } return outGrams; } -/** - *Generates an nGram based on a char[] input - * @param input the array of chars to convert to nGram - * @param n The number of grams (chars) that each output gram will consist of - * @param separator each char in each gram will be separated by this value if desired. Pass in empty string if no separator is desired - * @return - */ + + /** + *Generates an nGram based on a char[] input + * @param input the array of chars to convert to nGram + * @param n The number of grams (chars) that each output gram will consist of + * @param separator each char in each gram will be separated by this value if desired. Pass in empty string if no separator is desired + * @return + */ public static List generate(char[] input, int n, String separator) { List outGrams = new ArrayList<>(); diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java index 6d912da4e..296a6320d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java @@ -38,7 +38,7 @@ * * @see StringList */ -public class NGramModel implements Iterable{ +public class NGramModel implements Iterable { protected static final String COUNT = "count"; @@ -59,26 +59,26 @@ public NGramModel() { public NGramModel(InputStream in) throws IOException { DictionarySerializer.create(in, entry -> { - int count; - String countValueString = null; + int count; + String countValueString = null; - try { - countValueString = entry.getAttributes().getValue(COUNT); + try { + countValueString = entry.getAttributes().getValue(COUNT); - if (countValueString == null) { - throw new InvalidFormatException( - "The count attribute must be set!"); - } - - count = Integer.parseInt(countValueString); - } catch (NumberFormatException e) { - throw new InvalidFormatException("The count attribute '" + countValueString - + "' must be a number!", e); + if (countValueString == null) { + throw new InvalidFormatException( + "The count attribute must be set!"); } - add(entry.getTokens()); - setCount(entry.getTokens(), count); - }); + count = Integer.parseInt(countValueString); + } catch (NumberFormatException e) { + throw new InvalidFormatException("The count attribute '" + countValueString + + "' must be a number!", e); + } + + add(entry.getTokens()); + setCount(entry.getTokens(), count); + }); } /** @@ -139,12 +139,12 @@ public void add(StringList ngram) { public void add(StringList ngram, int minLength, int maxLength) { if (minLength < 1 || maxLength < 1) - throw new IllegalArgumentException("minLength and maxLength param must be at least 1. " + - "minLength=" + minLength + ", maxLength= " + maxLength); + throw new IllegalArgumentException("minLength and maxLength param must be at least 1. " + + "minLength=" + minLength + ", maxLength= " + maxLength); if (minLength > maxLength) - throw new IllegalArgumentException("minLength param must not be larger than " + - "maxLength param. minLength=" + minLength + ", maxLength= " + maxLength); + throw new IllegalArgumentException("minLength param must not be larger than " + + "maxLength param. minLength=" + minLength + ", maxLength= " + maxLength); for (int lengthIndex = minLength; lengthIndex < maxLength + 1; lengthIndex++) { for (int textIndex = 0; @@ -301,32 +301,32 @@ public Dictionary toDictionary(boolean caseSensitive) { * @throws IOException if an I/O Error during writing occurs */ public void serialize(OutputStream out) throws IOException { - Iterator entryIterator = new Iterator() - { - private Iterator mDictionaryIterator = NGramModel.this.iterator(); + Iterator entryIterator = new Iterator() + { + private Iterator mDictionaryIterator = NGramModel.this.iterator(); - public boolean hasNext() { - return mDictionaryIterator.hasNext(); - } + public boolean hasNext() { + return mDictionaryIterator.hasNext(); + } - public Entry next() { + public Entry next() { - StringList tokens = mDictionaryIterator.next(); + StringList tokens = mDictionaryIterator.next(); - Attributes attributes = new Attributes(); + Attributes attributes = new Attributes(); - attributes.setValue(COUNT, Integer.toString(getCount(tokens))); + attributes.setValue(COUNT, Integer.toString(getCount(tokens))); - return new Entry(tokens, attributes); - } + return new Entry(tokens, attributes); + } - public void remove() { - throw new UnsupportedOperationException(); - } + public void remove() { + throw new UnsupportedOperationException(); + } - }; + }; - DictionarySerializer.serialize(out, entryIterator, false); + DictionarySerializer.serialize(out, entryIterator, false); } @Override @@ -346,7 +346,7 @@ else if (obj instanceof NGramModel) { } return result; - } + } @Override public String toString() { diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java index c1e36608d..098827225 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ngram; import java.util.Collection; @@ -123,7 +124,7 @@ public static double calculateBigramPriorSmoothingProbability(String x0, String * @return the linear interpolation probability */ public static double calculateTrigramLinearInterpolationProbability(String x0, String x1, String x2, Collection set, - Double lambda1, Double lambda2, Double lambda3) { + Double lambda1, Double lambda2, Double lambda3) { assert lambda1 + lambda2 + lambda3 == 1 : "lambdas sum should be equals to 1"; assert lambda1 > 0 && lambda2 > 0 && lambda3 > 0 : "lambdas should all be greater than 0"; diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java index cc8eab58b..d0804bd75 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java @@ -220,12 +220,12 @@ public static Parse[] collapsePunctuation(Parse[] chunks, Set punctSet) List collapsedParses = new ArrayList<>(chunks.length); int lastNonPunct = -1; int nextNonPunct; - for (int ci=0,cn=chunks.length;ci= 0) { chunks[lastNonPunct].addNextPunctuation(chunks[ci]); } - for (nextNonPunct=ci+1;nextNonPunct topParses = new ArrayList<>(numParses); - while(!completeParses.isEmpty() && topParses.size() < numParses) { + while (!completeParses.isEmpty() && topParses.size() < numParses) { Parse tp = completeParses.extract(); topParses.add(tp); //parses.remove(tp); @@ -390,7 +390,7 @@ protected Parse[] advanceChunks(final Parse p, double minChunkScore) { ptags[i] = sp.getType(); } //System.err.println("adjusted mcs = "+(minChunkScore-p.getProb())); - Sequence[] cs = chunker.topKSequences(words, ptags,minChunkScore-p.getProb()); + Sequence[] cs = chunker.topKSequences(words, ptags,minChunkScore - p.getProb()); Parse[] newParses = new Parse[cs.length]; for (int si = 0, sl = cs.length; si < sl; si++) { newParses[si] = (Parse) p.clone(); //copies top level @@ -484,7 +484,7 @@ protected Parse[] advanceTags(final Parse p) { * @param nonPunctParses The parses without punctuation. * @param parses The parses wit punctuation. * @return An index into the specified parses which corresponds to the same node the specified index - * into the parses with punctuation. + * into the parses with punctuation. */ protected int mapParseIndex(int index, Parse[] nonPunctParses, Parse[] parses) { int parseIndex = index; @@ -527,12 +527,12 @@ public static Dictionary buildDictionary(ObjectStream data, HeadRules rul NGramModel mdict = new NGramModel(); Parse p; - while((p = data.read()) != null) { + while ((p = data.read()) != null) { p.updateHeads(rules); Parse[] pwords = p.getTagNodes(); String[] words = new String[pwords.length]; //add all uni-grams - for (int wi=0;wi data, HeadRules rul //add tri-grams and bi-grams for inital sequence Parse[] chunks = collapsePunctuation(ParserEventStream.getInitialChunks(p),rules.getPunctuationTags()); String[] cwords = new String[chunks.length]; - for (int wi=0;wi data, HeadRules rul if (lastChild(chunks[ci], chunks[ci].getParent(),rules.getPunctuationTags())) { //perform reduce int reduceStart = ci; - while (reduceStart >=0 && chunks[reduceStart].getParent() == chunks[ci].getParent()) { + while (reduceStart >= 0 && chunks[reduceStart].getParent() == chunks[ci].getParent()) { reduceStart--; } reduceStart++; @@ -565,24 +565,24 @@ public static Dictionary buildDictionary(ObjectStream data, HeadRules rul if (chunks.length != 0) { String[] window = new String[5]; int wi = 0; - if (ci-2 >= 0) window[wi++] = chunks[ci-2].getHead().getCoveredText(); - if (ci-1 >= 0) window[wi++] = chunks[ci-1].getHead().getCoveredText(); + if (ci - 2 >= 0) window[wi++] = chunks[ci - 2].getHead().getCoveredText(); + if (ci - 1 >= 0) window[wi++] = chunks[ci - 1].getHead().getCoveredText(); window[wi++] = chunks[ci].getHead().getCoveredText(); - if (ci+1 < chunks.length) window[wi++] = chunks[ci+1].getHead().getCoveredText(); - if (ci+2 < chunks.length) window[wi++] = chunks[ci+2].getHead().getCoveredText(); + if (ci + 1 < chunks.length) window[wi++] = chunks[ci + 1].getHead().getCoveredText(); + if (ci + 2 < chunks.length) window[wi++] = chunks[ci + 2].getHead().getCoveredText(); if (wi < 5) { String[] subWindow = new String[wi]; System.arraycopy(window, 0, subWindow, 0, wi); window = subWindow; } - if (window.length >=3) { + if (window.length >= 3) { mdict.add(new StringList(window), 2, 3); } else if (window.length == 2) { mdict.add(new StringList(window), 2, 2); } } - ci=reduceStart-1; //ci will be incremented at end of loop + ci = reduceStart - 1; //ci will be incremented at end of loop } ci++; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java index 85de098c2..df45c139b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java @@ -91,7 +91,7 @@ protected String consbo(Parse p, int i) { //cons back-off * @param p The parse which stats teh production. * @param includePunctuation Whether punctuation should be included in the production. * @return a string representing the grammar rule production that the specified parse - * is starting. + * is starting. */ protected String production(Parse p, boolean includePunctuation) { StringBuilder production = new StringBuilder(20); @@ -99,12 +99,12 @@ protected String production(Parse p, boolean includePunctuation) { Parse[] children = AbstractBottomUpParser.collapsePunctuation(p.getChildren(),punctSet); for (int ci = 0; ci < children.length; ci++) { production.append(children[ci].getType()); - if (ci+1 != children.length) { + if (ci + 1 != children.length) { production.append(","); Collection nextPunct = children[ci].getNextPunctuationSet(); if (includePunctuation && nextPunct != null) { //TODO: make sure multiple punctuation comes out the same - for (Iterator pit=nextPunct.iterator();pit.hasNext();) { + for (Iterator pit = nextPunct.iterator(); pit.hasNext();) { Parse punct = pit.next(); production.append(punct.getType()).append(","); } @@ -118,26 +118,26 @@ protected void cons2(List features, Cons c0, Cons c1, Collection if (punct1s != null) { for (Iterator pi = punct1s.iterator();pi.hasNext();) { Parse p = pi.next(); - String punctbo = punctbo(p,c1.index <= 0 ? c1.index -1 : c1.index); + String punctbo = punctbo(p,c1.index <= 0 ? c1.index - 1 : c1.index); //punctbo(1); features.add(punctbo); if (c0.index == 0) { //TODO look at removing case //cons(0)punctbo(1) - if (c0.unigram) features.add(c0.cons+","+punctbo); - features.add(c0.consbo+","+punctbo); + if (c0.unigram) features.add(c0.cons + "," + punctbo); + features.add(c0.consbo + "," + punctbo); } if (c1.index == 0) { //TODO look at removing case //punctbo(1)cons(1) - if (c1.unigram) features.add(punctbo+","+c1.cons); - features.add(punctbo+","+c1.consbo); + if (c1.unigram) features.add(punctbo + "," + c1.cons); + features.add(punctbo + "," + c1.consbo); } //cons(0)punctbo(1)cons(1) - if (bigram) features.add(c0.cons+","+punctbo+","+c1.cons); - if (c1.unigram) features.add(c0.consbo+","+punctbo+","+c1.cons); - if (c0.unigram) features.add(c0.cons+","+punctbo+","+c1.consbo); - features.add(c0.consbo+","+punctbo+","+c1.consbo); + if (bigram) features.add(c0.cons + "," + punctbo + "," + c1.cons); + if (c1.unigram) features.add(c0.consbo + "," + punctbo + "," + c1.cons); + if (c0.unigram) features.add(c0.cons + "," + punctbo + "," + c1.consbo); + features.add(c0.consbo + "," + punctbo + "," + c1.consbo); } } else { @@ -166,10 +166,10 @@ protected void cons3(List features, Cons c0, Cons c1, Cons c2, Collectio // features.add("stage=cons(0),cons(1),cons(2)"); if (punct1s != null) { if (c0.index == -2) { - for (Iterator pi=punct1s.iterator();pi.hasNext();) { + for (Iterator pi = punct1s.iterator(); pi.hasNext();) { Parse p = pi.next(); -// String punct = punct(p,c1.index); - String punctbo = punctbo(p,c1.index <= 0 ? c1.index -1 : c1.index); + // String punct = punct(p,c1.index); + String punctbo = punctbo(p,c1.index <= 0 ? c1.index - 1 : c1.index); //punct(-2) //TODO consider changing //features.add(punct); @@ -181,10 +181,10 @@ protected void cons3(List features, Cons c0, Cons c1, Cons c2, Collectio } if (punct2s != null) { if (c2.index == 2) { - for (Iterator pi=punct2s.iterator();pi.hasNext();) { + for (Iterator pi = punct2s.iterator(); pi.hasNext();) { Parse p = pi.next(); -// String punct = punct(p,c2.index); - String punctbo = punctbo(p,c2.index <= 0 ? c2.index -1 : c2.index); + // String punct = punct(p,c2.index); + String punctbo = punctbo(p,c2.index <= 0 ? c2.index - 1 : c2.index); //punct(2) //TODO consider changing //features.add(punct); @@ -195,45 +195,45 @@ protected void cons3(List features, Cons c0, Cons c1, Cons c2, Collectio } if (punct1s != null) { //cons(0),punctbo(1),cons(1),punctbo(2),cons(2) - for (Iterator pi2=punct2s.iterator();pi2.hasNext();) { - String punctbo2 = punctbo(pi2.next(),c2.index <= 0 ? c2.index -1 : c2.index); - for (Iterator pi1=punct1s.iterator();pi1.hasNext();) { - String punctbo1 = punctbo(pi1.next(),c1.index <= 0 ? c1.index -1 : c1.index); - if (trigram) features.add(c0.cons + "," + punctbo1+","+c1.cons + "," + punctbo2+","+c2.cons); + for (Iterator pi2 = punct2s.iterator(); pi2.hasNext();) { + String punctbo2 = punctbo(pi2.next(),c2.index <= 0 ? c2.index - 1 : c2.index); + for (Iterator pi1 = punct1s.iterator(); pi1.hasNext();) { + String punctbo1 = punctbo(pi1.next(),c1.index <= 0 ? c1.index - 1 : c1.index); + if (trigram) features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.cons); - if (bigram2) features.add(c0.consbo + "," + punctbo1+","+c1.cons + "," + punctbo2+","+c2.cons); - if (c0.unigram && c2.unigram) features.add(c0.cons + "," + punctbo1+","+c1.consbo + "," + punctbo2+","+c2.cons); - if (bigram1) features.add(c0.cons + "," + punctbo1+","+c1.cons + "," + punctbo2+","+c2.consbo); + if (bigram2) features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.cons); + if (c0.unigram && c2.unigram) features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + punctbo2 + "," + c2.cons); + if (bigram1) features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.consbo); - if (c2.unigram) features.add(c0.consbo + "," + punctbo1+","+c1.consbo + "," + punctbo2+","+c2.cons); - if (c1.unigram) features.add(c0.consbo + "," + punctbo1+","+c1.cons + "," + punctbo2+","+c2.consbo); - if (c0.unigram) features.add(c0.cons + "," + punctbo1+","+c1.consbo + "," + punctbo2+","+c2.consbo); + if (c2.unigram) features.add(c0.consbo + "," + punctbo1 + "," + c1.consbo + "," + punctbo2 + "," + c2.cons); + if (c1.unigram) features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.consbo); + if (c0.unigram) features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + punctbo2 + "," + c2.consbo); - features.add(c0.consbo + "," + punctbo1+","+c1.consbo + "," + punctbo2+","+c2.consbo); + features.add(c0.consbo + "," + punctbo1 + "," + c1.consbo + "," + punctbo2 + "," + c2.consbo); if (zeroBackOff) { - if (bigram1) features.add(c0.cons + "," + punctbo1+","+c1.cons + "," + punctbo2); - if (c1.unigram) features.add(c0.consbo + "," + punctbo1+","+c1.cons + "," + punctbo2); - if (c0.unigram) features.add(c0.cons + "," + punctbo1+","+c1.consbo + "," + punctbo2); - features.add(c0.consbo + "," + punctbo1+","+c1.consbo + "," + punctbo2); + if (bigram1) features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + punctbo2); + if (c1.unigram) features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + punctbo2); + if (c0.unigram) features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + punctbo2); + features.add(c0.consbo + "," + punctbo1 + "," + c1.consbo + "," + punctbo2); } } } } else { //punct1s == null //cons(0),cons(1),punctbo(2),cons(2) - for (Iterator pi2=punct2s.iterator();pi2.hasNext();) { - String punctbo2 = punctbo(pi2.next(),c2.index <= 0 ? c2.index -1 : c2.index); - if (trigram) features.add(c0.cons + "," + c1.cons + "," + punctbo2+","+c2.cons); + for (Iterator pi2 = punct2s.iterator(); pi2.hasNext();) { + String punctbo2 = punctbo(pi2.next(),c2.index <= 0 ? c2.index - 1 : c2.index); + if (trigram) features.add(c0.cons + "," + c1.cons + "," + punctbo2 + "," + c2.cons); - if (bigram2) features.add(c0.consbo + "," + c1.cons + "," + punctbo2+ "," + c2.cons); - if (c0.unigram && c2.unigram) features.add(c0.cons + "," + c1.consbo + "," + punctbo2+","+c2.cons); - if (bigram1) features.add(c0.cons + "," + c1.cons + "," + punctbo2+","+c2.consbo); + if (bigram2) features.add(c0.consbo + "," + c1.cons + "," + punctbo2 + "," + c2.cons); + if (c0.unigram && c2.unigram) features.add(c0.cons + "," + c1.consbo + "," + punctbo2 + "," + c2.cons); + if (bigram1) features.add(c0.cons + "," + c1.cons + "," + punctbo2 + "," + c2.consbo); - if (c2.unigram) features.add(c0.consbo + "," + c1.consbo + "," + punctbo2+","+c2.cons); - if (c1.unigram) features.add(c0.consbo + "," + c1.cons + "," + punctbo2+","+c2.consbo); - if (c0.unigram) features.add(c0.cons + "," + c1.consbo + "," + punctbo2+","+c2.consbo); + if (c2.unigram) features.add(c0.consbo + "," + c1.consbo + "," + punctbo2 + "," + c2.cons); + if (c1.unigram) features.add(c0.consbo + "," + c1.cons + "," + punctbo2 + "," + c2.consbo); + if (c0.unigram) features.add(c0.cons + "," + c1.consbo + "," + punctbo2 + "," + c2.consbo); - features.add(c0.consbo + "," + c1.consbo + "," + punctbo2+","+c2.consbo); + features.add(c0.consbo + "," + c1.consbo + "," + punctbo2 + "," + c2.consbo); if (zeroBackOff) { if (bigram1) features.add(c0.cons + "," + c1.cons + "," + punctbo2); @@ -247,19 +247,19 @@ protected void cons3(List features, Cons c0, Cons c1, Cons c2, Collectio else { if (punct1s != null) { //cons(0),punctbo(1),cons(1),cons(2) - for (Iterator pi1=punct1s.iterator();pi1.hasNext();) { - String punctbo1 = punctbo(pi1.next(),c1.index <= 0 ? c1.index -1 : c1.index); - if (trigram) features.add(c0.cons + "," + punctbo1 +","+ c1.cons +","+c2.cons); + for (Iterator pi1 = punct1s.iterator(); pi1.hasNext();) { + String punctbo1 = punctbo(pi1.next(), c1.index <= 0 ? c1.index - 1 : c1.index); + if (trigram) features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + c2.cons); - if (bigram2) features.add(c0.consbo + "," + punctbo1 +","+ c1.cons +","+c2.cons); - if (c0.unigram && c2.unigram) features.add(c0.cons + "," + punctbo1 +","+ c1.consbo +","+c2.cons); - if (bigram1) features.add(c0.cons + "," + punctbo1 +","+ c1.cons +","+c2.consbo); + if (bigram2) features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + c2.cons); + if (c0.unigram && c2.unigram) features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + c2.cons); + if (bigram1) features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + c2.consbo); - if (c2.unigram) features.add(c0.consbo + "," + punctbo1 +","+ c1.consbo +","+c2.cons); - if (c1.unigram) features.add(c0.consbo + "," + punctbo1 +","+ c1.cons +","+c2.consbo); - if (c0.unigram) features.add(c0.cons + "," + punctbo1 +","+ c1.consbo +","+c2.consbo); + if (c2.unigram) features.add(c0.consbo + "," + punctbo1 + "," + c1.consbo + "," + c2.cons); + if (c1.unigram) features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + c2.consbo); + if (c0.unigram) features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + c2.consbo); - features.add(c0.consbo + "," + punctbo1 +","+ c1.consbo +","+c2.consbo); + features.add(c0.consbo + "," + punctbo1 + "," + c1.consbo + "," + c2.consbo); //zero backoff case covered by cons(0)cons(1) } @@ -292,8 +292,8 @@ protected void cons3(List features, Cons c0, Cons c1, Cons c2, Collectio protected void surround(Parse node, int i, String type, Collection punctuation, List features) { StringBuilder feat = new StringBuilder(20); feat.append("s").append(i).append("="); - if (punctuation !=null) { - for (Iterator pi=punctuation.iterator();pi.hasNext();) { + if (punctuation != null) { + for (Iterator pi = punctuation.iterator(); pi.hasNext();) { Parse punct = pi.next(); if (node != null) { feat.append(node.getHead().getCoveredText()).append("|").append(type).append("|").append(node.getType()).append("|").append(punct.getType()); @@ -383,7 +383,7 @@ protected void getFrontierNodes(List rf, Parse[] nodes) { int leftIndex = 0; int prevHeadIndex = -1; - for (int fi=0;fi rf, Parse[] nodes) { } } } - for (int ni=leftIndex;ni chunkEvents, Parse[] chunks) { boolean start = true; String ctype = c.getType(); Parse[] kids = c.getChildren(); - for (int ti=0,tl=kids.length;ti tagEvents, Parse[] chunks) { } else { Parse[] kids = c.getChildren(); - for (int ti=0,tl=kids.length;ti= 0) { - t_2=tags[x_2]; - p_2=preds[x_2]; - w_2=words[x_2]; + t_2 = tags[x_2]; + p_2 = preds[x_2]; + w_2 = words[x_2]; } else { - t_2=EOS; - p_2=EOS; - w_2=EOS; + t_2 = EOS; + p_2 = EOS; + w_2 = EOS; } // chunkandpostag(-1) if (x_1 >= 0) { - t_1=tags[x_1]; - p_1=preds[x_1]; - w_1=words[x_1]; + t_1 = tags[x_1]; + p_1 = preds[x_1]; + w_1 = words[x_1]; } else { - t_1=EOS; - p_1=EOS; - w_1=EOS; + t_1 = EOS; + p_1 = EOS; + w_1 = EOS; } // chunkandpostag(0) - t0=tags[i]; - w0=words[i]; + t0 = tags[i]; + w0 = words[i]; // chunkandpostag(1) if (x1 < tags.length) { - t1=tags[x1]; - w1=words[x1]; + t1 = tags[x1]; + w1 = words[x1]; } else { - t1=EOS; - w1=EOS; + t1 = EOS; + w1 = EOS; } // chunkandpostag(2) if (x2 < tags.length) { - t2=tags[x2]; - w2=words[x2]; + t2 = tags[x2]; + w2 = words[x2]; } else { - t2=EOS; - w2=EOS; + t2 = EOS; + w2 = EOS; } - String cacheKey = i +t_2+t1+t0+t1+t2+p_2+p_1; - if (contextsCache!= null) { + String cacheKey = i + t_2 + t1 + t0 + t1 + t2 + p_2 + p_1; + if (contextsCache != null) { if (wordsKey == words) { String[] contexts = contextsCache.get(cacheKey); if (contexts != null) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkSampleStream.java index 2d8dae257..bc940097e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkSampleStream.java @@ -82,7 +82,7 @@ public ChunkSample read() throws IOException { boolean start = true; String ctype = c.getType(); Parse[] kids = c.getChildren(); - for (int ti=0,tl=kids.length;ti nodes = new LinkedList(); nodes.add(parse); - while(nodes.size() != 0) { + while (nodes.size() != 0) { Parse node = nodes.remove(0); Parse[] children = node.getChildren(); if (children.length == 1 && node.getType().equals(children[0].getType())) { @@ -799,20 +800,20 @@ public static void pruneParse(Parse parse) { public static void fixPossesives(Parse parse) { Parse[] tags = parse.getTagNodes(); - for (int ti=0;ti tags = new LinkedList<>(); List nodes = new LinkedList<>(); nodes.addAll(this.parts); - while(nodes.size() != 0) { + while (nodes.size() != 0) { Parse p = nodes.remove(0); if (p.isPosTag()) { tags.add(p); @@ -981,7 +982,7 @@ public Parse getCommonParent(Parse node) { } Set parents = new HashSet<>(); Parse cparent = this; - while(cparent != null) { + while (cparent != null) { parents.add(cparent); cparent = cparent.getParent(); } @@ -1012,10 +1013,10 @@ else if (!this.label.equals(p.label)) { if (!this.text.equals(p.text)) { return false; } - if (this.parts.size() != p.parts.size()){ - return false; + if (this.parts.size() != p.parts.size()) { + return false; } - for (int ci=0;ci " + kids[ki].getParent().hashCode() + " " + kids[ki].getParent().getType() + " " + kids[ki].getCoveredText()); codeTree(kids[ki],nlevels); @@ -1099,7 +1099,7 @@ public void showCodeTree() { * @param tokens */ public static void addNames(String tag, Span[] names, Parse[] tokens) { - for (int ni=0,nn=names.length;ni 1 && nameSpan.contains(grandKids[grandKids.length-1].getSpan())) { - commonParent.insert(new Parse(commonParent.getText(),commonParent.getSpan(),tag,1.0,commonParent.getHeadIndex())); + if (grandKids.length > 1 && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) { + commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(), tag,1.0, commonParent.getHeadIndex())); } } } @@ -1148,9 +1148,9 @@ public static void main(String[] args) throws java.io.IOException { System.err.println("Reads training parses (one-sentence-per-line) and displays parse structure."); System.exit(1); } - int ai=0; + int ai = 0; boolean fixPossesives = false; - while(args[ai].startsWith("-") && ai < args.length) { + while (args[ai].startsWith("-") && ai < args.length) { if (args[ai].equals("-fun")) { Parse.useFunctionTags(true); ai++; diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java index 9cba69735..0787b8514 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java @@ -30,13 +30,14 @@ public class ParserChunkerSequenceValidator implements SequenceValidator public ParserChunkerSequenceValidator(String outcomes[]) { continueStartMap = new HashMap<>(outcomes.length); - for (int oi=0, on = outcomes.length; oi nextPunct = p.getNextPunctuationSet(); if (nextPunct != null) { - for (Iterator pit=nextPunct.iterator();pit.hasNext();) { + for (Iterator pit = nextPunct.iterator(); pit.hasNext();) { Parse punct = pit.next(); punctProduction.append(punct.getType()).append(","); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java index 505f688c9..fd558ff30 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java @@ -94,7 +94,7 @@ public Parser(ParserModel model) { * @param headRules The head rules for head word perculation. * @param beamSize The number of different parses kept during parsing. * @param advancePercentage The minimal amount of probability mass which advanced outcomes must represent. - * Only outcomes which contribute to the top "advancePercentage" will be explored. + * Only outcomes which contribute to the top "advancePercentage" will be explored. */ private Parser(MaxentModel buildModel, MaxentModel checkModel, POSTagger tagger, Chunker chunker, HeadRules headRules, int beamSize, double advancePercentage) { super(tagger, chunker, headRules, beamSize, advancePercentage); @@ -143,7 +143,7 @@ protected Parse[] advanceParses(final Parse p, double probMass) { /* The index of the node which will be labeled in this iteration of advancing the parse. */ int advanceNodeIndex; /* The node which will be labeled in this iteration of advancing the parse. */ - Parse advanceNode=null; + Parse advanceNode = null; Parse[] originalChildren = p.getChildren(); Parse[] children = collapsePunctuation(originalChildren,punctSet); int numNodes = children.length; @@ -225,7 +225,7 @@ else if (contTypeMap.containsKey(tag)) { flat &= cons[ci].isPosTag(); } if (!flat) { //flat chunks are done by chunker - if (lastStartIndex == 0 && advanceNodeIndex == numNodes-1) { //check for top node to include end and begining punctuation + if (lastStartIndex == 0 && advanceNodeIndex == numNodes - 1) { //check for top node to include end and begining punctuation //System.err.println("ParserME.advanceParses: reducing entire span: "+new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd())+" "+lastStartType+" "+java.util.Arrays.asList(children)); newParse2.insert(new Parse(p.getText(), p.getSpan(), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType))); } @@ -250,7 +250,7 @@ else if (contTypeMap.containsKey(tag)) { /** * @deprecated Please do not use anymore, use the ObjectStream train methods instead! This method - * will be removed soon. + * will be removed soon. */ @Deprecated public static AbstractModel train(ObjectStream es, int iterations, int cut) throws java.io.IOException { diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java index 8ded9ecf2..2bf8a2390 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java @@ -85,27 +85,27 @@ public static Parse[] reduceChunks(Parse[] chunks, int ci, Parse parent) { // perform reduce int reduceStart = ci; int reduceEnd = ci; - while (reduceStart >=0 && chunks[reduceStart].getParent() == parent) { + while (reduceStart >= 0 && chunks[reduceStart].getParent() == parent) { reduceStart--; } reduceStart++; Parse[] reducedChunks; if (!type.equals(AbstractBottomUpParser.TOP_NODE)) { - reducedChunks = new Parse[chunks.length-(reduceEnd-reduceStart+1)+1]; //total - num_removed + 1 (for new node) + reducedChunks = new Parse[chunks.length - (reduceEnd - reduceStart + 1) + 1]; //total - num_removed + 1 (for new node) //insert nodes before reduction System.arraycopy(chunks, 0, reducedChunks, 0, reduceStart); //insert reduced node - reducedChunks[reduceStart]=parent; + reducedChunks[reduceStart] = parent; //propagate punctuation sets parent.setPrevPunctuation(chunks[reduceStart].getPreviousPunctuationSet()); parent.setNextPunctuation(chunks[reduceEnd].getNextPunctuationSet()); //insert nodes after reduction - int ri=reduceStart+1; - for (int rci=reduceEnd+1;rci parseEvents, Parse[] chunks) { } //perform reduce int reduceStart = ci; - while (reduceStart >=0 && chunks[reduceStart].getParent() == parent) { + while (reduceStart >= 0 && chunks[reduceStart].getParent() == parent) { reduceStart--; } reduceStart++; chunks = reduceChunks(chunks,ci,parent); - ci=reduceStart-1; //ci will be incremented at end of loop + ci = reduceStart - 1; //ci will be incremented at end of loop } else { if (etype == ParserEventTypeEnum.CHECK) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java index 86b74ccc4..009254c90 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java @@ -61,6 +61,7 @@ public void serialize(opennlp.tools.parser.lang.en.HeadRules artifact, OutputStr private static class HeadRule { public boolean leftToRight; public String[] tags; + public HeadRule(boolean l2r, String[] tags) { leftToRight = l2r; @@ -213,23 +214,23 @@ private void readHeadRules(BufferedReader str) throws IOException { public void labelGaps(Stack stack) { if (stack.size() > 4) { //Constituent con0 = (Constituent) stack.get(stack.size()-1); - Constituent con1 = stack.get(stack.size()-2); - Constituent con2 = stack.get(stack.size()-3); - Constituent con3 = stack.get(stack.size()-4); - Constituent con4 = stack.get(stack.size()-5); + Constituent con1 = stack.get(stack.size() - 2); + Constituent con2 = stack.get(stack.size() - 3); + Constituent con3 = stack.get(stack.size() - 4); + Constituent con4 = stack.get(stack.size() - 5); //System.err.println("con0="+con0.label+" con1="+con1.label+" con2="+con2.label+" con3="+con3.label+" con4="+con4.label); //subject extraction if (con1.getLabel().equals("NP") && con2.getLabel().equals("S") && con3.getLabel().equals("SBAR")) { - con1.setLabel(con1.getLabel()+"-G"); - con2.setLabel(con2.getLabel()+"-G"); - con3.setLabel(con3.getLabel()+"-G"); + con1.setLabel(con1.getLabel() + "-G"); + con2.setLabel(con2.getLabel() + "-G"); + con3.setLabel(con3.getLabel() + "-G"); } //object extraction else if (con1.getLabel().equals("NP") && con2.getLabel().equals("VP") && con3.getLabel().equals("S") && con4.getLabel().equals("SBAR")) { - con1.setLabel(con1.getLabel()+"-G"); - con2.setLabel(con2.getLabel()+"-G"); - con3.setLabel(con3.getLabel()+"-G"); - con4.setLabel(con4.getLabel()+"-G"); + con1.setLabel(con1.getLabel() + "-G"); + con2.setLabel(con2.getLabel() + "-G"); + con3.setLabel(con3.getLabel() + "-G"); + con4.setLabel(con4.getLabel() + "-G"); } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java index 6f5e32ac2..7c9808e47 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java @@ -69,9 +69,11 @@ public void serialize(opennlp.tools.parser.lang.es.AncoraSpanishHeadRules artifa artifact.serialize(new OutputStreamWriter(out, "UTF-8")); } } + private static class HeadRule { public boolean leftToRight; public String[] tags; + public HeadRule(boolean l2r, String[] tags) { leftToRight = l2r; @@ -133,7 +135,7 @@ public Parse getHead(Parse[] constituents, String type) { return null; } HeadRule hr; - if (type.equals("SN") || type.equals("GRUP.NOM")) { + if (type.equals("SN") || type.equals("GRUP.NOM")) { String[] tags1 = {"AQA.*","AQC.*","GRUP\\.A","S\\.A","NC.*S.*", "NP.*","NC.*P.*", "GRUP\\.NOM"}; for (int i = 0; i < constituents.length; i++) { @@ -173,7 +175,7 @@ else if ((hr = headRules.get(type)) != null) { if (hr.leftToRight) { for (int ti = 0; ti < tl; ti++) { for (int ci = 0; ci < cl; ci++) { - if (constituents[ci].getType().matches(tags[ti])) { + if (constituents[ci].getType().matches(tags[ti])) { return constituents[ci]; } } @@ -215,23 +217,23 @@ private void readHeadRules(BufferedReader str) throws IOException { public void labelGaps(Stack stack) { if (stack.size() > 4) { //Constituent con0 = (Constituent) stack.get(stack.size()-1); - Constituent con1 = stack.get(stack.size()-2); - Constituent con2 = stack.get(stack.size()-3); - Constituent con3 = stack.get(stack.size()-4); - Constituent con4 = stack.get(stack.size()-5); + Constituent con1 = stack.get(stack.size() - 2); + Constituent con2 = stack.get(stack.size() - 3); + Constituent con3 = stack.get(stack.size() - 4); + Constituent con4 = stack.get(stack.size() - 5); //System.err.println("con0="+con0.label+" con1="+con1.label+" con2="+con2.label+" con3="+con3.label+" con4="+con4.label); //subject extraction if (con1.getLabel().equals("SN") && con2.getLabel().equals("S") && con3.getLabel().equals("GRUP.NOM")) { - con1.setLabel(con1.getLabel()+"-G"); - con2.setLabel(con2.getLabel()+"-G"); - con3.setLabel(con3.getLabel()+"-G"); + con1.setLabel(con1.getLabel() + "-G"); + con2.setLabel(con2.getLabel() + "-G"); + con3.setLabel(con3.getLabel() + "-G"); } //object extraction else if (con1.getLabel().equals("SN") && con2.getLabel().equals("GRUP.VERB") && con3.getLabel().equals("S") && con4.getLabel().equals("GRUP.NOM")) { - con1.setLabel(con1.getLabel()+"-G"); - con2.setLabel(con2.getLabel()+"-G"); - con3.setLabel(con3.getLabel()+"-G"); - con4.setLabel(con4.getLabel()+"-G"); + con1.setLabel(con1.getLabel() + "-G"); + con2.setLabel(con2.getLabel() + "-G"); + con3.setLabel(con3.getLabel() + "-G"); + con4.setLabel(con4.getLabel() + "-G"); } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java index 93b23ad7c..5613ab3a6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java @@ -40,9 +40,9 @@ public String[] getContext(Object o) { return getContext((Parse[]) parts[0], (Integer) parts[1],(List) parts[2], (Integer) parts[3]); } - private boolean containsPunct(Collection puncts, String punct){ - if (puncts != null){ - for (Iterator pi=puncts.iterator();pi.hasNext();) { + private boolean containsPunct(Collection puncts, String punct) { + if (puncts != null) { + for (Iterator pi = puncts.iterator(); pi.hasNext();) { Parse p = pi.next(); if (p.getType().equals(punct)) { return true; @@ -63,8 +63,8 @@ public String[] getContext(Parse[] constituents, int index, List rightFro List features = new ArrayList<>(100); Parse fn = rightFrontier.get(rfi); Parse fp = null; - if (rfi+1 < rightFrontier.size()) { - fp = rightFrontier.get(rfi+1); + if (rfi + 1 < rightFrontier.size()) { + fp = rightFrontier.get(rfi + 1); } Parse p_1 = null; if (rightFrontier.size() > 0) { @@ -72,25 +72,25 @@ public String[] getContext(Parse[] constituents, int index, List rightFro } Parse p0 = constituents[index]; Parse p1 = null; - if (index+1 < constituents.length) { - p1 = constituents[index+1]; + if (index + 1 < constituents.length) { + p1 = constituents[index + 1]; } Collection punct_1fs = fn.getPreviousPunctuationSet(); Collection punct_1s = p0.getPreviousPunctuationSet(); Collection punct1s = p0.getNextPunctuationSet(); - String consfp = cons(fp,-3); - String consf = cons(fn,-2); - String consp_1 = cons(p_1,-1); - String consp0 = cons(p0,0); - String consp1 = cons(p1,1); + String consfp = cons(fp, -3); + String consf = cons(fn, -2); + String consp_1 = cons(p_1, -1); + String consp0 = cons(p0, 0); + String consp1 = cons(p1, 1); - String consbofp = consbo(fp,-3); - String consbof = consbo(fn,-2); - String consbop_1 = consbo(p_1,-1); - String consbop0 = consbo(p0,0); - String consbop1 = consbo(p1,1); + String consbofp = consbo(fp, -3); + String consbof = consbo(fn, -2); + String consbop_1 = consbo(p_1, -1); + String consbop0 = consbo(p0, 0); + String consbop1 = consbo(p1, 1); Cons cfp = new Cons(consfp,consbofp,-3,true); Cons cf = new Cons(consf,consbof,-2,true); @@ -116,12 +116,12 @@ public String[] getContext(Parse[] constituents, int index, List rightFro //productions String prod = production(fn,false); //String punctProd = production(fn,true,punctSet); - features.add("pn="+prod); - features.add("pd="+prod+","+p0.getType()); - features.add("ps="+fn.getType()+"->"+fn.getType()+","+p0.getType()); + features.add("pn=" + prod); + features.add("pd=" + prod + "," + p0.getType()); + features.add("ps=" + fn.getType() + "->" + fn.getType() + "," + p0.getType()); if (punct_1s != null) { StringBuilder punctBuf = new StringBuilder(5); - for (Iterator pi=punct_1s.iterator();pi.hasNext();) { + for (Iterator pi = punct_1s.iterator(); pi.hasNext();) { Parse punct = pi.next(); punctBuf.append(punct.getType()).append(","); } @@ -144,12 +144,12 @@ public String[] getContext(Parse[] constituents, int index, List rightFro features.add("jn="+jn.getType()); } */ - int headDistance = (p0.getHeadIndex()-fn.getHeadIndex()); - features.add("hd="+headDistance); - features.add("nd="+ rfi); + int headDistance = (p0.getHeadIndex() - fn.getHeadIndex()); + features.add("hd=" + headDistance); + features.add("nd=" + rfi); - features.add("nd="+p0.getType()+"."+ rfi); - features.add("hd="+p0.getType()+"."+headDistance); + features.add("nd=" + p0.getType() + "." + rfi); + features.add("hd=" + p0.getType() + "." + headDistance); //features.add("fs="+rightFrontier.size()); //paired punct features if (containsPunct(punct_1s,"''")) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java index 589a1dc88..52c124fce 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java @@ -65,7 +65,7 @@ public String[] getContext(Parse[] constituents, int index) { } Parse p2 = null; - if (index +2 < ps) { + if (index + 2 < ps) { p2 = constituents[index + 2]; } @@ -140,8 +140,8 @@ public String[] getContext(Parse[] constituents, int index) { cons3(features,c_1,c0,c1,punct_1s,punct_1s,true,true,true); if (rf.isEmpty()) { - features.add(EOS+","+consp0); - features.add(EOS+","+consbop0); + features.add(EOS + "," + consp0); + features.add(EOS + "," + consbop0); } return features.toArray(new String[features.size()]); diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java index de6824d0d..325690a4f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java @@ -47,12 +47,12 @@ public String[] getContext(Parse parent, Parse[] constituents, int index, boolea features.add("default"); Parse[] children = Parser.collapsePunctuation(parent.getChildren(),punctSet); Parse pstart = children[0]; - Parse pend = children[children.length-1]; + Parse pend = children[children.length - 1]; String type = parent.getType(); checkcons(pstart, "begin", type, features); checkcons(pend, "last", type, features); - String production = "p="+production(parent,false); - String punctProduction = "pp="+production(parent,true); + String production = "p=" + production(parent,false); + String punctProduction = "pp=" + production(parent,true); features.add(production); features.add(punctProduction); @@ -72,10 +72,10 @@ public String[] getContext(Parse parent, Parse[] constituents, int index, boolea if (trimFrontier) { int pi = rf.indexOf(parent); if (pi == -1) { - throw new RuntimeException("Parent not found in right frontier:"+parent+" rf="+rf); + throw new RuntimeException("Parent not found in right frontier:" + parent + " rf=" + rf); } else { - for (int ri=0;ri<=pi;ri++) { + for (int ri = 0; ri <= pi; ri++) { //System.err.println(pi+" removing "+((Parse)rf.get(0)).getType()+" "+rf.get(0)+" "+(rf.size()-1)+" remain"); rf.remove(0); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java index 85d6cf958..e6c13481a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java @@ -149,10 +149,10 @@ public static List getRightFrontier(Parse root,Set punctSet) { else { top = root; } - while(!top.isPosTag()) { + while (!top.isPosTag()) { rf.add(0,top); Parse[] kids = top.getChildren(); - top = kids[kids.length-1]; + top = kids[kids.length - 1]; } return new ArrayList<>(rf); } @@ -164,10 +164,10 @@ private void setBuilt(Parse p) { } else { if (isComplete(p)) { - p.setLabel(Parser.BUILT+"."+Parser.COMPLETE); + p.setLabel(Parser.BUILT + "." + Parser.COMPLETE); } else { - p.setLabel(Parser.BUILT+"."+Parser.INCOMPLETE); + p.setLabel(Parser.BUILT + "." + Parser.INCOMPLETE); } } } @@ -178,7 +178,7 @@ private void setComplete(Parse p) { p.setLabel(Parser.COMPLETE); } else { - p.setLabel(Parser.BUILT+"."+Parser.COMPLETE); + p.setLabel(Parser.BUILT + "." + Parser.COMPLETE); } } @@ -187,7 +187,7 @@ private void setIncomplete(Parse p) { p.setLabel(Parser.INCOMPLETE); } else { - p.setLabel(Parser.BUILT+"."+Parser.INCOMPLETE); + p.setLabel(Parser.BUILT + "." + Parser.INCOMPLETE); } } @@ -204,9 +204,9 @@ private boolean isComplete(Parse p) { @Override protected Parse[] advanceChunks(Parse p, double minChunkScore) { Parse[] parses = super.advanceChunks(p, minChunkScore); - for (int pi=0;pi probMass) { //just incomplete advances + else if (1 - cprobs[completeIndex] > probMass) { //just incomplete advances setIncomplete(newNode); - newParse1.addProb(Math.log(1-cprobs[completeIndex])); + newParse1.addProb(Math.log(1 - cprobs[completeIndex])); if (debugOn) System.out.println("Only advancing incomplete node"); } else { //both complete and incomplete advance @@ -295,12 +298,12 @@ else if (1-cprobs[completeIndex] > probMass) { //just incomplete advances newParse2.insert(newNode2); newParse2.addProb(Math.log(bprob)); newParsesList.add(newParse2); - newParse2.addProb(Math.log(1-cprobs[completeIndex])); + newParse2.addProb(Math.log(1 - cprobs[completeIndex])); setIncomplete(newNode2); //set incomplete for non-clone } } else { - if (debugOn) System.out.println("building "+tag+" "+bprob); + if (debugOn) System.out.println("building " + tag + " " + bprob); } } } @@ -311,10 +314,10 @@ else if (1-cprobs[completeIndex] > probMass) { //just incomplete advances //mark nodes as built if (checkComplete) { if (isComplete(advanceNode)) { - newParse1.setChild(originalAdvanceIndex,Parser.BUILT+"."+Parser.COMPLETE); //replace constituent being labeled to create new derivation + newParse1.setChild(originalAdvanceIndex,Parser.BUILT + "." + Parser.COMPLETE); //replace constituent being labeled to create new derivation } else { - newParse1.setChild(originalAdvanceIndex,Parser.BUILT+"."+Parser.INCOMPLETE); //replace constituent being labeled to create new derivation + newParse1.setChild(originalAdvanceIndex,Parser.BUILT + "." + Parser.INCOMPLETE); //replace constituent being labeled to create new derivation } } else { @@ -326,40 +329,42 @@ else if (1-cprobs[completeIndex] > probMass) { //just incomplete advances } else { List rf = getRightFrontier(p,punctSet); - for (int fi=0,fs=rf.size();fi threshold and // if !checkComplete then prevent daughter attaching to chunk // if checkComplete then prevent daughter attacing to complete node or // sister attaching to an incomplete node if (prob > q && ( - (!checkComplete && (attachments[ai]!= daughterAttachIndex || !isComplete(fn))) + (!checkComplete && (attachments[ai] != daughterAttachIndex || !isComplete(fn))) || - (checkComplete && ((attachments[ai]== daughterAttachIndex && !isComplete(fn)) || (attachments[ai] == sisterAttachIndex && isComplete(fn)))))) { + (checkComplete && ((attachments[ai] == daughterAttachIndex && !isComplete(fn)) || (attachments[ai] == sisterAttachIndex && isComplete(fn)))))) { Parse newParse2 = newParse1.cloneRoot(fn,originalZeroIndex); Parse[] newKids = Parser.collapsePunctuation(newParse2.getChildren(),punctSet); //remove node from top level since were going to attach it (including punct) - for (int ri=originalZeroIndex+1;ri<=originalAdvanceIndex;ri++) { + for (int ri = originalZeroIndex + 1; ri <= originalAdvanceIndex; ri++) { //System.out.println(at"-removing "+(originalZeroIndex+1)+" "+newParse2.getChildren()[originalZeroIndex+1]); - newParse2.remove(originalZeroIndex+1); + newParse2.remove(originalZeroIndex + 1); } List crf = getRightFrontier(newParse2,punctSet); Parse updatedNode; - if (attachments[ai] == daughterAttachIndex) {//attach daughter + if (attachments[ai] == daughterAttachIndex) { //attach daughter updatedNode = crf.get(fi); updatedNode.add(advanceNode,headRules); } else { //attach sister Parse psite; - if (fi+1 < crf.size()) { - psite = crf.get(fi+1); + if (fi + 1 < crf.size()) { + psite = crf.get(fi + 1); updatedNode = psite.adjoin(advanceNode,headRules); } else { @@ -369,7 +374,7 @@ else if (1-cprobs[completeIndex] > probMass) { //just incomplete advances } } //update spans affected by attachment - for (int ni=fi+1;ni probMass) { //just incomplete advances newParse2.addProb(Math.log(cprobs[completeIndex])); if (debugOn) System.out.println("Only advancing complete node"); } - else if (1-cprobs[completeIndex] > probMass) { + else if (1 - cprobs[completeIndex] > probMass) { setIncomplete(updatedNode); - newParse2.addProb(Math.log(1-cprobs[completeIndex])); + newParse2.addProb(Math.log(1 - cprobs[completeIndex])); if (debugOn) System.out.println("Only advancing incomplete node"); } else { @@ -394,17 +399,22 @@ else if (1-cprobs[completeIndex] > probMass) { newParse3.addProb(Math.log(cprobs[completeIndex])); newParsesList.add(newParse3); setIncomplete(updatedNode); - newParse2.addProb(Math.log(1-cprobs[completeIndex])); - if (debugOn) System.out.println("Advancing both complete and incomplete nodes; c="+cprobs[completeIndex]); + newParse2.addProb(Math.log(1 - cprobs[completeIndex])); + if (debugOn) System.out.println("Advancing both complete and incomplete nodes; c=" + cprobs[completeIndex]); } } } else { - if (debugOn) System.out.println("Skipping "+fn.getType()+"."+fn.getLabel()+" "+fn+" daughter="+(attachments[ai] == daughterAttachIndex)+" complete="+isComplete(fn)+" prob="+prob); + if (debugOn) + System.out.println("Skipping " + fn.getType() + "." + fn.getLabel() + " " + + fn + " daughter=" + (attachments[ai] == daughterAttachIndex) + + " complete=" + isComplete(fn) + " prob=" + prob); } } - if(checkComplete && !isComplete(fn)) { - if (debugOn) System.out.println("Stopping at incomplete node("+fi+"): "+fn.getType()+"."+fn.getLabel()+" "+fn); + if (checkComplete && !isComplete(fn)) { + if (debugOn) + System.out.println("Stopping at incomplete node(" + fi + "): " + + fn.getType() + "." + fn.getLabel() + " " + fn); break; } } @@ -422,7 +432,7 @@ protected void advanceTop(Parse p) { public static ParserModel train(String languageCode, ObjectStream parseSamples, HeadRules rules, TrainingParameters mlParams) - throws IOException { + throws IOException { Map manifestInfoEntries = new HashMap<>(); diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java index 5949414b5..bc6159936 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java @@ -48,7 +48,8 @@ public class ParserEventStream extends AbstractParserEventStream { private static final boolean debug = false; - public ParserEventStream(ObjectStream d, HeadRules rules, ParserEventTypeEnum etype, Dictionary dict) { + public ParserEventStream(ObjectStream d, HeadRules rules, + ParserEventTypeEnum etype, Dictionary dict) { super(d, rules, etype, dict); } @@ -75,7 +76,7 @@ private Map getNonAdjoinedParent(Parse node) { Parse parent = node.getParent(); int index = indexOf(node,parent); parents.put(parent, index); - while(parent.getType().equals(node.getType())) { + while (parent.getType().equals(node.getType())) { node = parent; parent = parent.getParent(); index = indexOf(node,parent); @@ -86,7 +87,7 @@ private Map getNonAdjoinedParent(Parse node) { private int indexOf(Parse child, Parse parent) { Parse[] kids = Parser.collapsePunctuation(parent.getChildren(),punctSet); - for (int ki=0;ki parseEvents, Parse[] chunks) { * Specifically, these nodes don't have all their children attached like the parents of * the chunk nodes do.*/ Parse[] currentChunks = new Parse[chunks.length]; - for (int ci=0;ci parseEvents, Parse[] chunks) { //see if chunk is complete if (lastChild(chunks[ci], parent)) { if (etype == ParserEventTypeEnum.CHECK) { - parseEvents.add(new Event(Parser.COMPLETE, checkContextGenerator.getContext(currentChunks[ci],currentChunks, ci,false))); + parseEvents.add(new Event(Parser.COMPLETE, + checkContextGenerator.getContext(currentChunks[ci],currentChunks, ci,false))); } currentChunks[ci].setLabel(Parser.COMPLETE); parent.setLabel(Parser.COMPLETE); } else { if (etype == ParserEventTypeEnum.CHECK) { - parseEvents.add(new Event(Parser.INCOMPLETE, checkContextGenerator.getContext(currentChunks[ci],currentChunks,ci,false))); + parseEvents.add(new Event(Parser.INCOMPLETE, + checkContextGenerator.getContext(currentChunks[ci],currentChunks,ci,false))); } currentChunks[ci].setLabel(Parser.INCOMPLETE); parent.setLabel(Parser.COMPLETE); @@ -201,56 +206,68 @@ protected void addParseEvents(List parseEvents, Parse[] chunks) { /* Node selected for attachment. */ Parse attachNode = null; int attachNodeIndex = -1; - if (ci == 0){ - Parse top = new Parse(currentChunks[ci].getText(),new Span(0,currentChunks[ci].getText().length()),AbstractBottomUpParser.TOP_NODE,1,0); + if (ci == 0) { + Parse top = new Parse(currentChunks[ci].getText(), + new Span(0,currentChunks[ci].getText().length()),AbstractBottomUpParser.TOP_NODE,1,0); top.insert(currentChunks[ci]); } else { /* Right frontier consisting of partially-built nodes based on current state of the parse.*/ List currentRightFrontier = Parser.getRightFrontier(currentChunks[0],punctSet); if (currentRightFrontier.size() != rightFrontier.size()) { - System.err.println("fontiers mis-aligned: "+currentRightFrontier.size()+" != "+rightFrontier.size()+" "+currentRightFrontier+" "+rightFrontier); + System.err.println("fontiers mis-aligned: " + currentRightFrontier.size() + " != " + + rightFrontier.size() + " " + currentRightFrontier + " " + rightFrontier); System.exit(1); } Map parents = getNonAdjoinedParent(chunks[ci]); //try daughters first. - for (int cfi=0;cfi "+parents); + if (debug) + System.err.println("Looking at attachment site (" + cfi + "): " + + cfn.getType() + " ci=" + i + " cs=" + nonPunctChildCount(cfn) + + ", " + cfn + " :for " + currentChunks[ci].getType() + " " + + currentChunks[ci] + " -> " + parents); + if (attachNode == null && i != null && i == nonPunctChildCount(cfn)) { attachType = Parser.ATTACH_DAUGHTER; attachNodeIndex = cfi; attachNode = cfn; if (etype == ParserEventTypeEnum.ATTACH) { - parseEvents.add(new Event(attachType, attachContextGenerator.getContext(currentChunks, ci, currentRightFrontier, attachNodeIndex))); + parseEvents.add(new Event(attachType, attachContextGenerator.getContext(currentChunks, + ci, currentRightFrontier, attachNodeIndex))); } //System.err.println("daughter attach "+attachNode+" at "+fi); } } else { - if (debug) System.err.println("Skipping ("+cfi+"): "+cfn.getType()+","+cfn.getPreviousPunctuationSet()+" "+cfn+" :for "+currentChunks[ci].getType()+" "+currentChunks[ci]+" -> "+parents); + if (debug) + System.err.println("Skipping (" + cfi + "): " + cfn.getType() + "," + + cfn.getPreviousPunctuationSet() + " " + cfn + " :for " + + currentChunks[ci].getType() + " " + currentChunks[ci] + " -> " + parents); } // Can't attach past first incomplete node. if (Parser.checkComplete && cfn.getLabel().equals(Parser.INCOMPLETE)) { - if (debug) System.err.println("breaking on incomplete:"+cfn.getType()+" "+cfn); + if (debug) System.err.println("breaking on incomplete:" + cfn.getType() + " " + cfn); break; } } //try sisters, and generate non-attach events. - for (int cfi=0;cfi es = new ParserEventStream( new ParseSampleStream(new PlainTextByLineStream( new SystemInputStreamFactory(), Charset.defaultCharset())), diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java index 581fed556..469bf0076 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java @@ -67,6 +67,7 @@ public DefaultPOSContextGenerator(int cacheSize, Dictionary dict) { contextsCache = new Cache<>(cacheSize); } } + protected static String[] getPrefixes(String lex) { String[] prefs = new String[PREFIX_LENGTH]; for (int li = 0; li < PREFIX_LENGTH; li++) { @@ -127,9 +128,9 @@ public String[] getContext(int index, Object[] tokens, String[] tags) { else { prev = SB; // Sentence Beginning } - String cacheKey = index+tagprev+tagprevprev; + String cacheKey = index + tagprev + tagprevprev; if (contextsCache != null) { - if (wordsKey == tokens){ + if (wordsKey == tokens) { String[] cachedContexts = contextsCache.get(cacheKey); if (cachedContexts != null) { return cachedContexts; @@ -178,7 +179,7 @@ public String[] getContext(int index, Object[] tokens, String[] tags) { if (prevprev != null) { e.add("pp=" + prevprev); if (tagprevprev != null) { - e.add("t2=" + tagprevprev+","+tagprev); + e.add("t2=" + tagprevprev + "," + tagprev); } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java index ef237a8e3..455e26d7b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSDictionary.java @@ -70,7 +70,7 @@ public POSDictionary(boolean caseSensitive) { * @param word The word. * * @return A list of valid tags for the specified word or - * null if no information is available for that word. + * null if no information is available for that word. */ public String[] getTags(String word) { if (caseSensitive) { @@ -224,10 +224,11 @@ public void insert(Entry entry) throws InvalidFormatException { StringList word = entry.getTokens(); if (word.size() != 1) - throw new InvalidFormatException("Each entry must have exactly one token! "+word); + throw new InvalidFormatException("Each entry must have exactly one token! " + word); newPosDict.dictionary.put(word.getToken(0), tags); - }}); + } + }); newPosDict.caseSensitive = isCaseSensitive; diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java index 21522a5ce..bfe5c9029 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java @@ -157,7 +157,7 @@ public POSTaggerFactory getFactory() { * @return ngram dictionary or null if not used */ public Dictionary getNgramDictionary() { - if(getFactory() != null) + if (getFactory() != null) return getFactory().getDictionary(); return null; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java index 7a22ac791..ecb9d3238 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSample.java @@ -77,12 +77,12 @@ private void checkArguments() { ", tags: " + tags.size()); } - if (sentence.contains(null)) { - throw new IllegalArgumentException("null elements are not allowed in sentence tokens!"); - } - if (tags.contains(null)) { - throw new IllegalArgumentException("null elements are not allowed in tags!"); - } + if (sentence.contains(null)) { + throw new IllegalArgumentException("null elements are not allowed in sentence tokens!"); + } + if (tags.contains(null)) { + throw new IllegalArgumentException("null elements are not allowed in tags!"); + } } public String[] getSentence() { @@ -132,7 +132,7 @@ public static POSSample parse(String sentenceString) throws InvalidFormatExcepti } sentence[i] = tokenTags[i].substring(0, split); - tags[i] = tokenTags[i].substring(split+1); + tags[i] = tokenTags[i].substring(split + 1); } return new POSSample(sentence, tags); diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java index 034308488..4dd31e01e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleEventStream.java @@ -74,7 +74,7 @@ public static List generateEvents(String[] sentence, String[] tags, Object[] additionalContext, POSContextGenerator cg) { List events = new ArrayList(sentence.length); - for (int i=0; i < sentence.length; i++) { + for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java index 00f820ff7..b81fc48b8 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSSampleSequenceStream.java @@ -15,7 +15,7 @@ * limitations under the License. */ - package opennlp.tools.postag; +package opennlp.tools.postag; import java.io.IOException; @@ -63,7 +63,7 @@ public Sequence read() throws IOException { String tags[] = sample.getTags(); Event[] events = new Event[sentence.length]; - for (int i=0; i < sentence.length; i++) { + for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java index b4e5d12f2..3010e03be 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerCrossValidator.java @@ -105,7 +105,7 @@ public void evaluate(ObjectStream samples, int nFolds) throws IOExcep Dictionary ngramDict = this.factory.getDictionary(); if (ngramDict == null) { - if(this.ngramCutoff != null) { + if (this.ngramCutoff != null) { System.err.print("Building ngram dictionary ... "); ngramDict = POSTaggerME.buildNGramDictionary(trainingSampleStream, this.ngramCutoff); diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerEvaluationMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerEvaluationMonitor.java index f62b497d9..d3ece7a92 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerEvaluationMonitor.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerEvaluationMonitor.java @@ -19,6 +19,6 @@ import opennlp.tools.util.eval.EvaluationMonitor; -public interface POSTaggerEvaluationMonitor extends EvaluationMonitor{ +public interface POSTaggerEvaluationMonitor extends EvaluationMonitor { } diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java index 6115994d2..765f4a0b3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java @@ -111,13 +111,13 @@ public void setTagDictionary(TagDictionary dictionary) { } public TagDictionary getTagDictionary() { - if(this.posDictionary == null && artifactProvider != null) + if (this.posDictionary == null && artifactProvider != null) this.posDictionary = artifactProvider.getArtifact(TAG_DICTIONARY_ENTRY_NAME); return this.posDictionary; } public Dictionary getDictionary() { - if(this.ngramDictionary == null && artifactProvider != null) + if (this.ngramDictionary == null && artifactProvider != null) this.ngramDictionary = artifactProvider.getArtifact(NGRAM_DICTIONARY_ENTRY_NAME); return this.ngramDictionary; } @@ -196,7 +196,7 @@ public void validateArtifactMap() throws InvalidFormatException { if (tagdictEntry != null) { if (tagdictEntry instanceof POSDictionary) { - if(!this.artifactProvider.isLoadedFromSerialized()) { + if (!this.artifactProvider.isLoadedFromSerialized()) { AbstractModel posModel = this.artifactProvider .getArtifact(POSModel.POS_MODEL_ENTRY_NAME); POSDictionary posDict = (POSDictionary) tagdictEntry; diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java index 366a91a83..5a3961c6f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java @@ -152,7 +152,7 @@ public String[][] tag(int numTaggings, String[] sentence) { Sequence[] bestSequences = model.bestSequences(numTaggings, sentence, null, contextGen, sequenceValidator); String[][] tags = new String[bestSequences.length][]; - for (int si=0;si t = bestSequences[si].getOutcomes(); tags[si] = t.toArray(new String[t.size()]); } @@ -208,8 +208,8 @@ public String[] getOrderedTags(List words, List tags, int index, } } orderedTags[i] = posModel.getOutcome(max); - if (tprobs != null){ - tprobs[i]=probs[max]; + if (tprobs != null) { + tprobs[i] = probs[max]; } probs[max] = 0; } @@ -280,7 +280,7 @@ public static Dictionary buildNGramDictionary(ObjectStream samples, i NGramModel ngramModel = new NGramModel(); POSSample sample; - while((sample = samples.read()) != null) { + while ((sample = samples.read()) != null) { String[] words = sample.getSentence(); if (words.length > 0) diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java index 822c43b1c..5dd64b274 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java @@ -59,8 +59,8 @@ public DefaultSDContextGenerator(char[] eosCharacters) { * the set of induced abbreviations. * * @param inducedAbbreviations a Set of Strings - * representing induced abbreviations in the training data. - * Example: "Mr." + * representing induced abbreviations in the training data. + * Example: "Mr." * * @param eosCharacters */ diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java index 3e3ab4235..2b79ce4b9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java @@ -38,30 +38,30 @@ public interface EndOfSentenceScanner { */ char[] getEndOfSentenceCharacters(); - /** - * The receiver scans the specified string for sentence ending characters and - * returns their offsets. - * - * @param s a String value - * @return a List of Integer objects. - */ - List getPositions(String s); + /** + * The receiver scans the specified string for sentence ending characters and + * returns their offsets. + * + * @param s a String value + * @return a List of Integer objects. + */ + List getPositions(String s); - /** - * The receiver scans `buf' for sentence ending characters and - * returns their offsets. - * - * @param buf a StringBuffer value - * @return a List of Integer objects. - */ - List getPositions(StringBuffer buf); + /** + * The receiver scans `buf' for sentence ending characters and + * returns their offsets. + * + * @param buf a StringBuffer value + * @return a List of Integer objects. + */ + List getPositions(StringBuffer buf); - /** - * The receiver scans `cbuf' for sentence ending characters and - * returns their offsets. - * - * @param cbuf a char[] value - * @return a List of Integer objects. - */ - List getPositions(char[] cbuf); + /** + * The receiver scans `cbuf' for sentence ending characters and + * returns their offsets. + * + * @param cbuf a char[] value + * @return a List of Integer objects. + */ + List getPositions(char[] cbuf); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java index d140052e6..fab03f678 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java @@ -72,7 +72,7 @@ public SDCrossValidator(String languageCode, TrainingParameters params, /** * @deprecated use {@link #SDCrossValidator(String, TrainingParameters, SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)} - * instead and pass in a TrainingParameters object. + * instead and pass in a TrainingParameters object. */ public SDCrossValidator(String languageCode) { this(languageCode, ModelUtil.createDefaultTrainingParameters()); @@ -93,10 +93,10 @@ public void evaluate(ObjectStream samples, int nFolds) throws IO CrossValidationPartitioner partitioner = new CrossValidationPartitioner(samples, nFolds); - while (partitioner.hasNext()) { + while (partitioner.hasNext()) { - CrossValidationPartitioner.TrainingSampleStream trainingSampleStream = - partitioner.next(); + CrossValidationPartitioner.TrainingSampleStream trainingSampleStream = + partitioner.next(); SentenceModel model; diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java index 499854592..2eb358550 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java @@ -49,12 +49,12 @@ public class SentenceDetectorME implements SentenceDetector { /** * Constant indicates a sentence split. */ - public static final String SPLIT ="s"; + public static final String SPLIT = "s"; /** * Constant indicates no sentence split. */ - public static final String NO_SPLIT ="n"; + public static final String NO_SPLIT = "n"; /** * The maximum entropy model to use to evaluate contexts. @@ -113,7 +113,7 @@ public SentenceDetectorME(SentenceModel model, Factory factory) { } private static Set getAbbreviations(Dictionary abbreviations) { - if(abbreviations == null) { + if (abbreviations == null) { return Collections.emptySet(); } return abbreviations.asStringSet(); @@ -177,7 +177,7 @@ public Span[] sentPosDetect(String s) { if (i + 1 < end && enders.get(i + 1) < fws) { continue; } - if(positions.size() > 0 && cint < positions.get(positions.size()-1)) continue; + if (positions.size() > 0 && cint < positions.get(positions.size() - 1)) continue; double[] probs = model.eval(cgen.getContext(sb, cint)); String bestOutcome = model.getBestOutcome(probs); @@ -205,37 +205,37 @@ public Span[] sentPosDetect(String s) { // string does not contain sentence end positions if (starts.length == 0) { - // remove leading and trailing whitespace - int start = 0; - int end = s.length(); + // remove leading and trailing whitespace + int start = 0; + int end = s.length(); - while (start < s.length() && StringUtil.isWhitespace(s.charAt(start))) - start++; + while (start < s.length() && StringUtil.isWhitespace(s.charAt(start))) + start++; - while (end > 0 && StringUtil.isWhitespace(s.charAt(end - 1))) - end--; + while (end > 0 && StringUtil.isWhitespace(s.charAt(end - 1))) + end--; - if (end - start > 0) { - sentProbs.add(1d); - return new Span[] {new Span(start, end)}; - } - else - return new Span[0]; + if (end - start > 0) { + sentProbs.add(1d); + return new Span[] {new Span(start, end)}; + } + else + return new Span[0]; } // Convert the sentence end indexes to spans boolean leftover = starts[starts.length - 1] != s.length(); - Span[] spans = new Span[leftover? starts.length + 1 : starts.length]; + Span[] spans = new Span[leftover ? starts.length + 1 : starts.length]; - for (int si=0; si < starts.length; si++) { + for (int si = 0; si < starts.length; si++) { int start; - if (si==0) { + if (si == 0) { start = 0; } else { - start = starts[si-1]; + start = starts[si - 1]; } // A span might contain only white spaces, in this case the length of @@ -250,9 +250,9 @@ public Span[] sentPosDetect(String s) { } if (leftover) { - Span span = new Span(starts[starts.length-1],s.length()).trim(s); + Span span = new Span(starts[starts.length - 1], s.length()).trim(s); if (span.length() > 0) { - spans[spans.length-1] = span; + spans[spans.length - 1] = span; sentProbs.add(1d); } } @@ -261,7 +261,7 @@ public Span[] sentPosDetect(String s) { */ for (int i = 0; i < spans.length; i++) { double prob = sentProbs.get(i); - spans[i]= new Span(spans[i], prob); + spans[i] = new Span(spans[i], prob); } @@ -273,8 +273,7 @@ public Span[] sentPosDetect(String s) { * calls to sentDetect(). * * @return probability for each sentence returned for the most recent - * call to sentDetect. If not applicable an empty array is - * returned. + * call to sentDetect. If not applicable an empty array is returned. */ public double[] getSentenceProbabilities() { double[] sentProbArray = new double[sentProbs.size()]; diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java index 9355f88ba..cdd03464a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java @@ -116,7 +116,7 @@ protected void validateArtifactMap() throws InvalidFormatException { if (!ModelUtil.validateOutcomes(getMaxentModel(), SentenceDetectorME.SPLIT, SentenceDetectorME.NO_SPLIT)) { throw new InvalidFormatException("The maxent model is not compatible " + - "with the sentence detector!"); + "with the sentence detector!"); } } @@ -155,7 +155,7 @@ public char[] getEosCharacters() { } public static void main(String[] args) throws FileNotFoundException, IOException, InvalidFormatException { - if (args.length < 3){ + if (args.length < 3) { System.err.println("SentenceModel [-abbreviationsDictionary] [-useTokenEnd] languageCode packageName modelName"); System.exit(1); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java index d168150e4..71f50db45 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java @@ -78,8 +78,8 @@ public String getDocument() { /** * Retrieves the sentences. * - * @return the begin indexes of the sentences - * in the document. + * @return the begin indexes of the sentences in the document. + */ public Span[] getSentences() { return sentences.toArray(new Span[sentences.size()]); diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java index d182b26d0..9d4647d4f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java @@ -52,7 +52,7 @@ public SDContextGenerator createSentenceContextGenerator(String languageCode, Se if ("th".equals(languageCode)) { return new SentenceContextGenerator(); - } else if("pt".equals(languageCode)) { + } else if ("pt".equals(languageCode)) { return new DefaultSDContextGenerator(abbreviations, ptEosCharacters); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/th/SentenceContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/th/SentenceContextGenerator.java index abf5aa715..baab080d7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/th/SentenceContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/th/SentenceContextGenerator.java @@ -43,20 +43,20 @@ protected void collectFeatures(String prefix, String suffix, String previous, St collectFeats.add(buf.toString()); buf.setLength(0); - collectFeats.add("p1="+prefix.substring(Math.max(prefix.length()-1,0))); - collectFeats.add("p2="+prefix.substring(Math.max(prefix.length()-2,0))); - collectFeats.add("p3="+prefix.substring(Math.max(prefix.length()-3,0))); - collectFeats.add("p4="+prefix.substring(Math.max(prefix.length()-4,0))); - collectFeats.add("p5="+prefix.substring(Math.max(prefix.length()-5,0))); - collectFeats.add("p6="+prefix.substring(Math.max(prefix.length()-6,0))); - collectFeats.add("p7="+prefix.substring(Math.max(prefix.length()-7,0))); - - collectFeats.add("n1="+suffix.substring(0,Math.min(1,suffix.length()))); - collectFeats.add("n2="+suffix.substring(0,Math.min(2,suffix.length()))); - collectFeats.add("n3="+suffix.substring(0,Math.min(3,suffix.length()))); - collectFeats.add("n4="+suffix.substring(0,Math.min(4,suffix.length()))); - collectFeats.add("n5="+suffix.substring(0,Math.min(5,suffix.length()))); - collectFeats.add("n6="+suffix.substring(0,Math.min(6,suffix.length()))); - collectFeats.add("n7="+suffix.substring(0,Math.min(7,suffix.length()))); + collectFeats.add("p1=" + prefix.substring(Math.max(prefix.length() - 1,0))); + collectFeats.add("p2=" + prefix.substring(Math.max(prefix.length() - 2,0))); + collectFeats.add("p3=" + prefix.substring(Math.max(prefix.length() - 3,0))); + collectFeats.add("p4=" + prefix.substring(Math.max(prefix.length() - 4,0))); + collectFeats.add("p5=" + prefix.substring(Math.max(prefix.length() - 5,0))); + collectFeats.add("p6=" + prefix.substring(Math.max(prefix.length() - 6,0))); + collectFeats.add("p7=" + prefix.substring(Math.max(prefix.length() - 7,0))); + + collectFeats.add("n1=" + suffix.substring(0,Math.min(1, suffix.length()))); + collectFeats.add("n2=" + suffix.substring(0,Math.min(2, suffix.length()))); + collectFeats.add("n3=" + suffix.substring(0,Math.min(3, suffix.length()))); + collectFeats.add("n4=" + suffix.substring(0,Math.min(4, suffix.length()))); + collectFeats.add("n5=" + suffix.substring(0,Math.min(5, suffix.length()))); + collectFeats.add("n6=" + suffix.substring(0,Math.min(6, suffix.length()))); + collectFeats.add("n7=" + suffix.substring(0,Math.min(7, suffix.length()))); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DefaultTokenContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DefaultTokenContextGenerator.java index 8050bddfb..99c717d07 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DefaultTokenContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DefaultTokenContextGenerator.java @@ -91,7 +91,7 @@ protected List createContext(String sentence, int index) { preds.add("p1=bok"); } addCharPreds("f1", sentence.charAt(index), preds); - if (index+1 < sentence.length()) { + if (index + 1 < sentence.length()) { addCharPreds("f2", sentence.charAt(index + 1), preds); preds.add("f12=" + sentence.charAt(index) + sentence.charAt(index + 1)); } @@ -102,7 +102,7 @@ protected List createContext(String sentence, int index) { preds.add("cc");//character code } - if(index == sentence.length() - 1 && inducedAbbreviations.contains(sentence)) { + if (index == sentence.length() - 1 && inducedAbbreviations.contains(sentence)) { preds.add("pabb"); } @@ -128,16 +128,16 @@ else if (StringUtil.isWhitespace(c)) { preds.add(key + "_ws"); } else { - if (c=='.' || c=='?' || c=='!') { + if (c == '.' || c == '?' || c == '!') { preds.add(key + "_eos"); } - else if (c=='`' || c=='"' || c=='\'') { + else if (c == '`' || c == '"' || c == '\'') { preds.add(key + "_quote"); } - else if (c=='[' || c=='{' || c=='(') { + else if (c == '[' || c == '{' || c == '(') { preds.add(key + "_lp"); } - else if (c==']' || c=='}' || c==')') { + else if (c == ']' || c == '}' || c == ')') { preds.add(key + "_rp"); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java index 2fbb4a3db..00d2dc72f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DetokenizationDictionary.java @@ -106,7 +106,7 @@ public DetokenizationDictionary(String tokens[], } } - public DetokenizationDictionary(InputStream in) throws IOException, InvalidFormatException{ + public DetokenizationDictionary(InputStream in) throws IOException, InvalidFormatException { DictionarySerializer.create(in, new EntryInserter() { public void insert(Entry entry) throws InvalidFormatException { @@ -116,7 +116,7 @@ public void insert(Entry entry) throws InvalidFormatException { StringList word = entry.getTokens(); if (word.size() != 1) - throw new InvalidFormatException("Each entry must have exactly one token! "+word); + throw new InvalidFormatException("Each entry must have exactly one token! " + word); // parse operation Operation operation = Operation.parse(operationString); @@ -125,7 +125,8 @@ public void insert(Entry entry) throws InvalidFormatException { throw new InvalidFormatException("Unknown operation type: " + operationString); operationTable.put(word.getToken(0), operation); - }}); + } + }); } DetokenizationDictionary.Operation getOperation(String token) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java index 3be19db78..cb38004c5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/DictionaryDetokenizer.java @@ -43,8 +43,7 @@ public DetokenizationOperation[] detokenize(String[] tokens) { Set matchingTokens = new HashSet(); for (int i = 0; i < tokens.length; i++) { - DetokenizationDictionary.Operation dictOperation = - dict.getOperation(tokens[i]); + DetokenizationDictionary.Operation dictOperation = dict.getOperation(tokens[i]); if (dictOperation == null) { operations[i] = Detokenizer.DetokenizationOperation.NO_OPERATION; diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java index c3d4b295f..03f15ca20 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java @@ -38,7 +38,7 @@ public class SimpleTokenizer extends AbstractTokenizer { /** * @deprecated Use INSTANCE field instead to obtain an instance, constructor - * will be made private in the future. + * will be made private in the future. */ @Deprecated public SimpleTokenizer() { @@ -112,8 +112,8 @@ public static void main(String[] args) throws IOException { if (tokens.length > 0) { System.out.print(tokens[0]); } - for (int ti=1,tn=tokens.length;ti createEvents(TokenSample tokenSample) { //adjust cSpan to text offsets cSpan = new Span(cSpan.getStart() + start, cSpan.getEnd() + start); //should we skip this token - if (ctok.length() > 1 - && (!skipAlphaNumerics || !alphaNumeric.matcher(ctok).matches())) { + if (ctok.length() > 1 && (!skipAlphaNumerics || !alphaNumeric.matcher(ctok).matches())) { //find offsets of annotated tokens inside of candidate tokens boolean foundTrainingTokens = false; @@ -141,7 +140,7 @@ else if (tokens[ti].getEnd() < cSpan.getStart()) { else { if (logger.isLoggable(Level.WARNING)) { logger.warning("Bad training token: " + tokens[ti] + " cand: " + cSpan + - " token="+text.substring(tokens[ti].getStart(), tokens[ti].getEnd())); + " token=" + text.substring(tokens[ti].getStart(), tokens[ti].getEnd())); } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java index 1a40dd511..813eac415 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java @@ -162,10 +162,10 @@ private static void addToken(StringBuilder sample, List tokenSpans, String public static TokenSample parse(String sampleString, String separatorChars) { if (sampleString == null) { - throw new IllegalArgumentException("sampleString must not be null!"); + throw new IllegalArgumentException("sampleString must not be null!"); } if (separatorChars == null) { - throw new IllegalArgumentException("separatorChars must not be null!"); + throw new IllegalArgumentException("separatorChars must not be null!"); } Span whitespaceTokenSpans[] = WhitespaceTokenizer.INSTANCE.tokenizePos(sampleString); diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSampleStream.java index 19e18c514..3fecaaf56 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSampleStream.java @@ -51,7 +51,7 @@ public TokenSampleStream(ObjectStream sampleStrings, String separatorCha throw new IllegalArgumentException("separatorChars must not be null!"); } - this.separatorChars= separatorChars; + this.separatorChars = separatorChars; } public TokenSampleStream(ObjectStream sentences) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java index aae4a83be..7564ee3a0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java @@ -45,21 +45,21 @@ */ public interface Tokenizer { - /** - * Splits a string into its atomic parts - * - * @param s The string to be tokenized. - * @return The String[] with the individual tokens as the array - * elements. - */ - String[] tokenize(String s); + /** + * Splits a string into its atomic parts + * + * @param s The string to be tokenized. + * @return The String[] with the individual tokens as the array + * elements. + */ + String[] tokenize(String s); - /** - * Finds the boundaries of atomic parts in a string. - * - * @param s The string to be tokenized. - * @return The Span[] with the spans (offsets into s) for each - * token as the individuals array elements. - */ - Span[] tokenizePos(String s); + /** + * Finds the boundaries of atomic parts in a string. + * + * @param s The string to be tokenized. + * @return The Span[] with the spans (offsets into s) for each + * token as the individuals array elements. + */ + Span[] tokenizePos(String s); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java index fe9e4c621..42f4b344d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerCrossValidator.java @@ -52,19 +52,19 @@ public void evaluate(ObjectStream samples, int nFolds) throws IOExc CrossValidationPartitioner partitioner = new CrossValidationPartitioner<>(samples, nFolds); - while (partitioner.hasNext()) { + while (partitioner.hasNext()) { - CrossValidationPartitioner.TrainingSampleStream trainingSampleStream = - partitioner.next(); + CrossValidationPartitioner.TrainingSampleStream trainingSampleStream = + partitioner.next(); - // Maybe throws IOException if temporary file handling fails ... - TokenizerModel model = TokenizerME.train(trainingSampleStream, this.factory, params); + // Maybe throws IOException if temporary file handling fails ... + TokenizerModel model = TokenizerME.train(trainingSampleStream, this.factory, params); - TokenizerEvaluator evaluator = new TokenizerEvaluator(new TokenizerME(model), listeners); + TokenizerEvaluator evaluator = new TokenizerEvaluator(new TokenizerME(model), listeners); - evaluator.evaluate(trainingSampleStream.getTestSampleStream()); - fmeasure.mergeInto(evaluator.getFMeasure()); - } + evaluator.evaluate(trainingSampleStream.getTestSampleStream()); + fmeasure.mergeInto(evaluator.getFMeasure()); + } } public FMeasure getFMeasure() { diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java index 491b6fab3..4444e781d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java @@ -77,12 +77,12 @@ public class TokenizerME extends AbstractTokenizer { /** * Constant indicates a token split. */ - public static final String SPLIT ="T"; + public static final String SPLIT = "T"; /** * Constant indicates no token split. */ - public static final String NO_SPLIT ="F"; + public static final String NO_SPLIT = "F"; /** * Alpha-Numeric Pattern @@ -147,7 +147,7 @@ public TokenizerME(TokenizerModel model, Factory factory) { } private static Set getAbbreviations(Dictionary abbreviations) { - if(abbreviations == null) { + if (abbreviations == null) { return Collections.emptySet(); } return abbreviations.asStringSet(); @@ -158,8 +158,7 @@ private static Set getAbbreviations(Dictionary abbreviations) { * calls to {@link TokenizerME#tokenize(String)} or {@link TokenizerME#tokenizePos(String)}. * * @return probability for each token returned for the most recent - * call to tokenize. If not applicable an empty array is - * returned. + * call to tokenize. If not applicable an empty array is returned. */ public double[] getTokenProbabilities() { double[] tokProbArray = new double[tokProbs.size()]; diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java index ed84b4ee8..497c8afbe 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java @@ -145,7 +145,7 @@ public boolean useAlphaNumericOptimization() { } public static void main(String[] args) throws IOException { - if (args.length < 3){ + if (args.length < 3) { System.err.println("TokenizerModel [-alphaNumericOptimization] languageCode packageName modelName"); System.exit(1); } @@ -167,7 +167,7 @@ public static void main(String[] args) throws IOException { new FileInputStream(modelName))).getModel(); TokenizerModel packageModel = new TokenizerModel(model, null, - TokenizerFactory.create(null, languageCode, null, alphaNumericOptimization, null)); + TokenizerFactory.create(null, languageCode, null, alphaNumericOptimization, null)); OutputStream out = null; try { diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenStream.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenStream.java index 4644b4ea9..4f6694f92 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenStream.java @@ -48,7 +48,7 @@ public String read() throws IOException { // Shorten string by one to get rid of last space if (whitespaceSeparatedTokenString.length() > 0) { whitespaceSeparatedTokenString.setLength( - whitespaceSeparatedTokenString.length() -1 ); + whitespaceSeparatedTokenString.length() - 1 ); } return whitespaceSeparatedTokenString.toString(); diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java index d51d7da7d..05ae04ce5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java @@ -45,7 +45,7 @@ private WhitespaceTokenizer() { public Span[] tokenizePos(String d) { int tokStart = -1; - Listtokens = new ArrayList(); + List tokens = new ArrayList(); boolean inTok = false; //gather up potential tokens diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java index 416621e88..ef2a9f80e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java @@ -37,7 +37,7 @@ public class Factory { * @return the alpha numeric pattern for the language or the default pattern. */ public Pattern getAlphanumeric(String languageCode) { - if("pt".equals(languageCode)) { + if ("pt".equals(languageCode)) { return Pattern.compile("^[0-9a-záãâàéêíóõôúüçA-ZÁÃÂÀÉÊÍÓÕÔÚÜÇ]+$"); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/en/TokenSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/en/TokenSampleStream.java index d95d05bd5..8006d7bef 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/en/TokenSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/en/TokenSampleStream.java @@ -53,14 +53,14 @@ public boolean hasNext() { public TokenSample next() { String[] tokens = line.split("\\s+"); if (tokens.length == 0) { - evenq =true; + evenq = true; } StringBuilder sb = new StringBuilder(line.length()); List spans = new ArrayList(); int length = 0; - for (int ti=0;ti= 0 ? tokens[ti-1] : ""; + String lastToken = ti - 1 >= 0 ? tokens[ti - 1] : ""; if (token.equals("-LRB-")) { token = "("; } @@ -79,7 +79,7 @@ else if (token.equals("-RCB-")) { else if (!alphaNumeric.matcher(token).find() || token.startsWith("'") || token.equalsIgnoreCase("n't")) { if ((token.equals("``") || token.equals("--") || token.equals("$") || token.equals("(") || token.equals("&") || token.equals("#") || - (token.equals("\"") && (evenq && ti != tokens.length-1))) + (token.equals("\"") && (evenq && ti != tokens.length - 1))) && (!lastToken.equals("(") || !lastToken.equals("{"))) { //System.out.print(" "+token); length++; @@ -99,8 +99,8 @@ else if (!alphaNumeric.matcher(token).find() || token.startsWith("'") || token.e } } if (token.equals("\"")) { - if (ti == tokens.length -1) { - evenq=true; + if (ti == tokens.length - 1) { + evenq = true; } else { evenq = !evenq; @@ -110,8 +110,8 @@ else if (!alphaNumeric.matcher(token).find() || token.startsWith("'") || token.e sb.append(" "); } sb.append(token); - spans.add(new Span(length,length+token.length())); - length+=token.length(); + spans.add(new Span(length, length + token.length())); + length += token.length(); } //System.out.println(); try { @@ -135,32 +135,32 @@ private static void usage() { public static void main(String[] args) throws IOException { boolean showSpans = false; - int ai=0; + int ai = 0; while (ai < args.length) { if (args[ai].equals("-spans")) { showSpans = true; } else { - System.err.println("Unknown option "+args[ai]); + System.err.println("Unknown option " + args[ai]); usage(); } ai++; } TokenSampleStream tss = new TokenSampleStream(System.in); - while(tss.hasNext()) { + while (tss.hasNext()) { TokenSample ts = tss.next(); String text = ts.getText(); System.out.println(text); Span[] tokenSpans = ts.getTokenSpans(); - int ti=0; + int ti = 0; if (showSpans) { - for (int i=0;i= 0 && i==tokenSpans[ti-1].getEnd()-1) { + for (int i = 0; i < text.length(); i++) { + if (ti - 1 >= 0 && i == tokenSpans[ti - 1].getEnd() - 1) { System.out.print("]"); } - else if (i==tokenSpans[ti].getStart()) { + else if (i == tokenSpans[ti].getStart()) { ti++; - if (ti-1 >= 0 && i==tokenSpans[ti-1].getEnd()-1) { + if (ti - 1 >= 0 && i == tokenSpans[ti - 1].getEnd() - 1) { System.out.print("|"); } else { diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/AbstractEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/util/AbstractEventStream.java index c636378f9..20184ea18 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/AbstractEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/AbstractEventStream.java @@ -43,10 +43,10 @@ public AbstractEventStream(ObjectStream samples) { * Creates events for the provided sample. * * @param sample the sample for which training {@link Event}s - * are be created. + * are be created. * * @return an {@link Iterator} of training events or - * an empty {@link Iterator}. + * an empty {@link Iterator}. */ protected abstract Iterator createEvents(T sample); diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java index 499aa8f70..448bc8560 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java @@ -46,9 +46,9 @@ public abstract class BaseToolFactory { public BaseToolFactory() { } - /** - * Initializes the ToolFactory with an artifact provider. - */ + /** + * Initializes the ToolFactory with an artifact provider. + */ protected void init(ArtifactProvider artifactProvider) { this.artifactProvider = artifactProvider; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java b/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java index a3ee25667..e88a90753 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/HashList.java @@ -70,7 +70,7 @@ public List put(Object key, Object value) { o.add(value); - if(o.size() == 1) + if (o.size() == 1) return null; else return o; @@ -84,7 +84,7 @@ public boolean remove(Object key, Object value) { else { boolean r = l.remove(value); if (l.size() == 0) { - remove(key); + remove(key); } return r; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/Heap.java b/opennlp-tools/src/main/java/opennlp/tools/util/Heap.java index a27f333f0..00d79bae0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/Heap.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/Heap.java @@ -57,11 +57,11 @@ public interface Heap { */ int size(); - /** - * Returns whether the heap is empty. - * @return true if the heap is empty; false otherwise. - */ - boolean isEmpty(); + /** + * Returns whether the heap is empty. + * @return true if the heap is empty; false otherwise. + */ + boolean isEmpty(); /** * Returns an iterator over the elements of the heap. No specific ordering of these diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java index 41f48724f..4a57a4991 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.util; import java.io.IOException; diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/ListHeap.java b/opennlp-tools/src/main/java/opennlp/tools/util/ListHeap.java index 7a60debda..4fdd2e8e3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/ListHeap.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/ListHeap.java @@ -194,13 +194,14 @@ public boolean isEmpty() { @Deprecated public static void main(String[] args) { - Heap heap = new ListHeap(5); - for (int ai=0;ai heap = new ListHeap(5); + for (int ai = 0; ai < args.length; ai++) { + heap.add(Integer.parseInt(args[ai])); + } + + while (!heap.isEmpty()) { + System.out.print(heap.extract() + " "); + } + System.out.println(); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/MarkableFileInputStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/MarkableFileInputStreamFactory.java index 1dddf35ca..830e6970f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/MarkableFileInputStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/MarkableFileInputStreamFactory.java @@ -30,7 +30,7 @@ public class MarkableFileInputStreamFactory implements InputStreamFactory { private File file; public MarkableFileInputStreamFactory(File file) throws FileNotFoundException { - if(!file.exists()) { + if (!file.exists()) { throw new FileNotFoundException("File '" + file + "' cannot be found"); } this.file = file; diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java b/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java index 9ad98ddf5..0ee8fa59b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/ObjectStreamUtils.java @@ -87,7 +87,7 @@ public void close() { /** * Creates a single concatenated ObjectStream from multiple individual * ObjectStreams with the same type. - * + * * @param streams * @return */ @@ -111,7 +111,7 @@ public T read() throws IOException { object = streams[streamIndex].read(); if (object == null) - streamIndex++; + streamIndex++; } return object; @@ -130,6 +130,7 @@ public void close() throws IOException { for (ObjectStream stream : streams) { stream.close(); } - }}; + } + }; } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java b/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java index 9c7cdc10b..eb3a41068 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/PlainTextByLineStream.java @@ -59,7 +59,7 @@ public void reset() throws IOException { in = new BufferedReader(new InputStreamReader(inputStreamFactory.createInputStream(), encoding)); } else if (channel == null) { - in.reset(); + in.reset(); } else { channel.position(0); @@ -69,11 +69,11 @@ else if (channel == null) { public void close() throws IOException { - if (in != null && channel == null) { - in.close(); - } - else if (channel != null) { - channel.close(); - } + if (in != null && channel == null) { + in.close(); + } + else if (channel != null) { + channel.close(); + } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/ReverseListIterator.java b/opennlp-tools/src/main/java/opennlp/tools/util/ReverseListIterator.java index c7e456dda..ee418d1ea 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/ReverseListIterator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/ReverseListIterator.java @@ -29,8 +29,8 @@ public class ReverseListIterator implements Iterator { private List list; public ReverseListIterator(List list) { - index = list.size()-1; - this.list=list; + index = list.size() - 1; + this.list = list; } public T next() { @@ -38,7 +38,7 @@ public T next() { } public boolean hasNext() { - return index >=0; + return index >= 0; } public void remove() { diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/Sequence.java b/opennlp-tools/src/main/java/opennlp/tools/util/Sequence.java index a81d958eb..4721cc635 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/Sequence.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/Sequence.java @@ -36,21 +36,21 @@ public Sequence() { } public Sequence(Sequence s) { - outcomes = new ArrayList(s.outcomes.size()+1); + outcomes = new ArrayList(s.outcomes.size() + 1); outcomes.addAll(s.outcomes); - probs = new ArrayList(s.probs.size()+1); + probs = new ArrayList(s.probs.size() + 1); probs.addAll(s.probs); score = s.score; } public Sequence(Sequence s,String outcome, double p) { - outcomes = new ArrayList(s.outcomes.size()+1); + outcomes = new ArrayList(s.outcomes.size() + 1); outcomes.addAll(s.outcomes); outcomes.add(outcome); - probs = new ArrayList(s.probs.size()+1); + probs = new ArrayList(s.probs.size() + 1); probs.addAll(s.probs); probs.add(p); - score = s.score+Math.log(p); + score = s.score + Math.log(p); } public Sequence(List outcomes) { @@ -104,13 +104,13 @@ public double getScore() { * @param ps a pre-allocated array to use to hold the values of the probabilities of the outcomes for this sequence. */ public void getProbs(double[] ps) { - for (int pi=0,pl=probs.size();pitrue if {@link CharSequence#length()} is - * 0 or null. - * - * @return true if {@link CharSequence#length()} is 0, otherwise - * false - * - * @since 1.5.1 - */ + * Returns true if {@link CharSequence#length()} is + * 0 or null. + * + * @return true if {@link CharSequence#length()} is 0, otherwise + * false + * + * @since 1.5.1 + */ public static boolean isEmpty(CharSequence theString) { - return theString.length() == 0; + return theString.length() == 0; } - + /** * Get mininum of three values. * @param a number a @@ -122,17 +122,17 @@ public static boolean isEmpty(CharSequence theString) { * @return the minimum */ private static int minimum(int a, int b, int c) { - int minValue; - minValue = a; - if (b < minValue) { - minValue = b; - } - if (c < minValue) { - minValue = c; - } - return minValue; + int minValue; + minValue = a; + if (b < minValue) { + minValue = b; + } + if (c < minValue) { + minValue = c; + } + return minValue; } - + /** * Computes the Levenshtein distance of two strings in a matrix. * Based on pseudo-code provided here: @@ -149,7 +149,7 @@ public static int[][] levenshteinDistance(String wordForm, String lemma) { int lemmaLength = lemma.length(); int cost; int[][] distance = new int[wordLength + 1][lemmaLength + 1]; - + if (wordLength == 0) { return distance; } @@ -179,71 +179,71 @@ public static int[][] levenshteinDistance(String wordForm, String lemma) { } return distance; } - + /** * Computes the Shortest Edit Script (SES) to convert a word into its lemma. * This is based on Chrupala's PhD thesis (2008). - * @param wordForm the token - * @param lemma the target lemma - * @param distance the levenshtein distance - * @param permutations the number of permutations - */ -public static void computeShortestEditScript(String wordForm, String lemma, int[][] distance, StringBuffer permutations) { - + * @param wordForm the token + * @param lemma the target lemma + * @param distance the levenshtein distance + * @param permutations the number of permutations + */ + public static void computeShortestEditScript(String wordForm, String lemma, int[][] distance, StringBuffer permutations) { + int n = distance.length; int m = distance[0].length; - + int wordFormLength = n - 1; int lemmaLength = m - 1; - while(true) { - - if (distance[wordFormLength][lemmaLength] == 0) { - break; - } - if ((lemmaLength > 0 && wordFormLength > 0) && (distance[wordFormLength - 1][lemmaLength - 1] < distance[wordFormLength][lemmaLength])) { - permutations.append('R').append(Integer.toString(wordFormLength - 1)).append(wordForm.charAt(wordFormLength - 1)).append(lemma.charAt(lemmaLength - 1)); - lemmaLength--; - wordFormLength--; - continue; - } - if (lemmaLength > 0 && (distance[wordFormLength][lemmaLength - 1] < distance[wordFormLength][lemmaLength])) { - permutations.append('I').append(Integer.toString(wordFormLength)).append(lemma.charAt(lemmaLength - 1)); - lemmaLength--; - continue; - } - if (wordFormLength > 0 && (distance[wordFormLength - 1][lemmaLength] < distance[wordFormLength][lemmaLength])) { - permutations.append('D').append(Integer.toString(wordFormLength - 1)).append(wordForm.charAt(wordFormLength - 1)); - wordFormLength--; - continue; - } - if ((wordFormLength > 0 && lemmaLength > 0) && (distance[wordFormLength - 1][lemmaLength - 1] == distance[wordFormLength][lemmaLength])) { - wordFormLength--; lemmaLength--; - continue ; - } - if (wordFormLength > 0 && (distance[wordFormLength - 1][lemmaLength] == distance[wordFormLength][lemmaLength])) { - wordFormLength--; - continue; - } - if (lemmaLength > 0 && (distance[wordFormLength][lemmaLength - 1] == distance[wordFormLength][lemmaLength])) { - lemmaLength--; - continue; - } + while (true) { + + if (distance[wordFormLength][lemmaLength] == 0) { + break; + } + if ((lemmaLength > 0 && wordFormLength > 0) && (distance[wordFormLength - 1][lemmaLength - 1] < distance[wordFormLength][lemmaLength])) { + permutations.append('R').append(Integer.toString(wordFormLength - 1)).append(wordForm.charAt(wordFormLength - 1)).append(lemma.charAt(lemmaLength - 1)); + lemmaLength--; + wordFormLength--; + continue; + } + if (lemmaLength > 0 && (distance[wordFormLength][lemmaLength - 1] < distance[wordFormLength][lemmaLength])) { + permutations.append('I').append(Integer.toString(wordFormLength)).append(lemma.charAt(lemmaLength - 1)); + lemmaLength--; + continue; + } + if (wordFormLength > 0 && (distance[wordFormLength - 1][lemmaLength] < distance[wordFormLength][lemmaLength])) { + permutations.append('D').append(Integer.toString(wordFormLength - 1)).append(wordForm.charAt(wordFormLength - 1)); + wordFormLength--; + continue; + } + if ((wordFormLength > 0 && lemmaLength > 0) && (distance[wordFormLength - 1][lemmaLength - 1] == distance[wordFormLength][lemmaLength])) { + wordFormLength--; lemmaLength--; + continue ; + } + if (wordFormLength > 0 && (distance[wordFormLength - 1][lemmaLength] == distance[wordFormLength][lemmaLength])) { + wordFormLength--; + continue; + } + if (lemmaLength > 0 && (distance[wordFormLength][lemmaLength - 1] == distance[wordFormLength][lemmaLength])) { + lemmaLength--; + continue; + } } -} + } -/** - * Read predicted SES by the lemmatizer model and apply the - * permutations to obtain the lemma from the wordForm. - * @param wordForm the wordForm - * @param permutations the permutations predicted by the lemmatizer model - * @return the lemma - */ -public static String decodeShortestEditScript(String wordForm, String permutations) { - - StringBuffer lemma = new StringBuffer(wordForm).reverse(); - - int permIndex = 0; - while(true) { + /** + * Read predicted SES by the lemmatizer model and apply the + * permutations to obtain the lemma from the wordForm. + * @param wordForm the wordForm + * @param permutations the permutations predicted by the lemmatizer model + * @return the lemma + */ + public static String decodeShortestEditScript(String wordForm, String permutations) { + + StringBuffer lemma = new StringBuffer(wordForm).reverse(); + + int permIndex = 0; + while (true) { if (permutations.length() <= permIndex) { break; } @@ -253,75 +253,75 @@ public static String decodeShortestEditScript(String wordForm, String permutatio //go to the next permutation letter permIndex++; if (nextOperation == 'R') { - String charAtPerm = Character.toString(permutations.charAt(permIndex)); - int charIndex = Integer.parseInt(charAtPerm); - // go to the next character in the permutation buffer - // which is the replacement character - permIndex++; - char replace = permutations.charAt(permIndex); - //go to the next char in the permutation buffer - // which is the candidate character - permIndex++; - char with = permutations.charAt(permIndex); - - if (lemma.length() <= charIndex) { - return wordForm; - } - if (lemma.charAt(charIndex) == replace) { - lemma.setCharAt(charIndex, with); - } - //System.err.println("-> ROP: " + lemma.toString()); - //go to next permutation - permIndex++; - + String charAtPerm = Character.toString(permutations.charAt(permIndex)); + int charIndex = Integer.parseInt(charAtPerm); + // go to the next character in the permutation buffer + // which is the replacement character + permIndex++; + char replace = permutations.charAt(permIndex); + //go to the next char in the permutation buffer + // which is the candidate character + permIndex++; + char with = permutations.charAt(permIndex); + + if (lemma.length() <= charIndex) { + return wordForm; + } + if (lemma.charAt(charIndex) == replace) { + lemma.setCharAt(charIndex, with); + } + //System.err.println("-> ROP: " + lemma.toString()); + //go to next permutation + permIndex++; + } else if (nextOperation == 'I') { - String charAtPerm = Character.toString(permutations.charAt(permIndex)); - int charIndex = Integer.parseInt(charAtPerm); - permIndex++; - //character to be inserted - char in = permutations.charAt(permIndex); - - if (lemma.length() < charIndex) { - return wordForm; - } - lemma.insert(charIndex, in); - //System.err.println("-> IOP " + lemma.toString()); - //go to next permutation - permIndex++; + String charAtPerm = Character.toString(permutations.charAt(permIndex)); + int charIndex = Integer.parseInt(charAtPerm); + permIndex++; + //character to be inserted + char in = permutations.charAt(permIndex); + + if (lemma.length() < charIndex) { + return wordForm; + } + lemma.insert(charIndex, in); + //System.err.println("-> IOP " + lemma.toString()); + //go to next permutation + permIndex++; } else if (nextOperation == 'D') { - String charAtPerm = Character.toString(permutations.charAt(permIndex)); - int charIndex = Integer.parseInt(charAtPerm); - if (lemma.length() <= charIndex) { - return wordForm; - } - lemma.deleteCharAt(charIndex); - permIndex++; - // go to next permutation - permIndex++; + String charAtPerm = Character.toString(permutations.charAt(permIndex)); + int charIndex = Integer.parseInt(charAtPerm); + if (lemma.length() <= charIndex) { + return wordForm; + } + lemma.deleteCharAt(charIndex); + permIndex++; + // go to next permutation + permIndex++; } + } + return lemma.reverse().toString(); } - return lemma.reverse().toString(); -} -/** - * Get the SES required to go from a word to a lemma. - * @param wordForm the word - * @param lemma the lemma - * @return the shortest edit script - */ -public static String getShortestEditScript(String wordForm, String lemma) { - String reversedWF = new StringBuffer(wordForm.toLowerCase()).reverse().toString(); - String reversedLemma = new StringBuffer(lemma.toLowerCase()).reverse().toString(); - StringBuffer permutations = new StringBuffer(); - String ses; - if (!reversedWF.equals(reversedLemma)) { - int[][]levenDistance = StringUtil.levenshteinDistance(reversedWF, reversedLemma); - StringUtil.computeShortestEditScript(reversedWF, reversedLemma, levenDistance, permutations); - ses = permutations.toString(); - } else { - ses = "O"; + /** + * Get the SES required to go from a word to a lemma. + * @param wordForm the word + * @param lemma the lemma + * @return the shortest edit script + */ + public static String getShortestEditScript(String wordForm, String lemma) { + String reversedWF = new StringBuffer(wordForm.toLowerCase()).reverse().toString(); + String reversedLemma = new StringBuffer(lemma.toLowerCase()).reverse().toString(); + StringBuffer permutations = new StringBuffer(); + String ses; + if (!reversedWF.equals(reversedLemma)) { + int[][]levenDistance = StringUtil.levenshteinDistance(reversedWF, reversedLemma); + StringUtil.computeShortestEditScript(reversedWF, reversedLemma, levenDistance, permutations); + ses = permutations.toString(); + } else { + ses = "O"; + } + return ses; } - return ses; -} } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java b/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java index 3677fc427..9e31bec2a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java @@ -36,7 +36,7 @@ public class TrainingParameters { public static final String ITERATIONS_PARAM = "Iterations"; public static final String CUTOFF_PARAM = "Cutoff"; public static final String THREADS_PARAM = "Threads"; - + private Map parameters = new HashMap<>(); public TrainingParameters() { diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/Version.java b/opennlp-tools/src/main/java/opennlp/tools/util/Version.java index 7683b3042..6749a5772 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/Version.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/Version.java @@ -150,7 +150,7 @@ && getRevision() == version.getRevision() * @return the version represented by the string value * * @throws NumberFormatException if the string does - * not contain a valid version + * not contain a valid version */ public static Version parse(String version) { @@ -159,7 +159,7 @@ public static Version parse(String version) { int indexSecondDot = version.indexOf('.', indexFirstDot + 1); if (indexFirstDot == -1 || indexSecondDot == -1) { - throw new NumberFormatException("Invalid version format '" + version + "', expected two dots!"); + throw new NumberFormatException("Invalid version format '" + version + "', expected two dots!"); } int indexFirstDash = version.indexOf('-'); @@ -191,7 +191,7 @@ public static Version currentVersion() { // Try to read the version from the version file if it is available, // otherwise set the version to the development version - try (InputStream versionIn = + try (InputStream versionIn = Version.class.getResourceAsStream("opennlp.version")) { if (versionIn != null) { manifest.load(versionIn); @@ -200,8 +200,7 @@ public static Version currentVersion() { // ignore error } - String versionString = - manifest.getProperty("OpenNLP-Version", DEV_VERSION_STRING); + String versionString = manifest.getProperty("OpenNLP-Version", DEV_VERSION_STRING); if (versionString.equals("${pom.version}")) versionString = DEV_VERSION_STRING; diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/eval/Evaluator.java b/opennlp-tools/src/main/java/opennlp/tools/util/eval/Evaluator.java index aa77b985a..1075d4cd4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/eval/Evaluator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/eval/Evaluator.java @@ -78,8 +78,8 @@ public Evaluator(EvaluationMonitor... aListeners) { */ public void evaluateSample(T sample) { T predicted = processSample(sample); - if(!listeners.isEmpty()) { - if(sample.equals(predicted)) { + if (!listeners.isEmpty()) { + if (sample.equals(predicted)) { for (EvaluationMonitor listener : listeners) { listener.correctlyClassified(sample, predicted); } @@ -97,7 +97,7 @@ public void evaluateSample(T sample) { * {@link #evaluateSample(Object)} method. * * @param samples the stream of reference which - * should be evaluated. + * should be evaluated. * * @throws IOException IOException */ diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/eval/Mean.java b/opennlp-tools/src/main/java/opennlp/tools/util/eval/Mean.java index 95e89d05d..9feeb0838 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/eval/Mean.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/eval/Mean.java @@ -37,7 +37,7 @@ public class Mean { * Adds a value to the arithmetic mean. * * @param value the value which should be added - * to the arithmetic mean. + * to the arithmetic mean. */ public void add(double value) { add(value, 1); @@ -47,10 +47,10 @@ public void add(double value) { * Adds a value count times to the arithmetic mean. * * @param value the value which should be added - * to the arithmetic mean. + * to the arithmetic mean. * * @param count number of times the value should be added to - * arithmetic mean. + * arithmetic mean. */ public void add(double value, long count) { sum += value * count; diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java b/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java index b33d5771f..8839321fd 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java @@ -78,7 +78,7 @@ public static T instantiateExtension(Class clazz, String extensionClassNa } catch (NoSuchFieldException | SecurityException e1) { throw new ExtensionNotLoadedException(e1); } - if(instanceField != null) { + if (instanceField != null) { try { return (T) instanceField.get(null); } catch (IllegalArgumentException | IllegalAccessException e1) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java index 85d9df9b7..494e7fd8f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdaptiveFeatureGenerator.java @@ -51,11 +51,11 @@ public interface AdaptiveFeatureGenerator { * @param tokens The tokens of the sentence or other text unit which has been processed. * @param outcomes The outcomes associated with the specified tokens. */ - default void updateAdaptiveData(String[] tokens, String[] outcomes) {}; + default void updateAdaptiveData(String[] tokens, String[] outcomes) {}; /** * Informs the feature generator that the context of the adaptive data (typically a document) * is no longer valid. */ - default void clearAdaptiveData() {}; + default void clearAdaptiveData() {}; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java index 3606bfcbc..482eb67d8 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/AdditionalContextFeatureGenerator.java @@ -28,9 +28,6 @@ public class AdditionalContextFeatureGenerator implements AdaptiveFeatureGenerat private String[][] additionalContext; -// public AdditionalContextFeatureGenerator() { -// } - public void createFeatures(List features, String[] tokens, int index, String[] preds) { if (additionalContext != null && additionalContext.length != 0) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java index 779628bde..0028ec194 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BigramNameFeatureGenerator.java @@ -25,14 +25,14 @@ public void createFeatures(List features, String[] tokens, int index, St String wc = FeatureGeneratorUtil.tokenFeature(tokens[index]); //bi-gram features if (index > 0) { - features.add("pw,w="+tokens[index-1]+","+tokens[index]); - String pwc = FeatureGeneratorUtil.tokenFeature(tokens[index-1]); - features.add("pwc,wc="+pwc+","+wc); + features.add("pw,w=" + tokens[index - 1] + "," + tokens[index]); + String pwc = FeatureGeneratorUtil.tokenFeature(tokens[index - 1]); + features.add("pwc,wc=" + pwc + "," + wc); } - if (index+1 < tokens.length) { - features.add("w,nw="+tokens[index]+","+tokens[index+1]); - String nwc = FeatureGeneratorUtil.tokenFeature(tokens[index+1]); - features.add("wc,nc="+wc+","+nwc); + if (index + 1 < tokens.length) { + features.add("w,nw=" + tokens[index] + "," + tokens[index + 1]); + String nwc = FeatureGeneratorUtil.tokenFeature(tokens[index + 1]); + features.add("wc,nc=" + wc + "," + nwc); } } } \ No newline at end of file diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java index a932e74ab..8be8a7786 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java @@ -26,7 +26,7 @@ public class BrownBigramFeatureGenerator implements AdaptiveFeatureGenerator { private BrownCluster brownLexicon; - public BrownBigramFeatureGenerator(BrownCluster dict){ + public BrownBigramFeatureGenerator(BrownCluster dict) { this.brownLexicon = dict; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java index 6a109b09d..ad5e64f1b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java @@ -76,8 +76,8 @@ public BrownCluster(InputStream in) throws IOException { String[] lineArray = tabPattern.split(line); if (lineArray.length == 3) { int freq = Integer.parseInt(lineArray[2]); - if (freq > 5 ) { - tokenToClusterMap.put(lineArray[1], lineArray[0]); + if (freq > 5 ) { + tokenToClusterMap.put(lineArray[1], lineArray[0]); } } else if (lineArray.length == 2) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java index db3d77463..b1aaf2eed 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java @@ -26,7 +26,7 @@ public class BrownTokenClassFeatureGenerator implements AdaptiveFeatureGenerator private BrownCluster brownLexicon; - public BrownTokenClassFeatureGenerator(BrownCluster dict){ + public BrownTokenClassFeatureGenerator(BrownCluster dict) { this.brownLexicon = dict; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java index f41203fce..d29e77aa2 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenFeatureGenerator.java @@ -26,7 +26,7 @@ public class BrownTokenFeatureGenerator implements AdaptiveFeatureGenerator { private BrownCluster brownLexicon; - public BrownTokenFeatureGenerator(BrownCluster dict){ + public BrownTokenFeatureGenerator(BrownCluster dict) { this.brownLexicon = dict; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java index afb0a2c25..baedfc9b4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CachedFeatureGenerator.java @@ -100,7 +100,8 @@ public long getNumberOfCacheMisses() { @Override public String toString() { - return super.toString()+": hits=" + numberOfCacheHits+" misses="+ numberOfCacheMisses+" hit%"+ (numberOfCacheHits > 0 ? - (double) numberOfCacheHits/(numberOfCacheMisses+numberOfCacheHits) : 0); + return super.toString() + ": hits=" + numberOfCacheHits + + " misses=" + numberOfCacheMisses + " hit%" + (numberOfCacheHits > 0 ? + (double) numberOfCacheHits / (numberOfCacheMisses + numberOfCacheHits) : 0); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java index 68037eda0..ff61d1e3b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/DictionaryFeatureGenerator.java @@ -38,6 +38,7 @@ public class DictionaryFeatureGenerator implements AdaptiveFeatureGenerator { public DictionaryFeatureGenerator(Dictionary dict) { this("",dict); } + public DictionaryFeatureGenerator(String prefix, Dictionary dict) { setDictionary(prefix,dict); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java index f6cf702d9..9065fb6e1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java @@ -132,7 +132,7 @@ public AdaptiveFeatureGenerator create(Element generatorElement, } return new AggregatedFeatureGenerator(aggregatedGenerators.toArray( - new AdaptiveFeatureGenerator[aggregatedGenerators.size()])); + new AdaptiveFeatureGenerator[aggregatedGenerators.size()])); } static void register(Map factoryMap) { @@ -429,17 +429,17 @@ public AdaptiveFeatureGenerator create(Element generatorElement, FeatureGeneratorResourceProvider resourceManager) { String attribute = generatorElement.getAttribute("wordAndClass"); - + // Default to true. boolean generateWordAndClassFeature = true; - - if(attribute != "") { - // Anything other than "true" sets it to false. - if(!"true".equalsIgnoreCase(attribute)) { - generateWordAndClassFeature = false; - } + + if (attribute != "") { + // Anything other than "true" sets it to false. + if (!"true".equalsIgnoreCase(attribute)) { + generateWordAndClassFeature = false; + } } - + return new TokenClassFeatureGenerator(generateWordAndClassFeature); } @@ -512,7 +512,7 @@ public AdaptiveFeatureGenerator create(Element generatorElement, if (nestedGeneratorElement == null) { throw new InvalidFormatException("window feature generator must contain" + - " an aggregator element"); + " an aggregator element"); } AdaptiveFeatureGenerator nestedGenerator = GeneratorFactory.createGenerator(nestedGeneratorElement, resourceManager); @@ -703,10 +703,10 @@ private static org.w3c.dom.Document createDOM(InputStream xmlDescriptorIn) * components. * * @param xmlDescriptorIn the {@link InputStream} from which the descriptor - * is read, the stream remains open and must be closed by the caller. + * is read, the stream remains open and must be closed by the caller. * * @param resourceManager the resource manager which is used to resolve resources - * referenced by a key in the descriptor + * referenced by a key in the descriptor * * @return created feature generators * @@ -724,8 +724,7 @@ public static AdaptiveFeatureGenerator create(InputStream xmlDescriptorIn, } public static Map> extractCustomArtifactSerializerMappings( - InputStream xmlDescriptorIn) - throws IOException, InvalidFormatException { + InputStream xmlDescriptorIn) throws IOException, InvalidFormatException { Map> mapping = new HashMap<>(); @@ -767,8 +766,7 @@ public static Map> extractCustomArtifactSerializer * @throws IOException if inputstream cannot be open * @throws InvalidFormatException if xml is not well-formed */ - public static List getDescriptorElements( - InputStream xmlDescriptorIn) + public static List getDescriptorElements(InputStream xmlDescriptorIn) throws IOException, InvalidFormatException { List elements = new ArrayList(); @@ -786,8 +784,8 @@ public static List getDescriptorElements( if (allElements.item(i) instanceof Element) { Element customElement = (Element) allElements.item(i); elements.add(customElement); - } } + } return elements; } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java index bcffc7ea5..b557a4c2b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/InSpanGenerator.java @@ -41,24 +41,24 @@ public class InSpanGenerator implements AdaptiveFeatureGenerator { * Initializes the current instance. * * @param prefix the prefix is used to distinguish the generated features - * from features generated by other instances of {@link InSpanGenerator}s. + * from features generated by other instances of {@link InSpanGenerator}s. * @param finder the {@link TokenNameFinder} used to detect the names. */ public InSpanGenerator(String prefix, TokenNameFinder finder) { if (prefix == null) - throw new IllegalArgumentException("prefix must not be null!"); + throw new IllegalArgumentException("prefix must not be null!"); this.prefix = prefix; if (finder == null) - throw new IllegalArgumentException("finder must not be null!"); + throw new IllegalArgumentException("finder must not be null!"); this.finder = finder; } public void createFeatures(List features, String[] tokens, int index, - String[] preds) { + String[] preds) { // cache results for sentence if (currentSentence != tokens) { currentSentence = tokens; diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/StringPattern.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/StringPattern.java index 8d93c68db..571e26f3b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/StringPattern.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/StringPattern.java @@ -160,24 +160,24 @@ public static StringPattern recognize(String token) { } switch (ch) { - case ',': - pattern |= CONTAINS_COMMA; - break; + case ',': + pattern |= CONTAINS_COMMA; + break; - case '.': - pattern |= CONTAINS_PERIOD; - break; + case '.': + pattern |= CONTAINS_PERIOD; + break; - case '/': - pattern |= CONTAINS_SLASH; - break; + case '/': + pattern |= CONTAINS_SLASH; + break; - case '-': - pattern |= CONTAINS_HYPHEN; - break; + case '-': + pattern |= CONTAINS_HYPHEN; + break; - default: - break; + default: + break; } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java index 79e61b289..6dbc41cd6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/TokenPatternFeatureGenerator.java @@ -31,59 +31,59 @@ */ public class TokenPatternFeatureGenerator implements AdaptiveFeatureGenerator { - private Pattern noLetters = Pattern.compile("[^a-zA-Z]"); - private Tokenizer tokenizer; - - /** - * Initializes a new instance. - * For tokinization the {@link SimpleTokenizer} is used. - */ - public TokenPatternFeatureGenerator() { - this(SimpleTokenizer.INSTANCE); + private Pattern noLetters = Pattern.compile("[^a-zA-Z]"); + private Tokenizer tokenizer; + + /** + * Initializes a new instance. + * For tokinization the {@link SimpleTokenizer} is used. + */ + public TokenPatternFeatureGenerator() { + this(SimpleTokenizer.INSTANCE); + } + + /** + * Initializes a new instance. + * + * @param supportTokenizer + */ + public TokenPatternFeatureGenerator(Tokenizer supportTokenizer) { + tokenizer = supportTokenizer; + } + + public void createFeatures(List feats, String[] toks, int index, String[] preds) { + + String[] tokenized = tokenizer.tokenize(toks[index]); + + if (tokenized.length == 1) { + feats.add("st=" + StringUtil.toLowerCase(toks[index])); + return; } - /** - * Initializes a new instance. - * - * @param supportTokenizer - */ - public TokenPatternFeatureGenerator(Tokenizer supportTokenizer) { - tokenizer = supportTokenizer; - } + feats.add("stn=" + tokenized.length); - public void createFeatures(List feats, String[] toks, int index, String[] preds) { + StringBuilder pattern = new StringBuilder(); - String[] tokenized = tokenizer.tokenize(toks[index]); + for (int i = 0; i < tokenized.length; i++) { - if (tokenized.length == 1) { - feats.add("st=" + StringUtil.toLowerCase(toks[index])); - return; + if (i < tokenized.length - 1) { + feats.add("pt2=" + FeatureGeneratorUtil.tokenFeature(tokenized[i]) + + FeatureGeneratorUtil.tokenFeature(tokenized[i + 1])); } - feats.add("stn=" + tokenized.length); - - StringBuilder pattern = new StringBuilder(); - - for (int i = 0; i < tokenized.length; i++) { - - if (i < tokenized.length - 1) { - feats.add("pt2=" + FeatureGeneratorUtil.tokenFeature(tokenized[i]) + - FeatureGeneratorUtil.tokenFeature(tokenized[i + 1])); - } - - if (i < tokenized.length - 2) { - feats.add("pt3=" + FeatureGeneratorUtil.tokenFeature(tokenized[i]) + - FeatureGeneratorUtil.tokenFeature(tokenized[i + 1]) + - FeatureGeneratorUtil.tokenFeature(tokenized[i + 2])); - } + if (i < tokenized.length - 2) { + feats.add("pt3=" + FeatureGeneratorUtil.tokenFeature(tokenized[i]) + + FeatureGeneratorUtil.tokenFeature(tokenized[i + 1]) + + FeatureGeneratorUtil.tokenFeature(tokenized[i + 2])); + } - pattern.append(FeatureGeneratorUtil.tokenFeature(tokenized[i])); + pattern.append(FeatureGeneratorUtil.tokenFeature(tokenized[i])); - if (!noLetters.matcher(tokenized[i]).find()) { - feats.add("st=" + StringUtil.toLowerCase(tokenized[i])); - } + if (!noLetters.matcher(tokenized[i]).find()) { + feats.add("st=" + StringUtil.toLowerCase(tokenized[i])); } - - feats.add("pta=" + pattern.toString()); } + + feats.add("pta=" + pattern.toString()); + } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WindowFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WindowFeatureGenerator.java index bfa9673c3..b52a85677 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WindowFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WindowFeatureGenerator.java @@ -120,11 +120,11 @@ public void updateAdaptiveData(String[] tokens, String[] outcomes) { } public void clearAdaptiveData() { - generator.clearAdaptiveData(); + generator.clearAdaptiveData(); } @Override public String toString() { - return super.toString()+": Prev window size: " + prevWindowSize +", Next window size: " + nextWindowSize; + return super.toString() + ": Prev window size: " + prevWindowSize + ", Next window size: " + nextWindowSize; } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java index 11e0e6bec..b5955add6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java @@ -28,9 +28,9 @@ public class WordClusterFeatureGenerator implements AdaptiveFeatureGenerator { private boolean lowerCaseDictionary; public WordClusterFeatureGenerator(WordClusterDictionary dict, String dictResourceKey, boolean lowerCaseDictionary) { - tokenDictionary = dict; - resourceName = dictResourceKey; - this.lowerCaseDictionary = lowerCaseDictionary; + tokenDictionary = dict; + resourceName = dictResourceKey; + this.lowerCaseDictionary = lowerCaseDictionary; } public void createFeatures(List features, String[] tokens, int index, diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java index 5fcd1dae6..a7f42878e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java @@ -130,7 +130,7 @@ protected BaseModel(String componentName, String languageCode, artifactMap.put(MANIFEST_ENTRY, manifest); finishedLoadingArtifacts = true; - if (factory!=null) { + if (factory != null) { setManifestProperty(FACTORY_NAME, factory.getClass().getCanonicalName()); artifactMap.putAll(factory.createArtifactMap()); @@ -223,7 +223,7 @@ private void loadModel(InputStream in) throws IOException { boolean isSearchingForManifest = true; ZipEntry entry; - while((entry = zip.getNextEntry()) != null && isSearchingForManifest) { + while ((entry = zip.getNextEntry()) != null && isSearchingForManifest) { if ("manifest.properties".equals(entry.getName())) { // TODO: Probably better to use the serializer here directly! @@ -253,7 +253,7 @@ private void initializeFactory() throws InvalidFormatException { if (factoryName == null) { // load the default factory Class factoryClass = getDefaultFactory(); - if(factoryClass != null) { + if (factoryClass != null) { this.toolFactory = BaseToolFactory.create(factoryClass, this); } } else { @@ -295,7 +295,7 @@ private void finishLoadingArtifacts(InputStream in) Map artifactMap = new HashMap<>(); ZipEntry entry; - while((entry = zip.getNextEntry()) != null ) { + while ((entry = zip.getNextEntry()) != null ) { // Note: The manifest.properties file will be read here again, // there should be no need to prevent that. @@ -380,7 +380,7 @@ protected static Map createArtifactSerializers() { */ protected void createArtifactSerializers( Map serializers) { - if(this.toolFactory != null) + if (this.toolFactory != null) serializers.putAll(this.toolFactory.createArtifactSerializersMap()); } @@ -423,13 +423,13 @@ protected void validateArtifactMap() throws InvalidFormatException { if (Version.currentVersion().getMajor() != version.getMajor() || Version.currentVersion().getMinor() - 2 > version.getMinor()) { throw new InvalidFormatException("Model version " + version + " is not supported by this (" - + Version.currentVersion() +") version of OpenNLP!"); + + Version.currentVersion() + ") version of OpenNLP!"); } // Reject loading a snapshot model with a non-snapshot version if (!Version.currentVersion().isSnapshot() && version.isSnapshot()) { throw new InvalidFormatException("Model version " + version + " is a snapshot - snapshot models are not " + - "supported by this non-snapshot version (" + Version.currentVersion() + ") of OpenNLP!"); + "supported by this non-snapshot version (" + Version.currentVersion() + ") of OpenNLP!"); } } } @@ -448,7 +448,7 @@ protected void validateArtifactMap() throws InvalidFormatException { if (getManifestProperty(LANGUAGE_PROPERTY) == null) throw new InvalidFormatException("Missing " + LANGUAGE_PROPERTY + " property in " + - MANIFEST_ENTRY + "!"); + MANIFEST_ENTRY + "!"); // Validate the factory. We try to load it using the ExtensionLoader. It // will return the factory, null or raise an exception @@ -469,7 +469,7 @@ protected void validateArtifactMap() throws InvalidFormatException { } // validate artifacts declared by the factory - if(toolFactory != null) { + if (toolFactory != null) { toolFactory.validateArtifactMap(); } } @@ -605,7 +605,7 @@ public final void serialize(OutputStream out) throws IOException { @SuppressWarnings("unchecked") public T getArtifact(String key) { Object artifact = artifactMap.get(key); - if(artifact == null) + if (artifact == null) return null; return (T) artifact; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/DictionarySerializer.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/DictionarySerializer.java index 124ba5cb6..afd374b6b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/model/DictionarySerializer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/DictionarySerializer.java @@ -40,5 +40,5 @@ public void serialize(Dictionary dictionary, OutputStream out) static void register(Map factories) { factories.put("dictionary", new DictionarySerializer()); - } + } } \ No newline at end of file diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/GenericModelSerializer.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/GenericModelSerializer.java index d47137786..d55b64e2f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/model/GenericModelSerializer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/GenericModelSerializer.java @@ -40,6 +40,6 @@ public void serialize(AbstractModel artifact, OutputStream out) throws IOExcepti } public static void register(Map factories) { - factories.put("model", new GenericModelSerializer()); + factories.put("model", new GenericModelSerializer()); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/PropertiesSerializer.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/PropertiesSerializer.java index 803d8b05f..9fef59e6e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/model/PropertiesSerializer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/PropertiesSerializer.java @@ -42,5 +42,5 @@ public void serialize(Properties properties, OutputStream out) throws IOExceptio static void register(Map factories) { factories.put("properties", new PropertiesSerializer()); - } + } } \ No newline at end of file diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleStreamTest.java index 5bfbcb62b..65a1e2ea5 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleStreamTest.java @@ -29,7 +29,7 @@ import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; -public class ChunkSampleStreamTest{ +public class ChunkSampleStreamTest { @Test public void testReadingEvents() throws IOException { @@ -54,7 +54,7 @@ public void testReadingEvents() throws IOException { sample.append('\n'); sample.append("word23 tag23 pred23"); sample.append('\n'); - + ObjectStream stringStream = new PlainTextByLineStream( new MockInputStreamFactory(sample.toString()), UTF_8); @@ -86,7 +86,7 @@ public void testReadingEvents() throws IOException { assertEquals("pred23", secondSample.getPreds()[2]); assertNull(chunkStream.read()); - + chunkStream.close(); } } diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleTest.java index 24952722c..f7832fa44 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleTest.java @@ -37,7 +37,7 @@ public class ChunkSampleTest { - @Test(expected=IllegalArgumentException.class) + @Test(expected = IllegalArgumentException.class) public void testParameterValidation() { new ChunkSample(new String[]{""}, new String[]{""}, new String[]{"test", "one element to much"}); @@ -127,13 +127,13 @@ public void testToString() throws IOException { StringReader sr = new StringReader(sample.toString()); BufferedReader reader = new BufferedReader(sr); for (int i = 0; i < sentence.length; i++) { - String line = reader.readLine(); - String[] parts = line.split("\\s+"); - assertEquals(3, parts.length); - assertEquals(sentence[i], parts[0]); - assertEquals(tags[i], parts[1]); - assertEquals(chunks[i], parts[2]); - } + String line = reader.readLine(); + String[] parts = line.split("\\s+"); + assertEquals(3, parts.length); + assertEquals(sentence[i], parts[0]); + assertEquals(tags[i], parts[1]); + assertEquals(chunks[i], parts[2]); + } } @Test @@ -142,46 +142,46 @@ public void testNicePrint() { ChunkSample sample = new ChunkSample(createSentence(), createTags(), createChunks()); assertEquals(" [NP Forecasts_NNS ] [PP for_IN ] [NP the_DT trade_NN figures_NNS ] " + - "[VP range_VBP ] [ADVP widely_RB ] ,_, [NP Forecasts_NNS ] [PP for_IN ] [NP the_DT trade_NN figures_NNS ] " + - "[VP range_VBP ] [ADVP widely_RB ] ._.", sample.nicePrint()); + "[VP range_VBP ] [ADVP widely_RB ] ,_, [NP Forecasts_NNS ] [PP for_IN ] [NP the_DT trade_NN figures_NNS ] " + + "[VP range_VBP ] [ADVP widely_RB ] ._.", sample.nicePrint()); } @Test public void testAsSpan() { - ChunkSample sample = new ChunkSample(createSentence(), createTags(), - createChunks()); - Span[] spans = sample.getPhrasesAsSpanList(); - - assertEquals(10, spans.length); - assertEquals(new Span(0, 1, "NP"), spans[0]); - assertEquals(new Span(1, 2, "PP"), spans[1]); - assertEquals(new Span(2, 5, "NP"), spans[2]); - assertEquals(new Span(5, 6, "VP"), spans[3]); - assertEquals(new Span(6, 7, "ADVP"), spans[4]); - assertEquals(new Span(8, 9, "NP"), spans[5]); + ChunkSample sample = new ChunkSample(createSentence(), createTags(), + createChunks()); + Span[] spans = sample.getPhrasesAsSpanList(); + + assertEquals(10, spans.length); + assertEquals(new Span(0, 1, "NP"), spans[0]); + assertEquals(new Span(1, 2, "PP"), spans[1]); + assertEquals(new Span(2, 5, "NP"), spans[2]); + assertEquals(new Span(5, 6, "VP"), spans[3]); + assertEquals(new Span(6, 7, "ADVP"), spans[4]); + assertEquals(new Span(8, 9, "NP"), spans[5]); assertEquals(new Span(9, 10, "PP"), spans[6]); assertEquals(new Span(10, 13, "NP"), spans[7]); assertEquals(new Span(13, 14, "VP"), spans[8]); assertEquals(new Span(14, 15, "ADVP"), spans[9]); } - @Test - public void testPhraseAsSpan() { - Span[] spans = ChunkSample.phrasesAsSpanList(createSentence(), - createTags(), createChunks()); - - assertEquals(10, spans.length); - assertEquals(new Span(0, 1, "NP"), spans[0]); - assertEquals(new Span(1, 2, "PP"), spans[1]); - assertEquals(new Span(2, 5, "NP"), spans[2]); - assertEquals(new Span(5, 6, "VP"), spans[3]); - assertEquals(new Span(6, 7, "ADVP"), spans[4]); - assertEquals(new Span(8, 9, "NP"), spans[5]); - assertEquals(new Span(9, 10, "PP"), spans[6]); - assertEquals(new Span(10, 13, "NP"), spans[7]); - assertEquals(new Span(13, 14, "VP"), spans[8]); - assertEquals(new Span(14, 15, "ADVP"), spans[9]); - } + @Test + public void testPhraseAsSpan() { + Span[] spans = ChunkSample.phrasesAsSpanList(createSentence(), + createTags(), createChunks()); + + assertEquals(10, spans.length); + assertEquals(new Span(0, 1, "NP"), spans[0]); + assertEquals(new Span(1, 2, "PP"), spans[1]); + assertEquals(new Span(2, 5, "NP"), spans[2]); + assertEquals(new Span(5, 6, "VP"), spans[3]); + assertEquals(new Span(6, 7, "ADVP"), spans[4]); + assertEquals(new Span(8, 9, "NP"), spans[5]); + assertEquals(new Span(9, 10, "PP"), spans[6]); + assertEquals(new Span(10, 13, "NP"), spans[7]); + assertEquals(new Span(13, 14, "VP"), spans[8]); + assertEquals(new Span(14, 15, "ADVP"), spans[9]); + } @Test public void testRegions() throws IOException { @@ -191,27 +191,27 @@ public void testRegions() throws IOException { DummyChunkSampleStream predictedSample = new DummyChunkSampleStream( new PlainTextByLineStream(in, UTF_8), false); - ChunkSample cs1 = predictedSample.read(); - String[] g1 = Span.spansToStrings(cs1.getPhrasesAsSpanList(), cs1.getSentence()); - assertEquals(15, g1.length); - - ChunkSample cs2 = predictedSample.read(); - String[] g2 = Span.spansToStrings(cs2.getPhrasesAsSpanList(), cs2.getSentence()); - assertEquals(10, g2.length); - - ChunkSample cs3 = predictedSample.read(); - String[] g3 = Span.spansToStrings(cs3.getPhrasesAsSpanList(), cs3.getSentence()); - assertEquals(7, g3.length); - assertEquals("United", g3[0]); - assertEquals("'s directors", g3[1]); - assertEquals("voted", g3[2]); - assertEquals("themselves", g3[3]); - assertEquals("their spouses", g3[4]); - assertEquals("lifetime access", g3[5]); - assertEquals("to", g3[6]); - - predictedSample.close(); - + ChunkSample cs1 = predictedSample.read(); + String[] g1 = Span.spansToStrings(cs1.getPhrasesAsSpanList(), cs1.getSentence()); + assertEquals(15, g1.length); + + ChunkSample cs2 = predictedSample.read(); + String[] g2 = Span.spansToStrings(cs2.getPhrasesAsSpanList(), cs2.getSentence()); + assertEquals(10, g2.length); + + ChunkSample cs3 = predictedSample.read(); + String[] g3 = Span.spansToStrings(cs3.getPhrasesAsSpanList(), cs3.getSentence()); + assertEquals(7, g3.length); + assertEquals("United", g3[0]); + assertEquals("'s directors", g3[1]); + assertEquals("voted", g3[2]); + assertEquals("themselves", g3[3]); + assertEquals("their spouses", g3[4]); + assertEquals("lifetime access", g3[5]); + assertEquals("to", g3[6]); + + predictedSample.close(); + } @@ -253,13 +253,13 @@ public void testEquals() { } public static ChunkSample createGoldSample() { - return new ChunkSample(createSentence(), createTags(), createChunks()); + return new ChunkSample(createSentence(), createTags(), createChunks()); } public static ChunkSample createPredSample() { - String[] chunks = createChunks(); - chunks[5] = "B-NP"; - return new ChunkSample(createSentence(), createTags(), chunks); + String[] chunks = createChunks(); + chunks[5] = "B-NP"; + return new ChunkSample(createSentence(), createTags(), chunks); } } diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerDetailedFMeasureListenerTest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerDetailedFMeasureListenerTest.java index ac9c0c8d6..9c0754d21 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerDetailedFMeasureListenerTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerDetailedFMeasureListenerTest.java @@ -40,7 +40,7 @@ public void testEvaluator() throws IOException { "/opennlp/tools/chunker/output.txt"); ResourceAsStreamFactory detailedOutputStream = new ResourceAsStreamFactory( getClass(), "/opennlp/tools/chunker/detailedOutput.txt"); - + DummyChunkSampleStream predictedSample = new DummyChunkSampleStream( new PlainTextByLineStream(inPredicted, StandardCharsets.UTF_8), true); diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerEvaluatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerEvaluatorTest.java index 4261d6a1e..b95016367 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerEvaluatorTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerEvaluatorTest.java @@ -36,43 +36,43 @@ */ public class ChunkerEvaluatorTest { - private static final double DELTA = 1.0E-9d; - - /** - * Checks the evaluator results against the results got using the conlleval, - * available at http://www.cnts.ua.ac.be/conll2000/chunking/output.html - * The output.txt file has only 3 sentences, but can be replaced by the one - * available at the conll2000 site to validate using a bigger sample. - * @throws IOException - */ - @Test - public void testEvaluator() throws IOException { - ResourceAsStreamFactory inPredicted = new ResourceAsStreamFactory( - getClass(), "/opennlp/tools/chunker/output.txt"); - ResourceAsStreamFactory inExpected = new ResourceAsStreamFactory(getClass(), - "/opennlp/tools/chunker/output.txt"); - - DummyChunkSampleStream predictedSample = new DummyChunkSampleStream( - new PlainTextByLineStream(inPredicted, StandardCharsets.UTF_8), true); - - DummyChunkSampleStream expectedSample = new DummyChunkSampleStream( - new PlainTextByLineStream(inExpected, StandardCharsets.UTF_8), false); - - Chunker dummyChunker = new DummyChunker(predictedSample); - - OutputStream stream = new ByteArrayOutputStream(); - ChunkerEvaluationMonitor listener = new ChunkEvaluationErrorListener(stream); - ChunkerEvaluator evaluator = new ChunkerEvaluator(dummyChunker, listener); - - evaluator.evaluate(expectedSample); - - FMeasure fm = evaluator.getFMeasure(); - - Assert.assertEquals(0.8d, fm.getPrecisionScore(), DELTA); - Assert.assertEquals(0.875d, fm.getRecallScore(), DELTA); - - Assert.assertNotSame(stream.toString().length(), 0); - } + private static final double DELTA = 1.0E-9d; + + /** + * Checks the evaluator results against the results got using the conlleval, + * available at http://www.cnts.ua.ac.be/conll2000/chunking/output.html + * The output.txt file has only 3 sentences, but can be replaced by the one + * available at the conll2000 site to validate using a bigger sample. + * @throws IOException + */ + @Test + public void testEvaluator() throws IOException { + ResourceAsStreamFactory inPredicted = new ResourceAsStreamFactory( + getClass(), "/opennlp/tools/chunker/output.txt"); + ResourceAsStreamFactory inExpected = new ResourceAsStreamFactory(getClass(), + "/opennlp/tools/chunker/output.txt"); + + DummyChunkSampleStream predictedSample = new DummyChunkSampleStream( + new PlainTextByLineStream(inPredicted, StandardCharsets.UTF_8), true); + + DummyChunkSampleStream expectedSample = new DummyChunkSampleStream( + new PlainTextByLineStream(inExpected, StandardCharsets.UTF_8), false); + + Chunker dummyChunker = new DummyChunker(predictedSample); + + OutputStream stream = new ByteArrayOutputStream(); + ChunkerEvaluationMonitor listener = new ChunkEvaluationErrorListener(stream); + ChunkerEvaluator evaluator = new ChunkerEvaluator(dummyChunker, listener); + + evaluator.evaluate(expectedSample); + + FMeasure fm = evaluator.getFMeasure(); + + Assert.assertEquals(0.8d, fm.getPrecisionScore(), DELTA); + Assert.assertEquals(0.875d, fm.getRecallScore(), DELTA); + + Assert.assertNotSame(stream.toString().length(), 0); + } @Test public void testEvaluatorNoError() throws IOException { diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkSampleStream.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkSampleStream.java index 6dc67e92a..23611043f 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkSampleStream.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkSampleStream.java @@ -30,61 +30,61 @@ * can be used together with DummyChunker simulate a chunker. */ public class DummyChunkSampleStream extends - FilterObjectStream { + FilterObjectStream { - boolean mIsPredicted; - int count = 0; + boolean mIsPredicted; + int count = 0; - // the predicted flag sets if the stream will contain the expected or the - // predicted tags. - public DummyChunkSampleStream(ObjectStream samples, - boolean isPredicted) { - super(samples); - mIsPredicted = isPredicted; - } + // the predicted flag sets if the stream will contain the expected or the + // predicted tags. + public DummyChunkSampleStream(ObjectStream samples, + boolean isPredicted) { + super(samples); + mIsPredicted = isPredicted; + } - /** - * Returns a pair representing the expected and the predicted at 0: the - * chunk tag according to the corpus at 1: the chunk tag predicted - * - * @see opennlp.tools.util.ObjectStream#read() - */ - public ChunkSample read() throws IOException { + /** + * Returns a pair representing the expected and the predicted at 0: the + * chunk tag according to the corpus at 1: the chunk tag predicted + * + * @see opennlp.tools.util.ObjectStream#read() + */ + public ChunkSample read() throws IOException { - List toks = new ArrayList(); - List posTags = new ArrayList(); - List chunkTags = new ArrayList(); - List predictedChunkTags = new ArrayList(); + List toks = new ArrayList(); + List posTags = new ArrayList(); + List chunkTags = new ArrayList(); + List predictedChunkTags = new ArrayList(); - for (String line = samples.read(); line != null && !line.equals(""); line = samples - .read()) { - String[] parts = line.split(" "); - if (parts.length != 4) { - System.err.println("Skipping corrupt line " + count + ": " - + line); - } else { - toks.add(parts[0]); - posTags.add(parts[1]); - chunkTags.add(parts[2]); - predictedChunkTags.add(parts[3]); - } - count++; - } + for (String line = samples.read(); line != null && !line.equals(""); line = samples + .read()) { + String[] parts = line.split(" "); + if (parts.length != 4) { + System.err.println("Skipping corrupt line " + count + ": " + + line); + } else { + toks.add(parts[0]); + posTags.add(parts[1]); + chunkTags.add(parts[2]); + predictedChunkTags.add(parts[3]); + } + count++; + } - if (toks.size() > 0) { - if (mIsPredicted) { - return new ChunkSample(toks.toArray(new String[toks.size()]), - posTags.toArray(new String[posTags.size()]), - predictedChunkTags - .toArray(new String[predictedChunkTags.size()])); - } else - return new ChunkSample(toks.toArray(new String[toks.size()]), - posTags.toArray(new String[posTags.size()]), - chunkTags.toArray(new String[chunkTags.size()])); - } else { - return null; - } + if (toks.size() > 0) { + if (mIsPredicted) { + return new ChunkSample(toks.toArray(new String[toks.size()]), + posTags.toArray(new String[posTags.size()]), + predictedChunkTags + .toArray(new String[predictedChunkTags.size()])); + } else + return new ChunkSample(toks.toArray(new String[toks.size()]), + posTags.toArray(new String[posTags.size()]), + chunkTags.toArray(new String[chunkTags.size()])); + } else { + return null; + } - } + } } diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunker.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunker.java index 4c4a2b5ee..08f93ab2c 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunker.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunker.java @@ -32,54 +32,54 @@ */ public class DummyChunker implements Chunker { - private DummyChunkSampleStream mSampleStream; - - public DummyChunker(DummyChunkSampleStream aSampleStream) { - mSampleStream = aSampleStream; - } - - public List chunk(List toks, List tags) { - return Arrays.asList(chunk(toks.toArray(new String[toks.size()]), - tags.toArray(new String[tags.size()]))); - } - - public String[] chunk(String[] toks, String[] tags) { - try { - ChunkSample predsSample = mSampleStream.read(); - - // checks if the streams are sync - for (int i = 0; i < toks.length; i++) { - if (!toks[i].equals(predsSample.getSentence()[i]) - || !tags[i].equals(predsSample.getTags()[i])) { - throw new RuntimeException("The streams are not sync!" - + "\n expected sentence: " + Arrays.toString(toks) - + "\n expected tags: " + Arrays.toString(tags) - + "\n predicted sentence: " - + Arrays.toString(predsSample.getSentence()) - + "\n predicted tags: " - + Arrays.toString(predsSample.getTags())); - } - } - - return predsSample.getPreds(); - - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public Sequence[] topKSequences(List sentence, List tags) { - return null; - } - - public Sequence[] topKSequences(String[] sentence, String[] tags, - double minSequenceScore) { - return null; - } - - public Span[] chunkAsSpans(String[] toks, String[] tags) { - return null; - } + private DummyChunkSampleStream mSampleStream; + + public DummyChunker(DummyChunkSampleStream aSampleStream) { + mSampleStream = aSampleStream; + } + + public List chunk(List toks, List tags) { + return Arrays.asList(chunk(toks.toArray(new String[toks.size()]), + tags.toArray(new String[tags.size()]))); + } + + public String[] chunk(String[] toks, String[] tags) { + try { + ChunkSample predsSample = mSampleStream.read(); + + // checks if the streams are sync + for (int i = 0; i < toks.length; i++) { + if (!toks[i].equals(predsSample.getSentence()[i]) + || !tags[i].equals(predsSample.getTags()[i])) { + throw new RuntimeException("The streams are not sync!" + + "\n expected sentence: " + Arrays.toString(toks) + + "\n expected tags: " + Arrays.toString(tags) + + "\n predicted sentence: " + + Arrays.toString(predsSample.getSentence()) + + "\n predicted tags: " + + Arrays.toString(predsSample.getTags())); + } + } + + return predsSample.getPreds(); + + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public Sequence[] topKSequences(List sentence, List tags) { + return null; + } + + public Sequence[] topKSequences(String[] sentence, String[] tags, + double minSequenceScore) { + return null; + } + + public Span[] chunkAsSpans(String[] toks, String[] tags) { + return null; + } public Sequence[] topKSequences(String[] sentence, String[] tags) { return null; diff --git a/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java index c4fa6dd2e..a6aedd95a 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/cmdline/ArgumentParserTest.java @@ -128,8 +128,8 @@ public void testAllOptionalArgumentsExtraArgument() { public void testSimpleArgumentsUsage() { String arguments[] = new String[] {"-encoding charset", - "[-iterations num]", - "[-alphaNumOpt true|false]"}; + "[-iterations num]", + "[-alphaNumOpt true|false]"}; String usage = ArgumentParser.createUsage(SimpleArguments.class); @@ -169,7 +169,7 @@ public void testSetEncodingParameter() { Collection availableCharset = Charset.availableCharsets().values(); String notTheDefaultCharset = "UTF-8"; for (Charset charset : availableCharset) { - if(!charset.equals(Charset.defaultCharset())) { + if (!charset.equals(Charset.defaultCharset())) { notTheDefaultCharset = charset.name(); break; } diff --git a/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java b/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java index c20ff4ad6..00a7c11f0 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java @@ -53,7 +53,7 @@ public void checkPermission(Permission perm, Object context) { } @Override - public void checkExit(int status){ + public void checkExit(int status) { super.checkExit(status); throw new ExitException(status); @@ -62,7 +62,7 @@ public void checkExit(int status){ private final SecurityManager originalSecurityManager = System.getSecurityManager(); - @Before + @Before public void installNoExitSecurityManager() { System.setSecurityManager(new NoExitSecurityManager()); } @@ -124,7 +124,7 @@ public void testUnknownFileMessage() { public void testHelpMessageOfTools() { for (String toolName : CLI.getToolNames()) { - System.err.println("-> ToolName" + toolName); + System.err.println("-> ToolName" + toolName); try { CLI.main(new String[]{toolName, "help"}); } catch (ExitException e) { diff --git a/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryTest.java b/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryTest.java index 4b0e83bc0..b6fc9c07c 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/dictionary/DictionaryTest.java @@ -32,22 +32,22 @@ import org.junit.Test; /** - * Tests for the {@link Dictionary} class. - */ + * Tests for the {@link Dictionary} class. + */ public class DictionaryTest { /** * @return a case sensitive Dictionary */ private Dictionary getCaseSensitive() { - return new Dictionary(true); + return new Dictionary(true); } /** * @return a case insensitive Dictionary */ private Dictionary getCaseInsensitive() { - return new Dictionary(false); + return new Dictionary(false); } /** @@ -126,7 +126,7 @@ public void testParseOneEntryPerLine() throws IOException { String testDictionary = "1a 1b 1c 1d \n 2a 2b 2c \n 3a \n 4a 4b "; Dictionary dictionay = - Dictionary.parseOneEntryPerLine(new StringReader(testDictionary)); + Dictionary.parseOneEntryPerLine(new StringReader(testDictionary)); assertTrue(dictionay.size() == 4); diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DoccatFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DoccatFactoryTest.java index 8062762d8..6502c3b2c 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DoccatFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DoccatFactoryTest.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.doccat; import static org.junit.Assert.assertEquals; diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java index 673198aa7..52e35958a 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerMETest.java @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.doccat; import static org.junit.Assert.assertEquals; import java.io.IOException; -import java.util.Map; import java.util.Set; import java.util.SortedMap; @@ -35,12 +35,12 @@ public class DocumentCategorizerMETest { public void testSimpleTraining() throws IOException { ObjectStream samples = ObjectStreamUtils.createObjectStream(new DocumentSample[]{ - new DocumentSample("1", new String[]{"a", "b", "c"}), - new DocumentSample("1", new String[]{"a", "b", "c", "1", "2"}), - new DocumentSample("1", new String[]{"a", "b", "c", "3", "4"}), - new DocumentSample("0", new String[]{"x", "y", "z"}), - new DocumentSample("0", new String[]{"x", "y", "z", "5", "6"}), - new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"}) + new DocumentSample("1", new String[]{"a", "b", "c"}), + new DocumentSample("1", new String[]{"a", "b", "c", "1", "2"}), + new DocumentSample("1", new String[]{"a", "b", "c", "3", "4"}), + new DocumentSample("0", new String[]{"x", "y", "z"}), + new DocumentSample("0", new String[]{"x", "y", "z", "5", "6"}), + new DocumentSample("0", new String[]{"x", "y", "z", "7", "8"}) }); TrainingParameters params = new TrainingParameters(); diff --git a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java index 67347dec8..99586931e 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.doccat; import static org.junit.Assert.assertEquals; diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java index 737fae3e0..24327fe30 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java @@ -39,7 +39,8 @@ private static void crossEval(TrainingParameters params, String type, double exp throws IOException { ObjectStream documentStream = new DirectorySampleStream(new File( - EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), file -> { + EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), + file -> { if (file.isFile()) { return file.getName().endsWith(".name"); } diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java index 087ab7ef4..00e37dd89 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java @@ -42,7 +42,8 @@ private static void crossEval(TrainingParameters params, HeadRules rules, double throws IOException { ObjectStream documentStream = new DirectorySampleStream(new File( - EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), file -> { + EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), + file -> { if (file.isFile()) { return file.getName().endsWith(".parse"); } diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java index fb258367a..9c7d35251 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java @@ -37,9 +37,10 @@ public class OntoNotes4PosTaggerEval { private static void crossEval(TrainingParameters params, double expectedScore) throws IOException { - + ObjectStream documentStream = new DirectorySampleStream(new File( - EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), file -> { + EvalUtil.getOpennlpDataDir(), "ontonotes4/data/files/data/english"), + file -> { if (file.isFile()) { return file.getName().endsWith(".parse"); } @@ -49,14 +50,14 @@ private static void crossEval(TrainingParameters params, double expectedScore) ParseToPOSSampleStream samples = new ParseToPOSSampleStream(new OntoNotesParseSampleStream( new DocumentToLineStream( - new FileToStringSampleStream(documentStream, Charset.forName("UTF-8"))))); - + new FileToStringSampleStream(documentStream, Charset.forName("UTF-8"))))); + POSTaggerCrossValidator cv = new POSTaggerCrossValidator("en", params, new POSTaggerFactory()); cv.evaluate(samples, 10); - + Assert.assertEquals(expectedScore, cv.getWordAccuracy(), 0.0001d); } - + @Test public void evalEnglishMaxentTagger() throws IOException { crossEval(ModelUtil.createDefaultTrainingParameters(), 0.9707977252663043d); diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java index e73b72c50..b5b8d5f1e 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll03NameSampleStreamTest.java @@ -74,16 +74,16 @@ public void testParsingEnglishSample() throws IOException { assertNull(sampleStream.read()); } - @Test(expected=IOException.class) + @Test(expected = IOException.class) public void testParsingEnglishSampleWithGermanAsLanguage() throws IOException { ObjectStream sampleStream = openData(LANGUAGE.DE, ENGLISH_SAMPLE); sampleStream.read(); } - @Test(expected=IOException.class) + @Test(expected = IOException.class) public void testParsingGermanSampleWithEnglishAsLanguage() throws IOException { - ObjectStream sampleStream = openData(LANGUAGE.EN, GERMAN_SAMPLE); - sampleStream.read(); + ObjectStream sampleStream = openData(LANGUAGE.EN, GERMAN_SAMPLE); + sampleStream.read(); } @Test diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/LeipzigDoccatSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/LeipzigDoccatSampleStreamTest.java index 5797ab4ba..c32f7946c 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/formats/LeipzigDoccatSampleStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/formats/LeipzigDoccatSampleStreamTest.java @@ -51,7 +51,7 @@ public void testParsingSample() throws IOException { assertEquals("en", doc4.getCategory()); assertNull(sampleStream.read()); - + sampleStream.close(); } } diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java index dec574fff..e75683852 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java @@ -71,7 +71,7 @@ public void setup() throws IOException { ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"); ADChunkSampleStream stream = new ADChunkSampleStream( - new PlainTextByLineStream(in, "UTF-8")); + new PlainTextByLineStream(in, "UTF-8")); ChunkSample sample = stream.read(); diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java index f9430d696..469eb6523 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java @@ -39,10 +39,10 @@ public void testSimpleReading() throws IOException { ADSentenceStream.Sentence paragraph = stream.read(); paragraph.getRoot(); - while(paragraph != null) { + while (paragraph != null) { count++; paragraph = stream.read(); -// paragraph.getRoot(); + // paragraph.getRoot(); } assertEquals(ADParagraphStreamTest.NUM_SENTENCES, count); @@ -55,7 +55,7 @@ public void testLeadingWithContraction() throws IOException { ADSentenceStream stream = openData(); ADSentenceStream.Sentence paragraph = stream.read(); - while(paragraph != null) { + while (paragraph != null) { count++; paragraph = stream.read(); diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java index aa7a24054..e6096877d 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamTest.java @@ -120,7 +120,7 @@ public void testTokensAreCorrect() throws IOException { Parse[] tagNodes = p.getTagNodes(); String[] tokens = new String[tagNodes.length]; - for (int ti=0;ti\n"); - docsString.append("test document #"+ i + "\n"); + docsString.append("test document #" + i + "\n"); docsString.append("\n"); } @@ -42,11 +42,11 @@ public void testSplitTwoDocuments() throws IOException { String doc1 = docs.read(); Assert.assertEquals(docsString.length() / 2, doc1.length() + 1); Assert.assertTrue(doc1.contains("#0")); - + String doc2 = docs.read(); Assert.assertEquals(docsString.length() / 2, doc2.length() + 1); Assert.assertTrue(doc2.contains("#1")); - + Assert.assertNull(docs.read()); Assert.assertNull(docs.read()); } diff --git a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java index 07fc3ce29..3a8f85668 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelEvaluationTest.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.languagemodel; import java.util.Collection; diff --git a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelTestUtils.java b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelTestUtils.java index 6f855fc61..ec16cf448 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelTestUtils.java +++ b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/LanguageModelTestUtils.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.languagemodel; import java.math.BigDecimal; diff --git a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java index e0bbc943b..4bff0a408 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/languagemodel/NgramLanguageModelTest.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.languagemodel; import java.io.InputStream; diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java index 994f0e642..0e294e8b6 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerEvaluatorTest.java @@ -41,7 +41,7 @@ public class LemmatizerEvaluatorTest { * Checks the evaluator results against the results got using the conlleval, * available at http://www.cnts.ua.ac.be/conll2000/chunking/output.html but * containing lemmas instead of chunks. - * + * * @throws IOException */ @Test diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java index aca71fbc8..cf2e9079c 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/ml/BeamSearchTest.java @@ -130,8 +130,10 @@ public void testBestSequenceZeroLengthInput() { BeamSearch bs = new BeamSearch<>(3, model); Sequence seq = bs.bestSequence(sequence, null, cg, - (int i, String[] inputSequence, String[] outcomesSequence, - String outcome) -> {return true;}); + (int i, String[] inputSequence, String[] outcomesSequence, String outcome) -> { + return true; + }); + assertNotNull(seq); assertEquals(sequence.length, seq.getOutcomes().size()); } @@ -151,7 +153,9 @@ public void testBestSequenceOneElementInput() { Sequence seq = bs.bestSequence(sequence, null, cg, (int i, String[] inputSequence, String[] outcomesSequence, - String outcome) -> {return true;}); + String outcome) -> { + return true; + }); assertNotNull(seq); assertEquals(sequence.length, seq.getOutcomes().size()); @@ -173,7 +177,9 @@ public void testBestSequence() { Sequence seq = bs.bestSequence(sequence, null, cg, (int i, String[] inputSequence, String[] outcomesSequence, - String outcome) -> {return true;}); + String outcome) -> { + return true; + }); assertNotNull(seq); assertEquals(sequence.length, seq.getOutcomes().size()); @@ -198,10 +204,9 @@ public void testBestSequenceWithValidator() { BeamSearch bs = new BeamSearch<>(2, model, 0); Sequence seq = bs.bestSequence(sequence, null, cg, - (int i, String[] inputSequence, - String[] outcomesSequence, String outcome) -> { - return !"2".equals(outcome); - }); + (int i, String[] inputSequence, String[] outcomesSequence, String outcome) -> { + return !"2".equals(outcome); + }); assertNotNull(seq); assertEquals(sequence.length, seq.getOutcomes().size()); assertEquals("1", seq.getOutcomes().get(0)); diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/PrepAttachDataUtil.java b/opennlp-tools/src/test/java/opennlp/tools/ml/PrepAttachDataUtil.java index 79dd00d9f..32ae43478 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/ml/PrepAttachDataUtil.java +++ b/opennlp-tools/src/test/java/opennlp/tools/ml/PrepAttachDataUtil.java @@ -46,7 +46,7 @@ private static List readPpaFile(String filename) throws IOException { String[] items = line.split("\\s+"); String label = items[5]; String[] context = {"verb=" + items[1], "noun=" + items[2], - "prep=" + items[3], "prep_obj=" + items[4]}; + "prep=" + items[3], "prep_obj=" + items[4]}; events.add(new Event(label, context)); } } @@ -70,7 +70,7 @@ public static void testModel(MaxentModel model, double expecedAccuracy) throws I double[] ocs = model.eval(ev.getContext()); int best = 0; - for (int i=1; i ocs[best]) best = i; @@ -81,7 +81,7 @@ public static void testModel(MaxentModel model, double expecedAccuracy) throws I total++; } - double accuracy = correct/(double)total; + double accuracy = correct / (double) total; System.out.println("Accuracy on PPA devset: (" + correct + "/" + total + ") " + accuracy); assertEquals(expecedAccuracy, accuracy, .00001); diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/RealValueModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/RealValueModelTest.java index 9edd2e5c2..a841964dc 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/RealValueModelTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/RealValueModelTest.java @@ -46,7 +46,7 @@ public void testRealValuedWeightsVsRepeatWeighting() throws IOException { double[] repeatResults = repeatModel.eval(features2Classify); Assert.assertEquals(realResults.length, repeatResults.length); - for(int i=0; i invertedPredIndex = new HashMap(); - Map invertedOutcomeIndex = new HashMap(); + Map invertedPredIndex = new HashMap(); + Map invertedOutcomeIndex = new HashMap(); for (int i = 0; i < predLabels.length; i++) { invertedPredIndex.put(predLabels[i], i); } @@ -184,9 +185,9 @@ private double[] alignDoubleArrayForTestData(double[] expected, for (int i = 0; i < sortedOutcomeLabels.length; i++) { for (int j = 0; j < sortedPredLabels.length; j++) { aligned[i * sortedPredLabels.length + j] = expected[invertedOutcomeIndex - .get(sortedOutcomeLabels[i]) - * sortedPredLabels.length - + invertedPredIndex.get(sortedPredLabels[j])]; + .get(sortedOutcomeLabels[i]) + * sortedPredLabels.length + + invertedPredIndex.get(sortedPredLabels[j])]; } } return aligned; diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java index d167dda55..41ef356fd 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNMinimizerTest.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ml.maxent.quasinewton; import static java.lang.Math.pow; @@ -87,13 +88,13 @@ public int getDimension() { @Override public double valueAt(double[] x) { - return pow(1-x[0], 2) + 100 * pow(x[1] - pow(x[0], 2), 2); + return pow(1 - x[0], 2) + 100 * pow(x[1] - pow(x[0], 2), 2); } @Override public double[] gradientAt(double[] x) { double[] g = new double[2]; - g[0] = -2*(1-x[0]) - 400 * (x[1] - pow(x[0], 2)) * x[0]; + g[0] = -2 * (1 - x[0]) - 400 * (x[1] - pow(x[0], 2)) * x[0]; g[1] = 200 * (x[1] - pow(x[0], 2)); return g; } diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNTrainerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNTrainerTest.java index a7910bc34..0133d0cba 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNTrainerTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/ml/maxent/quasinewton/QNTrainerTest.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.ml.maxent.quasinewton; import static org.junit.Assert.assertEquals; @@ -72,51 +73,51 @@ public void testInTinyDevSet() throws Exception { @Test public void testModel() throws IOException { - // given - RealValueFileEventStream rvfes1 = new RealValueFileEventStream( - "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt"); - DataIndexer testDataIndexer = new OnePassRealValueDataIndexer(rvfes1,1); - // when - QNModel trainedModel = new QNTrainer(15, true).trainModel( - ITERATIONS, testDataIndexer); - - assertTrue(trainedModel.equals(trainedModel)); - assertFalse(trainedModel.equals(null)); + // given + RealValueFileEventStream rvfes1 = new RealValueFileEventStream( + "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt"); + DataIndexer testDataIndexer = new OnePassRealValueDataIndexer(rvfes1,1); + // when + QNModel trainedModel = new QNTrainer(15, true).trainModel( + ITERATIONS, testDataIndexer); + + assertTrue(trainedModel.equals(trainedModel)); + assertFalse(trainedModel.equals(null)); } @Test public void testSerdeModel() throws IOException { - // given - RealValueFileEventStream rvfes1 = new RealValueFileEventStream( - "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt"); - DataIndexer testDataIndexer = new OnePassRealValueDataIndexer(rvfes1,1); - // when - QNModel trainedModel = new QNTrainer(5, 700, true).trainModel(ITERATIONS, testDataIndexer); - - ByteArrayOutputStream modelBytes = new ByteArrayOutputStream(); - GenericModelWriter modelWriter = new GenericModelWriter(trainedModel, - new DataOutputStream(modelBytes)); - modelWriter.persist(); - modelWriter.close(); - - GenericModelReader modelReader = new GenericModelReader(new BinaryFileDataReader( - new ByteArrayInputStream(modelBytes.toByteArray()))); - AbstractModel readModel = modelReader.getModel(); - QNModel deserModel = (QNModel) readModel; - - assertTrue(trainedModel.equals(deserModel)); - - String[] features2Classify = new String[] { - "feature2","feature3", "feature3", - "feature3","feature3", "feature3", - "feature3","feature3", "feature3", - "feature3","feature3", "feature3"}; - double[] eval01 = trainedModel.eval(features2Classify); - double[] eval02 = deserModel.eval(features2Classify); - - assertEquals(eval01.length, eval02.length); - for (int i = 0; i < eval01.length; i++) { - assertEquals(eval01[i], eval02[i], 0.00000001); - } + // given + RealValueFileEventStream rvfes1 = new RealValueFileEventStream( + "src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt"); + DataIndexer testDataIndexer = new OnePassRealValueDataIndexer(rvfes1,1); + // when + QNModel trainedModel = new QNTrainer(5, 700, true).trainModel(ITERATIONS, testDataIndexer); + + ByteArrayOutputStream modelBytes = new ByteArrayOutputStream(); + GenericModelWriter modelWriter = new GenericModelWriter(trainedModel, + new DataOutputStream(modelBytes)); + modelWriter.persist(); + modelWriter.close(); + + GenericModelReader modelReader = new GenericModelReader(new BinaryFileDataReader( + new ByteArrayInputStream(modelBytes.toByteArray()))); + AbstractModel readModel = modelReader.getModel(); + QNModel deserModel = (QNModel) readModel; + + assertTrue(trainedModel.equals(deserModel)); + + String[] features2Classify = new String[] { + "feature2","feature3", "feature3", + "feature3","feature3", "feature3", + "feature3","feature3", "feature3", + "feature3","feature3", "feature3"}; + double[] eval01 = trainedModel.eval(features2Classify); + double[] eval02 = deserModel.eval(features2Classify); + + assertEquals(eval01.length, eval02.length); + for (int i = 0; i < eval01.length; i++) { + assertEquals(eval01[i], eval02[i], 0.00000001); + } } } diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java index a6e467ef9..05b30af4d 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.ml.naivebayes; import java.io.File; diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java index 2150de732..89c918ec0 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java @@ -67,7 +67,7 @@ public void testEvaluator() throws IOException, URISyntaxException { */ private static ObjectStream createSample() throws IOException, URISyntaxException { - + InputStreamFactory in = new ResourceAsStreamFactory( DictionaryNameFinderEvaluatorTest.class, "/opennlp/tools/namefind/AnnotatedSentences.txt"); diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java index 822a63ea4..7714bf52b 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderTest.java @@ -30,7 +30,7 @@ /** *Tests for the {@link DictionaryNameFinder} class. */ -public class DictionaryNameFinderTest{ +public class DictionaryNameFinderTest { private Dictionary mDictionary = new Dictionary(); private TokenNameFinder mNameFinder; @@ -40,9 +40,7 @@ public DictionaryNameFinderTest() { StringList vanessa = new StringList(new String[]{"Vanessa"}); mDictionary.put(vanessa); - StringList vanessaWilliams = new - StringList(new String[]{"Vanessa", - "Williams"}); + StringList vanessaWilliams = new StringList(new String[]{"Vanessa", "Williams"}); mDictionary.put(vanessaWilliams); StringList max = new StringList(new String[]{"Max"}); diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderEventStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderEventStreamTest.java index 9522c132d..a3150e367 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderEventStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderEventStreamTest.java @@ -29,7 +29,7 @@ /** * This is the test class for {@link NameFinderEventStream}. */ -public class NameFinderEventStreamTest{ +public class NameFinderEventStreamTest { /** * Tests the correctly generated outcomes for a test sentence. diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java index e55cc176c..19e364ec1 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java @@ -64,8 +64,8 @@ public void testNameFinder() throws Exception { String encoding = "ISO-8859-1"; ObjectStream sampleStream = - new NameSampleDataStream( - new PlainTextByLineStream(new MockInputStreamFactory(in), encoding)); + new NameSampleDataStream( + new PlainTextByLineStream(new MockInputStreamFactory(in), encoding)); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70)); @@ -79,15 +79,15 @@ public void testNameFinder() throws Exception { // now test if it can detect the sample sentences String sentence[] = {"Alisa", - "appreciated", - "the", - "hint", - "and", - "enjoyed", - "a", - "delicious", - "traditional", - "meal."}; + "appreciated", + "the", + "hint", + "and", + "enjoyed", + "a", + "delicious", + "traditional", + "meal."}; Span names[] = nameFinder.find(sentence); @@ -126,7 +126,7 @@ public void testNameFinderWithTypes() throws Exception { String encoding = "ISO-8859-1"; ObjectStream sampleStream = new NameSampleDataStream( - new PlainTextByLineStream(new MockInputStreamFactory(in), encoding)); + new PlainTextByLineStream(new MockInputStreamFactory(in), encoding)); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70)); @@ -173,7 +173,7 @@ public void testOnlyWithNames() throws Exception { "opennlp/tools/namefind/OnlyWithNames.train"); ObjectStream sampleStream = new NameSampleDataStream( - new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8")); + new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70)); @@ -187,7 +187,7 @@ public void testOnlyWithNames() throws Exception { // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + - "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); @@ -210,7 +210,7 @@ public void testOnlyWithNamesWithTypes() throws Exception { "opennlp/tools/namefind/OnlyWithNamesWithTypes.train"); ObjectStream sampleStream = new NameSampleDataStream( - new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8")); + new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70)); @@ -224,7 +224,7 @@ public void testOnlyWithNamesWithTypes() throws Exception { // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + - "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); @@ -273,14 +273,14 @@ public void testOnlyWithEntitiesWithTypes() throws Exception { } private boolean hasOtherAsOutcome(TokenNameFinderModel nameFinderModel) { - SequenceClassificationModel model = nameFinderModel.getNameFinderSequenceModel(); - String[] outcomes = model.getOutcomes(); - for (int i = 0; i < outcomes.length; i++) { + SequenceClassificationModel model = nameFinderModel.getNameFinderSequenceModel(); + String[] outcomes = model.getOutcomes(); + for (int i = 0; i < outcomes.length; i++) { if (outcomes[i].equals(NameFinderME.OTHER)) { return true; } } - return false; + return false; } @Test @@ -304,7 +304,7 @@ public void testNameFinderWithMultipleTypes() throws Exception { "opennlp/tools/namefind/voa1.train"); ObjectStream sampleStream = new NameSampleDataStream( - new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8")); + new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70)); diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java index 8bb39f919..8de230d9b 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java @@ -103,7 +103,7 @@ public void testWithoutNameTypes() throws Exception { } ds.close(); - + assertEquals(expectedNames.length, names.size()); assertEquals(createDefaultSpan(6,8), spans.get(0)); assertEquals(createDefaultSpan(3,4), spans.get(1)); @@ -189,14 +189,12 @@ public void testWithNameTypes() throws Exception { names.put(nameSpan.getType(), new ArrayList()); spans.put(nameSpan.getType(), new ArrayList()); } - names.get(nameSpan.getType()) - .add(sublistToString(ns.getSentence(), nameSpan)); - spans.get(nameSpan.getType()) - .add(nameSpan); + names.get(nameSpan.getType()).add(sublistToString(ns.getSentence(), nameSpan)); + spans.get(nameSpan.getType()).add(nameSpan); } } ds.close(); - + String[] expectedPerson = { "Barack Obama", "Obama", "Obama", "Lee Myung - bak", "Obama", "Obama", "Scott Snyder", "Snyder", "Obama", "Obama", "Obama", "Tim Peters", "Obama", "Peters" }; @@ -329,13 +327,13 @@ public void testClearAdaptiveData() throws IOException { assertFalse(trainingStream.read().isClearAdaptiveDataSet()); assertTrue(trainingStream.read().isClearAdaptiveDataSet()); assertNull(trainingStream.read()); - + trainingStream.close(); } @Test public void testHtmlNameSampleParsing() throws IOException { - InputStreamFactory in = new ResourceAsStreamFactory(getClass(), + InputStreamFactory in = new ResourceAsStreamFactory(getClass(), "/opennlp/tools/namefind/html1.train"); NameSampleDataStream ds = new NameSampleDataStream( @@ -394,7 +392,7 @@ public void testHtmlNameSampleParsing() throws IOException { assertEquals("", ns.getSentence()[0]); assertNull(ds.read()); - + ds.close(); } } diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java index 5971a5593..4fb9ab736 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleTest.java @@ -49,7 +49,7 @@ private static NameSample createSimpleNameSample(boolean useTypes) { new Span(14, 15, "Location")}; NameSample nameSample; - if(useTypes) { + if (useTypes) { nameSample = new NameSample(sentence, names, false); } else { @@ -74,7 +74,7 @@ public void testNoTypesToString() { String nameSampleStr = createSimpleNameSample(false).toString(); assertEquals(" U . S . President Barack Obama is considering " + - "sending additional American forces to Afghanistan .", nameSampleStr); + "sending additional American forces to Afghanistan .", nameSampleStr); } /** @@ -87,9 +87,9 @@ public void testWithTypesToString() throws Exception { assertEquals(" U . S . President Barack Obama is considering sending additional American forces to Afghanistan .", nameSampleStr); NameSample parsedSample = NameSample.parse(" U . S . " + - "President Barack Obama is considering sending " + - "additional American forces to Afghanistan .", - false); + "President Barack Obama is considering sending " + + "additional American forces to Afghanistan .", + false); assertEquals(createSimpleNameSample(true), parsedSample); } @@ -148,7 +148,7 @@ public void testTypeWithSpecialChars() throws Exception { /** * Test if it fails to parse empty type */ - @Test(expected=IOException.class) + @Test(expected = IOException.class) public void testMissingType() throws Exception { NameSample.parse(" token ", false); @@ -158,7 +158,7 @@ public void testMissingType() throws Exception { * Test if it fails to parse type with space * @throws Exception */ - @Test(expected=IOException.class) + @Test(expected = IOException.class) public void testTypeWithSpace() throws Exception { NameSample.parse(" token ", false); @@ -168,7 +168,7 @@ public void testTypeWithSpace() throws Exception { * Test if it fails to parse type with new line * @throws Exception */ - @Test(expected=IOException.class) + @Test(expected = IOException.class) public void testTypeWithNewLine() throws Exception { NameSample.parse(" token ", false); @@ -178,7 +178,7 @@ public void testTypeWithNewLine() throws Exception { * Test if it fails to parse type with : * @throws Exception */ - @Test(expected=IOException.class) + @Test(expected = IOException.class) public void testTypeWithInvalidChar1() throws Exception { NameSample.parse(" token ", false); @@ -188,7 +188,7 @@ public void testTypeWithInvalidChar1() throws Exception { * Test if it fails to parse type with > * @throws Exception */ - @Test(expected=IOException.class) + @Test(expected = IOException.class) public void testTypeWithInvalidChar2() throws Exception { NameSample.parse("a> token ", false); diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java index 262d61fbd..3b1c00a07 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/RegexNameFinderTest.java @@ -14,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package opennlp.tools.namefind; import static org.junit.Assert.assertTrue; @@ -84,7 +85,7 @@ public void testFindMatchingPatternWithoutMatchingTokenBounds() { Pattern testPattern = Pattern.compile("[0-8] year"); // does match "0 year" String sentence[] = new String[]{"a", "80", "year", "c"}; -Pattern[] patterns = new Pattern[]{testPattern}; + Pattern[] patterns = new Pattern[]{testPattern}; Map regexMap = new HashMap<>(); String type = "testtype"; diff --git a/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java index db7efa909..190232066 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramModelTest.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ngram; import static org.junit.Assert.assertEquals; @@ -191,7 +192,7 @@ public void testSerialize() throws Exception { // remove AL header int start = modelString.indexOf(""); - String asfHeaderString = modelString.substring(start, end +3); + String asfHeaderString = modelString.substring(start, end + 3); modelString = modelString.replace(asfHeaderString, ""); String outputString = out.toString(Charset.forName("UTF-8").name()); assertEquals(modelString.replaceAll("\n", "").replaceAll("\r", "").replaceAll("\t", "").replaceAll(" ", ""), diff --git a/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramUtilsTest.java b/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramUtilsTest.java index 54d80b7a2..29115b1bb 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramUtilsTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/ngram/NGramUtilsTest.java @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package opennlp.tools.ngram; import java.util.Collection; diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java index 129fc11fd..e9645ad33 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/parser/ParseTest.java @@ -80,15 +80,15 @@ public void testShow() { @Test public void testTokenReplacement() { Parse p1 = Parse.parseParse("(TOP (S-CLF (NP-SBJ (PRP It) )(VP (VBD was) " + - " (NP-PRD (NP (DT the) (NN trial) )(PP (IN of) " + - " (NP (NP (NN oleomargarine) (NN heir) )(NP (NNP Minot) " + - " (PRN (-LRB- -LRB-) (NNP Mickey) " + - " (-RRB- -RRB-) )(NNP Jelke) )))(PP (IN for) " + - " (NP (JJ compulsory) (NN prostitution) " + - " ))(PP-LOC (IN in) (NP (NNP New) (NNP York) " + - " )))(SBAR (WHNP-1 (WDT that) )(S (VP (VBD put) " + - " (NP (DT the) (NN spotlight) )(PP (IN on) (NP (DT the) " + - " (JJ international) (NN play-girl) ))))))(. .) ))"); + " (NP-PRD (NP (DT the) (NN trial) )(PP (IN of) " + + " (NP (NP (NN oleomargarine) (NN heir) )(NP (NNP Minot) " + + " (PRN (-LRB- -LRB-) (NNP Mickey) " + + " (-RRB- -RRB-) )(NNP Jelke) )))(PP (IN for) " + + " (NP (JJ compulsory) (NN prostitution) " + + " ))(PP-LOC (IN in) (NP (NNP New) (NNP York) " + + " )))(SBAR (WHNP-1 (WDT that) )(S (VP (VBD put) " + + " (NP (DT the) (NN spotlight) )(PP (IN on) (NP (DT the) " + + " (JJ international) (NN play-girl) ))))))(. .) ))"); StringBuffer parseString = new StringBuffer(); p1.show(parseString); diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/ParserTestUtil.java b/opennlp-tools/src/test/java/opennlp/tools/parser/ParserTestUtil.java index 6c5196ed7..8ad807a6b 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/parser/ParserTestUtil.java +++ b/opennlp-tools/src/test/java/opennlp/tools/parser/ParserTestUtil.java @@ -33,37 +33,37 @@ public class ParserTestUtil { - public static HeadRules createTestHeadRules() throws IOException { - InputStream headRulesIn = - ParserTestUtil.class.getResourceAsStream("/opennlp/tools/parser/en_head_rules"); + public static HeadRules createTestHeadRules() throws IOException { + InputStream headRulesIn = + ParserTestUtil.class.getResourceAsStream("/opennlp/tools/parser/en_head_rules"); - HeadRules headRules = new HeadRules(new BufferedReader( - new InputStreamReader(headRulesIn, "UTF-8"))); + HeadRules headRules = new HeadRules(new BufferedReader( + new InputStreamReader(headRulesIn, "UTF-8"))); - headRulesIn.close(); + headRulesIn.close(); - return headRules; - } + return headRules; + } - public static ObjectStream openTestTrainingData() - throws IOException { + public static ObjectStream openTestTrainingData() + throws IOException { - ObjectStream resetableSampleStream = new ObjectStream () { + ObjectStream resetableSampleStream = new ObjectStream() { - private ObjectStream samples; + private ObjectStream samples; - public void close() throws IOException { - samples.close(); - } + public void close() throws IOException { + samples.close(); + } - public Parse read() throws IOException { - return samples.read(); - } + public Parse read() throws IOException { + return samples.read(); + } - public void reset() throws IOException { - try { - if (samples != null) - samples.close(); + public void reset() throws IOException { + try { + if (samples != null) + samples.close(); InputStreamFactory in = new ResourceAsStreamFactory(getClass(), "/opennlp/tools/parser/parser.train"); samples = new ParseSampleStream(new PlainTextByLineStream(in, StandardCharsets.UTF_8)); @@ -71,11 +71,11 @@ public void reset() throws IOException { // Should never happen Assert.fail(e.getMessage()); } - } - }; + } + }; - resetableSampleStream.reset(); + resetableSampleStream.reset(); - return resetableSampleStream; - } + return resetableSampleStream; + } } diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java index e9c6006f0..9aed029b8 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java @@ -52,8 +52,8 @@ public void testChunkingParserTraining() throws Exception { // TODO: // Tests parsing to make sure the code does not has // a bug which fails always with a runtime exception -// parser.parse(Parse.parseParse("She was just another freighter from the " + -// "States and she seemed as commonplace as her name .")); + // parser.parse(Parse.parseParse("She was just another freighter from the " + + // "States and she seemed as commonplace as her name .")); // Test serializing and de-serializing model ByteArrayOutputStream outArray = new ByteArrayOutputStream(); diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java b/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java index c6cddc2eb..4a4f1d20a 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java +++ b/opennlp-tools/src/test/java/opennlp/tools/postag/DummyPOSTaggerFactory.java @@ -69,7 +69,7 @@ public Map createArtifactSerializersMap() { @Override public Map createArtifactMap() { Map artifactMap = super.createArtifactMap(); - if(this.dict != null) + if (this.dict != null) artifactMap.put(DUMMY_POSDICT, this.dict); return artifactMap; } diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSDictionaryTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSDictionaryTest.java index 34b086104..21b0953f2 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSDictionaryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSDictionaryTest.java @@ -47,7 +47,7 @@ private static POSDictionary serializeDeserializeDict(POSDictionary dict) throws dict.serialize(out); } finally { - out.close(); + out.close(); } POSDictionary serializedDictionary = null; diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSSampleTest.java index 71e64eab6..55b1a9be6 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSSampleTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSSampleTest.java @@ -62,7 +62,7 @@ public static POSSample createPredSample() throws InvalidFormatException { @Test public void testParse() throws InvalidFormatException { String sentence = "the_DT stories_NNS about_IN well-heeled_JJ " + - "communities_NNS and_CC developers_NNS"; + "communities_NNS and_CC developers_NNS"; POSSample sample = POSSample.parse(sentence); diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java index ec7656596..7d43d7a82 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java @@ -64,8 +64,7 @@ public void testPOSTaggerWithCustomFactory() throws IOException { .getResourceAsStream("TagDictionaryCaseSensitive.xml"))); Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0); - POSModel posModel = trainPOSModel( - new DummyPOSTaggerFactory(dic, posDict)); + POSModel posModel = trainPOSModel(new DummyPOSTaggerFactory(dic, posDict)); POSTaggerFactory factory = posModel.getFactory(); assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary); @@ -91,8 +90,7 @@ public void testPOSTaggerWithDefaultFactory() throws IOException { .getResourceAsStream("TagDictionaryCaseSensitive.xml")); Dictionary dic = POSTaggerME.buildNGramDictionary(createSampleStream(), 0); - POSModel posModel = trainPOSModel( - new POSTaggerFactory(dic, posDict)); + POSModel posModel = trainPOSModel(new POSTaggerFactory(dic, posDict)); POSTaggerFactory factory = posModel.getFactory(); assertTrue(factory.getTagDictionary() instanceof POSDictionary); diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java index bcac9aeca..9bd6876d3 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java @@ -38,7 +38,7 @@ public class POSTaggerMETest { private static ObjectStream createSampleStream() throws IOException { - InputStreamFactory in = new ResourceAsStreamFactory(POSTaggerMETest.class, + InputStreamFactory in = new ResourceAsStreamFactory(POSTaggerMETest.class, "/opennlp/tools/postag/AnnotatedSentences.txt"); return new WordTagSampleStream(new PlainTextByLineStream(in, UTF_8)); @@ -67,11 +67,11 @@ public void testPOSTagger() throws IOException { String tags[] = tagger.tag(new String[] { "The", - "driver", - "got", - "badly", - "injured", - "."}); + "driver", + "got", + "badly", + "injured", + "."}); assertEquals(6, tags.length); diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java index 4bbe278fd..3e16224d2 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/postag/WordTagSampleStreamTest.java @@ -42,7 +42,7 @@ public void testParseSimpleSample() throws IOException { sampleString.add("This_x1 is_x2 a_x3 test_x4 sentence_x5 ._x6"); WordTagSampleStream stream = - new WordTagSampleStream(new CollectionObjectStream(sampleString)); + new WordTagSampleStream(new CollectionObjectStream(sampleString)); POSSample sample = stream.read(); String words[] = sample.getSentence(); diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java index 3c42a609f..92c2b915c 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/NewlineSentenceDetectorTest.java @@ -27,7 +27,7 @@ public class NewlineSentenceDetectorTest { - private static void testSentenceValues(String sentences){ + private static void testSentenceValues(String sentences) { NewlineSentenceDetector sd = new NewlineSentenceDetector(); String results[] = sd.sentDetect(sentences); diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java index 6f1949223..6e1233f6f 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java @@ -42,7 +42,7 @@ public void testEventOutcomes() throws IOException { new Span(0, 15), new Span(16, 29)); ObjectStream sampleStream = - ObjectStreamUtils.createObjectStream(sample); + ObjectStreamUtils.createObjectStream(sample); Factory factory = new Factory(); diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java index 4a33c5f20..ed9690374 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java @@ -39,7 +39,7 @@ public class SentenceDetectorMETest { @Test public void testSentenceDetector() throws IOException { - InputStreamFactory in = new ResourceAsStreamFactory(getClass(), + InputStreamFactory in = new ResourceAsStreamFactory(getClass(), "/opennlp/tools/sentdetect/Sentences.txt"); TrainingParameters mlParams = new TrainingParameters(); diff --git a/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java b/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java index a02591500..eb58aa1de 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/stemmer/PorterStemmerTest.java @@ -31,7 +31,7 @@ public void testNotNull() { @Test public void testStemming() { - Assert.assertEquals(stemmer.stem("deny"), "deni" ); + Assert.assertEquals(stemmer.stem("deny"), "deni" ); Assert.assertEquals(stemmer.stem("declining"), "declin" ); Assert.assertEquals(stemmer.stem("diversity"), "divers" ); Assert.assertEquals(stemmer.stem("divers"), "diver" ); diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/DetokenizationDictionaryTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/DetokenizationDictionaryTest.java index bf42419e6..4dc9213e1 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/DetokenizationDictionaryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/DetokenizationDictionaryTest.java @@ -29,7 +29,7 @@ import org.junit.Before; import org.junit.Test; -public class DetokenizationDictionaryTest{ +public class DetokenizationDictionaryTest { private String tokens[]; private Operation operations[]; diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java index 398b18515..fdc3ca4d3 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/DictionaryDetokenizerTest.java @@ -27,7 +27,7 @@ import org.junit.Test; -public class DictionaryDetokenizerTest{ +public class DictionaryDetokenizerTest { @Test public void testDetokenizer() { @@ -40,8 +40,7 @@ public void testDetokenizer() { Operation.MOVE_RIGHT, Operation.MOVE_LEFT, Operation.RIGHT_LEFT_MATCHING, - Operation.MOVE_BOTH - }; + Operation.MOVE_BOTH}; DetokenizationDictionary dict = new DetokenizationDictionary(tokens, operations); Detokenizer detokenizer = new DictionaryDetokenizer(dict); @@ -59,7 +58,7 @@ public void testDetokenizer() { static Detokenizer createLatinDetokenizer() throws IOException { InputStream dictIn = DictionaryDetokenizerTest.class.getResourceAsStream( - "/opennlp/tools/tokenize/latin-detokenizer.xml"); + "/opennlp/tools/tokenize/latin-detokenizer.xml"); DetokenizationDictionary dict = new DetokenizationDictionary(dictIn); diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokSpanEventStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokSpanEventStreamTest.java index fe678a9c9..68e9a701d 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokSpanEventStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokSpanEventStreamTest.java @@ -41,7 +41,7 @@ public class TokSpanEventStreamTest { public void testEventOutcomes() throws IOException { ObjectStream sentenceStream = - ObjectStreamUtils.createObjectStream("\"out.\""); + ObjectStreamUtils.createObjectStream("\"out.\""); ObjectStream tokenSampleStream = new TokenSampleStream(sentenceStream); diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java index 7ce1ddb45..815d93f77 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleStreamTest.java @@ -44,7 +44,7 @@ public void testParsingWhitespaceSeparatedTokens() throws IOException { String sampleTokens = "Slave to the wage"; ObjectStream sampleTokenStream = new TokenSampleStream( - ObjectStreamUtils.createObjectStream(sampleTokens)); + ObjectStreamUtils.createObjectStream(sampleTokens)); TokenSample tokenSample = sampleTokenStream.read(); @@ -69,7 +69,7 @@ public void testParsingSeparatedString() throws IOException { String sampleTokens = "abcd"; ObjectStream sampleTokenStream = new TokenSampleStream( - ObjectStreamUtils.createObjectStream(sampleTokens)); + ObjectStreamUtils.createObjectStream(sampleTokens)); TokenSample tokenSample = sampleTokenStream.read(); @@ -102,7 +102,7 @@ public void testParsingWhitespaceAndSeparatedString() throws IOException { String sampleTokens = "a bc de"; ObjectStream sampleTokenStream = new TokenSampleStream( - ObjectStreamUtils.createObjectStream(sampleTokens)); + ObjectStreamUtils.createObjectStream(sampleTokens)); TokenSample tokenSample = sampleTokenStream.read(); diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java index 4308e5e94..9ae62479f 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenSampleTest.java @@ -63,9 +63,11 @@ public void testCreationWithDetokenizer() throws IOException { TokenSample a = new TokenSample(detokenizer, tokens); assertEquals("start () end. hyphen-string.", a.getText()); - // 0123456789012345678901234567 - assertEquals("start (" + TokenSample.DEFAULT_SEPARATOR_CHARS + ") end" + TokenSample.DEFAULT_SEPARATOR_CHARS + "." - + " hyphen" + TokenSample.DEFAULT_SEPARATOR_CHARS + "-" + TokenSample.DEFAULT_SEPARATOR_CHARS + "string" + TokenSample.DEFAULT_SEPARATOR_CHARS + ".", a.toString()); + // 0123456789012345678901234567 + assertEquals("start (" + TokenSample.DEFAULT_SEPARATOR_CHARS + ") end" + + TokenSample.DEFAULT_SEPARATOR_CHARS + "." + + " hyphen" + TokenSample.DEFAULT_SEPARATOR_CHARS + "-" + TokenSample.DEFAULT_SEPARATOR_CHARS + + "string" + TokenSample.DEFAULT_SEPARATOR_CHARS + ".", a.toString()); assertEquals(9, a.getTokenSpans().length); diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/AbstractEventStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/AbstractEventStreamTest.java index ab44a4c6d..e5e854d77 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/AbstractEventStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/AbstractEventStreamTest.java @@ -60,9 +60,9 @@ public TestEventStream(ObjectStream samples) { * @param sample parameter to specify the output * * @return it returns an {@link Iterator} which contains one - * {@link Event} object if the sample parameter equals - * {@link RESULT#EVENTS} or an empty {@link Iterator} if the sample - * parameter equals {@link RESULT#EMPTY}. + * {@link Event} object if the sample parameter equals + * {@link RESULT#EVENTS} or an empty {@link Iterator} if the sample + * parameter equals {@link RESULT#EMPTY}. */ @Override protected Iterator createEvents(RESULT sample) { diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/ListHeapTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/ListHeapTest.java index 0c75c189e..2860886ca 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/ListHeapTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/ListHeapTest.java @@ -30,7 +30,7 @@ public void testSimple() { Heap heap = new ListHeap(size); - for (int ai = 0; ai < 10; ai++){ + for (int ai = 0; ai < 10; ai++) { if (ai < size) assertEquals(ai, heap.size()); diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/MockInputStreamFactory.java b/opennlp-tools/src/test/java/opennlp/tools/util/MockInputStreamFactory.java index 3c65833b2..9d77cad23 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/MockInputStreamFactory.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/MockInputStreamFactory.java @@ -31,11 +31,11 @@ public class MockInputStreamFactory implements InputStreamFactory { public MockInputStreamFactory(InputStream is) throws FileNotFoundException { this.is = is; } - + public MockInputStreamFactory(String str) throws FileNotFoundException { this.is = new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)); } - + public MockInputStreamFactory(String str, Charset charset) throws FileNotFoundException { this.is = new ByteArrayInputStream(str.getBytes(charset)); } diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/PlainTextByLineStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/PlainTextByLineStreamTest.java index 66af38eea..71ceb1682 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/PlainTextByLineStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/PlainTextByLineStreamTest.java @@ -48,7 +48,7 @@ public void testLineSegmentation() throws IOException { assertEquals("line2", stream.read()); assertEquals("line3", stream.read()); assertEquals("line4", stream.read()); - + stream.close(); } } diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/StringUtilTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/StringUtilTest.java index 8c5bb4b41..329b53324 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/StringUtilTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/StringUtilTest.java @@ -56,9 +56,9 @@ public void testIsEmpty() { assertTrue(!StringUtil.isEmpty("a")); } - @Test(expected=NullPointerException.class) + @Test(expected = NullPointerException.class) public void testIsEmptyWithNullString() { - // should raise a NPE + // should raise a NPE StringUtil.isEmpty(null); } diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/eval/CrossValidationPartitionerTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/eval/CrossValidationPartitionerTest.java index 5826a00b1..f14163596 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/eval/CrossValidationPartitionerTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/eval/CrossValidationPartitionerTest.java @@ -172,13 +172,15 @@ public void testFailSafty() throws IOException { firstTraining.read(); fail(); } - catch (IllegalStateException e) {} + catch (IllegalStateException e) { + } try { firstTraining.getTestSampleStream(); fail(); } - catch (IllegalStateException e) {} + catch (IllegalStateException e) { + } // Test that training iterator fails if there is a test iterator secondTraining.getTestSampleStream(); @@ -187,7 +189,8 @@ public void testFailSafty() throws IOException { secondTraining.read(); fail(); } - catch (IllegalStateException e) {} + catch (IllegalStateException e) { + } // Test that test iterator from previous partition fails // if there is a new partition @@ -201,7 +204,8 @@ public void testFailSafty() throws IOException { thridTest.read(); fail(); } - catch (IllegalStateException e) {} + catch (IllegalStateException e) { + } } @Test diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java index 4097b03cd..c2a5ffb29 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/eval/FMeasureTest.java @@ -65,7 +65,7 @@ public class FMeasureTest { }; private Span predictedToMerge[] = { - new Span(8, 9), + new Span(8, 9), new Span(14, 15), new Span(15, 16), new Span(100, 120), @@ -112,46 +112,46 @@ public void testRecall() { @Test public void testEmpty() { - FMeasure fm = new FMeasure(); - assertEquals(-1, fm.getFMeasure(), DELTA); - assertEquals(0, fm.getRecallScore(), DELTA); - assertEquals(0, fm.getPrecisionScore(), DELTA); + FMeasure fm = new FMeasure(); + assertEquals(-1, fm.getFMeasure(), DELTA); + assertEquals(0, fm.getRecallScore(), DELTA); + assertEquals(0, fm.getPrecisionScore(), DELTA); } @Test public void testPerfect() { - FMeasure fm = new FMeasure(); - fm.updateScores(gold, gold); - assertEquals(1, fm.getFMeasure(), DELTA); - assertEquals(1, fm.getRecallScore(), DELTA); - assertEquals(1, fm.getPrecisionScore(), DELTA); + FMeasure fm = new FMeasure(); + fm.updateScores(gold, gold); + assertEquals(1, fm.getFMeasure(), DELTA); + assertEquals(1, fm.getRecallScore(), DELTA); + assertEquals(1, fm.getPrecisionScore(), DELTA); } @Test public void testMerge() { - FMeasure fm = new FMeasure(); - fm.updateScores(gold, predicted); - fm.updateScores(goldToMerge, predictedToMerge); + FMeasure fm = new FMeasure(); + fm.updateScores(gold, predicted); + fm.updateScores(goldToMerge, predictedToMerge); - FMeasure fmMerge = new FMeasure(); - fmMerge.updateScores(gold, predicted); - FMeasure toMerge = new FMeasure(); - toMerge.updateScores(goldToMerge, predictedToMerge); - fmMerge.mergeInto(toMerge); + FMeasure fmMerge = new FMeasure(); + fmMerge.updateScores(gold, predicted); + FMeasure toMerge = new FMeasure(); + toMerge.updateScores(goldToMerge, predictedToMerge); + fmMerge.mergeInto(toMerge); - double selected1 = predicted.length; - double target1 = gold.length; - double tp1 = FMeasure.countTruePositives(gold, predicted); + double selected1 = predicted.length; + double target1 = gold.length; + double tp1 = FMeasure.countTruePositives(gold, predicted); - double selected2 = predictedToMerge.length; - double target2 = goldToMerge.length; - double tp2 = FMeasure.countTruePositives(goldToMerge, predictedToMerge); + double selected2 = predictedToMerge.length; + double target2 = goldToMerge.length; + double tp2 = FMeasure.countTruePositives(goldToMerge, predictedToMerge); - assertEquals((tp1 + tp2) / (target1 + target2), fm.getRecallScore(), DELTA); - assertEquals((tp1 + tp2) / (selected1 + selected2), fm.getPrecisionScore(), DELTA); + assertEquals((tp1 + tp2) / (target1 + target2), fm.getRecallScore(), DELTA); + assertEquals((tp1 + tp2) / (selected1 + selected2), fm.getPrecisionScore(), DELTA); - assertEquals(fm.getRecallScore(), fmMerge.getRecallScore(), DELTA); - assertEquals(fm.getPrecisionScore(), fmMerge.getPrecisionScore(), DELTA); + assertEquals(fm.getRecallScore(), fmMerge.getRecallScore(), DELTA); + assertEquals(fm.getPrecisionScore(), fmMerge.getPrecisionScore(), DELTA); } } \ No newline at end of file diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java index edb7408d0..37a92e7f5 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java @@ -26,7 +26,7 @@ import opennlp.tools.util.model.ArtifactSerializer; public class FeatureGenWithSerializerMapping extends CustomFeatureGenerator - implements ArtifactToSerializerMapper { + implements ArtifactToSerializerMapper { @Override public void createFeatures(List features, String[] tokens, int index, diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java index 9a285880f..cd12b80ba 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java @@ -35,7 +35,7 @@ import org.junit.Test; public class GeneratorFactoryTest { - + @Test public void testCreationWithTokenClassFeatureGenerator() throws Exception { InputStream generatorDescriptorIn = getClass().getResourceAsStream( @@ -46,11 +46,12 @@ public void testCreationWithTokenClassFeatureGenerator() throws Exception { assertNotNull(generatorDescriptorIn); AggregatedFeatureGenerator aggregatedGenerator = - (AggregatedFeatureGenerator) GeneratorFactory.create(generatorDescriptorIn, null); + (AggregatedFeatureGenerator) GeneratorFactory.create(generatorDescriptorIn, null); assertEquals(1, aggregatedGenerator.getGenerators().size()); - assertEquals(TokenClassFeatureGenerator.class.getName(), aggregatedGenerator.getGenerators().iterator().next().getClass().getName()); - + assertEquals(TokenClassFeatureGenerator.class.getName(), + aggregatedGenerator.getGenerators().iterator().next().getClass().getName()); + } @Test @@ -66,16 +67,15 @@ public void testCreationWihtSimpleDescriptor() throws Exception { expectedGenerators.add(OutcomePriorFeatureGenerator.class.getName()); AggregatedFeatureGenerator aggregatedGenerator = - (AggregatedFeatureGenerator) GeneratorFactory.create(generatorDescriptorIn, null); + (AggregatedFeatureGenerator) GeneratorFactory.create(generatorDescriptorIn, null); - for (AdaptiveFeatureGenerator generator : - aggregatedGenerator.getGenerators()) { + for (AdaptiveFeatureGenerator generator : aggregatedGenerator.getGenerators()) { - expectedGenerators.remove(generator.getClass().getName()); + expectedGenerators.remove(generator.getClass().getName()); - // if of kind which requires parameters check that + // if of kind which requires parameters check that } // If this fails not all expected generators were found and @@ -93,7 +93,7 @@ public void testCreationWithCustomGenerator() throws Exception { assertNotNull(generatorDescriptorIn); AggregatedFeatureGenerator aggregatedGenerator = - (AggregatedFeatureGenerator) GeneratorFactory.create(generatorDescriptorIn, null); + (AggregatedFeatureGenerator) GeneratorFactory.create(generatorDescriptorIn, null); Collection embeddedGenerator = aggregatedGenerator.getGenerators(); diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java index 5e41c88a3..d6c53ca3b 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java +++ b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java @@ -105,7 +105,7 @@ public void initialize(UimaContext context) super.initialize(context); - this.context = context; + this.context = context; mLogger = context.getLogger(); @@ -117,12 +117,12 @@ public void initialize(UimaContext context) try { ChunkerModelResource modelResource = - (ChunkerModelResource) context.getResourceObject(UimaUtil.MODEL_PARAMETER); + (ChunkerModelResource) context.getResourceObject(UimaUtil.MODEL_PARAMETER); - model = modelResource.getModel(); + model = modelResource.getModel(); } catch (ResourceAccessException e) { - throw new ResourceInitializationException(e); + throw new ResourceInitializationException(e); } mChunker = new ChunkerME(model); @@ -135,12 +135,12 @@ public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException { // chunk type - mChunkType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem, + mChunkType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem, CHUNK_TYPE_PARAMETER); // chunk feature mChunkFeature = AnnotatorUtil.getRequiredFeatureParameter(context, mChunkType, - CHUNK_TAG_FEATURE_PARAMETER, CAS.TYPE_NAME_STRING); + CHUNK_TAG_FEATURE_PARAMETER, CAS.TYPE_NAME_STRING); // token type mTokenType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem, @@ -148,7 +148,7 @@ public void typeSystemInit(TypeSystem typeSystem) // pos feature mPosFeature = AnnotatorUtil.getRequiredFeatureParameter(context, mTokenType, UimaUtil.POS_FEATURE_PARAMETER, - CAS.TYPE_NAME_STRING); + CAS.TYPE_NAME_STRING); } private void addChunkAnnotation(CAS tcas, AnnotationFS tokenAnnotations[], @@ -171,7 +171,7 @@ public void process(CAS tcas) { String tokens[] = new String[tokenAnnotationIndex.size()]; String pos[] = new String[tokenAnnotationIndex.size()]; AnnotationFS tokenAnnotations[] = new AnnotationFS[tokenAnnotationIndex - .size()]; + .size()]; int index = 0; @@ -196,16 +196,16 @@ public void process(CAS tcas) { if (chunkTag.startsWith("B")) { if (start != -1) { addChunkAnnotation(tcas, tokenAnnotations, result[i - 1].substring(2), - start, end); + start, end); } start = i; end = i + 1; } else if (chunkTag.startsWith("I")) { - end = i + 1; + end = i + 1; } - else if (chunkTag.startsWith("O")){ + else if (chunkTag.startsWith("O")) { if (start != -1) { addChunkAnnotation(tcas, tokenAnnotations, result[i - 1].substring(2), start, end); @@ -215,7 +215,7 @@ else if (chunkTag.startsWith("O")){ } } else { - System.out.println("Unexpected tag: " + result[i]); + System.out.println("Unexpected tag: " + result[i]); } } diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java index 8c6232b79..bbffe3178 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java @@ -110,10 +110,10 @@ public void initialize() throws ResourceInitializationException { * Initialize the current instance with the given type system. */ public void typeSystemInit(TypeSystem typeSystem) - throws ResourceInitializationException { + throws ResourceInitializationException { String sentenceTypeName = CasConsumerUtil.getRequiredStringParameter(mContext, - UimaUtil.SENTENCE_TYPE_PARAMETER); + UimaUtil.SENTENCE_TYPE_PARAMETER); mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName); @@ -158,7 +158,7 @@ private void processSentence(CAS tcas, AnnotationFS sentence) { FSIndex chunkIndex = tcas.getAnnotationIndex(mChunkType); ContainingConstraint containingConstraint = - new ContainingConstraint(sentence); + new ContainingConstraint(sentence); Iterator chunkIterator = tcas.createFilteredIterator( chunkIndex.iterator(), containingConstraint); @@ -176,7 +176,7 @@ private void processChunk(CAS tcas, AnnotationFS chunk) { FSIndex tokenIndex = tcas.getAnnotationIndex(mTokenType); ContainingConstraint containingConstraint = - new ContainingConstraint(chunk); + new ContainingConstraint(chunk); Iterator tokenIterator = tcas.createFilteredIterator(tokenIndex.iterator(), containingConstraint); diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java index cb71a508c..e21a798f8 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizer.java @@ -49,7 +49,7 @@ public void typeSystemInit(TypeSystem typeSystem) // get feature name mCategoryFeature = AnnotatorUtil.getRequiredFeatureParameter(getContext(), mCategoryType, - "opennlp.uima.doccat.CategoryFeature", CAS.TYPE_NAME_STRING); + "opennlp.uima.doccat.CategoryFeature", CAS.TYPE_NAME_STRING); } @Override diff --git a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java index 5b35a8204..9865cf88e 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/doccat/DocumentCategorizerTrainer.java @@ -116,8 +116,8 @@ public void processCas(CAS cas) throws ResourceProcessException { // add to event collection DocumentSample sample = new DocumentSample( - categoryAnnotation.getStringValue(mCategoryFeature), - cas.getDocumentText()); + categoryAnnotation.getStringValue(mCategoryFeature), + cas.getDocumentText()); documentSamples.add(sample); } diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java index 948ab5f12..356f316fa 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java +++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java @@ -63,22 +63,21 @@ protected void initialize() throws ResourceInitializationException { public final void initialize(UimaContext context) throws ResourceInitializationException { - super.initialize(context); + super.initialize(context); - this.context = context; + this.context = context; mLogger = context.getLogger(); if (mLogger.isLoggable(Level.INFO)) { - mLogger.log(Level.INFO, - "Initializing the " + name + "."); + mLogger.log(Level.INFO, "Initializing the " + name + "."); } isRemoveExistingAnnotations = AnnotatorUtil.getOptionalBooleanParameter( context, UimaUtil.IS_REMOVE_EXISTINGS_ANNOTAIONS); if (isRemoveExistingAnnotations == null) { - isRemoveExistingAnnotations = false; + isRemoveExistingAnnotations = false; } initialize(); @@ -91,7 +90,7 @@ public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException { // sentence type - mSentenceType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem, + mSentenceType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem, UimaUtil.SENTENCE_TYPE_PARAMETER); // token type @@ -117,7 +116,7 @@ public void typeSystemInit(TypeSystem typeSystem) nameTypeMap.put(parts[0].trim(), typeSystem.getType(parts[1].trim())); } else { - mLogger.log(Level.WARNING, String.format("Failed to parse a part of the type mapping [%s]", mapping)); + mLogger.log(Level.WARNING, String.format("Failed to parse a part of the type mapping [%s]", mapping)); } } @@ -125,12 +124,12 @@ public void typeSystemInit(TypeSystem typeSystem) } if (mNameType == null && mNameTypeMapping.size() == 0) { - throw new AnalysisEngineProcessException(new Exception("No name type or valid name type mapping configured!")); + throw new AnalysisEngineProcessException(new Exception("No name type or valid name type mapping configured!")); } } protected void postProcessAnnotations(Span detectedNames[], - AnnotationFS[] nameAnnotations) { + AnnotationFS[] nameAnnotations) { } /** diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java index ca1377d1a..fd76aab8e 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java +++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinder.java @@ -124,10 +124,10 @@ public void initialize() TokenNameFinderModelResource modelResource = (TokenNameFinderModelResource) context.getResourceObject(UimaUtil.MODEL_PARAMETER); - model = modelResource.getModel(); + model = modelResource.getModel(); } catch (ResourceAccessException e) { - throw new ResourceInitializationException(e); + throw new ResourceInitializationException(e); } mNameFinder = new NameFinderME(model); @@ -142,7 +142,7 @@ public void typeSystemInit(TypeSystem typeSystem) super.typeSystemInit(typeSystem); probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context, mNameType, - UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE); + UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE); documentConfidenceType = AnnotatorUtil.getOptionalTypeParameter(context, typeSystem, "opennlp.uima.DocumentConfidenceType"); diff --git a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java index 2118a0ae2..5491aee60 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java @@ -144,9 +144,9 @@ public void initialize(UimaContext context) } } - /** - * Initializes the type system. - */ + /** + * Initializes the type system. + */ public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException { diff --git a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java index 6e7ecda61..2223670e2 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java +++ b/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java @@ -273,7 +273,7 @@ protected void process(CAS cas, AnnotationFS sentenceAnnotation) { List tokenSpans = new LinkedList<>(); - while(containingTokens.hasNext()) { + while (containingTokens.hasNext()) { AnnotationFS token = containingTokens.next(); tokenSpans.add(new Span(token.getBegin() - sentenceAnnotation.getBegin(), @@ -334,6 +334,7 @@ protected AnnotationFS createAnnotation(CAS cas, int offset, Parse parse) { /** * Releases allocated resources. */ + public void destroy() { mParser = null; } diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java index 16e160585..f9e35d3cc 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java @@ -228,7 +228,7 @@ public boolean isStateless() { * Releases allocated resources. */ public void destroy() { - // dereference to allow garbage collection + // dereference to allow garbage collection mPOSSamples = null; } } diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java index 49ed7eedb..4b7a6df34 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java +++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java @@ -47,7 +47,7 @@ * * * - * + * * *
    Type Name Description
    String opennlp.uima.ContainerType The name of the container type
    String opennlp.uima.ContainerType The name of the container type
    String opennlp.uima.ProbabilityFeature The name of the double * probability feature (not set by default)
    diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java index 2c110bd2a..b02a87b84 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java @@ -166,15 +166,15 @@ public void collectionProcessComplete(ProcessTrace trace) SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create( null, language, true, null, eos); - // TrainingParameters mlParams = ModelUtil.createTrainingParameters(100, 5); - TrainingParameters mlParams = ModelUtil.createDefaultTrainingParameters(); + // TrainingParameters mlParams = ModelUtil.createTrainingParameters(100, 5); + TrainingParameters mlParams = ModelUtil.createDefaultTrainingParameters(); ObjectStream samples = ObjectStreamUtils.createObjectStream(sentenceSamples); Writer samplesOut; if (sampleTraceFile != null) { - samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding); - samples = new SampleTraceStream<>(samples, samplesOut); + samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding); + samples = new SampleTraceStream<>(samples, samplesOut); } SentenceModel sentenceModel = SentenceDetectorME.train(language, samples, diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java index c48ce0564..7400cc12e 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java @@ -49,7 +49,7 @@ public final class SimpleTokenizer extends AbstractTokenizer { * this instance. Not use the constructor. */ public SimpleTokenizer() { - super("OpenNLP Simple Tokenizer"); + super("OpenNLP Simple Tokenizer"); // must not be implemented ! } diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java index 2b3605100..becb41ada 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java @@ -113,7 +113,7 @@ public final class TokenizerTrainer extends CasConsumer_ImplBase { */ public void initialize() throws ResourceInitializationException { - super.initialize(); + super.initialize(); mContext = getUimaContext(); @@ -134,7 +134,7 @@ public void initialize() throws ResourceInitializationException { mContext, IS_ALPHA_NUMERIC_OPTIMIZATION); if (isSkipAlphaNumerics == null) { - isSkipAlphaNumerics = false; + isSkipAlphaNumerics = false; } additionalTrainingDataFile = CasConsumerUtil.getOptionalStringParameter( diff --git a/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java b/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java index 227c825c2..06e349786 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java @@ -43,7 +43,7 @@ public final class WhitespaceTokenizer extends AbstractTokenizer { * this instance. Not use the constructor. */ public WhitespaceTokenizer() { - super("OpenNLP Whitespace Tokenizer"); + super("OpenNLP Whitespace Tokenizer"); // must not be implemented ! } diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java b/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java index 320fb5c10..7dff4e50f 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java @@ -26,18 +26,18 @@ public abstract class AbstractModelResource implements SharedResourceObject { - protected T model; + protected T model; - protected abstract T loadModel(InputStream in) throws IOException; + protected abstract T loadModel(InputStream in) throws IOException; - public void load(DataResource resource) throws ResourceInitializationException { - try { - model = loadModel(resource.getInputStream()); - } catch (IOException e) { - throw new ResourceInitializationException( - ExceptionMessages.MESSAGE_CATALOG, - ExceptionMessages.IO_ERROR_MODEL_READING, new Object[] { - e.getMessage()}, e); - } - } + public void load(DataResource resource) throws ResourceInitializationException { + try { + model = loadModel(resource.getInputStream()); + } catch (IOException e) { + throw new ResourceInitializationException( + ExceptionMessages.MESSAGE_CATALOG, + ExceptionMessages.IO_ERROR_MODEL_READING, new Object[] { + e.getMessage()}, e); + } + } } diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationIteratorPair.java b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationIteratorPair.java index 228c99841..2e4ad6112 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationIteratorPair.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationIteratorPair.java @@ -28,14 +28,17 @@ public final class AnnotationIteratorPair { private final AnnotationFS annot; private final Iterable it; + public AnnotationIteratorPair(AnnotationFS a, Iterable it) { super(); this.annot = a; this.it = it; } + public AnnotationFS getAnnotation() { return this.annot; } + public Iterable getSubIterator() { return this.it; } diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java index 2acbf0ade..d7a8216ae 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java @@ -120,18 +120,17 @@ public static Feature getRequiredFeature(Type type, String featureName, } public static Feature getRequiredFeatureParameter(UimaContext context, Type type, - String featureNameParameter) - throws AnalysisEngineProcessException { + String featureNameParameter) throws AnalysisEngineProcessException { - String featureName; + String featureName; - try { - featureName = getRequiredStringParameter(context, featureNameParameter); - } catch (ResourceInitializationException e) { - throw new OpenNlpAnnotatorProcessException(e); - } + try { + featureName = getRequiredStringParameter(context, featureNameParameter); + } catch (ResourceInitializationException e) { + throw new OpenNlpAnnotatorProcessException(e); + } - return getRequiredFeature(type, featureName); + return getRequiredFeature(type, featureName); } public static Feature getRequiredFeatureParameter(UimaContext context, @@ -244,11 +243,11 @@ public static Boolean getRequiredBooleanParameter(UimaContext context, } private static void checkForNull(Object value, String parameterName) - throws ResourceInitializationException { + throws ResourceInitializationException { if (value == null) { throw new ResourceInitializationException( - ExceptionMessages.MESSAGE_CATALOG, - ExceptionMessages.PARAMETER_NOT_FOUND, + ExceptionMessages.MESSAGE_CATALOG, + ExceptionMessages.PARAMETER_NOT_FOUND, new Object[] {parameterName}); } } @@ -273,7 +272,7 @@ public static Feature getOptionalFeatureParameter(UimaContext context, } public static Feature getOptionalFeature(Type type, String featureName, String rangeType) - throws AnalysisEngineProcessException{ + throws AnalysisEngineProcessException { Feature feature = type.getFeatureByBaseName(featureName); diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java index 5735e20de..53889b979 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java @@ -42,16 +42,16 @@ private CasConsumerUtil(){ } public static InputStream getOptionalResourceAsStream(UimaContext context, - String name) throws ResourceInitializationException { - try { - return context.getResourceAsStream(name); - } catch (ResourceAccessException e) { - throw new ResourceInitializationException( - ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] { "There is an internal error in the UIMA SDK: " + - e.getMessage(), - e }); - } + String name) throws ResourceInitializationException { + try { + return context.getResourceAsStream(name); + } catch (ResourceAccessException e) { + throw new ResourceInitializationException( + ResourceInitializationException.STANDARD_MESSAGE_CATALOG, + new Object[] { "There is an internal error in the UIMA SDK: " + + e.getMessage(), + e }); + } } /** @@ -112,6 +112,7 @@ public static Type getOptionalType(TypeSystem typeSystem, String name) throws ResourceInitializationException { return typeSystem.getType(name); } + /** * Retrieves a required parameter form the given context. * @@ -189,13 +190,12 @@ public static Boolean getRequiredBooleanParameter(UimaContext context, } private static void checkForNull(Object value, String parameterName) - throws ResourceInitializationException{ + throws ResourceInitializationException { if (value == null) { throw new ResourceInitializationException( ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] { "The " + parameterName + " is a " + - "required parameter!" }); + new Object[] { "The " + parameterName + " is a required parameter!" }); } } @@ -221,36 +221,35 @@ else if (value instanceof String) { else { throw new ResourceInitializationException( ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"The parameter: " + parameter + " does not have" + - " the expected type String"}); + new Object[] {"The parameter: " + parameter + " does not have the expected type String"}); } } public static String[] getOptionalStringArrayParameter(UimaContext context, - String parameter) throws ResourceInitializationException { - - Object value = getOptionalParameter(context, parameter); - - if (value instanceof String[]) { - return (String[]) value; - } else if (value == null) { - return new String[0]; - } else { - throw new ResourceInitializationException( - ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] { "The parameter: " + parameter - + " does not have the expected type String array" }); - } + String parameter) throws ResourceInitializationException { + + Object value = getOptionalParameter(context, parameter); + + if (value instanceof String[]) { + return (String[]) value; + } else if (value == null) { + return new String[0]; + } else { + throw new ResourceInitializationException( + ResourceInitializationException.STANDARD_MESSAGE_CATALOG, + new Object[] { "The parameter: " + parameter + + " does not have the expected type String array" }); } + } /** - * Retrieves an optional boolean parameter from the given context. - * - * @param context - * @param parameter - * @return the boolean parameter or null if not set - * @throws ResourceInitializationException - */ + * Retrieves an optional boolean parameter from the given context. + * + * @param context + * @param parameter + * @return the boolean parameter or null if not set + * @throws ResourceInitializationException + */ public static Integer getOptionalIntegerParameter(UimaContext context, String parameter) throws ResourceInitializationException { @@ -265,8 +264,7 @@ else if (value instanceof Integer) { else { throw new ResourceInitializationException( ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"The parameter: " + parameter + " does not have " + - "the expected type Integer"}); + new Object[] {"The parameter: " + parameter + " does not have the expected type Integer"}); } } @@ -313,8 +311,7 @@ else if (value instanceof Float) { else { throw new ResourceInitializationException( ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"The parameter: " + parameter + " does not have" + - " the expected type Float"}); + new Object[] {"The parameter: " + parameter + " does not have the expected type Float"}); } } @@ -340,8 +337,7 @@ else if (value instanceof Boolean) { else { throw new ResourceInitializationException( ResourceInitializationException.STANDARD_MESSAGE_CATALOG, - new Object[] {"The parameter: " + parameter + " does not have" + - " the expected type Boolean"}); + new Object[] {"The parameter: " + parameter + " does not have the expected type Boolean"}); } } @@ -370,7 +366,7 @@ private static Object getOptionalParameter(UimaContext context, * @throws ResourceInitializationException - if type does not match */ public static void checkFeatureType(Feature feature, String expectedType) - throws ResourceInitializationException { + throws ResourceInitializationException { if (!feature.getRange().getName().equals(expectedType)) { throw new ResourceInitializationException( ResourceInitializationException.STANDARD_MESSAGE_CATALOG, @@ -381,46 +377,46 @@ public static void checkFeatureType(Feature feature, String expectedType) } public static Dictionary createOptionalDictionary(UimaContext context, String parameter) - throws ResourceInitializationException { - String dictionaryName = CasConsumerUtil.getOptionalStringParameter( - context, parameter); + throws ResourceInitializationException { + String dictionaryName = CasConsumerUtil.getOptionalStringParameter( + context, parameter); - Dictionary dictionary = null; + Dictionary dictionary = null; - if (dictionaryName != null) { + if (dictionaryName != null) { - Logger logger = context.getLogger(); + Logger logger = context.getLogger(); - try { + try { - InputStream dictIn = CasConsumerUtil.getOptionalResourceAsStream(context, - dictionaryName); + InputStream dictIn = CasConsumerUtil.getOptionalResourceAsStream(context, + dictionaryName); - if (dictIn == null) { - String message = "The dictionary file " + dictionaryName + - " does not exist!"; + if (dictIn == null) { + String message = "The dictionary file " + dictionaryName + + " does not exist!"; - if (logger.isLoggable(Level.WARNING)) { - logger.log(Level.WARNING, message); - } + if (logger.isLoggable(Level.WARNING)) { + logger.log(Level.WARNING, message); + } - return null; - } + return null; + } - dictionary = new Dictionary(dictIn); + dictionary = new Dictionary(dictIn); - } catch (IOException e) { - // if this fails just print error message and continue - String message = "IOException during dictionary reading, " - + "running without dictionary: " + e.getMessage(); + } catch (IOException e) { + // if this fails just print error message and continue + String message = "IOException during dictionary reading, " + + "running without dictionary: " + e.getMessage(); - if (logger.isLoggable(Level.WARNING)) { - logger.log(Level.WARNING, message); - } - } + if (logger.isLoggable(Level.WARNING)) { + logger.log(Level.WARNING, message); + } + } - return dictionary; - } else - return null; + return dictionary; + } else + return null; } } \ No newline at end of file diff --git a/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java b/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java index 5d4efc6d3..f066c8c59 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java +++ b/opennlp-uima/src/main/java/opennlp/uima/util/UimaUtil.java @@ -74,7 +74,7 @@ private UimaUtil(){ "opennlp.uima.ProbabilityFeature"; public static final String IS_REMOVE_EXISTINGS_ANNOTAIONS = - "opennlp.uima.IsRemoveExistingAnnotations"; + "opennlp.uima.IsRemoveExistingAnnotations"; public static final String ADDITIONAL_TRAINING_DATA_FILE = "opennlp.uima.AdditionalTrainingDataFile";