From 2e1f7e759bac3d12aa21e39bbf97fb51ddeaa86d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Kottmann?= Date: Wed, 11 Jan 2017 22:40:03 +0100 Subject: [PATCH] OPENNLP-923: Wrap all lines longer than 110 chars And also add checkstyle enforcement --- checkstyle-suppressions.xml | 2 +- checkstyle.xml | 8 +- .../opennlp/bratann/NameFinderAnnService.java | 6 +- .../MorfologikDictionaryBuilderParams.java | 6 +- .../opennlp/tools/chunker/ChunkSample.java | 10 +- .../chunker/ChunkerContextGenerator.java | 6 +- .../java/opennlp/tools/chunker/ChunkerME.java | 7 +- .../DefaultChunkerContextGenerator.java | 3 +- .../opennlp/tools/cmdline/ArgumentParser.java | 22 +++-- .../opennlp/tools/cmdline/CmdLineUtil.java | 3 +- .../tools/cmdline/GenerateManualTool.java | 12 ++- .../tools/cmdline/PerformanceMonitor.java | 4 +- .../cmdline/SystemInputStreamFactory.java | 3 +- .../tools/cmdline/chunker/ChunkerMETool.java | 6 +- .../tools/cmdline/chunker/TrainingParams.java | 3 +- .../dictionary/DictionaryBuilderTool.java | 3 +- .../tools/cmdline/doccat/DoccatTool.java | 4 +- .../tools/cmdline/doccat/TrainingParams.java | 9 +- .../entitylinker/EntityLinkerTool.java | 3 +- .../cmdline/lemmatizer/TrainingParams.java | 4 +- .../TokenNameFinderCrossValidatorTool.java | 3 +- .../cmdline/namefind/TokenNameFinderTool.java | 4 +- .../namefind/TokenNameFinderTrainerTool.java | 6 +- .../tools/cmdline/parser/ParserTool.java | 8 +- .../cmdline/parser/ParserTrainerTool.java | 3 +- .../tools/cmdline/postag/POSTaggerTool.java | 3 +- .../tools/cmdline/postag/TrainingParams.java | 12 ++- .../sentdetect/SentenceDetectorTool.java | 4 +- .../cmdline/sentdetect/TrainingParams.java | 3 +- .../cmdline/tokenizer/TrainingParams.java | 6 +- .../serializer/DictionarySerializer.java | 3 +- .../DocumentCategorizerEventStream.java | 3 +- .../tools/doccat/DocumentSampleStream.java | 6 +- .../tools/doccat/NGramFeatureGenerator.java | 6 +- .../tools/entitylinker/EntityLinker.java | 3 +- 
.../entitylinker/EntityLinkerFactory.java | 3 +- .../tools/entitylinker/LinkedSpan.java | 3 +- .../formats/BioNLP2004NameSampleStream.java | 3 +- .../formats/Conll02NameSampleStream.java | 3 +- .../formats/Conll03NameSampleStream.java | 3 +- .../tools/formats/ConllXPOSSampleStream.java | 3 +- .../formats/EvalitaNameSampleStream.java | 3 +- .../formats/LeipzigDoccatSampleStream.java | 6 +- .../tools/formats/ad/ADNameSampleStream.java | 3 +- .../formats/ad/ADNameSampleStreamFactory.java | 3 +- .../formats/ad/ADPOSSampleStreamFactory.java | 6 +- .../ad/ADSentenceSampleStreamFactory.java | 3 +- .../formats/brat/BratNameSampleStream.java | 5 +- .../convert/NameToSentenceSampleStream.java | 3 +- .../ParseToPOSSampleStreamFactory.java | 3 +- .../ConstitParseSampleStreamFactory.java | 4 +- .../muc/Muc6NameSampleStreamFactory.java | 3 +- .../OntoNotesNameSampleStreamFactory.java | 5 +- .../languagemodel/NGramLanguageModel.java | 7 +- .../DefaultLemmatizerContextGenerator.java | 3 +- .../opennlp/tools/lemmatizer/LemmaSample.java | 3 +- .../lemmatizer/LemmaSampleEventStream.java | 3 +- .../LemmatizerContextGenerator.java | 6 +- .../tools/lemmatizer/LemmatizerME.java | 7 +- .../java/opennlp/tools/ml/BeamSearch.java | 7 +- .../opennlp/tools/ml/maxent/GISTrainer.java | 18 ++-- .../tools/ml/model/AbstractDataIndexer.java | 8 +- .../opennlp/tools/ml/model/AbstractModel.java | 11 ++- .../tools/ml/model/AbstractModelReader.java | 2 +- .../tools/ml/model/EvalParameters.java | 3 +- .../tools/ml/model/MutableContext.java | 3 +- .../ml/model/OnePassRealValueDataIndexer.java | 6 +- .../ml/model/PlainTextFileDataReader.java | 3 +- .../ml/model/SequenceClassificationModel.java | 4 +- .../tools/ml/model/TwoPassDataIndexer.java | 3 +- .../tools/ml/naivebayes/LogProbabilities.java | 10 +- .../tools/ml/naivebayes/LogProbability.java | 9 +- .../naivebayes/NaiveBayesEvalParameters.java | 3 +- .../tools/ml/naivebayes/NaiveBayesModel.java | 23 +++-- .../tools/ml/naivebayes/Probabilities.java | 3 
+- .../tools/ml/naivebayes/Probability.java | 6 +- .../tools/ml/perceptron/PerceptronModel.java | 6 +- .../SimplePerceptronSequenceTrainer.java | 26 +++-- .../BilouNameFinderSequenceValidator.java | 3 +- .../namefind/DefaultNameContextGenerator.java | 11 ++- .../tools/namefind/DocumentNameFinder.java | 5 +- .../tools/namefind/NameContextGenerator.java | 6 +- .../tools/namefind/NameFinderEventStream.java | 14 +-- .../opennlp/tools/namefind/NameFinderME.java | 3 +- .../namefind/NameSampleSequenceStream.java | 6 +- .../namefind/RegexNameFinderFactory.java | 29 ++++-- .../tools/namefind/TokenNameFinder.java | 4 +- .../TokenNameFinderCrossValidator.java | 3 +- .../namefind/TokenNameFinderEvaluator.java | 3 +- .../namefind/TokenNameFinderFactory.java | 8 +- .../opennlp/tools/ngram/NGramGenerator.java | 6 +- .../java/opennlp/tools/ngram/NGramModel.java | 3 +- .../java/opennlp/tools/ngram/NGramUtils.java | 16 ++-- .../tools/parser/AbstractBottomUpParser.java | 69 +++++++++----- .../parser/AbstractContextGenerator.java | 94 ++++++++++++------- .../parser/AbstractParserEventStream.java | 7 +- .../main/java/opennlp/tools/parser/Parse.java | 32 ++++--- .../java/opennlp/tools/parser/Parser.java | 16 ++-- .../tools/parser/ParserCrossValidator.java | 4 +- .../chunking/CheckContextGenerator.java | 8 +- .../opennlp/tools/parser/chunking/Parser.java | 44 ++++++--- .../parser/chunking/ParserEventStream.java | 15 ++- .../tools/parser/lang/en/HeadRules.java | 12 ++- .../lang/es/AncoraSpanishHeadRules.java | 17 ++-- .../treeinsert/CheckContextGenerator.java | 1 - .../tools/parser/treeinsert/Parser.java | 58 ++++++++---- .../postag/DefaultPOSContextGenerator.java | 9 +- .../opennlp/tools/postag/POSTaggerME.java | 4 +- .../sentdetect/DefaultSDContextGenerator.java | 3 +- .../tools/sentdetect/SDCrossValidator.java | 9 +- .../sentdetect/SentenceDetectorEvaluator.java | 3 +- .../tools/sentdetect/SentenceDetectorME.java | 4 +- .../tools/sentdetect/SentenceModel.java | 4 +- 
.../sentdetect/SentenceSampleStream.java | 3 +- .../opennlp/tools/tokenize/TokenSample.java | 3 +- .../tools/tokenize/TokenizerFactory.java | 3 +- .../tokenize/lang/en/TokenSampleStream.java | 9 +- .../util/BeamSearchContextGenerator.java | 3 +- .../java/opennlp/tools/util/Sequence.java | 3 +- .../java/opennlp/tools/util/StringUtil.java | 33 ++++--- .../BrownBigramFeatureGenerator.java | 6 +- .../util/featuregen/GeneratorFactory.java | 19 ++-- .../featuregen/WindowFeatureGenerator.java | 6 +- .../WordClusterFeatureGenerator.java | 5 +- .../opennlp/tools/util/model/BaseModel.java | 5 +- .../opennlp/tools/util/model/ModelUtil.java | 14 +-- .../java/opennlp/uima/chunker/Chunker.java | 6 +- .../opennlp/uima/chunker/ChunkerTrainer.java | 6 +- .../uima/namefind/AbstractNameFinder.java | 6 +- .../uima/namefind/NameFinderTrainer.java | 29 ++++-- .../java/opennlp/uima/postag/POSTagger.java | 3 +- .../sentdetect/SentenceDetectorTrainer.java | 3 +- 132 files changed, 733 insertions(+), 410 deletions(-) diff --git a/checkstyle-suppressions.xml b/checkstyle-suppressions.xml index 0e08e996a..0f578c435 100644 --- a/checkstyle-suppressions.xml +++ b/checkstyle-suppressions.xml @@ -25,4 +25,4 @@ - \ No newline at end of file + diff --git a/checkstyle.xml b/checkstyle.xml index 143b1f003..2fac7e288 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -51,10 +51,10 @@ - - - - + + + + diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java index 7482e0fd5..41c3d377a 100644 --- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java +++ b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java @@ -48,9 +48,9 @@ public static void main(String[] args) throws Exception { if (args.length == 0) { System.out.println("Usage:"); - System.out.println( - "[NameFinderAnnService -serverPort port] [-tokenizerModel file] 
[-ruleBasedTokenizer whitespace|simple] " + - "[-sentenceDetectorModel file] namefinderFile|nameFinderURI"); + System.out.println("[NameFinderAnnService -serverPort port] [-tokenizerModel file] " + + "[-ruleBasedTokenizer whitespace|simple] " + + "[-sentenceDetectorModel file] namefinderFile|nameFinderURI"); return; } diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java index 6d12c255a..cdcbdebde 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/builder/MorfologikDictionaryBuilderParams.java @@ -28,7 +28,8 @@ */ interface MorfologikDictionaryBuilderParams extends EncodingParameter { - @ParameterDescription(valueName = "in", description = "The input file (base,inflected,tag). An associated metadata (*.info) file must exist.") + @ParameterDescription(valueName = "in", + description = "The input file (base,inflected,tag). 
An associated metadata (*.info) file must exist.") File getInputFile(); @ParameterDescription(valueName = "true|false", description = "Accept leading BOM bytes (UTF-8).") @@ -51,7 +52,8 @@ interface MorfologikDictionaryBuilderParams extends EncodingParameter { @OptionalParameter(defaultValue = "false") Boolean getOverwrite(); - @ParameterDescription(valueName = "true|false", description = "Validate input to make sure it makes sense.") + @ParameterDescription(valueName = "true|false", + description = "Validate input to make sure it makes sense.") @OptionalParameter(defaultValue = "false") Boolean getValidate(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java index 9aa2e33dc..174ca6186 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java +++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java @@ -140,7 +140,8 @@ public static Span[] phrasesAsSpanList(String[] aSentence, String[] aTags, return phrases.toArray(new Span[phrases.size()]); } - private static void validateArguments(int sentenceSize, int tagsSize, int predsSize) throws IllegalArgumentException { + private static void validateArguments(int sentenceSize, int tagsSize, int predsSize) + throws IllegalArgumentException { if (sentenceSize != tagsSize || tagsSize != predsSize) throw new IllegalArgumentException( "All arrays must have the same length: " + @@ -152,7 +153,9 @@ private static void validateArguments(int sentenceSize, int tagsSize, int predsS /** * Creates a nice to read string for the phrases formatted as following:
* - * [NP Rockwell_NNP ] [VP said_VBD ] [NP the_DT agreement_NN ] [VP calls_VBZ ] [SBAR for_IN ] [NP it_PRP ] [VP to_TO supply_VB ] [NP 200_CD additional_JJ so-called_JJ shipsets_NNS ] [PP for_IN ] [NP the_DT planes_NNS ] ._. + * [NP Rockwell_NNP ] [VP said_VBD ] [NP the_DT agreement_NN ] [VP calls_VBZ ] [SBAR for_IN ] + * [NP it_PRP ] [VP to_TO supply_VB ] [NP 200_CD additional_JJ so-called_JJ shipsets_NNS ] + * [PP for_IN ] [NP the_DT planes_NNS ] ._. * * * @return a nice to read string representation of the chunk phases @@ -195,7 +198,8 @@ public String toString() { StringBuilder chunkString = new StringBuilder(); for (int ci = 0; ci < preds.size(); ci++) { - chunkString.append(sentence.get(ci)).append(" ").append(tags.get(ci)).append(" ").append(preds.get(ci)).append("\n"); + chunkString.append(sentence.get(ci)).append(" ").append(tags.get(ci)) + .append(" ").append(preds.get(ci)).append("\n"); } return chunkString.toString(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java index 25e531a61..590bc8535 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java @@ -27,9 +27,11 @@ public interface ChunkerContextGenerator extends BeamSearchContextGeneratortoString methods of these objects should return the token text. + * @param toks The tokens of the sentence. The toString methods of these objects + * should return the token text. * @param tags The POS tags for the the specified tokens. - * @param preds The previous decisions made in the taging of this sequence. Only indices less than i will be examined. + * @param preds The previous decisions made in the taging of this sequence. + * Only indices less than i will be examined. * @return An array of predictive contexts on which a model basis its decisions. 
*/ String[] getContext(int i, String[] toks, String[] tags, String[] preds); diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java index 9ab8e1a42..a59b5ce78 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java @@ -63,8 +63,8 @@ public class ChunkerME implements Chunker { * @param sequenceValidator The {@link SequenceValidator} to determines whether the outcome * is valid for the preceding sequence. This can be used to implement constraints * on what sequences are valid. - * @deprecated Use {@link #ChunkerME(ChunkerModel, int)} instead - * and use the {@link ChunkerFactory} to configure the {@link SequenceValidator} and {@link ChunkerContextGenerator}. + * @deprecated Use {@link #ChunkerME(ChunkerModel, int)} instead and use the {@link ChunkerFactory} + * to configure the {@link SequenceValidator} and {@link ChunkerContextGenerator}. */ @Deprecated private ChunkerME(ChunkerModel model, int beamSize, SequenceValidator sequenceValidator, @@ -140,7 +140,8 @@ public Sequence[] topKSequences(String[] sentence, String[] tags, double minSequ /** * Populates the specified array with the probabilities of the last decoded sequence. The * sequence was determined based on the previous call to chunk. The - * specified array should be at least as large as the numbe of tokens in the previous call to chunk. + * specified array should be at least as large as the numbe of tokens in the previous + * call to chunk. * * @param probs An array used to hold the probabilities of the last decoded sequence. 
*/ diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java index 387994bdb..76616d4c2 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java @@ -30,7 +30,8 @@ public class DefaultChunkerContextGenerator implements ChunkerContextGenerator { public DefaultChunkerContextGenerator() { } - public String[] getContext(int index, String[] sequence, String[] priorDecisions, Object[] additionalContext) { + public String[] getContext(int index, String[] sequence, + String[] priorDecisions, Object[] additionalContext) { return getContext(index,sequence,(String[]) additionalContext[0],priorDecisions); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java index 02d1a863e..4f103dbdd 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/ArgumentParser.java @@ -179,18 +179,22 @@ private static void checkProxyInterfaces(Class... 
proxyInterfaces) { throw new IllegalArgumentException(method.getName() + " method name does not start with 'get'!"); // check that method has zero arguments - if (method.getParameterTypes().length != 0) - throw new IllegalArgumentException(method.getName() + " method must have zero parameters but has " + - method.getParameterTypes().length + "!"); + if (method.getParameterTypes().length != 0) { + throw new IllegalArgumentException(method.getName() + + " method must have zero parameters but has " + + method.getParameterTypes().length + "!"); + } // check return types of interface Class returnType = method.getReturnType(); Set> compatibleReturnTypes = argumentFactories.keySet(); - if (!compatibleReturnTypes.contains(returnType)) - throw new IllegalArgumentException(method.getName() + " method must have compatible return type! Got " + - returnType + ", expected one of " + compatibleReturnTypes); + if (!compatibleReturnTypes.contains(returnType)) { + throw new IllegalArgumentException(method.getName() + + " method must have compatible return type! Got " + + returnType + ", expected one of " + compatibleReturnTypes); + } } } } @@ -293,7 +297,8 @@ public static List createArguments(Class... argProxyInterfaces) if (optional != null) isOptional = true; - Argument arg = new Argument(paramName.substring(1), desc.valueName(), desc.description(), isOptional); + Argument arg = new Argument(paramName.substring(1), + desc.valueName(), desc.description(), isOptional); arguments.add(arg); @@ -468,7 +473,8 @@ public static String validateArgumentsLoudly(String args[], Class... argP * @return parsed parameters * * @throws TerminateToolException if an argument value cannot be parsed. - * @throws IllegalArgumentException if validateArguments returns false, if the proxy interface is not compatible. + * @throws IllegalArgumentException if validateArguments returns false, + * if the proxy interface is not compatible. 
*/ @SuppressWarnings("unchecked") public static T parse(String args[], Class argProxyInterface) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java index 0b3785583..424f2973f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CmdLineUtil.java @@ -330,7 +330,8 @@ public static TrainingParameters loadTrainingParameters(String paramFile, TrainerFactory.TrainerType trainerType = TrainerFactory.getTrainerType(params.getSettings()); - if (!supportSequenceTraining && trainerType.equals(TrainerFactory.TrainerType.EVENT_MODEL_SEQUENCE_TRAINER)) { + if (!supportSequenceTraining + && trainerType.equals(TrainerFactory.TrainerType.EVENT_MODEL_SEQUENCE_TRAINER)) { throw new TerminateToolException(1, "Sequence training is not supported!"); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/GenerateManualTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/GenerateManualTool.java index cf237c153..36bb95156 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/GenerateManualTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/GenerateManualTool.java @@ -149,7 +149,8 @@ private static void appendArgumentTable( "\n"); sb.append( - "FormatArgumentValueOptionalDescription\n"); + "FormatArgumentValue" + + "OptionalDescription\n"); sb.append("\n"); for (String format : formatArguments.keySet()) { @@ -198,12 +199,15 @@ private static void appendHeader(StringBuilder sb) { + "KIND, either express or implied. See the License for the\n" + "specific language governing permissions and limitations\n" + "under the License.\n" + "-->\n" + "\n\n" - + "\n\n" + "\n\n" + "The Command Line Interface\n\n" + "" - + "This section details the available tools and parameters of the Command Line Interface. For a introduction in its usage please refer to . 
" + + "This section details the available tools and parameters of the Command Line Interface. " + + "For a introduction in its usage please refer to . " + "\n\n"); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java index b0a396f43..082b27c75 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/PerformanceMonitor.java @@ -120,8 +120,8 @@ public void run() { averageThroughput = 0; } - out.printf("current: %.1f " + unit + "/s avg: %.1f " + unit + "/s total: %d " + unit + "%n", currentThroughput, - averageThroughput, counter); + out.printf("current: %.1f " + unit + "/s avg: %.1f " + unit + "/s total: %d " + + unit + "%n", currentThroughput, averageThroughput, counter); lastTimeStamp = System.currentTimeMillis(); lastCount = counter; diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/SystemInputStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/SystemInputStreamFactory.java index 0ecc321cd..d9f8d3ff9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/SystemInputStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/SystemInputStreamFactory.java @@ -39,7 +39,8 @@ public InputStream createInputStream() throws IOException { return System.in; } else { - throw new UnsupportedOperationException("The System.in stream can't be re-created to read from the beginning!"); + throw new UnsupportedOperationException( + "The System.in stream can't be re-created to read from the beginning!"); } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java index f47947305..c00dc1862 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java +++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java @@ -55,7 +55,8 @@ public void run(String[] args) { PerformanceMonitor perfMon = null; try { - lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()); + lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(), + SystemInputStreamFactory.encoding()); perfMon = new PerformanceMonitor(System.err, "sent"); perfMon.start(); String line; @@ -72,7 +73,8 @@ public void run(String[] args) { String[] chunks = chunker.chunk(posSample.getSentence(), posSample.getTags()); - System.out.println(new ChunkSample(posSample.getSentence(), posSample.getTags(), chunks).nicePrint()); + System.out.println(new ChunkSample(posSample.getSentence(), + posSample.getTags(), chunks).nicePrint()); perfMon.incrementCounter(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java index 09caad5b5..d2305cb70 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java @@ -28,7 +28,8 @@ */ interface TrainingParams extends BasicTrainingParams { - @ParameterDescription(valueName = "factoryName", description = "A sub-class of ChunkerFactory where to get implementation and resources.") + @ParameterDescription(valueName = "factoryName", + description = "A sub-class of ChunkerFactory where to get implementation and resources.") @OptionalParameter String getFactory(); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java index a0992eaa1..91c8d19cc 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java +++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderTool.java @@ -60,7 +60,8 @@ public void run(String[] args) { dict.serialize(out); } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + e.getMessage(), e); + throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + + e.getMessage(), e); } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java index 4ff985475..49a55d3a1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java @@ -64,8 +64,8 @@ public void run(String[] args) { perfMon.start(); try { - documentStream = new ParagraphStream( - new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())); + documentStream = new ParagraphStream(new PlainTextByLineStream( + new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())); String document; while ((document = documentStream.read()) != null) { String[] tokens = model.getFactory().getTokenizer().tokenize(document); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java index f70f3f7dd..4c4f0df35 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java @@ -28,15 +28,18 @@ */ interface TrainingParams extends BasicTrainingParams { - @ParameterDescription(valueName = "fg", description = "Comma separated feature generator classes. Bag of words is used if not specified.") + @ParameterDescription(valueName = "fg", + description = "Comma separated feature generator classes. 
Bag of words is used if not specified.") @OptionalParameter String getFeatureGenerators(); - @ParameterDescription(valueName = "tokenizer", description = "Tokenizer implementation. WhitespaceTokenizer is used if not specified.") + @ParameterDescription(valueName = "tokenizer", + description = "Tokenizer implementation. WhitespaceTokenizer is used if not specified.") @OptionalParameter String getTokenizer(); - @ParameterDescription(valueName = "factoryName", description = "A sub-class of DoccatFactory where to get implementation and resources.") + @ParameterDescription(valueName = "factoryName", + description = "A sub-class of DoccatFactory where to get implementation and resources.") @OptionalParameter String getFactory(); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java index f82e362e6..f248a2cb5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/entitylinker/EntityLinkerTool.java @@ -121,7 +121,8 @@ public void run(String[] args) { text.append("\n"); } - List linkedSpans = entityLinker.find(text.toString(), sentences, tokensBySentence, namesBySentence); + List linkedSpans = + entityLinker.find(text.toString(), sentences, tokensBySentence, namesBySentence); for (int i = 0; i < linkedSpans.size(); i++) { System.out.println(linkedSpans.get(i)); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/TrainingParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/TrainingParams.java index cd26f5429..faaecf5dc 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/TrainingParams.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/TrainingParams.java @@ -28,8 +28,8 @@ */ interface TrainingParams extends BasicTrainingParams { - @ParameterDescription(valueName = 
"factoryName", description = "A sub-class of LemmatizerFactory where to get implementation and resources.") + @ParameterDescription(valueName = "factoryName", + description = "A sub-class of LemmatizerFactory where to get implementation and resources.") @OptionalParameter String getFactory(); - } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java index 459a3e5e2..fab9e15af 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java @@ -92,7 +92,8 @@ else if ("BILOU".equals(sequenceCodecImplName)) { sequenceCodecImplName = BilouCodec.class.getName(); } - SequenceCodec sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName); + SequenceCodec sequenceCodec = + TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName); TokenNameFinderFactory nameFinderFactory; try { diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java index 92a846a38..24760053a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java @@ -67,8 +67,8 @@ public void run(String[] args) { perfMon.start(); try { - untokenizedLineStream = - new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()); + untokenizedLineStream = new PlainTextByLineStream( + new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()); String line; while ((line = untokenizedLineStream.read()) != null) { String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line); 
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java index f75f97642..8a4bd4994 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java @@ -103,7 +103,8 @@ public static Map loadResources(File resourcePath, File featureG if (featureGenDescriptor != null) { try (InputStream xmlDescriptorIn = CmdLineUtil.openInFile(featureGenDescriptor)) { - artifactSerializers.putAll(GeneratorFactory.extractCustomArtifactSerializerMappings(xmlDescriptorIn)); + artifactSerializers.putAll( + GeneratorFactory.extractCustomArtifactSerializerMappings(xmlDescriptorIn)); } catch (IOException e) { // TODO: Improve error handling! e.printStackTrace(); @@ -195,7 +196,8 @@ else if ("BILOU".equals(sequenceCodecImplName)) { sequenceCodecImplName = BilouCodec.class.getName(); } - SequenceCodec sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName); + SequenceCodec sequenceCodec = + TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName); TokenNameFinderFactory nameFinderFactory; try { diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java index 690e3c06d..499fa5810 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java @@ -54,7 +54,8 @@ public String getHelp() { + "-bs n: Use a beam size of n.\n" + "-ap f: Advance outcomes in with at least f% of the probability mass.\n" + "-k n: Show the top n parses. This will also display their log-probablities.\n" - + "-tk tok_model: Use the specified tokenizer model to tokenize the sentences. 
Defaults to a WhitespaceTokenizer."; + + "-tk tok_model: Use the specified tokenizer model to tokenize the sentences. " + + "Defaults to a WhitespaceTokenizer."; } private static Pattern untokenizedParenPattern1 = Pattern.compile("([^ ])([({)}])"); @@ -124,7 +125,7 @@ public void run(String[] args) { Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE; String tokenizerModelName = CmdLineUtil.getParameter( "-tk", args ); if (tokenizerModelName != null ) { - TokenizerModel tokenizerModel = new TokenizerModelLoader().load( new File( tokenizerModelName ) ); + TokenizerModel tokenizerModel = new TokenizerModelLoader().load(new File(tokenizerModelName)); tokenizer = new TokenizerME( tokenizerModel ); } @@ -133,7 +134,8 @@ public void run(String[] args) { ObjectStream lineStream = null; PerformanceMonitor perfMon = null; try { - lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()); + lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(), + SystemInputStreamFactory.encoding()); perfMon = new PerformanceMonitor(System.err, "sent"); perfMon.start(); String line; diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java index 928ea2910..87790357b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java @@ -108,7 +108,8 @@ else if ("es".equals(params.getLang())) { return (HeadRules) headRulesObject; } else { - throw new TerminateToolException(-1, "HeadRules Artifact Serializer must create an object of type HeadRules!"); + throw new TerminateToolException(-1, + "HeadRules Artifact Serializer must create an object of type HeadRules!"); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java index 61e322561..dc93226e0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java @@ -56,7 +56,8 @@ public void run(String[] args) { PerformanceMonitor perfMon = null; try { - lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()); + lineStream = + new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding()); perfMon = new PerformanceMonitor(System.err, "sent"); perfMon.start(); String line; diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java index 629553368..221dcbe8f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/TrainingParams.java @@ -30,7 +30,8 @@ */ interface TrainingParams extends BasicTrainingParams { - @ParameterDescription(valueName = "maxent|perceptron|perceptron_sequence", description = "The type of the token name finder model. One of maxent|perceptron|perceptron_sequence.") + @ParameterDescription(valueName = "maxent|perceptron|perceptron_sequence", + description = "The type of the token name finder model. One of maxent|perceptron|perceptron_sequence.") @OptionalParameter(defaultValue = "maxent") String getType(); @@ -38,15 +39,18 @@ interface TrainingParams extends BasicTrainingParams { @OptionalParameter File getDict(); - @ParameterDescription(valueName = "cutoff", description = "NGram cutoff. If not specified will not create ngram dictionary.") + @ParameterDescription(valueName = "cutoff", + description = "NGram cutoff. 
If not specified will not create ngram dictionary.") @OptionalParameter Integer getNgram(); - @ParameterDescription(valueName = "tagDictCutoff", description = "TagDictionary cutoff. If specified will create/expand a mutable TagDictionary") + @ParameterDescription(valueName = "tagDictCutoff", + description = "TagDictionary cutoff. If specified will create/expand a mutable TagDictionary") @OptionalParameter Integer getTagDictCutoff(); - @ParameterDescription(valueName = "factoryName", description = "A sub-class of POSTaggerFactory where to get implementation and resources.") + @ParameterDescription(valueName = "factoryName", + description = "A sub-class of POSTaggerFactory where to get implementation and resources.") @OptionalParameter String getFactory(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java index 3aaf27468..9b759b4a3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java @@ -63,8 +63,8 @@ public void run(String[] args) { perfMon.start(); try { - ObjectStream paraStream = new ParagraphStream(new PlainTextByLineStream(new SystemInputStreamFactory(), - SystemInputStreamFactory.encoding())); + ObjectStream paraStream = new ParagraphStream(new PlainTextByLineStream( + new SystemInputStreamFactory(), SystemInputStreamFactory.encoding())); String para; while ((para = paraStream.read()) != null) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java index f2722914e..fbdf4db8b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/TrainingParams.java @@ -38,7 
+38,8 @@ interface TrainingParams extends BasicTrainingParams { @OptionalParameter String getEosChars(); - @ParameterDescription(valueName = "factoryName", description = "A sub-class of SentenceDetectorFactory where to get implementation and resources.") + @ParameterDescription(valueName = "factoryName", + description = "A sub-class of SentenceDetectorFactory where to get implementation and resources.") @OptionalParameter String getFactory(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java index 0405833b3..237173aa6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TrainingParams.java @@ -29,7 +29,8 @@ * Note: Do not use this class, internal use only! */ interface TrainingParams extends BasicTrainingParams { - @ParameterDescription(valueName = "isAlphaNumOpt", description = "Optimization flag to skip alpha numeric tokens for further tokenization") + @ParameterDescription(valueName = "isAlphaNumOpt", + description = "Optimization flag to skip alpha numeric tokens for further tokenization") @OptionalParameter(defaultValue = "false") Boolean getAlphaNumOpt(); @@ -37,7 +38,8 @@ interface TrainingParams extends BasicTrainingParams { @OptionalParameter File getAbbDict(); - @ParameterDescription(valueName = "factoryName", description = "A sub-class of TokenizerFactory where to get implementation and resources.") + @ParameterDescription(valueName = "factoryName", + description = "A sub-class of TokenizerFactory where to get implementation and resources.") @OptionalParameter String getFactory(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java index 13f8927ea..2cee45de9 100644 --- 
a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionarySerializer.java @@ -238,7 +238,8 @@ public static boolean create(InputStream in, EntryInserter inserter) * @param entries entries to serialize * * @throws IOException If an I/O error occurs - * @deprecated Use {@link DictionarySerializer#serialize(java.io.OutputStream, java.util.Iterator, boolean)} instead + * @deprecated Use + * {@link DictionarySerializer#serialize(java.io.OutputStream, java.util.Iterator, boolean)} instead */ @Deprecated public static void serialize(OutputStream out, Iterator entries) diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java index 77da8916f..3fbe63d70 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java @@ -37,7 +37,8 @@ public class DocumentCategorizerEventStream extends AbstractEventStream data, FeatureGenerator... featureGenerators) { + public DocumentCategorizerEventStream(ObjectStream data, + FeatureGenerator... featureGenerators) { super(data); mContextGenerator = diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java index e81f9f57f..b35742fb3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java @@ -24,11 +24,13 @@ import opennlp.tools.util.ObjectStream; /** - * This class reads in string encoded training samples, parses them and outputs {@link DocumentSample} objects. 
+ * This class reads in string encoded training samples, parses them and + * outputs {@link DocumentSample} objects. *

* Format:
* Each line contains one sample document.
- * The category is the first string in the line followed by a tab and whitespace separated document tokens.
+ * The category is the first string in the line followed by a tab and whitespace + * separated document tokens.
* Sample line: category-string tab-char whitespace-separated-tokens line-break-char(s)
*/ public class DocumentSampleStream extends FilterObjectStream { diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/NGramFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/NGramFeatureGenerator.java index ef5d2a3c7..8283adc45 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/doccat/NGramFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/NGramFeatureGenerator.java @@ -47,10 +47,12 @@ public NGramFeatureGenerator(int minGram, int maxGram) throws InvalidFormatExcep this.minGram = minGram; this.maxGram = maxGram; } else { - throw new InvalidFormatException("Minimum range value (minGram) should be less than or equal to maximum range value (maxGram)!"); + throw new InvalidFormatException( + "Minimum range value (minGram) should be less than or equal to maximum range value (maxGram)!"); } } else { - throw new InvalidFormatException("Both minimum range value (minGram) & maximum range value (maxGram) should be greater than or equal to 1!"); + throw new InvalidFormatException("Both minimum range value (minGram) & maximum " + + "range value (maxGram) should be greater than or equal to 1!"); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java index 2533bcdbd..6f06bc773 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java +++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinker.java @@ -89,5 +89,6 @@ public interface EntityLinker { * Span[] corresponds to * @return */ - List find(String doctext, Span[] sentences, Span[][] tokensBySentence, Span[][] namesBySentence, int sentenceIndex); + List find(String doctext, Span[] sentences, Span[][] tokensBySentence, + Span[][] namesBySentence, int sentenceIndex); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java 
b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java index f29a0873c..1c1f67c3c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/EntityLinkerFactory.java @@ -39,7 +39,8 @@ public class EntityLinkerFactory { * @return an EntityLinker impl * @throws java.io.IOException */ - public static synchronized EntityLinker getLinker(String entityType, EntityLinkerProperties properties) throws IOException { + public static synchronized EntityLinker getLinker(String entityType, EntityLinkerProperties properties) + throws IOException { if (entityType == null || properties == null) { throw new IllegalArgumentException("Null argument in entityLinkerFactory"); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java index 31c0803a6..8d7dc44bc 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java +++ b/opennlp-tools/src/main/java/opennlp/tools/entitylinker/LinkedSpan.java @@ -113,7 +113,8 @@ public void setSearchTerm(String searchTerm) { @Override public String toString() { - return "LinkedSpan\nsentenceid=" + sentenceid + "\nsearchTerm=" + searchTerm + "\nlinkedEntries=\n" + linkedEntries + "\n"; + return "LinkedSpan\nsentenceid=" + sentenceid + "\nsearchTerm=" + searchTerm + + "\nlinkedEntries=\n" + linkedEntries + "\n"; } @Override diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java index 4f62a3a50..ce993f0bb 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java @@ -154,7 +154,8 @@ else if (tag.equals("O")) { if (beginIndex != -1) names.add(new Span(beginIndex, 
endIndex, tags.get(beginIndex).substring(2))); - return new NameSample(sentence.toArray(new String[sentence.size()]), names.toArray(new Span[names.size()]), isClearAdaptiveData); + return new NameSample(sentence.toArray(new String[sentence.size()]), + names.toArray(new Span[names.size()]), isClearAdaptiveData); } else if (line != null) { // Just filter out empty events, if two lines in a row are empty diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java index f1986e730..8ceb95749 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java @@ -193,7 +193,8 @@ else if (tag.equals("O")) { if (beginIndex != -1) names.add(extract(beginIndex, endIndex, tags.get(beginIndex))); - return new NameSample(sentence.toArray(new String[sentence.size()]), names.toArray(new Span[names.size()]), isClearAdaptiveData); + return new NameSample(sentence.toArray(new String[sentence.size()]), + names.toArray(new Span[names.size()]), isClearAdaptiveData); } else if (line != null) { // Just filter out empty events, if two lines in a row are empty diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java index f2498b8a0..d5354b9dd 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java @@ -178,7 +178,8 @@ else if (!tag.endsWith(tags.get(beginIndex).substring(1))) { if (beginIndex != -1) names.add(extract(beginIndex, endIndex, tags.get(beginIndex))); - return new NameSample(sentence.toArray(new String[sentence.size()]), names.toArray(new Span[names.size()]), isClearAdaptiveData); + return new NameSample(sentence.toArray(new 
String[sentence.size()]), + names.toArray(new Span[names.size()]), isClearAdaptiveData); } else if (line != null) { // Just filter out empty events, if two lines in a row are empty diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java index 4ca637451..74ae62ac5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java @@ -92,7 +92,8 @@ public POSSample read() throws IOException { if (tokens.size() == 0) sample = read(); - sample = new POSSample(tokens.toArray(new String[tokens.size()]), tags.toArray(new String[tags.size()])); + sample = new POSSample(tokens.toArray(new String[tokens.size()]), + tags.toArray(new String[tags.size()])); } return sample; diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java index 68a57fbec..de2c54303 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java @@ -206,7 +206,8 @@ else if (tag.equals("O")) { if (beginIndex != -1) names.add(extract(beginIndex, endIndex, tags.get(beginIndex))); - return new NameSample(sentence.toArray(new String[sentence.size()]), names.toArray(new Span[names.size()]), isClearAdaptiveData); + return new NameSample(sentence.toArray(new String[sentence.size()]), + names.toArray(new Span[names.size()]), isClearAdaptiveData); } else if (line != null) { // Just filter out empty events, if two lines in a row are empty diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java index a9c734ae1..321f7c491 100644 --- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java @@ -50,7 +50,8 @@ public class LeipzigDoccatSampleStream extends * Creates a new LeipzigDoccatSampleStream with the specified parameters. * * @param language the Leipzig input sentences.txt file - * @param sentencesPerDocument the number of sentences which should be grouped into once {@link DocumentSample} + * @param sentencesPerDocument the number of sentences which + * should be grouped into once {@link DocumentSample} * @param in the InputStream pointing to the contents of the sentences.txt input file * @throws IOException IOException */ @@ -67,7 +68,8 @@ public LeipzigDoccatSampleStream(String language, int sentencesPerDocument, Toke * Creates a new LeipzigDoccatSampleStream with the specified parameters. * * @param language the Leipzig input sentences.txt file - * @param sentencesPerDocument the number of sentences which should be grouped into once {@link DocumentSample} + * @param sentencesPerDocument the number of sentences which should be + * grouped into once {@link DocumentSample} * @param in the InputStream pointing to the contents of the sentences.txt input file * @throws IOException IOException */ diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java index 1741999ca..5b4b926ca 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java @@ -71,7 +71,8 @@ public class ADNameSampleStream implements ObjectStream { private static final Pattern whitespacePattern = Pattern.compile("\\s+"); private static final Pattern underlinePattern = Pattern.compile("[_]+"); - private static final Pattern hyphenPattern = 
Pattern.compile("((\\p{L}+)-$)|(^-(\\p{L}+)(.*))|((\\p{L}+)-(\\p{L}+)(.*))"); + private static final Pattern hyphenPattern = + Pattern.compile("((\\p{L}+)-$)|(^-(\\p{L}+)(.*))|((\\p{L}+)-(\\p{L}+)(.*))"); private static final Pattern alphanumericPattern = Pattern.compile("^[\\p{L}\\p{Nd}]+$"); /** diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java index c93764ab5..e9603a882 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java @@ -50,7 +50,8 @@ interface Parameters { @ParameterDescription(valueName = "sampleData", description = "data to be used, usually a file name.") File getData(); - @ParameterDescription(valueName = "split", description = "if true all hyphenated tokens will be separated (default true)") + @ParameterDescription(valueName = "split", + description = "if true all hyphenated tokens will be separated (default true)") @OptionalParameter(defaultValue = "true") Boolean getSplitHyphenatedTokens(); diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java index a551635e7..ab97ae2d4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java @@ -39,7 +39,8 @@ public class ADPOSSampleStreamFactory extends LanguageSampleStreamFactory { interface Parameters { - @ParameterDescription(valueName = "charsetName", description = "encoding for reading and writing text, if absent the system default is used.") + @ParameterDescription(valueName = "charsetName", + description = "encoding for reading and writing text, if absent the system default is used.") Charset 
getEncoding(); @ParameterDescription(valueName = "sampleData", description = "data to be used, usually a file name.") @@ -52,7 +53,8 @@ interface Parameters { @OptionalParameter(defaultValue = "false") Boolean getExpandME(); - @ParameterDescription(valueName = "includeFeatures", description = "combine POS Tags with word features, like number and gender.") + @ParameterDescription(valueName = "includeFeatures", + description = "combine POS Tags with word features, like number and gender.") @OptionalParameter(defaultValue = "false") Boolean getIncludeFeatures(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java index 4e2828fbe..59ebe4605 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java @@ -48,7 +48,8 @@ interface Parameters { @ParameterDescription(valueName = "language", description = "language which is being processed.") String getLang(); - @ParameterDescription(valueName = "includeTitles", description = "if true will include sentences marked as headlines.") + @ParameterDescription(valueName = "includeTitles", + description = "if true will include sentences marked as headlines.") @OptionalParameter(defaultValue = "false") Boolean getIncludeTitles(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java index 6f75c3e02..499b99d99 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratNameSampleStream.java @@ -137,8 +137,9 @@ protected List read(BratDocument sample) throws IOException { names.add(new Span(nameBeginIndex, nameEndIndex, entity.getType())); } else 
{ - System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + - sample.getId() + ", it is not matching tokenization!"); + System.err.println("Dropped entity " + entity.getId() + " (" + + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + + sample.getId() + ", it is not matching tokenization!"); } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/NameToSentenceSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/NameToSentenceSampleStream.java index 554d0d14e..b5f3196fa 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/NameToSentenceSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/NameToSentenceSampleStream.java @@ -26,7 +26,8 @@ */ public class NameToSentenceSampleStream extends AbstractToSentenceSampleStream { - public NameToSentenceSampleStream(Detokenizer detokenizer, ObjectStream samples, int chunkSize) { + public NameToSentenceSampleStream(Detokenizer detokenizer, + ObjectStream samples, int chunkSize) { super(detokenizer, samples, chunkSize); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java index cafb7ee04..4c4c7f080 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java @@ -37,7 +37,8 @@ private ParseToPOSSampleStreamFactory() { public ObjectStream create(String[] args) { - ParseSampleStreamFactory.Parameters params = ArgumentParser.parse(args, ParseSampleStreamFactory.Parameters.class); + ParseSampleStreamFactory.Parameters params = + ArgumentParser.parse(args, ParseSampleStreamFactory.Parameters.class); ObjectStream parseSampleStream = 
StreamFactoryRegistry.getFactory(Parse.class, StreamFactoryRegistry.DEFAULT_FORMAT).create( diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamFactory.java index 432d625a7..6985a304a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/frenchtreebank/ConstitParseSampleStreamFactory.java @@ -41,8 +41,8 @@ public ObjectStream create(String[] args) { Parameters params = ArgumentParser.parse(args, Parameters.class); - return new ConstitParseSampleStream(new FileToByteArraySampleStream(new DirectorySampleStream(params.getData(), - null, false))); + return new ConstitParseSampleStream(new FileToByteArraySampleStream( + new DirectorySampleStream(params.getData(), null, false))); } public static void registerFactory() { diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java index e5ef4a204..a64ffc2e9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/muc/Muc6NameSampleStreamFactory.java @@ -54,7 +54,8 @@ public ObjectStream create(String[] args) { ObjectStream mucDocStream = new FileToStringSampleStream( new DirectorySampleStream(params.getData(), - file -> StringUtil.toLowerCase(file.getName()).endsWith(".sgm"), false), Charset.forName("UTF-8")); + file -> StringUtil.toLowerCase(file.getName()).endsWith(".sgm"), false), + Charset.forName("UTF-8")); return new MucNameSampleStream(tokenizer, mucDocStream); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java 
b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java index f3ab66fc3..167e6dbb9 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/ontonotes/OntoNotesNameSampleStreamFactory.java @@ -48,11 +48,12 @@ public ObjectStream create(String[] args) { return file.isDirectory(); }, true); - return new OntoNotesNameSampleStream(new FileToStringSampleStream(documentStream, Charset.forName("UTF-8"))); + return new OntoNotesNameSampleStream( + new FileToStringSampleStream(documentStream, Charset.forName("UTF-8"))); } public static void registerFactory() { StreamFactoryRegistry.registerFactory(NameSample.class, "ontonotes", new OntoNotesNameSampleStreamFactory()); } -} \ No newline at end of file +} diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java index 47e9e7784..f13651b52 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java @@ -27,9 +27,10 @@ import opennlp.tools.util.StringList; /** - * A {@link opennlp.tools.languagemodel.LanguageModel} based on a {@link opennlp.tools.ngram.NGramModel} using Laplace - * smoothing probability estimation to get the probabilities of the ngrams. - * See also {@link NGramUtils#calculateLaplaceSmoothingProbability(opennlp.tools.util.StringList, Iterable, int, Double)}. + * A {@link opennlp.tools.languagemodel.LanguageModel} based on a {@link opennlp.tools.ngram.NGramModel} + * using Laplace smoothing probability estimation to get the probabilities of the ngrams. + * See also {@link NGramUtils#calculateLaplaceSmoothingProbability( + * opennlp.tools.util.StringList, Iterable, int, Double)}. 
*/ public class NGramLanguageModel extends NGramModel implements LanguageModel { diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java index cba1c14f3..cdd2383f7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java @@ -55,7 +55,8 @@ protected static String[] getSuffixes(String lex) { return suffs; } - public String[] getContext(int index, String[] sequence, String[] priorDecisions, Object[] additionalContext) { + public String[] getContext(int index, String[] sequence, String[] priorDecisions, + Object[] additionalContext) { return getContext(index, sequence, (String[]) additionalContext[0], priorDecisions); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java index 8dbfc2460..553eb3a6e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java @@ -76,7 +76,8 @@ public String[] getLemmas() { return lemmas.toArray(new String[lemmas.size()]); } - private void validateArguments(int tokensSize, int tagsSize, int lemmasSize) throws IllegalArgumentException { + private void validateArguments(int tokensSize, int tagsSize, int lemmasSize) + throws IllegalArgumentException { if (tokensSize != tagsSize || tagsSize != lemmasSize) { throw new IllegalArgumentException( "All arrays must have the same length: " + diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java index 591597354..fc1a558a4 100644 --- 
a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java @@ -51,7 +51,8 @@ protected Iterator createEvents(LemmaSample sample) { String[] tagsArray = sample.getTags(); String[] lemmasArray = sample.getLemmas(); for (int ei = 0, el = sample.getTokens().length; ei < el; ei++) { - events.add(new Event(lemmasArray[ei], contextGenerator.getContext(ei,toksArray,tagsArray,lemmasArray))); + events.add(new Event(lemmasArray[ei], + contextGenerator.getContext(ei,toksArray,tagsArray,lemmasArray))); } return events.iterator(); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java index 6dc1e6e2d..1b6fc0b11 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java @@ -27,9 +27,11 @@ public interface LemmatizerContextGenerator extends BeamSearchContextGeneratortoString methods of these objects should return the token text. + * @param toks The tokens of the sentence. The toString methods of + * these objects should return the token text. * @param tags The POS tags for the the specified tokens. - * @param lemmas The previous decisions made in the tagging of this sequence. Only indices less than i will be examined. + * @param lemmas The previous decisions made in the tagging of this sequence. + * Only indices less than i will be examined. * @return An array of predictive contexts on which a model basis its decisions. 
*/ String[] getContext(int i, String[] toks, String[] tags, String[] lemmas); diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java index 2ec5691d5..98a19f508 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java @@ -123,7 +123,8 @@ public Sequence[] topKSequences(String[] sentence, String[] tags, double minSequ /** * Populates the specified array with the probabilities of the last decoded sequence. The * sequence was determined based on the previous call to lemmatize. The - * specified array should be at least as large as the number of tokens in the previous call to lemmatize. + * specified array should be at least as large as the number of tokens in the + * previous call to lemmatize. * * @param probs An array used to hold the probabilities of the last decoded sequence. */ @@ -169,8 +170,8 @@ public static LemmatizerModel train(String languageCode, } else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) { LemmaSampleSequenceStream ss = new LemmaSampleSequenceStream(samples, contextGenerator); - EventModelSequenceTrainer trainer = TrainerFactory.getEventModelSequenceTrainer(trainParams.getSettings(), - manifestInfoEntries); + EventModelSequenceTrainer trainer = + TrainerFactory.getEventModelSequenceTrainer(trainParams.getSettings(), manifestInfoEntries); lemmatizerModel = trainer.train(ss); } else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java index e55cb5212..4ce8b7e53 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/BeamSearch.java @@ -76,12 +76,15 @@ public BeamSearch(int size, MaxentModel model, int cacheSize) { * 
Returns the best sequence of outcomes based on model for this object. * * @param sequence The input sequence. - * @param additionalContext An Object[] of additional context. This is passed to the context generator blindly with the assumption that the context are appropiate. + * @param additionalContext An Object[] of additional context. + * This is passed to the context generator blindly with the + * assumption that the context are appropiate. * * @return The top ranked sequence of outcomes or null if no sequence could be found */ public Sequence[] bestSequences(int numSequences, T[] sequence, - Object[] additionalContext, double minSequenceScore, BeamSearchContextGenerator cg, SequenceValidator validator) { + Object[] additionalContext, double minSequenceScore, + BeamSearchContextGenerator cg, SequenceValidator validator) { Queue prev = new PriorityQueue<>(size); Queue next = new PriorityQueue<>(size); diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java index b4bd3bc9a..b19870510 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/maxent/GISTrainer.java @@ -402,7 +402,8 @@ else if (useSimpleSmoothing) { /* Create and return the model ****/ // To be compatible with old models the correction constant is always 1 - return new GISModel(params, predLabels, outcomeLabels, 1, evalParams.getCorrectionParam()); + return new GISModel(params, predLabels, outcomeLabels, 1, + evalParams.getCorrectionParam()); } @@ -410,7 +411,8 @@ else if (useSimpleSmoothing) { private void findParameters(int iterations, double correctionConstant) { int threads = modelExpects.length; ExecutorService executor = Executors.newFixedThreadPool(threads); - CompletionService completionService = new ExecutorCompletionService<>(executor); + CompletionService completionService = + new ExecutorCompletionService<>(executor); 
double prevLL = 0.0; double currLL; display("Performing " + iterations + " iterations.\n"); @@ -510,10 +512,12 @@ public ModelExpactationComputeTask call() { // numTimesEventsSeen must also be thread safe if (values != null && values[ei] != null) { - modelExpects[threadIndex][pi].updateParameter(aoi,modelDistribution[oi] * values[ei][j] * numTimesEventsSeen[ei]); + modelExpects[threadIndex][pi].updateParameter(aoi,modelDistribution[oi] + * values[ei][j] * numTimesEventsSeen[ei]); } else { - modelExpects[threadIndex][pi].updateParameter(aoi,modelDistribution[oi] * numTimesEventsSeen[ei]); + modelExpects[threadIndex][pi].updateParameter(aoi,modelDistribution[oi] + * numTimesEventsSeen[ei]); } } } @@ -553,7 +557,8 @@ synchronized double getLoglikelihood() { } /* Compute one iteration of GIS and retutn log-likelihood.*/ - private double nextIteration(double correctionConstant, CompletionService completionService) { + private double nextIteration(double correctionConstant, + CompletionService completionService) { // compute contribution of p(a|b_i) for each feature and the new // correction parameter double loglikelihood = 0.0; @@ -627,7 +632,8 @@ private double nextIteration(double correctionConstant, CompletionService eventsToCompare, boolean sort) throws InsufficientTrainingDataException { + protected int sortAndMerge(List eventsToCompare, boolean sort) + throws InsufficientTrainingDataException { int numUniqueEvents = 1; numEvents = eventsToCompare.size(); if (sort && eventsToCompare.size() > 0) { @@ -156,7 +157,8 @@ public int getNumEvents() { * @param counter The predicate counters. * @param cutoff The cutoff which determines whether a predicate is included. 
*/ - protected static void update(String[] ec, Set predicateSet, Map counter, int cutoff) { + protected static void update(String[] ec, Set predicateSet, + Map counter, int cutoff) { for (String s : ec) { Integer i = counter.get(s); if (i == null) { @@ -192,4 +194,4 @@ protected static String[] toIndexedStringArray(Map labelToIndexM public float[][] getValues() { return null; } -} \ No newline at end of file +} diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java index 5d6303a8f..095a6cc54 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModel.java @@ -41,7 +41,8 @@ public enum ModelType { Maxent,Perceptron,MaxentQn,NaiveBayes } /** The type of the model. */ protected ModelType modelType; - public AbstractModel(Context[] params, String[] predLabels, Map pmap, String[] outcomeNames) { + public AbstractModel(Context[] params, String[] predLabels, + Map pmap, String[] outcomeNames) { this.pmap = pmap; this.outcomeNames = outcomeNames; this.evalParams = new EvalParameters(params,outcomeNames.length); @@ -52,7 +53,8 @@ public AbstractModel(Context[] params, String[] predLabels, String[] outcomeName this.evalParams = new EvalParameters(params,outcomeNames.length); } - public AbstractModel(Context[] params, String[] predLabels, String[] outcomeNames, int correctionConstant,double correctionParam) { + public AbstractModel(Context[] params, String[] predLabels, String[] outcomeNames, + int correctionConstant,double correctionParam) { init(predLabels,outcomeNames); this.evalParams = new EvalParameters(params,correctionParam,correctionConstant,outcomeNames.length); } @@ -101,7 +103,8 @@ public ModelType getModelType() { */ public final String getAllOutcomes(double[] ocs) { if (ocs.length != outcomeNames.length) { - return "The double array sent as a parameter to 
GISModel.getAllOutcomes() must not have been produced by this model."; + return "The double array sent as a parameter to GISModel.getAllOutcomes() " + + "must not have been produced by this model."; } else { DecimalFormat df = new DecimalFormat("0.0000"); @@ -195,4 +198,4 @@ public boolean equals(Object obj) { return false; } -} \ No newline at end of file +} diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java index bb44d8b69..0af4bb388 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java @@ -136,7 +136,7 @@ protected Context[] getParameters(int[][] outcomePatterns) throws java.io.IOExce //construct outcome pattern int[] outcomePattern = new int[outcomePatterns[i].length - 1]; System.arraycopy(outcomePatterns[i], 1, outcomePattern, 0, outcomePatterns[i].length - 1); - //System.err.println("outcomePattern "+i+" of "+outcomePatterns.length+" with "+outcomePatterns[i].length+" outcomes "); + //populate parameters for each context which uses this outcome pattern. for (int j = 0; j < outcomePatterns[i][0]; j++) { double[] contextParameters = new double[outcomePatterns[i].length - 1]; diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java index 4510038fc..a35ff807f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/EvalParameters.java @@ -49,7 +49,8 @@ public class EvalParameters { * @param correctionConstant The correction constant. * @param numOutcomes The number of outcomes. 
*/ - public EvalParameters(Context[] params, double correctionParam, double correctionConstant, int numOutcomes) { + public EvalParameters(Context[] params, double correctionParam, + double correctionConstant, int numOutcomes) { this.params = params; this.correctionParam = correctionParam; this.numOutcomes = numOutcomes; diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/MutableContext.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/MutableContext.java index 675925214..108c08a67 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/MutableContext.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/MutableContext.java @@ -49,7 +49,8 @@ public void setParameter(int outcomeIndex, double value) { } /** - * Updated the parameter or expected value at the specified outcomeIndex by adding the specified value to its current value. + * Updated the parameter or expected value at the specified outcomeIndex by + * adding the specified value to its current value. * * @param outcomeIndex The index of the parameter or expected value to be updated. * @param value The value to be added. 
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java index 7cf805306..f76424f1c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/OnePassRealValueDataIndexer.java @@ -39,7 +39,8 @@ public class OnePassRealValueDataIndexer extends OnePassDataIndexer { float[][] values; @Deprecated - public OnePassRealValueDataIndexer(ObjectStream eventStream, int cutoff, boolean sort) throws IOException { + public OnePassRealValueDataIndexer(ObjectStream eventStream, int cutoff, boolean sort) + throws IOException { super(eventStream,cutoff,sort); } @@ -62,7 +63,8 @@ public float[][] getValues() { return values; } - protected int sortAndMerge(List eventsToCompare,boolean sort) throws InsufficientTrainingDataException { + protected int sortAndMerge(List eventsToCompare,boolean sort) + throws InsufficientTrainingDataException { int numUniqueEvents = super.sortAndMerge(eventsToCompare,sort); values = new float[numUniqueEvents][]; int numEvents = eventsToCompare.size(); diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/PlainTextFileDataReader.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/PlainTextFileDataReader.java index f09d831e0..591e25301 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/PlainTextFileDataReader.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/PlainTextFileDataReader.java @@ -34,7 +34,8 @@ public class PlainTextFileDataReader implements DataReader { public PlainTextFileDataReader(File f) throws IOException { if (f.getName().endsWith(".gz")) { - input = new BufferedReader(new InputStreamReader(new BufferedInputStream(new GZIPInputStream(new BufferedInputStream(new FileInputStream(f)))))); + input = new BufferedReader(new InputStreamReader(new BufferedInputStream( + new 
GZIPInputStream(new BufferedInputStream(new FileInputStream(f)))))); } else { input = new BufferedReader(new InputStreamReader(new BufferedInputStream(new FileInputStream(f)))); diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/SequenceClassificationModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/SequenceClassificationModel.java index e38ce9088..72748710b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/SequenceClassificationModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/SequenceClassificationModel.java @@ -53,8 +53,8 @@ Sequence bestSequence(T[] sequence, Object[] additionalContext, * * @return */ - Sequence[] bestSequences(int numSequences, T[] sequence, - Object[] additionalContext, double minSequenceScore, BeamSearchContextGenerator cg, SequenceValidator validator); + Sequence[] bestSequences(int numSequences, T[] sequence, Object[] additionalContext, + double minSequenceScore, BeamSearchContextGenerator cg, SequenceValidator validator); /** * Finds the n most probable sequences. 
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java index 7dc1fd4cb..3755940b6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/TwoPassDataIndexer.java @@ -189,7 +189,8 @@ private int computeEventCounts(ObjectStream eventStream, Writer eventStor return eventCount; } - private List index(int numEvents, ObjectStream es, Map predicateIndex) throws IOException { + private List index(int numEvents, ObjectStream es, + Map predicateIndex) throws IOException { Map omap = new HashMap<>(); int outcomeCount = 0; List eventsToCompare = new ArrayList<>(numEvents); diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/LogProbabilities.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/LogProbabilities.java index 0814cf3f5..0e4b0df3b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/LogProbabilities.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/LogProbabilities.java @@ -21,9 +21,10 @@ import java.util.Map; /** - * Class implementing the probability distribution over labels returned by a classifier as a log of probabilities. - * This is necessary because floating point precision in Java does not allow for high-accuracy representation of very low probabilities - * such as would occur in a text categorizer. + * Class implementing the probability distribution over labels returned by + * a classifier as a log of probabilities. + * This is necessary because floating point precision in Java does not allow for high-accuracy + * representation of very low probabilities such as would occur in a text categorizer. 
* * @param the label (category) class * @@ -53,7 +54,8 @@ public void set(T t, Probability probability) { } /** - * Assigns a probability to a label, discarding any previously assigned probability, if the new probability is greater than the old one. + * Assigns a probability to a label, discarding any previously assigned probability, + * if the new probability is greater than the old one. * * @param t the label to which the probability is being assigned * @param probability the probability to assign diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/LogProbability.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/LogProbability.java index b93925394..7c080cbc7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/LogProbability.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/LogProbability.java @@ -49,7 +49,8 @@ public void set(Probability probability) { } /** - * Assigns a probability to a label, discarding any previously assigned probability, if the new probability is greater than the old one. + * Assigns a probability to a label, discarding any previously assigned probability, + * if the new probability is greater than the old one. * * @param probability the probability to assign */ @@ -61,7 +62,8 @@ public void setIfLarger(double probability) { } /** - * Assigns a probability to a label, discarding any previously assigned probability, if the new probability is greater than the old one. + * Assigns a probability to a label, discarding any previously assigned probability, + * if the new probability is greater than the old one. * * @param probability the probability to assign */ @@ -90,7 +92,8 @@ public void setLog(double probability) { } /** - * Compounds the existing probability mass on the label with the new probability passed in to the method. + * Compounds the existing probability mass on the label with the new + * probability passed in to the method. 
* * @param probability the probability weight to add */ diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesEvalParameters.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesEvalParameters.java index 65d8589b3..8d3823868 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesEvalParameters.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesEvalParameters.java @@ -28,7 +28,8 @@ public class NaiveBayesEvalParameters extends EvalParameters { protected double[] outcomeTotals; protected long vocabulary; - public NaiveBayesEvalParameters(Context[] params, int numOutcomes, double[] outcomeTotals, long vocabulary) { + public NaiveBayesEvalParameters(Context[] params, int numOutcomes, + double[] outcomeTotals, long vocabulary) { super(params, 0, 0, numOutcomes); this.outcomeTotals = outcomeTotals; this.vocabulary = vocabulary; diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java index 1af4f14e8..87d2b9dd5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java @@ -33,17 +33,20 @@ public class NaiveBayesModel extends AbstractModel { protected double[] outcomeTotals; protected long vocabulary; - public NaiveBayesModel(Context[] params, String[] predLabels, Map pmap, String[] outcomeNames) { + public NaiveBayesModel(Context[] params, String[] predLabels, Map pmap, + String[] outcomeNames) { super(params, predLabels, pmap, outcomeNames); outcomeTotals = initOutcomeTotals(outcomeNames, params); - this.evalParams = new NaiveBayesEvalParameters(params, outcomeNames.length, outcomeTotals, predLabels.length); + this.evalParams = new NaiveBayesEvalParameters(params, outcomeNames.length, + outcomeTotals, predLabels.length); modelType = ModelType.NaiveBayes; } 
public NaiveBayesModel(Context[] params, String[] predLabels, String[] outcomeNames) { super(params, predLabels, outcomeNames); outcomeTotals = initOutcomeTotals(outcomeNames, params); - this.evalParams = new NaiveBayesEvalParameters(params, outcomeNames.length, outcomeTotals, predLabels.length); + this.evalParams = new NaiveBayesEvalParameters(params, outcomeNames.length, + outcomeTotals, predLabels.length); modelType = ModelType.NaiveBayes; } @@ -86,11 +89,14 @@ public static double[] eval(int[] context, double[] prior, EvalParameters model) return eval(context, null, prior, model, true); } - public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model, boolean normalize) { + public static double[] eval(int[] context, float[] values, double[] prior, + EvalParameters model, boolean normalize) { Probabilities probabilities = new LogProbabilities<>(); Context[] params = model.getParams(); - double[] outcomeTotals = model instanceof NaiveBayesEvalParameters ? ((NaiveBayesEvalParameters) model).getOutcomeTotals() : new double[prior.length]; - long vocabulary = model instanceof NaiveBayesEvalParameters ? ((NaiveBayesEvalParameters) model).getVocabulary() : 0; + double[] outcomeTotals = model instanceof NaiveBayesEvalParameters + ? ((NaiveBayesEvalParameters) model).getOutcomeTotals() : new double[prior.length]; + long vocabulary = model instanceof NaiveBayesEvalParameters + ? 
((NaiveBayesEvalParameters) model).getVocabulary() : 0; double[] activeParameters; int[] activeOutcomes; double value = 1; @@ -125,7 +131,8 @@ public static double[] eval(int[] context, float[] values, double[] prior, EvalP return prior; } - private static double getProbability(double numerator, double denominator, double vocabulary, boolean isSmoothed) { + private static double getProbability(double numerator, double denominator, + double vocabulary, boolean isSmoothed) { if (isSmoothed) return getSmoothedProbability(numerator, denominator, vocabulary); else if (denominator == 0 || denominator < Double.MIN_VALUE) @@ -139,4 +146,4 @@ private static double getSmoothedProbability(double numerator, double denominato return 1.0 * (numerator + delta) / (denominator + delta * vocabulary); } -} \ No newline at end of file +} diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/Probabilities.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/Probabilities.java index 10f448156..6357b63f5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/Probabilities.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/Probabilities.java @@ -61,7 +61,8 @@ public void set(T t, Probability probability) { } /** - * Assigns a probability to a label, discarding any previously assigned probability, if the new probability is greater than the old one. + * Assigns a probability to a label, discarding any previously assigned probability, + * if the new probability is greater than the old one. 
* * @param t the label to which the probability is being assigned * @param probability the probability to assign diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/Probability.java b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/Probability.java index 7474d1cb8..703ad527c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/Probability.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/Probability.java @@ -50,7 +50,8 @@ public void set(Probability probability) { } /** - * Assigns a probability to a label, discarding any previously assigned probability, if the new probability is greater than the old one. + * Assigns a probability to a label, discarding any previously assigned probability, + * if the new probability is greater than the old one. * * @param probability the probability to assign */ @@ -61,7 +62,8 @@ public void setIfLarger(double probability) { } /** - * Assigns a probability to a label, discarding any previously assigned probability, if the new probability is greater than the old one. + * Assigns a probability to a label, discarding any previously assigned probability, + * if the new probability is greater than the old one. 
* * @param probability the probability to assign */ diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModel.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModel.java index c1131fda3..667732d04 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/PerceptronModel.java @@ -27,7 +27,8 @@ public class PerceptronModel extends AbstractModel { - public PerceptronModel(Context[] params, String[] predLabels, Map pmap, String[] outcomeNames) { + public PerceptronModel(Context[] params, String[] predLabels, Map pmap, + String[] outcomeNames) { super(params,predLabels,pmap,outcomeNames); modelType = ModelType.Perceptron; } @@ -63,7 +64,8 @@ public static double[] eval(int[] context, double[] prior, EvalParameters model) return eval(context,null,prior,model,true); } - public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model, boolean normalize) { + public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model, + boolean normalize) { Context[] params = model.getParams(); double[] activeParameters; int[] activeOutcomes; diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java index 0a78ee00a..790056722 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ml/perceptron/SimplePerceptronSequenceTrainer.java @@ -111,7 +111,8 @@ public AbstractModel doTrain(SequenceStream events) throws IOException { // << members related to AbstractSequenceTrainer - public AbstractModel trainModel(int iterations, SequenceStream sequenceStream, int cutoff, boolean useAverage) throws IOException { + public AbstractModel trainModel(int 
iterations, SequenceStream sequenceStream, + int cutoff, boolean useAverage) throws IOException { this.iterations = iterations; this.sequenceStream = sequenceStream; Map indexingParameters = new HashMap(); @@ -254,7 +255,8 @@ public void nextIteration(int iteration) throws IOException { for (int oi = 0; oi < numOutcomes; oi++) { featureCounts.get(oi).clear(); } - //System.err.print("train:");for (int ei=0;ei "+averageParams[pi].getParameters()[oi]); + //System.err.println("p updates["+pi+"]["+oi+"]=("+updates[pi][oi][ITER]+"," + // +updates[pi][oi][EVENT]+","+updates[pi][oi][VALUE]+") + ("+iteration+","+oei+"," + // +params[pi].getParameters()[oi]+") -> "+averageParams[pi].getParameters()[oi]); updates[pi][oi][VALUE] = (int) params[pi].getParameters()[oi]; updates[pi][oi][ITER] = iteration; updates[pi][oi][EVENT] = si; @@ -331,12 +337,15 @@ public void nextIteration(int iteration) throws IOException { double[] predParams = averageParams[pi].getParameters(); for (int oi = 0; oi < numOutcomes; oi++) { if (updates[pi][oi][VALUE] != 0) { - predParams[oi] += updates[pi][oi][VALUE] * (numSequences * (iterations - updates[pi][oi][ITER]) - updates[pi][oi][EVENT]); + predParams[oi] += updates[pi][oi][VALUE] * (numSequences + * (iterations - updates[pi][oi][ITER]) - updates[pi][oi][EVENT]); } if (predParams[oi] != 0) { predParams[oi] /= totIterations; averageParams[pi].setParameter(oi, predParams[oi]); - //System.err.println("updates["+pi+"]["+oi+"]=("+updates[pi][oi][ITER]+","+updates[pi][oi][EVENT]+","+updates[pi][oi][VALUE]+") + ("+iterations+","+0+","+params[pi].getParameters()[oi]+") -> "+averageParams[pi].getParameters()[oi]); + //System.err.println("updates["+pi+"]["+oi+"]=("+updates[pi][oi][ITER]+"," + // +updates[pi][oi][EVENT]+","+updates[pi][oi][VALUE]+") + ("+iterations+","+0+"," + // +params[pi].getParameters()[oi]+") -> "+averageParams[pi].getParameters()[oi]); } } } @@ -352,7 +361,8 @@ private void trainingStats(MutableContext[] params) throws IOException { 
Sequence sequence; while ((sequence = sequenceStream.read()) != null) { - Event[] taggerEvents = sequenceStream.updateContext(sequence, new PerceptronModel(params,predLabels,pmap,outcomeLabels)); + Event[] taggerEvents = sequenceStream.updateContext(sequence, + new PerceptronModel(params,predLabels,pmap,outcomeLabels)); for (int ei = 0; ei < taggerEvents.length; ei++, oei++) { int max = omap.get(taggerEvents[ei].getOutcome()); if (max == outcomeList[oei]) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java index 177b93827..6e73504cb 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/BilouNameFinderSequenceValidator.java @@ -52,7 +52,8 @@ public boolean validSequence(int i, String[] inputSequence, if (outcomesSequence.length - 1 > 0) { if (outcome.endsWith(NameFinderME.OTHER)) { - if (outcomesSequence[outcomesSequence.length - 1].endsWith(NameFinderME.START) || outcomesSequence[outcomesSequence.length - 1].endsWith(NameFinderME.CONTINUE)) { + if (outcomesSequence[outcomesSequence.length - 1].endsWith(NameFinderME.START) + || outcomesSequence[outcomesSequence.length - 1].endsWith(NameFinderME.CONTINUE)) { return false; } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java index 28e10c9b6..9c65ba44b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/DefaultNameContextGenerator.java @@ -102,9 +102,12 @@ public void clearAdaptiveData() { /** * Return the context for finding names at the specified index. 
- * @param index The index of the token in the specified toks array for which the context should be constructed. - * @param tokens The tokens of the sentence. The toString methods of these objects should return the token text. - * @param preds The previous decisions made in the tagging of this sequence. Only indices less than i will be examined. + * @param index The index of the token in the specified toks array for which the + * context should be constructed. + * @param tokens The tokens of the sentence. The toString methods + * of these objects should return the token text. + * @param preds The previous decisions made in the tagging of this sequence. + * Only indices less than i will be examined. * @param additionalContext Addition features which may be based on a context outside of the sentence. * * @return the context for finding names at the specified index. @@ -137,4 +140,4 @@ public String[] getContext(int index, String[] tokens, String[] preds, Object[] return features.toArray(new String[features.size()]); } -} \ No newline at end of file +} diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/DocumentNameFinder.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/DocumentNameFinder.java index c1c1b2d9d..b6514c2a8 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/DocumentNameFinder.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/DocumentNameFinder.java @@ -31,8 +31,9 @@ public interface DocumentNameFinder { /** * Returns tokens span for the specified document of sentences and their tokens. * Span start and end indices are relative to the sentence they are in. - * For example, a span identifying a name consisting of the first and second word of the second sentence would - * be 0..2 and be referenced as spans[1][0]. + * For example, a span identifying a name consisting of the first and second word + * of the second sentence would be 0..2 and be referenced as spans[1][0]. 
+ * * @param document An array of tokens for each sentence of a document. * @return The token spans for each sentence of the specified document. */ diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameContextGenerator.java index be3ee2ee7..a438cecff 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameContextGenerator.java @@ -33,14 +33,16 @@ public interface NameContextGenerator extends BeamSearchContextGenerator void addFeatureGenerator(AdaptiveFeatureGenerator generator); /** - * Informs all the feature generators for a name finder that the specified tokens have been classified with the coorisponds set of specified outcomes. + * Informs all the feature generators for a name finder that the specified tokens have + * been classified with the corresponding set of specified outcomes. * @param tokens The tokens of the sentence or other text unit which has been processed. * @param outcomes The outcomes associated with the specified tokens. */ void updateAdaptiveData(String[] tokens, String[] outcomes); /** - * Informs all the feature generators for a name finder that the context of the adaptive data (typically a document) is no longer valid. + * Informs all the feature generators for a name finder that the context of the adaptive + * data (typically a document) is no longer valid. 
*/ void clearAdaptiveData(); diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java index f0e89ea0e..0afd3c1dd 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java @@ -38,7 +38,8 @@ public class NameFinderEventStream extends opennlp.tools.util.AbstractEventStrea private NameContextGenerator contextGenerator; - private AdditionalContextFeatureGenerator additionalContextFeatureGenerator = new AdditionalContextFeatureGenerator(); + private AdditionalContextFeatureGenerator additionalContextFeatureGenerator = + new AdditionalContextFeatureGenerator(); private SequenceCodec codec; @@ -50,7 +51,8 @@ public class NameFinderEventStream extends opennlp.tools.util.AbstractEventStrea * @param type null or overrides the type parameter in the provided samples * @param contextGenerator The context generator used to generate features for the event stream. 
*/ - public NameFinderEventStream(ObjectStream dataStream, String type, NameContextGenerator contextGenerator, SequenceCodec codec) { + public NameFinderEventStream(ObjectStream dataStream, String type, + NameContextGenerator contextGenerator, SequenceCodec codec) { super(dataStream); this.codec = codec; @@ -60,7 +62,8 @@ public NameFinderEventStream(ObjectStream dataStream, String type, N } this.contextGenerator = contextGenerator; - this.contextGenerator.addFeatureGenerator(new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); + this.contextGenerator.addFeatureGenerator( + new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); this.defaultType = type; } @@ -105,7 +108,8 @@ public static String[] generateOutcomes(Span[] names, String type, int length) { return outcomes; } - public static List generateEvents(String[] sentence, String[] outcomes, NameContextGenerator cg) { + public static List generateEvents(String[] sentence, String[] outcomes, + NameContextGenerator cg) { List events = new ArrayList<>(outcomes.length); for (int i = 0; i < outcomes.length; i++) { events.add(new Event(outcomes[i], cg.getContext(i, sentence, outcomes,null))); @@ -148,7 +152,6 @@ private void overrideType(Span[] names) { } } - /** * Generated previous decision features for each token based on contents of the specified map. * @param tokens The token for which the context is generated. 
@@ -162,6 +165,5 @@ public static String[][] additionalContext(String[] tokens, Map ac[ti][0] = "pd=" + pt; } return ac; - } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java index 684bea3c2..bb3603e67 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java @@ -252,7 +252,8 @@ else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) { SequenceTrainer trainer = TrainerFactory.getSequenceModelTrainer( trainParams.getSettings(), manifestInfoEntries); - NameSampleSequenceStream ss = new NameSampleSequenceStream(samples, factory.createContextGenerator(), false); + NameSampleSequenceStream ss = + new NameSampleSequenceStream(samples, factory.createContextGenerator(), false); seqModel = trainer.train(ss); } else { throw new IllegalStateException("Unexpected trainer type!"); diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java index 5ac519e26..cf19bf2f3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java @@ -44,7 +44,8 @@ public NameSampleSequenceStream(ObjectStream psi, AdaptiveFeatureGen this(psi, new DefaultNameContextGenerator(featureGen), true); } - public NameSampleSequenceStream(ObjectStream psi, AdaptiveFeatureGenerator featureGen, boolean useOutcomes) + public NameSampleSequenceStream(ObjectStream psi, + AdaptiveFeatureGenerator featureGen, boolean useOutcomes) throws IOException { this(psi, new DefaultNameContextGenerator(featureGen), useOutcomes); } @@ -70,7 +71,8 @@ public NameSampleSequenceStream(ObjectStream psi, NameContextGenerat @SuppressWarnings("unchecked") public Event[] 
updateContext(Sequence sequence, AbstractModel model) { - TokenNameFinder tagger = new NameFinderME(new TokenNameFinderModel("x-unspecified", model, Collections.emptyMap(), null)); + TokenNameFinder tagger = new NameFinderME(new TokenNameFinderModel( + "x-unspecified", model, Collections.emptyMap(), null)); String[] sentence = ((Sequence) sequence).getSource().getSentence(); String[] tags = seqCodec.encode(tagger.find(sentence), sentence.length); Event[] events = new Event[sentence.length]; diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java index 8e185e6a4..e912caf4e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java @@ -39,7 +39,8 @@ public class RegexNameFinderFactory { * @param defaults the OpenNLP default regexes * @return */ - public static synchronized RegexNameFinder getDefaultRegexNameFinders(Map config, DEFAULT_REGEX_NAME_FINDER... defaults) { + public static synchronized RegexNameFinder getDefaultRegexNameFinders( + Map config, DEFAULT_REGEX_NAME_FINDER... defaults) { if (config == null) { throw new IllegalArgumentException("config Map cannot be null"); } @@ -57,14 +58,16 @@ public static synchronized RegexNameFinder getDefaultRegexNameFinders(Map defaultsToMap(DEFAULT_REGEX_NAME_FINDER... defaults) { + private synchronized static Map defaultsToMap( + DEFAULT_REGEX_NAME_FINDER... 
defaults) { Map regexMap = new HashMap<>(); for (DEFAULT_REGEX_NAME_FINDER def : defaults) { regexMap.putAll(def.getRegexMap()); @@ -85,7 +88,10 @@ public enum DEFAULT_REGEX_NAME_FINDER implements RegexAble { @Override public Map getRegexMap() { Pattern[] p = new Pattern[1]; - // p[0] = Pattern.compile("([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})", Pattern.CASE_INSENSITIVE); + // p[0] = Pattern.compile("([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]? + // (\\d){2,}[- \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- + // \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})", + // Pattern.CASE_INSENSITIVE); p[0] = Pattern.compile("((\\(\\d{3}\\) ?)|(\\d{3}-))?\\d{3}-\\d{4}"); Map regexMap = new HashMap<>(); regexMap.put(getType(), p); @@ -101,7 +107,12 @@ public String getType() { @Override public Map getRegexMap() { Pattern[] p = new Pattern[1]; - p[0] = Pattern.compile("([a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"([\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\")@(?:(?:[a-z0-9]([a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])", Pattern.CASE_INSENSITIVE); + p[0] = Pattern.compile("([a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*" + + "|\"([\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09" + + "\\x0b\\x0c\\x0e-\\x7f])*\")@(?:(?:[a-z0-9]([a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]" + + "*[a-z0-9])?|\\[((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]" + + 
"?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]" + + "|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])", Pattern.CASE_INSENSITIVE); Map regexMap = new HashMap<>(); regexMap.put(getType(), p); return regexMap; @@ -140,7 +151,8 @@ public String getType() { @Override public Map getRegexMap() { Pattern[] p = new Pattern[1]; - p[0] = Pattern.compile("\\d{1,2}[A-Za-z]\\s*[A-Za-z]{2}\\s*\\d{1,5}\\s*\\d{1,5}", Pattern.CASE_INSENSITIVE); + p[0] = Pattern.compile("\\d{1,2}[A-Za-z]\\s*[A-Za-z]{2}\\s*\\d{1,5}\\s*\\d{1,5}", + Pattern.CASE_INSENSITIVE); Map regexMap = new HashMap<>(); regexMap.put(getType(), p); return regexMap; @@ -155,7 +167,10 @@ public String getType() { @Override public Map getRegexMap() { Pattern[] p = new Pattern[1]; - p[0] = Pattern.compile("([-|\\+]?\\d{1,3}[d|D|\\u00B0|\\s](\\s*\\d{1,2}['|\\u2019|\\s])?(\\s*\\d{1,2}[\\\"|\\u201d])?\\s*[N|n|S|s]?)(\\s*|,|,\\s*)([-|\\+]?\\d{1,3}[d|D|\\u00B0|\\s](\\s*\\d{1,2}['|\\u2019|\\s])?(\\s*\\d{1,2}[\\\"|\\u201d])?\\s*[E|e|W|w]?)", Pattern.CASE_INSENSITIVE); + p[0] = Pattern.compile("([-|\\+]?\\d{1,3}[d|D|\\u00B0|\\s](\\s*\\d{1,2}['|\\u2019|\\s])" + + "?(\\s*\\d{1,2}[\\\"|\\u201d])?\\s*[N|n|S|s]?)(\\s*|,|,\\s*)([-|\\+]?\\d{1,3}[d|D|\\u00B0|" + + "\\s](\\s*\\d{1,2}['|\\u2019|\\s])?(\\s*\\d{1,2}[\\\"|\\u201d])?\\s*[E|e|W|w]?)", + Pattern.CASE_INSENSITIVE); Map regexMap = new HashMap<>(); regexMap.put(getType(), p); return regexMap; diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java index 6da6f4e51..3b5162ee1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinder.java @@ -24,7 +24,9 @@ */ public interface TokenNameFinder { - /** Generates name tags for the given sequence, typically a sentence, returning token spans for any identified names. 
+ /** Generates name tags for the given sequence, typically a sentence, + * returning token spans for any identified names. + * * @param tokens an array of the tokens or words of the sequence, typically a sentence. * @return an array of spans for each of the names identified. */ diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java index 38eba4474..6a68b86f0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java @@ -210,7 +210,8 @@ public void evaluate(ObjectStream samples, int nFolds) while (partitioner.hasNext()) { - CrossValidationPartitioner.TrainingSampleStream trainingSampleStream = partitioner.next(); + CrossValidationPartitioner.TrainingSampleStream trainingSampleStream = + partitioner.next(); TokenNameFinderModel model; if (factory != null) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java index 3c8c549c9..d58527bd1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderEvaluator.java @@ -48,7 +48,8 @@ public class TokenNameFinderEvaluator extends Evaluator { * @param nameFinder the {@link TokenNameFinder} to evaluate. * @param listeners evaluation sample listeners */ - public TokenNameFinderEvaluator(TokenNameFinder nameFinder, TokenNameFinderEvaluationMonitor ... listeners) { + public TokenNameFinderEvaluator(TokenNameFinder nameFinder, + TokenNameFinderEvaluationMonitor ... 
listeners) { super(listeners); this.nameFinder = nameFinder; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java index ae2998b90..e7f0190cd 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java @@ -63,7 +63,8 @@ public TokenNameFinderFactory(byte[] featureGeneratorBytes, final Map resources, SequenceCodec seqCodec) { + void init(byte[] featureGeneratorBytes, final Map resources, + SequenceCodec seqCodec) { this.featureGeneratorBytes = featureGeneratorBytes; this.resources = resources; this.seqCodec = seqCodec; @@ -104,8 +105,9 @@ protected byte[] getFeatureGenerator() { return featureGeneratorBytes; } - public static TokenNameFinderFactory create(String subclassName, byte[] featureGeneratorBytes, final Map resources, - SequenceCodec seqCodec) throws InvalidFormatException { + public static TokenNameFinderFactory create(String subclassName, byte[] featureGeneratorBytes, + final Map resources, SequenceCodec seqCodec) + throws InvalidFormatException { TokenNameFinderFactory theFactory; if (subclassName == null) { // will create the default factory diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java index ce2ea00a3..221c4bedb 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramGenerator.java @@ -33,7 +33,8 @@ public class NGramGenerator { * * @param input the input tokens the output ngrams will be derived from * @param n the number of tokens as the sliding window - * @param separator each string in each gram will be separated by this value if desired. 
Pass in empty string if no separator is desired + * @param separator each string in each gram will be separated by this value if desired. + * Pass in empty string if no separator is desired * @return */ public static List generate(List input, int n, String separator) { @@ -56,7 +57,8 @@ public static List generate(List input, int n, String separator) *Generates an nGram based on a char[] input * @param input the array of chars to convert to nGram * @param n The number of grams (chars) that each output gram will consist of - * @param separator each char in each gram will be separated by this value if desired. Pass in empty string if no separator is desired + * @param separator each char in each gram will be separated by this value if desired. + * Pass in empty string if no separator is desired * @return */ public static List generate(char[] input, int n, String separator) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java index 296a6320d..6ebb82cd2 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java @@ -278,7 +278,8 @@ public Dictionary toDictionary() { * Creates a dictionary which contains all {@link StringList}s which * are in the current {@link NGramModel}. * - * @param caseSensitive Specifies whether case distinctions should be kept in the creation of the dictionary. + * @param caseSensitive Specifies whether case distinctions should be kept + * in the creation of the dictionary. 
* * @return a dictionary of the ngrams */ diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java index 098827225..9d8e1fb2a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java +++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramUtils.java @@ -41,7 +41,8 @@ public class NGramUtils { * @return the Laplace smoothing probability * @see Additive Smoothing */ - public static double calculateLaplaceSmoothingProbability(StringList ngram, Iterable set, int size, Double k) { + public static double calculateLaplaceSmoothingProbability(StringList ngram, + Iterable set, int size, Double k) { return (count(ngram, set) + k) / (count(getNMinusOneTokenFirst(ngram), set) + k * 1); } @@ -81,7 +82,8 @@ public static double calculateBigramMLProbability(String x0, String x1, Collecti * @param set the vocabulary * @return the maximum likelihood probability */ - public static double calculateTrigramMLProbability(String x0, String x1, String x2, Iterable set) { + public static double calculateTrigramMLProbability(String x0, String x1, String x2, + Iterable set) { return calculateNgramMLProbability(new StringList(x0, x1, x2), set); } @@ -106,7 +108,8 @@ public static double calculateNgramMLProbability(StringList ngram, Iterable set, Double k) { + public static double calculateBigramPriorSmoothingProbability(String x0, String x1, + Collection set, Double k) { return (count(new StringList(x0, x1), set) + k * calculateUnigramMLProbability(x1, set)) / (count(new StringList(x0), set) + k * set.size()); } @@ -123,7 +126,8 @@ public static double calculateBigramPriorSmoothingProbability(String x0, String * @param lambda3 unigram interpolation factor * @return the linear interpolation probability */ - public static double calculateTrigramLinearInterpolationProbability(String x0, String x1, String x2, Collection set, + public static double 
calculateTrigramLinearInterpolationProbability(String x0, String x1, + String x2, Collection set, Double lambda1, Double lambda2, Double lambda3) { assert lambda1 + lambda2 + lambda3 == 1 : "lambdas sum should be equals to 1"; assert lambda1 > 0 && lambda2 > 0 && lambda3 > 0 : "lambdas should all be greater than 0"; @@ -142,7 +146,8 @@ public static double calculateTrigramLinearInterpolationProbability(String x0, S * @param set the vocabulary * @return the probability */ - public static double calculateMissingNgramProbabilityMass(StringList ngram, Double discount, Iterable set) { + public static double calculateMissingNgramProbabilityMass(StringList ngram, Double discount, + Iterable set) { Double missingMass = 0d; Double countWord = count(ngram, set); for (String word : flatSet(set)) { @@ -250,5 +255,4 @@ public static Collection getNGrams(StringList sequence, int size) { return ngrams; } - } diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java index d0804bd75..b0fc3e4e6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java @@ -40,11 +40,11 @@ * Abstract class which contains code to tag and chunk parses for bottom up parsing and * leaves implementation of advancing parses and completing parses to extend class. *

- * Note:
The nodes within - * the returned parses are shared with other parses and therefore their parent node references will not be consistent - * with their child node reference. {@link #setParents setParents} can be used to make the parents consistent - * with a particular parse, but subsequent calls to setParents can invalidate the results of earlier - * calls.
+ * Note:
The nodes within the returned parses are shared with other parses + * and therefore their parent node references will not be consistent with their child + * node reference. {@link #setParents setParents} can be used to make the parents consistent + * with a particular parse, but subsequent calls to setParents can invalidate + * the results of earlier calls.
*/ public abstract class AbstractBottomUpParser implements Parser { @@ -172,7 +172,8 @@ public abstract class AbstractBottomUpParser implements Parser { */ protected boolean debugOn = false; - public AbstractBottomUpParser(POSTagger tagger, Chunker chunker, HeadRules headRules, int beamSize, double advancePercentage) { + public AbstractBottomUpParser(POSTagger tagger, Chunker chunker, HeadRules headRules, + int beamSize, double advancePercentage) { this.tagger = tagger; this.chunker = chunker; this.M = beamSize; @@ -210,8 +211,9 @@ public static void setParents(Parse p) { /** * Removes the punctuation from the specified set of chunks, adds it to the parses - * adjacent to the punctuation is specified, and returns a new array of parses with the punctuation - * removed. + * adjacent to the punctuation is specified, and returns a new array of parses with + * the punctuation removed. + * * @param chunks A set of parses. * @param punctSet The set of punctuation which is to be removed. * @return An array of parses which is a subset of chunks with punctuation removed. @@ -249,10 +251,12 @@ public static Parse[] collapsePunctuation(Parse[] chunks, Set punctSet) /** - * Advances the specified parse and returns the an array advanced parses whose probability accounts for - * more than the specified amount of probability mass. + * Advances the specified parse and returns the an array advanced parses whose + * probability accounts for more than the specified amount of probability mass. + * * @param p The parse to advance. - * @param probMass The amount of probability mass that should be accounted for by the advanced parses. + * @param probMass The amount of probability mass that should be accounted for + * by the advanced parses. 
*/ protected abstract Parse[] advanceParses(final Parse p, double probMass); @@ -273,15 +277,18 @@ public Parse[] parse(Parse tokens, int numParses) { Parse guess = null; double minComplete = 2; double bestComplete = -100000; //approximating -infinity/0 in ln domain - while (odh.size() > 0 && (completeParses.size() < M || (odh.first()).getProb() < minComplete) && derivationStage < maxDerivationLength) { + while (odh.size() > 0 && (completeParses.size() < M || (odh.first()).getProb() < minComplete) + && derivationStage < maxDerivationLength) { ndh = new ListHeap<>(K); int derivationRank = 0; - for (Iterator pi = odh.iterator(); pi.hasNext() && derivationRank < K; derivationRank++) { // forearch derivation + for (Iterator pi = odh.iterator(); pi.hasNext() + && derivationRank < K; derivationRank++) { // forearch derivation Parse tp = pi.next(); //TODO: Need to look at this for K-best parsing cases /* - if (tp.getProb() < bestComplete) { //this parse and the ones which follow will never win, stop advancing. + //this parse and the ones which follow will never win, stop advancing. 
+ if (tp.getProb() < bestComplete) { break; } */ @@ -329,7 +336,8 @@ else if (1 == derivationStage) { } else { //if (reportFailedParse) { - // System.err.println("Couldn't advance parse "+derivationStage+" stage "+derivationRank+"!\n"); + // System.err.println("Couldn't advance parse " + derivationStage + // + " stage " + derivationRank + "!\n"); //} advanceTop(tp); completeParses.add(tp); @@ -402,11 +410,13 @@ protected Parse[] advanceChunks(final Parse p, double minChunkScore) { String type = null; //System.err.print("sequence "+si+" "); for (int j = 0; j <= tags.length; j++) { - //if (j != tags.length) {System.err.println(words[j]+" "+ptags[j]+" "+tags[j]+" "+probs.get(j));} + // if (j != tags.length) {System.err.println(words[j]+" " + // +ptags[j]+" "+tags[j]+" "+probs.get(j));} if (j != tags.length) { newParses[si].addProb(Math.log(probs[j])); } - if (j != tags.length && tags[j].startsWith(CONT)) { // if continue just update end chunking tag don't use contTypeMap + // if continue just update end chunking tag don't use contTypeMap + if (j != tags.length && tags[j].startsWith(CONT)) { end = j; } else { //make previous constituent if it exists @@ -414,7 +424,8 @@ protected Parse[] advanceChunks(final Parse p, double minChunkScore) { //System.err.println("inserting tag "+tags[j]); Parse p1 = p.getChildren()[start]; Parse p2 = p.getChildren()[end]; - //System.err.println("Putting "+type+" at "+start+","+end+" for "+j+" "+newParses[si].getProb()); + // System.err.println("Putting "+type+" at "+start+","+end+" for " + // +j+" "+newParses[si].getProb()); Parse[] cons = new Parse[end - start + 1]; cons[0] = p1; //cons[0].label="Start-"+type; @@ -426,7 +437,8 @@ protected Parse[] advanceChunks(final Parse p, double minChunkScore) { //cons[ci].label="Cont-"+type; } } - Parse chunk = new Parse(p1.getText(), new Span(p1.getSpan().getStart(), p2.getSpan().getEnd()), type, 1, headRules.getHead(cons, type)); + Parse chunk = new Parse(p1.getText(), new 
Span(p1.getSpan().getStart(), + p2.getSpan().getEnd()), type, 1, headRules.getHead(cons, type)); chunk.isChunk(true); newParses[si].insert(chunk); } @@ -504,7 +516,8 @@ private static boolean lastChild(Parse child, Parse parent, Set punctSet } /** - * Creates a n-gram dictionary from the specified data stream using the specified head rule and specified cut-off. + * Creates a n-gram dictionary from the specified data stream using the specified + * head rule and specified cut-off. * * @param data The data stream of parses. * @param rules The head rules for the parses. @@ -512,8 +525,8 @@ private static boolean lastChild(Parse child, Parse parent, Set punctSet * n-gram to be saved as part of the dictionary. * @return A dictionary object. */ - public static Dictionary buildDictionary(ObjectStream data, HeadRules rules, TrainingParameters params) - throws IOException { + public static Dictionary buildDictionary(ObjectStream data, HeadRules rules, + TrainingParameters params) throws IOException { int cutoff = 5; @@ -538,7 +551,8 @@ public static Dictionary buildDictionary(ObjectStream data, HeadRules rul mdict.add(new StringList(words), 1, 1); //add tri-grams and bi-grams for inital sequence - Parse[] chunks = collapsePunctuation(ParserEventStream.getInitialChunks(p),rules.getPunctuationTags()); + Parse[] chunks = collapsePunctuation(ParserEventStream.getInitialChunks(p), + rules.getPunctuationTags()); String[] cwords = new String[chunks.length]; for (int wi = 0; wi < cwords.length; wi++) { cwords[wi] = chunks[wi].getHead().getCoveredText(); @@ -548,7 +562,8 @@ public static Dictionary buildDictionary(ObjectStream data, HeadRules rul //emulate reductions to produce additional n-grams int ci = 0; while (ci < chunks.length) { - //System.err.println("chunks["+ci+"]="+chunks[ci].getHead().getCoveredText()+" chunks.length="+chunks.length + " " + chunks[ci].getParent()); + // System.err.println("chunks["+ci+"]="+chunks[ci].getHead().getCoveredText() + // +" 
chunks.length="+chunks.length + " " + chunks[ci].getParent()); if (chunks[ci].getParent() == null) { chunks[ci].show(); @@ -593,11 +608,13 @@ else if (window.length == 2) { } /** - * Creates a n-gram dictionary from the specified data stream using the specified head rule and specified cut-off. + * Creates a n-gram dictionary from the specified data stream using the specified + * head rule and specified cut-off. * * @param data The data stream of parses. * @param rules The head rules for the parses. - * @param cutoff The minimum number of entries required for the n-gram to be saved as part of the dictionary. + * @param cutoff The minimum number of entries required for the n-gram to be + * saved as part of the dictionary. * @return A dictionary object. */ public static Dictionary buildDictionary(ObjectStream data, HeadRules rules, int cutoff) diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java index df45c139b..b4b32e367 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java @@ -36,7 +36,8 @@ public abstract class AbstractContextGenerator { protected boolean useLabel; /** - * Creates punctuation feature for the specified punctuation at the specified index based on the punctuation mark. + * Creates punctuation feature for the specified punctuation at the specified index + * based on the punctuation mark. * @param punct The punctuation which is in context. * @param i The index of the punctuation with relative to the parse. * @return Punctuation feature for the specified parse and the specified punctuation at the specfied index. @@ -46,7 +47,8 @@ protected String punct(Parse punct, int i) { } /** - * Creates punctuation feature for the specified punctuation at the specfied index based on the punctuation's tag. 
+ * Creates punctuation feature for the specified punctuation at the specfied index + * based on the punctuation's tag. * @param punct The punctuation which is in context. * @param i The index of the punctuation relative to the parse. * @return Punctuation feature for the specified parse and the specified punctuation at the specfied index. @@ -158,8 +160,10 @@ protected void cons2(List features, Cons c0, Cons c1, Collection * @param punct1s The punctuation between the first and second node. * @param punct2s The punctuation between the second and third node. * @param trigram Specifies whether lexical tri-gram features between these nodes should be generated. - * @param bigram1 Specifies whether lexical bi-gram features between the first and second node should be generated. - * @param bigram2 Specifies whether lexical bi-gram features between the second and third node should be generated. + * @param bigram1 Specifies whether lexical bi-gram features between the first and second + * node should be generated. + * @param bigram2 Specifies whether lexical bi-gram features between the second and third + * node should be generated. */ protected void cons3(List features, Cons c0, Cons c1, Cons c2, Collection punct1s, Collection punct2s, boolean trigram, boolean bigram1, boolean bigram2) { @@ -199,17 +203,25 @@ protected void cons3(List features, Cons c0, Cons c1, Cons c2, Collectio String punctbo2 = punctbo(pi2.next(),c2.index <= 0 ? c2.index - 1 : c2.index); for (Iterator pi1 = punct1s.iterator(); pi1.hasNext();) { String punctbo1 = punctbo(pi1.next(),c1.index <= 0 ? 
c1.index - 1 : c1.index); - if (trigram) features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.cons); - - if (bigram2) features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.cons); - if (c0.unigram && c2.unigram) features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + punctbo2 + "," + c2.cons); - if (bigram1) features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.consbo); - - if (c2.unigram) features.add(c0.consbo + "," + punctbo1 + "," + c1.consbo + "," + punctbo2 + "," + c2.cons); - if (c1.unigram) features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.consbo); - if (c0.unigram) features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + punctbo2 + "," + c2.consbo); + if (trigram) + features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.cons); + + if (bigram2) + features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.cons); + if (c0.unigram && c2.unigram) + features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + punctbo2 + "," + c2.cons); + if (bigram1) + features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.consbo); + + if (c2.unigram) + features.add(c0.consbo + "," + punctbo1 + "," + c1.consbo + "," + punctbo2 + "," + c2.cons); + if (c1.unigram) + features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + punctbo2 + "," + c2.consbo); + if (c0.unigram) + features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + punctbo2 + "," + c2.consbo); features.add(c0.consbo + "," + punctbo1 + "," + c1.consbo + "," + punctbo2 + "," + c2.consbo); + if (zeroBackOff) { if (bigram1) features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + punctbo2); if (c1.unigram) features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + punctbo2); @@ -225,8 +237,10 @@ protected void cons3(List features, Cons c0, Cons c1, Cons c2, Collectio String punctbo2 = 
punctbo(pi2.next(),c2.index <= 0 ? c2.index - 1 : c2.index); if (trigram) features.add(c0.cons + "," + c1.cons + "," + punctbo2 + "," + c2.cons); - if (bigram2) features.add(c0.consbo + "," + c1.cons + "," + punctbo2 + "," + c2.cons); - if (c0.unigram && c2.unigram) features.add(c0.cons + "," + c1.consbo + "," + punctbo2 + "," + c2.cons); + if (bigram2) + features.add(c0.consbo + "," + c1.cons + "," + punctbo2 + "," + c2.cons); + if (c0.unigram && c2.unigram) + features.add(c0.cons + "," + c1.consbo + "," + punctbo2 + "," + c2.cons); if (bigram1) features.add(c0.cons + "," + c1.cons + "," + punctbo2 + "," + c2.consbo); if (c2.unigram) features.add(c0.consbo + "," + c1.consbo + "," + punctbo2 + "," + c2.cons); @@ -249,15 +263,22 @@ protected void cons3(List features, Cons c0, Cons c1, Cons c2, Collectio //cons(0),punctbo(1),cons(1),cons(2) for (Iterator pi1 = punct1s.iterator(); pi1.hasNext();) { String punctbo1 = punctbo(pi1.next(), c1.index <= 0 ? c1.index - 1 : c1.index); - if (trigram) features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + c2.cons); - - if (bigram2) features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + c2.cons); - if (c0.unigram && c2.unigram) features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + c2.cons); - if (bigram1) features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + c2.consbo); - - if (c2.unigram) features.add(c0.consbo + "," + punctbo1 + "," + c1.consbo + "," + c2.cons); - if (c1.unigram) features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + c2.consbo); - if (c0.unigram) features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + c2.consbo); + if (trigram) + features.add(c0.cons + "," + punctbo1 + "," + c1.cons + "," + c2.cons); + + if (bigram2) + features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + c2.cons); + if (c0.unigram && c2.unigram) + features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + c2.cons); + if (bigram1) + features.add(c0.cons + "," + punctbo1 + "," + 
c1.cons + "," + c2.consbo); + + if (c2.unigram) + features.add(c0.consbo + "," + punctbo1 + "," + c1.consbo + "," + c2.cons); + if (c1.unigram) + features.add(c0.consbo + "," + punctbo1 + "," + c1.cons + "," + c2.consbo); + if (c0.unigram) + features.add(c0.cons + "," + punctbo1 + "," + c1.consbo + "," + c2.consbo); features.add(c0.consbo + "," + punctbo1 + "," + c1.consbo + "," + c2.consbo); @@ -289,14 +310,16 @@ protected void cons3(List features, Cons c0, Cons c1, Cons c2, Collectio * @param punctuation The punctuation adjacent and between the specified surrounding node. * @param features A list to which features are added. */ - protected void surround(Parse node, int i, String type, Collection punctuation, List features) { + protected void surround(Parse node, int i, String type, Collection punctuation, + List features) { StringBuilder feat = new StringBuilder(20); feat.append("s").append(i).append("="); if (punctuation != null) { for (Iterator pi = punctuation.iterator(); pi.hasNext();) { Parse punct = pi.next(); if (node != null) { - feat.append(node.getHead().getCoveredText()).append("|").append(type).append("|").append(node.getType()).append("|").append(punct.getType()); + feat.append(node.getHead().getCoveredText()).append("|").append(type) + .append("|").append(node.getType()).append("|").append(punct.getType()); } else { feat.append(type).append("|").append(EOS).append("|").append(punct.getType()); @@ -321,7 +344,8 @@ protected void surround(Parse node, int i, String type, Collection punctu } else { if (node != null) { - feat.append(node.getHead().getCoveredText()).append("|").append(type).append("|").append(node.getType()); + feat.append(node.getHead().getCoveredText()).append("|").append(type) + .append("|").append(node.getType()); } else { feat.append(type).append("|").append(EOS); @@ -350,7 +374,8 @@ protected void surround(Parse node, int i, String type, Collection punctu */ protected void checkcons(Parse child, String i, String type, List features) 
{ StringBuilder feat = new StringBuilder(20); - feat.append("c").append(i).append("=").append(child.getType()).append("|").append(child.getHead().getCoveredText()).append("|").append(type); + feat.append("c").append(i).append("=").append(child.getType()).append("|") + .append(child.getHead().getCoveredText()).append("|").append(type); features.add(feat.toString()); feat.setLength(0); feat.append("c").append(i).append("*=").append(child.getType()).append("|").append(type); @@ -359,16 +384,21 @@ protected void checkcons(Parse child, String i, String type, List featur protected void checkcons(Parse p1, Parse p2, String type, List features) { StringBuilder feat = new StringBuilder(20); - feat.append("cil=").append(type).append(",").append(p1.getType()).append("|").append(p1.getHead().getCoveredText()).append(",").append(p2.getType()).append("|").append(p2.getHead().getCoveredText()); + feat.append("cil=").append(type).append(",").append(p1.getType()).append("|") + .append(p1.getHead().getCoveredText()).append(",").append(p2.getType()) + .append("|").append(p2.getHead().getCoveredText()); features.add(feat.toString()); feat.setLength(0); - feat.append("ci*l=").append(type).append(",").append(p1.getType()).append(",").append(p2.getType()).append("|").append(p2.getHead().getCoveredText()); + feat.append("ci*l=").append(type).append(",").append(p1.getType()).append(",") + .append(p2.getType()).append("|").append(p2.getHead().getCoveredText()); features.add(feat.toString()); feat.setLength(0); - feat.append("cil*=").append(type).append(",").append(p1.getType()).append("|").append(p1.getHead().getCoveredText()).append(",").append(p2.getType()); + feat.append("cil*=").append(type).append(",").append(p1.getType()).append("|") + .append(p1.getHead().getCoveredText()).append(",").append(p2.getType()); features.add(feat.toString()); feat.setLength(0); - feat.append("ci*l*=").append(type).append(",").append(p1.getType()).append(",").append(p2.getType()); + 
feat.append("ci*l*=").append(type).append(",").append(p1.getType()) + .append(",").append(p2.getType()); features.add(feat.toString()); } diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractParserEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractParserEventStream.java index 8d4f77442..7e0323d8f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractParserEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractParserEventStream.java @@ -48,7 +48,8 @@ public abstract class AbstractParserEventStream extends opennlp.tools.util.Abstr protected boolean fixPossesives; protected Dictionary dict; - public AbstractParserEventStream(ObjectStream d, HeadRules rules, ParserEventTypeEnum etype, Dictionary dict) { + public AbstractParserEventStream(ObjectStream d, + HeadRules rules, ParserEventTypeEnum etype, Dictionary dict) { super(d); this.dict = dict; if (etype == ParserEventTypeEnum.CHUNK) { @@ -163,7 +164,9 @@ private void addChunkEvents(List chunkEvents, Parse[] chunks) { } } for (int ti = 0, tl = toks.size(); ti < tl; ti++) { - chunkEvents.add(new Event(preds.get(ti), chunkerContextGenerator.getContext(ti, toks.toArray(new String[toks.size()]), tags.toArray(new String[tags.size()]), preds.toArray(new String[preds.size()])))); + chunkEvents.add(new Event(preds.get(ti), + chunkerContextGenerator.getContext(ti, toks.toArray(new String[toks.size()]), + tags.toArray(new String[tags.size()]), preds.toArray(new String[preds.size()])))); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java b/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java index 335f063ce..5ee4f0ace 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java @@ -137,8 +137,8 @@ public class Parse implements Cloneable, Comparable { private static boolean useFunctionTags; /** - * Creates a new parse node for this 
specified text and span of the specified type with the specified probability - * and the specified head index. + * Creates a new parse node for this specified text and span of the specified type + * with the specified probability and the specified head index. * * @param text The text of the sentence for which this node is a part of. * @param span The character offsets for this node within the specified text. @@ -159,8 +159,8 @@ public Parse(String text, Span span, String type, double p, int index) { } /** - * Creates a new parse node for this specified text and span of the specified type with the specified probability - * and the specified head and head index. + * Creates a new parse node for this specified text and span of the specified type with + * the specified probability and the specified head and head index. * * @param text The text of the sentence for which this node is a part of. * @param span The character offsets for this node within the specified text. @@ -348,7 +348,8 @@ else if (sp.contains(ic)) { //System.err.println("Parse.insert:adding con="+constituent+" to "+this); parts.add(pi, constituent); constituent.setParent(this); - //System.err.println("Parse.insert: "+constituent.hashCode()+" -> "+constituent.getParent().hashCode()); + // System.err.println("Parse.insert: "+constituent.hashCode()+" -> " + // +constituent.getParent().hashCode()); } else { throw new IllegalArgumentException("Inserting constituent not contained in the sentence!"); @@ -468,9 +469,11 @@ public Span getSpan() { } /** - * Returns the log of the product of the probability associated with all the decisions which formed this constituent. + * Returns the log of the product of the probability associated with all the + * decisions which formed this constituent. * - * @return The log of the product of the probability associated with all the decisions which formed this constituent. 
+ * @return The log of the product of the probability associated with all the + * decisions which formed this constituent. */ public double getProb() { return prob; @@ -530,7 +533,9 @@ public void remove(int index) { public Parse adjoinRoot(Parse node, HeadRules rules, int parseIndex) { Parse lastChild = parts.get(parseIndex); - Parse adjNode = new Parse(this.text,new Span(lastChild.getSpan().getStart(),node.getSpan().getEnd()),lastChild.getType(),1,rules.getHead(new Parse[]{lastChild,node},lastChild.getType())); + Parse adjNode = new Parse(this.text,new Span(lastChild.getSpan().getStart(), + node.getSpan().getEnd()),lastChild.getType(),1, + rules.getHead(new Parse[]{lastChild,node},lastChild.getType())); adjNode.parts.add(lastChild); if (node.prevPunctSet != null) { adjNode.parts.addAll(node.prevPunctSet); @@ -550,7 +555,8 @@ public Parse adjoinRoot(Parse node, HeadRules rules, int parseIndex) { */ public Parse adjoin(Parse sister, HeadRules rules) { Parse lastChild = parts.get(parts.size() - 1); - Parse adjNode = new Parse(this.text,new Span(lastChild.getSpan().getStart(),sister.getSpan().getEnd()),lastChild.getType(),1,rules.getHead(new Parse[]{lastChild,sister},lastChild.getType())); + Parse adjNode = new Parse(this.text,new Span(lastChild.getSpan().getStart(),sister.getSpan().getEnd()), + lastChild.getType(),1,rules.getHead(new Parse[]{lastChild,sister},lastChild.getType())); adjNode.parts.add(lastChild); if (sister.prevPunctSet != null) { adjNode.parts.addAll(sister.prevPunctSet); @@ -1096,12 +1102,14 @@ public static void addNames(String tag, Span[] names, Parse[] tokens) { } } if (!crossingKids) { - commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex())); + commonParent.insert(new Parse(commonParent.getText(), nameSpan, + tag, 1.0, endToken.getHeadIndex())); } else { if (commonParent.getType().equals("NP")) { Parse[] grandKids = kids[0].getChildren(); if (grandKids.length > 1 && 
nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) { - commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(), tag, 1.0, commonParent.getHeadIndex())); + commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(), + tag, 1.0, commonParent.getHeadIndex())); } } } @@ -1109,4 +1117,4 @@ public static void addNames(String tag, Span[] names, Parse[] tokens) { } } } -} \ No newline at end of file +} diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/Parser.java index 64964f22b..688921b8c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/Parser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/Parser.java @@ -15,7 +15,6 @@ * limitations under the License. */ - package opennlp.tools.parser; /** @@ -26,10 +25,10 @@ public interface Parser { /** * Returns the specified number of parses or fewer for the specified tokens.
* Note: The nodes within - * the returned parses are shared with other parses and therefore their parent node references will not be consistent - * with their child node reference. {@link Parse#setParent(Parse)} can be used to make the parents consistent - * with a particular parse, but subsequent calls to setParents can invalidate the results of earlier - * calls.
+ * the returned parses are shared with other parses and therefore their parent node references + * will not be consistent with their child node reference. {@link Parse#setParent(Parse)} + * can be used to make the parents consistent with a particular parse, but subsequent calls + * to setParents can invalidate the results of earlier calls.
* @param tokens A parse containing the tokens with a single parent node. * @param numParses The number of parses desired. * @return the specified number of parses for the specified tokens. @@ -38,9 +37,10 @@ public interface Parser { /** * Returns a parse for the specified parse of tokens. + * * @param tokens The root node of a flat parse containing only tokens. - * @return A full parse of the specified tokens or the flat chunks of the tokens if a fullparse could not be found. + * @return A full parse of the specified tokens or the flat chunks of the tokens if a + * fullparse could not be found. */ Parse parse(Parse tokens); - -} \ No newline at end of file +} diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java index f50f27725..c45f7021b 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java @@ -38,8 +38,8 @@ public class ParserCrossValidator { private ParserEvaluationMonitor[] monitors; - public ParserCrossValidator(String languageCode, TrainingParameters params, HeadRules rules, ParserType parserType, - ParserEvaluationMonitor... monitors) { + public ParserCrossValidator(String languageCode, TrainingParameters params, + HeadRules rules, ParserType parserType, ParserEvaluationMonitor... 
monitors) { this.languageCode = languageCode; this.params = params; this.rules = rules; diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/CheckContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/CheckContextGenerator.java index 9f05ea654..776ba744c 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/CheckContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/CheckContextGenerator.java @@ -31,7 +31,8 @@ public class CheckContextGenerator extends AbstractContextGenerator { /** - * Creates a new context generator for generating predictive context for deciding when a constituent is complete. + * Creates a new context generator for generating predictive context for deciding + * when a constituent is complete. */ public CheckContextGenerator() { super(); @@ -43,8 +44,9 @@ public String[] getContext(Object o) { } /** - * Returns predictive context for deciding whether the specified constituents between the specified start and end index - * can be combined to form a new constituent of the specified type. + * Returns predictive context for deciding whether the specified constituents between the + * specified start and end index can be combined to form a new constituent of the specified type. + * * @param constituents The constituents which have yet to be combined into new constituents. * @param type The type of the new constituent proposed. * @param start The first constituent of the proposed constituent. 
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java index fd558ff30..3dec21267 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java @@ -86,7 +86,8 @@ public Parser(ParserModel model) { } /** - * Creates a new parser using the specified models and head rules using the specified beam size and advance percentage. + * Creates a new parser using the specified models and head rules using the specified beam + * size and advance percentage. * @param buildModel The model to assign constituent labels. * @param checkModel The model to determine a constituent is complete. * @param tagger The model to assign pos-tags. @@ -96,7 +97,8 @@ public Parser(ParserModel model) { * @param advancePercentage The minimal amount of probability mass which advanced outcomes must represent. * Only outcomes which contribute to the top "advancePercentage" will be explored. 
*/ - private Parser(MaxentModel buildModel, MaxentModel checkModel, POSTagger tagger, Chunker chunker, HeadRules headRules, int beamSize, double advancePercentage) { + private Parser(MaxentModel buildModel, MaxentModel checkModel, POSTagger tagger, Chunker chunker, + HeadRules headRules, int beamSize, double advancePercentage) { super(tagger, chunker, headRules, beamSize, advancePercentage); this.buildModel = buildModel; this.checkModel = checkModel; @@ -200,12 +202,17 @@ else if (contTypeMap.containsKey(tag)) { } Parse newParse1 = (Parse) p.clone(); //clone parse if (createDerivationString) newParse1.getDerivation().append(max).append("-"); - newParse1.setChild(originalAdvanceIndex,tag); //replace constituent being labeled to create new derivation + //replace constituent being labeled to create new derivation + newParse1.setChild(originalAdvanceIndex,tag); newParse1.addProb(Math.log(bprob)); //check - //String[] context = checkContextGenerator.getContext(newParse1.getChildren(), lastStartType, lastStartIndex, advanceNodeIndex); - checkModel.eval(checkContextGenerator.getContext(collapsePunctuation(newParse1.getChildren(),punctSet), lastStartType, lastStartIndex, advanceNodeIndex), cprobs); - //System.out.println("check "+lastStartType+" "+cprobs[completeIndex]+" "+cprobs[incompleteIndex]+" "+tag+" "+java.util.Arrays.asList(context)); + //String[] context = checkContextGenerator.getContext(newParse1.getChildren(), lastStartType, + // lastStartIndex, advanceNodeIndex); + checkModel.eval(checkContextGenerator.getContext( + collapsePunctuation(newParse1.getChildren(),punctSet), lastStartType, lastStartIndex, + advanceNodeIndex), cprobs); + //System.out.println("check "+lastStartType+" "+cprobs[completeIndex]+" "+cprobs[incompleteIndex] + // +" "+tag+" "+java.util.Arrays.asList(context)); Parse newParse2; if (cprobs[completeIndex] > q) { //make sure a reduce is likely newParse2 = (Parse) newParse1.clone(); @@ -225,12 +232,18 @@ else if (contTypeMap.containsKey(tag)) { 
flat &= cons[ci].isPosTag(); } if (!flat) { //flat chunks are done by chunker - if (lastStartIndex == 0 && advanceNodeIndex == numNodes - 1) { //check for top node to include end and begining punctuation - //System.err.println("ParserME.advanceParses: reducing entire span: "+new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd())+" "+lastStartType+" "+java.util.Arrays.asList(children)); - newParse2.insert(new Parse(p.getText(), p.getSpan(), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType))); + //check for top node to include end and begining punctuation + if (lastStartIndex == 0 && advanceNodeIndex == numNodes - 1) { + //System.err.println("ParserME.advanceParses: reducing entire span: " + // +new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd())+" " + // +lastStartType+" "+java.util.Arrays.asList(children)); + newParse2.insert(new Parse(p.getText(), p.getSpan(), lastStartType, cprobs[1], + headRules.getHead(cons, lastStartType))); } else { - newParse2.insert(new Parse(p.getText(), new Span(lastStartNode.getSpan().getStart(), advanceNode.getSpan().getEnd()), lastStartType, cprobs[1], headRules.getHead(cons, lastStartType))); + newParse2.insert(new Parse(p.getText(), new Span(lastStartNode.getSpan().getStart(), + advanceNode.getSpan().getEnd()), lastStartType, cprobs[1], + headRules.getHead(cons, lastStartType))); } newParsesList.add(newParse2); } @@ -253,7 +266,7 @@ else if (contTypeMap.containsKey(tag)) { * will be removed soon. 
*/ @Deprecated - public static AbstractModel train(ObjectStream es, int iterations, int cut) throws java.io.IOException { + public static AbstractModel train(ObjectStream es, int iterations, int cut) throws IOException { return opennlp.tools.ml.maxent.GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut)); } @@ -265,7 +278,8 @@ public static void mergeReportIntoManifest(Map manifest, } } - public static ParserModel train(String languageCode, ObjectStream parseSamples, HeadRules rules, TrainingParameters mlParams) + public static ParserModel train(String languageCode, ObjectStream parseSamples, + HeadRules rules, TrainingParameters mlParams) throws IOException { System.err.println("Building dictionary"); @@ -280,7 +294,8 @@ public static ParserModel train(String languageCode, ObjectStream parseSa System.err.println("Training builder"); ObjectStream bes = new ParserEventStream(parseSamples, rules, ParserEventTypeEnum.BUILD, mdict); Map buildReportMap = new HashMap<>(); - EventTrainer buildTrainer = TrainerFactory.getEventTrainer(mlParams.getSettings("build"), buildReportMap); + EventTrainer buildTrainer = + TrainerFactory.getEventTrainer(mlParams.getSettings("build"), buildReportMap); MaxentModel buildModel = buildTrainer.train(bes); mergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build"); @@ -309,7 +324,8 @@ public static ParserModel train(String languageCode, ObjectStream parseSa System.err.println("Training checker"); ObjectStream kes = new ParserEventStream(parseSamples, rules, ParserEventTypeEnum.CHECK); Map checkReportMap = new HashMap<>(); - EventTrainer checkTrainer = TrainerFactory.getEventTrainer( mlParams.getSettings("check"), checkReportMap); + EventTrainer checkTrainer = + TrainerFactory.getEventTrainer(mlParams.getSettings("check"), checkReportMap); MaxentModel checkModel = checkTrainer.train(kes); mergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check"); diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java index a5734c55d..d99a1fa7e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java @@ -38,13 +38,15 @@ public class ParserEventStream extends AbstractParserEventStream { protected CheckContextGenerator kcg; /** - * Create an event stream based on the specified data stream of the specified type using the specified head rules. + * Create an event stream based on the specified data stream of the specified type using + * the specified head rules. * @param d A 1-parse-per-line Penn Treebank Style parse. * @param rules The head rules. * @param etype The type of events desired (tag, chunk, build, or check). * @param dict A tri-gram dictionary to reduce feature generation. */ - public ParserEventStream(ObjectStream d, HeadRules rules, ParserEventTypeEnum etype, Dictionary dict) { + public ParserEventStream(ObjectStream d, HeadRules rules, + ParserEventTypeEnum etype, Dictionary dict) { super(d,rules,etype,dict); } @@ -85,7 +87,8 @@ public static Parse[] reduceChunks(Parse[] chunks, int ci, Parse parent) { reduceStart++; Parse[] reducedChunks; if (!type.equals(AbstractBottomUpParser.TOP_NODE)) { - reducedChunks = new Parse[chunks.length - (reduceEnd - reduceStart + 1) + 1]; //total - num_removed + 1 (for new node) + //total - num_removed + 1 (for new node) + reducedChunks = new Parse[chunks.length - (reduceEnd - reduceStart + 1) + 1]; //insert nodes before reduction System.arraycopy(chunks, 0, reducedChunks, 0, reduceStart); //insert reduced node @@ -108,7 +111,8 @@ public static Parse[] reduceChunks(Parse[] chunks, int ci, Parse parent) { } /** - * Adds events for parsing (post tagging and chunking to the specified list of events for the specified parse chunks. 
+ * Adds events for parsing (post tagging and chunking to the specified list of events for + * the specified parse chunks. * @param parseEvents The events for the specified chunks. * @param chunks The incomplete parses to be parsed. */ @@ -128,7 +132,8 @@ protected void addParseEvents(List parseEvents, Parse[] chunks) { else { outcome = AbstractBottomUpParser.CONT + type; } - //System.err.println("parserEventStream.addParseEvents: chunks["+ci+"]="+c+" label="+outcome+" bcg="+bcg); + // System.err.println("parserEventStream.addParseEvents: chunks["+ci+"]="+c+" label=" + // +outcome+" bcg="+bcg); c.setLabel(outcome); if (etype == ParserEventTypeEnum.BUILD) { parseEvents.add(new Event(outcome, bcg.getContext(chunks, ci))); diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java index 8b8f0bef8..8893f0281 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java @@ -47,10 +47,10 @@ */ public class HeadRules implements opennlp.tools.parser.HeadRules, GapLabeler, SerializableArtifact { - public static class HeadRulesSerializer implements ArtifactSerializer { + public static class HeadRulesSerializer implements ArtifactSerializer { - public opennlp.tools.parser.lang.en.HeadRules create(InputStream in) throws IOException { - return new opennlp.tools.parser.lang.en.HeadRules(new BufferedReader(new InputStreamReader(in, "UTF-8"))); + public HeadRules create(InputStream in) throws IOException { + return new HeadRules(new BufferedReader(new InputStreamReader(in, "UTF-8"))); } public void serialize(opennlp.tools.parser.lang.en.HeadRules artifact, OutputStream out) @@ -224,7 +224,8 @@ public void labelGaps(Stack stack) { Constituent con2 = stack.get(stack.size() - 3); Constituent con3 = stack.get(stack.size() - 4); Constituent con4 = stack.get(stack.size() - 5); - 
//System.err.println("con0="+con0.label+" con1="+con1.label+" con2="+con2.label+" con3="+con3.label+" con4="+con4.label); + // System.err.println("con0="+con0.label+" con1="+con1.label+" con2=" + // +con2.label+" con3="+con3.label+" con4="+con4.label); //subject extraction if (con1.getLabel().equals("NP") && con2.getLabel().equals("S") && con3.getLabel().equals("SBAR")) { con1.setLabel(con1.getLabel() + "-G"); @@ -232,7 +233,8 @@ public void labelGaps(Stack stack) { con3.setLabel(con3.getLabel() + "-G"); } //object extraction - else if (con1.getLabel().equals("NP") && con2.getLabel().equals("VP") && con3.getLabel().equals("S") && con4.getLabel().equals("SBAR")) { + else if (con1.getLabel().equals("NP") && con2.getLabel().equals("VP") + && con3.getLabel().equals("S") && con4.getLabel().equals("SBAR")) { con1.setLabel(con1.getLabel() + "-G"); con2.setLabel(con2.getLabel() + "-G"); con3.setLabel(con3.getLabel() + "-G"); diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java index 464d73729..7bffd5850 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java @@ -36,6 +36,7 @@ import java.util.StringTokenizer; import opennlp.tools.parser.Constituent; import opennlp.tools.parser.GapLabeler; +import opennlp.tools.parser.HeadRules; import opennlp.tools.parser.Parse; import opennlp.tools.parser.chunking.Parser; import opennlp.tools.util.model.ArtifactSerializer; @@ -57,12 +58,12 @@ * Other changes include removal of deprecated methods. 
* */ -public class AncoraSpanishHeadRules implements opennlp.tools.parser.HeadRules, GapLabeler, SerializableArtifact { +public class AncoraSpanishHeadRules implements HeadRules, GapLabeler, SerializableArtifact { - public static class HeadRulesSerializer implements ArtifactSerializer { + public static class HeadRulesSerializer implements ArtifactSerializer { - public opennlp.tools.parser.lang.es.AncoraSpanishHeadRules create(InputStream in) throws IOException { - return new opennlp.tools.parser.lang.es.AncoraSpanishHeadRules(new BufferedReader(new InputStreamReader(in, "UTF-8"))); + public AncoraSpanishHeadRules create(InputStream in) throws IOException { + return new AncoraSpanishHeadRules(new BufferedReader(new InputStreamReader(in, "UTF-8"))); } public void serialize(opennlp.tools.parser.lang.es.AncoraSpanishHeadRules artifact, OutputStream out) @@ -227,15 +228,17 @@ public void labelGaps(Stack stack) { Constituent con2 = stack.get(stack.size() - 3); Constituent con3 = stack.get(stack.size() - 4); Constituent con4 = stack.get(stack.size() - 5); - //System.err.println("con0="+con0.label+" con1="+con1.label+" con2="+con2.label+" con3="+con3.label+" con4="+con4.label); + //subject extraction - if (con1.getLabel().equals("SN") && con2.getLabel().equals("S") && con3.getLabel().equals("GRUP.NOM")) { + if (con1.getLabel().equals("SN") + && con2.getLabel().equals("S") && con3.getLabel().equals("GRUP.NOM")) { con1.setLabel(con1.getLabel() + "-G"); con2.setLabel(con2.getLabel() + "-G"); con3.setLabel(con3.getLabel() + "-G"); } //object extraction - else if (con1.getLabel().equals("SN") && con2.getLabel().equals("GRUP.VERB") && con3.getLabel().equals("S") && con4.getLabel().equals("GRUP.NOM")) { + else if (con1.getLabel().equals("SN") && con2.getLabel().equals("GRUP.VERB") + && con3.getLabel().equals("S") && con4.getLabel().equals("GRUP.NOM")) { con1.setLabel(con1.getLabel() + "-G"); con2.setLabel(con2.getLabel() + "-G"); con3.setLabel(con3.getLabel() + "-G"); diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java index 325690a4f..821f57803 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java @@ -76,7 +76,6 @@ public String[] getContext(Parse parent, Parse[] constituents, int index, boolea } else { for (int ri = 0; ri <= pi; ri++) { - //System.err.println(pi+" removing "+((Parse)rf.get(0)).getType()+" "+rf.get(0)+" "+(rf.size()-1)+" remain"); rf.remove(0); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java index e6c13481a..5a99df447 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java +++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java @@ -111,7 +111,9 @@ public Parser(ParserModel model) { this(model, defaultBeamSize, defaultAdvancePercentage); } - private Parser(MaxentModel buildModel, MaxentModel attachModel, MaxentModel checkModel, POSTagger tagger, Chunker chunker, HeadRules headRules, int beamSize, double advancePercentage) { + private Parser(MaxentModel buildModel, MaxentModel attachModel, MaxentModel checkModel, + POSTagger tagger, Chunker chunker, HeadRules headRules, int beamSize, + double advancePercentage) { super(tagger,chunker,headRules,beamSize,advancePercentage); this.buildModel = buildModel; this.attachModel = attachModel; @@ -249,8 +251,9 @@ else if (numNodes == 1) { //put sentence initial and final punct in top node buildModel.eval(buildContextGenerator.getContext(children, advanceNodeIndex), bprobs); double doneProb = bprobs[doneIndex]; if (debugOn) - System.out.println("adi=" + advanceNodeIndex + " " + advanceNode.getType() + "." 
+ advanceNode.getLabel() - + " " + advanceNode + " choose build=" + (1 - doneProb) + " attach=" + doneProb); + System.out.println("adi=" + advanceNodeIndex + " " + advanceNode.getType() + "." + + advanceNode.getLabel() + " " + advanceNode + " choose build=" + (1 - doneProb) + + " attach=" + doneProb); if (1 - doneProb > q) { double bprobSum = 0; @@ -276,7 +279,8 @@ else if (numNodes == 1) { //put sentence initial and final punct in top node newParse1.addProb(Math.log(bprob)); newParsesList.add(newParse1); if (checkComplete) { - cprobs = checkModel.eval(checkContextGenerator.getContext(newNode,children,advanceNodeIndex,false)); + cprobs = checkModel.eval(checkContextGenerator.getContext(newNode, children, + advanceNodeIndex,false)); if (debugOn) System.out.println("building " + tag + " " + bprob + " c=" + cprobs[completeIndex]); if (cprobs[completeIndex] > probMass) { //just complete advances setComplete(newNode); @@ -314,14 +318,17 @@ else if (1 - cprobs[completeIndex] > probMass) { //just incomplete advances //mark nodes as built if (checkComplete) { if (isComplete(advanceNode)) { - newParse1.setChild(originalAdvanceIndex,Parser.BUILT + "." + Parser.COMPLETE); //replace constituent being labeled to create new derivation + //replace constituent being labeled to create new derivation + newParse1.setChild(originalAdvanceIndex,Parser.BUILT + "." + Parser.COMPLETE); } else { - newParse1.setChild(originalAdvanceIndex,Parser.BUILT + "." + Parser.INCOMPLETE); //replace constituent being labeled to create new derivation + //replace constituent being labeled to create new derivation + newParse1.setChild(originalAdvanceIndex,Parser.BUILT + "." 
+ Parser.INCOMPLETE); } } else { - newParse1.setChild(originalAdvanceIndex,Parser.BUILT); //replace constituent being labeled to create new derivation + //replace constituent being labeled to create new derivation + newParse1.setChild(originalAdvanceIndex,Parser.BUILT); } newParse1.addProb(Math.log(doneProb)); if (advanceNodeIndex == 0) { //no attach if first node. @@ -333,7 +340,8 @@ else if (1 - cprobs[completeIndex] > probMass) { //just incomplete advances Parse fn = rf.get(fi); attachModel.eval(attachContextGenerator.getContext(children, advanceNodeIndex, rf, fi), aprobs); if (debugOn) { - //List cs = java.util.Arrays.asList(attachContextGenerator.getContext(children, advanceNodeIndex,rf,fi,punctSet)); + // List cs = java.util.Arrays.asList(attachContextGenerator.getContext(children, + // advanceNodeIndex,rf,fi,punctSet)); System.out.println("Frontier node(" + fi + "): " + fn.getType() + "." + fn.getLabel() + " " + fn + " <- " + advanceNode.getType() + " " + advanceNode + " d=" + aprobs[daughterAttachIndex] + " s=" + aprobs[sisterAttachIndex] + " "); @@ -347,12 +355,14 @@ else if (1 - cprobs[completeIndex] > probMass) { //just incomplete advances if (prob > q && ( (!checkComplete && (attachments[ai] != daughterAttachIndex || !isComplete(fn))) || - (checkComplete && ((attachments[ai] == daughterAttachIndex && !isComplete(fn)) || (attachments[ai] == sisterAttachIndex && isComplete(fn)))))) { + (checkComplete && ((attachments[ai] == daughterAttachIndex && !isComplete(fn)) + || (attachments[ai] == sisterAttachIndex && isComplete(fn)))))) { Parse newParse2 = newParse1.cloneRoot(fn,originalZeroIndex); Parse[] newKids = Parser.collapsePunctuation(newParse2.getChildren(),punctSet); //remove node from top level since were going to attach it (including punct) for (int ri = originalZeroIndex + 1; ri <= originalAdvanceIndex; ri++) { - //System.out.println(at"-removing "+(originalZeroIndex+1)+" "+newParse2.getChildren()[originalZeroIndex+1]); + 
//System.out.println(at"-removing "+(originalZeroIndex+1)+" " + // +newParse2.getChildren()[originalZeroIndex+1]); newParse2.remove(originalZeroIndex + 1); } List crf = getRightFrontier(newParse2,punctSet); @@ -382,7 +392,8 @@ else if (1 - cprobs[completeIndex] > probMass) { //just incomplete advances newParse2.addProb(Math.log(prob)); newParsesList.add(newParse2); if (checkComplete) { - cprobs = checkModel.eval(checkContextGenerator.getContext(updatedNode,newKids,advanceNodeIndex,true)); + cprobs = checkModel.eval( + checkContextGenerator.getContext(updatedNode,newKids,advanceNodeIndex,true)); if (cprobs[completeIndex] > probMass) { setComplete(updatedNode); newParse2.addProb(Math.log(cprobs[completeIndex])); @@ -400,7 +411,9 @@ else if (1 - cprobs[completeIndex] > probMass) { newParsesList.add(newParse3); setIncomplete(updatedNode); newParse2.addProb(Math.log(1 - cprobs[completeIndex])); - if (debugOn) System.out.println("Advancing both complete and incomplete nodes; c=" + cprobs[completeIndex]); + if (debugOn) + System.out.println("Advancing both complete and incomplete nodes; c=" + + cprobs[completeIndex]); } } } @@ -459,9 +472,11 @@ public static ParserModel train(String languageCode, ParserEventTypeEnum.BUILD, mdict); Map buildReportMap = new HashMap<>(); - EventTrainer buildTrainer = TrainerFactory.getEventTrainer(mlParams.getSettings("build"), buildReportMap); + EventTrainer buildTrainer = TrainerFactory.getEventTrainer( + mlParams.getSettings("build"), buildReportMap); MaxentModel buildModel = buildTrainer.train(bes); - opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest(manifestInfoEntries, buildReportMap, "build"); + opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest( + manifestInfoEntries, buildReportMap, "build"); parseSamples.reset(); @@ -471,9 +486,11 @@ public static ParserModel train(String languageCode, ParserEventTypeEnum.CHECK); Map checkReportMap = new HashMap<>(); - EventTrainer checkTrainer = 
TrainerFactory.getEventTrainer(mlParams.getSettings("check"), checkReportMap); + EventTrainer checkTrainer = TrainerFactory.getEventTrainer( + mlParams.getSettings("check"), checkReportMap); MaxentModel checkModel = checkTrainer.train(kes); - opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check"); + opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest( + manifestInfoEntries, checkReportMap, "check"); parseSamples.reset(); @@ -482,9 +499,11 @@ public static ParserModel train(String languageCode, ObjectStream attachEvents = new ParserEventStream(parseSamples, rules, ParserEventTypeEnum.ATTACH); Map attachReportMap = new HashMap<>(); - EventTrainer attachTrainer = TrainerFactory.getEventTrainer(mlParams.getSettings("attach"), attachReportMap); + EventTrainer attachTrainer = TrainerFactory.getEventTrainer( + mlParams.getSettings("attach"), attachReportMap); MaxentModel attachModel = attachTrainer.train(attachEvents); - opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest(manifestInfoEntries, attachReportMap, "attach"); + opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest( + manifestInfoEntries, attachReportMap, "attach"); // TODO: Remove cast for HeadRules return new ParserModel(languageCode, buildModel, checkModel, @@ -512,7 +531,8 @@ public static ParserModel train(String languageCode, } @Deprecated - public static AbstractModel train(ObjectStream es, int iterations, int cut) throws java.io.IOException { + public static AbstractModel train(ObjectStream es, int iterations, int cut) + throws IOException { return opennlp.tools.ml.maxent.GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut)); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java index 469bf0076..3035ca523 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java 
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java @@ -84,16 +84,19 @@ protected static String[] getSuffixes(String lex) { return suffs; } - public String[] getContext(int index, String[] sequence, String[] priorDecisions, Object[] additionalContext) { + public String[] getContext(int index, String[] sequence, String[] priorDecisions, + Object[] additionalContext) { return getContext(index,sequence,priorDecisions); } /** - * Returns the context for making a pos tag decision at the specified token index given the specified tokens and previous tags. + * Returns the context for making a pos tag decision at the specified token index + * given the specified tokens and previous tags. * @param index The index of the token for which the context is provided. * @param tokens The tokens in the sentence. * @param tags The tags assigned to the previous words in the sentence. - * @return The context for making a pos tag decision at the specified token index given the specified tokens and previous tags. + * @return The context for making a pos tag decision at the specified token index + * given the specified tokens and previous tags. 
*/ public String[] getContext(int index, Object[] tokens, String[] tags) { String next, nextnext = null, lex, prev, prevprev = null; diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java index 5a3961c6f..8f4d94fc1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java @@ -249,8 +249,8 @@ public static POSModel train(String languageCode, } else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) { POSSampleSequenceStream ss = new POSSampleSequenceStream(samples, contextGenerator); - EventModelSequenceTrainer trainer = TrainerFactory.getEventModelSequenceTrainer(trainParams.getSettings(), - manifestInfoEntries); + EventModelSequenceTrainer trainer = + TrainerFactory.getEventModelSequenceTrainer(trainParams.getSettings(), manifestInfoEntries); posModel = trainer.train(ss); } else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java index 5dd64b274..3aea95721 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java @@ -188,7 +188,8 @@ protected void collectFeatures(String prefix, String suffix, String previous, St * @param next Space delimited token following token containing eos character. 
* @param eosChar the EOS character been analyzed */ - protected void collectFeatures(String prefix, String suffix, String previous, String next, Character eosChar) { + protected void collectFeatures(String prefix, String suffix, String previous, + String next, Character eosChar) { buf.append("x="); buf.append(prefix); collectFeats.add(buf.toString()); diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java index fab03f678..ebf0a3ce1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SDCrossValidator.java @@ -51,7 +51,8 @@ public SDCrossValidator(String languageCode, TrainingParameters params, /** * @deprecated Use - * {@link #SDCrossValidator(String, TrainingParameters, SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)} + * {@link #SDCrossValidator(String, TrainingParameters, + * SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)} * and pass in a {@link SentenceDetectorFactory}. */ public SDCrossValidator(String languageCode, TrainingParameters params) { @@ -61,7 +62,8 @@ public SDCrossValidator(String languageCode, TrainingParameters params) { /** * @deprecated use - * {@link #SDCrossValidator(String, TrainingParameters, SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)} + * {@link #SDCrossValidator(String, TrainingParameters, SentenceDetectorFactory, + * SentenceDetectorEvaluationMonitor...)} * instead and pass in a TrainingParameters object. 
*/ public SDCrossValidator(String languageCode, TrainingParameters params, @@ -71,7 +73,8 @@ public SDCrossValidator(String languageCode, TrainingParameters params, } /** - * @deprecated use {@link #SDCrossValidator(String, TrainingParameters, SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)} + * @deprecated use {@link #SDCrossValidator(String, TrainingParameters, + * SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)} * instead and pass in a TrainingParameters object. */ public SDCrossValidator(String languageCode) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java index 24d50b670..58c984632 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorEvaluator.java @@ -63,7 +63,8 @@ private Span[] trimSpans(String document, Span spans[]) { @Override protected SentenceSample processSample(SentenceSample sample) { - Span predictions[] = trimSpans(sample.getDocument(), sentenceDetector.sentPosDetect(sample.getDocument())); + Span predictions[] = + trimSpans(sample.getDocument(), sentenceDetector.sentPosDetect(sample.getDocument())); Span[] references = trimSpans(sample.getDocument(), sample.getSentences()); fmeasure.updateScores(references, predictions); diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java index 2eb358550..7f28a9b4d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java @@ -336,8 +336,10 @@ public static SentenceModel train(String languageCode, * {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)} * and pass in af {@link 
SentenceDetectorFactory}. */ + @Deprecated public static SentenceModel train(String languageCode, ObjectStream samples, boolean useTokenEnd, Dictionary abbreviations) throws IOException { - return train(languageCode, samples, useTokenEnd, abbreviations, ModelUtil.createDefaultTrainingParameters()); + return train(languageCode, samples, useTokenEnd, abbreviations, + ModelUtil.createDefaultTrainingParameters()); } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java index 47fab0d36..a71621067 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java @@ -56,8 +56,8 @@ public SentenceModel(String languageCode, MaxentModel sentModel, * {@link #SentenceModel(String, MaxentModel, Map, SentenceDetectorFactory)} * instead and pass in a {@link SentenceDetectorFactory} */ - public SentenceModel(String languageCode, MaxentModel sentModel, - boolean useTokenEnd, Dictionary abbreviations, char[] eosCharacters, Map manifestInfoEntries) { + public SentenceModel(String languageCode, MaxentModel sentModel, boolean useTokenEnd, + Dictionary abbreviations, char[] eosCharacters, Map manifestInfoEntries) { this(languageCode, sentModel, manifestInfoEntries, new SentenceDetectorFactory(languageCode, useTokenEnd, abbreviations, eosCharacters)); diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSampleStream.java index 94c397782..a5c418b69 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSampleStream.java @@ -59,7 +59,8 @@ public SentenceSample read() throws IOException { } if (sentenceSpans.size() > 0) { - return new SentenceSample(sentencesString.toString(), 
sentenceSpans.toArray(new Span[sentenceSpans.size()])); + return new SentenceSample(sentencesString.toString(), + sentenceSpans.toArray(new Span[sentenceSpans.size()])); } else { return null; diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java index 700f6ea29..4f3ebd289 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenSample.java @@ -148,7 +148,8 @@ public String toString() { return sentence.toString(); } - private static void addToken(StringBuilder sample, List tokenSpans, String token, boolean isNextMerged) { + private static void addToken(StringBuilder sample, List tokenSpans, + String token, boolean isNextMerged) { int tokenSpanStart = sample.length(); sample.append(token); diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java index ffa793bc9..366e433ea 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerFactory.java @@ -127,7 +127,8 @@ public Map createManifestEntries() { * @param subclassName the name of the class implementing the {@link TokenizerFactory} * @param languageCode the language code the tokenizer should use * @param abbreviationDictionary an optional dictionary containing abbreviations, or null if not present - * @param useAlphaNumericOptimization indicate if the alpha numeric optimization should be enabled or disabled + * @param useAlphaNumericOptimization indicate if the alpha numeric optimization + * should be enabled or disabled * @param alphaNumericPattern the pattern the alpha numeric optimization should use * * @return the instance of the Tokenizer Factory diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/en/TokenSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/en/TokenSampleStream.java index aa19fc738..deb24a15e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/en/TokenSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/en/TokenSampleStream.java @@ -84,11 +84,10 @@ else if (token.equals("-RCB-")) { } } else { - if (!lastToken.equals("``") && (!lastToken.equals("\"") || evenq) && !lastToken.equals("(") && !lastToken.equals("{") - && !lastToken.equals("$") && !lastToken.equals("#")) { - //System.out.print(" "+token); - length++; - } + if (!lastToken.equals("``") && (!lastToken.equals("\"") || evenq) && !lastToken.equals("(") + && !lastToken.equals("{") && !lastToken.equals("$") && !lastToken.equals("#")) { + length++; + } } } if (token.equals("\"")) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearchContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearchContextGenerator.java index 4367db68a..565ded377 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearchContextGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearchContextGenerator.java @@ -25,7 +25,8 @@ public interface BeamSearchContextGenerator { /** Returns the context for the specified position in the specified sequence (list). * @param index The index of the sequence. * @param sequence The sequence of items over which the beam search is performed. - * @param priorDecisions The sequence of decisions made prior to the context for which this decision is being made. + * @param priorDecisions The sequence of decisions made prior to the context for + * which this decision is being made. * @param additionalContext Any addition context specific to a class implementing this interface. * @return the context for the specified position in the specified sequence. 
*/ diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/Sequence.java b/opennlp-tools/src/main/java/opennlp/tools/util/Sequence.java index 4721cc635..828e99a76 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/Sequence.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/Sequence.java @@ -101,7 +101,8 @@ public double getScore() { } /** Populates an array with the probabilities associated with the outcomes of this sequence. - * @param ps a pre-allocated array to use to hold the values of the probabilities of the outcomes for this sequence. + * @param ps a pre-allocated array to use to hold the values of the + * probabilities of the outcomes for this sequence. */ public void getProbs(double[] ps) { for (int pi = 0, pl = probs.size(); pi < pl; pi++) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java index c5e5d62c2..43ac4df48 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/StringUtil.java @@ -174,7 +174,8 @@ public static int[][] levenshteinDistance(String wordForm, String lemma) { cost = 1; } //obtain minimum distance from calculating deletion, insertion, substitution - distance[i][j] = minimum(distance[i - 1][j] + 1, distance[i][j - 1] + 1, distance[i - 1][j - 1] + cost); + distance[i][j] = minimum(distance[i - 1][j] + 1, distance[i][j - 1] + + 1, distance[i - 1][j - 1] + cost); } } return distance; @@ -188,7 +189,8 @@ public static int[][] levenshteinDistance(String wordForm, String lemma) { * @param distance the levenshtein distance * @param permutations the number of permutations */ - public static void computeShortestEditScript(String wordForm, String lemma, int[][] distance, StringBuffer permutations) { + public static void computeShortestEditScript(String wordForm, String lemma, + int[][] distance, StringBuffer permutations) { int n = distance.length; int m = 
distance[0].length; @@ -200,32 +202,41 @@ public static void computeShortestEditScript(String wordForm, String lemma, int[ if (distance[wordFormLength][lemmaLength] == 0) { break; } - if ((lemmaLength > 0 && wordFormLength > 0) && (distance[wordFormLength - 1][lemmaLength - 1] < distance[wordFormLength][lemmaLength])) { - permutations.append('R').append(Integer.toString(wordFormLength - 1)).append(wordForm.charAt(wordFormLength - 1)).append(lemma.charAt(lemmaLength - 1)); + if ((lemmaLength > 0 && wordFormLength > 0) && (distance[wordFormLength - 1][lemmaLength - 1] + < distance[wordFormLength][lemmaLength])) { + permutations.append('R').append(Integer.toString(wordFormLength - 1)) + .append(wordForm.charAt(wordFormLength - 1)).append(lemma.charAt(lemmaLength - 1)); lemmaLength--; wordFormLength--; continue; } - if (lemmaLength > 0 && (distance[wordFormLength][lemmaLength - 1] < distance[wordFormLength][lemmaLength])) { - permutations.append('I').append(Integer.toString(wordFormLength)).append(lemma.charAt(lemmaLength - 1)); + if (lemmaLength > 0 && (distance[wordFormLength][lemmaLength - 1] + < distance[wordFormLength][lemmaLength])) { + permutations.append('I').append(Integer.toString(wordFormLength)) + .append(lemma.charAt(lemmaLength - 1)); lemmaLength--; continue; } - if (wordFormLength > 0 && (distance[wordFormLength - 1][lemmaLength] < distance[wordFormLength][lemmaLength])) { - permutations.append('D').append(Integer.toString(wordFormLength - 1)).append(wordForm.charAt(wordFormLength - 1)); + if (wordFormLength > 0 && (distance[wordFormLength - 1][lemmaLength] + < distance[wordFormLength][lemmaLength])) { + permutations.append('D').append(Integer.toString(wordFormLength - 1)) + .append(wordForm.charAt(wordFormLength - 1)); wordFormLength--; continue; } - if ((wordFormLength > 0 && lemmaLength > 0) && (distance[wordFormLength - 1][lemmaLength - 1] == distance[wordFormLength][lemmaLength])) { + if ((wordFormLength > 0 && lemmaLength > 0) && 
(distance[wordFormLength - 1][lemmaLength - 1] + == distance[wordFormLength][lemmaLength])) { wordFormLength--; lemmaLength--; continue ; } - if (wordFormLength > 0 && (distance[wordFormLength - 1][lemmaLength] == distance[wordFormLength][lemmaLength])) { + if (wordFormLength > 0 && (distance[wordFormLength - 1][lemmaLength] + == distance[wordFormLength][lemmaLength])) { wordFormLength--; continue; } - if (lemmaLength > 0 && (distance[wordFormLength][lemmaLength - 1] == distance[wordFormLength][lemmaLength])) { + if (lemmaLength > 0 && (distance[wordFormLength][lemmaLength - 1] + == distance[wordFormLength][lemmaLength])) { lemmaLength--; continue; } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java index 8be8a7786..4f0a24a93 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java @@ -37,13 +37,15 @@ public void createFeatures(List features, String[] tokens, int index, if (index > 0) { List prevWordClasses = BrownTokenClasses.getWordClasses(tokens[index - 1], brownLexicon); for (int i = 0; i < wordClasses.size() && i < prevWordClasses.size(); i++) - features.add("p" + "browncluster" + "," + "browncluster" + "=" + prevWordClasses.get(i) + "," + wordClasses.get(i)); + features.add("p" + "browncluster" + "," + "browncluster" + "=" + + prevWordClasses.get(i) + "," + wordClasses.get(i)); } if (index + 1 < tokens.length) { List nextWordClasses = BrownTokenClasses.getWordClasses(tokens[index + 1], brownLexicon); for (int i = 0; i < wordClasses.size() && i < nextWordClasses.size(); i++) { - features.add("browncluster" + "," + "n" + "browncluster" + "=" + wordClasses.get(i) + "," + nextWordClasses.get(i)); + features.add("browncluster" + "," + "n" + "browncluster" + "=" + + 
wordClasses.get(i) + "," + nextWordClasses.get(i)); } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java index 9065fb6e1..31af680c4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java @@ -290,10 +290,12 @@ public AdaptiveFeatureGenerator create(Element generatorElement, if (!(dictResource instanceof WordClusterDictionary)) { - throw new InvalidFormatException("Not a WordClusterDictionary resource for key: " + dictResourceKey); + throw new InvalidFormatException("Not a WordClusterDictionary resource for key: " + + dictResourceKey); } - return new WordClusterFeatureGenerator((WordClusterDictionary) dictResource, dictResourceKey, lowerCaseDictionary); + return new WordClusterFeatureGenerator((WordClusterDictionary) dictResource, + dictResourceKey, lowerCaseDictionary); } static void register(Map factoryMap) { @@ -515,7 +517,8 @@ public AdaptiveFeatureGenerator create(Element generatorElement, " an aggregator element"); } - AdaptiveFeatureGenerator nestedGenerator = GeneratorFactory.createGenerator(nestedGeneratorElement, resourceManager); + AdaptiveFeatureGenerator nestedGenerator = + GeneratorFactory.createGenerator(nestedGeneratorElement, resourceManager); String prevLengthString = generatorElement.getAttribute("prevLength"); @@ -524,7 +527,8 @@ public AdaptiveFeatureGenerator create(Element generatorElement, try { prevLength = Integer.parseInt(prevLengthString); } catch (NumberFormatException e) { - throw new InvalidFormatException("prevLength attribute '" + prevLengthString + "' is not a number!", e); + throw new InvalidFormatException("prevLength attribute '" + prevLengthString + + "' is not a number!", e); } String nextLengthString = generatorElement.getAttribute("nextLength"); @@ -534,7 +538,8 @@ public 
AdaptiveFeatureGenerator create(Element generatorElement, try { nextLength = Integer.parseInt(nextLengthString); } catch (NumberFormatException e) { - throw new InvalidFormatException("nextLength attribute '" + nextLengthString + "' is not a number!", e); + throw new InvalidFormatException("nextLength attribute '" + nextLengthString + + "' is not a number!", e); } return new WindowFeatureGenerator(nestedGenerator, prevLength, nextLength); @@ -591,8 +596,8 @@ public AdaptiveFeatureGenerator create(Element generatorElement, String featureGeneratorClassName = generatorElement.getAttribute("class"); - AdaptiveFeatureGenerator generator = ExtensionLoader.instantiateExtension(AdaptiveFeatureGenerator.class, - featureGeneratorClassName); + AdaptiveFeatureGenerator generator = + ExtensionLoader.instantiateExtension(AdaptiveFeatureGenerator.class, featureGeneratorClassName); if (generator instanceof CustomFeatureGenerator) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WindowFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WindowFeatureGenerator.java index b52a85677..cd3d93178 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WindowFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WindowFeatureGenerator.java @@ -60,7 +60,8 @@ public WindowFeatureGenerator(AdaptiveFeatureGenerator generator, int prevWindow * @param nextWindowSize * @param generators */ - public WindowFeatureGenerator(int prevWindowSize, int nextWindowSize, AdaptiveFeatureGenerator... generators) { + public WindowFeatureGenerator(int prevWindowSize, int nextWindowSize, + AdaptiveFeatureGenerator... 
generators) { this(new AggregatedFeatureGenerator(generators), prevWindowSize, nextWindowSize); } @@ -125,6 +126,7 @@ public void clearAdaptiveData() { @Override public String toString() { - return super.toString() + ": Prev window size: " + prevWindowSize + ", Next window size: " + nextWindowSize; + return super.toString() + ": Prev window size: " + prevWindowSize + + ", Next window size: " + nextWindowSize; } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java index b5955add6..d9cb50174 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java @@ -27,7 +27,8 @@ public class WordClusterFeatureGenerator implements AdaptiveFeatureGenerator { private String resourceName; private boolean lowerCaseDictionary; - public WordClusterFeatureGenerator(WordClusterDictionary dict, String dictResourceKey, boolean lowerCaseDictionary) { + public WordClusterFeatureGenerator(WordClusterDictionary dict, + String dictResourceKey, boolean lowerCaseDictionary) { tokenDictionary = dict; resourceName = dictResourceKey; this.lowerCaseDictionary = lowerCaseDictionary; @@ -46,4 +47,4 @@ public void createFeatures(List features, String[] tokens, int index, features.add(resourceName + clusterId); } } -} \ No newline at end of file +} diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java index a7f42878e..22adf071e 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java @@ -428,8 +428,9 @@ protected void validateArtifactMap() throws InvalidFormatException { // Reject loading a snapshot model with a non-snapshot 
version if (!Version.currentVersion().isSnapshot() && version.isSnapshot()) { - throw new InvalidFormatException("Model version " + version + " is a snapshot - snapshot models are not " + - "supported by this non-snapshot version (" + Version.currentVersion() + ") of OpenNLP!"); + throw new InvalidFormatException("Model version " + version + + " is a snapshot - snapshot models are not supported by this non-snapshot version (" + + Version.currentVersion() + ") of OpenNLP!"); } } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java index 5111ce9eb..48bfa58d5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/ModelUtil.java @@ -63,12 +63,14 @@ public static void writeModel(MaxentModel model, final OutputStream out) if (out == null) throw new IllegalArgumentException("out parameter must not be null!"); - GenericModelWriter modelWriter = new GenericModelWriter((AbstractModel) model, new DataOutputStream(new OutputStream() { - @Override - public void write(int b) throws IOException { - out.write(b); - } - })); + GenericModelWriter modelWriter = new GenericModelWriter((AbstractModel) model, + new DataOutputStream(new OutputStream() { + @Override + public void write(int b) throws IOException { + out.write(b); + } + })); + modelWriter.persist(); } diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java index d6c53ca3b..6b9275c49 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java +++ b/opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java @@ -147,8 +147,8 @@ public void typeSystemInit(TypeSystem typeSystem) UimaUtil.TOKEN_TYPE_PARAMETER); // pos feature - mPosFeature = AnnotatorUtil.getRequiredFeatureParameter(context, mTokenType, UimaUtil.POS_FEATURE_PARAMETER, - 
CAS.TYPE_NAME_STRING); + mPosFeature = AnnotatorUtil.getRequiredFeatureParameter( + context, mTokenType, UimaUtil.POS_FEATURE_PARAMETER, CAS.TYPE_NAME_STRING); } private void addChunkAnnotation(CAS tcas, AnnotationFS tokenAnnotations[], @@ -231,4 +231,4 @@ public void destroy() { // dereference model to allow garbage collection mChunker = null; } -} \ No newline at end of file +} diff --git a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java index bbffe3178..6bf9365aa 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/chunker/ChunkerTrainer.java @@ -204,7 +204,9 @@ public void collectionProcessComplete(ProcessTrace trace) throws ResourceProcessException, IOException { GIS.PRINT_MESSAGES = false; - ChunkerModel chunkerModel = ChunkerME.train(language, ObjectStreamUtils.createObjectStream(mChunkSamples), ModelUtil.createDefaultTrainingParameters(), ChunkerFactory.create(null)); + ChunkerModel chunkerModel = ChunkerME.train(language, + ObjectStreamUtils.createObjectStream(mChunkSamples), + ModelUtil.createDefaultTrainingParameters(), ChunkerFactory.create(null)); // dereference to allow garbage collection mChunkSamples = null; @@ -228,4 +230,4 @@ public boolean isStateless() { public void destroy() { mChunkSamples = null; } -} \ No newline at end of file +} diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java index b2948e3ef..793da86da 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java +++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/AbstractNameFinder.java @@ -120,7 +120,8 @@ public void typeSystemInit(TypeSystem typeSystem) nameTypeMap.put(parts[0].trim(), typeSystem.getType(parts[1].trim())); } else { - mLogger.log(Level.WARNING, String.format("Failed to 
parse a part of the type mapping [%s]", mapping)); + mLogger.log(Level.WARNING, + String.format("Failed to parse a part of the type mapping [%s]", mapping)); } } @@ -128,7 +129,8 @@ public void typeSystemInit(TypeSystem typeSystem) } if (mNameType == null && mNameTypeMapping.size() == 0) { - throw new AnalysisEngineProcessException(new Exception("No name type or valid name type mapping configured!")); + throw new AnalysisEngineProcessException( + new Exception("No name type or valid name type mapping configured!")); } } diff --git a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java index 620758089..570083191 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java @@ -81,20 +81,29 @@ * * * - * - * - * - * - * - * - * + * + * + * + * + * + * + * + * + * + * + * + * + * + * *
Type Name Description
String opennlp.uima.opennlp.uima.TrainingParamsFile Training Parameters Properties file
String opennlp.uima.FeatureGeneratorFile Feature Generator definition file which contain the feature generator configuration
String opennlp.uima.FeatureGeneratorResources Feature Generator resources dictionary
String opennlp.uima.AdditionalTrainingDataFile Training file which contains additional data in the OpenNLP format
String opennlp.uima.AdditionalTrainingDataEncoding Encoding of the additional training data
String opennlp.uima.SampleTraceFile All training samples are traced to this file
String opennlp.uima.SampleTraceFileEncoding Encoding of the sample trace file
String opennlp.uima.opennlp.uima.TrainingParamsFileTraining Parameters Properties file
String opennlp.uima.FeatureGeneratorFileFeature Generator definition file which contain the feature generator configuration
String opennlp.uima.FeatureGeneratorResourcesFeature Generator resources dictionary
String opennlp.uima.AdditionalTrainingDataFileTraining file which contains additional data in the OpenNLP format
String opennlp.uima.AdditionalTrainingDataEncodingEncoding of the additional training data
String opennlp.uima.SampleTraceFileAll training samples are traced to this file
String opennlp.uima.SampleTraceFileEncodingEncoding of the sample trace file
*

*/ public final class NameFinderTrainer extends CasConsumer_ImplBase { - private static final String FEATURE_GENERATOR_DEFINITION_FILE_PARAMETER = "opennlp.uima.FeatureGeneratorFile"; - private static final String FEATURE_GENERATOR_RESOURCES_PARAMETER = "opennlp.uima.FeatureGeneratorResources"; + private static final String FEATURE_GENERATOR_DEFINITION_FILE_PARAMETER = + "opennlp.uima.FeatureGeneratorFile"; + private static final String FEATURE_GENERATOR_RESOURCES_PARAMETER = + "opennlp.uima.FeatureGeneratorResources"; private Logger logger; @@ -431,4 +440,4 @@ public void destroy() { // dereference to allow garbage collection nameFinderSamples = null; } -} \ No newline at end of file +} diff --git a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java index 358e82c25..1464ab192 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java +++ b/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java @@ -60,7 +60,8 @@ * * * - * + * + * * * *
Type Name Description
String opennlp.uima.ProbabilityFeature The name of the double probability feature (not set by default)
String opennlp.uima.ProbabilityFeatureThe name of the double probability feature (not set by default)
Integer opennlp.uima.BeamSize
String opennlp.uima.DictionaryName The name of the dictionary file
diff --git a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java index b02a87b84..54c880427 100644 --- a/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java +++ b/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java @@ -62,7 +62,8 @@ * Type Name Description * String opennlp.uima.ModelName The name of the model file * String opennlp.uima.SentenceType The full name of the sentence type - * String opennlp.uima.EOSChars A string containing end-of-sentence characters + * String opennlp.uima.EOSChars + * A string containing end-of-sentence characters * */ public final class SentenceDetectorTrainer extends CasConsumer_ImplBase {