From d223b0c4a399d1894ba9c4bce87474fdf6865bbe Mon Sep 17 00:00:00 2001 From: smarthi Date: Sat, 24 Dec 2016 00:54:43 -0500 Subject: [PATCH] OPENNLP-901: Replace references to deprecated NameFinderME.train() --- .../java/opennlp/morfologik/cmdline/CLI.java | 4 +- .../TokenNameFinderCrossValidatorTool.java | 4 +- .../namefind/TokenNameFinderTrainerTool.java | 11 +- .../opennlp/tools/namefind/NameFinderME.java | 141 +----------------- .../TokenNameFinderCrossValidator.java | 13 +- .../namefind/TokenNameFinderFactory.java | 38 +++-- .../tools/namefind/TokenNameFinderModel.java | 64 +------- .../tools/postag/POSTaggerFactory.java | 13 +- .../tools/namefind/NameFinderMETest.java | 46 +++--- .../TokenNameFinderCrossValidatorTest.java | 13 +- 10 files changed, 68 insertions(+), 279 deletions(-) diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java index f92d178df..5205739f7 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/cmdline/CLI.java @@ -40,9 +40,9 @@ public final class CLI { private static Map toolLookupMap; static { - toolLookupMap = new LinkedHashMap(); + toolLookupMap = new LinkedHashMap<>(); - List tools = new LinkedList(); + List tools = new LinkedList<>(); tools.add(new MorfologikDictionaryBuilderTool()); tools.add(new XMLDictionaryToTableTool()); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java index 93f52ec4c..aa1e343c1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java @@ -73,7 +73,7 @@ public void run(String format, String[] args) { sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream); } - List> listeners = new LinkedList>(); + List> listeners = new LinkedList<>(); if (params.getMisclassified()) { listeners.add(new NameEvaluationErrorListener()); } @@ -94,7 +94,7 @@ else if ("BILOU".equals(sequenceCodecImplName)) { SequenceCodec sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName); - TokenNameFinderFactory nameFinderFactory = null; + TokenNameFinderFactory nameFinderFactory; try { nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(), featureGeneratorBytes, resources, sequenceCodec); diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java index 1f8a365da..b2ccfc5a7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java @@ -89,13 +89,13 @@ static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) { * @return a map consisting of the file name of the resource and its corresponding Object */ public static Map loadResources(File resourcePath, File featureGenDescriptor) { - Map resources = new HashMap(); + Map resources = new HashMap<>(); if (resourcePath != null) { Map artifactSerializers = TokenNameFinderModel .createArtifactSerializers(); - List elements = new ArrayList(); + List elements = new ArrayList<>(); ArtifactSerializer serializer = null; @@ -134,9 +134,6 @@ public static Map loadResources(File resourcePath, File featureG try (InputStream resourceIn = CmdLineUtil.openInFile(resourceFile)) { resources.put(resourceName, serializer.create(resourceIn)); - } catch (InvalidFormatException e) { - // TODO: Fix exception handling - e.printStackTrace(); } catch (IOException e) { // TODO: Fix exception handling e.printStackTrace(); @@ -160,7 +157,7 @@ static Map loadResources(String resourceDirectory, File featureG return loadResources(resourcePath, featureGeneratorDescriptor); } - return new HashMap(); + return new HashMap<>(); } public void run(String format, String[] args) { @@ -200,7 +197,7 @@ else if ("BILOU".equals(sequenceCodecImplName)) { SequenceCodec sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName); - TokenNameFinderFactory nameFinderFactory = null; + TokenNameFinderFactory nameFinderFactory; try { nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(), featureGeneratorBytes, resources, sequenceCodec); diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java index ff8c14380..69f7a4ef1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java @@ -26,6 +26,7 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; + import opennlp.tools.ml.BeamSearch; import opennlp.tools.ml.EventModelSequenceTrainer; import opennlp.tools.ml.EventTrainer; @@ -43,14 +44,7 @@ import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator; import opennlp.tools.util.featuregen.AdditionalContextFeatureGenerator; -import opennlp.tools.util.featuregen.BigramNameFeatureGenerator; -import opennlp.tools.util.featuregen.CachedFeatureGenerator; import opennlp.tools.util.featuregen.GeneratorFactory; -import opennlp.tools.util.featuregen.OutcomePriorFeatureGenerator; -import opennlp.tools.util.featuregen.PreviousMapFeatureGenerator; -import opennlp.tools.util.featuregen.SentenceFeatureGenerator; -import opennlp.tools.util.featuregen.TokenClassFeatureGenerator; -import opennlp.tools.util.featuregen.TokenFeatureGenerator; import opennlp.tools.util.featuregen.WindowFeatureGenerator; /** @@ -91,21 +85,6 @@ public NameFinderME(TokenNameFinderModel model) { new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); } - @Deprecated - /* - * @deprecated the default feature generation is now always included in the models and loaded - * if not by the factory. Subclasses using this methods should do the same. - */ - static AdaptiveFeatureGenerator createFeatureGenerator() { - return new CachedFeatureGenerator( - new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2), - new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2), - new OutcomePriorFeatureGenerator(), - new PreviousMapFeatureGenerator(), - new BigramNameFeatureGenerator(), - new SentenceFeatureGenerator(true, false)); - } - private static AdaptiveFeatureGenerator createFeatureGenerator( byte[] generatorDescriptor, final Map resources) throws IOException { @@ -288,124 +267,6 @@ else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) { } } - /** - * Trains a name finder model. - * - * @param languageCode the language of the training data - * @param type null or an override type for all types in the training data - * @param samples the training data - * @param trainParams machine learning train parameters - * @param generator null or the feature generator - * @param resources the resources for the name finder or null if none - * - * @return the newly trained model - * - * @throws IOException - * @deprecated use - * {@link NameFinderME#train(String, String, ObjectStream, TrainingParameters, TokenNameFinderFactory)} - * instead. - */ - @Deprecated - static TokenNameFinderModel train(String languageCode, String type, ObjectStream samples, - TrainingParameters trainParams, AdaptiveFeatureGenerator generator, final Map resources) - throws IOException { - - if (languageCode == null) { - throw new IllegalArgumentException("languageCode must not be null!"); - } - - String beamSizeString = trainParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER); - - int beamSize = NameFinderME.DEFAULT_BEAM_SIZE; - if (beamSizeString != null) { - beamSize = Integer.parseInt(beamSizeString); - } - - Map manifestInfoEntries = new HashMap<>(); - - AdaptiveFeatureGenerator featureGenerator; - - if (generator != null) { - featureGenerator = generator; - } else { - featureGenerator = createFeatureGenerator(); - } - - MaxentModel nameFinderModel = null; - - SequenceClassificationModel seqModel = null; - - TrainerType trainerType = TrainerFactory.getTrainerType(trainParams.getSettings()); - - if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) { - ObjectStream eventStream = new NameFinderEventStream(samples, type, - new DefaultNameContextGenerator(featureGenerator), new BioCodec()); - - EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams.getSettings(), manifestInfoEntries); - nameFinderModel = trainer.train(eventStream); - } else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) { - NameSampleSequenceStream ss = new NameSampleSequenceStream(samples, featureGenerator); - - EventModelSequenceTrainer trainer = TrainerFactory.getEventModelSequenceTrainer( - trainParams.getSettings(), manifestInfoEntries); - nameFinderModel = trainer.train(ss); - } else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) { - SequenceTrainer trainer = TrainerFactory.getSequenceModelTrainer( - trainParams.getSettings(), manifestInfoEntries); - - NameSampleSequenceStream ss = new NameSampleSequenceStream(samples, featureGenerator, false); - seqModel = trainer.train(ss); - } else { - throw new IllegalStateException("Unexpected trainer type!"); - } - - // TODO: Pass the sequence codec down to the model! We will just store the class - // name in the model, and then always use the extension loader to create it! - // The cmd line interface, will replace shortcuts with actual class names. - // depending on which one is not null! - if (seqModel != null) { - return new TokenNameFinderModel(languageCode, seqModel, null, - resources, manifestInfoEntries, new BioCodec(), new TokenNameFinderFactory()); - } else { - return new TokenNameFinderModel(languageCode, nameFinderModel, beamSize, null, - resources, manifestInfoEntries, new BioCodec(), new TokenNameFinderFactory()); - } - } - - /** - * Trains a name finder model. - * - * @param languageCode the language of the training data - * @param type null or an override type for all types in the training data - * @param samples the training data - * @param trainParams machine learning train parameters - * @param featureGeneratorBytes descriptor to configure the feature generation - * or null - * @param resources the resources for the name finder or null if none - * - * @return the newly trained model - * - * @throws IOException - * @deprecated use - * {@link NameFinderME#train(String, String, ObjectStream, TrainingParameters, TokenNameFinderFactory)} - * instead. - */ - @Deprecated - static TokenNameFinderModel train(String languageCode, String type, - ObjectStream samples, TrainingParameters trainParams, - byte[] featureGeneratorBytes, final Map resources) - throws IOException { - - TokenNameFinderModel model = train(languageCode, type, samples, trainParams, - createFeatureGenerator(featureGeneratorBytes, resources), resources); - - if (featureGeneratorBytes != null) { - model = model.updateFeatureGenerator(featureGeneratorBytes); - } - - return model; - } - /** * Gets the name type from the outcome * diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java index 3d2547ba0..b4ff4e1be 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java @@ -170,9 +170,7 @@ public TokenNameFinderCrossValidator(String languageCode, String type, this.type = type; this.featureGeneratorBytes = featureGeneratorBytes; this.resources = resources; - this.params = trainParams; - this.listeners = listeners; } @@ -212,17 +210,16 @@ public void evaluate(ObjectStream samples, int nFolds) while (partitioner.hasNext()) { - CrossValidationPartitioner.TrainingSampleStream trainingSampleStream = partitioner - .next(); + CrossValidationPartitioner.TrainingSampleStream trainingSampleStream = partitioner.next(); TokenNameFinderModel model; if (factory != null) { - model = opennlp.tools.namefind.NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), params, factory); + model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), + params, factory); } else { - model = opennlp.tools.namefind.NameFinderME.train(languageCode, type, - new DocumentToNameSampleStream(trainingSampleStream), params, featureGeneratorBytes, resources); - + model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), + params, TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, new BioCodec())); } // do testing diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java index 90381ff0e..55f1ab694 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java @@ -30,12 +30,19 @@ import opennlp.tools.util.ext.ExtensionLoader; import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator; import opennlp.tools.util.featuregen.AggregatedFeatureGenerator; -import opennlp.tools.util.featuregen.FeatureGeneratorResourceProvider; +import opennlp.tools.util.featuregen.BigramNameFeatureGenerator; +import opennlp.tools.util.featuregen.CachedFeatureGenerator; import opennlp.tools.util.featuregen.GeneratorFactory; +import opennlp.tools.util.featuregen.OutcomePriorFeatureGenerator; +import opennlp.tools.util.featuregen.PreviousMapFeatureGenerator; +import opennlp.tools.util.featuregen.SentenceFeatureGenerator; +import opennlp.tools.util.featuregen.TokenClassFeatureGenerator; +import opennlp.tools.util.featuregen.TokenFeatureGenerator; +import opennlp.tools.util.featuregen.WindowFeatureGenerator; // Idea of this factory is that most resources/impls used by the name finder // can be modified through this class! -// That only works if thats the central class used for training/runtime +// That only works if that's the central class used for training/runtime public class TokenNameFinderFactory extends BaseToolFactory { @@ -52,7 +59,7 @@ public TokenNameFinderFactory() { } public TokenNameFinderFactory(byte[] featureGeneratorBytes, final Map resources, - SequenceCodec seqCodec) { + SequenceCodec seqCodec) { init(featureGeneratorBytes, resources, seqCodec); } @@ -142,7 +149,13 @@ public NameContextGenerator createContextGenerator() { AdaptiveFeatureGenerator featureGenerator = createFeatureGenerators(); if (featureGenerator == null) { - featureGenerator = NameFinderME.createFeatureGenerator(); + featureGenerator = new CachedFeatureGenerator( + new WindowFeatureGenerator(new TokenFeatureGenerator(), 2, 2), + new WindowFeatureGenerator(new TokenClassFeatureGenerator(true), 2, 2), + new OutcomePriorFeatureGenerator(), + new PreviousMapFeatureGenerator(), + new BigramNameFeatureGenerator(), + new SentenceFeatureGenerator(true, false)); } return new DefaultNameContextGenerator(featureGenerator); @@ -160,7 +173,7 @@ public NameContextGenerator createContextGenerator() { public AdaptiveFeatureGenerator createFeatureGenerators() { if (featureGeneratorBytes == null && artifactProvider != null) { - featureGeneratorBytes = (byte[]) artifactProvider.getArtifact( + featureGeneratorBytes = artifactProvider.getArtifact( TokenNameFinderModel.GENERATOR_DESCRIPTOR_ENTRY_NAME); } @@ -172,15 +185,12 @@ public AdaptiveFeatureGenerator createFeatureGenerators() { AdaptiveFeatureGenerator generator; try { - generator = GeneratorFactory.create(descriptorIn, new FeatureGeneratorResourceProvider() { - - public Object getResource(String key) { - if (artifactProvider != null) { - return artifactProvider.getArtifact(key); - } - else { - return resources.get(key); - } + generator = GeneratorFactory.create(descriptorIn, key -> { + if (artifactProvider != null) { + return artifactProvider.getArtifact(key); + } + else { + return resources.get(key); } }); } catch (InvalidFormatException e) { diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java index a4780f544..ea2db5014 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java @@ -23,18 +23,14 @@ import java.io.InputStream; import java.io.OutputStream; import java.net.URL; -import java.util.Collections; import java.util.Map; import java.util.Properties; - import opennlp.tools.ml.BeamSearch; import opennlp.tools.ml.model.MaxentModel; import opennlp.tools.ml.model.SequenceClassificationModel; import opennlp.tools.util.BaseToolFactory; import opennlp.tools.util.InvalidFormatException; import opennlp.tools.util.SequenceCodec; -import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator; -import opennlp.tools.util.featuregen.AggregatedFeatureGenerator; import opennlp.tools.util.featuregen.BrownCluster; import opennlp.tools.util.featuregen.WordClusterDictionary; import opennlp.tools.util.model.ArtifactSerializer; @@ -58,9 +54,7 @@ public static class FeatureGeneratorCreationError extends RuntimeException { private static class ByteArraySerializer implements ArtifactSerializer { - public byte[] create(InputStream in) throws IOException, - InvalidFormatException { - + public byte[] create(InputStream in) throws IOException { return ModelUtil.read(in); } @@ -155,20 +149,6 @@ private void init(Object nameFinderModel, checkArtifactMap(); } - /** - * @deprecated use getNameFinderSequenceModel instead. This method will be removed soon. - */ - @Deprecated - public MaxentModel getNameFinderModel() { - - if (artifactMap.get(MAXENT_MODEL_ENTRY_NAME) instanceof MaxentModel) { - return (MaxentModel) artifactMap.get(MAXENT_MODEL_ENTRY_NAME); - } - else { - return null; - } - } - public SequenceClassificationModel getNameFinderSequenceModel() { Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY); @@ -200,46 +180,6 @@ public TokenNameFinderFactory getFactory() { return (TokenNameFinderFactory) this.toolFactory; } - // TODO: This should be moved to the NameFinderFactory ... !!! - // Lets deprecate it! - - /** - * Creates the {@link AdaptiveFeatureGenerator}. Usually this - * is a set of generators contained in the {@link AggregatedFeatureGenerator}. - * - * Note: - * The generators are created on every call to this method. - * - * @return the feature generator or null if there is no descriptor in the model - * @deprecated use TokenNameFinderFactory.createFeatureGenerators instead! - */ - @Deprecated - public AdaptiveFeatureGenerator createFeatureGenerators() { - return getFactory().createFeatureGenerators(); - } - - public TokenNameFinderModel updateFeatureGenerator(byte descriptor[]) { - - TokenNameFinderModel model; - - if (getNameFinderModel() != null) { - model = new TokenNameFinderModel(getLanguage(), getNameFinderModel(), 1, - descriptor, Collections.emptyMap(), Collections.emptyMap(), - getFactory().createSequenceCodec(), getFactory()); - } - else { - model = new TokenNameFinderModel(getLanguage(), getNameFinderSequenceModel(), - descriptor, Collections.emptyMap(), Collections.emptyMap(), - getFactory().createSequenceCodec(), getFactory()); - } - - model.artifactMap.clear(); - model.artifactMap.putAll(artifactMap); - model.artifactMap.put(GENERATOR_DESCRIPTOR_ENTRY_NAME, descriptor); - - return model; - } - @Override protected void createArtifactSerializers(Map serializers) { super.createArtifactSerializers(serializers); @@ -276,7 +216,7 @@ public static Map createArtifactSerializers() { return serializers; } - boolean isModelValid(MaxentModel model) { + private boolean isModelValid(MaxentModel model) { String outcomes[] = new String[model.getNumOutcomes()]; diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java index 630edc4d5..6115994d2 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java @@ -19,7 +19,6 @@ import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -27,7 +26,6 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; - import opennlp.tools.dictionary.Dictionary; import opennlp.tools.ml.model.AbstractModel; import opennlp.tools.util.BaseToolFactory; @@ -95,12 +93,12 @@ public Map createArtifactMap() { } public TagDictionary createTagDictionary(File dictionary) - throws InvalidFormatException, FileNotFoundException, IOException { + throws IOException { return createTagDictionary(new FileInputStream(dictionary)); } public TagDictionary createTagDictionary(InputStream in) - throws InvalidFormatException, IOException { + throws IOException { return POSDictionary.create(in); } @@ -146,8 +144,7 @@ public SequenceValidator getSequenceValidator() { static class POSDictionarySerializer implements ArtifactSerializer { - public POSDictionary create(InputStream in) throws IOException, - InvalidFormatException { + public POSDictionary create(InputStream in) throws IOException { return POSDictionary.create(new UncloseableInputStream(in)); } @@ -164,13 +161,13 @@ static void register(Map factories) { protected void validatePOSDictionary(POSDictionary posDict, AbstractModel posModel) throws InvalidFormatException { - Set dictTags = new HashSet(); + Set dictTags = new HashSet<>(); for (String word : posDict) { Collections.addAll(dictTags, posDict.getTags(word)); } - Set modelTags = new HashSet(); + Set modelTags = new HashSet<>(); for (int i = 0; i < posModel.getNumOutcomes(); i++) { modelTags.add(posModel.getOutcome(i)); diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java index cc8b0d198..e55cc176c 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java @@ -18,21 +18,19 @@ package opennlp.tools.namefind; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - import java.io.InputStream; import java.util.Collections; - -import opennlp.tools.ml.model.MaxentModel; +import opennlp.tools.ml.model.SequenceClassificationModel; import opennlp.tools.util.MockInputStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.Span; import opennlp.tools.util.TrainingParameters; - import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + /** * This is the test class for {@link NameFinderME}. *

@@ -74,7 +72,7 @@ public void testNameFinder() throws Exception { params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1)); TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream, - params, (byte[]) null, Collections.emptyMap()); + params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); TokenNameFinder nameFinder = new NameFinderME(nameFinderModel); @@ -116,8 +114,6 @@ public void testNameFinder() throws Exception { /** * Train NamefinderME using AnnotatedSentencesWithTypes.txt with "person" * nameType and try the model in a sample text. - * - * @throws Exception */ @Test public void testNameFinderWithTypes() throws Exception { @@ -137,7 +133,7 @@ public void testNameFinderWithTypes() throws Exception { params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1)); TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream, - params, (byte[]) null, Collections.emptyMap()); + params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); @@ -167,8 +163,6 @@ public void testNameFinderWithTypes() throws Exception { /** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 - * - * @throws Exception */ @Test public void testOnlyWithNames() throws Exception { @@ -186,7 +180,7 @@ public void testOnlyWithNames() throws Exception { params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1)); TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream, - params, (byte[]) null, Collections.emptyMap()); + params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); @@ -206,8 +200,6 @@ public void testOnlyWithNames() throws Exception { /** * Train NamefinderME using OnlyWithNamesWithTypes.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 - * - * @throws Exception */ @Test public void testOnlyWithNamesWithTypes() throws Exception { @@ -225,7 +217,7 @@ public void testOnlyWithNamesWithTypes() throws Exception { params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1)); TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream, - params, (byte[]) null, Collections.emptyMap()); + params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); @@ -247,8 +239,6 @@ public void testOnlyWithNamesWithTypes() throws Exception { /** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 - * - * @throws Exception */ @Test public void testOnlyWithEntitiesWithTypes() throws Exception { @@ -266,7 +256,7 @@ public void testOnlyWithEntitiesWithTypes() throws Exception { params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1)); TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream, - params, (byte[]) null, Collections.emptyMap()); + params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); @@ -283,13 +273,13 @@ public void testOnlyWithEntitiesWithTypes() throws Exception { } private boolean hasOtherAsOutcome(TokenNameFinderModel nameFinderModel) { - MaxentModel model = nameFinderModel.getNameFinderModel(); - for (int i = 0; i < model.getNumOutcomes(); i++) { - String outcome = model.getOutcome(i); - if (outcome.equals(NameFinderME.OTHER)) { - return true; - } - } + SequenceClassificationModel model = nameFinderModel.getNameFinderSequenceModel(); + String[] outcomes = model.getOutcomes(); + for (int i = 0; i < outcomes.length; i++) { + if (outcomes[i].equals(NameFinderME.OTHER)) { + return true; + } + } return false; } @@ -304,8 +294,6 @@ public void testDropOverlappingSpans() { /** * Train NamefinderME using voa1.train with several * nameTypes and try the model in a sample text. - * - * @throws Exception */ @Test public void testNameFinderWithMultipleTypes() throws Exception { @@ -323,7 +311,7 @@ public void testNameFinderWithMultipleTypes() throws Exception { params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1)); TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream, - params, (byte[]) null, Collections.emptyMap()); + params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java index 22567ee74..bc22aa0b3 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java @@ -18,15 +18,10 @@ package opennlp.tools.namefind; import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; import java.util.Collections; import java.util.Map; - -import org.junit.Test; - import opennlp.tools.cmdline.namefind.NameEvaluationErrorListener; import opennlp.tools.formats.ResourceAsStreamFactory; import opennlp.tools.util.InputStreamFactory; @@ -34,13 +29,17 @@ import opennlp.tools.util.PlainTextByLineStream; import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.model.ModelType; +import org.junit.Test; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; public class TokenNameFinderCrossValidatorTest { private final String TYPE = null; @Test - /** + /* * Test that reproduces jira OPENNLP-463 */ public void testWithNullResources() throws Exception { @@ -67,7 +66,7 @@ public void testWithNullResources() throws Exception { } @Test - /** + /* * Test that tries to reproduce jira OPENNLP-466 */ public void testWithNameEvaluationErrorListener() throws Exception {