Skip to content

Commit

Permalink
OPENNLP-719: Override any name type with specified type
Browse files: browse the repository at this point in the history
Closes #48
  • Loading branch information
wcolen authored and kottmann committed Apr 20, 2017
1 parent f078274 commit ae0dfee
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ protected Iterator<Event> createEvents(NameSample sample) {

Span[] names = sample.getNames();
if (!Objects.isNull(this.defaultType)) {
overrideDefaultType(names);
overrideType(names);
}

String outcomes[] = codec.encode(names, sample.getSentence().length);
Expand All @@ -140,13 +140,11 @@ protected Iterator<Event> createEvents(NameSample sample) {
return generateEvents(tokens, outcomes, contextGenerator).iterator();
}

private void overrideDefaultType(Span[] names) {
private void overrideType(Span[] names) {
for (int i = 0; i < names.length; i++) {
Span n = names[i];
if (Objects.isNull(n.getType())) {
names[i] = new Span(n.getStart(), n.getEnd(), this.defaultType,
n.getProb());
}
names[i] = new Span(n.getStart(), n.getEnd(), this.defaultType,
n.getProb());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public void testOutcomesTypeCantOverride() throws IOException {
ObjectStream<Event> eventStream = new NameFinderEventStream(
ObjectStreamUtils.createObjectStream(nameSample), type, CG, null);

String prefix = "person-";
String prefix = type + "-";
Assert.assertEquals(prefix + NameFinderME.START, eventStream.read().getOutcome());
Assert.assertEquals(prefix + NameFinderME.CONTINUE,
eventStream.read().getOutcome());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@
*/
public class NameFinderMETest {

private final String TYPE = "default";
private final String TYPE_OVERRIDE = "aType";
private final String DEFAULT = "default";

@Test
public void testNameFinder() throws Exception {
Expand All @@ -71,7 +72,7 @@ public void testNameFinder() throws Exception {
params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70));
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));

TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));

TokenNameFinder nameFinder = new NameFinderME(nameFinderModel);
Expand All @@ -92,7 +93,7 @@ public void testNameFinder() throws Exception {
Span names[] = nameFinder.find(sentence);

assertEquals(1, names.length);
assertEquals(new Span(0, 1, TYPE), names[0]);
assertEquals(new Span(0, 1, DEFAULT), names[0]);

sentence = new String[] {
"Hi",
Expand All @@ -107,8 +108,8 @@ public void testNameFinder() throws Exception {
names = nameFinder.find(sentence);

assertEquals(2, names.length);
assertEquals(new Span(1, 2, TYPE), names[0]);
assertEquals(new Span(4, 6, TYPE), names[1]);
assertEquals(new Span(1, 2, DEFAULT), names[0]);
assertEquals(new Span(4, 6, DEFAULT), names[1]);
}

/**
Expand All @@ -132,7 +133,7 @@ public void testNameFinderWithTypes() throws Exception {
params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70));
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));

TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));

NameFinderME nameFinder = new NameFinderME(nameFinderModel);
Expand Down Expand Up @@ -169,6 +170,39 @@ public void testOnlyWithNames() throws Exception {

// train the name finder

InputStream in = getClass().getClassLoader().getResourceAsStream(
"opennlp/tools/namefind/OnlyWithNames.train");

ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8"));

TrainingParameters params = new TrainingParameters();
params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70));
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));

TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));

NameFinderME nameFinder = new NameFinderME(nameFinderModel);

// now test if it can detect the sample sentences

String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " +
"Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+");

Span[] names1 = nameFinder.find(sentence);

assertEquals(new Span(0, 2, DEFAULT), names1[0]);
assertEquals(new Span(2, 4, DEFAULT), names1[1]);
assertEquals(new Span(4, 6, DEFAULT), names1[2]);
assertTrue(!hasOtherAsOutcome(nameFinderModel));
}

@Test
public void testOnlyWithNamesTypeOverride() throws Exception {

// train the name finder

InputStream in = getClass().getClassLoader().getResourceAsStream(
"opennlp/tools/namefind/OnlyWithNames.train");

Expand All @@ -179,7 +213,7 @@ public void testOnlyWithNames() throws Exception {
params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70));
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));

TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE_OVERRIDE, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));

NameFinderME nameFinder = new NameFinderME(nameFinderModel);
Expand All @@ -191,9 +225,9 @@ public void testOnlyWithNames() throws Exception {

Span[] names1 = nameFinder.find(sentence);

assertEquals(new Span(0, 2, TYPE), names1[0]);
assertEquals(new Span(2, 4, TYPE), names1[1]);
assertEquals(new Span(4, 6, TYPE), names1[2]);
assertEquals(new Span(0, 2, TYPE_OVERRIDE), names1[0]);
assertEquals(new Span(2, 4, TYPE_OVERRIDE), names1[1]);
assertEquals(new Span(4, 6, TYPE_OVERRIDE), names1[2]);
assertTrue(!hasOtherAsOutcome(nameFinderModel));
}

Expand All @@ -216,7 +250,7 @@ public void testOnlyWithNamesWithTypes() throws Exception {
params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70));
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));

TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));

NameFinderME nameFinder = new NameFinderME(nameFinderModel);
Expand Down Expand Up @@ -255,7 +289,7 @@ public void testOnlyWithEntitiesWithTypes() throws Exception {
params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70));
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));

TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));

NameFinderME nameFinder = new NameFinderME(nameFinderModel);
Expand Down Expand Up @@ -310,7 +344,7 @@ public void testNameFinderWithMultipleTypes() throws Exception {
params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70));
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));

TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE, sampleStream,
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));

NameFinderME nameFinder = new NameFinderME(nameFinderModel);
Expand Down

0 comments on commit ae0dfee

Please sign in to comment.