Skip to content

Commit

Permalink
Merge pull request #1334 from dkpro/bugfix/1329-Span-annotations-with…
Browse files Browse the repository at this point in the history
…-slot-features-may-disappear-from-WebAnno-TSV

Bugfix/1329 span annotations with slot features may disappear from web anno tsv
  • Loading branch information
reckart committed Mar 11, 2019
2 parents 1d77164 + dcb7196 commit 9e2c7ef
Show file tree
Hide file tree
Showing 12 changed files with 125 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public final class MimeTypes
public final static String APPLICATION_VND_XMI_XML = "application/vnd.xmi+xml";


// DKPro application types
// DKPro Core application types
public final static String APPLICATION_X_ANCORA_XML = "application/x.org.dkpro.ancora+xml";
public final static String APPLICATION_X_BNC = "application/x.org.dkpro.bnc+xml";
public final static String APPLICATION_X_BRAT = "application/x.org.dkpro.brat";
Expand Down Expand Up @@ -67,7 +67,7 @@ public final class MimeTypes
// Non-standard text types
public final static String TEXT_TCF = "text/tcf+xml";

// DKPro text types
// DKPro Core text types
public final static String TEXT_X_CONLL_2000 = "text/x.org.dkpro.conll-2000";
public final static String TEXT_X_CONLL_2002 = "text/x.org.dkpro.conll-2002";
public final static String TEXT_X_CONLL_2003 = "text/x.org.dkpro.conll-2003";
Expand All @@ -84,6 +84,7 @@ public final class MimeTypes
public final static String TEXT_X_PTB_CHUNKED = "text/x.org.dkpro.ptb-chunked";
public final static String TEXT_X_PTB_COMBINED = "text/x.org.dkpro.ptb-combined";
public final static String TEXT_X_REUTERS21578 = "text/x.org.dkpro.reuters21578";
public final static String TEXT_X_WEBANNO_TSV3 = "text/x.org.dkpro.webanno-tsv3";

// OpenNLP model types
public final static String APPLICATION_X_OPENNLP_CHUNK = "application/x.org.dkpro.core.opennlp.chunk";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,20 @@

import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.jcas.JCas;
import org.dkpro.core.io.webanno.tsv.internal.tsv3x.Tsv3XDeserializer;

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes;

/**
* Reads the WebAnno TSV v3.x format.
*/
@ResourceMetaData(name = "WebAnno TSV v3.x Reader")
@MimeTypeCapability({MimeTypes.TEXT_X_WEBANNO_TSV3})
public class WebannoTsv3XReader
extends JCasResourceCollectionReader_ImplBase
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@

import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.dkpro.core.io.webanno.tsv.internal.tsv3x.Tsv3XCasDocumentBuilder;
import org.dkpro.core.io.webanno.tsv.internal.tsv3x.Tsv3XCasSchemaAnalyzer;
Expand All @@ -32,10 +35,18 @@

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasFileWriter_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes;

/**
* Writes the WebAnno TSV v3.x format.
*/
@ResourceMetaData(name = "WebAnno TSV v3.x Writer")
@MimeTypeCapability({MimeTypes.TEXT_X_WEBANNO_TSV3})
@TypeCapability(
inputs = {
"de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData",
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence"})
public class WebannoTsv3XWriter
extends JCasFileWriter_ImplBase
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,13 +300,15 @@ private static void scanUnitForAmbiguousSlotReferences(TsvUnit aUnit)
for (AnnotationFS aFS : annotationsForColumn) {
FeatureStructure[] links = getFeature(aFS, col.uimaFeature,
FeatureStructure[].class);
for (FeatureStructure link : links) {
AnnotationFS targetFS = getFeature(link, TsvSchema.FEAT_SLOT_TARGET,
AnnotationFS.class);
if (targetFS == null) {
throw new IllegalStateException("Slot link has no target: " + link);
if (links != null) {
for (FeatureStructure link : links) {
AnnotationFS targetFS = getFeature(link, TsvSchema.FEAT_SLOT_TARGET,
AnnotationFS.class);
if (targetFS == null) {
throw new IllegalStateException("Slot link has no target: " + link);
}
aUnit.getDocument().addDisambiguationId(targetFS);
}
aUnit.getDocument().addDisambiguationId(targetFS);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -833,12 +833,13 @@ private void setFeatures(TsvColumn aCol, TsvUnit aUnit, AnnotationFS aAnnotation
FeatureStructure[] links = getFeature(aAnnotation,
aCol.uimaFeature.getShortName(), FeatureStructure[].class);

assert values.length == links.length;
assert (links.length == 0 && values.length == 1 && NULL_VALUE.equals(values[0]))
|| (values.length == links.length);

for (int i = 0; i < values.length; i++) {
String value = values[i];

if (NULL_COLUMN.equals(value)) {
if (NULL_VALUE.equals(value) || NULL_COLUMN.equals(value)) {
continue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,9 @@ private static void writeSlotTarget(PrintWriter aOut, TsvDocument aDoc, TsvColum
}
}
else {
aOut.print(NULL_COLUMN);
// If the slot host has no slots, we use this column as a placeholder so we know
// the span of the slot host
aOut.print(NULL_VALUE);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,35 @@ WebannoTsv3Writer.PARAM_LINK_TYPES, asList("webanno.custom.LinkType"),
WebannoTsv3Writer.PARAM_SLOT_TARGETS, asList("webanno.custom.SimpleSpan"));
}

/**
 * Writes a document in which a {@code webanno.custom.FlexLinkHost} annotation is created
 * with a {@code null} link array, next to two {@code webanno.custom.SimpleSpan}
 * annotations, and verifies the result through {@code writeAndAssertEquals}.
 * <p>
 * NOTE(review): {@code PARAM_SPAN_LAYERS} below lists {@code webanno.custom.SimpleLinkHost}
 * although the host created here is {@code webanno.custom.FlexLinkHost} - confirm that this
 * is intended.
 *
 * @throws Exception
 *             if building the CAS or the write/compare step fails.
 */
@Test
public void testUnsetSlotFeature() throws Exception
{
JCas jcas = makeJCasOneSentence();
CAS cas = jcas.getCas();

// Snapshot the tokens into a list so they can be addressed by position.
List<Token> tokens = new ArrayList<>(select(jcas, Token.class));

Token t1 = tokens.get(0);
Token t2 = tokens.get(1);
Token t3 = tokens.get(2);

// Two plain spans, one over the second and one over the third token.
Type type = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");
AnnotationFS s2 = cas.createAnnotation(type, t2.getBegin(), t2.getEnd());
cas.addFsToIndexes(s2);
AnnotationFS s3 = cas.createAnnotation(type, t3.getBegin(), t3.getEnd());
cas.addFsToIndexes(s3);

// Slot host over the first token; a null link array is passed on purpose so that the
// host is created without any links.
makeLinkHostFS(jcas, "webanno.custom.FlexLinkHost", t1.getBegin(), t1.getEnd(),
(FeatureStructure[]) null);

writeAndAssertEquals(jcas,
WebannoTsv3Writer.PARAM_SLOT_FEATS, asList("webanno.custom.FlexLinkHost:links"),
WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList("webanno.custom.SimpleSpan",
"webanno.custom.SimpleLinkHost"),
WebannoTsv3Writer.PARAM_LINK_TYPES, asList("webanno.custom.FlexLinkType"),
WebannoTsv3Writer.PARAM_SLOT_TARGETS, asList("webanno.custom.SimpleSpan"));
}

@Test
public void testSimpleSlotFeatureWithoutValues() throws Exception
{
Expand Down Expand Up @@ -1752,8 +1781,10 @@ private static AnnotationFS makeLinkHostFS(JCas aJCas, String aType, int aBegin,
{
Type hostType = aJCas.getTypeSystem().getType(aType);
AnnotationFS hostA1 = aJCas.getCas().createAnnotation(hostType, aBegin, aEnd);
hostA1.setFeatureValue(hostType.getFeatureByBaseName("links"),
FSCollectionFactory.createFSArray(aJCas, asList(aLinks)));
if (aLinks != null) {
hostA1.setFeatureValue(hostType.getFeatureByBaseName("links"),
FSCollectionFactory.createFSArray(aJCas, asList(aLinks)));
}
aJCas.getCas().addFsToIndexes(hostA1);
return hostA1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,37 @@
</featureDescription>
</features>
</typeDescription>
<!--
  Test type: an annotation (supertype uima.tcas.Annotation) with a single "links"
  feature, declared as an FSArray whose elements are webanno.custom.FlexLinkType and
  for which multiple references are not allowed.
-->
<typeDescription>
<name>webanno.custom.FlexLinkHost</name>
<description/>
<supertypeName>uima.tcas.Annotation</supertypeName>
<features>
<featureDescription>
<name>links</name>
<description/>
<rangeTypeName>uima.cas.FSArray</rangeTypeName>
<elementType>webanno.custom.FlexLinkType</elementType>
<multipleReferencesAllowed>false</multipleReferencesAllowed>
</featureDescription>
</features>
</typeDescription>
<!--
  Test type: a non-annotation feature structure (supertype uima.cas.TOP) with a
  String "role" feature and a "target" feature whose range is uima.tcas.Annotation.
-->
<typeDescription>
<name>webanno.custom.FlexLinkType</name>
<description/>
<supertypeName>uima.cas.TOP</supertypeName>
<features>
<featureDescription>
<name>role</name>
<description/>
<rangeTypeName>uima.cas.String</rangeTypeName>
</featureDescription>
<featureDescription>
<name>target</name>
<description/>
<rangeTypeName>uima.tcas.Annotation</rangeTypeName>
</featureDescription>
</features>
</typeDescription>
<typeDescription>
<name>webanno.custom.SimpleChain</name>
<description/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


#Text=This is a test .
1-1 0-4 This _[1]|pr1[2] _|1-1[1]
1-1 0-4 This _[1]|pr1[2] *|1-1[1]
1-2 5-7 is _ _
1-3 8-9 a _ _
1-4 10-14 test _ _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


#Text=This is a test .
1-1 0-4 This _ _[1]|_[2] _|_ val1[1]|val2[2]
1-1 0-4 This _ _[1]|_[2] *|* val1[1]|val2[2]
1-2 5-7 is * _ _ _
1-3 8-9 a * _ _ _
1-4 10-14 test _ _ _ _
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#FORMAT=WebAnno TSV 3.2
#T_SP=webanno.custom.SimpleSpan|
#T_SP=webanno.custom.FlexLinkHost|ROLE_webanno.custom.FlexLinkHost:links_webanno.custom.FlexLinkType|uima.tcas.Annotation


#Text=This is a test .
1-1 0-4 This _ _ *
1-2 5-7 is * _ _
1-3 8-9 a * _ _
1-4 10-14 test _ _ _
1-5 15-16 . _ _ _
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:pos="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI" xmlns:cas="http:///uima/cas.ecore" xmlns:tweet="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/pos/tweet.ecore" xmlns:morph="http:///de/tudarmstadt/ukp/dkpro/core/api/lexmorph/type/morph.ecore" xmlns:dependency="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/dependency.ecore" xmlns:type6="http:///de/tudarmstadt/ukp/dkpro/core/api/semantics/type.ecore" xmlns:type="http:///de/tudarmstadt/ukp/dkpro/core/api/anomaly/type.ecore" xmlns:type7="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type.ecore" xmlns:type3="http:///de/tudarmstadt/ukp/dkpro/core/api/metadata/type.ecore" xmlns:type4="http:///de/tudarmstadt/ukp/dkpro/core/api/ner/type.ecore" xmlns:type5="http:///de/tudarmstadt/ukp/dkpro/core/api/segmentation/type.ecore" xmlns:type2="http:///de/tudarmstadt/ukp/dkpro/core/api/coref/type.ecore" xmlns:constituent="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/constituent.ecore" xmlns:chunk="http:///de/tudarmstadt/ukp/dkpro/core/api/syntax/type/chunk.ecore" xmlns:custom="http:///webanno/custom.ecore" xmi:version="2.0">
<cas:NULL xmi:id="0"/>
<type3:DocumentMetaData xmi:id="1" sofa="12" begin="0" end="16" documentId="doc" isLastSegment="false"/>
<type5:Token xmi:id="19" sofa="12" begin="0" end="4"/>
<type5:Token xmi:id="28" sofa="12" begin="5" end="7"/>
<type5:Token xmi:id="37" sofa="12" begin="8" end="9"/>
<type5:Token xmi:id="46" sofa="12" begin="10" end="14"/>
<type5:Token xmi:id="55" sofa="12" begin="15" end="16"/>
<type5:Sentence xmi:id="64" sofa="12" begin="0" end="16"/>
<custom:SimpleSpan xmi:id="68" sofa="12" begin="5" end="7"/>
<custom:SimpleSpan xmi:id="72" sofa="12" begin="8" end="9"/>
<custom:FlexLinkHost xmi:id="76" sofa="12" begin="0" end="4"/>
<cas:Sofa xmi:id="12" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="This is a test ."/>
<cas:View sofa="12" members="1 19 28 37 46 55 64 68 72 76"/>
</xmi:XMI>

0 comments on commit 9e2c7ef

Please sign in to comment.