diff --git a/dkpro-core-io-brat-asl/pom.xml b/dkpro-core-io-brat-asl/pom.xml
index b58d182682..a19e49701f 100644
--- a/dkpro-core-io-brat-asl/pom.xml
+++ b/dkpro-core-io-brat-asl/pom.xml
@@ -46,6 +46,14 @@
org.apache.commons
commons-lang3
+
+ commons-logging
+ commons-logging-api
+
+
+ de.tudarmstadt.ukp.dkpro.core
+ de.tudarmstadt.ukp.dkpro.core.api.segmentation-asl
+
org.springframework
spring-core
diff --git a/dkpro-core-io-brat-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/io/brat/BratWriter.java b/dkpro-core-io-brat-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/io/brat/BratWriter.java
index 46f47b2df7..3446277da7 100644
--- a/dkpro-core-io-brat-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/io/brat/BratWriter.java
+++ b/dkpro-core-io-brat-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/io/brat/BratWriter.java
@@ -17,39 +17,22 @@
*/
package de.tudarmstadt.ukp.dkpro.core.io.brat;
-import static org.apache.uima.fit.util.JCasUtil.selectAll;
-
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
+import java.util.Collection;
import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.FeatureStructure;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.ResourceMetaData;
-import org.apache.uima.fit.util.FSUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
@@ -59,19 +42,8 @@
import de.tudarmstadt.ukp.dkpro.core.api.io.JCasFileWriter_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes;
-import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAnnotation;
import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAnnotationDocument;
-import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAttributeDecl;
import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratConfiguration;
-import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratConstants;
-import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratEventAnnotation;
-import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratEventAnnotationDecl;
-import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratEventArgument;
-import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratEventArgumentDecl;
-import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratRelationAnnotation;
-import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratTextAnnotation;
-import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratTextAnnotationDrawingDecl;
-import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.Offsets;
import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.RelationParam;
import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.TypeMapping;
import eu.openminted.share.annotations.api.DocumentationResource;
@@ -147,7 +119,6 @@ public class BratWriter extends JCasFileWriter_ImplBase
@ConfigurationParameter(name = PARAM_RELATION_TYPES, mandatory = true, defaultValue = {
"de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency:Governor:Dependent" })
private Set relationTypes;
- private Map parsedRelationTypes;
// /**
// * Types that are events. Optionally, multiple slot features can be specified.
@@ -176,7 +147,6 @@ public class BratWriter extends JCasFileWriter_ImplBase
"de.tudarmstadt.ukp.dkpro.core.api.ner.type.(\\w+) -> $1"
})
private String[] typeMappings;
- private TypeMapping typeMapping;
/**
* The brat web application can currently not handle attributes on relations, thus they are
@@ -209,41 +179,33 @@ public class BratWriter extends JCasFileWriter_ImplBase
@ConfigurationParameter(name = PARAM_SHORT_ATTRIBUTE_NAMES, mandatory = true, defaultValue = "false")
private boolean shortAttributeNames;
- private int nextEventAnnotationId;
- private int nextTextAnnotationId;
- private int nextRelationAnnotationId;
- private int nextAttributeId;
- private int nextPaletteIndex;
- private Map spanIdMap;
-
private BratConfiguration conf;
- private final static Pattern NEWLINE_EXTRACT_PATTERN = Pattern.compile("(.+?)(?:\\R|$)+");
-
- private Set warnings;
+ private DKPro2Brat converter;
@Override
public void initialize(UimaContext aContext)
throws ResourceInitializationException
{
super.initialize(aContext);
- conf = new BratConfiguration();
-
- warnings = new LinkedHashSet();
- parsedRelationTypes = new HashMap<>();
- for (String rel : relationTypes) {
- RelationParam p = RelationParam.parse(rel);
- parsedRelationTypes.put(p.getType(), p);
- }
-
// parsedEventTypes = new HashMap<>();
// for (String rel : eventTypes) {
// EventParam p = EventParam.parse(rel);
// parsedEventTypes.put(p.getType(), p);
// }
+ conf = new BratConfiguration(); // one configuration for the lifetime of this writer
+ converter = new DKPro2Brat(conf); // the converter records its declarations in conf
+ converter.setWriteNullAttributes(writeNullAttributes);
+ converter.setWriteRelationAttributes(writeRelationAttributes);
+ converter.setShortAttributeNames(shortAttributeNames);
+ converter.setPalette(palette);
+ converter.setExcludeTypes(excludeTypes);
+ converter.setSpanTypes(spanTypes);
+ converter.setRelationTypes( // relation parameters are parsed from their string form here
+ relationTypes.stream().map(RelationParam::parse).collect(Collectors.toList()));
if (enableTypeMappings) {
- typeMapping = new TypeMapping(typeMappings);
+ converter.setTypeMapping(new TypeMapping(typeMappings)); // only set when mappings are enabled
}
}
@@ -251,13 +213,6 @@ public void initialize(UimaContext aContext)
public void process(JCas aJCas)
throws AnalysisEngineProcessException
{
- nextEventAnnotationId = 1;
- nextTextAnnotationId = 1;
- nextRelationAnnotationId = 1;
- nextAttributeId = 1;
- nextPaletteIndex = 0;
- spanIdMap = new HashMap<>();
-
try {
if (".ann".equals(filenameSuffix)) {
writeText(aJCas);
@@ -284,10 +239,6 @@ public void collectionProcessComplete()
catch (IOException e) {
throw new AnalysisEngineProcessException(e);
}
-
- for (String warning : warnings) {
- getLogger().warn(warning);
- }
}
private void writeAnnotationConfiguration()
@@ -311,52 +262,10 @@ private void writeAnnotations(JCas aJCas)
{
BratAnnotationDocument doc = new BratAnnotationDocument();
- List relationFS = new ArrayList<>();
+ Collection warnings = converter.convert(aJCas, doc);
- Map eventFS = new LinkedHashMap<>();
-
- // Go through all the annotations but only handle the ones that have no references to
- // other annotations.
- for (FeatureStructure fs : selectAll(aJCas)) {
- // Skip document annotation
- if (fs == aJCas.getDocumentAnnotationFs()) {
- continue;
- }
-
- // Skip excluded types
- if (excludeTypes.contains(fs.getType().getName())) {
- getLogger().debug("Excluding [" + fs.getType().getName() + "]");
- continue;
- }
-
- if (spanTypes.contains(fs.getType().getName())) {
- writeTextAnnotation(doc, (AnnotationFS) fs);
- }
- else if (parsedRelationTypes.containsKey(fs.getType().getName())) {
- relationFS.add(fs);
- }
- else if (hasNonPrimitiveFeatures(fs) && (fs instanceof AnnotationFS)) {
-// else if (parsedEventTypes.containsKey(fs.getType().getName())) {
- BratEventAnnotation event = writeEventAnnotation(doc, (AnnotationFS) fs);
- eventFS.put(event, fs);
- }
- else if (fs instanceof AnnotationFS) {
- warnings.add("Assuming annotation type [" + fs.getType().getName() + "] is span");
- writeTextAnnotation(doc, (AnnotationFS) fs);
- }
- else {
- warnings.add("Skipping annotation with type [" + fs.getType().getName() + "]");
- }
- }
-
- // Handle relations now since now we can resolve their targets to IDs.
- for (FeatureStructure fs : relationFS) {
- writeRelationAnnotation(doc, fs);
- }
-
- // Handle event slots now since now we can resolve their targets to IDs.
- for (Entry e : eventFS.entrySet()) {
- writeSlots(doc, e.getKey(), e.getValue());
+ for (String warning : warnings) {
+ getLogger().warn(warning);
}
switch (filenameSuffix) {
@@ -410,352 +319,6 @@ else if (fs instanceof AnnotationFS) {
}
}
- /**
- * Checks if the feature structure has non-default non-primitive properties.
- */
- private boolean hasNonPrimitiveFeatures(FeatureStructure aFS)
- {
- for (Feature f : aFS.getType().getFeatures()) {
- if (CAS.FEATURE_BASE_NAME_SOFA.equals(f.getShortName())) {
- continue;
- }
-
- if (!f.getRange().isPrimitive()) {
- return true;
- }
- }
-
- return false;
- }
-
- private String getBratType(Type aType)
- {
- if (enableTypeMappings) {
- return typeMapping.getBratType(aType);
- }
- else {
- return aType.getName().replace('.', '-');
- }
- }
-
- private BratEventAnnotation writeEventAnnotation(BratAnnotationDocument aDoc, AnnotationFS aFS)
- {
-
- // Write trigger annotation
- BratTextAnnotation trigger = splitNewline(aFS);
-
- nextTextAnnotationId++;
-
- // Write event annotation
- BratEventAnnotation event = new BratEventAnnotation(nextEventAnnotationId,
- getBratType(aFS.getType()), trigger.getId());
- spanIdMap.put(aFS, event.getId());
- nextEventAnnotationId++;
-
- // We do not add the trigger annotations to the document - they are owned by the event
- //aDoc.addAnnotation(trigger);
- event.setTriggerAnnotation(trigger);
-
- // Write attributes
- writeAttributes(event, aFS);
-
- // Slots are written later after we know all the span/event IDs
-
- conf.addLabelDecl(event.getType(), aFS.getType().getShortName(), aFS.getType()
- .getShortName().substring(0, 1));
-
- if (!conf.hasDrawingDecl(event.getType())) {
- conf.addDrawingDecl(new BratTextAnnotationDrawingDecl(event.getType(), "black",
- palette[nextPaletteIndex % palette.length]));
- nextPaletteIndex++;
- }
-
- aDoc.addAnnotation(event);
- return event;
- }
-
- private void writeSlots(BratAnnotationDocument aDoc, BratEventAnnotation aEvent,
- FeatureStructure aFS)
- {
- String superType = getBratType(aFS.getCAS().getTypeSystem().getParent(aFS.getType()));
- String type = getBratType(aFS.getType());
-
- assert type.equals(aEvent.getType());
-
- BratEventAnnotationDecl decl = conf.getEventDecl(type);
- if (decl == null) {
- decl = new BratEventAnnotationDecl(superType, type);
- conf.addEventDecl(decl);
- }
-
- Map> slots = new LinkedHashMap<>();
- for (Feature feat : aFS.getType().getFeatures()) {
- if (!isSlotFeature(aFS, feat)) {
- continue;
- }
- String slot = feat.getShortName();
-
- List args = slots.get(slot);
- if (args == null) {
- args = new ArrayList<>();
- slots.put(slot, args);
- }
-
- if (
- FSUtil.isMultiValuedFeature(aFS, feat)
- // this can only be true for array types
- && feat.getRange().getComponentType() != null
- // Avoid calling getParent on TOP
- && !CAS.TYPE_NAME_TOP.equals(feat.getRange().getComponentType().getName())
- && CAS.TYPE_NAME_TOP.equals(aFS.getCAS().getTypeSystem()
- .getParent(feat.getRange().getComponentType()).getName())
- && (feat.getRange().getComponentType().getFeatureByBaseName("target") != null)
- && (feat.getRange().getComponentType().getFeatureByBaseName("role") != null)
- ) {
- // Handle WebAnno-style slot links
- // FIXME It would be better if the link type could be configured, e.g. what
- // is the name of the link feature and what is the name of the role feature...
- // but right now we just keep it hard-coded to the values that are used
- // in the DKPro Core SemArgLink and that are also hard-coded in WebAnno
- BratEventArgumentDecl slotDecl = new BratEventArgumentDecl(slot,
- BratConstants.CARD_ZERO_OR_MORE);
- decl.addSlot(slotDecl);
-
- FeatureStructure[] links = FSUtil.getFeature(aFS, feat, FeatureStructure[].class);
- if (links != null) {
- for (FeatureStructure link : links) {
- FeatureStructure target = FSUtil.getFeature(link, "target",
- FeatureStructure.class);
- Feature roleFeat = link.getType().getFeatureByBaseName("role");
- BratEventArgument arg = new BratEventArgument(slot, args.size(),
- spanIdMap.get(target));
- args.add(arg);
-
- // Attach the role attribute to the target span
- BratAnnotation targetAnno = aDoc.getAnnotation(spanIdMap.get(target));
- writePrimitiveAttribute(targetAnno, link, roleFeat);
- }
- }
- }
- else if (FSUtil.isMultiValuedFeature(aFS, feat)) {
- // Handle normal multi-valued features
- BratEventArgumentDecl slotDecl = new BratEventArgumentDecl(slot,
- BratConstants.CARD_ZERO_OR_MORE);
- decl.addSlot(slotDecl);
-
- FeatureStructure[] targets = FSUtil.getFeature(aFS, feat, FeatureStructure[].class);
- if (targets != null) {
- for (FeatureStructure target : targets) {
- BratEventArgument arg = new BratEventArgument(slot, args.size(),
- spanIdMap.get(target));
- args.add(arg);
- }
- }
- }
- else {
- // Handle normal single-valued features
- BratEventArgumentDecl slotDecl = new BratEventArgumentDecl(slot,
- BratConstants.CARD_OPTIONAL);
- decl.addSlot(slotDecl);
-
- FeatureStructure target = FSUtil.getFeature(aFS, feat, FeatureStructure.class);
- if (target != null) {
- BratEventArgument arg = new BratEventArgument(slot, args.size(),
- spanIdMap.get(target));
- args.add(arg);
- }
- }
- }
-
- aEvent.setArguments(slots.values().stream().flatMap(args -> args.stream())
- .collect(Collectors.toList()));
- }
-
- private boolean isSlotFeature(FeatureStructure aFS, Feature aFeature)
- {
- return !isInternalFeature(aFeature)
- && (FSUtil.isMultiValuedFeature(aFS, aFeature) || !aFeature.getRange()
- .isPrimitive());
- }
-
- private void writeRelationAnnotation(BratAnnotationDocument aDoc, FeatureStructure aFS)
- {
- RelationParam rel = parsedRelationTypes.get(aFS.getType().getName());
-
- FeatureStructure arg1 = aFS.getFeatureValue(aFS.getType().getFeatureByBaseName(
- rel.getArg1()));
- FeatureStructure arg2 = aFS.getFeatureValue(aFS.getType().getFeatureByBaseName(
- rel.getArg2()));
-
- if (arg1 == null || arg2 == null) {
- throw new IllegalArgumentException("Dangling relation");
- }
-
- String arg1Id = spanIdMap.get(arg1);
- String arg2Id = spanIdMap.get(arg2);
-
- if (arg1Id == null || arg2Id == null) {
- throw new IllegalArgumentException("Unknown targets!");
- }
-
- String superType = getBratType(aFS.getCAS().getTypeSystem().getParent(aFS.getType()));
- String type = getBratType(aFS.getType());
-
- BratRelationAnnotation anno = new BratRelationAnnotation(nextRelationAnnotationId,
- type, rel.getArg1(), arg1Id, rel.getArg2(), arg2Id);
- nextRelationAnnotationId++;
-
- conf.addRelationDecl(superType, type, rel.getArg1(), rel.getArg2());
-
- conf.addLabelDecl(anno.getType(), aFS.getType().getShortName(), aFS.getType()
- .getShortName().substring(0, 1));
-
- aDoc.addAnnotation(anno);
-
- // brat doesn't support attributes on relations
- // https://github.com/nlplab/brat/issues/791
- if (writeRelationAttributes) {
- writeAttributes(anno, aFS);
- }
- }
-
-
-
- private BratTextAnnotation splitNewline(AnnotationFS aFS)
- {
-
- // extract all but newlines as groups
- Matcher m = NEWLINE_EXTRACT_PATTERN.matcher(aFS.getCoveredText());
- List offsets = new ArrayList<>();
- while (m.find()) {
- Offsets offset = new Offsets(m.start(1) + aFS.getBegin(), m.end(1) + aFS.getBegin() );
- offsets.add(offset);
- }
- // replaces any group of newline by one space
- String[] texts = new String[] { aFS.getCoveredText().replaceAll("\\R+", " ") };
- return new BratTextAnnotation(nextTextAnnotationId, getBratType(aFS.getType()), offsets,
- texts);
- }
-
- private void writeTextAnnotation(BratAnnotationDocument aDoc, AnnotationFS aFS)
- {
- String superType = getBratType(aFS.getCAS().getTypeSystem().getParent(aFS.getType()));
- String type = getBratType(aFS.getType());
- BratTextAnnotation anno = splitNewline(aFS);
-
- nextTextAnnotationId++;
-
- conf.addEntityDecl(superType, type);
-
- conf.addLabelDecl(anno.getType(), aFS.getType().getShortName(), aFS.getType()
- .getShortName().substring(0, 1));
-
- if (!conf.hasDrawingDecl(anno.getType())) {
- conf.addDrawingDecl(new BratTextAnnotationDrawingDecl(anno.getType(), "black",
- palette[nextPaletteIndex % palette.length]));
- nextPaletteIndex++;
- }
-
- aDoc.addAnnotation(anno);
-
- writeAttributes(anno, aFS);
-
- spanIdMap.put(aFS, anno.getId());
- }
-
- private boolean isInternalFeature(Feature aFeature)
- {
- // https://issues.apache.org/jira/browse/UIMA-4565
- return "uima.cas.AnnotationBase:sofa".equals(aFeature.getName());
- // return CAS.FEATURE_FULL_NAME_SOFA.equals(aFeature.getName());
- }
-
- private void writeAttributes(BratAnnotation aAnno, FeatureStructure aFS)
- {
- for (Feature feat : aFS.getType().getFeatures()) {
- // Skip Sofa feature
- if (isInternalFeature(feat)) {
- continue;
- }
-
- // No need to write begin / end, they are already on the text annotation
- if (CAS.FEATURE_FULL_NAME_BEGIN.equals(feat.getName()) ||
- CAS.FEATURE_FULL_NAME_END.equals(feat.getName())) {
- continue;
- }
-
- // No need to write link endpoints again, they are already on the relation annotation
- RelationParam relParam = parsedRelationTypes.get(aFS.getType().getName());
- if (relParam != null) {
- if (relParam.getArg1().equals(feat.getShortName())
- || relParam.getArg2().equals(feat.getShortName())) {
- continue;
- }
- }
-
- if (feat.getRange().isPrimitive()) {
- writePrimitiveAttribute(aAnno, aFS, feat);
- }
- // The following warning is not relevant for event annotations because these render such
- // features as slots.
- else if (!(aAnno instanceof BratEventAnnotation)) {
- warnings.add(
- "Unable to render feature [" + feat.getName() + "] with range ["
- + feat.getRange().getName() + "] as attribute");
- }
- }
- }
-
- private void writePrimitiveAttribute(BratAnnotation aAnno, FeatureStructure aFS, Feature feat)
- {
- String featureValue = aFS.getFeatureValueAsString(feat);
-
- // Do not write attributes with null values unless this is explicitly enabled
- if (featureValue == null && !writeNullAttributes) {
- return;
- }
-
- String attributeName = shortAttributeNames ? feat.getShortName()
- : aAnno.getType() + '_' + feat.getShortName();
-
- aAnno.addAttribute(nextAttributeId, attributeName, featureValue);
- nextAttributeId++;
-
- // Do not write certain values to the visual/annotation configuration because
- // they are not compatible with the brat annotation file format. The values are
- // still maintained in the ann file.
- if (isValidFeatureValue(featureValue)) {
- // Features are inherited to subtypes in UIMA. By storing the attribute under
- // the name of the type that declares the feature (domain) instead of the name
- // of the actual instance we are processing, we make sure not to maintain
- // multiple value sets for the same feature.
- BratAttributeDecl attrDecl = conf.addAttributeDecl(
- aAnno.getType(),
- getAllSubtypes(aFS.getCAS().getTypeSystem(), feat.getDomain()),
- attributeName, featureValue);
- conf.addDrawingDecl(attrDecl);
- }
- }
-
- // This generates lots of types as well that we may not otherwise have in declared in the
- // brat configuration files, but brat doesn't seem to mind.
- private Set getAllSubtypes(TypeSystem aTS, Type aType)
- {
- Set types = new LinkedHashSet<>();
- aTS.getProperlySubsumedTypes(aType).stream().forEach(t -> types.add(getBratType(t)));
- return types;
- }
-
- /**
- * Some feature values do not need to be registered or cannot be registered because brat does
- * not support them.
- */
- private boolean isValidFeatureValue(String aFeatureValue)
- {
- // https://github.com/nlplab/brat/issues/1149
- return !(aFeatureValue == null || aFeatureValue.length() == 0 || aFeatureValue.equals(","));
- }
-
private void writeText(JCas aJCas)
throws IOException
{
diff --git a/dkpro-core-io-brat-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/io/brat/DKPro2Brat.java b/dkpro-core-io-brat-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/io/brat/DKPro2Brat.java
new file mode 100644
index 0000000000..9f1d2ab486
--- /dev/null
+++ b/dkpro-core-io-brat-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/io/brat/DKPro2Brat.java
@@ -0,0 +1,593 @@
+/*
+ * Copyright 2019
+ * Ubiquitous Knowledge Processing (UKP) Lab
+ * Technische Universität Darmstadt
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.dkpro.core.io.brat;
+
+import static org.apache.uima.fit.util.JCasUtil.selectAll;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.fit.util.FSUtil;
+import org.apache.uima.jcas.JCas;
+
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAnnotation;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAnnotationDocument;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAttributeDecl;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratConfiguration;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratConstants;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratEventAnnotation;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratEventAnnotationDecl;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratEventArgument;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratEventArgumentDecl;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratRelationAnnotation;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratTextAnnotation;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratTextAnnotationDrawingDecl;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.Offsets;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.RelationParam;
+import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.TypeMapping;
+
+public class DKPro2Brat
+{
+    private final Log log = LogFactory.getLog(getClass());
+
+    // Group 1 captures a run of characters up to the next line break(s) or the end of input.
+    private final static Pattern NEWLINE_EXTRACT_PATTERN = Pattern.compile("(.+?)(?:\\R|$)+");
+
+    // Receives the declarations (labels, drawing, entities, relations, ...) made while converting.
+    private final BratConfiguration conf;
+
+    // Per-conversion state, reset by init() at the start of every convert() call.
+    private int nextEventAnnotationId;
+    private int nextTextAnnotationId;
+    private int nextRelationAnnotationId;
+    private int nextAttributeId;
+    private int nextPaletteIndex;
+    // Maps an already converted feature structure to the ID of its brat annotation.
+    private Map<FeatureStructure, String> spanIdMap;
+
+    // Warnings collected during the current conversion (insertion-ordered, no duplicates).
+    private Set<String> warnings;
+
+    // Converter settings with their defaults.
+    private String[] palette = new String[] { "#8dd3c7", "#ffffb3", "#bebada", "#fb8072", "#80b1d3",
+            "#fdb462", "#b3de69", "#fccde5", "#d9d9d9", "#bc80bd", "#ccebc5", "#ffed6f" };
+    private Set<String> excludeTypes = Collections
+            .singleton("de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence");
+    private Set<String> spanTypes = new HashSet<>();
+    // Relation parameters keyed by the name of the relation type.
+    private Map<String, RelationParam> parsedRelationTypes = new HashMap<>();
+    private TypeMapping typeMapping;
+
+    private boolean writeRelationAttributes;
+    private boolean writeNullAttributes;
+    private boolean shortAttributeNames;
+
+    /**
+     * Creates a converter which registers the declarations it produces in the given brat
+     * configuration.
+     *
+     * @param aConf
+     *            the brat configuration to be filled during conversion.
+     */
+    public DKPro2Brat(BratConfiguration aConf)
+    {
+        this.conf = aConf;
+    }
+
+    /**
+     * @return whether attributes are also written for relation annotations.
+     */
+    public boolean isWriteRelationAttributes()
+    {
+        return this.writeRelationAttributes;
+    }
+
+    /**
+     * @param aWriteRelationAttributes
+     *            whether attributes should also be written for relation annotations.
+     */
+    public void setWriteRelationAttributes(boolean aWriteRelationAttributes)
+    {
+        this.writeRelationAttributes = aWriteRelationAttributes;
+    }
+
+    /**
+     * @return the current value of the "write null attributes" setting.
+     */
+    public boolean isWriteNullAttributes()
+    {
+        return this.writeNullAttributes;
+    }
+
+    /**
+     * @param aWriteNullAttributes
+     *            the new value of the "write null attributes" setting.
+     */
+    public void setWriteNullAttributes(boolean aWriteNullAttributes)
+    {
+        this.writeNullAttributes = aWriteNullAttributes;
+    }
+
+    /**
+     * @return the current value of the "short attribute names" setting.
+     */
+    public boolean isShortAttributeNames()
+    {
+        return this.shortAttributeNames;
+    }
+
+    /**
+     * @param aShortAttributeNames
+     *            the new value of the "short attribute names" setting.
+     */
+    public void setShortAttributeNames(boolean aShortAttributeNames)
+    {
+        this.shortAttributeNames = aShortAttributeNames;
+    }
+
+
+
+    /**
+     * @return the colors that are assigned in turn to newly declared annotation types.
+     */
+    public String[] getPalette()
+    {
+        return this.palette;
+    }
+
+    /**
+     * @param aPalette
+     *            the colors to assign in turn to newly declared annotation types. The array is
+     *            indexed modulo its length, so it must not be empty once annotations are written.
+     */
+    public void setPalette(String[] aPalette)
+    {
+        this.palette = aPalette;
+    }
+
+    /**
+     * @return the names of the types whose feature structures are skipped during conversion.
+     */
+    public Set<String> getExcludeTypes()
+    {
+        return excludeTypes;
+    }
+
+    /**
+     * @param aExcludeTypes
+     *            the names of the types whose feature structures are skipped during conversion.
+     */
+    public void setExcludeTypes(Set<String> aExcludeTypes)
+    {
+        excludeTypes = aExcludeTypes;
+    }
+
+    /**
+     * @return the configured relation parameters, keyed by the name of the relation type.
+     */
+    public Map<String, RelationParam> getRelationTypes()
+    {
+        return parsedRelationTypes;
+    }
+
+    /**
+     * Replaces the configured relation types with the given ones. If several parameters share
+     * the same type name, the last one wins.
+     *
+     * @param aRelationTypes
+     *            the relation parameters to use from now on.
+     */
+    public void setRelationTypes(Collection<RelationParam> aRelationTypes)
+    {
+        // Copy first: the argument may be a view of the map that is cleared below, e.g. the
+        // values() of getRelationTypes().
+        List<RelationParam> params = new ArrayList<>(aRelationTypes);
+
+        // A setter must replace the previous configuration instead of merging into it.
+        parsedRelationTypes.clear();
+        for (RelationParam param : params) {
+            parsedRelationTypes.put(param.getType(), param);
+        }
+    }
+
+    /**
+     * @return the names of the types that are always converted to brat text annotations.
+     */
+    public Set<String> getSpanTypes()
+    {
+        return spanTypes;
+    }
+
+    /**
+     * @param aSpanTypes
+     *            the names of the types that are always converted to brat text annotations.
+     */
+    public void setSpanTypes(Set<String> aSpanTypes)
+    {
+        spanTypes = aSpanTypes;
+    }
+
+    /**
+     * @return the configured type mapping; {@code null} unless one has been set.
+     */
+    public TypeMapping getTypeMapping()
+    {
+        return this.typeMapping;
+    }
+
+    /**
+     * @param aTypeMapping
+     *            the type mapping to use.
+     */
+    public void setTypeMapping(TypeMapping aTypeMapping)
+    {
+        this.typeMapping = aTypeMapping;
+    }
+
+    /**
+     * Resets the per-conversion state: fresh ID lookup and warning collections, all annotation
+     * and attribute IDs starting at 1 again and the palette restarting at its first color.
+     */
+    private void init()
+    {
+        spanIdMap = new HashMap<>();
+        warnings = new LinkedHashSet<>();
+
+        nextPaletteIndex = 0;
+
+        nextTextAnnotationId = 1;
+        nextEventAnnotationId = 1;
+        nextRelationAnnotationId = 1;
+        nextAttributeId = 1;
+    }
+
+    /**
+     * Converts the feature structures of the given CAS to brat annotations and adds them to the
+     * given document. Declarations for the encountered types are registered in the brat
+     * configuration passed to the constructor.
+     *
+     * @param aJCas
+     *            the CAS to convert.
+     * @param doc
+     *            the brat document receiving the annotations.
+     * @return the warnings collected during this conversion.
+     * @throws IllegalArgumentException
+     *             if a relation has an unset endpoint or an endpoint that was not converted.
+     */
+    public Set<String> convert(JCas aJCas, BratAnnotationDocument doc)
+    {
+        init();
+
+        // Relations and event slots refer to other annotations by ID, so they are collected
+        // here and only written once all spans/events have been assigned their IDs.
+        List<FeatureStructure> relationFS = new ArrayList<>();
+        Map<BratEventAnnotation, FeatureStructure> eventFS = new LinkedHashMap<>();
+
+        // Go through all the annotations but only handle the ones that have no references to
+        // other annotations.
+        for (FeatureStructure fs : selectAll(aJCas)) {
+            // Skip document annotation
+            if (fs == aJCas.getDocumentAnnotationFs()) {
+                continue;
+            }
+
+            String typeName = fs.getType().getName();
+
+            // Skip excluded types
+            if (excludeTypes.contains(typeName)) {
+                log.debug("Excluding [" + typeName + "]");
+                continue;
+            }
+
+            if (spanTypes.contains(typeName)) {
+                writeTextAnnotation(doc, (AnnotationFS) fs);
+            }
+            else if (parsedRelationTypes.containsKey(typeName)) {
+                relationFS.add(fs);
+            }
+            else if (hasNonPrimitiveFeatures(fs) && (fs instanceof AnnotationFS)) {
+//            else if (parsedEventTypes.containsKey(fs.getType().getName())) {
+                BratEventAnnotation event = writeEventAnnotation(doc, (AnnotationFS) fs);
+                eventFS.put(event, fs);
+            }
+            else if (fs instanceof AnnotationFS) {
+                warnings.add("Assuming annotation type [" + typeName + "] is span");
+                writeTextAnnotation(doc, (AnnotationFS) fs);
+            }
+            else {
+                warnings.add("Skipping annotation with type [" + typeName + "]");
+            }
+        }
+
+        // Handle relations now since now we can resolve their targets to IDs.
+        for (FeatureStructure fs : relationFS) {
+            writeRelationAnnotation(doc, fs);
+        }
+
+        // Handle event slots now since now we can resolve their targets to IDs.
+        for (Entry<BratEventAnnotation, FeatureStructure> e : eventFS.entrySet()) {
+            writeSlots(doc, e.getKey(), e.getValue());
+        }
+
+        return warnings;
+    }
+
+    /**
+     * Tells whether the type of the given feature structure declares at least one feature,
+     * other than the sofa feature, whose range is not a primitive type.
+     */
+    private boolean hasNonPrimitiveFeatures(FeatureStructure aFS)
+    {
+        return aFS.getType().getFeatures().stream()
+                .filter(feature -> !CAS.FEATURE_BASE_NAME_SOFA.equals(feature.getShortName()))
+                .anyMatch(feature -> !feature.getRange().isPrimitive());
+    }
+
+    /**
+     * Creates a brat event annotation (with its trigger text annotation) for the given
+     * annotation, writes its attributes, registers label and drawing declarations for its type
+     * and adds the event to the document. The event ID is recorded so that relations and slots
+     * can refer to it; the slots themselves are filled in later.
+     *
+     * @return the newly created event annotation.
+     */
+    private BratEventAnnotation writeEventAnnotation(BratAnnotationDocument aDoc, AnnotationFS aFS)
+    {
+        Type uimaType = aFS.getType();
+
+        // The trigger is a text annotation, so it takes up one text annotation ID
+        BratTextAnnotation trigger = splitNewline(aFS);
+        nextTextAnnotationId++;
+
+        // The event itself
+        BratEventAnnotation event = new BratEventAnnotation(nextEventAnnotationId,
+                getBratType(uimaType), trigger.getId());
+        spanIdMap.put(aFS, event.getId());
+        nextEventAnnotationId++;
+
+        // The trigger is not added to the document separately - it is owned by the event
+        //aDoc.addAnnotation(trigger);
+        event.setTriggerAnnotation(trigger);
+
+        // Attributes are written right away, slots only after all span/event IDs are known
+        writeAttributes(event, aFS);
+
+        String eventType = event.getType();
+        String label = uimaType.getShortName();
+        conf.addLabelDecl(eventType, label, label.substring(0, 1));
+
+        // Pick the next palette color the first time a type is seen
+        boolean needsDrawingDecl = !conf.hasDrawingDecl(eventType);
+        if (needsDrawingDecl) {
+            String color = palette[nextPaletteIndex % palette.length];
+            conf.addDrawingDecl(new BratTextAnnotationDrawingDecl(eventType, "black", color));
+            nextPaletteIndex++;
+        }
+
+        aDoc.addAnnotation(event);
+        return event;
+    }
+
+    /**
+     * Adds a brat text annotation for the given annotation to the document, registers entity,
+     * label and drawing declarations for its type, writes its attributes and records its ID so
+     * that relations and event slots can refer to it later.
+     */
+    private void writeTextAnnotation(BratAnnotationDocument aDoc, AnnotationFS aFS)
+    {
+        Type uimaType = aFS.getType();
+        String superType = getBratType(aFS.getCAS().getTypeSystem().getParent(uimaType));
+        String type = getBratType(uimaType);
+
+        BratTextAnnotation anno = splitNewline(aFS);
+        nextTextAnnotationId++;
+
+        conf.addEntityDecl(superType, type);
+
+        String label = uimaType.getShortName();
+        conf.addLabelDecl(anno.getType(), label, label.substring(0, 1));
+
+        // Pick the next palette color the first time a type is seen
+        boolean needsDrawingDecl = !conf.hasDrawingDecl(anno.getType());
+        if (needsDrawingDecl) {
+            String color = palette[nextPaletteIndex % palette.length];
+            conf.addDrawingDecl(
+                    new BratTextAnnotationDrawingDecl(anno.getType(), "black", color));
+            nextPaletteIndex++;
+        }
+
+        aDoc.addAnnotation(anno);
+
+        writeAttributes(anno, aFS);
+
+        spanIdMap.put(aFS, anno.getId());
+    }
+
+    /**
+     * Adds a brat relation annotation for the given feature structure to the document and
+     * registers relation and label declarations for its type. Both endpoints must already have
+     * been converted so that their brat IDs can be looked up.
+     *
+     * @throws IllegalArgumentException
+     *             if the relation type does not declare the configured argument features, if an
+     *             endpoint is not set, or if an endpoint has not been converted.
+     */
+    private void writeRelationAnnotation(BratAnnotationDocument aDoc, FeatureStructure aFS)
+    {
+        Type relationType = aFS.getType();
+        String typeName = relationType.getName();
+        RelationParam rel = parsedRelationTypes.get(typeName);
+
+        // getFeatureByBaseName returns null for unknown features; report the misconfiguration
+        // instead of failing obscurely when reading the feature value.
+        Feature arg1Feature = relationType.getFeatureByBaseName(rel.getArg1());
+        Feature arg2Feature = relationType.getFeatureByBaseName(rel.getArg2());
+        if (arg1Feature == null || arg2Feature == null) {
+            throw new IllegalArgumentException("Relation type [" + typeName
+                    + "] does not declare both argument features [" + rel.getArg1() + "] and ["
+                    + rel.getArg2() + "]");
+        }
+
+        FeatureStructure arg1 = aFS.getFeatureValue(arg1Feature);
+        FeatureStructure arg2 = aFS.getFeatureValue(arg2Feature);
+
+        if (arg1 == null || arg2 == null) {
+            throw new IllegalArgumentException("Dangling relation of type [" + typeName
+                    + "]: [" + rel.getArg1() + "] and/or [" + rel.getArg2() + "] is not set");
+        }
+
+        String arg1Id = spanIdMap.get(arg1);
+        String arg2Id = spanIdMap.get(arg2);
+
+        if (arg1Id == null || arg2Id == null) {
+            throw new IllegalArgumentException("Unknown targets! Relation of type [" + typeName
+                    + "] refers to an annotation that has not been converted");
+        }
+
+        String superType = getBratType(aFS.getCAS().getTypeSystem().getParent(relationType));
+        String type = getBratType(relationType);
+
+        BratRelationAnnotation anno = new BratRelationAnnotation(nextRelationAnnotationId,
+                type, rel.getArg1(), arg1Id, rel.getArg2(), arg2Id);
+        nextRelationAnnotationId++;
+
+        conf.addRelationDecl(superType, type, rel.getArg1(), rel.getArg2());
+
+        conf.addLabelDecl(anno.getType(), relationType.getShortName(),
+                relationType.getShortName().substring(0, 1));
+
+        aDoc.addAnnotation(anno);
+
+        // brat doesn't support attributes on relations
+        // https://github.com/nlplab/brat/issues/791
+        if (writeRelationAttributes) {
+            writeAttributes(anno, aFS);
+        }
+    }
+
+    /**
+     * Writes the primitive features of the given feature structure as attributes of the given
+     * brat annotation. The sofa feature, begin/end and the endpoint features of relations are
+     * skipped. For a non-primitive feature a warning is recorded, unless the annotation is an
+     * event.
+     */
+    private void writeAttributes(BratAnnotation aAnno, FeatureStructure aFS)
+    {
+        Type type = aFS.getType();
+
+        // The relation parameters depend only on the type, so look them up once
+        RelationParam relParam = parsedRelationTypes.get(type.getName());
+
+        for (Feature feat : type.getFeatures()) {
+            // Skip Sofa feature
+            if (isInternalFeature(feat)) {
+                continue;
+            }
+
+            // No need to write begin / end, they are already on the text annotation
+            if (CAS.FEATURE_FULL_NAME_BEGIN.equals(feat.getName()) ||
+                    CAS.FEATURE_FULL_NAME_END.equals(feat.getName())) {
+                continue;
+            }
+
+            // No need to write link endpoints again, they are already on the relation annotation
+            if (relParam != null) {
+                if (relParam.getArg1().equals(feat.getShortName())
+                        || relParam.getArg2().equals(feat.getShortName())) {
+                    continue;
+                }
+            }
+
+            if (feat.getRange().isPrimitive()) {
+                writePrimitiveAttribute(aAnno, aFS, feat);
+            }
+            // The following warning is not relevant for event annotations because these render such
+            // features as slots.
+            else if (!(aAnno instanceof BratEventAnnotation)) {
+                warnings.add(
+                        "Unable to render feature [" + feat.getName() + "] with range ["
+                                + feat.getRange().getName() + "] as attribute");
+            }
+        }
+    }
+
+ /**
+  * Collects the slot features of a feature structure and stores them as arguments on the
+  * given brat event annotation.
+  * <p>
+  * An event declaration for the brat type of {@code aFS} is fetched from {@code conf} or
+  * created and registered if none exists yet. Every slot feature (see
+  * {@code isSlotFeature}) contributes a slot declaration and zero or more arguments whose
+  * targets are resolved through {@code spanIdMap}. Three cases are distinguished:
+  * <ul>
+  * <li>multi-valued features whose component type directly extends TOP and has a
+  * {@code target} and a {@code role} feature (WebAnno-style slot links): one argument per
+  * link, and the link's role is written as an attribute on the target span annotation;</li>
+  * <li>other multi-valued features: one argument per element;</li>
+  * <li>single-valued features: at most one argument.</li>
+  * </ul>
+  *
+  * @param aDoc
+  *            the brat document, used to look up the annotations of link targets.
+  * @param aEvent
+  *            the event annotation receiving the arguments.
+  * @param aFS
+  *            the feature structure the event annotation was created from.
+  */
+ private void writeSlots(BratAnnotationDocument aDoc, BratEventAnnotation aEvent,
+         FeatureStructure aFS)
+ {
+     String superType = getBratType(aFS.getCAS().getTypeSystem().getParent(aFS.getType()));
+     String type = getBratType(aFS.getType());
+
+     assert type.equals(aEvent.getType());
+
+     BratEventAnnotationDecl decl = conf.getEventDecl(type);
+     if (decl == null) {
+         decl = new BratEventAnnotationDecl(superType, type);
+         conf.addEventDecl(decl);
+     }
+
+     // Insertion-ordered so that arguments are emitted in feature declaration order
+     Map<String, List<BratEventArgument>> slots = new LinkedHashMap<>();
+     for (Feature feat : aFS.getType().getFeatures()) {
+         if (!isSlotFeature(aFS, feat)) {
+             continue;
+         }
+         String slot = feat.getShortName();
+
+         List<BratEventArgument> args = slots.computeIfAbsent(slot, key -> new ArrayList<>());
+
+         boolean multiValued = FSUtil.isMultiValuedFeature(aFS, feat);
+
+         // The component type can only be non-null for array types
+         Type componentType = feat.getRange().getComponentType();
+         boolean linkFeature = multiValued
+                 && componentType != null
+                 // Avoid calling getParent on TOP
+                 && !CAS.TYPE_NAME_TOP.equals(componentType.getName())
+                 && CAS.TYPE_NAME_TOP.equals(
+                         aFS.getCAS().getTypeSystem().getParent(componentType).getName())
+                 && componentType.getFeatureByBaseName("target") != null
+                 && componentType.getFeatureByBaseName("role") != null;
+
+         // Multi-valued features may carry any number of arguments, single-valued ones at
+         // most one.
+         decl.addSlot(new BratEventArgumentDecl(slot,
+                 multiValued ? BratConstants.CARD_ZERO_OR_MORE : BratConstants.CARD_OPTIONAL));
+
+         if (linkFeature) {
+             // Handle WebAnno-style slot links
+             // FIXME It would be better if the link type could be configured, e.g. what
+             // is the name of the link feature and what is the name of the role feature...
+             // but right now we just keep it hard-coded to the values that are used
+             // in the DKPro Core SemArgLink and that are also hard-coded in WebAnno
+             FeatureStructure[] links = FSUtil.getFeature(aFS, feat, FeatureStructure[].class);
+             if (links != null) {
+                 for (FeatureStructure link : links) {
+                     FeatureStructure target = FSUtil.getFeature(link, "target",
+                             FeatureStructure.class);
+                     Feature roleFeat = link.getType().getFeatureByBaseName("role");
+                     String targetId = spanIdMap.get(target);
+                     args.add(new BratEventArgument(slot, args.size(), targetId));
+
+                     // Attach the role attribute to the target span
+                     BratAnnotation targetAnno = aDoc.getAnnotation(targetId);
+                     writePrimitiveAttribute(targetAnno, link, roleFeat);
+                 }
+             }
+         }
+         else if (multiValued) {
+             // Handle normal multi-valued features
+             FeatureStructure[] targets = FSUtil.getFeature(aFS, feat, FeatureStructure[].class);
+             if (targets != null) {
+                 for (FeatureStructure target : targets) {
+                     args.add(new BratEventArgument(slot, args.size(), spanIdMap.get(target)));
+                 }
+             }
+         }
+         else {
+             // Handle normal single-valued features
+             FeatureStructure target = FSUtil.getFeature(aFS, feat, FeatureStructure.class);
+             if (target != null) {
+                 args.add(new BratEventArgument(slot, args.size(), spanIdMap.get(target)));
+             }
+         }
+     }
+
+     aEvent.setArguments(slots.values().stream().flatMap(List::stream)
+             .collect(Collectors.toList()));
+ }
+
+ /**
+  * Decides whether a feature is rendered as a slot: it must not be an internal feature, and
+  * it must either be multi-valued on {@code aFS} or have a non-primitive range.
+  *
+  * @param aFS
+  *            the feature structure carrying the feature.
+  * @param aFeature
+  *            the feature to check.
+  * @return {@code true} if the feature is a slot feature.
+  */
+ private boolean isSlotFeature(FeatureStructure aFS, Feature aFeature)
+ {
+     if (isInternalFeature(aFeature)) {
+         return false;
+     }
+
+     if (FSUtil.isMultiValuedFeature(aFS, aFeature)) {
+         return true;
+     }
+
+     return !aFeature.getRange().isPrimitive();
+ }
+
+
+
+
+ private boolean isInternalFeature(Feature aFeature)
+ {
+ // https://issues.apache.org/jira/browse/UIMA-4565
+ return "uima.cas.AnnotationBase:sofa".equals(aFeature.getName());
+ // return CAS.FEATURE_FULL_NAME_SOFA.equals(aFeature.getName());
+ }
+
+ /**
+  * Adds the string value of a feature as an attribute to a brat annotation and, if the
+  * value is acceptable according to {@code isValidFeatureValue}, registers a matching
+  * attribute declaration and drawing declaration in {@code conf}.
+  * <p>
+  * A {@code null} value is only written when {@code writeNullAttributes} is set. The
+  * attribute is named after the feature's short name, prefixed with the brat type of the
+  * annotation and an underscore unless {@code shortAttributeNames} is set.
+  *
+  * @param aAnno
+  *            the brat annotation receiving the attribute.
+  * @param aFS
+  *            the feature structure the value is read from.
+  * @param feat
+  *            the feature to write.
+  */
+ private void writePrimitiveAttribute(BratAnnotation aAnno, FeatureStructure aFS, Feature feat)
+ {
+     String value = aFS.getFeatureValueAsString(feat);
+
+     // Null values are dropped unless writing them has been explicitly enabled
+     boolean suppressNull = value == null && !writeNullAttributes;
+     if (suppressNull) {
+         return;
+     }
+
+     String name;
+     if (shortAttributeNames) {
+         name = feat.getShortName();
+     }
+     else {
+         name = aAnno.getType() + '_' + feat.getShortName();
+     }
+
+     aAnno.addAttribute(nextAttributeId, name, value);
+     nextAttributeId++;
+
+     // Some values are not compatible with the brat annotation file format and must not end
+     // up in the visual/annotation configuration. They are still kept in the ann file by the
+     // addAttribute call above.
+     if (!isValidFeatureValue(value)) {
+         return;
+     }
+
+     // Features are inherited by subtypes in UIMA. The declaration is therefore registered
+     // together with the brat names of all types subsumed by the type that declares the
+     // feature (its domain), so that the same feature does not end up with several value sets.
+     BratAttributeDecl attrDecl = conf.addAttributeDecl(
+             aAnno.getType(),
+             getAllSubtypes(aFS.getCAS().getTypeSystem(), feat.getDomain()),
+             name, value);
+     conf.addDrawingDecl(attrDecl);
+ }
+
+ /**
+  * Returns the brat type names of all types properly subsumed by the given type, in the
+  * order in which the type system reports them.
+  * <p>
+  * This generates lots of types as well that we may not otherwise have declared in the
+  * brat configuration files, but brat doesn't seem to mind.
+  *
+  * @param aTS
+  *            the type system to query.
+  * @param aType
+  *            the type whose subtypes are collected.
+  * @return the brat names of the subsumed types; never {@code null}.
+  */
+ private Set<String> getAllSubtypes(TypeSystem aTS, Type aType)
+ {
+     return aTS.getProperlySubsumedTypes(aType).stream()
+             .map(t -> getBratType(t))
+             .collect(Collectors.toCollection(LinkedHashSet::new));
+ }
+
+ /**
+  * Tells whether a feature value may be registered in the brat configuration. Values that
+  * are {@code null}, empty or consist of a single comma are rejected because they do not need
+  * to be registered or because brat does not support them.
+  *
+  * @param aFeatureValue
+  *            the value to check, may be {@code null}.
+  * @return {@code true} if the value can be registered.
+  */
+ private boolean isValidFeatureValue(String aFeatureValue)
+ {
+     // https://github.com/nlplab/brat/issues/1149
+     if (aFeatureValue == null) {
+         return false;
+     }
+     if (aFeatureValue.isEmpty()) {
+         return false;
+     }
+     return !",".equals(aFeatureValue);
+ }
+
+ /**
+  * Creates a brat text annotation for an annotation whose covered text is split into
+  * several offset ranges by {@code NEWLINE_EXTRACT_PATTERN}.
+  * <p>
+  * Each match of the pattern contributes one offset pair taken from capture group 1 and
+  * shifted by the begin offset of the annotation. The annotation text is the covered text
+  * with every run of line breaks replaced by a single space. The annotation is created with
+  * the current value of {@code nextTextAnnotationId}; the counter is not advanced here.
+  *
+  * @param aFS
+  *            the annotation to convert.
+  * @return the brat text annotation carrying the collected offsets.
+  */
+ private BratTextAnnotation splitNewline(AnnotationFS aFS)
+ {
+     // Read the text and the begin offset once instead of on every use
+     String coveredText = aFS.getCoveredText();
+     int begin = aFS.getBegin();
+
+     // extract all but newlines as groups
+     Matcher m = NEWLINE_EXTRACT_PATTERN.matcher(coveredText);
+     List<Offsets> offsets = new ArrayList<>();
+     while (m.find()) {
+         offsets.add(new Offsets(m.start(1) + begin, m.end(1) + begin));
+     }
+
+     // replaces any group of newline by one space
+     String[] texts = new String[] { coveredText.replaceAll("\\R+", " ") };
+     return new BratTextAnnotation(nextTextAnnotationId, getBratType(aFS.getType()), offsets,
+             texts);
+ }
+
+ /**
+  * Maps a UIMA type to its brat type name. If a {@code typeMapping} is set, the mapping
+  * decides; otherwise the fully qualified type name is used with dots replaced by dashes.
+  *
+  * @param aType
+  *            the UIMA type.
+  * @return the brat type name.
+  */
+ private String getBratType(Type aType)
+ {
+     if (typeMapping == null) {
+         // No mapping set: derive the name from the UIMA type name
+         return aType.getName().replace('.', '-');
+     }
+
+     return typeMapping.getBratType(aType);
+ }
+}