Skip to content

Commit

Permalink
Merge branch '1.9.x'
Browse files (browse the repository at this point in the history)
* 1.9.x:
  #1202 - TigerXmlWriter does not have an encoding parameter
  #1201 - Xces writers use wrong parameter name for file extensions
  #1074 - Generate OMTD-SHARE descriptors
  #1074 - Generate OMTD-SHARE descriptors
  • Loading branch information
reckart committed Mar 30, 2018
2 parents 93ffe23 + b10697d commit e0f64ce
Show file tree
Hide file tree
Showing 20 changed files with 210 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceChain=http://w3id.org/meta-share/omtd-share/Coreference
de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink=http://w3id.org/meta-share/omtd-share/Coreference
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures=http://w3id.org/meta-share/omtd-share/MorphologicalFeature
de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS = http://w3id.org/meta-share/omtd-share/PartOfSpeech

# Elevated types
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_ADJ
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_ADP
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_ADV
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_AUX
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_CONJ
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_DET
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_INTJ
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_NOUN
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_NUM
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_PART
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_PRON
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_PROPN
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_PUNCT
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_SCONJ
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_SYM
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_VERB
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS_X
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.POS_AT
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.POS_DM
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.POS_EMO
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.POS_HASH
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.POS_INT
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.POS_NNV
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.POS_NPV
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.POS_URL

# Deprecated elevated types
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.ADJ
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.ADP
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.ADV
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.ART
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.AUX
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.CARD
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.CONJ
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.DET
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.INTJ
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.N
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.NN
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.NOUN
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.NP
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.NUM
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.O
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.PART
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.PP
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.PR
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.PRON
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.PROPN
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.PRT
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.PUNC
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.PUNCT
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.SCONJ
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.SYM
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.V
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.VERB
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.X
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.AT
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.DM
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.EMO
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.HASH
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.INT
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.NNV
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.NPV
# de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.tweet.URL
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData
# de.tudarmstadt.ukp.dkpro.core.api.metadata.type.MetaDataStringField
# de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagDescription
# de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity=http://w3id.org/meta-share/omtd-share/NamedEntity
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ public final class MimeTypes

// DKPro application types
public final static String APPLICATION_X_ANCORA_XML = "application/x.org.dkpro.ancora+xml";
public final static String APPLICATION_X_BNC = "application/x.org.dkpro.bnc+xml";
public final static String APPLICATION_X_BRAT = "application/x.org.dkpro.brat";
public final static String APPLICATION_X_DITOP = "application/x.org.dkpro.ditop";
public final static String APPLICATION_X_FANGORN = "application/x.org.dkpro.fangorn";
public final static String APPLICATION_X_GATE_XML = "application/x.org.dkpro.gate+xml";
Expand All @@ -49,6 +51,8 @@ public final class MimeTypes
public final static String APPLICATION_X_SEMEVAL_2010_XML = "application/x.org.dkpro.semeval-2010+xml";
public final static String APPLICATION_X_TUEPP_XML = "application/x.org.dkpro.tuepp+xml";
public final static String APPLICATION_X_TUEBADZ_CHUNK = "application/x.org.dkpro.tuebadz-chunk";
public final static String APPLICATION_X_XCES = "application/x.org.dkpro.xces+xml";
public final static String APPLICATION_X_XCES_BASIC = "application/x.org.dkpro.xces-basic+xml";

// Standard text types (http://www.iana.org/assignments/media-types/media-types.xhtml)
public final static String TEXT_CSV = "text/csv";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
application/pdf=http://w3id.org/meta-share/omtd-share/Pdf
application/rtf=http://w3id.org/meta-share/omtd-share/Rtf
application/tei+xml=http://w3id.org/meta-share/omtd-share/Tei
application/vnd.xmi+xml=http://w3id.org/meta-share/omtd-share/Xmi
#application/x.org.dkpro.ancora+xml
application/x.org.dkpro.bnc+xml=http://w3id.org/meta-share/omtd-share/BncFormat
application/x.org.dkpro.brat=http://w3id.org/meta-share/omtd-share/Brat
#application/x.org.dkpro.core.lingpipe.ner
#application/x.org.dkpro.core.opennlp.chunk
#application/x.org.dkpro.core.opennlp.lemma
#application/x.org.dkpro.core.opennlp.ner
#application/x.org.dkpro.core.opennlp.sent
#application/x.org.dkpro.core.opennlp.tagger=
#application/x.org.dkpro.core.opennlp.token=
#application/x.org.dkpro.core.stanfordnlp.ner=
#application/x.org.dkpro.core.stanfordnlp.tagger=
#application/x.org.dkpro.ditop=
application/x.org.dkpro.gate+xml=http://w3id.org/meta-share/omtd-share/GateXml
#application/x.org.dkpro.lif+json=
#application/x.org.dkpro.lxf+json=
application/x.org.dkpro.negra3=http://w3id.org/meta-share/omtd-share/NegraExport
application/x.org.dkpro.negra4=http://w3id.org/meta-share/omtd-share/NegraExport
application/x.org.dkpro.nif+turtle=http://w3id.org/meta-share/omtd-share/Nif
application/x.org.dkpro.reuters21578+sgml=http://w3id.org/meta-share/omtd-share/Reuters21578Sgml
#application/x.org.dkpro.semeval-2010+xml=
application/x.org.dkpro.tgrep2=http://w3id.org/meta-share/omtd-share/Tgrep2
application/x.org.dkpro.tiger+xml=http://w3id.org/meta-share/omtd-share/TigerXml
#application/x.org.dkpro.tuebadz-chunk=
application/x.org.dkpro.tuepp+xml=http://w3id.org/meta-share/omtd-share/Tuepp
application/x.org.dkpro.uima+binary=http://w3id.org/meta-share/omtd-share/BinaryCas
application/x.org.dkpro.uima+json=http://w3id.org/meta-share/omtd-share/Uima_json
application/x.org.dkpro.uima+xmi=http://w3id.org/meta-share/omtd-share/UimaCasFormat
application/x.org.dkpro.xces+xml=http://w3id.org/meta-share/omtd-share/Xces
application/x.org.dkpro.xces-basic+xml=http://w3id.org/meta-share/omtd-share/XcesIlspVariant
application/xhtml+xml=http://w3id.org/meta-share/omtd-share/Xhtml
#application/xml=
text/html=http://w3id.org/meta-share/omtd-share/Html
text/plain=http://w3id.org/meta-share/omtd-share/Text
text/rtf=http://w3id.org/meta-share/omtd-share/Rtf
text/tcf+xml=http://w3id.org/meta-share/omtd-share/Tcf
text/x.org.dkpro.conll-2000=http://w3id.org/meta-share/omtd-share/Conll2000
text/x.org.dkpro.conll-2002=http://w3id.org/meta-share/omtd-share/Conll2002
text/x.org.dkpro.conll-2003=http://w3id.org/meta-share/omtd-share/Conll2003
text/x.org.dkpro.conll-2006=http://w3id.org/meta-share/omtd-share/Conll2006
text/x.org.dkpro.conll-2008=http://w3id.org/meta-share/omtd-share/Conll2008
text/x.org.dkpro.conll-2009=http://w3id.org/meta-share/omtd-share/Conll2009
text/x.org.dkpro.conll-2012=http://w3id.org/meta-share/omtd-share/Conll2012
text/x.org.dkpro.conll-u=http://w3id.org/meta-share/omtd-share/ConllU
#text/x.org.dkpro.germeval-2014=
text/x.org.dkpro.imscwb=http://w3id.org/meta-share/omtd-share/Imscwb
#text/x.org.dkpro.ngram=
text/x.org.dkpro.ptb-chunked=http://w3id.org/meta-share/omtd-share/PtbChunked
text/x.org.dkpro.ptb-combined=http://w3id.org/meta-share/omtd-share/PtbCombined
text/x.org.dkpro.reuters21578=http://w3id.org/meta-share/omtd-share/Reuters21578Txt
# text/xml=
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound=http://w3id.org/meta-share/omtd-share/Compound
#de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.CompoundPart
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Div=http://w3id.org/meta-share/omtd-share/StructuralAnnotationType
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Document=http://w3id.org/meta-share/omtd-share/StructuralAnnotationType
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Heading=http://w3id.org/meta-share/omtd-share/StructuralAnnotationType
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma=http://w3id.org/meta-share/omtd-share/Lemma
#de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.LexicalPhrase
#de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.LinkingMorpheme
#de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.NGram
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph=http://w3id.org/meta-share/omtd-share/Paragraph
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence=http://w3id.org/meta-share/omtd-share/Sentence
#de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Split
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem=http://w3id.org/meta-share/omtd-share/Stem
#de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.StopWord
#de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.SurfaceForm
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token=http://w3id.org/meta-share/omtd-share/Token
#de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.TokenForm
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArg=http://w3id.org/meta-share/omtd-share/SemanticAnnotationType
de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemPred=http://w3id.org/meta-share/omtd-share/SemanticFrame
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk=http://w3id.org/meta-share/omtd-share/Chunk
de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.Constituent=http://w3id.org/meta-share/omtd-share/Constituent
de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency=http://w3id.org/meta-share/omtd-share/Dependency

Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma",
"de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" },
outputs = {
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.NamedEntity" })
"de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity" })
/**
* This Analysis Engine annotates
* English single words with semantic field information retrieved from an ExternalResource.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
* Reader for the British National Corpus (XML version).
*/
@ResourceMetaData(name = "British National Corpus (BNC) XML Reader")
@MimeTypeCapability(MimeTypes.APPLICATION_XML)
@MimeTypeCapability(MimeTypes.APPLICATION_X_BNC)
@TypeCapability(
outputs = {
"de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,15 @@
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.util.FSUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes;
import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAnnotation;
import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAnnotationDocument;
import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAttribute;
Expand All @@ -68,6 +70,7 @@
* @see <a href="http://brat.nlplab.org/configuration.html">brat configuration format</a>
*/
@ResourceMetaData(name = "Brat Reader")
@MimeTypeCapability({MimeTypes.APPLICATION_X_BRAT})
public class BratReader
extends JCasResourceCollectionReader_ImplBase
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.util.FSUtil;
import org.apache.uima.jcas.JCas;
Expand All @@ -55,6 +56,7 @@

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasFileWriter_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes;
import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAnnotation;
import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAnnotationDocument;
import de.tudarmstadt.ukp.dkpro.core.io.brat.internal.model.BratAttributeDecl;
Expand Down Expand Up @@ -84,6 +86,7 @@
* @see <a href="http://brat.nlplab.org/configuration.html">brat configuration format</a>
*/
@ResourceMetaData(name = "Brat Writer")
@MimeTypeCapability({MimeTypes.APPLICATION_X_BRAT})
public class BratWriter extends JCasFileWriter_ImplBase
{
/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ public class TigerXmlWriter extends JCasFileWriter_ImplBase
@ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".xml")
private String filenameSuffix;

/**
* Character encoding of the output data.
*/
public static final String PARAM_TARGET_ENCODING = ComponentParameters.PARAM_TARGET_ENCODING;
@ConfigurationParameter(name = PARAM_TARGET_ENCODING, mandatory = true,
defaultValue = ComponentParameters.DEFAULT_ENCODING)
private String targetEncoding;

@Override
public void process(JCas aJCas)
throws AnalysisEngineProcessException
Expand All @@ -94,7 +102,7 @@ public void process(JCas aJCas)

XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();
xmlEventWriter = new IndentingXMLEventWriter(
xmlOutputFactory.createXMLEventWriter(docOS));
xmlOutputFactory.createXMLEventWriter(docOS, targetEncoding));

JAXBContext context = JAXBContext.newInstance(TigerSentence.class);
Marshaller marshaller = context.createMarshaller();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,20 @@
import javax.xml.stream.events.XMLEvent;

import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.factory.JCasBuilder;
import org.apache.uima.jcas.JCas;
import org.dkpro.core.io.xces.models.XcesBodyBasic;
import org.dkpro.core.io.xces.models.XcesParaBasic;

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasResourceCollectionReader_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes;
import de.tudarmstadt.ukp.dkpro.core.api.resources.CompressionUtils;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph;

@TypeCapability(outputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph"})
@MimeTypeCapability({MimeTypes.APPLICATION_X_XCES_BASIC})
public class XcesBasicXmlReader
extends JCasResourceCollectionReader_ImplBase
{
Expand All @@ -70,6 +73,7 @@ public void getNext(JCas aJCas)

unmarshallerBasic.setEventHandler(new ValidationEventHandler()
{
@Override
public boolean handleEvent(ValidationEvent event)
{
throw new RuntimeException(event.getMessage(), event.getLinkedException());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.MimeTypeCapability;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.util.JCasUtil;
Expand All @@ -44,24 +45,27 @@

import de.tudarmstadt.ukp.dkpro.core.api.io.JCasFileWriter_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.MimeTypes;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph;
import javanet.staxutils.IndentingXMLEventWriter;

@ResourceMetaData(name = "XCES Basic XML Writer")
@MimeTypeCapability({MimeTypes.APPLICATION_X_XCES_BASIC})
@TypeCapability(
inputs = {
"de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Paragraph" })
public class XcesBasicXmlWriter extends JCasFileWriter_ImplBase
public class XcesBasicXmlWriter
extends JCasFileWriter_ImplBase
{

public static final String PARAM_FILENAME_SUFFIX = "filenameSuffix";
@ConfigurationParameter(name = PARAM_FILENAME_SUFFIX, mandatory = true, defaultValue = ".xml")
private String filenameSuffix;
public static final String PARAM_FILENAME_EXTENSION =
ComponentParameters.PARAM_FILENAME_EXTENSION;
@ConfigurationParameter(name = PARAM_FILENAME_EXTENSION, mandatory = true, defaultValue = ".xml")
private String filenameExtension;

/**
* Character encoding of the output data.
*/
public static final String PARAM_TARGET_ENCODING = "targetEncoding";
public static final String PARAM_TARGET_ENCODING = ComponentParameters.PARAM_TARGET_ENCODING;
@ConfigurationParameter(name = PARAM_TARGET_ENCODING, mandatory = true,
defaultValue = ComponentParameters.DEFAULT_ENCODING)
private String targetEncoding;
Expand All @@ -73,7 +77,7 @@ public void process(JCas aJCas)
OutputStream docOS = null;
XMLEventWriter xmlEventWriter = null;
try {
docOS = getOutputStream(aJCas, filenameSuffix);
docOS = getOutputStream(aJCas, filenameExtension);
XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();
xmlEventWriter = new IndentingXMLEventWriter(
xmlOutputFactory.createXMLEventWriter(docOS, targetEncoding));
Expand Down
Loading

0 comments on commit e0f64ce

Please sign in to comment.