Skip to content

Commit

Permalink
Merge branch 'master' into feature/issue1190-maui
Browse files Browse the repository at this point in the history
* master:
  #1041 - Add parameter to enable lower-cased lookup of first word in sentence in SfstAnnotator
  #1362 - NifWriter does not write out NE identifier
  #1362 - NifWriter does not write out NE identifier
  #1152 - Introduce "order" feature on tokens
  #1366 - Added support in CONLL-U reader for document and paragraph IDs
  #1041 - Add parameter to enable lower-cased lookup of first word in sentence in SfstAnnotator
  #1366 - Added support in CONLL-U reader for document and paragraph IDs
  #1367 - Support TCF orthography via SofaChangeAnnotations
  #1041 - Add parameter to enable lower-cased lookup of first word in sentence in SfstAnnotator
  #1327 - Update LIF support
  #1366 - Added support in CONLL-U reader for document and paragraph IDs
  #1367 - Support TCF orthography via SofaChangeAnnotations
  Forgot to commit the list declaration
  Warn if CONLL-U file contains multiple documents
  Added support in CONLL-U reader for document and paragraph IDs
  #186 - Change artifactId to "dkpro-core-XXX"
  #1299 - Update to CoreNLP 3.9.2
  #1337 - Connl2012 writer uses WordSense, but does not declare it
  #1299 - Update to CoreNLP 3.9.2
  Added parameter to enable lower-cased lookup of first word in sentence.
  • Loading branch information
reckart committed Jun 4, 2019
2 parents 6507c15 + b702a66 commit b9dfaf3
Show file tree
Hide file tree
Showing 41 changed files with 3,397 additions and 812 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ public abstract class ResourceObjectProviderBase<M>
* resolved when {@link #configure()} is called. (optional)
*/
public static final String GROUP_ID = "groupId";
/**
 * The Maven group ID under which the POM of the component is looked up (below
 * {@code META-INF/maven/}) when the version of a model is auto-detected. If no value is
 * configured, the value of {@link #GROUP_ID} is used instead. (optional)
 */
public static final String COMPONENT_GROUP_ID = "componentGroupId";

/**
* The artifact ID of the Maven artifact containing a resource. Variables in the location are
Expand Down Expand Up @@ -212,6 +213,7 @@ public abstract class ResourceObjectProviderBase<M>
protected void init()
{
// Default Maven group ID of the artifact containing the resource (model).
setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
// Default Maven group ID used to locate the component's POM under META-INF/maven/ when the
// model version is auto-detected; kept separate from GROUP_ID so the two can differ.
setDefault(COMPONENT_GROUP_ID, "org.dkpro.core");
// The artifact URI is a template: the ${...} placeholders name other properties and are
// substituted when the properties are resolved.
setDefault(ARTIFACT_URI,
"mvn:${" + GROUP_ID + "}:${" + ARTIFACT_ID + "}:${" + VERSION + "}");
}
Expand Down Expand Up @@ -374,7 +376,7 @@ public void applyAutoOverrides(Object aObject)
}
}

protected List<URL> getPomUrlsForClass(String aModelGroup, String aModelArtifact,
protected List<URL> getPomUrlsForClass(String aComponentGroupId, String aModelArtifactId,
Class<?> aClass)
throws IOException
{
Expand Down Expand Up @@ -418,7 +420,7 @@ protected List<URL> getPomUrlsForClass(String aModelGroup, String aModelArtifact
Matcher matcher = pattern.matcher(base);
if (matcher.matches()) {
String artifactIdAndVersion = matcher.group("ID");
String pomPattern = base + "META-INF/maven/" + aModelGroup + "/"
String pomPattern = base + "META-INF/maven/" + aComponentGroupId + "/"
+ artifactIdAndVersion + "/pom.xml";
lookupPatterns.add(pomPattern);
ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
Expand All @@ -434,9 +436,9 @@ protected List<URL> getPomUrlsForClass(String aModelGroup, String aModelArtifact
// models from the StanfordNLP module).
if (urls.isEmpty()) {
// This is the default strategy supposed to look in the JAR
String moduleArtifactId = aModelArtifact.split("-")[0];
String pomPattern = base + "META-INF/maven/" + aModelGroup + "/" + moduleArtifactId +
"*/pom.xml";
String moduleArtifactId = aModelArtifactId.split("-")[0];
String pomPattern = base + "META-INF/maven/" + aComponentGroupId + "/"
+ moduleArtifactId + "*/pom.xml";
lookupPatterns.add(pomPattern);
ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
Resource[] resources = resolver.getResources(pomPattern);
Expand Down Expand Up @@ -468,11 +470,11 @@ protected List<URL> getPomUrlsForClass(String aModelGroup, String aModelArtifact
* the POM, or if no context object was set.
* @return the version of the required model.
*/
protected String getModelVersionFromMavenPom(String aModelGroup, String aModelArtifact,
Class<?> aClass)
protected String getModelVersionFromMavenPom(String aComponentGroupId, String aModelGroupId,
String aModelArtifactId, Class<?> aClass)
throws IOException
{
List<URL> urls = getPomUrlsForClass(aModelGroup, aModelArtifact, contextClass);
List<URL> urls = getPomUrlsForClass(aComponentGroupId, aModelArtifactId, contextClass);

for (URL pomUrl : urls) {
// Parse the POM
Expand All @@ -492,8 +494,8 @@ protected String getModelVersionFromMavenPom(String aModelGroup, String aModelAr
List<Dependency> deps = model.getDependencyManagement().getDependencies();
for (Dependency dep : deps) {
if (
StringUtils.equals(dep.getGroupId(), aModelGroup) &&
StringUtils.equals(dep.getArtifactId(), aModelArtifact)
StringUtils.equals(dep.getGroupId(), aModelGroupId) &&
StringUtils.equals(dep.getArtifactId(), aModelArtifactId)
) {
return dep.getVersion();
}
Expand Down Expand Up @@ -790,12 +792,22 @@ private Properties resolveDependency(Properties aProps)
resolved.getProperty(ARTIFACT_URI, "").contains("${" + VERSION + "}") &&
isNull(resolved.getProperty(VERSION))
) {
String groupId = pph.replacePlaceholders(aProps.getProperty(GROUP_ID), resolved);
String modelGroupId = pph.replacePlaceholders(aProps.getProperty(GROUP_ID), resolved);
String componentGroupId;

if (aProps.getProperty(COMPONENT_GROUP_ID) != null) {
componentGroupId = pph.replacePlaceholders(aProps.getProperty(COMPONENT_GROUP_ID),
resolved);
}
else {
componentGroupId = modelGroupId;
}

String artifactId = pph.replacePlaceholders(aProps.getProperty(ARTIFACT_ID), resolved);
try {
// If the version is to be auto-detected, then we must have a groupId and artifactId
resolved.put(VERSION,
getModelVersionFromMavenPom(groupId, artifactId, contextClass));
resolved.put(VERSION, getModelVersionFromMavenPom(componentGroupId, modelGroupId,
artifactId, contextClass));
}
catch (Throwable e) {
log.error("Unable to obtain version from POM", e);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,72 +1,151 @@
<?xml version="1.0" encoding="UTF-8"?>
<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">

<name>Segmentation</name>

<description/>

<version>${version}</version>

<vendor>Ubiquitous Knowledge Processing (UKP) Lab, Technische Universität Darmstadt</vendor>

<imports>

<import name="desc.type.LexicalUnits"/>

</imports>

<types>

<typeDescription>

<name>de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound</name>

<description>This type represents a compound word that can be split into its parts, e.g. "flowerpot". Each Compound has at least two Splits.</description>

<supertypeName>uima.tcas.Annotation</supertypeName>

<features>

<featureDescription>

<name>splits</name>

<description>A word that can be decomposed into different parts.</description>

<rangeTypeName>uima.cas.FSArray</rangeTypeName>

<elementType>de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Split</elementType>

</featureDescription>

</features>

</typeDescription>

<typeDescription>

<name>de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token</name>

<description>&lt;p&gt;Token is one of the two types commonly produced by a segmenter (the other being Sentence). A Token usually represents a word, although it may be used to represent multiple tightly connected words (e.g. "New York") or parts of a word (e.g. the possessive "'s"). One may choose to split compound words into multiple tokens, e.g. ("CamelCase" -&amp;gt; "Camel", "Case"; "Zauberstab" -&amp;gt; "Zauber", "stab"). Most processing components operate on Tokens, usually within the limits of the surrounding Sentence. E.g. a part-of-speech tagger analyses each Token in a Sentence and assigns a part-of-speech to each Token.&lt;/p&gt;</description>

<supertypeName>uima.tcas.Annotation</supertypeName>

<features>

<featureDescription>

<name>parent</name>

<description>The parent of this token. This feature is meant to be used when the token participates in a constituency parse, in which case it refers to a constituent containing this token. The type of this feature is {@link Annotation} to avoid adding a dependency on the syntax API module.</description>

<rangeTypeName>uima.tcas.Annotation</rangeTypeName>

</featureDescription>

<featureDescription>

<name>lemma</name>

<description/>

<rangeTypeName>de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma</rangeTypeName>

</featureDescription>

<featureDescription>

<name>stem</name>

<description/>

<rangeTypeName>de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem</rangeTypeName>

</featureDescription>

<featureDescription>

<name>pos</name>

<description/>

<rangeTypeName>de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS</rangeTypeName>

</featureDescription>

<featureDescription>

<name>morph</name>

<description>The morphological features associated with this token.</description>

<rangeTypeName>de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures</rangeTypeName>

</featureDescription>

<featureDescription>

<name>id</name>

<description>If this unit had an ID in the source format from which it was imported, it may be stored here. IDs are typically not assigned by DKPro Core components. If an ID is present, it should be respected by writers.</description>

<rangeTypeName>uima.cas.String</rangeTypeName>

</featureDescription>

<featureDescription>

<name>form</name>

<description>Potentially normalized form of the token text that should be used instead of the covered text if set.</description>

<rangeTypeName>de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.TokenForm</rangeTypeName>

</featureDescription>

<featureDescription>

<name>syntacticFunction</name>

<description/>

<rangeTypeName>uima.cas.String</rangeTypeName>

</featureDescription>

<featureDescription>

<name>order</name>

<description>Disambiguates the token order for tokens which have the same offsets, e.g. when the contraction "à" is analyzed as two tokens "a" and "a".</description>

<rangeTypeName>uima.cas.Integer</rangeTypeName>

</featureDescription>
</features>

</typeDescription>

</types>

</typeSystemDescription>
Loading

0 comments on commit b9dfaf3

Please sign in to comment.