Skip to content

Commit

Permalink
Merge branch 'master' into 2.0.x
Browse files Browse the repository at this point in the history
* master:
  #186 - Change artifactId to "dkpro-core-XXX"
  #1299 - Update to CoreNLP 3.9.2
  #1337 - Conll2012 writer uses WordSense, but does not declare it
  #1299 - Update to CoreNLP 3.9.2
  No issue. Fixed JavaDoc error.
  #1340 - Upgrade dependencies (1.11.0)
  #1358 - Improve error messages in TSV3
  #1357 - Upgrade to ICU4J 64.2
  #1340 - Upgrade dependencies (1.11.0)
  #1343 - Segmenter for Chinese
  #1343 - Segmenter for Chinese
  #1343 - Segmenter for Chinese
  #1343 - Segmenter for Chinese
  #1343 - Segmenter for Chinese
  #1340 - Upgrade dependencies

% Conflicts:
%	dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/uima/annotator/CompoundAnnotatorTest.java
%	dkpro-core-io-lxf-asl/src/main/java/org/dkpro/core/io/lxf/internal/DKPro2Lxf.java
  • Loading branch information
reckart committed May 31, 2019
2 parents 751bcb3 + 70cfbb5 commit e4c4f5c
Show file tree
Hide file tree
Showing 33 changed files with 653 additions and 202 deletions.
2 changes: 1 addition & 1 deletion dkpro-core-api-datasets-asl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
<dependency>
<groupId>com.github.junrar</groupId>
<artifactId>junrar</artifactId>
<version>0.7</version>
<version>4.0.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.commons</groupId>
Expand Down
4 changes: 2 additions & 2 deletions dkpro-core-api-resources-asl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ public abstract class ResourceObjectProviderBase<M>
* resolved when {@link #configure()} is called. (optional)
*/
public static final String GROUP_ID = "groupId";
/**
 * Property key for the Maven group ID of the component (as opposed to the model artifact).
 * During version auto-detection this group ID is used to build the
 * {@code META-INF/maven/<groupId>/.../pom.xml} lookup pattern for the component's POM. If the
 * property is not set, the value of {@link #GROUP_ID} is used instead. (optional)
 */
public static final String COMPONENT_GROUP_ID = "componentGroupId";

/**
* The artifact ID of the Maven artifact containing a resource. Variables in the location are
Expand Down Expand Up @@ -212,6 +213,7 @@ public abstract class ResourceObjectProviderBase<M>
/**
 * Sets the default values for the model group ID, the component group ID and the artifact URI.
 * <p>
 * The artifact URI default is a {@code mvn:} template composed of placeholders for the
 * {@link #GROUP_ID}, {@link #ARTIFACT_ID} and {@link #VERSION} properties.
 */
protected void init()
{
setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
setDefault(COMPONENT_GROUP_ID, "org.dkpro.core");
// Template only: the ${...} placeholders are left in the string here and are not resolved
// by this method.
setDefault(ARTIFACT_URI,
"mvn:${" + GROUP_ID + "}:${" + ARTIFACT_ID + "}:${" + VERSION + "}");
}
Expand Down Expand Up @@ -374,7 +376,7 @@ public void applyAutoOverrides(Object aObject)
}
}

protected List<URL> getPomUrlsForClass(String aModelGroup, String aModelArtifact,
protected List<URL> getPomUrlsForClass(String aComponentGroupId, String aModelArtifactId,
Class<?> aClass)
throws IOException
{
Expand Down Expand Up @@ -418,7 +420,7 @@ protected List<URL> getPomUrlsForClass(String aModelGroup, String aModelArtifact
Matcher matcher = pattern.matcher(base);
if (matcher.matches()) {
String artifactIdAndVersion = matcher.group("ID");
String pomPattern = base + "META-INF/maven/" + aModelGroup + "/"
String pomPattern = base + "META-INF/maven/" + aComponentGroupId + "/"
+ artifactIdAndVersion + "/pom.xml";
lookupPatterns.add(pomPattern);
ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
Expand All @@ -434,9 +436,9 @@ protected List<URL> getPomUrlsForClass(String aModelGroup, String aModelArtifact
// models from the StanfordNLP module).
if (urls.isEmpty()) {
// This is the default strategy supposed to look in the JAR
String moduleArtifactId = aModelArtifact.split("-")[0];
String pomPattern = base + "META-INF/maven/" + aModelGroup + "/" + moduleArtifactId +
"*/pom.xml";
String moduleArtifactId = aModelArtifactId.split("-")[0];
String pomPattern = base + "META-INF/maven/" + aComponentGroupId + "/"
+ moduleArtifactId + "*/pom.xml";
lookupPatterns.add(pomPattern);
ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
Resource[] resources = resolver.getResources(pomPattern);
Expand Down Expand Up @@ -468,11 +470,11 @@ protected List<URL> getPomUrlsForClass(String aModelGroup, String aModelArtifact
* the POM, or if no context object was set.
* @return the version of the required model.
*/
protected String getModelVersionFromMavenPom(String aModelGroup, String aModelArtifact,
Class<?> aClass)
protected String getModelVersionFromMavenPom(String aComponentGroupId, String aModelGroupId,
String aModelArtifactId, Class<?> aClass)
throws IOException
{
List<URL> urls = getPomUrlsForClass(aModelGroup, aModelArtifact, contextClass);
List<URL> urls = getPomUrlsForClass(aComponentGroupId, aModelArtifactId, contextClass);

for (URL pomUrl : urls) {
// Parse the POM
Expand All @@ -492,8 +494,8 @@ protected String getModelVersionFromMavenPom(String aModelGroup, String aModelAr
List<Dependency> deps = model.getDependencyManagement().getDependencies();
for (Dependency dep : deps) {
if (
StringUtils.equals(dep.getGroupId(), aModelGroup) &&
StringUtils.equals(dep.getArtifactId(), aModelArtifact)
StringUtils.equals(dep.getGroupId(), aModelGroupId) &&
StringUtils.equals(dep.getArtifactId(), aModelArtifactId)
) {
return dep.getVersion();
}
Expand Down Expand Up @@ -790,12 +792,22 @@ private Properties resolveDependency(Properties aProps)
resolved.getProperty(ARTIFACT_URI, "").contains("${" + VERSION + "}") &&
isNull(resolved.getProperty(VERSION))
) {
String groupId = pph.replacePlaceholders(aProps.getProperty(GROUP_ID), resolved);
String modelGroupId = pph.replacePlaceholders(aProps.getProperty(GROUP_ID), resolved);
String componentGroupId;

if (aProps.getProperty(COMPONENT_GROUP_ID) != null) {
componentGroupId = pph.replacePlaceholders(aProps.getProperty(COMPONENT_GROUP_ID),
resolved);
}
else {
componentGroupId = modelGroupId;
}

String artifactId = pph.replacePlaceholders(aProps.getProperty(ARTIFACT_ID), resolved);
try {
// If the version is to be auto-detected, then we must have a groupId and artifactId
resolved.put(VERSION,
getModelVersionFromMavenPom(groupId, artifactId, contextClass));
resolved.put(VERSION, getModelVersionFromMavenPom(componentGroupId, modelGroupId,
artifactId, contextClass));
}
catch (Throwable e) {
log.error("Unable to obtain version from POM", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@
package org.dkpro.core.api.resources;

import static java.util.Arrays.asList;
import static org.hamcrest.CoreMatchers.is;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;

import java.io.File;
Expand Down Expand Up @@ -103,13 +102,13 @@ public void testGetUrlAsExecutable()
URL url = new URL("jar:file:src/test/resources/testfiles.zip!/testfiles/"
+ "FileSetCollectionReaderBase.class");
File file = ResourceUtils.getUrlAsExecutable(url, false);
assertThat(file.getName().endsWith("temp"), is(true));

assertThat(file.getName()).endsWith("temp");

URL url2 = new URL("jar:file:src/test/resources/testfiles.zip!/testfiles/"
+ "ResourceCollectionReaderBase.class");
file = ResourceUtils.getUrlAsExecutable(url2, true);
assertThat(file.getName().endsWith("temp"), is(true));


assertThat(file.getName()).endsWith("temp");
}

}
4 changes: 2 additions & 2 deletions dkpro-core-api-segmentation-asl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@
*/
package org.dkpro.core.api.segmentation;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertThat;
import static de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.ALL;
import static de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.HIGHEST;
import static de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.LOWEST;
import static de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.NONE;
import static org.assertj.core.api.Assertions.assertThat;

import java.util.ArrayList;
import java.util.List;
Expand All @@ -33,13 +36,11 @@
import org.junit.Test;

import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.CompoundPart;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Split;

public class CompoundTest
{

private Compound compound;

@Before
Expand All @@ -66,62 +67,39 @@ public void setUpCompound() throws UIMAException
compound.setSplits(FSCollectionFactory.createFSArray(jcas, splits));
compound.addToIndexes();
jcasBuilder.close();

}

@Test
public void testAll() throws UIMAException
{

final String[] splitsList = new String[] { "getränk", "automat", "auto", "mat" };
assertThat(coveredTextArrayFromAnnotations(
compound.getSplitsWithoutMorpheme(CompoundSplitLevel.ALL)), is(splitsList));

assertThat(compound.getSplitsWithoutMorpheme(ALL))
.extracting(Annotation::getCoveredText)
.containsExactly("getränk", "automat", "auto", "mat");
}

@Test
public void testLowest() throws UIMAException
{

final String[] splitsList = new String[] { "getränk", "auto", "mat" };
assertThat(
coveredTextArrayFromAnnotations(
compound.getSplitsWithoutMorpheme(CompoundSplitLevel.LOWEST)),
is(splitsList));
assertThat(compound.getSplitsWithoutMorpheme(LOWEST))
.extracting(Annotation::getCoveredText)
.containsExactly("getränk", "auto", "mat");

}

@Test
public void testHighest() throws UIMAException
{

final String[] splitsList = new String[] { "getränk", "automat" };
assertThat(
coveredTextArrayFromAnnotations(
compound.getSplitsWithoutMorpheme(CompoundSplitLevel.HIGHEST)),
is(splitsList));
assertThat(compound.getSplitsWithoutMorpheme(HIGHEST))
.extracting(Annotation::getCoveredText)
.containsExactly("getränk", "automat");

}

@Test
public void testNone() throws UIMAException
{

final String[] splitsList = new String[] {};
assertThat(
coveredTextArrayFromAnnotations(
compound.getSplitsWithoutMorpheme(CompoundSplitLevel.NONE)),
is(splitsList));

}

public <T extends Annotation> String[] coveredTextArrayFromAnnotations(final T[] annotations)
{
final List<String> list = new ArrayList<String>();
for (T annotation : annotations) {
list.add(annotation.getCoveredText());
}
return list.toArray(new String[list.size()]);
assertThat(compound.getSplitsWithoutMorpheme(NONE))
.extracting(Annotation::getCoveredText)
.isEmpty();
}

}
1 change: 1 addition & 0 deletions dkpro-core-asl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,7 @@
<module>../dkpro-core-ixa-asl</module>
<module>../dkpro-core-jazzy-asl</module>
<module>../dkpro-core-jtok-asl</module>
<module>../dkpro-core-jieba-asl</module>
<module>../dkpro-core-languagetool-asl</module>
<module>../dkpro-core-langdetect-asl</module>
<module>../dkpro-core-ldweb1t-asl</module>
Expand Down
10 changes: 5 additions & 5 deletions dkpro-core-corenlp-gpl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
<name>DKPro Core GPL - Stanford CoreNLP Suite (v ${corenlp.version}) (GPL)</name>
<url>https://dkpro.github.io/dkpro-core/</url>
<properties>
<corenlp.version>3.9.1</corenlp.version>
<corenlp.version>3.9.2</corenlp.version>
</properties>
<dependencies>
<dependency>
Expand Down Expand Up @@ -178,7 +178,7 @@
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-coref-en-default</artifactId>
<version>20180227.1</version>
<version>20181005.1</version>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
Expand Down Expand Up @@ -358,17 +358,17 @@
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-bidirectional-distsim</artifactId>
<version>20140616.1</version>
<version>20181002.1</version>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-left3words-distsim</artifactId>
<version>20140616.1</version>
<version>20181002.1</version>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-caseless-left3words-distsim</artifactId>
<version>20140827.0</version>
<version>20181002.0</version>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,6 @@ public class CoreNlpNamedEntityRecognizer
// on, off, auto
private boolean useSUTime = false; // = NumberSequenceClassifier.USE_SUTIME_DEFAULT;

// /**
// * Whether to read the default regular expression gazetteer.
// *
// * @see edu.stanford.nlp.pipeline.DefaultPaths#DEFAULT_NER_GAZETTE_MAPPING
// */
// public static final String PARAM_AUGMENT_REGEX_NER = "augmentRegexNER";
// @ConfigurationParameter(name = PARAM_AUGMENT_REGEX_NER, mandatory = true, defaultValue = "false")
// Commented out since the default gazetteer is currently only in the original Stanford model
// JARs
private boolean augmentRegexNER = false; // = NERClassifierCombiner.APPLY_GAZETTE_PROPERTY;

private boolean verbose = false;

private ModelProviderBase<NERCombinerAnnotator> annotatorProvider;
Expand Down Expand Up @@ -318,7 +307,7 @@ protected NERCombinerAnnotator produceResource(URL aUrl) throws IOException
}

NERClassifierCombiner combiner = new NERClassifierCombiner(applyNumericClassifiers,
useSUTime, augmentRegexNER, classifier);
useSUTime, classifier);

NERCombinerAnnotator annotator = new NERCombinerAnnotator(combiner, verbose,
numThreads, maxTime, maxSentenceLength, false, false);
Expand Down
12 changes: 6 additions & 6 deletions dkpro-core-corenlp-gpl/src/scripts/build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
- Upstream versions - meta data versions are maintained per model below
-->
<!-- http://nlp.stanford.edu/software/corenlp.shtml -->
<property name="core.arabic.date" value="2018-02-27"/>
<property name="core.chinese.date" value="2018-02-27"/>
<property name="core.english.date" value="2018-02-27"/>
<property name="core.french.date" value="2018-02-27"/>
<property name="core.german.date" value="2018-02-27"/>
<property name="core.spanish.date" value="2018-02-27"/>
<property name="core.arabic.date" value="2018-10-05"/>
<property name="core.chinese.date" value="2018-10-05"/>
<property name="core.english.date" value="2018-10-05"/>
<property name="core.french.date" value="2018-10-05"/>
<property name="core.german.date" value="2018-10-05"/>
<property name="core.spanish.date" value="2018-10-05"/>

<!--
- Output package configuration
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,14 @@ public void testTriggerReparse()
JCas jcas = runTest("en", "'Let's go! I want to see the Don', he said.");

String[][] ref = {
{ "'Let's go" },
{ "'Let's" },
{ "'" },
{ "Let's go" },
{ "Let's" },
{ "I" },
{ "the Don'", "he" } };

String[] pennTree = {
"(ROOT (FRAG (NP (NP ('' ') (NNP Let) (POS 's)) (NN go)) (. !)))",
"(ROOT (S (S (NP (POS ')) (NP (NP (NNP Let) (POS 's)) (NN go))) (. !)))",
"(ROOT (S (S (NP (PRP I)) (VP (VBP want) (S (VP (TO to) (VP (VB see) (NP (DT the) "
+ "(NX (NNP Don) (POS ')))))))) (, ,) (NP (PRP he)) (VP (VBD said)) (. .)))"
};
Expand Down
4 changes: 2 additions & 2 deletions dkpro-core-decompounding-asl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
Expand Down
Loading

0 comments on commit e4c4f5c

Please sign in to comment.