diff --git a/dkpro-core-api-datasets-asl/pom.xml b/dkpro-core-api-datasets-asl/pom.xml
index 1127528a55..2fe993e3d5 100644
--- a/dkpro-core-api-datasets-asl/pom.xml
+++ b/dkpro-core-api-datasets-asl/pom.xml
@@ -70,7 +70,7 @@
com.github.junrar
junrar
- 0.7
+ 4.0.0
org.apache.commons
diff --git a/dkpro-core-api-resources-asl/pom.xml b/dkpro-core-api-resources-asl/pom.xml
index 7d6fcce478..42eb7a8d4c 100644
--- a/dkpro-core-api-resources-asl/pom.xml
+++ b/dkpro-core-api-resources-asl/pom.xml
@@ -96,8 +96,8 @@
test
- org.hamcrest
- hamcrest-core
+ org.assertj
+ assertj-core
test
diff --git a/dkpro-core-api-resources-asl/src/main/java/org/dkpro/core/api/resources/ResourceObjectProviderBase.java b/dkpro-core-api-resources-asl/src/main/java/org/dkpro/core/api/resources/ResourceObjectProviderBase.java
index 93818ff731..5747f5b0a4 100644
--- a/dkpro-core-api-resources-asl/src/main/java/org/dkpro/core/api/resources/ResourceObjectProviderBase.java
+++ b/dkpro-core-api-resources-asl/src/main/java/org/dkpro/core/api/resources/ResourceObjectProviderBase.java
@@ -142,6 +142,7 @@ public abstract class ResourceObjectProviderBase
* resolved when {@link #configure()} is called. (optional)
*/
public static final String GROUP_ID = "groupId";
+ public static final String COMPONENT_GROUP_ID = "componentGroupId";
/**
* The artifact ID of the Maven artifact containing a resource. Variables in the location are
@@ -212,6 +213,7 @@ public abstract class ResourceObjectProviderBase
protected void init()
{
setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
+ setDefault(COMPONENT_GROUP_ID, "org.dkpro.core");
setDefault(ARTIFACT_URI,
"mvn:${" + GROUP_ID + "}:${" + ARTIFACT_ID + "}:${" + VERSION + "}");
}
@@ -374,7 +376,7 @@ public void applyAutoOverrides(Object aObject)
}
}
- protected List getPomUrlsForClass(String aModelGroup, String aModelArtifact,
+ protected List getPomUrlsForClass(String aComponentGroupId, String aModelArtifactId,
Class> aClass)
throws IOException
{
@@ -418,7 +420,7 @@ protected List getPomUrlsForClass(String aModelGroup, String aModelArtifact
Matcher matcher = pattern.matcher(base);
if (matcher.matches()) {
String artifactIdAndVersion = matcher.group("ID");
- String pomPattern = base + "META-INF/maven/" + aModelGroup + "/"
+ String pomPattern = base + "META-INF/maven/" + aComponentGroupId + "/"
+ artifactIdAndVersion + "/pom.xml";
lookupPatterns.add(pomPattern);
ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
@@ -434,9 +436,9 @@ protected List getPomUrlsForClass(String aModelGroup, String aModelArtifact
// models from the StanfordNLP module).
if (urls.isEmpty()) {
// This is the default strategy supposed to look in the JAR
- String moduleArtifactId = aModelArtifact.split("-")[0];
- String pomPattern = base + "META-INF/maven/" + aModelGroup + "/" + moduleArtifactId +
- "*/pom.xml";
+ String moduleArtifactId = aModelArtifactId.split("-")[0];
+ String pomPattern = base + "META-INF/maven/" + aComponentGroupId + "/"
+ + moduleArtifactId + "*/pom.xml";
lookupPatterns.add(pomPattern);
ResourcePatternResolver resolver = new PathMatchingResourcePatternResolver();
Resource[] resources = resolver.getResources(pomPattern);
@@ -468,11 +470,11 @@ protected List getPomUrlsForClass(String aModelGroup, String aModelArtifact
* the POM, or if no context object was set.
* @return the version of the required model.
*/
- protected String getModelVersionFromMavenPom(String aModelGroup, String aModelArtifact,
- Class> aClass)
+ protected String getModelVersionFromMavenPom(String aComponentGroupId, String aModelGroupId,
+ String aModelArtifactId, Class> aClass)
throws IOException
{
- List urls = getPomUrlsForClass(aModelGroup, aModelArtifact, contextClass);
+ List urls = getPomUrlsForClass(aComponentGroupId, aModelArtifactId, contextClass);
for (URL pomUrl : urls) {
// Parse the POM
@@ -492,8 +494,8 @@ protected String getModelVersionFromMavenPom(String aModelGroup, String aModelAr
List deps = model.getDependencyManagement().getDependencies();
for (Dependency dep : deps) {
if (
- StringUtils.equals(dep.getGroupId(), aModelGroup) &&
- StringUtils.equals(dep.getArtifactId(), aModelArtifact)
+ StringUtils.equals(dep.getGroupId(), aModelGroupId) &&
+ StringUtils.equals(dep.getArtifactId(), aModelArtifactId)
) {
return dep.getVersion();
}
@@ -790,12 +792,22 @@ private Properties resolveDependency(Properties aProps)
resolved.getProperty(ARTIFACT_URI, "").contains("${" + VERSION + "}") &&
isNull(resolved.getProperty(VERSION))
) {
- String groupId = pph.replacePlaceholders(aProps.getProperty(GROUP_ID), resolved);
+ String modelGroupId = pph.replacePlaceholders(aProps.getProperty(GROUP_ID), resolved);
+ String componentGroupId;
+
+ if (aProps.getProperty(COMPONENT_GROUP_ID) != null) {
+ componentGroupId = pph.replacePlaceholders(aProps.getProperty(COMPONENT_GROUP_ID),
+ resolved);
+ }
+ else {
+ componentGroupId = modelGroupId;
+ }
+
String artifactId = pph.replacePlaceholders(aProps.getProperty(ARTIFACT_ID), resolved);
try {
// If the version is to be auto-detected, then we must have a groupId and artifactId
- resolved.put(VERSION,
- getModelVersionFromMavenPom(groupId, artifactId, contextClass));
+ resolved.put(VERSION, getModelVersionFromMavenPom(componentGroupId, modelGroupId,
+ artifactId, contextClass));
}
catch (Throwable e) {
log.error("Unable to obtain version from POM", e);
diff --git a/dkpro-core-api-resources-asl/src/test/java/org/dkpro/core/api/resources/ResourceUtilsTest.java b/dkpro-core-api-resources-asl/src/test/java/org/dkpro/core/api/resources/ResourceUtilsTest.java
index 60efa7aab9..2e9b756bbb 100644
--- a/dkpro-core-api-resources-asl/src/test/java/org/dkpro/core/api/resources/ResourceUtilsTest.java
+++ b/dkpro-core-api-resources-asl/src/test/java/org/dkpro/core/api/resources/ResourceUtilsTest.java
@@ -18,9 +18,8 @@
package org.dkpro.core.api.resources;
import static java.util.Arrays.asList;
-import static org.hamcrest.CoreMatchers.is;
+import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import java.io.File;
@@ -103,13 +102,13 @@ public void testGetUrlAsExecutable()
URL url = new URL("jar:file:src/test/resources/testfiles.zip!/testfiles/"
+ "FileSetCollectionReaderBase.class");
File file = ResourceUtils.getUrlAsExecutable(url, false);
- assertThat(file.getName().endsWith("temp"), is(true));
+
+ assertThat(file.getName()).endsWith("temp");
URL url2 = new URL("jar:file:src/test/resources/testfiles.zip!/testfiles/"
+ "ResourceCollectionReaderBase.class");
file = ResourceUtils.getUrlAsExecutable(url2, true);
- assertThat(file.getName().endsWith("temp"), is(true));
-
+
+ assertThat(file.getName()).endsWith("temp");
}
-
}
diff --git a/dkpro-core-api-segmentation-asl/pom.xml b/dkpro-core-api-segmentation-asl/pom.xml
index e988a588ad..887c2ec3ff 100644
--- a/dkpro-core-api-segmentation-asl/pom.xml
+++ b/dkpro-core-api-segmentation-asl/pom.xml
@@ -56,8 +56,8 @@
test
- org.hamcrest
- hamcrest-core
+ org.assertj
+ assertj-core
test
diff --git a/dkpro-core-api-segmentation-asl/src/test/java/org/dkpro/core/api/segmentation/CompoundTest.java b/dkpro-core-api-segmentation-asl/src/test/java/org/dkpro/core/api/segmentation/CompoundTest.java
index 799d3c1984..64704d52ce 100644
--- a/dkpro-core-api-segmentation-asl/src/test/java/org/dkpro/core/api/segmentation/CompoundTest.java
+++ b/dkpro-core-api-segmentation-asl/src/test/java/org/dkpro/core/api/segmentation/CompoundTest.java
@@ -17,8 +17,11 @@
*/
package org.dkpro.core.api.segmentation;
-import static org.hamcrest.CoreMatchers.is;
-import static org.junit.Assert.assertThat;
+import static de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.ALL;
+import static de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.HIGHEST;
+import static de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.LOWEST;
+import static de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.NONE;
+import static org.assertj.core.api.Assertions.assertThat;
import java.util.ArrayList;
import java.util.List;
@@ -33,13 +36,11 @@
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.CompoundPart;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Split;
public class CompoundTest
{
-
private Compound compound;
@Before
@@ -66,62 +67,39 @@ public void setUpCompound() throws UIMAException
compound.setSplits(FSCollectionFactory.createFSArray(jcas, splits));
compound.addToIndexes();
jcasBuilder.close();
-
}
@Test
public void testAll() throws UIMAException
{
-
- final String[] splitsList = new String[] { "getränk", "automat", "auto", "mat" };
- assertThat(coveredTextArrayFromAnnotations(
- compound.getSplitsWithoutMorpheme(CompoundSplitLevel.ALL)), is(splitsList));
-
+ assertThat(compound.getSplitsWithoutMorpheme(ALL))
+ .extracting(Annotation::getCoveredText)
+ .containsExactly("getränk", "automat", "auto", "mat");
}
@Test
public void testLowest() throws UIMAException
{
-
- final String[] splitsList = new String[] { "getränk", "auto", "mat" };
- assertThat(
- coveredTextArrayFromAnnotations(
- compound.getSplitsWithoutMorpheme(CompoundSplitLevel.LOWEST)),
- is(splitsList));
+ assertThat(compound.getSplitsWithoutMorpheme(LOWEST))
+ .extracting(Annotation::getCoveredText)
+ .containsExactly("getränk", "auto", "mat");
}
@Test
public void testHighest() throws UIMAException
{
-
- final String[] splitsList = new String[] { "getränk", "automat" };
- assertThat(
- coveredTextArrayFromAnnotations(
- compound.getSplitsWithoutMorpheme(CompoundSplitLevel.HIGHEST)),
- is(splitsList));
+ assertThat(compound.getSplitsWithoutMorpheme(HIGHEST))
+ .extracting(Annotation::getCoveredText)
+ .containsExactly("getränk", "automat");
}
@Test
public void testNone() throws UIMAException
{
-
- final String[] splitsList = new String[] {};
- assertThat(
- coveredTextArrayFromAnnotations(
- compound.getSplitsWithoutMorpheme(CompoundSplitLevel.NONE)),
- is(splitsList));
-
- }
-
- public String[] coveredTextArrayFromAnnotations(final T[] annotations)
- {
- final List list = new ArrayList();
- for (T annotation : annotations) {
- list.add(annotation.getCoveredText());
- }
- return list.toArray(new String[list.size()]);
+ assertThat(compound.getSplitsWithoutMorpheme(NONE))
+ .extracting(Annotation::getCoveredText)
+ .isEmpty();
}
-
}
diff --git a/dkpro-core-asl/pom.xml b/dkpro-core-asl/pom.xml
index 1c255a231a..a03fe98eba 100644
--- a/dkpro-core-asl/pom.xml
+++ b/dkpro-core-asl/pom.xml
@@ -620,6 +620,7 @@
../dkpro-core-ixa-asl
../dkpro-core-jazzy-asl
../dkpro-core-jtok-asl
+ ../dkpro-core-jieba-asl
../dkpro-core-languagetool-asl
../dkpro-core-langdetect-asl
../dkpro-core-ldweb1t-asl
diff --git a/dkpro-core-corenlp-gpl/pom.xml b/dkpro-core-corenlp-gpl/pom.xml
index df1763863b..ddfde4ce71 100644
--- a/dkpro-core-corenlp-gpl/pom.xml
+++ b/dkpro-core-corenlp-gpl/pom.xml
@@ -33,7 +33,7 @@
DKPro Core GPL - Stanford CoreNLP Suite (v ${corenlp.version}) (GPL)
https://dkpro.github.io/dkpro-core/
- 3.9.1
+ 3.9.2
@@ -178,7 +178,7 @@
de.tudarmstadt.ukp.dkpro.core
de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-coref-en-default
- 20180227.1
+ 20181005.1
de.tudarmstadt.ukp.dkpro.core
@@ -358,17 +358,17 @@
de.tudarmstadt.ukp.dkpro.core
de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-bidirectional-distsim
- 20140616.1
+ 20181002.1
de.tudarmstadt.ukp.dkpro.core
de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-left3words-distsim
- 20140616.1
+ 20181002.1
de.tudarmstadt.ukp.dkpro.core
de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-caseless-left3words-distsim
- 20140827.0
+ 20181002.0
de.tudarmstadt.ukp.dkpro.core
diff --git a/dkpro-core-corenlp-gpl/src/main/java/org/dkpro/core/corenlp/CoreNlpNamedEntityRecognizer.java b/dkpro-core-corenlp-gpl/src/main/java/org/dkpro/core/corenlp/CoreNlpNamedEntityRecognizer.java
index 1c40b57f51..0fa5e11cd1 100644
--- a/dkpro-core-corenlp-gpl/src/main/java/org/dkpro/core/corenlp/CoreNlpNamedEntityRecognizer.java
+++ b/dkpro-core-corenlp-gpl/src/main/java/org/dkpro/core/corenlp/CoreNlpNamedEntityRecognizer.java
@@ -195,17 +195,6 @@ public class CoreNlpNamedEntityRecognizer
// on, off, auto
private boolean useSUTime = false; // = NumberSequenceClassifier.USE_SUTIME_DEFAULT;
-// /**
-// * Whether to read the default regular expression gazetteer.
-// *
-// * @see edu.stanford.nlp.pipeline.DefaultPaths#DEFAULT_NER_GAZETTE_MAPPING
-// */
-// public static final String PARAM_AUGMENT_REGEX_NER = "augmentRegexNER";
-// @ConfigurationParameter(name = PARAM_AUGMENT_REGEX_NER, mandatory = true, defaultValue = "false")
- // Commented out since the default gazetter is currently only in the original Stanford model
- // JARs
- private boolean augmentRegexNER = false; // = NERClassifierCombiner.APPLY_GAZETTE_PROPERTY;
-
private boolean verbose = false;
private ModelProviderBase annotatorProvider;
@@ -318,7 +307,7 @@ protected NERCombinerAnnotator produceResource(URL aUrl) throws IOException
}
NERClassifierCombiner combiner = new NERClassifierCombiner(applyNumericClassifiers,
- useSUTime, augmentRegexNER, classifier);
+ useSUTime, classifier);
NERCombinerAnnotator annotator = new NERCombinerAnnotator(combiner, verbose,
numThreads, maxTime, maxSentenceLength, false, false);
diff --git a/dkpro-core-corenlp-gpl/src/scripts/build.xml b/dkpro-core-corenlp-gpl/src/scripts/build.xml
index ef13e666f8..fa239479a0 100644
--- a/dkpro-core-corenlp-gpl/src/scripts/build.xml
+++ b/dkpro-core-corenlp-gpl/src/scripts/build.xml
@@ -27,12 +27,12 @@
- Upstream versions - meta data versions are maintained per model below
-->
-
-
-
-
-
-
+
+
+
+
+
+
+
+ 4.0.0
+
+ org.dkpro.core
+ dkpro-core-asl
+ 1.11.0-SNAPSHOT
+ ../dkpro-core-asl
+
+ dkpro-core-jieba-asl
+ DKPro Core ASL - Jieba (v ${jieba.version}) (ASL)
+ https://dkpro.github.io/dkpro-core/
+
+ 1.0.2
+
+
+
+ com.huaban
+ jieba-analysis
+ ${jieba.version}
+
+
+ org.apache.uima
+ uimaj-core
+
+
+ org.apache.uima
+ uimafit-core
+
+
+ org.dkpro.core
+ dkpro-core-api-segmentation-asl
+
+
+ eu.openminted.share.annotations
+ omtd-share-annotations-api
+
+
+ junit
+ junit
+ test
+
+
+ org.dkpro.core
+ dkpro-core-testing-asl
+ test
+
+
+
\ No newline at end of file
diff --git a/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java b/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java
new file mode 100644
index 0000000000..6d7fce7d15
--- /dev/null
+++ b/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.dkpro.core.jieba;
+
+import java.util.List;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.descriptor.LanguageCapability;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.dkpro.core.api.segmentation.SegmenterBase;
+
+import com.huaban.analysis.jieba.JiebaSegmenter.SegMode;
+import com.huaban.analysis.jieba.SegToken;
+
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
+import eu.openminted.share.annotations.api.DocumentationResource;
+
+/**
+ * Segmenter for Chinese using Jieba.
+ * <p>
+ * Sentence boundaries are placed after each ideographic full stop ({@code 。}); the text of
+ * each sentence is tokenized by Jieba in {@link SegMode#SEARCH} mode.
+ */
+@ResourceMetaData(name = "Jieba Segmenter")
+@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}")
+@LanguageCapability("zh")
+@TypeCapability(outputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence",
+        "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token" })
+public class JiebaSegmenter
+    extends SegmenterBase
+{
+    /** Character after which a sentence ends. */
+    private static final String SENTENCE_TERMINATOR = "。";
+
+    private com.huaban.analysis.jieba.JiebaSegmenter jieba;
+
+    @Override
+    public void initialize(UimaContext aContext) throws ResourceInitializationException
+    {
+        super.initialize(aContext);
+        jieba = new com.huaban.analysis.jieba.JiebaSegmenter();
+    }
+
+    @Override
+    protected void process(JCas aJCas, String text, int zoneBegin)
+        throws AnalysisEngineProcessException
+    {
+        int sentenceBegin = 0;
+        int sentenceEnd = text.indexOf(SENTENCE_TERMINATOR);
+
+        // indexOf() yields -1 when no terminator is left. Do not compare against
+        // sentenceBegin: a terminator located exactly at sentenceBegin (text starting with
+        // "。" or containing "。。") would end the loop early and leave the rest unsplit.
+        while (sentenceEnd >= 0) {
+            String stext = text.substring(sentenceBegin, sentenceEnd + 1);
+            processSentence(aJCas, stext, zoneBegin + sentenceBegin);
+
+            sentenceBegin = sentenceEnd + 1;
+            sentenceEnd = text.indexOf(SENTENCE_TERMINATOR, sentenceBegin);
+        }
+
+        // Remaining text that is not closed by a terminator
+        if (sentenceBegin < text.length()) {
+            processSentence(aJCas, text.substring(sentenceBegin), zoneBegin + sentenceBegin);
+        }
+    }
+
+    /**
+     * Tokenizes one segment with Jieba and creates the token and sentence annotations.
+     *
+     * @param aJCas
+     *            the CAS the annotations are created in.
+     * @param text
+     *            the segment; may end with the sentence terminator.
+     * @param zoneBegin
+     *            offset added to the segment-relative token offsets.
+     * @return the created sentence, or {@code null} if no token was created.
+     */
+    private Sentence processSentence(JCas aJCas, String text, int zoneBegin)
+    {
+        // The terminator is kept away from Jieba and added as a token of its own further down.
+        boolean addFinalToken = text.endsWith(SENTENCE_TERMINATOR);
+        String innerText = text;
+        if (addFinalToken) {
+            innerText = text.substring(0, text.length() - SENTENCE_TERMINATOR.length());
+        }
+
+        Annotation firstToken = null;
+        Annotation lastToken = null;
+
+        List<SegToken> tokens = jieba.process(innerText, SegMode.SEARCH);
+        for (SegToken t : tokens) {
+            Annotation ut = createToken(aJCas, t.startOffset + zoneBegin,
+                    t.endOffset + zoneBegin);
+
+            // Tokenizer reports whitespace as tokens - we don't add whitespace-only tokens.
+            if (ut == null) {
+                continue;
+            }
+
+            if (firstToken == null) {
+                firstToken = ut;
+            }
+
+            lastToken = ut;
+        }
+
+        if (addFinalToken) {
+            int terminatorBegin = zoneBegin + innerText.length();
+            Annotation finalToken = createToken(aJCas, terminatorBegin,
+                    zoneBegin + text.length());
+            if (finalToken != null) {
+                // A segment consisting of the terminator only still has to yield a sentence.
+                if (firstToken == null) {
+                    firstToken = finalToken;
+                }
+                lastToken = finalToken;
+            }
+        }
+
+        if (firstToken != null && lastToken != null) {
+            return createSentence(aJCas, firstToken.getBegin(), lastToken.getEnd());
+        }
+        else {
+            return null;
+        }
+    }
+}
diff --git a/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java b/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java
new file mode 100644
index 0000000000..a6fb386eb2
--- /dev/null
+++ b/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.dkpro.core.jieba;
+
+import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
+import static org.apache.uima.fit.util.JCasUtil.select;
+import static org.dkpro.core.testing.AssertAnnotations.assertSentence;
+import static org.dkpro.core.testing.AssertAnnotations.assertToken;
+
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+import org.dkpro.core.testing.DkproTestContext;
+import org.junit.Rule;
+import org.junit.Test;
+
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
+
+/**
+ * Tests for {@link JiebaSegmenter}.
+ */
+public class JiebaSegmenterTest
+{
+    @Test
+    public void testChinese() throws Exception
+    {
+        JCas jcas = JCasFactory.createText("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python"
+                + "和C++。我不喜欢日本和服。", "zh");
+
+        AnalysisEngine aed = createEngine(JiebaSegmenter.class);
+        aed.process(jcas);
+
+        // Each full stop is expected as a token of its own that closes its sentence
+        String[] tokens = { "这是", "一个", "伸手不见五指", "的", "黑夜", "。", "我", "叫", "孙悟空",
+                ",", "我", "爱", "北京", ",", "我", "爱", "Python", "和", "C++", "。", "我", "不",
+                "喜欢", "日本", "和服", "。" };
+
+        assertToken(tokens, select(jcas, Token.class));
+
+        String[] sentences = { "这是一个伸手不见五指的黑夜。", "我叫孙悟空,我爱北京,我爱Python和C++。",
+                "我不喜欢日本和服。" };
+
+        assertSentence(sentences, select(jcas, Sentence.class));
+    }
+
+    @Rule
+    public DkproTestContext testContext = new DkproTestContext();
+}
diff --git a/dkpro-core-lancaster-asl/pom.xml b/dkpro-core-lancaster-asl/pom.xml
index f7380d6ccd..250bce336f 100644
--- a/dkpro-core-lancaster-asl/pom.xml
+++ b/dkpro-core-lancaster-asl/pom.xml
@@ -27,9 +27,12 @@
dkpro-core-lancaster-asl
jar
- DKPro Core ASL - Lancaster
+ DKPro Core ASL - Lancaster (v ${smile.version}) (ASL)
https://dkpro.github.io/dkpro-core/
http://haifengl.github.io/smile
+
+ 1.3.1
+
org.apache.uima
@@ -46,6 +49,7 @@
com.github.haifengl
smile-nlp
+ ${smile.version}
org.dkpro.core
diff --git a/dkpro-core-stanfordnlp-gpl/pom.xml b/dkpro-core-stanfordnlp-gpl/pom.xml
index 697c6ec649..367a657e98 100644
--- a/dkpro-core-stanfordnlp-gpl/pom.xml
+++ b/dkpro-core-stanfordnlp-gpl/pom.xml
@@ -33,7 +33,7 @@
DKPro Core GPL - Stanford CoreNLP Suite (v ${corenlp.version}) - Classic API (GPL)
https://dkpro.github.io/dkpro-core/
- 3.9.1
+ 3.9.2
@@ -189,7 +189,7 @@
de.tudarmstadt.ukp.dkpro.core
de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-coref-en-default
- 20180227.1
+ 20181005.1
de.tudarmstadt.ukp.dkpro.core
@@ -369,17 +369,17 @@
de.tudarmstadt.ukp.dkpro.core
de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-bidirectional-distsim
- 20140616.1
+ 20181002.1
de.tudarmstadt.ukp.dkpro.core
de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-left3words-distsim
- 20140616.1
+ 20181002.1
de.tudarmstadt.ukp.dkpro.core
de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-caseless-left3words-distsim
- 20140827.0
+ 20181002.0
de.tudarmstadt.ukp.dkpro.core
diff --git a/dkpro-core-stanfordnlp-gpl/src/scripts/build.xml b/dkpro-core-stanfordnlp-gpl/src/scripts/build.xml
index a3fbe1c123..631378d6e6 100644
--- a/dkpro-core-stanfordnlp-gpl/src/scripts/build.xml
+++ b/dkpro-core-stanfordnlp-gpl/src/scripts/build.xml
@@ -27,20 +27,20 @@
- Upstream versions - meta data versions are maintained per model below
-->
-
-
+
+
-
-
-
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
+ ${project.compileSourceRoots}
+ ${project.testCompileSourceRoots}
dkpro-core/checkstyle.xml
basedir=${project.basedir}
true