From 04f326428c26f8933540973d2f91517c81bae0d3 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Tue, 19 Mar 2019 21:27:53 +0100 Subject: [PATCH 01/15] #1340 - Upgrade dependencies - maven-checkstyle-plugin 2.17 -> 3.0.0 - checkstyle 8.8 -> 8.18 - junrar 0.7 -> 4.0.0 --- dkpro-core-api-datasets-asl/pom.xml | 2 +- pom.xml | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/dkpro-core-api-datasets-asl/pom.xml b/dkpro-core-api-datasets-asl/pom.xml index e3143a1eb6..d41bbfd660 100644 --- a/dkpro-core-api-datasets-asl/pom.xml +++ b/dkpro-core-api-datasets-asl/pom.xml @@ -70,7 +70,7 @@ com.github.junrar junrar - 0.7 + 4.0.0 org.apache.commons diff --git a/pom.xml b/pom.xml index 4197f5c012..666bf45d3b 100644 --- a/pom.xml +++ b/pom.xml @@ -739,7 +739,7 @@ org.apache.maven.plugins maven-checkstyle-plugin - 2.17 + 3.0.0 true @@ -750,10 +750,13 @@ com.puppycrawl.tools checkstyle - 8.8 + 8.18 + + ${project.compileSourceRoots} + ${project.testCompileSourceRoots} dkpro-core/checkstyle.xml basedir=${project.basedir} true From 90bd55e44feb106638a220e8ef7e0dfe06b672cc Mon Sep 17 00:00:00 2001 From: Tobias Horsmann Date: Mon, 25 Mar 2019 14:17:30 +0100 Subject: [PATCH 02/15] #1343 - Segmenter for Chinese New module Jieba added --- dkpro-core-asl/pom.xml | 1 + dkpro-core-jieba-asl/pom.xml | 60 +++++++++ .../org/dkpro/core/jieba/JiebaSegmenter.java | 119 ++++++++++++++++++ .../dkpro/core/jieba/JiebaSegmenterTest.java | 58 +++++++++ 4 files changed, 238 insertions(+) create mode 100644 dkpro-core-jieba-asl/pom.xml create mode 100644 dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java create mode 100644 dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java diff --git a/dkpro-core-asl/pom.xml b/dkpro-core-asl/pom.xml index 81d222fe3d..b547bb8b81 100644 --- a/dkpro-core-asl/pom.xml +++ b/dkpro-core-asl/pom.xml @@ -613,6 +613,7 @@ ../dkpro-core-ixa-asl ../dkpro-core-jazzy-asl ../dkpro-core-jtok-asl + 
../dkpro-core-jieba-asl ../dkpro-core-languagetool-asl ../dkpro-core-langdetect-asl ../dkpro-core-ldweb1t-asl diff --git a/dkpro-core-jieba-asl/pom.xml b/dkpro-core-jieba-asl/pom.xml new file mode 100644 index 0000000000..9c8fe89887 --- /dev/null +++ b/dkpro-core-jieba-asl/pom.xml @@ -0,0 +1,60 @@ + + + 4.0.0 + org.dkpro.core + dkpro-core-jieba-asl + 0.0.1-SNAPSHOT + + de.tudarmstadt.ukp.dkpro.core + de.tudarmstadt.ukp.dkpro.core-asl + 1.11.0-SNAPSHOT + ../dkpro-core-asl + + + + com.huaban + jieba-analysis + 1.0.2 + + + org.apache.uima + uimaj-core + + + org.apache.uima + uimafit-core + + + de.tudarmstadt.ukp.dkpro.core + de.tudarmstadt.ukp.dkpro.core.api.segmentation-asl + + + junit + junit + test + + + de.tudarmstadt.ukp.dkpro.core + de.tudarmstadt.ukp.dkpro.core.testing-asl + test + + + \ No newline at end of file diff --git a/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java b/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java new file mode 100644 index 0000000000..536b2d558b --- /dev/null +++ b/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.dkpro.core.jieba; + +import java.util.List; + +import org.apache.uima.UimaContext; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.fit.descriptor.LanguageCapability; +import org.apache.uima.fit.descriptor.ResourceMetaData; +import org.apache.uima.fit.descriptor.TypeCapability; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.tcas.Annotation; +import org.apache.uima.resource.ResourceInitializationException; + +import com.huaban.analysis.jieba.JiebaSegmenter.SegMode; +import com.huaban.analysis.jieba.SegToken; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.SegmenterBase; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; +import eu.openminted.share.annotations.api.DocumentationResource; + +/** + * Segmenter for Japanese using Jieba. + */ +@ResourceMetaData(name = "Jieba Segmenter") +@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}") +@LanguageCapability("zh") +@TypeCapability(outputs = { "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence", + "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token" }) +public class JiebaSegmenter + extends SegmenterBase +{ + private com.huaban.analysis.jieba.JiebaSegmenter jieba; + @Override + public void initialize(UimaContext aContext) + throws ResourceInitializationException + { + super.initialize(aContext); + jieba = new com.huaban.analysis.jieba.JiebaSegmenter(); + } + + @Override + protected void process(JCas aJCas, String text, int zoneBegin) + throws AnalysisEngineProcessException + { + int sentenceBegin = 0; + int sentenceEnd = text.indexOf("。"); + while (sentenceEnd > sentenceBegin) { + String stext = text.substring(sentenceBegin, sentenceEnd + 1); + + processSentence(aJCas, stext, zoneBegin + sentenceBegin); + + sentenceBegin = sentenceEnd + 1; + sentenceEnd = text.indexOf("。", sentenceBegin); + } + + if (sentenceBegin < text.length()) { + String stext = 
text.substring(sentenceBegin, text.length()); + processSentence(aJCas, stext, zoneBegin + sentenceBegin); + } + } + + private Sentence processSentence(JCas aJCas, String text, int zoneBegin) + { + String innerText = text; + boolean addFinalToken = false; + if (innerText.endsWith("。")) { + innerText = text.substring(0, text.length() - 1); + addFinalToken = true; + } + + Annotation firstToken = null; + Annotation lastToken = null; + + List tokens = jieba.process(innerText, SegMode.SEARCH); + for (SegToken t : tokens) { + Annotation ut = createToken(aJCas, t.startOffset + zoneBegin, t.endOffset + zoneBegin); + + // Tokenizer reports whitespace as tokens - we don't add whitespace-only tokens. + if (ut == null) { + continue; + } + + if (firstToken == null) { + firstToken = ut; + } + + lastToken = ut; + } + + if (addFinalToken) { + lastToken = createToken(aJCas, zoneBegin + text.length() - 1, + zoneBegin + text.length()); + } + + if (firstToken != null && lastToken != null) { + return createSentence(aJCas, firstToken.getBegin(), lastToken.getEnd()); + } + else { + return null; + } + } +} diff --git a/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java b/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java new file mode 100644 index 0000000000..d750b849ab --- /dev/null +++ b/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java @@ -0,0 +1,58 @@ +package org.dkpro.core.jieba; +/* + * Licensed to the Technische Universität Darmstadt under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The Technische Universität Darmstadt + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
+ * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; +import static org.apache.uima.fit.util.JCasUtil.select; + +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.fit.factory.JCasFactory; +import org.apache.uima.jcas.JCas; +import org.junit.Rule; +import org.junit.Test; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations; +import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext; + +public class JiebaSegmenterTest +{ + + @Test + public void testChinese() throws Exception + { + JCas jcas = JCasFactory.createText("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。我不喜欢日本和服。", "zh"); + + AnalysisEngine aed = createEngine(JiebaSegmenter.class); + aed.process(jcas); + + String[] tokens = { "这是", "一个", "伸手不见五指", "的", "黑夜", "。", "我", "叫", "孙悟空", ",", "我", "爱", "北京", ",", + "我", "爱", "Python", "和", "C++", "。", "我", "不", "喜欢", "日本", "和服", "。" }; + + AssertAnnotations.assertToken(tokens, select(jcas, Token.class)); + + String [] sentences= {"这是一个伸手不见五指的黑夜。", "我叫孙悟空,我爱北京,我爱Python和C++。", "我不喜欢日本和服。"}; + AssertAnnotations.assertSentence(sentences, select(jcas, Sentence.class)); + } + + @Rule + public DkproTestContext testContext = new DkproTestContext(); + +} From 1ecac948310e996bdabe5d455c68c0e2103043fe Mon Sep 17 00:00:00 2001 From: Tobias Horsmann Date: Mon, 25 Mar 2019 19:03:25 +0100 Subject: [PATCH 03/15] #1343 - Segmenter for Chinese added LICENSE.txt --- dkpro-core-jieba-asl/LICENSE.txt | 202 
+++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 dkpro-core-jieba-asl/LICENSE.txt diff --git a/dkpro-core-jieba-asl/LICENSE.txt b/dkpro-core-jieba-asl/LICENSE.txt new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/dkpro-core-jieba-asl/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. From 6cfe1dfaf48c353e4cee290a4fc8a9d180ad3e2a Mon Sep 17 00:00:00 2001 From: Tobias Horsmann Date: Mon, 25 Mar 2019 20:54:50 +0100 Subject: [PATCH 04/15] #1343 - Segmenter for Chinese added missing dependency --- dkpro-core-jieba-asl/pom.xml | 87 +++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/dkpro-core-jieba-asl/pom.xml b/dkpro-core-jieba-asl/pom.xml index 9c8fe89887..6ad6a2e4e7 100644 --- a/dkpro-core-jieba-asl/pom.xml +++ b/dkpro-core-jieba-asl/pom.xml @@ -16,45 +16,50 @@ limitations under the License. 
--> - 4.0.0 - org.dkpro.core - dkpro-core-jieba-asl - 0.0.1-SNAPSHOT - - de.tudarmstadt.ukp.dkpro.core - de.tudarmstadt.ukp.dkpro.core-asl - 1.11.0-SNAPSHOT - ../dkpro-core-asl - - - - com.huaban - jieba-analysis - 1.0.2 - - - org.apache.uima - uimaj-core - - - org.apache.uima - uimafit-core - - - de.tudarmstadt.ukp.dkpro.core - de.tudarmstadt.ukp.dkpro.core.api.segmentation-asl - - - junit - junit - test - - - de.tudarmstadt.ukp.dkpro.core - de.tudarmstadt.ukp.dkpro.core.testing-asl - test - - + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + 4.0.0 + org.dkpro.core + dkpro-core-jieba-asl + 0.0.1-SNAPSHOT + + de.tudarmstadt.ukp.dkpro.core + de.tudarmstadt.ukp.dkpro.core-asl + 1.11.0-SNAPSHOT + ../dkpro-core-asl + + + + com.huaban + jieba-analysis + 1.0.2 + + + org.apache.uima + uimaj-core + + + org.apache.uima + uimafit-core + + + de.tudarmstadt.ukp.dkpro.core + de.tudarmstadt.ukp.dkpro.core.api.segmentation-asl + + + eu.openminted.share.annotations + omtd-share-annotations-api + 3.0.2.7 + + + junit + junit + test + + + de.tudarmstadt.ukp.dkpro.core + de.tudarmstadt.ukp.dkpro.core.testing-asl + test + + \ No newline at end of file From cd91caa309a8230602118bacbf704daca4d8b747 Mon Sep 17 00:00:00 2001 From: Tobias Horsmann Date: Mon, 25 Mar 2019 22:05:23 +0100 Subject: [PATCH 05/15] #1343 - Segmenter for Chinese code style --- dkpro-core-jieba-asl/pom.xml | 2 ++ .../org/dkpro/core/jieba/JiebaSegmenter.java | 6 +++--- .../dkpro/core/jieba/JiebaSegmenterTest.java | 19 +++++++++---------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/dkpro-core-jieba-asl/pom.xml b/dkpro-core-jieba-asl/pom.xml index 6ad6a2e4e7..ceafbcbb87 100644 --- a/dkpro-core-jieba-asl/pom.xml +++ b/dkpro-core-jieba-asl/pom.xml @@ -62,4 +62,6 @@ test + DKPro Core ASL - Jieba + https://dkpro.github.io/dkpro-core/ \ No newline at end of file diff --git 
a/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java b/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java index 536b2d558b..d286d842d7 100644 --- a/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java +++ b/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java @@ -36,7 +36,7 @@ import eu.openminted.share.annotations.api.DocumentationResource; /** - * Segmenter for Japanese using Jieba. + * Segmenter for Chinese using Jieba. */ @ResourceMetaData(name = "Jieba Segmenter") @DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}") @@ -47,9 +47,9 @@ public class JiebaSegmenter extends SegmenterBase { private com.huaban.analysis.jieba.JiebaSegmenter jieba; + @Override - public void initialize(UimaContext aContext) - throws ResourceInitializationException + public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); jieba = new com.huaban.analysis.jieba.JiebaSegmenter(); diff --git a/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java b/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java index d750b849ab..763d45b28c 100644 --- a/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java +++ b/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java @@ -17,7 +17,6 @@ * limitations under the License. 
*/ - import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; import static org.apache.uima.fit.util.JCasUtil.select; @@ -34,24 +33,24 @@ public class JiebaSegmenterTest { - + @Test public void testChinese() throws Exception { JCas jcas = JCasFactory.createText("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。我不喜欢日本和服。", "zh"); - + AnalysisEngine aed = createEngine(JiebaSegmenter.class); aed.process(jcas); - - String[] tokens = { "这是", "一个", "伸手不见五指", "的", "黑夜", "。", "我", "叫", "孙悟空", ",", "我", "爱", "北京", ",", - "我", "爱", "Python", "和", "C++", "。", "我", "不", "喜欢", "日本", "和服", "。" }; - + + String[] tokens = { "这是", "一个", "伸手不见五指", "的", "黑夜", "。", "我", "叫", "孙悟空", ",", "我", "爱", + "北京", ",", "我", "爱", "Python", "和", "C++", "。", "我", "不", "喜欢", "日本", "和服", "。" }; + AssertAnnotations.assertToken(tokens, select(jcas, Token.class)); - - String [] sentences= {"这是一个伸手不见五指的黑夜。", "我叫孙悟空,我爱北京,我爱Python和C++。", "我不喜欢日本和服。"}; + + String[] sentences = { "这是一个伸手不见五指的黑夜。", "我叫孙悟空,我爱北京,我爱Python和C++。", "我不喜欢日本和服。" }; AssertAnnotations.assertSentence(sentences, select(jcas, Sentence.class)); } - + @Rule public DkproTestContext testContext = new DkproTestContext(); From 304e790915633c3df7776d4307102c88140a391e Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Tue, 30 Apr 2019 09:17:04 +0200 Subject: [PATCH 06/15] #1343 - Segmenter for Chinese - Fixed groupIds and artifactIds in pom.xml - Fixed imports - Formatting --- dkpro-core-jieba-asl/pom.xml | 94 +++++++++---------- .../org/dkpro/core/jieba/JiebaSegmenter.java | 2 +- .../dkpro/core/jieba/JiebaSegmenterTest.java | 23 +++-- 3 files changed, 61 insertions(+), 58 deletions(-) diff --git a/dkpro-core-jieba-asl/pom.xml b/dkpro-core-jieba-asl/pom.xml index ceafbcbb87..85543f3cc7 100644 --- a/dkpro-core-jieba-asl/pom.xml +++ b/dkpro-core-jieba-asl/pom.xml @@ -16,52 +16,52 @@ limitations under the License. 
--> - 4.0.0 + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + 4.0.0 + org.dkpro.core - dkpro-core-jieba-asl - 0.0.1-SNAPSHOT - - de.tudarmstadt.ukp.dkpro.core - de.tudarmstadt.ukp.dkpro.core-asl - 1.11.0-SNAPSHOT - ../dkpro-core-asl - - - - com.huaban - jieba-analysis - 1.0.2 - - - org.apache.uima - uimaj-core - - - org.apache.uima - uimafit-core - - - de.tudarmstadt.ukp.dkpro.core - de.tudarmstadt.ukp.dkpro.core.api.segmentation-asl - - - eu.openminted.share.annotations - omtd-share-annotations-api - 3.0.2.7 - - - junit - junit - test - - - de.tudarmstadt.ukp.dkpro.core - de.tudarmstadt.ukp.dkpro.core.testing-asl - test - - - DKPro Core ASL - Jieba - https://dkpro.github.io/dkpro-core/ + dkpro-core-asl + 1.11.0-SNAPSHOT + ../dkpro-core-asl + + dkpro-core-jieba-asl + DKPro Core ASL - Jieba (v ${jieba.version}) (ASL) + https://dkpro.github.io/dkpro-core/ + + 1.0.2 + + + + com.huaban + jieba-analysis + ${jieba.version} + + + org.apache.uima + uimaj-core + + + org.apache.uima + uimafit-core + + + org.dkpro.core + dkpro-core-api-segmentation-asl + + + eu.openminted.share.annotations + omtd-share-annotations-api + + + junit + junit + test + + + org.dkpro.core + dkpro-core-testing-asl + test + + \ No newline at end of file diff --git a/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java b/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java index d286d842d7..6d7fce7d15 100644 --- a/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java +++ b/dkpro-core-jieba-asl/src/main/java/org/dkpro/core/jieba/JiebaSegmenter.java @@ -27,11 +27,11 @@ import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; import org.apache.uima.resource.ResourceInitializationException; +import org.dkpro.core.api.segmentation.SegmenterBase; import com.huaban.analysis.jieba.JiebaSegmenter.SegMode; import 
com.huaban.analysis.jieba.SegToken; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.SegmenterBase; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; import eu.openminted.share.annotations.api.DocumentationResource; diff --git a/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java b/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java index 763d45b28c..a6fb386eb2 100644 --- a/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java +++ b/dkpro-core-jieba-asl/src/test/java/org/dkpro/core/jieba/JiebaSegmenterTest.java @@ -19,39 +19,42 @@ import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; import static org.apache.uima.fit.util.JCasUtil.select; +import static org.dkpro.core.testing.AssertAnnotations.assertSentence; +import static org.dkpro.core.testing.AssertAnnotations.assertToken; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.fit.factory.JCasFactory; import org.apache.uima.jcas.JCas; +import org.dkpro.core.testing.DkproTestContext; import org.junit.Rule; import org.junit.Test; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; -import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations; -import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext; public class JiebaSegmenterTest { - @Test public void testChinese() throws Exception { - JCas jcas = JCasFactory.createText("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。我不喜欢日本和服。", "zh"); + JCas jcas = JCasFactory.createText("这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python" + + "和C++。我不喜欢日本和服。", "zh"); AnalysisEngine aed = createEngine(JiebaSegmenter.class); aed.process(jcas); - String[] tokens = { "这是", "一个", "伸手不见五指", "的", "黑夜", "。", "我", "叫", "孙悟空", ",", "我", "爱", - "北京", ",", "我", "爱", "Python", "和", "C++", "。", "我", "不", "喜欢", "日本", "和服", "。" }; + String[] tokens = { "这是", "一个", "伸手不见五指", 
"的", "黑夜", "。", "我", "叫", "孙悟空", + ",", "我", "爱", "北京", ",", "我", "爱", "Python", "和", "C++", "。", "我", "不", + "喜欢", "日本", "和服", "。" }; - AssertAnnotations.assertToken(tokens, select(jcas, Token.class)); + assertToken(tokens, select(jcas, Token.class)); - String[] sentences = { "这是一个伸手不见五指的黑夜。", "我叫孙悟空,我爱北京,我爱Python和C++。", "我不喜欢日本和服。" }; - AssertAnnotations.assertSentence(sentences, select(jcas, Sentence.class)); + String[] sentences = { "这是一个伸手不见五指的黑夜。", "我叫孙悟空,我爱北京,我爱Python和C++。", + "我不喜欢日本和服。" }; + + assertSentence(sentences, select(jcas, Sentence.class)); } @Rule public DkproTestContext testContext = new DkproTestContext(); - } From 1ddd7dba957e24d359a9d1a6637aae5882699260 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Fri, 3 May 2019 10:44:41 +0200 Subject: [PATCH 07/15] #1340 - Upgrade dependencies (1.11.0) - snakeyaml 1.19 -> 1.24 - commons-collections4 4.1 -> 4.3 - commons-coded 1.11 -> 1.12 - commons-lang 3.8 -> 3.9 - ant 1.10.2 -> 1.10.5 - hamcrest-core 1.3 -> 2.1 - fastutil 7.0.13 -> 8.2.2 --- dkpro-core-lancaster-asl/pom.xml | 6 +++++- pom.xml | 20 ++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/dkpro-core-lancaster-asl/pom.xml b/dkpro-core-lancaster-asl/pom.xml index 25efb51ab6..c3785a396b 100644 --- a/dkpro-core-lancaster-asl/pom.xml +++ b/dkpro-core-lancaster-asl/pom.xml @@ -27,9 +27,12 @@ dkpro-core-lancaster-asl jar - DKPro Core ASL - Lancaster + DKPro Core ASL - Lancaster (v ${smile.version}) (ASL https://dkpro.github.io/dkpro-core/ http://haifengl.github.io/smile + + 1.3.1 + org.apache.uima @@ -46,6 +49,7 @@ com.github.haifengl smile-nlp + ${smile.version} org.dkpro.core diff --git a/pom.xml b/pom.xml index 1fa06fa2e3..4016334bd9 100644 --- a/pom.xml +++ b/pom.xml @@ -49,6 +49,7 @@ 1.7.25 6g 1.2.0 + 64.2 @@ -246,7 +247,7 @@ org.yaml snakeyaml - 1.19 + 1.24 xerces @@ -316,12 +317,12 @@ org.apache.commons commons-collections4 - 4.1 + 4.3 commons-codec commons-codec - 1.11 + 1.12 commons-io @@ 
-336,7 +337,7 @@ org.apache.commons commons-lang3 - 3.8 + 3.9 org.apache.commons @@ -348,15 +349,10 @@ xz 1.8 - - com.github.haifengl - smile-nlp - 1.3.1 - org.apache.ant ant - 1.10.2 + 1.10.5 jaxen @@ -440,12 +436,12 @@ org.hamcrest hamcrest-core - 1.3 + 2.1 it.unimi.dsi fastutil - 7.0.13 + 8.2.2 org.apache.ivy From e188d9997967d93a9d1e46293da75b837dc1046a Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Fri, 3 May 2019 10:48:12 +0200 Subject: [PATCH 08/15] #1357 - Upgrade to ICU4J 64.2 - icu4j 61.1 -> 64.2 --- dkpro-core-icu-asl/pom.xml | 2 +- pom.xml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dkpro-core-icu-asl/pom.xml b/dkpro-core-icu-asl/pom.xml index 3c24c28b7c..b2c13e9a60 100644 --- a/dkpro-core-icu-asl/pom.xml +++ b/dkpro-core-icu-asl/pom.xml @@ -27,7 +27,7 @@ dkpro-core-icu-asl jar - DKPro Core ASL - ICU + DKPro Core ASL - ICU (v ${icu4j.version}) (ASL) https://dkpro.github.io/dkpro-core/ diff --git a/pom.xml b/pom.xml index 1fa06fa2e3..8313d352b6 100644 --- a/pom.xml +++ b/pom.xml @@ -49,6 +49,7 @@ 1.7.25 6g 1.2.0 + 64.2 @@ -286,7 +287,7 @@ com.ibm.icu icu4j - 61.1 + ${icu4j.version} commons-logging From 56437e9f55f57087bc17a2ae160d3f39050acbab Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Fri, 3 May 2019 10:50:26 +0200 Subject: [PATCH 09/15] #1358 - Improve error messages in TSV3 - Improve error messages in TSV3 when an annotation cannot be persisted because it starts before the first token or ends beyond the last token. 
--- .../tsv3x/Tsv3XCasDocumentBuilder.java | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/dkpro-core-io-webanno-asl/src/main/java/org/dkpro/core/io/webanno/tsv/internal/tsv3x/Tsv3XCasDocumentBuilder.java b/dkpro-core-io-webanno-asl/src/main/java/org/dkpro/core/io/webanno/tsv/internal/tsv3x/Tsv3XCasDocumentBuilder.java index ccb8295974..afb53bf999 100644 --- a/dkpro-core-io-webanno-asl/src/main/java/org/dkpro/core/io/webanno/tsv/internal/tsv3x/Tsv3XCasDocumentBuilder.java +++ b/dkpro-core-io-webanno-asl/src/main/java/org/dkpro/core/io/webanno/tsv/internal/tsv3x/Tsv3XCasDocumentBuilder.java @@ -37,6 +37,7 @@ import java.util.HashSet; import java.util.List; import java.util.ListIterator; +import java.util.Map.Entry; import java.util.NavigableMap; import java.util.Set; import java.util.TreeMap; @@ -133,8 +134,24 @@ public static TsvDocument of(TsvSchema aSchema, JCas aJCas) end = targetFS.getEnd(); } - TsvToken beginToken = tokenBeginIndex.floorEntry(begin).getValue(); - TsvToken endToken = tokenEndIndex.ceilingEntry(end).getValue(); + Entry beginTokenEntry = tokenBeginIndex.floorEntry(begin); + if (beginTokenEntry == null) { + throw new IllegalStateException( + "Unable to find begin token starting at or before " + begin + + " (first token starts at " + + tokenBeginIndex.pollFirstEntry().getKey() + + ") for annotation: " + annotation); + } + + Entry endTokenEntry = tokenEndIndex.ceilingEntry(end); + if (endTokenEntry == null) { + throw new IllegalStateException("Unable to find end token ending at or after " + + end + " (last token ends at " + tokenEndIndex.pollLastEntry().getKey() + + ") for annotation: " + annotation); + } + + TsvToken beginToken = beginTokenEntry.getValue(); + TsvToken endToken = endTokenEntry.getValue(); // For zero-width annotations, the begin token must match the end token. 
// Zero-width annotations between two directly adjacent tokens are always From 14dd51811a9caab3d75f4aedc37f4b5a218710a0 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Fri, 3 May 2019 13:27:40 +0200 Subject: [PATCH 10/15] #1340 - Upgrade dependencies (1.11.0) - Drop hamcrest and use assertj instead --- dkpro-core-api-resources-asl/pom.xml | 4 +- .../core/api/resources/ResourceUtilsTest.java | 11 ++-- dkpro-core-api-segmentation-asl/pom.xml | 4 +- .../core/api/segmentation/CompoundTest.java | 56 ++++++------------- dkpro-core-decompounding-asl/pom.xml | 4 +- .../dictionary/SimpleDictionaryTest.java | 20 +++---- .../splitter/BananaSplitterTest.java | 8 ++- .../splitter/DataDrivenAlgorithmTest.java | 8 +-- .../splitter/DecompoundedWordTest.java | 13 +++-- .../decompounding/splitter/FragmentTest.java | 6 +- .../splitter/JWordSplitterTest.java | 12 ++-- .../LeftToRightSplitAlgorithmTest.java | 6 +- .../uima/annotator/CompoundAnnotatorTest.java | 19 +++---- pom.xml | 5 -- 14 files changed, 74 insertions(+), 102 deletions(-) diff --git a/dkpro-core-api-resources-asl/pom.xml b/dkpro-core-api-resources-asl/pom.xml index cfb434ab2b..97fb4a913e 100644 --- a/dkpro-core-api-resources-asl/pom.xml +++ b/dkpro-core-api-resources-asl/pom.xml @@ -96,8 +96,8 @@ test - org.hamcrest - hamcrest-core + org.assertj + assertj-core test diff --git a/dkpro-core-api-resources-asl/src/test/java/org/dkpro/core/api/resources/ResourceUtilsTest.java b/dkpro-core-api-resources-asl/src/test/java/org/dkpro/core/api/resources/ResourceUtilsTest.java index 60efa7aab9..2e9b756bbb 100644 --- a/dkpro-core-api-resources-asl/src/test/java/org/dkpro/core/api/resources/ResourceUtilsTest.java +++ b/dkpro-core-api-resources-asl/src/test/java/org/dkpro/core/api/resources/ResourceUtilsTest.java @@ -18,9 +18,8 @@ package org.dkpro.core.api.resources; import static java.util.Arrays.asList; -import static org.hamcrest.CoreMatchers.is; +import static org.assertj.core.api.Assertions.assertThat; import 
static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import java.io.File; @@ -103,13 +102,13 @@ public void testGetUrlAsExecutable() URL url = new URL("jar:file:src/test/resources/testfiles.zip!/testfiles/" + "FileSetCollectionReaderBase.class"); File file = ResourceUtils.getUrlAsExecutable(url, false); - assertThat(file.getName().endsWith("temp"), is(true)); + + assertThat(file.getName()).endsWith("temp"); URL url2 = new URL("jar:file:src/test/resources/testfiles.zip!/testfiles/" + "ResourceCollectionReaderBase.class"); file = ResourceUtils.getUrlAsExecutable(url2, true); - assertThat(file.getName().endsWith("temp"), is(true)); - + + assertThat(file.getName()).endsWith("temp"); } - } diff --git a/dkpro-core-api-segmentation-asl/pom.xml b/dkpro-core-api-segmentation-asl/pom.xml index a83e8569c9..a64a65654e 100644 --- a/dkpro-core-api-segmentation-asl/pom.xml +++ b/dkpro-core-api-segmentation-asl/pom.xml @@ -56,8 +56,8 @@ test - org.hamcrest - hamcrest-core + org.assertj + assertj-core test diff --git a/dkpro-core-api-segmentation-asl/src/test/java/org/dkpro/core/api/segmentation/CompoundTest.java b/dkpro-core-api-segmentation-asl/src/test/java/org/dkpro/core/api/segmentation/CompoundTest.java index 799d3c1984..64704d52ce 100644 --- a/dkpro-core-api-segmentation-asl/src/test/java/org/dkpro/core/api/segmentation/CompoundTest.java +++ b/dkpro-core-api-segmentation-asl/src/test/java/org/dkpro/core/api/segmentation/CompoundTest.java @@ -17,8 +17,11 @@ */ package org.dkpro.core.api.segmentation; -import static org.hamcrest.CoreMatchers.is; -import static org.junit.Assert.assertThat; +import static de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.ALL; +import static de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.HIGHEST; +import static de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.LOWEST; +import static 
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel.NONE; +import static org.assertj.core.api.Assertions.assertThat; import java.util.ArrayList; import java.util.List; @@ -33,13 +36,11 @@ import org.junit.Test; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Compound.CompoundSplitLevel; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.CompoundPart; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Split; public class CompoundTest { - private Compound compound; @Before @@ -66,62 +67,39 @@ public void setUpCompound() throws UIMAException compound.setSplits(FSCollectionFactory.createFSArray(jcas, splits)); compound.addToIndexes(); jcasBuilder.close(); - } @Test public void testAll() throws UIMAException { - - final String[] splitsList = new String[] { "getränk", "automat", "auto", "mat" }; - assertThat(coveredTextArrayFromAnnotations( - compound.getSplitsWithoutMorpheme(CompoundSplitLevel.ALL)), is(splitsList)); - + assertThat(compound.getSplitsWithoutMorpheme(ALL)) + .extracting(Annotation::getCoveredText) + .containsExactly("getränk", "automat", "auto", "mat"); } @Test public void testLowest() throws UIMAException { - - final String[] splitsList = new String[] { "getränk", "auto", "mat" }; - assertThat( - coveredTextArrayFromAnnotations( - compound.getSplitsWithoutMorpheme(CompoundSplitLevel.LOWEST)), - is(splitsList)); + assertThat(compound.getSplitsWithoutMorpheme(LOWEST)) + .extracting(Annotation::getCoveredText) + .containsExactly("getränk", "auto", "mat"); } @Test public void testHighest() throws UIMAException { - - final String[] splitsList = new String[] { "getränk", "automat" }; - assertThat( - coveredTextArrayFromAnnotations( - compound.getSplitsWithoutMorpheme(CompoundSplitLevel.HIGHEST)), - is(splitsList)); + assertThat(compound.getSplitsWithoutMorpheme(HIGHEST)) + .extracting(Annotation::getCoveredText) + 
.containsExactly("getränk", "automat"); } @Test public void testNone() throws UIMAException { - - final String[] splitsList = new String[] {}; - assertThat( - coveredTextArrayFromAnnotations( - compound.getSplitsWithoutMorpheme(CompoundSplitLevel.NONE)), - is(splitsList)); - - } - - public String[] coveredTextArrayFromAnnotations(final T[] annotations) - { - final List list = new ArrayList(); - for (T annotation : annotations) { - list.add(annotation.getCoveredText()); - } - return list.toArray(new String[list.size()]); + assertThat(compound.getSplitsWithoutMorpheme(NONE)) + .extracting(Annotation::getCoveredText) + .isEmpty(); } - } diff --git a/dkpro-core-decompounding-asl/pom.xml b/dkpro-core-decompounding-asl/pom.xml index 421d186a50..da7f9827d1 100644 --- a/dkpro-core-decompounding-asl/pom.xml +++ b/dkpro-core-decompounding-asl/pom.xml @@ -115,8 +115,8 @@ test - org.hamcrest - hamcrest-core + org.assertj + assertj-core test diff --git a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/dictionary/SimpleDictionaryTest.java b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/dictionary/SimpleDictionaryTest.java index 50f7e196e5..21faa318e6 100644 --- a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/dictionary/SimpleDictionaryTest.java +++ b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/dictionary/SimpleDictionaryTest.java @@ -18,16 +18,15 @@ package org.dkpro.core.decompounding.dictionary; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.not; -import static org.junit.Assert.assertThat; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; import org.dkpro.core.api.resources.ResourceUtils; import org.dkpro.core.decompounding.dictionary.SimpleDictionary; -import org.junit.Assert; import 
org.junit.Before; import org.junit.Test; @@ -47,18 +46,17 @@ public void setUp() throws IOException @Test public void testContains() { - Assert.assertEquals(72508, dict.getAll().size()); + assertEquals(72508, dict.getAll().size()); - Assert.assertTrue(dict.contains("worauf")); - Assert.assertTrue(dict.contains("woraufhin")); - Assert.assertTrue(dict.contains("woraus")); + assertTrue(dict.contains("worauf")); + assertTrue(dict.contains("woraufhin")); + assertTrue(dict.contains("woraus")); } @Test public void testDictionary() { - assertThat(dict.getAll().size(), not(0)); - assertThat(dict.contains("zu"), is(true)); + assertThat(dict.getAll()).isNotEmpty(); + assertThat(dict.contains("zu")).isTrue(); } - } diff --git a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/BananaSplitterTest.java b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/BananaSplitterTest.java index 9f7b245bf1..3429590190 100644 --- a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/BananaSplitterTest.java +++ b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/BananaSplitterTest.java @@ -17,9 +17,8 @@ **/ package org.dkpro.core.decompounding.splitter; -import static org.hamcrest.CoreMatchers.is; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; import java.io.File; import java.io.IOException; @@ -41,6 +40,7 @@ public void testSplitter() throws IOException splitter.setDictionary(new SimpleDictionary("Garage", "einfahrt")); List result = splitter.split("Garageneinfahrt").getAllSplits(); + assertEquals(2, result.size()); assertEquals("Garageneinfahrt", result.get(0).toString()); assertEquals("garage(n)+einfahrt", result.get(1).toString()); @@ -55,7 +55,9 @@ public void testSplitter2() throws IOException Dictionary dict = new SimpleDictionary(dictFile, "UTF-8"); 
BananaSplitterAlgorithm splitter = new BananaSplitterAlgorithm(); splitter.setDictionary(dict); + List result = splitter.split("geräteelektronik").getAllSplits(); - assertThat(result.size(), is(1)); + + assertThat(result).hasSize(1); } } diff --git a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/DataDrivenAlgorithmTest.java b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/DataDrivenAlgorithmTest.java index 3bcc5dabfc..fb64f155f9 100644 --- a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/DataDrivenAlgorithmTest.java +++ b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/DataDrivenAlgorithmTest.java @@ -18,9 +18,8 @@ package org.dkpro.core.decompounding.splitter; -import static org.hamcrest.CoreMatchers.is; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; import java.io.File; import java.io.IOException; @@ -29,8 +28,6 @@ import org.dkpro.core.api.resources.ResourceUtils; import org.dkpro.core.decompounding.dictionary.LinkingMorphemes; import org.dkpro.core.decompounding.dictionary.SimpleDictionary; -import org.dkpro.core.decompounding.splitter.DataDrivenSplitterAlgorithm; -import org.dkpro.core.decompounding.splitter.DecompoundedWord; import org.junit.Test; public class DataDrivenAlgorithmTest @@ -64,6 +61,7 @@ public void testSplit2() throws IOException LinkingMorphemes morphemes = new LinkingMorphemes(morphemesFile); DataDrivenSplitterAlgorithm splitter = new DataDrivenSplitterAlgorithm(dict, morphemes); List result = splitter.split("geräteelektronik").getAllSplits(); - assertThat(result.size(), is(1)); + + assertThat(result).hasSize(1); } } diff --git a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/DecompoundedWordTest.java 
b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/DecompoundedWordTest.java index be6ab8e886..2775c27aea 100644 --- a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/DecompoundedWordTest.java +++ b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/DecompoundedWordTest.java @@ -18,10 +18,9 @@ package org.dkpro.core.decompounding.splitter; -import static org.hamcrest.CoreMatchers.is; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import java.util.ArrayList; @@ -145,8 +144,9 @@ public void testIsCompound() { DecompoundedWord s1 = DecompoundedWord.createFromString("Aktion(s)+plan"); DecompoundedWord s2 = DecompoundedWord.createFromString("Aktionsplan"); - assertThat(s1.isCompound(), is(true)); - assertThat(s2.isCompound(), is(false)); + + assertThat(s1.isCompound()).isTrue(); + assertThat(s2.isCompound()).isFalse(); } @Test @@ -154,7 +154,8 @@ public void testHasLastFragmentMorpheme() { DecompoundedWord s1 = DecompoundedWord.createFromString("Aktion(s)+plan"); DecompoundedWord s2 = DecompoundedWord.createFromString("unter+flur+konvektor(en)"); - assertThat(s1.hasLastFragmentMorpheme(), is(false)); - assertThat(s2.hasLastFragmentMorpheme(), is(true)); + + assertThat(s1.hasLastFragmentMorpheme()).isFalse(); + assertThat(s2.hasLastFragmentMorpheme()).isTrue(); } } diff --git a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/FragmentTest.java b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/FragmentTest.java index 855f309916..a08e27e65a 100644 --- a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/FragmentTest.java +++ 
b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/FragmentTest.java @@ -18,8 +18,9 @@ package org.dkpro.core.decompounding.splitter; +import static org.assertj.core.api.Assertions.assertThat; + import org.dkpro.core.decompounding.splitter.Fragment; -import org.hamcrest.CoreMatchers; import org.junit.Assert; import org.junit.Test; @@ -72,6 +73,7 @@ public void testEquals() public void testCreateFromString() { Fragment fragm = Fragment.createFromString("("); - Assert.assertThat(fragm.getWord(), CoreMatchers.is("(")); + + assertThat(fragm.getWord()).isEqualTo("("); } } diff --git a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/JWordSplitterTest.java b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/JWordSplitterTest.java index 8f0885412a..9c79fd8cf5 100644 --- a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/JWordSplitterTest.java +++ b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/JWordSplitterTest.java @@ -17,9 +17,8 @@ **/ package org.dkpro.core.decompounding.splitter; -import static org.hamcrest.CoreMatchers.is; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; import java.io.File; import java.io.IOException; @@ -40,6 +39,7 @@ public void testSplitter() { JWordSplitterAlgorithm splitter = new JWordSplitterAlgorithm(); List result = splitter.split("Aktionsplan").getAllSplits(); + assertEquals(2, result.size()); assertEquals("Aktionsplan", result.get(0).toString()); assertEquals("Aktion(s)+plan", result.get(1).toString()); @@ -51,12 +51,12 @@ public void testSplliter2() throws IOException { final File dictFile = ResourceUtils.getUrlAsFile(getClass().getResource( "/de/tudarmstadt/ukp/dkpro/core/decompounding/lib/spelling-de-igerman98.dic"), false); - ; Dictionary dict = new SimpleDictionary(dictFile, 
"UTF-8"); splitter.setDictionary(dict); List result = splitter.split("geräteelektronik").getAllSplits(); - assertThat(result.size(),is(1)); + + assertThat(result).hasSize(1); } @Test @@ -65,11 +65,11 @@ public void testSplliter3() throws IOException { final File dictFile = ResourceUtils.getUrlAsFile(getClass().getResource( "/de/tudarmstadt/ukp/dkpro/core/decompounding/lib/spelling-de-igerman98.dic"), false); - ; Dictionary dict = new SimpleDictionary(dictFile, "UTF-8"); splitter.setDictionary(dict); List result = splitter.split("Schwerwiegend").getAllSplits(); - assertThat(result.size(),is(1)); + + assertThat(result).hasSize(1); } } diff --git a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/LeftToRightSplitAlgorithmTest.java b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/LeftToRightSplitAlgorithmTest.java index 11dc8012e7..aeee010c7d 100644 --- a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/LeftToRightSplitAlgorithmTest.java +++ b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/splitter/LeftToRightSplitAlgorithmTest.java @@ -18,9 +18,8 @@ package org.dkpro.core.decompounding.splitter; -import static org.hamcrest.CoreMatchers.is; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; import java.io.File; import java.io.IOException; @@ -91,6 +90,7 @@ public void testMorphemes1() morphemes); List result = algo.split("alarmreaktionen").getAllSplits(); + // Super+mann+anzug, Supermann+anzug assertEquals(3, result.size()); assertEquals("alarmreaktionen", result.get(0).toString()); @@ -115,6 +115,6 @@ public void testSplit4() throws IOException List result = splitter.split("geräteelektronik").getAllSplits(); - assertThat(result.size(),is(1)); + assertThat(result).hasSize(1); } } diff --git 
a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/uima/annotator/CompoundAnnotatorTest.java b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/uima/annotator/CompoundAnnotatorTest.java index 8de8e36133..beb0a022e2 100644 --- a/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/uima/annotator/CompoundAnnotatorTest.java +++ b/dkpro-core-decompounding-asl/src/test/java/org/dkpro/core/decompounding/uima/annotator/CompoundAnnotatorTest.java @@ -19,8 +19,7 @@ import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; import static org.apache.uima.fit.factory.ExternalResourceFactory.createExternalResourceDescription; -import static org.hamcrest.CoreMatchers.is; -import static org.junit.Assert.assertThat; +import static org.assertj.core.api.Assertions.assertThat; import java.io.File; import java.util.ArrayList; @@ -160,14 +159,15 @@ private void runAnnotator(AnalysisEngineDescription aed, String[] splits, ae.typeSystemInit(cas.getTypeSystem()); ae.process(cas); - String[] compounds = new String[] {"Aktionsplan", "Doppelprozessormaschine"}; - String[] linkingMorphemes = new String[] {"s"}; - // Check if splits and morphemes are equal - assertThat(getAnnotation(cas.getJCas(), Compound.class), is(compounds)); - assertThat(getAnnotation(cas.getJCas(), Split.class), is(splits)); - assertThat(getAnnotation(cas.getJCas(), CompoundPart.class), is(compoundsParts)); - assertThat(getAnnotation(cas.getJCas(), LinkingMorpheme.class), is(linkingMorphemes)); + assertThat(getAnnotation(cas.getJCas(), Compound.class)) + .containsExactly("Aktionsplan", "Doppelprozessormaschine"); + assertThat(getAnnotation(cas.getJCas(), Split.class)) + .containsExactly(splits); + assertThat(getAnnotation(cas.getJCas(), CompoundPart.class)) + .containsExactly(compoundsParts); + assertThat(getAnnotation(cas.getJCas(), LinkingMorpheme.class)) + .containsExactly("s"); } protected String[] getAnnotation(JCas aCas, 
Class aClass) @@ -194,5 +194,4 @@ public static void tearDown() index.delete(); } - } diff --git a/pom.xml b/pom.xml index 4016334bd9..459d2a2e15 100644 --- a/pom.xml +++ b/pom.xml @@ -433,11 +433,6 @@ xmlunit 1.6 - - org.hamcrest - hamcrest-core - 2.1 - it.unimi.dsi fastutil From 9911dbaf22b88da92ef1456a3244df02bdd876e3 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Fri, 3 May 2019 15:23:47 +0200 Subject: [PATCH 11/15] No issue. Fixed JavaDoc error. --- .../dkpro/core/io/lxf/internal/DKPro2Lxf.java | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/dkpro-core-io-lxf-asl/src/main/java/org/dkpro/core/io/lxf/internal/DKPro2Lxf.java b/dkpro-core-io-lxf-asl/src/main/java/org/dkpro/core/io/lxf/internal/DKPro2Lxf.java index 029cfe93aa..3cfef43f9d 100644 --- a/dkpro-core-io-lxf-asl/src/main/java/org/dkpro/core/io/lxf/internal/DKPro2Lxf.java +++ b/dkpro-core-io-lxf-asl/src/main/java/org/dkpro/core/io/lxf/internal/DKPro2Lxf.java @@ -63,13 +63,12 @@ public static void convert(JCas aJCas, LxfGraph aSource, LxfGraph aTarget) * the layer was present in the source than the tool from the source will be used for the layer. * Otherwise the toolName will be used. 
* - * @param toolName + * @param aToolName * - Tool name for new layers * @param aSource * - original lxf for DKPro - * @return */ - public static Map createIdMap(String toolName, LxfGraph aSource) + public static Map createIdMap(String aToolName, LxfGraph aSource) { Map ids = new HashMap<>(); if (aSource != null) { @@ -77,14 +76,18 @@ public static Map createIdMap(String toolName, LxfGraph aSource) ids.put(n.getType(), n.getOrigin()); } } - if (!ids.containsKey(LAYER_DEPENDENCY)) - ids.put(LAYER_DEPENDENCY, toolName); - if (!ids.containsKey(LAYER_MORPHOLOGY)) - ids.put(LAYER_MORPHOLOGY, toolName); - if (!ids.containsKey(LAYER_SENTENCE)) - ids.put(LAYER_SENTENCE, toolName); - if (!ids.containsKey(LAYER_TOKEN)) - ids.put(LAYER_TOKEN, toolName); + if (!ids.containsKey(LAYER_DEPENDENCY)) { + ids.put(LAYER_DEPENDENCY, aToolName); + } + if (!ids.containsKey(LAYER_MORPHOLOGY)) { + ids.put(LAYER_MORPHOLOGY, aToolName); + } + if (!ids.containsKey(LAYER_SENTENCE)) { + ids.put(LAYER_SENTENCE, aToolName); + } + if (!ids.containsKey(LAYER_TOKEN)) { + ids.put(LAYER_TOKEN, aToolName); + } return ids; } @@ -97,22 +100,22 @@ public static Map createIdMap(String toolName, LxfGraph aSource) * the original LXF. If this is non-null, then delta-mode is enabled. * @param aTarget * the target LXF. - * @param tooName + * @param aToolName * the name of the tool generating the new annotation - * @param ids + * @param aIds * The ids of the tool responsible for generation of the annotation Layer. The key is * the annotation layer. The value is the tool that generates the annotation. 
*/ public static void convert(JCas aJCas, LxfGraph aSource, LxfGraph aTarget, - Map ids, String toolName) + Map aIds, String aToolName) { if (aSource == null) { aTarget.setMedia(new LxfText(aJCas.getDocumentText())); } - ToolGeneratorIndex toolEdgeIndex = new ToolGeneratorIndex(ids.values()); - ToolGeneratorIndex toolNodeIndex = new ToolGeneratorIndex(ids.values()); - ToolGeneratorIndex toolRegionIndex = new ToolGeneratorIndex(ids.values()); + ToolGeneratorIndex toolEdgeIndex = new ToolGeneratorIndex(aIds.values()); + ToolGeneratorIndex toolNodeIndex = new ToolGeneratorIndex(aIds.values()); + ToolGeneratorIndex toolRegionIndex = new ToolGeneratorIndex(aIds.values()); NodeIterator iter = new NodeIterator(aSource); Map> idxSentTok = indexCovered(aJCas, Sentence.class, @@ -124,7 +127,7 @@ public static void convert(JCas aJCas, LxfGraph aSource, LxfGraph aTarget, for (Sentence sentence : select(aJCas, Sentence.class)) { LxfNode sentenceNode; - String toolid = ids.get(LAYER_SENTENCE); + String toolid = aIds.get(LAYER_SENTENCE); if (aSource == null || needsExport(aJCas, sentence)) { // Sentence region @@ -148,7 +151,7 @@ public static void convert(JCas aJCas, LxfGraph aSource, LxfGraph aTarget, for (Token token : tokens) { // Convert or obtain token node LxfNode tokenNode; - toolid = ids.get(LAYER_TOKEN); + toolid = aIds.get(LAYER_TOKEN); if (aSource == null || needsExport(aJCas, token)) { LxfRegion tokenRegion = new LxfRegion(toolid, toolRegionIndex.nextIndex(toolid), @@ -168,7 +171,7 @@ public static void convert(JCas aJCas, LxfGraph aSource, LxfGraph aTarget, tokenNode = iter.next(toolid, LAYER_TOKEN); } - toolid = ids.get(LAYER_MORPHOLOGY); + toolid = aIds.get(LAYER_MORPHOLOGY); // Convert POS if exists - if we create a node, pass it on to the lemma conversion // as well @@ -200,7 +203,7 @@ public static void convert(JCas aJCas, LxfGraph aSource, LxfGraph aTarget, if (lemma != null && (aSource == null || needsExport(aJCas, lemma))) { LxfNode lemmaNode = newMorphNode 
? morphNode : null; if (lemmaNode == null) { - lemmaNode = new LxfNode(LAYER_MORPHOLOGY, toolName, + lemmaNode = new LxfNode(LAYER_MORPHOLOGY, aToolName, toolNodeIndex.nextIndex(toolid), 0); aTarget.addNode(lemmaNode); aTarget.addEdge(new LxfEdge(lemmaNode.getOrigin(), @@ -212,15 +215,16 @@ public static void convert(JCas aJCas, LxfGraph aSource, LxfGraph aTarget, } - toolid = ids.get(LAYER_DEPENDENCY); + toolid = aIds.get(LAYER_DEPENDENCY); // Dependencies Collection deps = idxSentDep.get(sentence); for (Dependency dep : deps) { - if (aSource != null && !needsExport(aJCas, dep)) + if (aSource != null && !needsExport(aJCas, dep)) { continue; + } LxfNode depNode = new LxfNode(LAYER_DEPENDENCY, toolid, toolNodeIndex.nextIndex(toolid), 0); From 71921b0689b14aa6239a6a5d7c16d2a4b30f6ea2 Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Mon, 6 May 2019 16:45:24 +0200 Subject: [PATCH 12/15] #1299 - Update to CoreNLP 3.9.2 - Updated CoreNLP dependency - Updated some English models --- dkpro-core-corenlp-gpl/pom.xml | 10 ++--- .../corenlp/CoreNlpNamedEntityRecognizer.java | 13 +------ dkpro-core-corenlp-gpl/src/scripts/build.xml | 12 +++--- dkpro-core-stanfordnlp-gpl/pom.xml | 10 ++--- .../src/scripts/build.xml | 37 ++++++++++--------- 5 files changed, 37 insertions(+), 45 deletions(-) diff --git a/dkpro-core-corenlp-gpl/pom.xml b/dkpro-core-corenlp-gpl/pom.xml index 9c7f647aa9..a0a6be78b0 100644 --- a/dkpro-core-corenlp-gpl/pom.xml +++ b/dkpro-core-corenlp-gpl/pom.xml @@ -33,7 +33,7 @@ DKPro Core GPL - Stanford CoreNLP Suite (v ${corenlp.version}) (GPL) https://dkpro.github.io/dkpro-core/ - 3.9.1 + 3.9.2 @@ -178,7 +178,7 @@ de.tudarmstadt.ukp.dkpro.core de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-coref-en-default - 20180227.1 + 20181005.1 de.tudarmstadt.ukp.dkpro.core @@ -358,17 +358,17 @@ de.tudarmstadt.ukp.dkpro.core de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-bidirectional-distsim - 20140616.1 + 20181002.1 de.tudarmstadt.ukp.dkpro.core 
de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-left3words-distsim - 20140616.1 + 20181002.1 de.tudarmstadt.ukp.dkpro.core de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-tagger-en-caseless-left3words-distsim - 20140827.0 + 20181002.0 de.tudarmstadt.ukp.dkpro.core diff --git a/dkpro-core-corenlp-gpl/src/main/java/org/dkpro/core/corenlp/CoreNlpNamedEntityRecognizer.java b/dkpro-core-corenlp-gpl/src/main/java/org/dkpro/core/corenlp/CoreNlpNamedEntityRecognizer.java index bcbebc2616..65ff620090 100644 --- a/dkpro-core-corenlp-gpl/src/main/java/org/dkpro/core/corenlp/CoreNlpNamedEntityRecognizer.java +++ b/dkpro-core-corenlp-gpl/src/main/java/org/dkpro/core/corenlp/CoreNlpNamedEntityRecognizer.java @@ -195,17 +195,6 @@ public class CoreNlpNamedEntityRecognizer // on, off, auto private boolean useSUTime = false; // = NumberSequenceClassifier.USE_SUTIME_DEFAULT; -// /** -// * Whether to read the default regular expression gazetteer. -// * -// * @see edu.stanford.nlp.pipeline.DefaultPaths#DEFAULT_NER_GAZETTE_MAPPING -// */ -// public static final String PARAM_AUGMENT_REGEX_NER = "augmentRegexNER"; -// @ConfigurationParameter(name = PARAM_AUGMENT_REGEX_NER, mandatory = true, defaultValue = "false") - // Commented out since the default gazetter is currently only in the original Stanford model - // JARs - private boolean augmentRegexNER = false; // = NERClassifierCombiner.APPLY_GAZETTE_PROPERTY; - private boolean verbose = false; private ModelProviderBase annotatorProvider; @@ -318,7 +307,7 @@ protected NERCombinerAnnotator produceResource(URL aUrl) throws IOException } NERClassifierCombiner combiner = new NERClassifierCombiner(applyNumericClassifiers, - useSUTime, augmentRegexNER, classifier); + useSUTime, classifier); NERCombinerAnnotator annotator = new NERCombinerAnnotator(combiner, verbose, numThreads, maxTime, maxSentenceLength, false, false); diff --git a/dkpro-core-corenlp-gpl/src/scripts/build.xml b/dkpro-core-corenlp-gpl/src/scripts/build.xml index 
95b5e7b3e4..ada433a232 100644 --- a/dkpro-core-corenlp-gpl/src/scripts/build.xml +++ b/dkpro-core-corenlp-gpl/src/scripts/build.xml @@ -27,12 +27,12 @@ - Upstream versions - meta data versions are maintained per model below --> - - - - - - + + + + + + - - + + - - - + + + - - - - - - + + + + + +