# Reviewer notes for this patch. These '#' lines are commentary placed before
# the first "diff --git" header; the patch text below them is unchanged.
#
# PartOfSpeechTrie.java
#   * Adds the constant LEAF (the empty string "").
#   * add(): after the visible computeIfAbsent descent, stores the entry
#     LEAF -> LEAF on the node it ended on.
#   * isPrefixOf(): three checks are replaced by node.containsKey(LEAF):
#       - the return taken when the current item equals EMPTY_SYMBOL ("*")
#         (previously node.isEmpty());
#       - the early "return true" after descending into a non-null child
#         (previously guarded by node.isEmpty());
#       - the return after the loop (previously an unconditional true).
#     Visible consequences: a node that has child entries but also holds LEAF
#     now matches, and running out of items on a node without LEAF now yields
#     false instead of true.
#   * NOTE(review): LEAF is stored in the same map as real items, with a
#     String value, while both add() and isPrefixOf() cast the value found
#     under an item key to Map. An item equal to "" would therefore hit the
#     String stored under LEAF -- confirm callers never pass an empty item.
#   * NOTE(review): declarations such as "Map root" and "List items" appear
#     without type arguments here; presumably they were lost when this patch
#     text was extracted -- verify against the repository.
#
# TestSudachiPartOfSpeechStopFilter.java
#   * testPrefixWithUnmatchedSubcategory: informs the factory with the two tag
#     lines "助詞,格助詞" and "助詞,格助詞,引用", and expects the tokens
#     東京都, 東京, 都, 行っ, た (the token "に" is absent from this list).
#   * testTooLongCategory: informs the factory with the single five-element
#     tag line "名詞,固有名詞,地名,一般,一般", and expects the tokens
#     東京都, 東京, 都, に, 行っ, た.
#   * The fixture that builds tokenStream and factory is outside this patch;
#     presumably both tests analyse the same input text -- TODO confirm.
diff --git a/src/main/java/com/worksap/nlp/lucene/sudachi/ja/PartOfSpeechTrie.java b/src/main/java/com/worksap/nlp/lucene/sudachi/ja/PartOfSpeechTrie.java index a8e8fb21..e50d9e7d 100644 --- a/src/main/java/com/worksap/nlp/lucene/sudachi/ja/PartOfSpeechTrie.java +++ b/src/main/java/com/worksap/nlp/lucene/sudachi/ja/PartOfSpeechTrie.java @@ -23,6 +23,7 @@ public class PartOfSpeechTrie { static final String EMPTY_SYMBOL = "*"; + static final String LEAF = ""; Map root = new HashMap<>(); @@ -37,6 +38,7 @@ public void add(String... items) { (Map)node.computeIfAbsent(item, k -> new HashMap<>()); node = newNode; } + node.put(LEAF, LEAF); } public boolean isPrefixOf(List items, int begin, int end) { @@ -47,17 +49,17 @@ public boolean isPrefixOf(List items, int begin, int end) { for (int i = begin; i < end; i++) { String item = items.get(i); if (EMPTY_SYMBOL.equals(item)) { - return node.isEmpty(); + return node.containsKey(LEAF); } @SuppressWarnings("unchecked") Map newNode = (Map)node.get(item); node = newNode; if (node == null) { return false; - } else if (node.isEmpty()) { + } else if (node.containsKey(LEAF)) { return true; } } - return true; + return node.containsKey(LEAF); } } diff --git a/src/test/java/com/worksap/nlp/lucene/sudachi/ja/TestSudachiPartOfSpeechStopFilter.java b/src/test/java/com/worksap/nlp/lucene/sudachi/ja/TestSudachiPartOfSpeechStopFilter.java index d816517b..0728b136 100644 --- a/src/test/java/com/worksap/nlp/lucene/sudachi/ja/TestSudachiPartOfSpeechStopFilter.java +++ b/src/test/java/com/worksap/nlp/lucene/sudachi/ja/TestSudachiPartOfSpeechStopFilter.java @@ -91,4 +91,21 @@ public void testConjugationForm() throws IOException { assertTokenStreamContents(tokenStream, new String[] {"東京都", "東京", "都", "に", "行っ"}); } + + public void testPrefixWithUnmatchedSubcategory() throws IOException { + String tags = "助詞,格助詞\n助詞,格助詞,引用\n"; + factory.inform(new StringResourceLoader(tags)); + tokenStream = factory.create(tokenStream); + 
assertTokenStreamContents(tokenStream, + new String[] {"東京都", "東京", "都", "行っ", "た"}); + } + + public void testTooLongCategory() throws IOException { + String tags = "名詞,固有名詞,地名,一般,一般\n"; + factory.inform(new StringResourceLoader(tags)); + tokenStream = factory.create(tokenStream); + assertTokenStreamContents(tokenStream, + new String[] {"東京都", "東京", "都", "に", "行っ", "た"}); + } + }