Skip to content

Commit

Permalink
增加单元测试,发布新版本
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed Jul 29, 2016
1 parent 6277218 commit c6be0de
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 2 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Expand Up @@ -6,15 +6,15 @@

<groupId>com.hankcs.nlp</groupId>
<artifactId>hanlp-solr-plugin</artifactId>
<version>1.1.1</version>
<version>1.1.2</version>

<name>hanlp-solr-plugin</name>
<url>https://github.com/hankcs/HanLP</url>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>

<hanlp.version>portable-1.2.9</hanlp.version>
<hanlp.version>portable-1.2.10</hanlp.version>
<lucene.version>5.1.0</lucene.version>

<javac.src.version>1.7</javac.src.version>
Expand Down
29 changes: 29 additions & 0 deletions src/test/java/com/hankcs/lucene/HanLPAnalyzerTest.java
Expand Up @@ -3,12 +3,14 @@
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
Expand All @@ -23,8 +25,11 @@
import org.apache.lucene.store.RAMDirectory;

import java.io.File;
import java.io.StringReader;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Map;
import java.util.TreeMap;

public class HanLPAnalyzerTest extends TestCase
{
Expand Down Expand Up @@ -88,4 +93,28 @@ public void testIndexAndSearch() throws Exception
System.out.println(targetDoc.getField("content").stringValue());
}
}

/**
 * Smoke test that tokenizes a Traditional Chinese sample sentence and prints every token.
 *
 * <p>A {@code HanLPTokenizerFactory} is built with the arguments
 * {@code enableTraditionalChineseMode=true} and {@code enableNormalization=true}; the
 * tokenizer it creates is fed the sample text and, for each token, the start/end offsets,
 * position increment, term text and type are written to standard output.
 *
 * <p>The test makes no assertions: it only fails if tokenization throws.
 *
 * @throws Exception if creating, resetting, advancing or closing the tokenizer fails
 */
public void testIssue() throws Exception
{
    Map<String, String> args = new TreeMap<>();
    args.put("enableTraditionalChineseMode", "true");
    args.put("enableNormalization", "true");
    HanLPTokenizerFactory factory = new HanLPTokenizerFactory(args);
    String text = "會辦台星保證最低價的原因?";

    // try-with-resources guarantees close() even if tokenization throws,
    // releasing the reader handed to the tokenizer.
    try (Tokenizer tokenizer = factory.create())
    {
        // Attribute instances are reused across tokens, so look them up once
        // instead of on every iteration.
        CharTermAttribute termAttr = tokenizer.getAttribute(CharTermAttribute.class);
        // Character offsets of the token within the input text
        OffsetAttribute offsetAttr = tokenizer.getAttribute(OffsetAttribute.class);
        // Position increment relative to the previous token
        PositionIncrementAttribute positionAttr = tokenizer.getAttribute(PositionIncrementAttribute.class);
        // Token type (the original comment labels this as the part of speech)
        TypeAttribute typeAttr = tokenizer.getAttribute(TypeAttribute.class);

        tokenizer.setReader(new StringReader(text));
        tokenizer.reset();
        while (tokenizer.incrementToken())
        {
            System.out.printf("[%d:%d %d] %s/%s\n", offsetAttr.startOffset(), offsetAttr.endOffset(), positionAttr.getPositionIncrement(), termAttr, typeAttr.type());
        }
        // Signal end-of-stream as required by the TokenStream consumer workflow.
        tokenizer.end();
    }
}
}

0 comments on commit c6be0de

Please sign in to comment.