Skip to content

Commit

Permalink
更新CRF词法分析模型
Browse files: browse the repository at this point in the history
  • Loading branch information
hankcs committed Jul 2, 2018
1 parent eae3b71 commit 049f849
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 5 deletions.
6 changes: 3 additions & 3 deletions src/main/java/com/hankcs/hanlp/HanLP.java
Original file line number Diff line number Diff line change
Expand Up @@ -172,15 +172,15 @@ public static final class Config
/**
* CRF分词模型
*/
public static String CRFCWSModelPath = "data/model/crf/pku199801/cws.bin";
public static String CRFCWSModelPath = "data/model/crf/pku199801/cws.txt";
/**
* CRF词性标注模型
*/
public static String CRFPOSModelPath = "data/model/crf/pku199801/pos.bin";
public static String CRFPOSModelPath = "data/model/crf/pku199801/pos.txt";
/**
* CRF命名实体识别模型
*/
public static String CRFNERModelPath = "data/model/crf/pku199801/ner.bin";
public static String CRFNERModelPath = "data/model/crf/pku199801/ner.txt";
/**
* 感知机分词模型
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public CRFNERecognizer(String modelPath) throws IOException
else
{
perceptronNERecognizer = new PerceptronNERecognizer(this.model);
tagSet = perceptronNERecognizer.getNERTagSet();
}
}

Expand Down
7 changes: 5 additions & 2 deletions src/test/java/com/hankcs/demo/DemoMultithreadingSegment.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@
package com.hankcs.demo;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.model.crf.CRFLexicalAnalyzer;
import com.hankcs.hanlp.seg.CRF.CRFSegment;
import com.hankcs.hanlp.seg.Segment;

import java.io.IOException;

/**
* 演示多线程并行分词
* 由于HanLP的任何分词器都是线程安全的,所以用户只需调用一个配置接口就可以启用任何分词器的并行化
Expand All @@ -22,9 +25,9 @@
*/
public class DemoMultithreadingSegment
{
public static void main(String[] args)
public static void main(String[] args) throws IOException
{
Segment segment = new CRFSegment(); // CRF分词器效果好,速度慢,并行化之后可以提高一些速度
Segment segment = new CRFLexicalAnalyzer(HanLP.Config.CRFCWSModelPath).enableCustomDictionary(false); // CRF分词器效果好,速度慢,并行化之后可以提高一些速度

String text = "举办纪念活动铭记二战历史,不忘战争带给人类的深重灾难,是为了防止悲剧重演,确保和平永驻;" +
"铭记二战历史,更是为了提醒国际社会,需要共同捍卫二战胜利成果和国际公平正义," +
Expand Down

0 comments on commit 049f849

Please sign in to comment.