In [1]:
import pycrfsuite
from config import model_name
from utils import loadData, dataset2feature, dataset2label, sent2feature

#### 加载测试数据集

In [2]:
# Read the test split once, then derive the tagger inputs (X_test) and the
# reference labels (y_test) from the same loaded data.
test_data = loadData("data/example.test")
X_test, y_test = dataset2feature(test_data), dataset2label(test_data)

loading data...
completed!
getting features...
completed!


#### 加载训练好的模型

In [3]:
# Load the trained CRF model from the path configured in config.model_name.
tagger = pycrfsuite.Tagger()
# open() returns a contextlib.closing wrapper; the trailing semicolon stops the
# notebook from echoing its repr (which embeds a per-run memory address) as the
# cell output. The tagger is deliberately left open for the cells that follow.
tagger.open(model_name);

<contextlib.closing at 0x112936518>

#### 预测

In [4]:
# Predict one tag per item of X_test with the loaded model; y_pred is compared
# against y_test in the evaluation cells below.
# NOTE(review): assumes dataset2feature returns a single item sequence that
# Tagger.tag accepts in one call — confirm against utils.dataset2feature.
y_pred = tagger.tag(X_test)

#### 效果

In [5]:
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

# Map tag strings to integer ids for the report below.
# The encoder is fitted on the gold AND predicted tags together:
# LabelEncoder.transform raises ValueError on any label it was not fitted on,
# which would happen as soon as the tagger emits a tag that never occurs in
# y_test. When every predicted tag is already present in y_test the resulting
# classes_ (and therefore the encoding) is the same as fitting on y_test alone.
le = LabelEncoder()
le.fit(list(y_test) + list(y_pred))
y_true_combined = le.transform(y_test)
y_pred_combined = le.transform(y_pred)

In [6]:
# Per-class precision / recall / F1 computed on the integer-encoded tags; the
# original tag strings from the encoder are used as the row names.
class_names = list(le.classes_)
report = classification_report(
    y_true_combined,
    y_pred_combined,
    labels=range(len(class_names)),
    target_names=class_names,
)
print(report)

             precision    recall  f1-score   support

      B-LOC       0.92      0.87      0.90      3658
      B-ORG       0.86      0.78      0.82      2185
      B-PER       0.96      0.86      0.90      1864
      I-LOC       0.89      0.87      0.88      4948
      I-ORG       0.89      0.84      0.86      8756
      I-PER       0.94      0.89      0.92      3601
          O       0.99      0.99      0.99    194185

avg / total       0.98      0.98      0.98    219197



#### 单句预测示例

In [7]:
sentence = "英国首相鲍里斯·约翰逊表示，他在昨天晚上与美国白宫通电话，双方交流了抗击疫情的经验"
# Build one (character, label) pair per character. "x" presumably just fills
# the label slot sent2feature expects, since ad-hoc text has no gold tags —
# confirm against utils.sent2feature.
sent = [(char, "x") for char in sentence]
features = sent2feature(sent)
result = tagger.tag(features)
# Show each character next to its predicted tag.
[(char, tag) for char, tag in zip(sentence, result)]

[('英', 'B-LOC'),
 ('国', 'I-LOC'),
 ('首', 'O'),
 ('相', 'O'),
 ('鲍', 'B-PER'),
 ('里', 'I-PER'),
 ('斯', 'I-PER'),
 ('·', 'I-PER'),
 ('约', 'I-PER'),
 ('翰', 'I-PER'),
 ('逊', 'I-PER'),
 ('表', 'O'),
 ('示', 'O'),
 ('，', 'O'),
 ('他', 'O'),
 ('在', 'O'),
 ('昨', 'O'),
 ('天', 'O'),
 ('晚', 'O'),
 ('上', 'O'),
 ('与', 'O'),
 ('美', 'B-ORG'),
 ('国', 'I-ORG'),
 ('白', 'I-ORG'),
 ('宫', 'I-ORG'),
 ('通', 'O'),
 ('电', 'O'),
 ('话', 'O'),
 ('，', 'O'),
 ('双', 'O'),
 ('方', 'O'),
 ('交', 'O'),
 ('流', 'O'),
 ('了', 'O'),
 ('抗', 'O'),
 ('击', 'O'),
 ('疫', 'O'),
 ('情', 'O'),
 ('的', 'O'),
 ('经', 'O'),
 ('验', 'O')]

In [8]:
sentence = "李克强来到位于江西省赣州市于都县的梓山镇潭头村看望慰问群众"
# One (character, "x") pair per character; "x" appears to be a dummy value for
# the label position of each pair — verify against utils.sent2feature.
sent = [(char, "x") for char in sentence]
features = sent2feature(sent)
result = tagger.tag(features)
# Display the characters aligned with the tags predicted for them.
[(char, tag) for char, tag in zip(sentence, result)]

[('李', 'B-PER'),
 ('克', 'I-PER'),
 ('强', 'I-PER'),
 ('来', 'O'),
 ('到', 'O'),
 ('位', 'O'),
 ('于', 'O'),
 ('江', 'B-LOC'),
 ('西', 'I-LOC'),
 ('省', 'I-LOC'),
 ('赣', 'B-LOC'),
 ('州', 'I-LOC'),
 ('市', 'I-LOC'),
 ('于', 'B-LOC'),
 ('都', 'I-LOC'),
 ('县', 'I-LOC'),
 ('的', 'O'),
 ('梓', 'B-LOC'),
 ('山', 'I-LOC'),
 ('镇', 'I-LOC'),
 ('潭', 'B-LOC'),
 ('头', 'I-LOC'),
 ('村', 'I-LOC'),
 ('看', 'O'),
 ('望', 'O'),
 ('慰', 'O'),
 ('问', 'O'),
 ('群', 'O'),
 ('众', 'O')]