/
train_ctb5_dep.py
27 lines (26 loc) · 1.21 KB
/
train_ctb5_dep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-28 18:33
from hanlp.components.parsers.biaffine_parser_tf import BiaffineDependencyParserTF
from hanlp.datasets.parsing.ctb5 import CTB5_DEP_TRAIN, CTB5_DEP_DEV, CTB5_DEP_TEST
from hanlp.pretrained.word2vec import CTB5_FASTTEXT_300_CN
from tests import cdroot
# Runs before any relative path is used. Presumably moves the working
# directory to the project root so `save_dir` resolves the same way no matter
# where the script is launched from -- TODO confirm against `tests.cdroot`.
cdroot()

# Single location passed to fit(), load() and evaluate() below.
save_dir = 'data/model/dep/biaffine_ctb'

# Options for the pretrained word-embedding layer. The vectors come from
# CTB5_FASTTEXT_300_CN and the layer is declared with 'trainable': False.
embedding_config = {
    'trainable': False,
    'embeddings_initializer': 'zero',
    'filepath': CTB5_FASTTEXT_300_CN,
    'expand_vocab': True,
    'lowercase': True,
    'normalize': True,
}
pretrained_embed = {
    'class_name': 'HanLP>Word2VecEmbedding',
    'config': embedding_config,
}

# Fit on the CTB5 training set with the dev set as the second dataset
# argument; the parser is given `save_dir` as its output location.
parser = BiaffineDependencyParserTF()
parser.fit(
    CTB5_DEP_TRAIN,
    CTB5_DEP_DEV,
    save_dir,
    pretrained_embed=pretrained_embed,
)

# Reload from `save_dir` before predicting and evaluating.
parser.load(save_dir)

# Smoke test: one sentence given as (word, part-of-speech tag) pairs.
sentence = [
    ('中国', 'NR'),
    ('批准', 'VV'),
    ('设立', 'VV'),
    ('外商', 'NN'),
    ('投资', 'NN'),
    ('企业', 'NN'),
    ('逾', 'VV'),
    ('三十万', 'CD'),
    ('家', 'M'),
]
print(parser.predict(sentence))

# Final evaluation on the CTB5 test set; the return value is not used here.
parser.evaluate(CTB5_DEP_TEST, save_dir)