/
test_zzz_ner_v1.py
67 lines (48 loc) · 1.85 KB
/
test_zzz_ner_v1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python3
"""
Tests of ktrain text classification flows
"""
import testenv
import IPython
from unittest import TestCase, main, skip
import numpy as np
import os
os.environ['DISABLE_V2_BEHAVIOR'] = '1'
import ktrain
from ktrain import text as txt
class TestNERClassification(TestCase):
    """Integration test for ktrain's sequence-tagging (NER) workflow.

    Trains a BiLSTM-CRF tagger on CoNLL-2003 for one epoch, then exercises
    validation, top-loss inspection, weight decay, model save/load, and the
    predictor save/load round trip. Requires the CoNLL-2003 data locally and
    network access for the fastText embeddings.
    """

    def setUp(self):
        # Build train/validation NER datasets plus a preprocessor from the
        # CoNLL-2003 training file, with character-level features enabled.
        train_path = 'conll2003/train.txt'
        train_ds, valid_ds, preprocessor = txt.entities_from_txt(
            train_path, use_char=True)
        self.trn = train_ds
        self.val = valid_ds
        self.preproc = preprocessor

    def test_ner(self):
        # Pretrained fastText crawl vectors feed the word-embedding layer.
        embeddings = 'https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.vec.gz'
        tagger = txt.sequence_tagger(
            'bilstm-crf', self.preproc, wv_path_or_url=embeddings)
        learner = ktrain.get_learner(
            tagger, train_data=self.trn, val_data=self.val, batch_size=128)
        learning_rate = 0.01
        learner.fit(learning_rate, 1)

        # One epoch on CoNLL-2003 should comfortably clear this score.
        self.assertGreater(learner.validate(), 0.65)

        # top_losses yields (index, ...) entries; the index must be a valid
        # position within the validation set.
        top = learner.top_losses(n=1)
        self.assertIn(top[0][0], list(range(len(self.val.x))))
        learner.view_top_losses(n=1)

        # Weight decay is unset by default and can be applied afterwards.
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # Round-trip the trained model through disk.
        learner.save_model('/tmp/test_model')
        learner.load_model('/tmp/test_model')

        # Predictor: the second-to-last token ('Smith.') is expected to be
        # tagged as inside a person entity, before and after a save/load.
        sentence = 'There is a man named John Smith.'
        predictor = ktrain.get_predictor(learner.model, self.preproc)
        self.assertEqual(predictor.predict(sentence)[-2][1], 'I-PER')
        predictor.save('/tmp/test_predictor')
        predictor = ktrain.load_predictor('/tmp/test_predictor')
        self.assertEqual(predictor.predict(sentence)[-2][1], 'I-PER')
if __name__ == "__main__":
main()