"""
Model definition.
"""
from ....imports import *
from .... import utils as U

#if U.is_tf_keras():
#    from .layers import CRF
#else:
#    from .layers_standalone import CRF
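# NOTE: CRF is imported lazily where needed (see build() and load_model())
# rather than at module import time.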


def save_model(model, weights_file, params_file):
    with open(params_file, 'w') as f:
        params = model.to_json()
        json.dump(json.loads(params), f, sort_keys=True, indent=4)
        model.save_weights(weights_file)


def load_model(weights_file, params_file):
    from .layers import CRF  # needed to deserialize models built with use_crf=True
    with open(params_file) as f:
        model = model_from_json(f.read(), custom_objects={'CRF': CRF})
        model.load_weights(weights_file)
    return model
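
# Example round trip (a minimal sketch; the file names below are illustrative
# assumptions):
#
#   save_model(model, 'weights.h5', 'params.json')     # model from BiLSTMCRF.build()
#   restored = load_model('weights.h5', 'params.json')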


class BiLSTMCRF(object):
    """A Keras implementation of BiLSTM-CRF for sequence labeling.

    References
    ----------
    Guillaume Lample, Miguel Ballesteros, Sandeep Subramanian, Kazuya Kawakami, Chris Dyer.
    "Neural Architectures for Named Entity Recognition". Proceedings of NAACL 2016.
    https://arxiv.org/abs/1603.01360
    """

    def __init__(self,
                 num_labels,
                 word_vocab_size,
                 char_vocab_size=None,
                 word_embedding_dim=100,
                 char_embedding_dim=25,
                 word_lstm_size=100,
                 char_lstm_size=25,
                 fc_dim=100,
                 dropout=0.5,
                 embeddings=None,
                 use_char=True,
                 use_crf=True,
                 char_mask_zero=True,
                 use_elmo=False,
                 use_transformer_with_dim=None):
"""Build a Bi-LSTM CRF model.
Args:
word_vocab_size (int): word vocabulary size.
char_vocab_size (int): character vocabulary size.
num_labels (int): number of entity labels.
word_embedding_dim (int): word embedding dimensions.
char_embedding_dim (int): character embedding dimensions.
word_lstm_size (int): character LSTM feature extractor output dimensions.
char_lstm_size (int): word tagger LSTM output dimensions.
fc_dim (int): output fully-connected layer size.
dropout (float): dropout rate.
embeddings (numpy array): word embedding matrix.
use_char (boolean): add char feature.
use_crf (boolean): use crf as last layer.
char_mask_zero(boolean): mask zero for character embedding (see TF2 isse #33148 and #33069)
use_elmo(boolean): If True, model will be configured to accept Elmo embeddings
as an additional input to word and character embeddings
use_transformer_with_dim(int): If not None, model will be configured to accept
transformer embeddings of given dimension
"""
        super(BiLSTMCRF, self).__init__()
        self._char_embedding_dim = char_embedding_dim
        self._word_embedding_dim = word_embedding_dim
        self._char_lstm_size = char_lstm_size
        self._word_lstm_size = word_lstm_size
        self._char_vocab_size = char_vocab_size
        self._word_vocab_size = word_vocab_size
        self._fc_dim = fc_dim
        self._dropout = dropout
        self._use_char = use_char
        self._use_crf = use_crf
        self._embeddings = embeddings
        self._num_labels = num_labels
        self._char_mask_zero = char_mask_zero
        self._use_elmo = use_elmo
        self._use_transformer_with_dim = use_transformer_with_dim

    def build(self):
        # build word embedding
        word_ids = Input(batch_shape=(None, None), dtype='int32', name='word_input')
        inputs = [word_ids]
        embedding_list = []
        # index 0 is reserved for padding: mask_zero=True lets downstream
        # layers skip padded timesteps
        if self._embeddings is None:
            word_embeddings = Embedding(input_dim=self._word_vocab_size,
                                        output_dim=self._word_embedding_dim,
                                        mask_zero=True,
                                        name='word_embedding')(word_ids)
        else:
            word_embeddings = Embedding(input_dim=self._embeddings.shape[0],
                                        output_dim=self._embeddings.shape[1],
                                        mask_zero=True,
                                        weights=[self._embeddings],
                                        name='word_embedding')(word_ids)
        embedding_list.append(word_embeddings)

        # build character-based word embedding
        if self._use_char:
            char_ids = Input(batch_shape=(None, None, None), dtype='int32', name='char_input')
            inputs.append(char_ids)
            char_embeddings = Embedding(input_dim=self._char_vocab_size,
                                        output_dim=self._char_embedding_dim,
                                        mask_zero=self._char_mask_zero,
                                        name='char_embedding')(char_ids)
            # run a character-level BiLSTM over every word position:
            # (batch, words, chars, char_dim) -> (batch, words, 2 * char_lstm_size)
            char_embeddings = TimeDistributed(Bidirectional(LSTM(self._char_lstm_size)))(char_embeddings)
            embedding_list.append(char_embeddings)

        # add elmo embedding
        if self._use_elmo:
            elmo_embeddings = Input(shape=(None, 1024), dtype='float32')
            inputs.append(elmo_embeddings)
            embedding_list.append(elmo_embeddings)

        # add transformer embedding
        if self._use_transformer_with_dim is not None:
            transformer_embeddings = Input(shape=(None, self._use_transformer_with_dim), dtype='float32')
            inputs.append(transformer_embeddings)
            embedding_list.append(transformer_embeddings)

        # concatenate embeddings
        word_embeddings = Concatenate()(embedding_list) if len(embedding_list) > 1 else embedding_list[0]

        # build model
        word_embeddings = Dropout(self._dropout)(word_embeddings)
        z = Bidirectional(LSTM(units=self._word_lstm_size, return_sequences=True))(word_embeddings)
        z = Dense(self._fc_dim, activation='tanh')(z)
        if self._use_crf:
            # use a CRF output layer; the CRF supplies its own loss function
            from .layers import CRF
            crf = CRF(self._num_labels, sparse_target=False)
            loss = crf.loss_function
            pred = crf(z)
        else:
            loss = 'categorical_crossentropy'
            pred = Dense(self._num_labels, activation='softmax')(z)
        model = Model(inputs=inputs, outputs=pred)
        return model, loss
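

# Example usage (a minimal sketch; every size below is an illustrative
# assumption, not a value defined in this module):
#
#   import numpy as np
#   builder = BiLSTMCRF(num_labels=10, word_vocab_size=10000, char_vocab_size=100)
#   model, loss = builder.build()
#   model.compile(loss=loss, optimizer='adam')
#   # word input: (batch, sentence_len); char input: (batch, sentence_len, word_len)
#   word_ids = np.random.randint(1, 10000, size=(2, 7))
#   char_ids = np.random.randint(1, 100, size=(2, 7, 5))
#   probs = model.predict([word_ids, char_ids])   # -> (2, 7, 10) label scores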