fixed embedder with soft inputs
Former-commit-id: 128c56f
ZhitingHu committed Jul 18, 2018
1 parent 640bf24 commit 672cd73
Showing 4 changed files with 29 additions and 9 deletions.
5 changes: 2 additions & 3 deletions texar/modules/embedders/embedder_base.py
@@ -41,7 +41,7 @@ def _init_parameterized_embedding(self, init_value, num_embeds, hparams):
         if self._dim_rank == 1:
             self._dim = self._dim[0]

-    def _get_dropout_layer(self, hparams, inputs, dropout_strategy=None):
+    def _get_dropout_layer(self, hparams, ids_rank, dropout_strategy=None):
         """Creates dropout layer according to dropout strategy.

         Called in :meth:`_build()`.
@@ -55,8 +55,7 @@ def _get_dropout_layer(self, hparams, inputs, dropout_strategy=None):
         if st == 'element':
             noise_shape = None
         elif st == 'item':
-            index_rank = len(inputs.shape.dims)
-            noise_shape = [None] * index_rank + [1] * self._dim_rank
+            noise_shape = [None] * ids_rank + [1] * self._dim_rank
         elif st == 'item_type':
             noise_shape = [None] + [1] * self._dim_rank
         else:
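For context, `noise_shape` is what distinguishes the three dropout strategies; the change above only replaces a tensor-derived rank with the explicitly passed `ids_rank`, so the same layer can serve both hard and soft inputs. A minimal sketch (TF 1.x; the shapes are illustrative assumptions, not from the commit):

import tensorflow as tf

ids_rank, dim_rank = 2, 1                        # ids: [batch, time]; dim: [emb_dim]
noise_shapes = {
    'element': None,                             # drop each scalar entry independently
    'item': [None] * ids_rank + [1] * dim_rank,  # mask [batch, time, 1]: whole vector per token
    'item_type': [None] + [1] * dim_rank,        # mask [vocab, 1]: whole row of the table
}

outputs = tf.random_normal([32, 10, 100])        # [batch, time, emb_dim], static shape
layer = tf.layers.Dropout(rate=0.3, noise_shape=noise_shapes['item'])
dropped = layer.apply(outputs, training=True)    # mask broadcasts across the emb_dim axis

`None` entries in `noise_shape` are resolved against the input's statically known dimensions, which is why the 'item' strategy needs a known rank; 'item_type' dropout is applied to the embedding table itself, as the embedders.py diff below shows.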
18 changes: 16 additions & 2 deletions texar/modules/embedders/embedders.py
@@ -134,15 +134,29 @@ def _build(self, ids=None, soft_ids=None, mode=None, **kwargs):
         and `shape(embedding) = [vocab_size, emb_dim]`, then the return
         tensor has shape `[batch_size, max_time, emb_dim]`.
         """
+        if ids is not None:
+            if soft_ids is not None:
+                raise ValueError(
+                    'Must not specify `ids` and `soft_ids` at the same time.')
+            ids_rank = len(ids.shape.dims)
+        elif soft_ids is not None:
+            ids_rank = len(soft_ids.shape.dims) - 1
+        else:
+            raise ValueError('Either `ids` or `soft_ids` must be given.')
+
         embedding = self._embedding
-        dropout_layer = self._get_dropout_layer(self._hparams, ids)
+
+        dropout_layer = self._get_dropout_layer(self._hparams, ids_rank)
         if dropout_layer:
             is_training = utils.is_train_mode(mode)
             if self._hparams.dropout_strategy == 'item_type':
                 embedding = dropout_layer.apply(
                     inputs=embedding, training=is_training)

-        outputs = tf.nn.embedding_lookup(embedding, ids, **kwargs)
+        if ids is not None:
+            outputs = tf.nn.embedding_lookup(embedding, ids, **kwargs)
+        else:
+            outputs = embedder_utils.soft_embedding_lookup(embedding, soft_ids)
+
         if dropout_layer and self._hparams.dropout_strategy != 'item_type':
             outputs = dropout_layer.apply(
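A usage sketch of the new soft-ids path (hypothetical sizes; assumes `embedder.embedding` exposes the `[vocab_size, dim]` table and that `soft_embedding_lookup` computes the usual probability-weighted sum over rows):

import tensorflow as tf
from texar.modules import WordEmbedder

embedder = WordEmbedder(vocab_size=100, hparams={'dim': 20})
ids = tf.ones([64, 16], dtype=tf.int32)
soft_ids = tf.nn.softmax(tf.random_normal([64, 16, 100]))  # one distribution per position

hard_outputs = embedder(ids=ids)            # row gather:   [64, 16, 20]
soft_outputs = embedder(soft_ids=soft_ids)  # weighted sum: [64, 16, 20]

# The soft path contracts the vocab axis against the embedding table:
manual = tf.tensordot(soft_ids, embedder.embedding, axes=[[2], [0]])

This path keeps the lookup differentiable with respect to `soft_ids`, which is the point of soft inputs such as Gumbel-softmax samples.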
12 changes: 9 additions & 3 deletions texar/modules/embedders/embedders_test.py
@@ -25,9 +25,13 @@ def _test_word_embedder(self, hparams):
"""
embedder = WordEmbedder(
vocab_size=100, hparams=hparams)

inputs = tf.ones([64, 16], dtype=tf.int32)
outputs = embedder(inputs)

inputs_soft = tf.ones([64, 16, embedder.vocab_size], dtype=tf.float32)
outputs_soft = embedder(soft_ids=inputs_soft)

emb_dim = embedder.dim
if not isinstance(emb_dim, (list, tuple)):
emb_dim = [emb_dim]
@@ -37,16 +41,18 @@ def _test_word_embedder(self, hparams):
             hparams_dim = [hparams["dim"]]

         self.assertEqual(outputs.shape, [64, 16] + emb_dim)
+        self.assertEqual(outputs_soft.shape, [64, 16] + emb_dim)
         self.assertEqual(emb_dim, hparams_dim)
         self.assertEqual(embedder.vocab_size, 100)
         self.assertEqual(len(embedder.trainable_variables), 1)

         with self.test_session() as sess:
             sess.run(tf.global_variables_initializer())
-            outputs_ = sess.run(
-                outputs,
+            outputs_, outputs_soft_ = sess.run(
+                [outputs, outputs_soft],
                 feed_dict={global_mode(): tf.estimator.ModeKeys.TRAIN})
             self.assertEqual(outputs_.shape, (64, 16) + tuple(emb_dim))
+            self.assertEqual(outputs_soft_.shape, (64, 16) + tuple(emb_dim))

     def _test_position_embedder(self, hparams):
         """Tests :class:`texar.modules.PositionEmbedder`.
@@ -80,7 +86,6 @@ def _test_position_embedder(self, hparams):
             self.assertEqual(outputs_.shape,
                              (64, max_seq_length) + tuple(emb_dim))

-
     def test_embedder(self):
         """Tests various embedders.
         """
@@ -165,5 +170,6 @@ def test_embedder_multi_calls(self):
             emb_dim = [emb_dim]
         self.assertEqual(outputs.shape, [64, 10, 20] + emb_dim)

+
 if __name__ == "__main__":
     tf.test.main()
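A further sanity check one could write (a sketch, not part of the commit): with one-hot soft ids the two paths should coincide, assuming the embedder's default `dropout_rate=0` so both lookups are deterministic:

import numpy as np
import tensorflow as tf
from texar.modules import WordEmbedder

embedder = WordEmbedder(vocab_size=100, hparams={'dim': 20})
ids = tf.constant([[1, 2, 3], [4, 5, 6]])
hard = embedder(ids=ids)                            # gather rows by index
soft = embedder(soft_ids=tf.one_hot(ids, depth=100))  # one-hot weighted sum

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    hard_, soft_ = sess.run([hard, soft])
    np.testing.assert_allclose(hard_, soft_, rtol=1e-6)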
3 changes: 2 additions & 1 deletion texar/modules/embedders/position_embedders.py
@@ -152,7 +152,8 @@ def _build(self, positions=None, sequence_length=None, mode=None, **kwargs):
             # use 'item_type' to avoid unknown noise_shape in the 'item'
             # strategy
             st = 'item_type'
-        dropout_layer = self._get_dropout_layer(self._hparams, inputs, st)
+        ids_rank = len(inputs.shape.dims)
+        dropout_layer = self._get_dropout_layer(self._hparams, ids_rank, st)

         embedding = self._embedding
         if dropout_layer:
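The fallback above exists because the 'item' strategy would need a `[batch, time, 1]` mask here, and the time dimension is unknown when `inputs` are built from `sequence_length`; dropping whole rows of the position table instead keeps the mask shape fully static. A short sketch under assumed shapes:

import tensorflow as tf

position_size, dim = 100, 20
table = tf.get_variable('pos_emb', shape=[position_size, dim])

# 'item_type': mask shape [position_size, 1] is static, so it works even
# when the looked-up positions have dynamic batch and time dimensions.
layer = tf.layers.Dropout(rate=0.3, noise_shape=[None, 1])
table_dropped = layer.apply(table, training=True)

positions = tf.placeholder(tf.int32, shape=[None, None])  # [batch, time], dynamic
outputs = tf.nn.embedding_lookup(table_dropped, positions)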
