Commit
Merge pull request #231 from gpengzhi/embedder
Bugfix and Polish Embedder modules
gpengzhi committed Oct 21, 2019
2 parents c58d423 + 2f04a58 commit caf0489
Showing 8 changed files with 150 additions and 183 deletions.
6 changes: 1 addition & 5 deletions texar/tf/modules/embedders/__init__.py
@@ -12,13 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-Modules of texar library embedders.
+Modules of Texar library embedders.
 """
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 # pylint: disable=wildcard-import
 
 from texar.tf.modules.embedders.embedder_base import *
35 changes: 15 additions & 20 deletions texar/tf/modules/embedders/embedder_base.py
@@ -15,30 +15,28 @@
 The base embedder class.
 """
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 
 import tensorflow as tf
 
 from texar.tf.module_base import ModuleBase
 from texar.tf.modules.embedders import embedder_utils
+from texar.tf.utils.shapes import shape_list
 
 # pylint: disable=invalid-name
 
 __all__ = [
     "EmbedderBase"
 ]
 
 
 class EmbedderBase(ModuleBase):
-    """The base embedder class that all embedder classes inherit.
+    r"""The base embedder class that all embedder classes inherit.
 
     Args:
         num_embeds (int, optional): The number of embedding elements, e.g.,
             the vocabulary size of a word embedder.
         hparams (dict or HParams, optional): Embedder hyperparameters. Missing
             hyperparameters will be set to default values. See
-            :meth:`default_hparams` for the hyperparameter sturcture and
+            :meth:`default_hparams` for the hyperparameter structure and
             default values.
     """

@@ -54,18 +52,17 @@ def _init_parameterized_embedding(self, init_value, num_embeds, hparams):
         if hparams.trainable:
             self._add_trainable_variable(self._embedding)
 
-        self._num_embeds = self._embedding.get_shape().as_list()[0]
-
-        self._dim = self._embedding.get_shape().as_list()[1:]
+        self._num_embeds = shape_list(self._embedding)[0]
+        self._dim = shape_list(self._embedding)[1:]
         self._dim_rank = len(self._dim)
         if self._dim_rank == 1:
             self._dim = self._dim[0]
 
     def _get_dropout_layer(self, hparams, ids_rank=None, dropout_input=None,
                            dropout_strategy=None):
-        """Creates dropout layer according to dropout strategy.
-        Called in :meth:`_build()`.
+        r"""Creates dropout layer according to dropout strategy.
+        Called in :meth:`_build`.
         """
         dropout_layer = None

@@ -76,11 +73,12 @@ def _get_dropout_layer(self, hparams, ids_rank=None, dropout_input=None,
         if st == 'element':
             noise_shape = None
         elif st == 'item':
-            noise_shape = tf.concat([tf.shape(dropout_input)[:ids_rank],
-                                     tf.ones([self._dim_rank], tf.int32)],
-                                    axis=0)
+            assert dropout_input is not None
+            assert ids_rank is not None
+            noise_shape = (shape_list(dropout_input)[:ids_rank]
+                           + [1] * self._dim_rank)
         elif st == 'item_type':
-            noise_shape = [None] + [1] * self._dim_rank
+            noise_shape = [None] + [1] * self._dim_rank  # type: ignore
         else:
             raise ValueError('Unknown dropout strategy: {}'.format(st))

@@ -91,7 +89,7 @@
 
     @staticmethod
     def default_hparams():
-        """Returns a dictionary of hyperparameters with default values.
+        r"""Returns a dictionary of hyperparameters with default values.
 
         .. code-block:: python
 
@@ -103,11 +101,8 @@ def default_hparams():
                 "name": "embedder"
             }
 
-    def _build(self, *args, **kwargs):
-        raise NotImplementedError
-
     @property
     def num_embeds(self):
-        """The number of embedding elements.
+        r"""The number of embedding elements.
         """
         return self._num_embeds
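
Note: the `shape_list`-based noise shapes in the hunk above are easier to follow with concrete numbers. The sketch below is not part of the commit; `shape_list` here is a simplified stand-in for `texar.tf.utils.shapes.shape_list`, and all shapes and names are assumptions, for a word embedder with `ids` of shape `[batch, time]` and an embedding table of shape `[vocab, dim]` under TF 1.x.

    # Illustrative sketch only; shapes, names, and rates are assumptions.
    import tensorflow as tf

    def shape_list(x):
        # Simplified stand-in for texar.tf.utils.shapes.shape_list:
        # static dimensions where known, dynamic tensors otherwise.
        static = x.get_shape().as_list()
        dynamic = tf.shape(x)
        return [d if d is not None else dynamic[i]
                for i, d in enumerate(static)]

    ids = tf.zeros([32, 20], dtype=tf.int32)                 # [batch, time]
    embedding = tf.get_variable('embedding', [10000, 256])   # [vocab, dim]
    inputs = tf.nn.embedding_lookup(embedding, ids)          # [batch, time, dim]
    ids_rank, dim_rank = 2, 1

    # 'element': noise_shape=None, so every scalar element of the embedded
    # inputs is dropped independently.
    # 'item': one mask value per (batch, time) position, broadcast over the
    # embedding dimension, so whole word vectors are zeroed at once.
    item_noise = shape_list(inputs)[:ids_rank] + [1] * dim_rank   # [32, 20, 1]
    item_dropout = tf.layers.Dropout(rate=0.1, noise_shape=item_noise)
    dropped = item_dropout(inputs, training=True)

    # 'item_type': applied to the embedding table itself; the None entry lets
    # the layer broadcast one mask value per vocabulary row, so a word type is
    # dropped everywhere it occurs.
    type_noise = [None] + [1] * dim_rank                          # [None, 1]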
92 changes: 45 additions & 47 deletions texar/tf/modules/embedders/embedder_utils.py
@@ -14,10 +14,6 @@
 """Utils of embedder.
 """
 
-from __future__ import absolute_import
-from __future__ import print_function
-from __future__ import division
-
 import tensorflow as tf
 
 from texar.tf.hyperparams import HParams
@@ -29,8 +25,9 @@
     "soft_embedding_lookup"
 ]
 
+
 def default_embedding_hparams():
-    """Returns a `dict` of hyperparameters and default values of a embedder.
+    r"""Returns a ``dict`` of hyperparameters and default values of an embedder.
 
     See :meth:`~texar.tf.modules.WordEmbedder.default_hparams` for details.
 
@@ -54,14 +51,14 @@ def default_embedding_hparams():
     Here:
 
-    "name": str
+    `"name"`: str
         Name of the embedding variable.
 
-    "dim": int or list
+    `"dim"`: int or list
        Embedding dimension. Can be a list of integers to yield embeddings
        with dimensionality > 1.
 
-    "initializer": dict or None
+    `"initializer"`: dict or None
        Hyperparameters of the initializer for the embedding values. An
        example is as
 
@@ -79,37 +76,37 @@ def default_embedding_hparams():
         which corresponds to :tf_main:`tf.random_uniform_initializer
         <random_uniform_initializer>`, and includes:
 
-        "type": str or initializer instance
+        `"type"`: str or initializer instance
            Name, full path, or instance of the initializer class; Or name
            or full path to a function that returns the initializer class.
            The class or function can be
 
-            - Built-in initializer defined in \
-              :tf_main:`tf.initializers <initializers>`, e.g., \
-              :tf_main:`random_uniform <random_uniform_initializer>` \
-              (a.k.a :class:`tf.random_uniform_initializer`), or \
-              in :mod:`tf`, e.g., :tf_main:`glorot_uniform_initializer \
-              <glorot_uniform_initializer>`, or in \
+            - Built-in initializer defined in
+              :tf_main:`tf.initializers <initializers>`, e.g.,
+              :tf_main:`random_uniform <random_uniform_initializer>`
+              (a.k.a :class:`tf.random_uniform_initializer`), or
+              in :mod:`tf`, e.g., :tf_main:`glorot_uniform_initializer
+              <glorot_uniform_initializer>`, or in
              :tf_main:`tf.keras.initializers <keras/initializers>`.
            - User-defined initializer in :mod:`texar.tf.custom`.
-            - External initializer. Must provide the full path, \
+            - External initializer. Must provide the full path,
              e.g., :attr:`"my_module.MyInitializer"`, or the instance.
 
-        "kwargs": dict
+        `"kwargs"`: dict
            A dictionary of arguments for constructor of the
            initializer class or for the function. An initializer is
-            created by `initialzier = initializer_class_or_fn(**kwargs)`
+            created by ``initializer = initializer_class_or_fn(**kwargs)``
            where :attr:`initializer_class_or_fn` is specified in
            :attr:`"type"`.
            Ignored if :attr:`"type"` is an initializer instance.
 
-        "regularizer": dict
+        `"regularizer"`: dict
            Hyperparameters of the regularizer for the embedding values. The
            regularizer must be an instance of
            the base :tf_main:`Regularizer <keras/regularizers/Regularizer>`
            class. The hyperparameters include:
 
-            "type": str or Regularizer instance
+            `"type"`: str or regularizer instance
                Name, full path, or instance of the regularizer class. The
                class can be
 
@@ -119,38 +116,39 @@ class can be
                - User-defined regularizer in :mod:`texar.tf.custom`. The
                  regularizer class should inherit the base class
                  :tf_main:`Regularizer <keras/regularizers/Regularizer>`.
-                - External regularizer. Must provide the full path, \
+                - External regularizer. Must provide the full path,
                  e.g., :attr:`"my_module.MyRegularizer"`, or the instance.
 
-            "kwargs": dict
+            `"kwargs"`: dict
                A dictionary of arguments for constructor of the
                regularizer class. A regularizer is created by
                calling `regularizer_class(**kwargs)` where
                :attr:`regularizer_class` is specified in :attr:`"type"`.
-                Ignored if :attr:`"type"` is a Regularizer instance.
+                Ignored if :attr:`"type"` is a regularizer instance.
 
        The default value corresponds to
        :tf_main:`L1L2 <keras/regularizers/L1L2>` with `(l1=0, l2=0)`,
        which disables regularization.
 
-    "dropout_rate": float
+    `"dropout_rate"`: float
        The dropout rate between 0 and 1. E.g., `dropout_rate=0.1` would
        drop out 10% of the embedding.
 
-    "dropout_strategy": str
+    `"dropout_strategy"`: str
        The dropout strategy. Can be one of the following
 
-        - 'element': The regular strategy that drops individual elements \
+        - ``"element"``: The regular strategy that drops individual elements
          in the embedding vectors.
-        - 'item': Drops individual items (e.g., words) entirely. E.g., for \
-          the word sequence 'the simpler the better', the strategy can \
-          yield '_ simpler the better', where the first `the` is dropped.
-        - 'item_type': Drops item types (e.g., word types). E.g., for the \
-          above sequence, the strategy can yield '_ simpler _ better', \
-          where the word type 'the' is dropped. The dropout will never \
-          yield '_ simpler the better' as in the 'item' strategy.
+        - ``"item"``: Drops individual items (e.g., words) entirely. For
+          example, for the word sequence "the simpler the better", the
+          strategy can yield "_ simpler the better", where the first "the"
+          is dropped.
+        - ``"item_type"``: Drops item types (e.g., word types). For example,
+          for the above sequence, the strategy can yield "_ simpler _
+          better", where the word type "the" is dropped. The dropout will
+          never yield "_ simpler the better" as in the ``"item"`` strategy.
 
-    "trainable": bool
+    `"trainable"`: bool
        Whether the embedding is trainable.
     """
     return {
 
@@ -169,7 +167,7 @@ def get_embedding(hparams=None,
                   init_value=None,
                   num_embeds=None,
                   variable_scope='Embedding'):
-    """Creates embedding variable if not exists.
+    r"""Creates embedding variable if not exists.
 
     Args:
         hparams (dict or HParams, optional): Embedding hyperparameters. Missing
 
@@ -190,8 +188,7 @@ def get_embedding(hparams=None,
     Returns:
         Variable or Tensor: A 2D `Variable` or `Tensor` of the same shape with
-            :attr:`init_value` or of the shape
-            :attr:`[num_embeds, hparams["dim"]]`.
+            :attr:`init_value` or of the shape ``[num_embeds, hparams["dim"]]``.
     """
     with tf.variable_scope(variable_scope):
         if hparams is None or isinstance(hparams, dict):
 
@@ -216,23 +213,24 @@
 
     return embedding
 
+
 def soft_embedding_lookup(embedding, soft_ids):
-    """Transforms soft ids (e.g., probability distribution over ids) into
+    r"""Transforms soft ids (e.g., probability distribution over ids) into
     embeddings, by mixing the embedding vectors with the soft weights.
 
     Args:
-        embedding: A Tensor of shape `[num_classes] + embedding-dim` containing
-            the embedding vectors. Embedding can have dimensionality > 1, i.e.,
-            :attr:`embedding` can be of shape
-            `[num_classes, emb_dim_1, emb_dim_2, ...]`
+        embedding: A Tensor of shape ``[num_classes] + embedding-dim``
+            containing the embedding vectors. Embedding can have
+            dimensionality > 1, i.e., :attr:`embedding` can be of shape
+            ``[num_classes, emb_dim_1, emb_dim_2, ...]``
         soft_ids: A Tensor of weights (probabilities) used to mix the
             embedding vectors.
 
     Returns:
-        A Tensor of shape `shape(soft_ids)[:-1] + shape(embedding)[1:]`. For
-        example, if `shape(soft_ids) = [batch_size, max_time, vocab_size]`
-        and `shape(embedding) = [vocab_size, emb_dim]`, then the return tensor
-        has shape `[batch_size, max_time, emb_dim]`.
+        A Tensor of shape ``shape(soft_ids)[:-1] + shape(embedding)[1:]``. For
+        example, if ``shape(soft_ids) = [batch_size, max_time, vocab_size]``
+        and ``shape(embedding) = [vocab_size, emb_dim]``, then the return tensor
+        has shape ``[batch_size, max_time, emb_dim]``.
 
     Example::
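
Note: the body under ``Example::`` is truncated in this view, but the documented contract of `soft_embedding_lookup` amounts to contracting the id axis of `soft_ids` against the first axis of `embedding`. A minimal sketch under assumed shapes (illustrative, not the library source):

    # What soft_embedding_lookup computes, per its documented shapes (TF 1.x).
    import tensorflow as tf

    batch_size, max_time, vocab_size, emb_dim = 8, 10, 100, 16
    embedding = tf.random_normal([vocab_size, emb_dim])
    # Soft ids: a probability distribution over the vocabulary per position.
    soft_ids = tf.nn.softmax(tf.random_normal([batch_size, max_time, vocab_size]))

    # Mix embedding rows with the soft weights: contract the last axis of
    # soft_ids with the first axis of embedding.
    soft_embeds = tf.tensordot(soft_ids, embedding, axes=1)
    print(soft_embeds.shape)  # (8, 10, 16), i.e. shape(soft_ids)[:-1] + shape(embedding)[1:]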
9 changes: 1 addition & 8 deletions texar/tf/modules/embedders/embedder_utils_test.py
@@ -1,19 +1,12 @@
-#
 """
 Unit tests for embedder utils.
 """
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-# pylint: disable=no-member
-
 import tensorflow as tf
 
 from texar.tf.modules.embedders import embedder_utils
+
 
 class GetEmbeddingTest(tf.test.TestCase):
     """Tests embedding creator.
     """
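
Note: the test bodies are truncated in this view. For orientation, a hedged usage sketch of the utilities under test (not the actual test code; the default embedding `dim` of 100 from `default_embedding_hparams` is assumed):

    # Hypothetical TF 1.x usage of the embedder utils.
    import tensorflow as tf
    from texar.tf.modules.embedders import embedder_utils

    # Creates a [100, 100] embedding variable under the default
    # hyperparameters (default "dim" assumed to be 100).
    embedding = embedder_utils.get_embedding(num_embeds=100)

    soft_ids = tf.nn.softmax(tf.random_normal([4, 7, 100]))
    soft_embeds = embedder_utils.soft_embedding_lookup(embedding, soft_ids)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(soft_embeds).shape)  # (4, 7, 100)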
