Commit
finished WordEmbedder; updated encoders/decoders
Former-commit-id: 3b76426
ZhitingHu committed Mar 12, 2018
1 parent 1750bfa commit 52b04cc
Showing 16 changed files with 422 additions and 191 deletions.
163 changes: 0 additions & 163 deletions texar/core/layers.py
@@ -27,8 +27,6 @@
"get_initializer",
"get_activation_fn",
"get_constraint_fn",
"default_embedding_hparams",
"get_embedding",
"get_layer",
"MergeLayer",
"SequentialLayer",
@@ -386,167 +384,6 @@ def get_constraint_fn(fn_name="NonNeg"):
constraint_fn = utils.get_function(fn_name, fn_modules)
return constraint_fn

def default_embedding_hparams():
"""Returns default hyperparameters of token embedding used in encoders,
decoders, and other modules.
Returns:
A dictionary with the following structure and values.
.. code-block:: python
{
"name": "embedding",
"dim": 100,
"initializer": {
"type": "random_uniform_initializer",
"kwargs": {
"minval": -0.1,
"maxval": 0.1,
"seed": None
}
},
"regularizer": {
"type": "L1L2",
"kwargs": {
"l1": 0.,
"l2": 0.
}
},
"trainable": True,
}
Here:
"name" : str
Name of the embedding variable.
"dim" : int
Embedding dimension.
"initializer" : dict
Hyperparameters of the initializer for the embedding values,
including:
"type" : str or initializer instance
Name, full path, or instance of the initializer class; or the name
or full path to a function that returns the initializer class.
The class or function can be
- Built-in initializer defined in \
:tf_main:`tf.initializers <initializers>`, e.g., \
:tf_main:`random_uniform <random_uniform_initializer>` \
(a.k.a :class:`tf.random_uniform_initializer`), or \
in :mod:`tf`, e.g., :tf_main:`glorot_uniform_initializer \
<glorot_uniform_initializer>`, or in \
:tf_main:`tf.keras.initializers <keras/initializers>`.
- User-defined initializer in :mod:`texar.custom`.
- External initializer. Must provide the full path, \
e.g., :attr:`"my_module.MyInitializer"`, or the instance.
"kwargs" : dict
A dictionary of arguments for the constructor of the
initializer class or for the function. An initializer is
created by `initializer = initializer_class_or_fn(**kwargs)`
where :attr:`initializer_class_or_fn` is specified in
:attr:`"type"`.
Ignored if :attr:`"type"` is an initializer instance.
The default value corresponds to the initializer
:tf_main:`tf.random_uniform_initializer
<random_uniform_initializer>`.
"regularizer" : dict
Hyperparameters of the regularizer for the embedding values. The
regularizer must be an instance of
the base :tf_main:`Regularizer <keras/regularizers/Regularizer>`
class. The hyperparameters include:
"type" : str or Regularizer instance
Name, full path, or instance of the regularizer class. The
class can be
- Built-in regularizer defined in
:tf_main:`tf.keras.regularizers <keras/regularizers>`, e.g.,
:tf_main:`L1L2 <keras/regularizers/L1L2>`.
- User-defined regularizer in :mod:`texar.custom`. The
regularizer class should inherit the base class
:tf_main:`Regularizer <keras/regularizers/Regularizer>`.
- External regularizer. Must provide the full path, \
e.g., :attr:`"my_module.MyRegularizer"`, or the instance.
"kwargs" : dict
A dictionary of arguments for the constructor of the
regularizer class. A regularizer is created by
calling `regularizer_class(**kwargs)` where
:attr:`regularizer_class` is specified in :attr:`"type"`.
Ignored if :attr:`"type"` is a Regularizer instance.
The default value corresponds to
:tf_main:`L1L2 <keras/regularizers/L1L2>` with `(l1=0, l2=0)`,
which disables regularization.
"trainable" : bool
Whether the embedding is trainable.
"""
return {
"name": "embedding",
"dim": 50,
"initializer": {
"type": "random_uniform_initializer",
"kwargs": {
"minval": -0.1,
"maxval": 0.1,
"seed": None
}
},
"regularizer": default_regularizer_hparams(),
"trainable": True
}

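For orientation, a minimal sketch (not part of the diff) of how a partial user dict is merged with these defaults through HParams, mirroring the HParams(hparams, default_embedding_hparams()) call inside get_embedding below:

    from texar.hyperparams import HParams
    from texar.core import layers

    user_hparams = {"dim": 300}
    hparams = HParams(user_hparams, layers.default_embedding_hparams())
    # Fields not overridden ("name", "initializer", "regularizer", "trainable")
    # keep the default values listed above.
    print(hparams.dim)        # 300
    print(hparams.trainable)  # True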

def get_embedding(hparams=None,
init_values=None,
vocab_size=None,
variable_scope='Embedding'):
"""Creates embedding variable if not exists.
Args:
hparams (dict or HParams, optional): Embedding hyperparameters. Missing
hyperparameters are set to default values. See
:func:`~texar.core.layers.default_embedding_hparams` for all
hyperparameters and default values.
If :attr:`init_values` is given, :attr:`hparams["initializer"]`
and :attr:`hparams["dim"]` are ignored.
init_values (Tensor or numpy array, optional): Initial values of the
embedding variable. If not given, embedding is initialized as
specified in :attr:`hparams["initializer"]`.
vocab_size (int, optional): The vocabulary size. Required if
:attr:`init_values` is not provided.
variable_scope (string or VariableScope, optional): Variable scope of
the embedding variable.
Returns:
Variable: A 2D `Variable` of the same shape as :attr:`init_values`,
or of the shape :attr:`[vocab_size, hparams["dim"]]`.
"""
with tf.variable_scope(variable_scope):
if hparams is None or isinstance(hparams, dict):
hparams = HParams(hparams, default_embedding_hparams())
regularizer = get_regularizer(hparams["regularizer"])
if init_values is None:
initializer = get_initializer(hparams["initializer"])
return tf.get_variable(name=hparams["name"],
shape=[vocab_size, hparams["dim"]],
initializer=initializer,
regularizer=regularizer,
trainable=hparams["trainable"])
else:
return tf.get_variable(name=hparams["name"],
initializer=tf.to_float(init_values),
regularizer=regularizer,
trainable=hparams["trainable"])

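A hedged usage sketch of the two initialization paths documented above, written against the pre-commit texar.core.layers location (after this commit the same function lives in texar.modules.embedders.embedder_utils):

    import numpy as np
    from texar.core import layers

    # From the default hparams plus a vocabulary size: a trainable variable of
    # shape [vocab_size, hparams["dim"]].
    emb = layers.get_embedding(vocab_size=10000)

    # From pre-trained values: hparams["dim"] and hparams["initializer"] are ignored.
    pretrained = np.random.uniform(-0.1, 0.1, size=(10000, 300)).astype(np.float32)
    emb_pre = layers.get_embedding(init_values=pretrained,
                                   variable_scope="PretrainedEmbedding")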
#TODO: allow flat `type` and `kwargs` arguments.
def get_layer(hparams):
13 changes: 13 additions & 0 deletions texar/core/layers_test.py
@@ -12,6 +12,7 @@
import tensorflow as tf
import tensorflow.contrib.rnn as rnn

import texar as tx
from texar import context
from texar.hyperparams import HParams
from texar.core import layers
@@ -113,6 +114,18 @@ def test_get_layer(self):
layer = layers.get_layer(hparams)
self.assertTrue(isinstance(layer, tf.layers.Conv1D))

hparams = {
"type": "MergeLayer",
"kwargs": {
"layers": [
{"type": "Conv1D"},
{"type": "Conv1D"}
]
}
}
layer = layers.get_layer(hparams)
self.assertTrue(isinstance(layer, tx.core.MergeLayer))


class MergeLayerTest(tf.test.TestCase):
"""Tests MergeLayer.
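For readers new to MergeLayer, a rough equivalent of the nested spec in the test above, constructed directly (the Conv1D kwargs here are illustrative assumptions; the test itself relies on get_layer's default kwargs):

    import tensorflow as tf
    import texar as tx

    merge = tx.core.MergeLayer(layers=[
        tf.layers.Conv1D(filters=100, kernel_size=3),
        tf.layers.Conv1D(filters=100, kernel_size=3),
    ])
    # MergeLayer applies each sub-layer to the same input and combines the
    # outputs; concatenation is the default combination mode.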
1 change: 1 addition & 0 deletions texar/modules/__init__.py
@@ -10,6 +10,7 @@
# pylint: disable=wildcard-import

from texar.modules.networks import *
from texar.modules.embedders import *
from texar.modules.encoders import *
from texar.modules.decoders import *
from texar.modules.connectors import *
14 changes: 8 additions & 6 deletions texar/modules/decoders/rnn_decoder_base.py
@@ -7,18 +7,20 @@
from __future__ import division
from __future__ import print_function

# pylint: disable=not-context-manager, too-many-arguments, no-name-in-module
# pylint: disable=too-many-branches, protected-access

import tensorflow as tf
from tensorflow.contrib.seq2seq import Decoder as TFDecoder
from tensorflow.contrib.seq2seq import dynamic_decode
from tensorflow.python.framework import tensor_shape
from tensorflow.python.util import nest

from texar.modules.module_base import ModuleBase
from texar.modules.decoders import rnn_decoder_helpers
from texar.core import layers, utils
from texar import context

# pylint: disable=not-context-manager, too-many-arguments
from texar.modules.module_base import ModuleBase
from texar.modules.decoders import rnn_decoder_helpers
from texar.modules.embedders import embedder_utils

__all__ = [
"RNNDecoderBase"
@@ -61,7 +63,7 @@ def __init__(self,
if isinstance(embedding, tf.Variable):
self._embedding = embedding
else:
self._embedding = layers.get_embedding(
self._embedding = embedder_utils.get_embedding(
self._hparams.embedding, embedding, self._vocab_size,
self.variable_scope)
if self._hparams.embedding.trainable:
@@ -98,7 +100,7 @@ def default_hparams():
return {
"rnn_cell": layers.default_rnn_cell_hparams(),
"use_embedding": True,
"embedding": layers.default_embedding_hparams(),
"embedding": embedder_utils.default_embedding_hparams(),
"helper_train": rnn_decoder_helpers.default_helper_train_hparams(),
"helper_infer": rnn_decoder_helpers.default_helper_infer_hparams(),
"max_decoding_length_train": None,
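A hedged sketch of the two embedding paths handled in the constructor above. BasicRNNDecoder is assumed here as a concrete subclass of RNNDecoderBase (it is not part of this diff), and the keyword names are taken from the snippet above rather than from full signatures:

    import tensorflow as tf
    import texar as tx

    # Path 1: a ready-made tf.Variable is used as-is.
    emb_var = tf.get_variable("my_embedding", shape=[10000, 128])
    decoder = tx.modules.BasicRNNDecoder(embedding=emb_var, vocab_size=10000)

    # Path 2: no variable is given, so the decoder builds one through
    # embedder_utils.get_embedding using hparams["embedding"].
    decoder = tx.modules.BasicRNNDecoder(vocab_size=10000,
                                         hparams={"embedding": {"dim": 128}})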
16 changes: 10 additions & 6 deletions texar/modules/decoders/transformer_decoders.py
@@ -6,15 +6,19 @@
from __future__ import division
from __future__ import print_function

import collections
# pylint: disable=no-name-in-module, too-many-arguments, too-many-locals
# pylint: disable=not-context-manager

import collections

import tensorflow as tf
from tensorflow.python.framework import tensor_shape, dtypes
from texar.modules.networks import FeedForwardNetwork
from texar.modules.module_base import ModuleBase

from texar.core import layers
from texar import context
from texar.modules.networks import FeedForwardNetwork
from texar.modules.module_base import ModuleBase
from texar.modules.embedders import embedder_utils

class TransformerDecoderOutput(
collections.namedtuple("TransformerDecoderOutput",\
@@ -40,7 +44,7 @@ def __init__(self, embedding=None, vocab_size=None, hparams=None):
self._embedding = embedding
print('embedding shared between encoder and decoder')
else:
self._embedding = layers.get_embedding(
self._embedding = embedder_utils.get_embedding(
self._hparams.embedding, embedding, vocab_size,
variable_scope=self.variable_scope)
self._embed_dim = self._embedding.get_shape().as_list()[-1]
@@ -59,7 +63,7 @@ def default_hparams():
'multiply_embedding_mode': 'sqrt_depth',
'share_embed_and_transform': True,
"use_embedding": True,
"embedding": layers.default_embedding_hparams(),
"embedding": embedder_utils.default_embedding_hparams(),
"name":"decoder",
"num_heads":8,
"num_blocks":6,
@@ -85,7 +89,7 @@ def _build(self, inputs, encoder_output, src_length, tgt_length):
self.dec,
variable_scope='dec_pe')
else:
self.position_dec_embedding = layers.get_embedding(
self.position_dec_embedding = embedder_utils.get_embedding(
hparams=self._hparams.embedding,
vocab_size=self._hparams.max_seq_length,
variable_scope='dec_pe')
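A hedged illustration (not Texar code) of the two pieces configured above: positions 0..max_seq_length-1 act as token ids for get_embedding, and 'sqrt_depth' conventionally scales word embeddings by sqrt(embed_dim) before the positional add; the exact handling inside _build is not shown in this diff:

    import tensorflow as tf

    max_seq_length, embed_dim = 256, 512
    pos_table = tf.get_variable("dec_pe", shape=[max_seq_length, embed_dim])
    positions = tf.range(max_seq_length)                     # 0, 1, ..., max_seq_length - 1
    pos_embeds = tf.nn.embedding_lookup(pos_table, positions)

    word_embeds = tf.random_normal([1, max_seq_length, embed_dim])  # stand-in for token embeddings
    scaled = word_embeds * (embed_dim ** 0.5)                # 'sqrt_depth' scaling
    decoder_inputs = scaled + pos_embeds                     # broadcasts over the batch dimension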
12 changes: 12 additions & 0 deletions texar/modules/embedders/__init__.py
@@ -0,0 +1,12 @@
#
"""
Modules of texar library embedders.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# pylint: disable=wildcard-import

from texar.modules.embedders import *

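The contents of the new embedders package are not shown in this diff. As a loosely hedged sketch only, the WordEmbedder named in the commit message is typically used along the following lines in later Texar releases; the class name, signature, and call convention are assumptions here, not taken from this commit:

    import tensorflow as tf
    import texar as tx

    embedder = tx.modules.WordEmbedder(vocab_size=10000, hparams={"dim": 128})
    token_ids = tf.placeholder(tf.int32, shape=[None, None])   # [batch, time]
    embedded = embedder(token_ids)                              # [batch, time, 128]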