Commit 615411b
fixed transformer decoder docs
ZhitingHu committed Feb 21, 2019
1 parent a2e28b2 commit 615411b
Showing 2 changed files with 18 additions and 13 deletions.
16 changes: 11 additions & 5 deletions texar/core/layers.py
@@ -448,7 +448,8 @@ def get_activation_fn(fn_name="identity", kwargs=None):
     if fn_name is None:
         return None

-    fn_modules = ['tensorflow', 'tensorflow.nn', 'texar.custom', 'texar.core.layers']
+    fn_modules = ['tensorflow', 'tensorflow.nn', 'texar.custom',
+                  'texar.core.layers']
     activation_fn_ = utils.get_function(fn_name, fn_modules)
     activation_fn = activation_fn_
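
For context, get_activation_fn resolves an activation function by name, searching the modules listed in fn_modules. A minimal usage sketch, assuming only the lookup behavior shown in this hunk:

    from texar.core import layers

    # 'relu' resolves from tensorflow.nn; 'gelu' from texar.core.layers,
    # following the fn_modules search order above.
    relu_fn = layers.get_activation_fn('relu')
    gelu_fn = layers.get_activation_fn('gelu')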

@@ -1175,14 +1176,16 @@ def default_average_pooling3d_kwargs():
 def layer_normalize(inputs,
                     scope=None,
                     **kwargs):
-    '''Applies layer normalization. averaging over the last dimension
+    """Applies layer normalization. Normalizes over the last dimension.
     Args:
         inputs: A tensor with 2 or more dimensions, where the first
-            dimension has `batch_size`.
-        scope: Optional scope for `variable_scope`.
+            dimension must be `batch_size`.
+        scope (optional): variable scope.
     Returns:
         A tensor with the same shape and data dtype as `inputs`.
-    '''
+    """
     return tf.contrib.layers.layer_norm(
         inputs=inputs, begin_norm_axis=-1, begin_params_axis=-1, scope=scope,
         **kwargs
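
A minimal usage sketch of layer_normalize as documented above (assumes TensorFlow 1.x with tf.contrib available; shapes are illustrative):

    import tensorflow as tf
    from texar.core import layers

    inputs = tf.random_normal([32, 10, 256])   # [batch_size, time, dim]
    outputs = layers.layer_normalize(inputs)   # normalized over the last dimension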
@@ -1191,10 +1194,13 @@ def layer_normalize(inputs,

 def gelu(input_tensor):
     """Gaussian Error Linear Unit.
     This is a smoother version of the RELU.
     Original paper: https://arxiv.org/abs/1606.08415
     Args:
         input_tensor: float Tensor to perform activation.
     Returns:
         `input_tensor` with the GELU activation applied.
     """
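For reference, GELU is commonly implemented with the tanh approximation from the cited paper; the helper below sketches that approximation and may differ from texar's exact implementation:

    import math
    import tensorflow as tf

    def gelu_approx(x):
        # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
        cdf = 0.5 * (1.0 + tf.tanh(
            math.sqrt(2.0 / math.pi) * (x + 0.044715 * tf.pow(x, 3))))
        return x * cdf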
15 changes: 7 additions & 8 deletions texar/modules/decoders/transformer_decoders.py
@@ -62,18 +62,17 @@ class TransformerDecoderOutput(


 class TransformerDecoder(ModuleBase, TFDecoder):
-    """Transformer decoder that applies multi-head attention for
+    """Transformer decoder that applies multi-head self-attention for
     sequence decoding.
-    Stacked :class:`~texar.modules.encoders.MultiheadAttentionEncoder` for
-    encoder-decoder attention and self attention,
-    :class:`~texar.modules.FeedForwardNetwork` and residual connections.
-    Use the passed `embedding` variable as the parameters of the
-    transform layer from output to logits.
+    It is a stack of :class:`~texar.modules.encoders.MultiheadAttentionEncoder`,
+    :class:`~texar.modules.FeedForwardNetwork`, and residual connections.
     Args:
         embedding: A Tensor of shape `[vocab_size, dim]` containing the
-            word embedding. The Tensor is used as the decoder output layer.
+            word embedding matrix. The Tensor is used as the decoder output
+            layer that computes logits over vocabulary. Ignored if
+            `hparams['embedding_tie']` is False.
         hparams (dict or HParams, optional): Hyperparameters. Missing
             hyperparameter will be set to default values. See
             :meth:`default_hparams` for the hyperparameter sturcture and
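
A construction sketch matching the embedding argument documented above; the tx.modules import path and keyword usage are assumptions about this version of the texar API, while the embedding_tie hparam comes from the hunk below:

    import tensorflow as tf
    import texar as tx

    vocab_size, dim = 30000, 512
    embedding = tf.get_variable('word_embedding', shape=[vocab_size, dim])

    decoder = tx.modules.TransformerDecoder(
        embedding=embedding,                  # reused as the output (logits) layer
        hparams={'embedding_tie': True})      # missing hparams fall back to defaults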
@@ -208,7 +207,7 @@ def default_hparams():
         "embedding_tie" : bool
             Whether to use the word embedding matrix as the output layer
-            that computes logits. If `False`, an additional dense layer
+            that computes logits. If `False`, a new dense layer
             is created.
         "output_layer_bias" : bool
