Commit

finished cross_entropy loss
ZhitingHu committed Aug 21, 2017
1 parent e372227 commit ff81731
Showing 7 changed files with 206 additions and 33 deletions.
6 changes: 3 additions & 3 deletions txtgen/core/layers.py
@@ -87,13 +87,13 @@ def default_rnn_cell_hparams():


def get_rnn_cell(cell_hparams):
-    """Creates an RNN cell
+    """Creates an RNN cell.
    Args:
-        cell_hparams: a dictionary of hyperparameters
+        cell_hparams: a dictionary of hyperparameters.
    Returns:
-        An instance of RNN cell
+        An instance of RNN cell.
    """
    cells = []
    for _ in range(cell_hparams["num_layers"]):
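For orientation, a minimal usage sketch (not part of this commit) of the interface documented above; the only assumption beyond the diff is that `default_rnn_cell_hparams()` returns a dict containing the `"num_layers"` key consumed in the loop:

```python
# Illustrative sketch only -- not part of the commit.
from txtgen.core.layers import default_rnn_cell_hparams, get_rnn_cell

cell_hparams = default_rnn_cell_hparams()  # assumed to include "num_layers"
cell = get_rnn_cell(cell_hparams)          # an RNN cell stacked over "num_layers" layers
```
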
10 changes: 5 additions & 5 deletions txtgen/core/utils.py
@@ -56,7 +56,7 @@ def get_instance(class_name, args, module_paths=None):


def get_function(func_name, module_paths=None):
-    """Returns the function of specified name and module
+    """Returns the function of specified name and module.
    Args:
        func_name: Name of the function.
@@ -84,7 +84,7 @@ def get_function(func_name, module_paths=None):


def switch_dropout(dropout_keep_prob, is_train=None):
-    """Turn off dropout when not in training mode
+    """Turns off dropout when not in training mode.
    Args:
        dropout_keep_prob: Dropout keep probability in training mode
@@ -94,7 +94,7 @@ def switch_dropout(dropout_keep_prob, is_train=None):
    Returns:
        A unit Tensor that equals the dropout keep probability in training mode,
-        and 1 in eval mode
+        and 1 in eval mode.
    """
    if is_train is None:
        return 1. - (1. - dropout_keep_prob) * tf.to_int32(context.is_train())
@@ -103,15 +103,15 @@ def transpose_batch_time(inputs):


def transpose_batch_time(inputs):
-    """Transposes inputs between time-major and batch-major
+    """Transposes inputs between time-major and batch-major.
    Args:
        inputs: A Tensor of shape `[batch_size, max_time, ...]` (batch-major)
            or `[max_time, batch_size, ...]` (time-major), or a (possibly
            nested) tuple of such elements.
    Returns:
-        A Tensor with transposed batch and time dimensions of inputs
+        A Tensor with transposed batch and time dimensions of inputs.
    """
    flat_input = nest.flatten(inputs)
    flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
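As a quick illustration (again, not part of the commit), `transpose_batch_time` as documented behaves roughly like this under TF 1.x graph mode; the concrete shapes are made up for the example:

```python
# Illustrative sketch only -- not part of the commit.
import tensorflow as tf
from txtgen.core.utils import transpose_batch_time

batch_major = tf.zeros([32, 20, 256])           # [batch_size, max_time, dim]
time_major = transpose_batch_time(batch_major)  # shape becomes [20, 32, 256]
```
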
Empty file added txtgen/losses/__init__.py
Empty file.
170 changes: 170 additions & 0 deletions txtgen/losses/losses.py
@@ -0,0 +1,170 @@
#
"""
Various losses
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.ops import rnn # pylint: disable=E0611


def _mask_sequences(sequence, sequence_length, time_major=False):
    """Masks out sequence entries that are beyond the respective sequence
    lengths.

    Args:
        sequence: A Tensor of sequence values.
            If `time_major=False` (default), this must be a Tensor of shape:
            `[batch_size, max_time, (...), num_classes]`.
            If `time_major=True`, this must be a Tensor of shape:
            `[max_time, batch_size, (...), num_classes].`
        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond the
            respective sequence lengths will be made zero.
        time_major: The shape format of the inputs. If True, `sequence` must
            have shape `[max_time, batch_size, ...]`. If false (default),
            `sequence` must have shape `[batch_size, max_time, ...]`.

    Returns:
        A Tensor of the same shape as `sequence` but with masked-out entries.
    """
    if time_major:
        sequence = rnn._transpose_batch_time(sequence)  # pylint: disable=protected-access
    max_time = tf.to_int32(tf.shape(sequence)[1])
    mask = tf.sequence_mask(
        tf.to_int32(sequence_length), max_time, tf.float32)
    sequence = sequence * mask
    if time_major:
        sequence = rnn._transpose_batch_time(sequence)  # pylint: disable=protected-access
    return sequence


def sequence_softmax_cross_entropy(labels,  # pylint: disable=invalid-name
                                   logits,
                                   sequence_length,
                                   time_major=False,
                                   name=None):
    """Computes softmax cross entropy for each time step of sequence
    predictions.

    Args:
        labels: Target class distributions.
            If `time_major=False` (default), this must be a Tensor of shape:
            `[batch_size, max_time, (...), num_classes]`.
            If `time_major=True`, this must be a Tensor of shape:
            `[max_time, batch_size, (...), num_classes].`
        logits: Unscaled log probabilities. This must have the same shape as
            `labels`.
        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond the
            respective sequence lengths will have zero losses.
        time_major: The shape format of the inputs. If True, `labels` and
            `logits` must have shape `[max_time, batch_size, ...]`. If false
            (default), `labels` and `logits` must have shape
            `[batch_size, max_time, ...]`.
        name: (optional) A name for the operation.

    Returns:
        A Tensor containing the loss for each time step of each example. Time
        steps beyond the respective sequence lengths will have zero losses.
        If `time_major=False` (default), this is of shape:
        `[batch_size, max_time, (...)]`.
        If `time_major=True`, this is of shape: `[max_time, batch_size, (...)]`.
    """
    with tf.name_scope(name, "sequence_softmax_cross_entropy"):  # pylint: disable=not-context-manager
        losses = tf.nn.softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)
        return _mask_sequences(losses, sequence_length, time_major)


def average_sequence_softmax_cross_entropy(labels,  # pylint: disable=invalid-name
                                           logits,
                                           sequence_length,
                                           time_major=False,
                                           name=None):
    """Computes a single softmax cross entropy loss that averages over all time
    steps and all examples in a batch.

    See `sequence_softmax_cross_entropy` for the definition of arguments.

    Returns:
        A single average loss.
    """
    with tf.name_scope(name, "average_sequence_softmax_cross_entropy"):  # pylint: disable=not-context-manager
        losses = tf.nn.softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)
        losses = _mask_sequences(losses, sequence_length, time_major)
        loss = tf.reduce_sum(losses) / tf.reduce_sum(sequence_length)
        return loss


def sequence_sparse_softmax_cross_entropy(labels,  # pylint: disable=invalid-name
                                          logits,
                                          sequence_length,
                                          time_major=False,
                                          name=None):
    """Computes sparse softmax cross entropy for each time step of sequence
    predictions.

    Args:
        labels: Target class indexes. I.e., classes are mutually exclusive (each
            entry is in exactly one class).
            If `time_major=False` (default), this must be a Tensor of shape:
            `[batch_size, max_time, (...)]`.
            If `time_major=True`, this must be a Tensor of shape:
            `[max_time, batch_size, (...)].`
        logits: Unscaled log probabilities. This must have the shape of
            `[max_time, batch_size, (...), num_classes]` or
            `[batch_size, max_time, (...), num_classes]` according to
            the value of `time_major`.
        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond the
            respective sequence lengths will have zero losses.
        time_major: The shape format of the inputs. If True, `labels` and
            `logits` must have shape `[max_time, batch_size, ...]`. If false
            (default), `labels` and `logits` must have shape
            `[batch_size, max_time, ...]`.
        name: (optional) A name for the operation.

    Returns:
        A Tensor containing the loss for each time step of each example.
        If `time_major=False` (default), this is of shape:
        `[batch_size, max_time, (...)]`.
        If `time_major=True`, this is of shape: `[max_time, batch_size, (...)]`.
    """
    with tf.name_scope(name, "sequence_sparse_softmax_cross_entropy"):  # pylint: disable=not-context-manager
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)
        return _mask_sequences(losses, sequence_length, time_major)


def average_sequence_sparse_softmax_cross_entropy(labels,  # pylint: disable=invalid-name
                                                  logits,
                                                  sequence_length,
                                                  time_major=False,
                                                  name=None):
    """Computes a single sparse softmax cross entropy loss that averages over
    all time steps and all examples in a batch.

    See `sequence_sparse_softmax_cross_entropy` for the definition of arguments.

    Returns:
        A single average loss.
    """
    with tf.name_scope(name, "average_sequence_sparse_softmax_cross_entropy"):  # pylint: disable=not-context-manager
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)
        losses = _mask_sequences(losses, sequence_length, time_major)
        loss = tf.reduce_sum(losses) / tf.reduce_sum(sequence_length)
        return loss

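To make the intended call pattern of the new losses concrete, here is a usage sketch (not part of the commit) for a toy batch, following the shapes in the docstrings above; passing `sequence_length` as a float Tensor is an assumption of this sketch so that the final division in the averaging helper is well-typed under TF 1.x:

```python
# Illustrative sketch only -- not part of the commit. Assumes TF 1.x graph mode.
import tensorflow as tf
from txtgen.losses.losses import (
    sequence_sparse_softmax_cross_entropy,
    average_sequence_sparse_softmax_cross_entropy)

batch_size, max_time, num_classes = 2, 5, 10
labels = tf.constant([[1, 2, 3, 4, 0], [5, 6, 7, 0, 0]])         # target class indexes
logits = tf.random_normal([batch_size, max_time, num_classes])   # unscaled log probabilities
lengths = tf.constant([5., 3.])  # float here so the averaging division is well-typed

# Per-step losses, zeroed beyond each example's length: shape [batch_size, max_time]
step_losses = sequence_sparse_softmax_cross_entropy(labels, logits, lengths)
# Single scalar loss averaged over the total number of time steps
avg_loss = average_sequence_sparse_softmax_cross_entropy(labels, logits, lengths)
```
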
7 changes: 5 additions & 2 deletions txtgen/modules/decoders/rnn_decoder_base.py
@@ -27,7 +27,7 @@ def __init__(self, cell=None, hparams=None, name="decoder"):
                a cell is created as specified by `rnn_cell` in `hparams`.
            hparams: (optional) A dictionary of hyperparameters. If it is not
                specified, the default hyperparameter setting is used. See
-                `default_hparams` for the sturcture and default values.
+                `default_hparams` for the structure and default values.
            name: Name of the encoder.
        """
        ModuleBase.__init__(name, hparams)
@@ -48,7 +48,10 @@ def _build(self, helper, initial_state): # pylint: disable=W0221
            initial_state: Initial state of decoding.
        Returns:
-            Decoding results.
+            `(outputs, final_state, sequence_lengths)`: `outputs` is an object
+            containing the decoder output on all time steps, `final_state` is
+            the cell state of the final time step, `sequence_lengths` is a
+            Tensor of shape `[batch_size]`.
        """
        self._helper = helper
        self._initial_state = initial_state
23 changes: 0 additions & 23 deletions txtgen/modules/encoders/encoder_base.py
@@ -8,7 +8,6 @@
from __future__ import print_function

from txtgen.modules.module_base import ModuleBase
-from txtgen.core.layers import default_rnn_cell_hparams


class EncoderBase(ModuleBase):
@@ -31,25 +30,3 @@ def _build(self, inputs, *args, **kwargs):
"""
raise NotImplementedError

@staticmethod
def default_hparams():
"""Returns a dictionary of hyperparameters with default values.
The dictionary has the following structure and default values:
```python
{
# A dictionary of rnn cell hyperparameters. See
# `txtgen.core.layers.default_rnn_cell_hparams` for the
# structure and default values. It is not used if a cell instance
# is already specified.
"rnn_cell": default_rnn_cell_hparams
}
```
"""
return {
"rnn_cell": default_rnn_cell_hparams()
}


23 changes: 23 additions & 0 deletions txtgen/modules/encoders/rnn_encoders.py
@@ -11,6 +11,7 @@

from txtgen.modules.encoders.encoder_base import EncoderBase
from txtgen.core.layers import get_rnn_cell
+from txtgen.core.layers import default_rnn_cell_hparams


class ForwardRNNEncoder(EncoderBase):
@@ -57,3 +58,25 @@ def _build(self, inputs, **kwargs):
            inputs=inputs,
            **kwargs)

+    @staticmethod
+    def default_hparams():
+        """Returns a dictionary of hyperparameters with default values.
+        The dictionary has the following structure and default values:
+        ```python
+        {
+            # A dictionary of rnn cell hyperparameters. See
+            # `txtgen.core.layers.default_rnn_cell_hparams` for the
+            # structure and default values. It is not used if a cell instance
+            # is already specified.
+            "rnn_cell": default_rnn_cell_hparams
+        }
+        ```
+        """
+        return {
+            "rnn_cell": default_rnn_cell_hparams()
+        }


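Finally, a small sketch (not part of the commit) of the hyperparameter plumbing that this change moves into `ForwardRNNEncoder`; only the import path shown in the diff is assumed:

```python
# Illustrative sketch only -- not part of the commit.
from txtgen.modules.encoders.rnn_encoders import ForwardRNNEncoder

hparams = ForwardRNNEncoder.default_hparams()
# {"rnn_cell": {...}} -- the nested cell settings from default_rnn_cell_hparams()
print(hparams["rnn_cell"])
```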