Commit

finished cross_entropy loss
ZhitingHu committed Aug 21, 2017
1 parent e372227 commit ff81731
Showing 7 changed files with 206 additions and 33 deletions.
6 changes: 3 additions & 3 deletions txtgen/core/layers.py
@@ -87,13 +87,13 @@ def default_rnn_cell_hparams():


def get_rnn_cell(cell_hparams):
-    """Creates an RNN cell
+    """Creates an RNN cell.
    Args:
-        cell_hparams: a dictionary of hyperparameters
+        cell_hparams: a dictionary of hyperparameters.
    Returns:
-        An instance of RNN cell
+        An instance of RNN cell.
    """
    cells = []
    for _ in range(cell_hparams["num_layers"]):
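For orientation, a minimal usage sketch (not part of this commit) of the interface documented above; the only assumption beyond the diff is that `default_rnn_cell_hparams()` returns a dict containing the `"num_layers"` key consumed in the loop:

```python
# Illustrative sketch only -- not part of the commit.
from txtgen.core.layers import default_rnn_cell_hparams, get_rnn_cell

cell_hparams = default_rnn_cell_hparams()  # assumed to include "num_layers"
cell = get_rnn_cell(cell_hparams)          # an RNN cell stacked over "num_layers" layers
```
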
10 changes: 5 additions & 5 deletions txtgen/core/utils.py
@@ -56,7 +56,7 @@ def get_instance(class_name, args, module_paths=None):


def get_function(func_name, module_paths=None):
-    """Returns the function of specified name and module
+    """Returns the function of specified name and module.
    Args:
        func_name: Name of the function.
@@ -84,7 +84,7 @@ def get_function(func_name, module_paths=None):


def switch_dropout(dropout_keep_prob, is_train=None):
-    """Turn off dropout when not in training mode
+    """Turns off dropout when not in training mode.
    Args:
        dropout_keep_prob: Dropout keep probability in training mode
@@ -94,7 +94,7 @@ def switch_dropout(dropout_keep_prob, is_train=None):
    Returns:
        A unit Tensor that equals the dropout keep probability in training mode,
-        and 1 in eval mode
+        and 1 in eval mode.
    """
    if is_train is None:
        return 1. - (1. - dropout_keep_prob) * tf.to_int32(context.is_train())
@@ -103,15 +103,15 @@ def transpose_batch_time(inputs):


def transpose_batch_time(inputs):
-    """Transposes inputs between time-major and batch-major
+    """Transposes inputs between time-major and batch-major.
    Args:
        inputs: A Tensor of shape `[batch_size, max_time, ...]` (batch-major)
            or `[max_time, batch_size, ...]` (time-major), or a (possibly
            nested) tuple of such elements.
    Returns:
-        A Tensor with transposed batch and time dimensions of inputs
+        A Tensor with transposed batch and time dimensions of inputs.
    """
    flat_input = nest.flatten(inputs)
    flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input]
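As a quick illustration (again, not part of the commit), `transpose_batch_time` as documented behaves roughly like this under TF 1.x graph mode; the concrete shapes are made up for the example:

```python
# Illustrative sketch only -- not part of the commit.
import tensorflow as tf
from txtgen.core.utils import transpose_batch_time

batch_major = tf.zeros([32, 20, 256])           # [batch_size, max_time, dim]
time_major = transpose_batch_time(batch_major)  # shape becomes [20, 32, 256]
```
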
Empty file added txtgen/losses/__init__.py
Empty file.
170 changes: 170 additions & 0 deletions txtgen/losses/losses.py
@@ -0,0 +1,170 @@
#
"""
Various losses
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.ops import rnn # pylint: disable=E0611


def _mask_sequences(sequence, sequence_length, time_major=False):
    """Masks out sequence entries that are beyond the respective sequence
    lengths.

    Args:
        sequence: A Tensor of sequence values.
            If `time_major=False` (default), this must be a Tensor of shape:
            `[batch_size, max_time, (...), num_classes]`.
            If `time_major=True`, this must be a Tensor of shape:
            `[max_time, batch_size, (...), num_classes].`
        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond the
            respective sequence lengths will be made zero.
        time_major: The shape format of the inputs. If True, `sequence` must
            have shape `[max_time, batch_size, ...]`. If false (default),
            `sequence` must have shape `[batch_size, max_time, ...]`.

    Returns:
        A Tensor of the same shape as `sequence` but with masked-out entries.
    """
    if time_major:
        sequence = rnn._transpose_batch_time(sequence)  # pylint: disable=protected-access
    max_time = tf.to_int32(tf.shape(sequence)[1])
    mask = tf.sequence_mask(
        tf.to_int32(sequence_length), max_time, tf.float32)
    sequence = sequence * mask
    if time_major:
        sequence = rnn._transpose_batch_time(sequence)  # pylint: disable=protected-access
    return sequence


def sequence_softmax_cross_entropy(labels,  # pylint: disable=invalid-name
                                   logits,
                                   sequence_length,
                                   time_major=False,
                                   name=None):
    """Computes softmax cross entropy for each time step of sequence
    predictions.

    Args:
        labels: Target class distributions.
            If `time_major=False` (default), this must be a Tensor of shape:
            `[batch_size, max_time, (...), num_classes]`.
            If `time_major=True`, this must be a Tensor of shape:
            `[max_time, batch_size, (...), num_classes].`
        logits: Unscaled log probabilities. This must have the same shape as
            `labels`.
        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond the
            respective sequence lengths will have zero losses.
        time_major: The shape format of the inputs. If True, `labels` and
            `logits` must have shape `[max_time, batch_size, ...]`. If false
            (default), `labels` and `logits` must have shape
            `[batch_size, max_time, ...]`.
        name: (optional) A name for the operation.

    Returns:
        A Tensor containing the loss for each time step of each example. Time
        steps beyond the respective sequence lengths will have zero losses.
        If `time_major=False` (default), this is of shape:
        `[batch_size, max_time, (...)]`.
        If `time_major=True`, this is of shape: `[max_time, batch_size, (...)]`.
    """
    with tf.name_scope(name, "sequence_softmax_cross_entropy"):  # pylint: disable=not-context-manager
        losses = tf.nn.softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)
        return _mask_sequences(losses, sequence_length, time_major)


def average_sequence_softmax_cross_entropy(labels,  # pylint: disable=invalid-name
                                           logits,
                                           sequence_length,
                                           time_major=False,
                                           name=None):
    """Computes a single softmax cross entropy loss that averages over all time
    steps and all examples in a batch.

    See `sequence_softmax_cross_entropy` for the definition of arguments.

    Returns:
        A single average loss.
    """
    with tf.name_scope(name, "average_sequence_softmax_cross_entropy"):  # pylint: disable=not-context-manager
        losses = tf.nn.softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)
        losses = _mask_sequences(losses, sequence_length, time_major)
        loss = tf.reduce_sum(losses) / tf.reduce_sum(sequence_length)
        return loss


def sequence_sparse_softmax_cross_entropy(labels,  # pylint: disable=invalid-name
                                          logits,
                                          sequence_length,
                                          time_major=False,
                                          name=None):
    """Computes sparse softmax cross entropy for each time step of sequence
    predictions.

    Args:
        labels: Target class indexes. I.e., classes are mutually exclusive (each
            entry is in exactly one class).
            If `time_major=False` (default), this must be a Tensor of shape:
            `[batch_size, max_time, (...)]`.
            If `time_major=True`, this must be a Tensor of shape:
            `[max_time, batch_size, (...)].`
        logits: Unscaled log probabilities. This must have the shape of
            `[max_time, batch_size, (...), num_classes]` or
            `[batch_size, max_time, (...), num_classes]` according to
            the value of `time_major`.
        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond the
            respective sequence lengths will have zero losses.
        time_major: The shape format of the inputs. If True, `labels` and
            `logits` must have shape `[max_time, batch_size, ...]`. If false
            (default), `labels` and `logits` must have shape
            `[batch_size, max_time, ...]`.
        name: (optional) A name for the operation.

    Returns:
        A Tensor containing the loss for each time step of each example.
        If `time_major=False` (default), this is of shape:
        `[batch_size, max_time, (...)]`.
        If `time_major=True`, this is of shape: `[max_time, batch_size, (...)]`.
    """
    with tf.name_scope(name, "sequence_sparse_softmax_cross_entropy"):  # pylint: disable=not-context-manager
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)
        return _mask_sequences(losses, sequence_length, time_major)


def average_sequence_sparse_softmax_cross_entropy(labels,  # pylint: disable=invalid-name
                                                  logits,
                                                  sequence_length,
                                                  time_major=False,
                                                  name=None):
    """Computes a single sparse softmax cross entropy loss that averages over
    all time steps and all examples in a batch.

    See `sequence_sparse_softmax_cross_entropy` for the definition of arguments.

    Returns:
        A single average loss.
    """
    with tf.name_scope(name, "average_sequence_sparse_softmax_cross_entropy"):  # pylint: disable=not-context-manager
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)
        losses = _mask_sequences(losses, sequence_length, time_major)
        loss = tf.reduce_sum(losses) / tf.reduce_sum(sequence_length)
        return loss

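To make the intended call pattern of the new losses concrete, here is a usage sketch (not part of the commit) for a toy batch, following the shapes in the docstrings above; passing `sequence_length` as a float Tensor is an assumption of this sketch so that the final division in the averaging helper is well-typed under TF 1.x:

```python
# Illustrative sketch only -- not part of the commit. Assumes TF 1.x graph mode.
import tensorflow as tf
from txtgen.losses.losses import (
    sequence_sparse_softmax_cross_entropy,
    average_sequence_sparse_softmax_cross_entropy)

batch_size, max_time, num_classes = 2, 5, 10
labels = tf.constant([[1, 2, 3, 4, 0], [5, 6, 7, 0, 0]])         # target class indexes
logits = tf.random_normal([batch_size, max_time, num_classes])   # unscaled log probabilities
lengths = tf.constant([5., 3.])  # float here so the averaging division is well-typed

# Per-step losses, zeroed beyond each example's length: shape [batch_size, max_time]
step_losses = sequence_sparse_softmax_cross_entropy(labels, logits, lengths)
# Single scalar loss averaged over the total number of time steps
avg_loss = average_sequence_sparse_softmax_cross_entropy(labels, logits, lengths)
```
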
7 changes: 5 additions & 2 deletions txtgen/modules/decoders/rnn_decoder_base.py
@@ -27,7 +27,7 @@ def __init__(self, cell=None, hparams=None, name="decoder"):
                a cell is created as specified by `rnn_cell` in `hparams`.
            hparams: (optional) A dictionary of hyperparameters. If it is not
                specified, the default hyperparameter setting is used. See
-                `default_hparams` for the sturcture and default values.
+                `default_hparams` for the structure and default values.
            name: Name of the encoder.
        """
        ModuleBase.__init__(name, hparams)
@@ -48,7 +48,10 @@ def _build(self, helper, initial_state): # pylint: disable=W0221
            initial_state: Initial state of decoding.
        Returns:
-            Decoding results.
+            `(outputs, final_state, sequence_lengths)`: `outputs` is an object
+            containing the decoder output on all time steps, `final_state` is
+            the cell state of the final time step, `sequence_lengths` is a
+            Tensor of shape `[batch_size]`.
        """
        self._helper = helper
        self._initial_state = initial_state
23 changes: 0 additions & 23 deletions txtgen/modules/encoders/encoder_base.py
@@ -8,7 +8,6 @@
from __future__ import print_function

from txtgen.modules.module_base import ModuleBase
-from txtgen.core.layers import default_rnn_cell_hparams


class EncoderBase(ModuleBase):
@@ -31,25 +30,3 @@ def _build(self, inputs, *args, **kwargs):
"""
raise NotImplementedError

@staticmethod
def default_hparams():
"""Returns a dictionary of hyperparameters with default values.
The dictionary has the following structure and default values:
```python
{
# A dictionary of rnn cell hyperparameters. See
# `txtgen.core.layers.default_rnn_cell_hparams` for the
# structure and default values. It is not used if a cell instance
# is already specified.
"rnn_cell": default_rnn_cell_hparams
}
```
"""
return {
"rnn_cell": default_rnn_cell_hparams()
}


23 changes: 23 additions & 0 deletions txtgen/modules/encoders/rnn_encoders.py
@@ -11,6 +11,7 @@

from txtgen.modules.encoders.encoder_base import EncoderBase
from txtgen.core.layers import get_rnn_cell
+from txtgen.core.layers import default_rnn_cell_hparams


class ForwardRNNEncoder(EncoderBase):
@@ -57,3 +58,25 @@ def _build(self, inputs, **kwargs):
            inputs=inputs,
            **kwargs)

+    @staticmethod
+    def default_hparams():
+        """Returns a dictionary of hyperparameters with default values.
+        The dictionary has the following structure and default values:
+        ```python
+        {
+            # A dictionary of rnn cell hyperparameters. See
+            # `txtgen.core.layers.default_rnn_cell_hparams` for the
+            # structure and default values. It is not used if a cell instance
+            # is already specified.
+            "rnn_cell": default_rnn_cell_hparams
+        }
+        ```
+        """
+        return {
+            "rnn_cell": default_rnn_cell_hparams()
+        }


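Finally, a small sketch (not part of the commit) of the hyperparameter plumbing that this change moves into `ForwardRNNEncoder`; only the import path shown in the diff is assumed:

```python
# Illustrative sketch only -- not part of the commit.
from txtgen.modules.encoders.rnn_encoders import ForwardRNNEncoder

hparams = ForwardRNNEncoder.default_hparams()
# {"rnn_cell": {...}} -- the nested cell settings from default_rnn_cell_hparams()
print(hparams["rnn_cell"])
```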