
Enforce styling with YAPF
dennybritz committed Dec 11, 2016
1 parent 33b31a1 commit 15b14a8
Showing 30 changed files with 850 additions and 564 deletions.
4 changes: 4 additions & 0 deletions .style.yapf
@@ -0,0 +1,4 @@
+[style]
+based_on_style = google
+indent_width = 2
+column_limit = 80
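
To see the effect of this configuration, here is a minimal sketch (not part of the commit) that formats a made-up snippet against the new .style.yapf via yapf's public Python API; the sample source string is illustrative only:

# Requires `pip install yapf`; formats a string against .style.yapf.
from yapf.yapflib.yapf_api import FormatCode

source = "def add(x,y):\n    return x+y\n"
result = FormatCode(source, style_config=".style.yapf")
# Depending on the yapf version, FormatCode returns either the formatted
# string or a (formatted_code, changed) tuple; handle both.
formatted = result[0] if isinstance(result, tuple) else result
print(formatted)
# With indent_width = 2 this prints:
# def add(x, y):
#   return x + y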
6 changes: 3 additions & 3 deletions pylintrc
@@ -193,7 +193,7 @@ max-nested-blocks=5
[FORMAT]

# Maximum number of characters on a single line.
-max-line-length=100
+max-line-length=80

# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
@@ -216,7 +216,7 @@ max-module-lines=1000
indent-string=' '

# Number of spaces of indent required inside a hanging or continued line.
-indent-after-paren=2
+indent-after-paren=4

# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=
@@ -238,7 +238,7 @@ notes=FIXME,XXX,TODO
[SIMILARITIES]

# Minimum lines number of a similarity.
-min-similarity-lines=4
+min-similarity-lines=10

# Ignore comments when computing similarities.
ignore-comments=yes
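
For illustration only (hypothetical code, not from this commit), the updated FORMAT settings mean a continued call is indented four spaces past the line that opens the parenthesis, and every line stays within 80 characters:

def dot(xs, ys):  # hypothetical helper; bodies use the 2-space indent
  return sum(x * y for x, y in zip(xs, ys))

score = dot(
    [0.1, 0.9],  # continuation indented 4 spaces (indent-after-paren=4)
    [0.0, 1.0])  # each line stays within max-line-length=80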
35 changes: 22 additions & 13 deletions seq2seq/decoders/attention.py
@@ -22,16 +22,19 @@ def _build(self, state, inputs):
"""Computes attention scores and outputs.
Args:
-      state: The state based on which to calculate attention scores. In seq2seq this is typically
-        the current state of the decoder. A tensor of shape `[B, ...]`
+      state: The state based on which to calculate attention scores.
+        In seq2seq this is typically the current state of the decoder.
+        A tensor of shape `[B, ...]`
inputs: The elements to compute attention *over*. In seq2seq this is
-        typically the sequence of encoder outputs. A tensor of shape `[B, T, input_dim]`
+        typically the sequence of encoder outputs.
+        A tensor of shape `[B, T, input_dim]`
Returns:
A tuple `(scores, context)`.
-      `scores` is a vector of length `T` where each element is the normalized "score" of
-        the corresponding `inputs` element.
-      `context` is the final attention layer output corresponding to the weighted inputs.
+      `scores` is a vector of length `T` where each element is the
+        normalized "score" of the corresponding `inputs` element.
+      `context` is the final attention layer output corresponding to
+        the weighted inputs.
        A tensor of shape `[B, input_dim]`.
"""
batch_size, inputs_timesteps, _ = tf.unpack(tf.shape(inputs))
@@ -40,25 +43,31 @@ def _build(self, state, inputs):
# Fully connected layers to transform both inputs and state
# into a tensor with `num_units` units
inputs_att = tf.contrib.layers.fully_connected(
-      inputs=inputs, num_outputs=self.num_units, activation_fn=None, scope="inputs_att")
+        inputs=inputs,
+        num_outputs=self.num_units,
+        activation_fn=None,
+        scope="inputs_att")
state_att = tf.contrib.layers.fully_connected(
-      inputs=state, num_outputs=self.num_units, activation_fn=None, scope="state_att")
+        inputs=state,
+        num_outputs=self.num_units,
+        activation_fn=None,
+        scope="state_att")

    # Take the dot product of the state with each time step of the inputs
# Result: A tensor of shape [B, T]
inputs_att_flat = tf.reshape(inputs_att, [-1, self.num_units])
state_att_flat = tf.reshape(
-      tf.tile(state_att, [1, inputs_timesteps]),
-      [inputs_timesteps * batch_size, self.num_units])
+        tf.tile(state_att, [1, inputs_timesteps]),
+        [inputs_timesteps * batch_size, self.num_units])
scores = tf.batch_matmul(
-      tf.expand_dims(inputs_att_flat, 1),
-      tf.expand_dims(state_att_flat, 2))
+        tf.expand_dims(inputs_att_flat, 1), tf.expand_dims(state_att_flat, 2))
scores = tf.reshape(scores, [batch_size, inputs_timesteps], name="scores")

# Normalize the scores
scores_normalized = tf.nn.softmax(scores, name="scores_normalized")

-    # Calculate the weighted average of the attention inputs according to the scores
+    # Calculate the weighted average of the attention inputs
+    # according to the scores
context = tf.expand_dims(scores_normalized, 2) * inputs
context = tf.reduce_sum(context, 1, name="context")
context.set_shape([None, inputs_dim])
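
As a reference for what the tile/reshape/batch_matmul sequence above computes, here is a minimal NumPy sketch of the same batched dot-product attention; the dimensions and random values are illustrative, not from the repository:

import numpy as np

B, T, input_dim, num_units = 2, 5, 3, 4
inputs = np.random.randn(B, T, input_dim)      # encoder outputs
inputs_att = np.random.randn(B, T, num_units)  # projected inputs (stand-in)
state_att = np.random.randn(B, num_units)      # projected decoder state

# Dot product of the state with every time step: one score per step, [B, T].
scores = np.einsum("btu,bu->bt", inputs_att, state_att)

# Softmax over time (stabilized), then a weighted average of the inputs.
weights = np.exp(scores - scores.max(axis=1, keepdims=True))
weights /= weights.sum(axis=1, keepdims=True)
context = (weights[:, :, None] * inputs).sum(axis=1)  # shape [B, input_dim]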
65 changes: 43 additions & 22 deletions seq2seq/decoders/attention_decoder.py
@@ -6,28 +6,41 @@
import tensorflow as tf
from seq2seq.decoders import DecoderBase, DecoderOutput, DecoderStepOutput


class AttentionDecoderOutput(
namedtuple("DecoderOutput", ["logits", "predictions", "attention_scores"])):
"""Augmented decoder output that also includes the attention scores.
"""
pass


class AttentionDecoder(DecoderBase):
"""An RNN Decoder that uses attention over an input sequence.
Args:
    cell: An instance of `tf.nn.rnn_cell.RNNCell`
-    vocab_size: Output vocabulary size, i.e. number of units in the softmax layer
-    attention_inputs: The sequence to take attention over. A tensor of shape `[B, T, ...]`.
-    attention_fn: The attention function to use. This function maps from `(state, inputs)` to
-      `(attention_scores, attention_context)`.
+    vocab_size: Output vocabulary size, i.e. number of units
+      in the softmax layer
+    attention_inputs: The sequence to take attention over.
+      A tensor of shape `[B, T, ...]`.
+    attention_fn: The attention function to use. This function maps from
+      `(state, inputs)` to `(attention_scores, attention_context)`.
      For an example, see `seq2seq.decoders.attention.AttentionLayer`.
-    max_decode_length: Maximum length for decoding steps for each example of shape `[B]`.
-    prediction_fn: Optional. A function that generates predictions of shape `[B]` from logits
-      of shape `[B, vocab_size]`. By default, this is argmax.
+    max_decode_length: Maximum length for decoding steps
+      for each example of shape `[B]`.
+    prediction_fn: Optional. A function that generates predictions
+      of shape `[B]` from logits of shape `[B, vocab_size]`.
+      By default, this is argmax.
"""
-  def __init__(self, cell, vocab_size, attention_inputs, attention_fn, max_decode_length,
-               prediction_fn=None, name="attention_decoder"):

+  def __init__(self,
+               cell,
+               vocab_size,
+               attention_inputs,
+               attention_fn,
+               max_decode_length,
+               prediction_fn=None,
+               name="attention_decoder"):
super(AttentionDecoder, self).__init__(cell, max_decode_length, name)
self.vocab_size = vocab_size
self.prediction_fn = prediction_fn
@@ -40,20 +53,24 @@ def __init__(self, cell, vocab_size, attention_inputs, attention_fn, max_decode_

@staticmethod
def _pack_outputs(outputs_ta, final_loop_state):
-    logits, predictions = DecoderBase._pack_outputs(outputs_ta, final_loop_state)
+    logits, predictions = DecoderBase._pack_outputs(outputs_ta,
+                                                    final_loop_state)
attention_scores = tf.transpose(final_loop_state.pack(), [1, 0, 2])
return AttentionDecoderOutput(logits, predictions, attention_scores)

def _step(self, time_, cell_output, cell_state, loop_state, next_input_fn):
initial_call = (cell_output is None)

if initial_call:
-      cell_output = tf.zeros([tf.shape(self.attention_inputs)[0], self.cell.output_size])
+      cell_output = tf.zeros(
+          [tf.shape(self.attention_inputs)[0], self.cell.output_size])
# Initialize the TensorArray that will hold the attention scores
-      next_loop_state = tf.TensorArray(dtype=tf.float32, size=1, dynamic_size=True)
+      next_loop_state = tf.TensorArray(
+          dtype=tf.float32, size=1, dynamic_size=True)

# Compute attention
-    att_scores, attention_context = self.attention_fn(cell_output, self.attention_inputs)
+    att_scores, attention_context = self.attention_fn(cell_output,
+                                                      self.attention_inputs)

# In the first step the attention vector is set to all zeros
if initial_call:
@@ -64,22 +81,26 @@ def _step(self, time_, cell_output, cell_state, loop_state, next_input_fn):
# Softmax computation
softmax_input = tf.concat(1, [cell_output, attention_context])
logits = tf.contrib.layers.fully_connected(
-      inputs=softmax_input, num_outputs=self.vocab_size, activation_fn=None, scope="logits")
+        inputs=softmax_input,
+        num_outputs=self.vocab_size,
+        activation_fn=None,
+        scope="logits")
predictions = self.prediction_fn(logits)
outputs = DecoderOutput(logits, predictions)

if initial_call:
outputs = DecoderOutput(
-        logits=tf.zeros([self.vocab_size]),
-        predictions=tf.zeros([], dtype=tf.int64))
+          logits=tf.zeros([self.vocab_size]),
+          predictions=tf.zeros(
+              [], dtype=tf.int64))

# Append the attention context to the inputs
-    next_input = next_input_fn(
-      time_, (None if initial_call else cell_output), cell_state, loop_state, outputs)
+    next_input = next_input_fn(time_, (None if initial_call else cell_output),
+                               cell_state, loop_state, outputs)
next_input = tf.concat(1, [next_input, attention_context])

return DecoderStepOutput(
-      outputs=outputs,
-      next_input=next_input,
-      next_cell_state=cell_state,
-      next_loop_state=next_loop_state)
+        outputs=outputs,
+        next_input=next_input,
+        next_cell_state=cell_state,
+        next_loop_state=next_loop_state)
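
A construction sketch for the signature above (the cell choice, tensor shapes, vocabulary size, and the AttentionLayer constructor argument are assumptions for illustration, not taken from this diff):

import tensorflow as tf
from seq2seq.decoders import AttentionDecoder  # assuming the package exports it
from seq2seq.decoders.attention import AttentionLayer

# Hypothetical encoder outputs to attend over: a [B, T, D] tensor.
attention_inputs = tf.placeholder(tf.float32, [None, None, 128])

decoder = AttentionDecoder(
    cell=tf.nn.rnn_cell.GRUCell(num_units=128),
    vocab_size=10000,
    attention_inputs=attention_inputs,
    attention_fn=AttentionLayer(num_units=128),  # assumed constructor arg
    max_decode_length=tf.placeholder(tf.int32, [None]))  # per example, [B]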
37 changes: 24 additions & 13 deletions seq2seq/decoders/basic_decoder.py
@@ -5,17 +5,27 @@
import tensorflow as tf
from seq2seq.decoders import DecoderBase, DecoderOutput, DecoderStepOutput


class BasicDecoder(DecoderBase):
"""A simple RNN decoder that performed a softmax operations on the cell output.
"""Simple RNN decoder that performed a softmax operations on the cell output.
Args:
    cell: An instance of `tf.nn.rnn_cell.RNNCell`
-    vocab_size: Output vocabulary size, i.e. number of units in the softmax layer
-    max_decode_length: Maximum length for decoding steps for each example of shape `[B]`.
-    prediction_fn: Optional. A function that generates predictions of shape `[B]` from logits
-      of shape `[B, vocab_size]`. By default, this is argmax.
+    vocab_size: Output vocabulary size, i.e. number of units
+      in the softmax layer
+    max_decode_length: Maximum length for decoding steps for each example
+      of shape `[B]`.
+    prediction_fn: Optional. A function that generates predictions
+      of shape `[B]` from logits of shape `[B, vocab_size]`.
+      By default, this is argmax.
"""
-  def __init__(self, cell, vocab_size, max_decode_length, prediction_fn=None, name="basic_decoder"):

+  def __init__(self,
+               cell,
+               vocab_size,
+               max_decode_length,
+               prediction_fn=None,
+               name="basic_decoder"):
super(BasicDecoder, self).__init__(cell, max_decode_length, name)
self.vocab_size = vocab_size
self.prediction_fn = prediction_fn
@@ -31,20 +41,21 @@ def _step(self, time_, cell_output, cell_state, loop_state, next_input_fn):
cell_output = tf.zeros([1, self.cell.output_size])

logits = tf.contrib.layers.fully_connected(
-      inputs=cell_output, num_outputs=self.vocab_size, activation_fn=None)
+        inputs=cell_output, num_outputs=self.vocab_size, activation_fn=None)

if initial_call:
outputs = DecoderOutput(
-        logits=tf.zeros([self.vocab_size]),
-        predictions=tf.zeros([], dtype=tf.int64))
+          logits=tf.zeros([self.vocab_size]),
+          predictions=tf.zeros(
+              [], dtype=tf.int64))
else:
predictions = self.prediction_fn(logits)
outputs = DecoderOutput(logits, predictions)

next_input = next_input_fn(time_, (None if initial_call else cell_output),
cell_state, loop_state, outputs)
return DecoderStepOutput(
-      outputs=outputs,
-      next_input=next_input,
-      next_cell_state=cell_state,
-      next_loop_state=None)
+        outputs=outputs,
+        next_input=next_input,
+        next_cell_state=cell_state,
+        next_loop_state=None)
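
And a corresponding sketch for BasicDecoder (again, the cell type and sizes are illustrative assumptions):

import tensorflow as tf
from seq2seq.decoders import BasicDecoder

decoder = BasicDecoder(
    cell=tf.nn.rnn_cell.GRUCell(num_units=128),
    vocab_size=10000,
    max_decode_length=tf.placeholder(tf.int32, [None]),  # per example, [B]
    prediction_fn=None)  # None falls back to the argmax default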
