Reference: https://github.com/tensorflow/tensorflow/blob/r1.1/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py

In [1]:
import numpy as np
import tensorflow as tf
from pprint import pprint
from tensorflow.python.framework import tensor_shape
from tensorflow.contrib.rnn.python.ops.core_rnn_cell_impl import BasicRNNCell

In [2]:
from tensorflow.contrib.seq2seq.python.ops.basic_decoder import BasicDecoder, BasicDecoderOutput
from tensorflow.contrib.seq2seq.python.ops.helper import TrainingHelper
from tensorflow.python.layers.core import Dense

In [3]:
# Valid length of every sequence in the batch. The entry with length 0 is
# already finished when decoding starts.
sequence_length = [3, 4, 3, 1, 0]
# One length per batch element, so derive the batch size instead of
# hard-coding it; the two can no longer get out of sync.
batch_size = len(sequence_length)
max_time = 8
input_size = 7
hidden_size = 10
output_size = 3

# Draw the inputs from a dedicated, seeded generator so that
# "Restart Kernel -> Run All" reproduces the same values without touching
# NumPy's global random state.
SEED = 0
rng = np.random.RandomState(SEED)
# inputs: [batch_size x max_time x input_size], float32
inputs = rng.randn(batch_size, max_time, input_size).astype(np.float32)

# Projection applied to every cell output. Its variables are created lazily on
# the first call: a kernel of size [hidden_size x output_size] and a bias of
# size [output_size].
output_layer = Dense(units=output_size)

In [4]:
# The layer has not been called yet, so it holds no variables at this point.
vars(output_layer)

{'_built': False,
 '_losses': [],
 '_non_trainable_variables': [],
 '_reuse': None,
 '_scope': <tensorflow.python.ops.variable_scope.VariableScope at 0x11c9eff28>,
 '_trainable': True,
 '_trainable_variables': [],
 '_updates': [],
 'activation': None,
 'activity_regularizer': None,
 'bias_initializer': <tensorflow.python.ops.init_ops.Zeros at 0x11997c6a0>,
 'bias_regularizer': None,
 'dtype': tf.float32,
 'kernel_initializer': None,
 'kernel_regularizer': None,
 'name': 'dense',
 'units': 3,
 'use_bias': True}

#### Prepare decoder cell

In [5]:
# Plain RNN cell with hidden_size units.
dec_cell = BasicRNNCell(num_units=hidden_size)

#### Prepare Helper

In [6]:
# Feeds the given inputs to the decoder one time step at a time;
# sequence_length holds the valid length of each sequence in the batch.
helper = TrainingHelper(inputs=inputs, sequence_length=sequence_length)

#### Prepare Decoder

In [7]:
# Every sequence starts from an all-zero RNN state: [batch_size x hidden_size].
zero_initial_state = dec_cell.zero_state(batch_size=batch_size, dtype=tf.float32)

# Bundle the cell, the input-feeding helper, the starting state and the
# projection that is applied to each cell output.
decoder = BasicDecoder(
    cell=dec_cell,
    helper=helper,
    initial_state=zero_initial_state,
    output_layer=output_layer)

#### Check decoder attributes

In [8]:
# Instance attributes stored on the decoder object.
vars(decoder)

{'_cell': <tensorflow.contrib.rnn.python.ops.core_rnn_cell_impl.BasicRNNCell at 0x10abb10f0>,
 '_helper': <tensorflow.contrib.seq2seq.python.ops.helper.TrainingHelper at 0x10b146ef0>,
 '_initial_state': <tf.Tensor 'BasicRNNCellZeroState/zeros:0' shape=(5, 10) dtype=float32>,
 '_output_layer': <tensorflow.python.layers.core.Dense at 0x11c9efd30>}

In [9]:
# Every name exposed by the decoder, minus those containing a double underscore.
list(filter(lambda name: '__' not in name, dir(decoder)))

['_abc_cache',
 '_abc_negative_cache',
 '_abc_negative_cache_version',
 '_abc_registry',
 '_cell',
 '_helper',
 '_initial_state',
 '_output_layer',
 '_rnn_output_size',
 'batch_size',
 'initialize',
 'output_dtype',
 'output_size',
 'step']

In [10]:
# Show the decoder's per-step output sizes, output dtypes and batch size,
# one per line.
for decoder_property in (decoder.output_size,
                         decoder.output_dtype,
                         decoder.batch_size):
    print(decoder_property)

BasicDecoderOutput(rnn_output=TensorShape([Dimension(3)]), sample_id=TensorShape([]))
BasicDecoderOutput(rnn_output=tf.float32, sample_id=tf.int32)
Tensor("TrainingHelper/Size:0", shape=(), dtype=int32)


#### Initialize states

In [11]:
# decoder.initialize() provides what is needed to start decoding at time 0:
#   first_finished: [batch_size]                (True where the sequence length is 0)
#   first_inputs:   [batch_size x input_size]
#   first_state:    [batch_size x hidden_size]
initial_values = decoder.initialize()
first_finished, first_inputs, first_state = initial_values
(first_finished, first_inputs, first_state)

(<tf.Tensor 'TrainingHelperInitialize/Equal:0' shape=(5,) dtype=bool>,
 <tf.Tensor 'TrainingHelperInitialize/cond/Merge:0' shape=(5, 7) dtype=float32>,
 <tf.Tensor 'BasicRNNCellZeroState/zeros:0' shape=(5, 10) dtype=float32>)

#### Unroll single step

In [12]:
# Run the decoder for one time step (time 0), starting from the initial
# inputs and state:
#   step_outputs.rnn_output: [batch_size x output_size]
#   step_outputs.sample_id:  [batch_size]
#   step_state:              [batch_size x hidden_size]
#   step_next_inputs:        [batch_size x input_size]
#   step_finished:           [batch_size]
step_result = decoder.step(
    time=tf.constant(0), inputs=first_inputs, state=first_state)
step_outputs, step_state, step_next_inputs, step_finished = step_result
(step_outputs, step_state, step_next_inputs, step_finished)

(BasicDecoderOutput(rnn_output=<tf.Tensor 'dense/BiasAdd:0' shape=(5, 3) dtype=float32>, sample_id=<tf.Tensor 'BasicDecoderStep/TrainingHelperSample/Cast:0' shape=(5,) dtype=int32>),
 <tf.Tensor 'BasicDecoderStep/basic_rnn_cell/Tanh:0' shape=(5, 10) dtype=float32>,
 <tf.Tensor 'BasicDecoderStep/TrainingHelperNextInputs/cond/Merge:0' shape=(5, 7) dtype=float32>,
 <tf.Tensor 'BasicDecoderStep/TrainingHelperNextInputs/GreaterEqual:0' shape=(5,) dtype=bool>)

#### `output_layer` finally has its variables!
https://github.com/tensorflow/tensorflow/blob/r1.1/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py#L141
```
def step(self, time, inputs, state, name=None):
...
if self._output_layer is not None:
    cell_outputs = self._output_layer(cell_outputs)
...
```

In [13]:
# Having been called inside decoder.step(), the layer is now built and holds
# its kernel and bias variables.
vars(output_layer)

{'_built': True,
 '_losses': [],
 '_non_trainable_variables': [],
 '_reuse': None,
 '_scope': <tensorflow.python.ops.variable_scope.VariableScope at 0x11c9eff28>,
 '_trainable': True,
 '_trainable_variables': [<tf.Variable 'dense/kernel:0' shape=(10, 3) dtype=float32_ref>,
  <tf.Variable 'dense/bias:0' shape=(3,) dtype=float32_ref>],
 '_updates': [],
 'activation': None,
 'activity_regularizer': None,
 'bias': <tf.Variable 'dense/bias:0' shape=(3,) dtype=float32_ref>,
 'bias_initializer': <tensorflow.python.ops.init_ops.Zeros at 0x11997c6a0>,
 'bias_regularizer': None,
 'dtype': tf.float32,
 'kernel': <tf.Variable 'dense/kernel:0' shape=(10, 3) dtype=float32_ref>,
 'kernel_initializer': None,
 'kernel_regularizer': None,
 'name': 'dense',
 'units': 3,
 'use_bias': True}

#### Run Graph

In [14]:
# Tensors to evaluate, keyed by a readable label.
fetches = {
    "batch_size": decoder.batch_size,
    "first_finished": first_finished,
    "first_inputs": first_inputs,
    "first_state": first_state,
    "step_outputs": step_outputs,
    "step_state": step_state,
    "step_next_inputs": step_next_inputs,
    "step_finished": step_finished,
}

with tf.Session() as sess:
    # The graph's variables must be initialized before anything that depends
    # on them can be evaluated.
    sess.run(tf.global_variables_initializer())
    results = sess.run(fetches)
pprint(results)

{'batch_size': 5,
 'first_finished': array([False, False, False, False,  True], dtype=bool),
 'first_inputs': array([[-0.78533345,  0.11858682, -0.20014298, -0.96553558,  0.31251544,
        -1.33988655,  0.82164931],
       [ 2.5350647 , -0.07409398,  1.0977391 ,  1.11003268,  0.05649788,
         0.89605135,  0.36308917],
       [ 0.71170074,  1.61647308,  0.32309851, -0.07257813,  0.32730117,
        -1.07073486,  0.70246333],
       [-1.00114799, -2.05082059, -0.37695187,  0.36587235, -1.17279899,
         0.48194218,  0.89679271],
       [-0.4480046 , -0.12000368,  0.0555664 , -0.21333848,  1.22862589,
        -1.38129044, -0.45545465]], dtype=float32),
 'first_state': array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]], dtype=float32),
 'ste

##### Note that
- `first_finished[4] == True`
- Because `sequence_length[4]` is 0 (recall that `sequence_length=[3, 4, 3, 1, 0]`)


- `step_finished[3]` and `step_finished[4]` are both `True`
- After one step of unrolling, the sequence at index 3 is also finished, because `sequence_length[3]` is 1