In [1]:
# http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/
# http://learningtensorflow.com/index.html
# http://suriyadeepan.github.io/2016-12-31-practical-seq2seq/

import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
import pprint
# Shared pretty-printer; the cells below use it to display arrays.
pp = pprint.PrettyPrinter(indent=4)
# One interactive session for the whole notebook. Later cells call
# `sess.run(...)` and `Tensor.eval()` (with no session argument) against it.
sess = tf.InteractiveSession()

In [2]:
with tf.variable_scope('one_cell') as scope:
    # Smallest case: a vanilla RNN cell mapping 3 input features to
    # 5 hidden units, run over a single example with a single time step.
    hidden_size = 5
    cell = rnn.BasicRNNCell(hidden_size)
    print(cell.output_size, cell.state_size)

    # Input array of shape (1, 1, 3).
    x_data = np.asarray([[[1, 2, 3]]], dtype=np.float32)
    outputs, _states = tf.nn.dynamic_rnn(
        cell=cell, inputs=x_data, dtype=tf.float32)

    # The cell's weights must be initialised before the graph is evaluated.
    sess.run(tf.global_variables_initializer())
    pp.pprint(sess.run(outputs))

5 5
array([[[-0.04824175, -0.11266616, -0.59395987,  0.99640656, -0.66602409]]], dtype=float32)


In [3]:
with tf.variable_scope('two_sequances') as scope:
    # Same vanilla RNN (3 input features -> 5 hidden units), but the single
    # example now carries two time steps, so two output rows are produced.
    hidden_size = 5
    cell = rnn.BasicRNNCell(hidden_size)
    # Input array of shape (1, 2, 3).
    x_data = np.asarray([[[1, 2, 3], [4, 5, 6]]], dtype=np.float32)
    outputs, states = tf.nn.dynamic_rnn(
        cell=cell, inputs=x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(sess.run(outputs))

array([[[-0.11753262,  0.82967931,  0.92287099, -0.94706255,  0.99342507],
        [-0.44019806,  0.9612658 ,  0.98902148, -0.99947858,  0.9999997 ]]], dtype=float32)


In [4]:
with tf.variable_scope('3_batches') as scope:
    # LSTM cell (3 input features -> 5 hidden units) applied to a batch of
    # three sequences, each stored with two time steps.
    cell = rnn.BasicLSTMCell(5, state_is_tuple=True)
    # Values 1..18 arranged as a (3, 2, 3) array.
    x_data = np.arange(1, 19, dtype=np.float32).reshape(3, 2, 3)
    # Per-example valid lengths: only the middle sequence uses both steps.
    # In the recorded output, steps past an example's length are all zeros.
    outputs, _states = tf.nn.dynamic_rnn(
        cell=cell, inputs=x_data, sequence_length=[1, 2, 1], dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(sess.run(outputs))

array([[[ -9.24924016e-02,   1.47779593e-02,   1.73619956e-01,
          -4.51426417e-01,  -3.30132879e-02],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]],

       [[ -2.11092457e-03,   1.20736848e-04,   3.24320495e-02,
          -7.11563170e-01,  -1.55489382e-04],
        [ -1.72451473e-04,   1.67748312e-05,   1.05808750e-02,
          -9.04355943e-01,  -1.40012504e-04]],

       [[ -2.60747729e-05,   5.85509952e-07,   2.34753476e-03,
          -7.55022943e-01,  -5.58087777e-07],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]]], dtype=float32)


In [5]:
with tf.variable_scope('initial_state') as scope:
    # LSTM cell (3 input features -> 5 hidden units) whose starting state is
    # built explicitly with zero_state() and handed to dynamic_rnn.
    batch_size = 3
    cell = rnn.BasicLSTMCell(5, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)

    # Values 1..18 arranged as a (3, 2, 3) array; the leading axis matches
    # the batch_size used for the zero state above.
    x_data = np.arange(1, 19, dtype=np.float32).reshape(batch_size, 2, 3)
    outputs, _states = tf.nn.dynamic_rnn(
        cell=cell, inputs=x_data, initial_state=initial_state,
        dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(sess.run(outputs))

array([[[  1.20581007e-02,   9.10249818e-03,  -5.73653318e-02,
          -2.28022784e-01,   1.06247095e-02],
        [  1.96108937e-01,   4.87912744e-02,  -1.93600245e-02,
          -4.58395869e-01,  -7.03926757e-02]],

       [[  1.33064032e-01,   1.99696813e-02,  -7.78979913e-04,
          -5.71957648e-01,  -1.44965827e-01],
        [  2.38348722e-01,   1.22230034e-02,  -7.75410153e-05,
          -5.95831394e-01,  -1.84057772e-01]],

       [[  5.16901910e-02,   1.88314938e-03,  -6.19996081e-06,
          -6.81139827e-01,  -1.17535137e-01],
        [  8.82856771e-02,   7.83236872e-04,  -5.80701567e-07,
          -6.80091798e-01,  -1.14115342e-01]]], dtype=float32)


In [6]:
# Shared input for the cells that follow: the values 0..23 arranged as
# (batch=2, sequence_length=4, input size=3).
x_data = np.arange(2 * 4 * 3, dtype=np.float32).reshape((2, 4, 3))
pp.pprint(x_data)

array([[[  0.,   1.,   2.],
        [  3.,   4.,   5.],
        [  6.,   7.,   8.],
        [  9.,  10.,  11.]],

       [[ 12.,  13.,  14.],
        [ 15.,  16.,  17.],
        [ 18.,  19.,  20.],
        [ 21.,  22.,  23.]]], dtype=float32)


In [7]:
with tf.variable_scope('MultiRNNCell') as scope:
    # Stack three LSTM layers of 5 units each.
    # Every layer gets its own cell object: the first layer receives the
    # 3-wide inputs while the upper layers receive 5-wide outputs, so the
    # layers cannot share one set of weights. `[cell] * 3` would place the
    # *same* object in every layer, which TensorFlow releases after 1.0
    # reject with a ValueError.
    cell = rnn.MultiRNNCell(
        [rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
         for _ in range(3)],
        state_is_tuple=True)

    # Run the stacked cell over the shared input defined in an earlier cell.
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    print("dynamic rnn: ", outputs)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())  # batch size, unrolling (time), hidden_size

dynamic rnn:  Tensor("MultiRNNCell/rnn/transpose:0", shape=(2, 4, 5), dtype=float32)
array([[[-0.00159773, -0.00278636,  0.00216246,  0.00038013, -0.00024984],
        [-0.00624782, -0.00809411,  0.00787236,  0.0043    , -0.00170721],
        [-0.01344065, -0.01426034,  0.01710082,  0.01179738, -0.00482255],
        [-0.02229064, -0.0205432 ,  0.02860553,  0.02171124, -0.00901885]],

       [[-0.00217091, -0.00350074,  0.00534471,  0.00142277, -0.003283  ],
        [-0.0062384 , -0.00876253,  0.01459183,  0.00550701, -0.00811026],
        [-0.01145217, -0.01450423,  0.02608694,  0.01158843, -0.01365107],
        [-0.01722418, -0.02011818,  0.038624  ,  0.01883249, -0.01951919]]], dtype=float32)


In [8]:
with tf.variable_scope('dynamic_rnn') as scope:
    cell = rnn.BasicLSTMCell(5, state_is_tuple=True)
    # Valid lengths per batch entry: 1 step for the first sequence and
    # 3 steps for the second. In the recorded output, the steps beyond
    # those lengths are all-zero rows.
    outputs, _states = tf.nn.dynamic_rnn(
        cell=cell, inputs=x_data, sequence_length=[1, 3], dtype=tf.float32)
    print("dynamic rnn: ", outputs)
    sess.run(tf.global_variables_initializer())
    pp.pprint(sess.run(outputs))  # batch size, unrolling (time), hidden_size

dynamic rnn:  Tensor("dynamic_rnn/rnn/transpose:0", shape=(2, 4, 5), dtype=float32)
array([[[  1.55049935e-01,  -8.28458890e-02,  -1.36540994e-01,
          -2.43278760e-02,   1.60975546e-01],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]],

       [[  6.44681007e-02,  -4.09913715e-03,  -4.74524111e-01,
           2.25839461e-03,   5.23494780e-01],
        [  1.16558671e-01,  -1.04439410e-03,  -7.71331191e-01,
           8.13173072e-04,   8.47356856e-01],
        [  1.55143157e-01,  -2.68030766e-04,  -9.02186215e-01,
           3.48752073e-04,   9.62113023e-01],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]]], dtype=float32)


In [9]:
with tf.variable_scope('bi-directional') as scope:
    # Bidirectional RNN: separate LSTM cells (5 units each) for the forward
    # and the backward pass over the shared input.
    cell_fw = rnn.BasicLSTMCell(5, state_is_tuple=True)
    cell_bw = rnn.BasicLSTMCell(5, state_is_tuple=True)

    # Valid lengths: 2 steps for the first sequence, 3 for the second.
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=x_data,
        sequence_length=[2, 3],
        dtype=tf.float32)

    sess.run(tf.global_variables_initializer())
    # Evaluate and display the outputs first, then the final states.
    for fetched in (outputs, states):
        pp.pprint(sess.run(fetched))

(   array([[[  4.45090048e-02,  -5.48573909e-03,   1.76832363e-01,
          -6.30143331e-03,   8.78377333e-02],
        [ -1.48050860e-01,  -9.71930625e-04,   4.88166243e-01,
           1.77669868e-01,   4.02208827e-02],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]],

       [[ -1.47792948e-02,  -1.10883569e-08,   5.36434472e-01,
           1.19788200e-01,   2.63353104e-06],
        [ -8.44009034e-03,  -1.02014619e-09,   8.31147254e-01,
           1.08465023e-01,   1.45638040e-07],
        [ -3.89013323e-03,  -1.10596886e-10,   9.46177483e-01,
           7.38197416e-02,   7.62108421e-09],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]]], dtype=float32),
    array([[[ -3.23204733e-02,  -4.90211964e-01,   2.18950495e-01,
          -3.21291387e-01,   1.235271

In [10]:
# Flatten-based softmax projection, in plain NumPy: collapse batch and time
# into one axis, multiply by a single weight matrix, then restore the
# (batch, sequence, class) layout. Only the linear projection is computed.
hidden_size = 3
sequence_length = 4
batch_size = 2
num_classes = 5

pp.pprint(x_data)  # hidden_size=3, sequence_length=4, batch_size=2
# NOTE: this rebinds x_data to a 2-D array with hidden_size columns.
x_data = x_data.reshape(-1, hidden_size)
pp.pprint(x_data)

# Deterministic weights 0..14 with shape (hidden_size, num_classes).
softmax_w = np.arange(hidden_size * num_classes, dtype=np.float32).reshape(
    hidden_size, num_classes)
outputs = np.matmul(x_data, softmax_w).reshape(
    -1, sequence_length, num_classes)  # batch, seq, class
pp.pprint(outputs)

array([[[  0.,   1.,   2.],
        [  3.,   4.,   5.],
        [  6.,   7.,   8.],
        [  9.,  10.,  11.]],

       [[ 12.,  13.,  14.],
        [ 15.,  16.,  17.],
        [ 18.,  19.,  20.],
        [ 21.,  22.,  23.]]], dtype=float32)
array([[  0.,   1.,   2.],
       [  3.,   4.,   5.],
       [  6.,   7.,   8.],
       [  9.,  10.,  11.],
       [ 12.,  13.,  14.],
       [ 15.,  16.,  17.],
       [ 18.,  19.,  20.],
       [ 21.,  22.,  23.]], dtype=float32)
array([[[  25.,   28.,   31.,   34.,   37.],
        [  70.,   82.,   94.,  106.,  118.],
        [ 115.,  136.,  157.,  178.,  199.],
        [ 160.,  190.,  220.,  250.,  280.]],

       [[ 205.,  244.,  283.,  322.,  361.],
        [ 250.,  298.,  346.,  394.,  442.],
        [ 295.,  352.,  409.,  466.,  523.],
        [ 340.,  406.,  472.,  538.,  604.]]], dtype=float32)


In [11]:
# Logits, shape (1, 3, 2): [batch_size, sequence_length, num_classes].
prediction1 = tf.constant([[[0, 1], [0, 1], [0, 1]]], dtype=tf.float32)
prediction2 = tf.constant([[[1, 0], [1, 0], [1, 0]]], dtype=tf.float32)
prediction3 = tf.constant([[[0, 1], [1, 0], [0, 1]]], dtype=tf.float32)

# Target class indices, shape (1, 3): [batch_size, sequence_length].
y_data = tf.constant([[1, 1, 1]])

# Per-step loss weights, shape (1, 3): [batch_size, sequence_length].
weights = tf.constant([[1, 1, 1]], dtype=tf.float32)

# prediction1 favours class 1 at every step, prediction2 favours class 0 at
# every step, and prediction3 favours class 0 at the middle step only.
sequence_loss1 = tf.contrib.seq2seq.sequence_loss(
    logits=prediction1, targets=y_data, weights=weights)
sequence_loss2 = tf.contrib.seq2seq.sequence_loss(
    logits=prediction2, targets=y_data, weights=weights)
sequence_loss3 = tf.contrib.seq2seq.sequence_loss(
    logits=prediction3, targets=y_data, weights=weights)

sess.run(tf.global_variables_initializer())
print("Loss1: ", sess.run(sequence_loss1),
      "Loss2: ", sess.run(sequence_loss2),
      "Loss3: ", sess.run(sequence_loss3))

Loss1:  0.313262 Loss2:  1.31326 Loss3:  0.646595
