In [1]:
from __future__ import division
import tensorflow as tf
import numpy as np

with tf.Graph().as_default():
    beam_size = 3 # Number of hypotheses in beam.
    num_symbols = 5 # Output vocabulary size.
    embedding_size = 10
    num_steps = 3
    embedding = tf.zeros([num_symbols, embedding_size])
    output_projection = None

    # log_beam_probs: list of [beam_size, 1] Tensors
    #  Ordered log probabilities of the `beam_size` best hypotheses
    #  found in each beam step (highest probability first).
    # beam_symbols: list of [beam_size] Tensors 
    #  The ordered `beam_size` words / symbols extracted by the beam
    #  step, which will be appended to their corresponding hypotheses
    #  (corresponding hypotheses found in `beam_path`).
    # beam_path: list of [beam_size] Tensor
    #  The ordered `beam_size` parent indices. Their values range
    #  from [0, `beam_size`), and they denote which previous
    #  hypothesis each word should be appended to.
    log_beam_probs, beam_symbols, beam_path  = [], [], []
    def beam_search(prev, i):
        if output_projection is not None:
            prev = tf.nn.xw_plus_b(
                prev, output_projection[0], output_projection[1])

        # Compute 
        #  log P(next_word, hypothesis) = 
        #  log P(next_word | hypothesis)*P(hypothesis) =
        #  log P(next_word | hypothesis) + log P(hypothesis)
        # for each hypothesis separately, then join them together 
        # on the same tensor dimension to form the example's 
        # beam probability distribution:
        # [P(word1, hypothesis1), P(word2, hypothesis1), ...,
        #  P(word1, hypothesis2), P(word2, hypothesis2), ...]

        # If TF had a log_sum_exp operator, then it would be 
        # more numerically stable to use: 
        #   probs = prev - tf.log_sum_exp(prev, reduction_dims=[1])
        probs = tf.log(tf.nn.softmax(prev))
        # i == 1 corresponds to the input being "<GO>", with
        # uniform prior probability and only the empty hypothesis
        # (each row is a separate example).
        if i > 1:
            probs = tf.reshape(probs + log_beam_probs[-1], 
                               [-1, beam_size * num_symbols])

        # Get the top `beam_size` candidates and reshape them such
        # that the number of rows = batch_size * beam_size, which
        # allows us to process each hypothesis independently.
        best_probs, indices = tf.nn.top_k(probs, beam_size)
        indices = tf.stop_gradient(tf.squeeze(tf.reshape(indices, [-1, 1])))
        best_probs = tf.stop_gradient(tf.reshape(best_probs, [-1, 1]))

        symbols = indices % num_symbols # Which word in vocabulary.
        print(symbols.get_shape())
        beam_parent = indices // num_symbols # Which hypothesis it came from.

        beam_symbols.append(symbols)
        beam_path.append(beam_parent)
        log_beam_probs.append(best_probs)
        return tf.nn.embedding_lookup(embedding, symbols)

    # Setting up graph.
    inputs = [tf.placeholder(tf.float32, shape=[None, num_symbols])
              for i in range(num_steps)]
    for i in range(num_steps):
        beam_search(inputs[i], i + 1)

    # Running the graph.
    input_vals = [0, 0, 0]
    l = np.log
    eps = -10 # exp(-10) ~= 0

    # These values mimic the distribution of vocabulary words
    # from each hypothesis independently (in log scale since
    # they will be put through exp() in softmax).
    input_vals[0] = np.array([[0, eps, l(2), eps, l(3)]])
    # Step 1 beam hypotheses =
    # (1) Path: [4], prob = log(1 / 2)
    # (2) Path: [2], prob = log(1 / 3)
    # (3) Path: [0], prob = log(1 / 6)

    input_vals[1] = np.array([[l(1.2), 0, 0, l(1.1), 0], # Path [4] 
                              [0,   eps, eps, eps, eps], # Path [2]
                              [0,  0,   0,   0,   0]])   # Path [0]
    # Step 2 beam hypotheses =
    # (1) Path: [2, 0], prob = log(1 / 3) + log(1)
    # (2) Path: [4, 0], prob = log(1 / 2) + log(1.2 / 5.3)
    # (3) Path: [4, 3], prob = log(1 / 2) + log(1.1 / 5.3)

    input_vals[2] = np.array([[0,  l(1.1), 0,   0,   0], # Path [2, 0]
                              [eps, 0,   eps, eps, eps], # Path [4, 0]
                              [eps, eps, eps, eps, 0]])  # Path [4, 3]
    # Step 3 beam hypotheses =
    # (1) Path: [4, 0, 1], prob = log(1 / 2) + log(1.2 / 5.3) + log(1)
    # (2) Path: [4, 3, 4], prob = log(1 / 2) + log(1.1 / 5.3) + log(1)
    # (3) Path: [2, 0, 1], prob = log(1 / 3) + log(1) + log(1.1 / 5.1)

    input_feed = {inputs[i]: input_vals[i][:beam_size, :] 
                  for i in xrange(num_steps)} 
    output_feed = beam_symbols + beam_path + log_beam_probs
    session = tf.InteractiveSession()
    outputs = session.run(output_feed, feed_dict=input_feed)

    expected_beam_symbols = [[4, 2, 0],
                             [0, 0, 3],
                             [1, 4, 1]]
    expected_beam_path = [[0, 0, 0],
                          [1, 0, 0],
                          [1, 2, 0]]

    print("predicted beam_symbols vs. expected beam_symbols")
    for ind, predicted in enumerate(outputs[:num_steps]):
        print(list(predicted), expected_beam_symbols[ind])
    print("\npredicted beam_path vs. expected beam_path")
    for ind, predicted in enumerate(outputs[num_steps:num_steps * 2]):
        print(list(predicted), expected_beam_path[ind])
    print("\nlog beam probs")
    for log_probs in outputs[2 * num_steps:]:
        print(log_probs)

<unknown>
<unknown>
<unknown>
predicted beam_symbols vs. expected beam_symbols
([4, 2, 0], [4, 2, 0])
([0, 0, 3], [0, 0, 3])
([1, 4, 1], [1, 4, 1])

predicted beam_path vs. expected beam_path
([0, 0, 0], [0, 0, 0])
([1, 0, 0], [1, 0, 0])
([1, 2, 0], [1, 2, 0])

log beam probs
[[-0.6931622 ]
 [-1.09862733]
 [-1.79177451]]
[[-1.098809  ]
 [-2.17854738]
 [-2.26555896]]
[[-2.17872906]
 [-2.26574039]
 [-2.63273931]]


In [3]:
tf.nn.top_k??

In [28]:
tf.InteractiveSession()

<tensorflow.python.client.session.InteractiveSession at 0x5882210>

In [29]:
x = tf.constant([[1.0, 3.0, 2.0, 4.0],[1.8, 1.1, 1.0, 0.9]])

In [30]:
u,v = tf.nn.top_k(x, 2)

In [31]:
u.eval()

array([[ 4.        ,  3.        ],
       [ 1.79999995,  1.10000002]], dtype=float32)

In [32]:
v.eval()

array([[3, 1],
       [0, 1]], dtype=int32)

In [33]:
tf.nn.seq2seq.rnn_decoder??
res = tf.expand_dims(tensor, 1)
res = tf.tile(res, [1, self.beam_size, 1])

NameError: name 'tensor' is not defined

In [34]:
x = tf.constant([[2.0],[3.0],[4.0]])
res = tf.expand_dims(x, 1)
res.eval()
res.eval().shape

(3, 1, 1)

In [35]:
res = tf.tile(res, [1, 3, 1])
res.eval()

array([[[ 2.],
        [ 2.],
        [ 2.]],

       [[ 3.],
        [ 3.],
        [ 3.]],

       [[ 4.],
        [ 4.],
        [ 4.]]], dtype=float32)

In [36]:
    def tile_along_beam(tensor, beam_size=5):
        """
        Helps tile tensors for each beam.
        
        Args:
          tensor: a 2-D tensor, [batch_size x T]
        Return:
          An [batch_size*beam_size x T] tensor, where each row of the input
          tensor is copied beam_size times in a row in the output
        """
        res = tf.expand_dims(tensor, 1)
        res = tf.tile(res, [1, beam_size, 1])
        res = tf.reshape(res, [-1, tf.shape(tensor)[1]])
        try:
            new_first_dim = tensor.get_shape()[0] * self.beam_size
        except:
            new_first_dim = None
        res.set_shape((new_first_dim, tensor.get_shape()[1]))
        return res

In [37]:
x = tf.constant([[1.3, 2.4, 1.6], [3.2, 0.2, 1.5]])

In [38]:
tile_along_beam(x).eval()

array([[ 1.29999995,  2.4000001 ,  1.60000002],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 3.20000005,  0.2       ,  1.5       ]], dtype=float32)

In [41]:
    def tile_along_beam2(tensor, beam_size=5):
        """
        Helps tile tensors for each beam.
        
        Args:
          tensor: a 2-D tensor, [batch_size x T]
        Return:
          An [batch_size*beam_size x T] tensor, where each row of the input
          tensor is copied beam_size times in a row in the output
        """
        res = tf.tile(tensor, [1, beam_size])
        #res = tf.reshape(res, [-1, tf.shape(tensor)[1]])
        return res

In [42]:
tile_along_beam2(x).eval()

array([[ 1.29999995,  2.4000001 ,  1.60000002,  1.29999995,  2.4000001 ,
         1.60000002,  1.29999995,  2.4000001 ,  1.60000002,  1.29999995,
         2.4000001 ,  1.60000002,  1.29999995,  2.4000001 ,  1.60000002],
       [ 3.20000005,  0.2       ,  1.5       ,  3.20000005,  0.2       ,
         1.5       ,  3.20000005,  0.2       ,  1.5       ,  3.20000005,
         0.2       ,  1.5       ,  3.20000005,  0.2       ,  1.5       ]], dtype=float32)

In [47]:
y = tile_along_beam2(x)

In [46]:
tf.reshape(y, [-1, x.get_shape()[1]]).eval()

TypeError: Expected binary or unicode string, got -1

In [49]:
x.get_shape()[1]

Dimension(3)

In [51]:
tf.reshape(y, [-1, tf.shape(x)[1]]).eval()

array([[ 1.29999995,  2.4000001 ,  1.60000002],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 3.20000005,  0.2       ,  1.5       ]], dtype=float32)

In [52]:
tf.reshape(tf.tile(x, [5, 1]), [-1, tf.shape(x)[1]]).eval()

array([[ 1.29999995,  2.4000001 ,  1.60000002],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 3.20000005,  0.2       ,  1.5       ],
       [ 1.29999995,  2.4000001 ,  1.60000002],
       [ 3.20000005,  0.2       ,  1.5       ]], dtype=float32)

In [56]:
        nondone_mask = tf.reshape(
            tf.cast(tf.equal(tf.range(4), 3), tf.float32) * -1e18,
            [1, 1, 4]
        )

In [57]:
nondone_mask.eval()

array([[[ -0.00000000e+00,  -0.00000000e+00,  -0.00000000e+00,
          -9.99999984e+17]]], dtype=float32)

In [58]:
tf.equal(tf.range(4), 3).eval()

array([False, False, False,  True], dtype=bool)

In [60]:
tf.cast(tf.equal(tf.range(4), 2), tf.float32).eval()

array([ 0.,  0.,  1.,  0.], dtype=float32)

In [65]:
tf.expand_dims?

In [64]:
nondone_mask.eval().shape

(1, 1, 4)

In [2]:
tf.reshape(
          (tf.range(2 * 3) // 3) * 3,
          [2, 3]
      ).eval()

ValueError: Cannot use the default session to evaluate tensor: the tensor's graph is different from the session's graph. Pass an explicit session to `eval(session=sess)`.

In [67]:
tf.reshape(
          (tf.range(3 * 5) // 5) * 5,
          [3, 5]
      ).eval().shape

(3, 5)

In [68]:
tf.zeros?

In [69]:
tf.select?

In [None]:
symbos