In [1]:
# Runtime environment used to run the example code
%load_ext watermark
%watermark -p tensorflow,numpy -v -m

CPython 2.7.6
IPython 5.3.0

tensorflow 1.0.1
numpy 1.12.0

compiler   : GCC 4.8.4
system     : Linux
release    : 4.9.21-moby
machine    : x86_64
processor  : x86_64
CPU cores  : 2
interpreter: 64bit


In [24]:
import tensorflow as tf
import numpy as np

In [25]:
from tensorflow.contrib.legacy_seq2seq import basic_rnn_seq2seq, embedding_rnn_seq2seq, sequence_loss
from tensorflow.python.ops import variable_scope


In [4]:
# IPython help lookup: show the documentation of basic_rnn_seq2seq
?basic_rnn_seq2seq

In [61]:
# Symbol -> integer id lookup for the toy vocabulary, including the
# <GO> / <EOS> / <PAD> control symbols.
vocab = {
    'A': 0,
    'B': 1,
    'C': 2,
    'D': 3,
    'E': 4,
    'F': 5,
    '<GO>': 6,
    '<EOS>': 7,
    '<PAD>': 8
}
# Inverse lookup (id -> symbol), used to turn predicted ids back into symbols.
# dict.items() replaces the Python 2-only dict.iteritems() so this cell runs
# unchanged on both Python 2 and Python 3.
reverse_vocab = {index: symbol for symbol, index in vocab.items()}

In [68]:
# Reset the default graph so the cells below build their ops into a fresh graph
tf.reset_default_graph()

In [69]:
# One constant tensor per time step; each holds a single symbol id wrapped in a
# one-element list (i.e. a batch of one sequence).
# List comprehensions are used instead of map() so both names are real lists on
# Python 2 and Python 3: later cells slice decoder_inputs and reuse encoder_inputs.
encoder_inputs = [tf.constant([vocab[symbol]]) for symbol in ['A', 'B', 'C']]
decoder_inputs = [tf.constant([vocab[symbol]])
                  for symbol in ['<GO>', 'D', 'E', 'F', 'F', '<EOS>']]

In [11]:
# IPython help lookup: show the documentation of embedding_rnn_seq2seq
?embedding_rnn_seq2seq

In [70]:
# RNN cell shared by the encoder and the decoder (128 hidden units).
cell = tf.contrib.rnn.BasicRNNCell(128)
# Both sides use the same toy vocabulary, so the symbol counts are derived from
# it instead of repeating the literal 9 (which would silently go stale if the
# vocabulary changed).
num_encoder_symbols = len(vocab)
num_decoder_symbols = len(vocab)
embedding_size = 128

# Training graph: feed_previous=False, so the decoder reads the provided
# decoder_inputs at every step. No output projection is applied.
outputs, states = embedding_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols, num_decoder_symbols,
    embedding_size, output_projection=None,
    feed_previous=False)

In [71]:
# Targets are the decoder inputs shifted left by one step: the output at step t
# is scored against decoder_inputs[t + 1]. The last output has no target, so it
# is dropped via outputs[:-1].
targets = decoder_inputs[1:]
# One weight tensor per target step, all 1.0 (nothing is masked in this example).
# Built from `targets` so the three lists passed to sequence_loss always have
# matching lengths, instead of a hand-counted [1, 1, 1, 1, 1].
weights = [tf.constant([1.0], dtype=tf.float32) for step in range(len(targets))]
loss = sequence_loss(outputs[:-1], targets, weights)

In [72]:
# Training op: plain gradient descent on `loss` with learning rate 0.01
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

In [73]:
with tf.Session() as sess:
    # Training: every model input is a tf.constant, so no feed_dict is needed.
    sess.run(tf.global_variables_initializer())
    for iteration in range(50):
        sess.run(train_step)
        # Evaluated in a separate run, i.e. the loss after this step's update.
        print(sess.run(loss))

    print("---Deocding----")

    # Decoding: build the seq2seq graph a second time with reuse=True so it
    # shares the variables that were just trained.
    with variable_scope.variable_scope(variable_scope.get_variable_scope(), reuse=True):
        # With feed_previous=True only the first decoder input (<GO>) is read;
        # the remaining 'A' entries just fix the number of decoding steps.
        # A list comprehension (not map) keeps this a real list on Python 3 too.
        decode_decoder_inputs = [tf.constant([vocab[symbol]])
                                 for symbol in ['<GO>', 'A', 'A', 'A', 'A', 'A', 'A']]
        outputs, states = embedding_rnn_seq2seq(
            encoder_inputs, decode_decoder_inputs, cell,
            num_encoder_symbols, num_decoder_symbols,
            embedding_size, output_projection=None,
            feed_previous=True)

        # Fetch all step outputs with a single graph run instead of calling
        # eval() once per step (each eval() would re-run the graph).
        for o in sess.run(outputs):
            # Pick the highest-scoring symbol id for each batch entry.
            m = np.argmax(o, axis=1)
            print(reverse_vocab[m[0]])

2.10133
1.88505
1.69095
1.51902
1.36791
1.23556
1.11963
1.01785
0.928161
0.848835
0.778436
0.715787
0.65991
0.609978
0.565283
0.52521
0.489219
0.456837
0.427646
0.401277
0.377407
0.355749
0.336055
0.318103
0.301702
0.286682
0.272896
0.260212
0.248517
0.237711
0.227704
0.218419
0.209786
0.201744
0.194239
0.187223
0.180652
0.174489
0.168698
0.163249
0.158115
0.153269
0.148691
0.144359
0.140255
0.136363
0.132667
0.129153
0.125809
0.122624
---Deocding----
D
E
F
F
<EOS>
<EOS>
D


## 用 placeholder 代替 Tensor

placeholder 的类型是 Tensor，因此上述 API 中的 Tensor 都可以用 placeholder 代替，使得训练、测试过程的数据可以变动

In [12]:
# Show the Python type of the object returned by tf.placeholder
type(tf.placeholder(tf.int32, shape=[None]))

tensorflow.python.framework.ops.Tensor

In [91]:
# Reset the default graph before rebuilding the model with placeholders
tf.reset_default_graph()

In [92]:
# Number of time steps on each side; one placeholder is created per step.
encoder_length = 5
decoder_length = 5

cell = tf.contrib.rnn.BasicRNNCell(128)
# Derived from the vocabulary rather than repeating the literal 9.
num_encoder_symbols = len(vocab)
num_decoder_symbols = len(vocab)
embedding_size = 128


def _step_placeholders(dtype, prefix, num_steps):
    """Return `num_steps` placeholders of shape [None], named '<prefix>_<step>'."""
    return [tf.placeholder(dtype, shape=[None], name="%s_%d" % (prefix, i))
            for i in range(num_steps)]


# Each list holds one placeholder per time step; shape [None] leaves the batch
# size open so different batches can be fed at run time.
encoder_placeholders = _step_placeholders(tf.int32, "encoder", encoder_length)
decoder_placeholders = _step_placeholders(tf.int32, "decoder", decoder_length)
target_placeholders = _step_placeholders(tf.int32, "target", decoder_length)
# Per-step loss weights (float), fed alongside the targets.
target_weights_placeholders = _step_placeholders(tf.float32, "decoder_weight", decoder_length)

# Training graph: feed_previous=False, so the decoder reads the fed decoder inputs.
outputs, states = embedding_rnn_seq2seq(
    encoder_placeholders, decoder_placeholders, cell,
    num_encoder_symbols, num_decoder_symbols,
    embedding_size, output_projection=None,
    feed_previous=False)

loss = sequence_loss(outputs, target_placeholders, target_weights_placeholders)
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

In [93]:
def seq2seq_pad(encoder_inputs, encoder_length, decoder_inputs, decoder_length, vocab, pad_symbol='<PAD>'):
    """
    Convert symbol sequences to fixed-length lists of vocabulary indices.

    - encoder_inputs: nested list of symbols (one inner list per sample) for the encoder
    - encoder_length: length every encoder sequence is brought to
    - decoder_inputs: nested list of symbols (one inner list per sample) for the decoder
    - decoder_length: length every decoder sequence is brought to
    - vocab: vocabulary index, symbol (str) -> index (int)
    - pad_symbol: symbol in vocab whose index is used as filler

    Encoder sequences are padded at the start, decoder sequences at the end.
    A sequence that is not shorter than the requested length is cut down to its
    first `length` symbols and gets no padding.

    Example (assuming the index of "<PAD>" is 0):

    seq2seq_pad([['hello', 'world'], ['cover', 'me']], 4, [['hi', '<EOS>'], ['roger', '<EOS>']], 4, vocab)

    Output:
    [[0, 0, <index of 'hello'>, <index of 'world'>], [0, 0, <index of 'cover'>, <index of 'me'>]],
    [[<index of 'hi'>, <index of 'EOS'>, 0, 0], [<index of 'roger'>, <index of 'EOS'>, 0, 0]]

    Returns a tuple (encoder_indices, decoder_indices) of nested lists.
    """
    filler = vocab[pad_symbol]

    def _encode(sequences, target_len, pad_left):
        encoded = []
        for symbols in sequences:
            # Truncate first, then look up only the symbols that are kept.
            ids = [vocab[symbol] for symbol in symbols[:target_len]]
            # A non-positive count yields an empty list, i.e. no padding.
            padding = [filler] * (target_len - len(ids))
            encoded.append(padding + ids if pad_left else ids + padding)
        return encoded

    encoder_indices = _encode(encoder_inputs, encoder_length, True)
    decoder_indices = _encode(decoder_inputs, decoder_length, False)
    return encoder_indices, decoder_indices

In [94]:
# Demo: pad two encoder/decoder pairs to length 5 and display the index lists
seq2seq_pad([['A', 'B'], ['B', 'A']], 5, [['<GO>', 'C', 'D', '<EOS>'], ['<GO>', 'D', 'C', '<EOS>']], 5, vocab)

([[8, 8, 8, 0, 1], [8, 8, 8, 1, 0]], [[6, 2, 3, 7, 8], [6, 3, 2, 7, 8]])

In [95]:
# Keep the padded index lists (one inner list per sample) for the transpose demo below
encoder_inputs, decoder_inputs = seq2seq_pad([['A', 'B'], ['B', 'A']],
                                             5, [['<GO>', 'C', 'D', '<EOS>'], ['<GO>', 'D', 'C', '<EOS>']], 5, vocab)

In [96]:
# "Transpose" the nested list (one row per sample -> one row per time step) to
# get the input layout the TF seq2seq API expects.
# list(...) keeps the printed result a list on Python 3, where zip is lazy.
print(list(zip(*encoder_inputs)))

[(8, 8), (8, 8), (8, 8), (0, 1), (1, 0)]


In [97]:
def left_shift(decoder_inputs, pad_idx):
    """Build target sequences: drop the first element of each decoder input
    sequence and append pad_idx, so every sequence keeps its length.

    Returns a new list of lists; the inputs are not modified.
    """
    shifted = []
    for sequence in decoder_inputs:
        tail = list(sequence[1:])
        tail.append(pad_idx)
        shifted.append(tail)
    return shifted

In [98]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Two translation pairs:
    # AB -> CD
    # BA -> EFF
    # Pad to the same lengths the placeholder lists were built with. (Padding the
    # decoder side to a longer hard-coded length only worked because the extra
    # time steps were never fed.)
    encoder_inputs, decoder_inputs = seq2seq_pad(
        [['A', 'B'], ['B', 'A']], encoder_length,
        [['<GO>', 'C', 'D', '<EOS>'], ['<GO>', 'E', 'F', 'F', '<EOS>']], decoder_length,
        vocab)
    # Transpose to one row per time step; list(...) keeps the results indexable
    # on Python 3, where zip is lazy.
    encoder_inputs = list(zip(*encoder_inputs))
    # Alternative: derive the targets inside the graph by shifting
    # decoder_placeholders, so that only decoder_placeholders would need feeding.
    target_inputs = list(zip(*left_shift(decoder_inputs, vocab['<PAD>'])))
    decoder_inputs = list(zip(*decoder_inputs))

    feed_dict = dict()
    # Prepare input data
    for (i, placeholder) in enumerate(encoder_placeholders):
        # Either the placeholder itself or placeholder.name works as the key here
        feed_dict[placeholder.name] = np.asarray(encoder_inputs[i], dtype=int)
    for i in range(len(decoder_placeholders)):
        feed_dict[decoder_placeholders[i].name] = np.asarray(decoder_inputs[i], dtype=int)
        feed_dict[target_placeholders[i].name] = np.asarray(target_inputs[i], dtype=int)
        # The weights mask out the loss on <PAD> targets (weight 0.0 for <PAD>, else 1.0)
        feed_dict[target_weights_placeholders[i].name] = np.asarray(
            [float(idx != vocab['<PAD>']) for idx in target_inputs[i]], dtype=float)

    # Training
    for iteration in range(50):
        sess.run(train_step, feed_dict)
        # Evaluated in a separate run, i.e. the loss after this step's update.
        print(sess.run(loss, feed_dict))

    print("---Deocding----")

    # Decoding: rebuild the graph with reuse=True so it shares the trained variables.
    with variable_scope.variable_scope(variable_scope.get_variable_scope(), reuse=True):
        outputs, states = embedding_rnn_seq2seq(
            encoder_placeholders, decoder_placeholders, cell,
            num_encoder_symbols, num_decoder_symbols,
            embedding_size, output_projection=None,
            feed_previous=True)

        # The placeholders still need values, so feed_dict is required here too.
        # All step outputs are fetched in one graph run instead of one eval() per step.
        for o in sess.run(outputs, feed_dict):
            m = np.argmax(o, axis=1)
            print(reverse_vocab[m[0]], reverse_vocab[m[1]])

2.06538
1.87133
1.69996
1.54974
1.41805
1.30219
1.19973
1.10862
1.02719
0.954077
0.888157
0.82851
0.774374
0.725111
0.680183
0.639129
0.601551
0.567101
0.535475
0.506401
0.479638
0.45497
0.432204
0.411165
0.391696
0.373656
0.356918
0.341366
0.326896
0.313415
0.300837
0.289086
0.278093
0.267795
0.258135
0.249063
0.240531
0.232497
0.224922
0.217773
0.211017
0.204626
0.198572
0.192832
0.187384
0.182208
0.177286
0.172599
0.168134
0.163875
---Deocding----
('C', 'E')
('D', 'F')
('<EOS>', 'F')
('E', '<EOS>')
('A', '<EOS>')
