In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Vocabulary: every character that may appear. 'S' (start), 'E' (end) and
# 'P' (padding) are control symbols used to delimit and pad the sequences.
char_sum = list('SEPabcdefghijklmnopqrstuvwxyz')
# Lookup table from each character to its integer index in char_sum.
dic_char = dict(zip(char_sum, range(len(char_sum))))
# Training pairs [word, antonym]; the seq2seq model is trained to output the
# antonym of the word it is given.
data = [
    ['man', 'woman'],
    ['small', 'large'],
    ['in', 'out'],
    ['young', 'old'],
    ['black', 'white'],
    ['far', 'near'],
    ['short', 'long'],
    ['king', 'queen'],
    ['up', 'down'],
]

# Hyperparameters
n_step = 5  # words are padded to this length; no word in data is longer than 5
num_class = len(char_sum)  # number of distinct characters (size of a one-hot vector)
n_hidden = 128  # number of hidden units in each RNN cell

In [3]:
def get_batch(data):
    """Turn (word, antonym) pairs into encoder / decoder / target batches.

    Both words of a pair are right-padded with 'P' up to ``n_step``
    characters (longer words are left as they are). The padded strings are
    kept in local variables, so ``data`` is never modified and the pairs may
    be lists or tuples.

    Args:
        data: iterable of pairs; ``pair[0]`` is the source word and
            ``pair[1]`` the word the decoder should produce.

    Returns:
        A tuple ``(input_batch, output_batch, target_batch)``:
        input_batch  -- list of one-hot arrays, one row per character of the
                        padded source word (shape ``(n_step, num_class)``
                        for words no longer than ``n_step``).
        output_batch -- list of one-hot arrays for ``'S' + padded target``
                        (one row more than the padded target).
        target_batch -- list of index lists for ``padded target + 'E'``.

    Raises:
        KeyError: if a word contains a character missing from ``dic_char``.
    """
    # Identity matrix: row k is the one-hot vector of class k. Built once
    # instead of once per array as it does not change between pairs.
    one_hot = np.eye(num_class)
    input_batch, output_batch, target_batch = [], [], []
    for seq in data:
        # Pad into locals instead of writing back into seq.
        source = seq[0] + 'P' * (n_step - len(seq[0]))
        antonym = seq[1] + 'P' * (n_step - len(seq[1]))
        input_ = [dic_char[ch] for ch in source]  # 'manPP' -> [15, 3, 16, 2, 2]
        output_ = [dic_char[ch] for ch in ('S' + antonym)]  # decoder input starts with 'S'
        target_ = [dic_char[ch] for ch in (antonym + 'E')]  # 'woman' -> [25, 17, 15, 3, 16, 1]
        # Fancy-indexing the identity matrix yields one one-hot row per character.
        input_batch.append(one_hot[input_])
        output_batch.append(one_hot[output_])
        target_batch.append(target_)
    return input_batch, output_batch, target_batch

In [4]:
# Define the model (TensorFlow 1.x graph mode).
encoder_input = tf.placeholder(tf.float32, [None, None, num_class])  # (batch_size, max_len, num_class)
decoder_input = tf.placeholder(tf.float32, [None, None, num_class])  # (batch_size, max_len+1, num_class); +1 because of the leading 'S'
target = tf.placeholder(tf.int32, [None, None])  # (batch_size, max_len+1); +1 because of the trailing 'E'
# Dropout keep probability. The default of 0.5 is what training uses when
# nothing is fed; test() feeds 1.0 so that dropout is disabled at inference.
keep_prob = tf.placeholder_with_default(0.5, shape=[], name='keep_prob')

# Encoder
with tf.variable_scope('encoder'):
    encoder = tf.contrib.rnn.BasicRNNCell(n_hidden)  # RNN cell with n_hidden units
    encoder = tf.contrib.rnn.DropoutWrapper(encoder, output_keep_prob=keep_prob)  # dropout on the cell outputs
    # dynamic_rnn returns (outputs, final_state): despite its name,
    # encoder_output holds the encoder's final state.
    _, encoder_output = tf.nn.dynamic_rnn(encoder, encoder_input, dtype=tf.float32)

# Decoder, initialised with the encoder's final state
with tf.variable_scope('decoder'):
    decoder = tf.contrib.rnn.BasicRNNCell(n_hidden)  # RNN cell with n_hidden units
    decoder = tf.contrib.rnn.DropoutWrapper(decoder, output_keep_prob=keep_prob)  # dropout on the cell outputs
    decoder_output, _ = tf.nn.dynamic_rnn(decoder, decoder_input, initial_state=encoder_output, dtype=tf.float32)

# Fully connected layer projecting every decoder output onto the num_class character classes
model = tf.layers.dense(decoder_output, num_class, activation=None)

# Loss and optimisation
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model, labels=target))
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

# Most likely class per position. Built once here so that calling test()
# repeatedly does not keep adding new ops to the graph.
prediction = tf.argmax(model, 2)  # model: [batch_size, max_len+1, n_class]


def test(word):
    """Predict the output word for a single input word.

    The decoder is fed 'S' followed by padding only. The prediction is cut
    at the first 'E' (or used in full when no 'E' was predicted) and the
    padding symbol 'P' is removed from the result.

    Args:
        word: the input word, made of characters present in ``char_sum``.

    Returns:
        The predicted word as a string, without 'E' or 'P' symbols.
    """
    seq_data = [word, 'P' * len(word)]
    input_batch, output_batch, _ = get_batch([seq_data])
    res = sess.run(
        prediction,
        feed_dict={
            encoder_input: input_batch,
            decoder_input: output_batch,
            keep_prob: 1.0,  # no dropout when predicting
        },
    )
    decoded = [char_sum[i] for i in res[0]]
    # Cut at the end marker when there is one; list.index would raise
    # ValueError for a prediction that contains no 'E'.
    if 'E' in decoded:
        decoded = decoded[:decoded.index('E')]
    # Targets are padded before 'E' is appended, so short answers come back
    # with trailing 'P' symbols (e.g. 'downP'); drop them.
    return ''.join(decoded).replace('P', '')


# Open a session and initialise every variable of the graph.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Build the single training batch once; it is reused at every step.
input_batch, output_batch, target_batch = get_batch(data)
train_feed = {
    encoder_input: input_batch,
    decoder_input: output_batch,
    target: target_batch,
}

# Run 5000 optimisation steps and report the cost every 1000 steps.
for step in range(1, 5001):
    _, cost_ = sess.run([optimizer, cost], feed_dict=train_feed)
    if step % 1000 == 0:
        print('Epoch %04d:' % step)
        print('cost:%.6f' % cost_)

# Try the trained model on a few words ('mans' is not in the training data).
print('test')
print('man ->', test('man'))
print('mans ->', test('mans'))
print('king ->', test('king'))
print('black ->', test('black'))


Epoch 1000:
cost:0.001274
Epoch 2000:
cost:0.000304
Epoch 3000:
cost:0.000131
Epoch 4000:
cost:0.000125
Epoch 5000:
cost:0.000094
test
man -> woman
mans -> woman
king -> queen
black -> white
up -> downP
