# 用TensorFlow实现RNN/LSTM/GRU
参考自https://blog.csdn.net/jmh1996/article/details/78821216

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data",one_hot=True)

Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz


In [2]:
print(mnist.train.images.shape,mnist.train.labels.shape)
print(mnist.test.images.shape,mnist.test.labels.shape)

(55000, 784) (55000, 10)
(10000, 784) (10000, 10)


## 1. 设置超参数

In [10]:
train_rate=0.001  #学习速率
train_step=1000  
batch_size=1280    #每批样本数
display_step=100   #控制输出频次

frame_size=28     #序列里面每一个分量的大小。因为每个分量都是一行像素，而一行像素有28个像素点。所以frame_size为28
sequence_length=28  #每个样本序列的长度。因为我们希望把一个28x28的图片当做一个序列输入到rnn进行训练，所以我们需要对图片进行序列化。一种最方便的方法就是我们认为行与行之间存在某些关系，于是把图片的每一行取出来当做序列的一个维度。所以这里sequence_size就是设置为28。
hidden_num=100  #隐层个数
n_classes=10  #类别数

## 2. RNN/LSTM/GRU模型
单层RNN/LSTM/GRU：只需在第12行中修改为BasicRNNCell/BasicLSTMCell/GRUCell即可，其余的都不变

多层RNN/LSTM/GRU：将第12行变成第13行（即tf.nn.rnn_cell.MultiRNNCell这行）

In [11]:
graph = tf.Graph()
with graph.as_default():

    #定义输入,输出
    x=tf.placeholder(dtype=tf.float32,shape=[None,sequence_length*frame_size],name="inputx")
    y=tf.placeholder(dtype=tf.float32,shape=[None,n_classes],name="expected_y")
    #定义权值
    weights=tf.Variable(tf.truncated_normal(shape=[hidden_num,n_classes]))
    bias=tf.Variable(tf.zeros(shape=[n_classes]))

    # 定义RNN网络
    def RNN(x,weights,bias):
        '''返回[batch_size,n_classes]'''
        x=tf.reshape(x,shape=[-1,sequence_length,frame_size])
#         rnn_cell=tf.nn.rnn_cell.BasicRNNCell(hidden_num) # RNN/LSTM/GRU在此处选择BasicRNNCell/BasicLSTMCell/GRUCell。该网络中包含一个深度RNN网络，这个RNN包含hidden_num个隐层单元/RNN cell
        rnn_cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.GRUCell(hidden_num) for _ in range(3)]) # 构建多层RNN/LSTM/GRU网络,3表示3层（这里都是用MultiRNNCell，没有MultiGRUCell等）
    
        output,states=tf.nn.dynamic_rnn(rnn_cell,x,dtype=tf.float32)
        return tf.nn.softmax(tf.matmul(output[:,-1,:],weights)+bias,1)

    # 计算预计输出
    predy=RNN(x,weights,bias)
    # 定义损失函数和优化算法
    cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predy,labels=y))
    train=tf.train.AdamOptimizer(train_rate).minimize(cost)
    # 计算accuracy
    correct_pred=tf.equal(tf.argmax(predy,1),tf.argmax(y,1))
    accuracy=tf.reduce_mean(tf.to_float(correct_pred))


## 开始训练
with tf.Session(graph=graph) as sess:
    print('step','accuracy','loss')
    sess.run(tf.initialize_all_variables())
    step=1
    testx,testy=mnist.test.next_batch(batch_size)
    while step<train_step:
        batch_x,batch_y=mnist.train.next_batch(batch_size)
    #    batch_x=tf.reshape(batch_x,shape=[batch_size,sequence_length,frame_size])
        _loss,__=sess.run([cost,train],feed_dict={x:batch_x,y:batch_y})
        if step % display_step ==0:
            acc,loss=sess.run([accuracy,cost],feed_dict={x:testx,y:testy})
            print(step,acc,loss)

        step+=1

step accuracy loss
Instructions for updating:
Use `tf.global_variables_initializer` instead.
100 0.777344 1.68638
200 0.876562 1.58436
300 0.971094 1.49195
400 0.971875 1.48832
500 0.983594 1.47964
600 0.976563 1.4832
700 0.983594 1.47919
800 0.9875 1.4749
900 0.985156 1.47906
