# Recurrent Neural Network

# Content

1. [Import Related Modules and Packages](#Sec1)
2. [Hyperparameters](#Sec2)
3. [Prepare for data](#Sec3)
4. [Build RNN from scratch](#Sec4)
5. [Build RNN from TF modules](#Sec5)

## 1. Import Related Modules and Packages<a id='Sec1'></a>

In [2]:
import tensorflow as tf
tf.enable_eager_execution()
tfe = tf.contrib.eager

import numpy as np
import matplotlib.pyplot as plt

## 2. Hyperparameters<a id='Sec2'></a>

In [160]:
num_epochs = 10
total_series_length = 50000
truncated_backprop_length = 15
state_size = 4
num_classes = 2
echo_step = 3
batch_size = 5
num_batches = total_series_length//batch_size//truncated_backprop_length
learning_rate = 0.3

## 3. Prepare for data<a id='Sec3'></a>

In [4]:
def generateData(total_series_length = 50000, echo_step = 3, batch_size = 5):
    x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]), dtype=np.int32)
    y = np.roll(x, echo_step)
    y[0:echo_step] = 0

    x = x.reshape((batch_size, -1))  # The first index changing slowest, subseries as rows
    y = y.reshape((batch_size, -1))

    return (x, y)

## 4. Build RNN from scratch<a id='Sec4'></a>

In [156]:
class BasicRNNCell(object):
    def __init__(self, input_size, state_size, output_size, init_state):
        # 定义输入层与隐含层间的参数并随机初始化
        self.W = tfe.Variable(np.random.rand(input_size+1, state_size), dtype=tf.float32)
        self.b = tfe.Variable(np.zeros((1, state_size)), dtype=tf.float32)
        
        self.W2 = tfe.Variable(np.random.rand(state_size, output_size),dtype=tf.float32)
        self.b2 = tfe.Variable(np.zeros((1, output_size)), dtype=tf.float32)
        
        self.STATE_SIZE = state_size
        self.state = init_state
        
    def __call__(self, x):
        '''
        Input:
            x - list of tensor with shape [BATCH_SIZE, ] and length TIME_STEP
        '''
            
        batch_size = tf.shape(x[0])[0]
        
        states_series = []
        for current_input in x:
            current_input = tf.reshape(current_input, [batch_size, 1])
            input_and_state_concatenated = tf.concat([current_input, self.state], 1)  # Increasing number of columns

            next_state = tf.tanh(tf.matmul(input_and_state_concatenated, self.W) + self.b)  # Broadcasted addition
            states_series.append(next_state)
            self.state = next_state
        logits_series = tf.stack([tf.matmul(state, self.W2) + self.b2 for state in states_series]) #Broadcasted addition
        predictions_series = tf.nn.softmax(logits_series)
        return predictions_series

def loss(y_pred, y_true):
    '''
    Input:
        y_pred - [BATCH_SIZE * STEP_ZIE, NUM_CLASS]
        y      - [BATCH_SIZE * STEP_ZIE, NUM_CLASS]
    '''
    eps = 1e-6
    cliped_y_pref_tf = tf.clip_by_value(y_pred, eps, 1-eps)
    losses = tf.reduce_mean(-tf.reduce_sum(y_true * tf.log(cliped_y_pref_tf), axis=1))
    # losses = tf.losses.sparse_softmax_cross_entropy(y, y_pred)
    return losses

In [159]:
init_state = np.zeros((batch_size, state_size), dtype=np.float32)
model = BasicRNNCell(state_size, state_size, num_classes, init_state)

for epoch_idx in range(num_epochs):
    x,y = generateData()
    model.state = np.zeros((batch_size, state_size))

    print("New data, epoch", epoch_idx)
    
    for batch_idx in range(num_batches):
        with tf.GradientTape() as t:
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length

            batchX = x[:,start_idx:end_idx]
            batchY = y[:,start_idx:end_idx]

            # Unpack columns
            inputs_series = tf.unstack(batchX, axis=1)
            labels_series = tf.unstack(batchY, axis=1)

            # Forward pass
            y_pred = model(inputs_series)
            stack_y_pred = tf.reshape(y_pred, (-1,2))
            stack_y = tf.reshape(tf.one_hot(tf.stack(labels_series), num_classes), (-1,2))
            losses = loss(stack_y_pred, stack_y)
        dW, db, dW2, db2 = t.gradient(losses, [model.W, model.b, model.W2, model.b2])
        model.W.assign_sub(learning_rate * dW)
        model.b.assign_sub(learning_rate * db)
        model.W2.assign_sub(learning_rate * dW2)
        model.b2.assign_sub(learning_rate * db2)        


        #loss_list.append(_total_loss)

        if batch_idx%100 == 0:
            print("Step",batch_idx, "Loss", losses.numpy())
            #plot(loss_list, _predictions_series, batchX, batchY)

New data, epoch 0
Step 0 Loss 0.75769806
Step 100 Loss 0.6888367
Step 200 Loss 0.6951802
Step 300 Loss 0.7014157
Step 400 Loss 0.6878339
Step 500 Loss 0.7052706
Step 600 Loss 0.69145566
New data, epoch 1
Step 0 Loss 0.67821056
Step 100 Loss 0.69223183
Step 200 Loss 0.67115676
Step 300 Loss 0.6947225
Step 400 Loss 0.7136558
Step 500 Loss 0.69313484
Step 600 Loss 0.7418151
New data, epoch 2
Step 0 Loss 0.71476513
Step 100 Loss 0.6844871
Step 200 Loss 0.7002073
Step 300 Loss 0.69965464
Step 400 Loss 0.7054781
Step 500 Loss 0.6978116
Step 600 Loss 0.694739
New data, epoch 3
Step 0 Loss 0.68291354
Step 100 Loss 0.69485235
Step 200 Loss 0.6941126
Step 300 Loss 0.6984619
Step 400 Loss 0.6907612
Step 500 Loss 0.6807467
Step 600 Loss 0.69389987
New data, epoch 4
Step 0 Loss 0.689857
Step 100 Loss 0.6923895
Step 200 Loss 0.6955769
Step 300 Loss 0.70237267
Step 400 Loss 0.6767152
Step 500 Loss 0.7047405
Step 600 Loss 0.70090014
New data, epoch 5
Step 0 Loss 0.7109435
Step 100 Loss 0.7033527
Step 

KeyboardInterrupt: 

In [None]:
def plot(loss_list, predictions_series, batchX, batchY):
    plt.subplot(2, 3, 1)
    plt.cla()
    plt.plot(loss_list)

    for batch_series_idx in range(5):
        one_hot_output_series = np.array(predictions_series)[:, batch_series_idx, :]
        single_output_series = np.array([(1 if out[0] < 0.5 else 0) for out in one_hot_output_series])

        plt.subplot(2, 3, batch_series_idx + 2)
        plt.cla()
        plt.axis([0, truncated_backprop_length, 0, 2])
        left_offset = range(truncated_backprop_length)
        plt.bar(left_offset, batchX[batch_series_idx, :], width=1, color="blue")
        plt.bar(left_offset, batchY[batch_series_idx, :] * 0.5, width=1, color="red")
        plt.bar(left_offset, single_output_series * 0.3, width=1, color="green")

    plt.draw()
    plt.pause(0.0001)

## 5. Build RNN from TF modules<a id='Sec5'></a>

In [2]:
#载入数据集
mnist = input_data.read_data_sets("MNIST_data/",one_hot=True)

# 输入图片是28*28
n_inputs = 28 #输入一行，一行有28个数据
max_time = 28 #一共28行
lstm_size = 100 #隐层单元
n_classes = 10 # 10个分类
batch_size = 50 #每批次50个样本
n_batch = mnist.train.num_examples // batch_size #计算一共有多少个批次

#这里的none表示第一个维度可以是任意的长度
x = tf.placeholder(tf.float32,[None,784])
#正确的标签
y = tf.placeholder(tf.float32,[None,10])

#初始化权值
weights = tf.Variable(tf.truncated_normal([lstm_size, n_classes], stddev=0.1))
#初始化偏置值
biases = tf.Variable(tf.constant(0.1, shape=[n_classes]))


#定义RNN网络
def RNN(X,weights,biases):
    # inputs=[batch_size, max_time, n_inputs]
    inputs = tf.reshape(X,[-1,max_time,n_inputs])
    #定义LSTM基本CELL
    lstm_cell = tf.contrib.rnn.core_rnn_cell.BasicLSTMCell(lstm_size)
    # final_state[0]是cell state
    # final_state[1]是hidden_state
    outputs,final_state = tf.nn.dynamic_rnn(lstm_cell,inputs,dtype=tf.float32)
    results = tf.nn.softmax(tf.matmul(final_state[1],weights) + biases)
    return results
    
    
#计算RNN的返回结果
prediction= RNN(x, weights, biases)  
#损失函数
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
#使用AdamOptimizer进行优化
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
#结果存放在一个布尔型列表中
correct_prediction = tf.equal(tf.argmax(y,1),tf.argmax(prediction,1))#argmax返回一维张量中最大的值所在的位置
#求准确率
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))#把correct_prediction变为float32类型
#初始化
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(6):
        for batch in range(n_batch):
            batch_xs,batch_ys =  mnist.train.next_batch(batch_size)
            sess.run(train_step,feed_dict={x:batch_xs,y:batch_ys})
        
        acc = sess.run(accuracy,feed_dict={x:mnist.test.images,y:mnist.test.labels})
        print ("Iter " + str(epoch) + ", Testing Accuracy= " + str(acc))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Iter 0, Testing Accuracy= 0.7221
Iter 1, Testing Accuracy= 0.8016
Iter 2, Testing Accuracy= 0.8763
Iter 3, Testing Accuracy= 0.9103
Iter 4, Testing Accuracy= 0.9223
Iter 5, Testing Accuracy= 0.9311
