
# Recurrent neural network
 - RNN basic
 - MNIST by RNN
 - sequence2sequence
 - sentiment analysis
 > - https://github.com/hunkim/DeepLearningZeroToAll
  - https://tensorflowkorea.gitbooks.io/tensorflow-kr/content/g3doc/tutorials/
  - https://r2rt.com/implementing-batch-normalization-in-tensorflow.html

---
## MNIST by LSTM (uni-directional baseline; the Bi-directional LSTM follows below)
> - many to one

In [5]:
""" Bi-directional Recurrent Neural Network.
A Bi-directional Recurrent Neural Network (LSTM) implementation example using 
TensorFlow library. This example is using the MNIST database of handwritten 
digits (http://yann.lecun.com/exdb/mnist/)
Links:
    [Long Short Term Memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf)
    [MNIST Dataset](http://yann.lecun.com/exdb/mnist/).
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""

from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np

# Import MNIST data
# read_data_sets reads the dataset from the local "MNIST_data/" directory
# (presumably downloading it first when it is missing -- TODO confirm);
# one_hot=True requests one-hot encoded labels.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

  from ._conv import register_converters as _register_converters


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [6]:
# To classify images with a recurrent neural network, every image row is
# treated as one step of a pixel sequence. Because an MNIST image is 28*28px,
# each sample becomes a sequence of 28 timesteps with 28 inputs per step.
# NOTE: this cell builds a single (uni-directional) LSTM, not a bi-directional one.
tf.reset_default_graph()

# Training Parameters
learning_rate = 0.001   # step size of the gradient-descent optimizer
training_steps = 10000  # number of mini-batch updates
batch_size = 100        # samples per mini-batch
display_step = 200      # report loss/accuracy every this many steps

# Network Parameters
num_input = 28     # values fed per timestep (one image row of 28 pixels)
timesteps = 28     # sequence length (number of image rows)
num_hidden = 128   # size of the LSTM hidden state
num_classes = 10   # MNIST total classes (0-9 digits)

# with tf.device('/cpu:0'):
with tf.device('/gpu:0'):
    # Graph inputs: images as (batch, timesteps, num_input) and labels as
    # (batch, num_classes).
    X = tf.placeholder("float", shape=[None, timesteps, num_input])
    Y = tf.placeholder("float", shape=[None, num_classes])

    # Output projection. Only one LSTM direction is used in this cell, so the
    # projection takes num_hidden features (not 2*num_hidden).
    weights = dict(out=tf.Variable(tf.random_normal([num_hidden, num_classes])))
    biases = dict(out=tf.Variable(tf.random_normal([num_classes])))

    def basic_RNN(x, weights, biases):
        """Run one LSTM over `x` and project its last-step output to logits.

        Args:
            x: batch-major input tensor, (batch, timesteps, num_input).
            weights: dict whose 'out' entry is the (num_hidden, num_classes)
                projection matrix.
            biases: dict whose 'out' entry is the (num_classes,) bias vector.

        Returns:
            Logits tensor of shape (batch, num_classes).
        """
        print('x:\t\t\t',x)  # (batch, sequence, input)
        lstm_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
        # dynamic_rnn consumes the batch-major 3-D tensor directly, so no
        # tf.unstack of the input is needed here.
        outputs, _final_state = tf.nn.dynamic_rnn(
            lstm_cell, x, dtype=tf.float32, time_major=False)
        print('outputs:\t\t', outputs)  # (batch, sequence, hidden)

        # Bring the time axis to the front so that [-1] selects the last
        # timestep. (Equivalent alternative: tf.unstack(outputs, axis=1)[-1].)
        time_major_outputs = tf.transpose(outputs, [1,0,2])
        # NOTE: the printed label says "batch major", but after the transpose
        # the tensor is time major: (sequence, batch, hidden).
        print('outputs_(batch major)\t', time_major_outputs)

        last_step = time_major_outputs[-1]  # (batch, hidden)
        # Linear activation on the last-step output.
        return tf.matmul(last_step, weights['out']) + biases['out']

    logits = basic_RNN(X, weights, biases)
    prediction = tf.nn.softmax(logits)

    # Loss is the mean softmax cross-entropy; it is minimized with plain
    # gradient descent.
    loss_op = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

    # Accuracy: fraction of samples whose arg-max prediction matches the
    # arg-max of the label vector.
    correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Op that assigns every variable its initial value; run once per session.
init = tf.global_variables_initializer()

x:			 Tensor("Placeholder:0", shape=(?, 28, 28), dtype=float32, device=/device:GPU:0)
outputs:		 Tensor("rnn/transpose:0", shape=(?, 28, 128), dtype=float32, device=/device:GPU:0)
outputs_(batch major)	 Tensor("transpose_1:0", shape=(28, ?, 128), dtype=float32, device=/device:GPU:0)


In [7]:
# Start training
# allow_soft_placement is set to work around the error raised when an op
# cannot be placed on the requested GPU.
session_config = tf.ConfigProto(allow_soft_placement=True)
with tf.Session(config=session_config) as sess:

    # Give every variable its initial value.
    sess.run(init)

    for step in range(1, training_steps + 1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Turn each flat image into 28 timesteps of 28 values.
        batch_x = batch_x.reshape((batch_size, timesteps, num_input))
        train_feed = {X: batch_x, Y: batch_y}

        # One optimization step (backprop) on this mini-batch.
        sess.run(train_op, feed_dict=train_feed)

        if step == 1 or step % display_step == 0:
            # Report loss and accuracy on the mini-batch just trained on.
            loss, acc = sess.run([loss_op, accuracy], feed_dict=train_feed)
            loss_text = "{:.4f}".format(loss)
            acc_text = "{:.3f}".format(acc)
            print("Step " + str(step) + ", Minibatch Loss= " + loss_text +
                  ", Training Accuracy= " + acc_text)

    print("Optimization Finished!")

    # Accuracy on the first 128 MNIST test images only (not the full test set).
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
    test_label = mnist.test.labels[:test_len]
    test_accuracy = sess.run(accuracy, feed_dict={X: test_data, Y: test_label})
    print("Testing Accuracy:", test_accuracy)

Step 1, Minibatch Loss= 2.8106, Training Accuracy= 0.060
Step 200, Minibatch Loss= 2.1166, Training Accuracy= 0.290
Step 400, Minibatch Loss= 1.8897, Training Accuracy= 0.430
Step 600, Minibatch Loss= 1.7679, Training Accuracy= 0.520
Step 800, Minibatch Loss= 1.5486, Training Accuracy= 0.520
Step 1000, Minibatch Loss= 1.5983, Training Accuracy= 0.480
Step 1200, Minibatch Loss= 1.3434, Training Accuracy= 0.630
Step 1400, Minibatch Loss= 1.4437, Training Accuracy= 0.530
Step 1600, Minibatch Loss= 1.3629, Training Accuracy= 0.600
Step 1800, Minibatch Loss= 1.3655, Training Accuracy= 0.510
Step 2000, Minibatch Loss= 1.2264, Training Accuracy= 0.600
Step 2200, Minibatch Loss= 1.1985, Training Accuracy= 0.620
Step 2400, Minibatch Loss= 1.3067, Training Accuracy= 0.590
Step 2600, Minibatch Loss= 1.2667, Training Accuracy= 0.610
Step 2800, Minibatch Loss= 1.0793, Training Accuracy= 0.640
Step 3000, Minibatch Loss= 1.1217, Training Accuracy= 0.590
Step 3200, Minibatch Loss= 1.1845, Training Acc

---
## Bi-directional RNN (Static)

tf.nn.static_bidirectional_rnn
 - 시작시에 미리 모델을 다 만듦 - 로딩이 길지만 학습은 빠름
 - 인풋이 List of 2-d tensor(batch, input), 즉 sequence 만큼 list

tf.nn.bidirectional_dynamic_rnn
 - 학습중에 모델을 동적으로 만듦 - 로딩이 짧지만 학습이 느림
 - 인풋이 3-d  tensor(batch, seq, input) 기본이 batch major

In [1]:
import numpy as np

In [2]:
# Toy 3-D array of shape (3, 2, 5) holding the integers 0..29; used by the
# following cells to illustrate splitting along an axis.
a = np.arange(3 * 2 * 5).reshape(3, 2, 5)
a

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9]],

       [[10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]],

       [[20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29]]])

In [4]:
# Split `a` into 2 equal pieces along axis 1. np.split keeps the split axis,
# so each piece still has 3 dimensions.
# (To split along the last axis instead: np.split(a, 5, axis=2))
np.split(a, indices_or_sections=2, axis=1)

[array([[[ 0,  1,  2,  3,  4]],
 
        [[10, 11, 12, 13, 14]],
 
        [[20, 21, 22, 23, 24]]]), array([[[ 5,  6,  7,  8,  9]],
 
        [[15, 16, 17, 18, 19]],
 
        [[25, 26, 27, 28, 29]]])]

In [5]:
# Returns a Python list of tensors, one per index along axis 1; this list form
# is what tf.nn.static_bidirectional_rnn takes as input.
tf.unstack(value=a, axis=1)

[<tf.Tensor 'unstack:0' shape=(3, 5) dtype=int32>,
 <tf.Tensor 'unstack:1' shape=(3, 5) dtype=int32>]

In [6]:
'''
To classify images using a bidirectional recurrent neural network, we consider
every image row as one step of a sequence of pixels. Because MNIST image shape
is 28*28px, every sample is handled as a sequence of 28 timesteps with 28
inputs per step.
'''
# Start from an empty default graph so that re-running this cell does not
# collide with variables left in the graph by an earlier run.
tf.reset_default_graph()

# Training Parameters
learning_rate = 0.001
training_steps = 10000
batch_size = 100
display_step = 200

# Network Parameters
num_input = 28 # MNIST data input (img shape: 28*28)
timesteps = 28 # timesteps
num_hidden = 128 # hidden layer num of features
num_classes = 10 # MNIST total classes (0-9 digits)

with tf.device('/cpu:0'):
# with tf.device('/gpu:0'):
    # tf Graph input: images as (batch, timesteps, num_input), labels as
    # (batch, num_classes).
    X = tf.placeholder("float", [None, timesteps, num_input])
    Y = tf.placeholder("float", [None, num_classes])

    # Define weights
    weights = {
        # Hidden layer weights => 2*n_hidden because of forward + backward cells
        'out': tf.Variable(tf.random_normal([2*num_hidden, num_classes]))
    }
    biases = {
        'out': tf.Variable(tf.random_normal([num_classes]))
    }

    def BiRNN(x, weights, biases):
        """Build a bidirectional LSTM over `x` and return class logits.

        Args:
            x: batch-major input tensor, (batch_size, timesteps, num_input).
            weights: dict whose 'out' entry is the
                (2*num_hidden, num_classes) projection matrix.
            biases: dict whose 'out' entry is the (num_classes,) bias vector.

        Returns:
            Logits tensor of shape (batch_size, num_classes).
        """
        # static_bidirectional_rnn takes a list of `timesteps` tensors of
        # shape (batch_size, num_input), so unstack along the time axis.
        x = tf.unstack(x, timesteps, 1)

        # Forward direction cell
        lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
        # Backward direction cell
        lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

        # Build the RNN exactly once. Retrying inside a broad `except` would
        # hide genuine graph-building errors and attempt to create the same
        # variables a second time.
        rnn_result = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                  dtype=tf.float32)
        if isinstance(rnn_result, tuple):
            # (outputs, final_fw_state, final_bw_state)
            outputs = rnn_result[0]
        else:
            # Old TensorFlow versions return only the list of outputs.
            outputs = rnn_result

        # Linear activation on the last element of the output list, which
        # carries 2*num_hidden features (forward and backward concatenated).
        # NOTE(review): the backward half of outputs[-1] has presumably only
        # seen the final timestep -- confirm this is the intended feature.
        return tf.matmul(outputs[-1], weights['out']) + biases['out']

    logits = BiRNN(X, weights, biases)
    prediction = tf.nn.softmax(logits)

    # Define loss and optimizer
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=Y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

    # Evaluate model: fraction of samples whose arg-max prediction matches
    # the arg-max of the label vector.
    correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

In [36]:
# Start training
with tf.Session() as sess:

    # Give every variable its initial value.
    sess.run(init)

    for step in range(1, training_steps + 1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Turn each flat image into 28 timesteps of 28 values.
        batch_x = batch_x.reshape((batch_size, timesteps, num_input))
        train_feed = {X: batch_x, Y: batch_y}

        # One optimization step (backprop) on this mini-batch.
        sess.run(train_op, feed_dict=train_feed)

        if step == 1 or step % display_step == 0:
            # Report loss and accuracy on the mini-batch just trained on.
            loss, acc = sess.run([loss_op, accuracy], feed_dict=train_feed)
            loss_text = "{:.4f}".format(loss)
            acc_text = "{:.3f}".format(acc)
            print("Step " + str(step) + ", Minibatch Loss= " + loss_text +
                  ", Training Accuracy= " + acc_text)

    print("Optimization Finished!")

    # Accuracy on the first 128 MNIST test images only (not the full test set).
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
    test_label = mnist.test.labels[:test_len]
    test_accuracy = sess.run(accuracy, feed_dict={X: test_data, Y: test_label})
    print("Testing Accuracy:", test_accuracy)

Step 1, Minibatch Loss= 2.7034, Training Accuracy= 0.130
Step 200, Minibatch Loss= 2.1622, Training Accuracy= 0.230
Step 400, Minibatch Loss= 1.9232, Training Accuracy= 0.450
Step 600, Minibatch Loss= 1.6289, Training Accuracy= 0.520
Step 800, Minibatch Loss= 1.6521, Training Accuracy= 0.480
Step 1000, Minibatch Loss= 1.5981, Training Accuracy= 0.480
Step 1200, Minibatch Loss= 1.5832, Training Accuracy= 0.440
Step 1400, Minibatch Loss= 1.4323, Training Accuracy= 0.570
Step 1600, Minibatch Loss= 1.3384, Training Accuracy= 0.590
Step 1800, Minibatch Loss= 1.2887, Training Accuracy= 0.590
Step 2000, Minibatch Loss= 1.2511, Training Accuracy= 0.610
Step 2200, Minibatch Loss= 1.2084, Training Accuracy= 0.580
Step 2400, Minibatch Loss= 1.0337, Training Accuracy= 0.710
Step 2600, Minibatch Loss= 1.0508, Training Accuracy= 0.660
Step 2800, Minibatch Loss= 1.1189, Training Accuracy= 0.630
Step 3000, Minibatch Loss= 1.0811, Training Accuracy= 0.610
Step 3200, Minibatch Loss= 1.0023, Training Acc

KeyboardInterrupt: 