# 《Hands-On Machine Learning with Scikit-Learn&TensorFlow》

## -  Chapter 14. Recurrent Neural Networks

-----

# 1. RNN

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.contrib.layers import fully_connected
# tf.set_random_seed(1)

In [2]:
# Clear the default graph so the cells below build their ops into an empty graph.
tf.reset_default_graph()

In [3]:
# Each 28x28 MNIST image is fed to the RNN row by row.
n_steps = 28          # time steps per sequence (one image row per step)
n_inputs = 28         # features per time step (pixels in one row)
n_neurons = 150       # units in the recurrent cell
n_outputs = 10        # number of output classes
learning_rate = 1e-3  # step size handed to the Adam optimizer

In [4]:
# Input sequences: (batch, n_steps, n_inputs); the batch dimension is left open.
x = tf.placeholder(dtype=tf.float32, shape=[None, n_steps, n_inputs])
# Integer class labels, one per example in the batch.
y = tf.placeholder(dtype=tf.int32, shape=[None])

In [5]:
class DeviceCellWrapper(tf.contrib.rnn.RNNCell):
    """RNN cell wrapper that runs another cell inside a `tf.device` block.

    The wrapper forwards `state_size` and `output_size` to the wrapped cell
    and, when called, invokes the wrapped cell under `tf.device(device)`.
    """

    def __init__(self, device, cell):
        """Remember the device string and the cell to delegate to.

        Args:
            device: device specification passed to `tf.device`, e.g. "/gpu:0".
            cell: the RNN cell whose computation should be placed on `device`.
        """
        self._device = device
        self._cell = cell

    @property
    def output_size(self):
        """Output size of the wrapped cell."""
        return self._cell.output_size

    @property
    def state_size(self):
        """State size of the wrapped cell."""
        return self._cell.state_size

    def __call__(self, inputs, state, scope=None):
        """Run one step of the wrapped cell under the configured device scope."""
        with tf.device(self._device):
            step_result = self._cell(inputs, state, scope)
        return step_result

## GPU
```python
devices = ["/gpu:0", "/gpu:1", "/gpu:2"]
cells = [DeviceCellWrapper(dev,tf.contrib.rnn.BasicRNNCell(num_units=n_neurons))
for dev in devices]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(cells)
outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
```

In [6]:
# A single basic RNN cell whose ops are placed on the first GPU by the wrapper.
cells = DeviceCellWrapper(
    device="/gpu:0",
    cell=tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
)
# Unroll the cell over the time axis of x; `states` holds the final state.
outputs, states = tf.nn.dynamic_rnn(cell=cells, inputs=x, dtype=tf.float32)

In [7]:
# Alternative without explicit device placement (kept for reference):
# basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
# outputs, states = tf.nn.dynamic_rnn(cell=basic_cell, inputs=x, dtype=tf.float32)

# Output layer: linear projection of the final RNN state to one score per class.
logits = fully_connected(states, n_outputs, activation_fn=None)

# Per-example cross-entropy against the integer labels, averaged over the batch.
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
loss = tf.reduce_mean(cross_entropy)

# Running `training_operation` performs one Adam update on `loss`.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_operation = optimizer.minimize(loss)

# An example is correct when its label is the top-1 entry of the logits.
correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

init = tf.global_variables_initializer()

## tf.contrib.layers.fully_connected()
By default it initializes the weights with the Xavier (Glorot) uniform initializer and the biases with zeros.
```
tf.contrib.layers.fully_connected(
    inputs,
    num_outputs,
    activation_fn=tf.nn.relu,
    normalizer_fn=None,
    normalizer_params=None,
    weights_initializer=initializers.xavier_initializer(),
    weights_regularizer=None,
    biases_initializer=tf.zeros_initializer(),
    biases_regularizer=None,
    reuse=None,
    variables_collections=None,
    outputs_collections=None,
    trainable=True,
    scope=None
)
```
1. **weights_initializer=initializers.xavier_initializer()**
```
tf.contrib.layers.xavier_initializer(
    uniform=True,
    seed=None,
    dtype=tf.float32
)
```
2. **biases_initializer=tf.zeros_initializer()**


In [8]:
from tensorflow.examples.tutorials.mnist import input_data

In [9]:
# Load MNIST into the local 'mnist' directory (the captured output below shows the download).
mnist = input_data.read_data_sets('mnist')
# Reshape the test images to (n_examples, n_steps, n_inputs) to match placeholder x.
x_test = mnist.test.images.reshape(-1, n_steps, n_inputs)
y_test = mnist.test.labels

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting mnist/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting mnist/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting mnist/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting mnist/t10k-labels-idx1-ubyte.gz


In [10]:
n_epochs = 100
batch_size = 150

# Number of full mini-batches that make up one pass over the training set.
n_batches = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            x_batch, y_batch = mnist.train.next_batch(batch_size)
            # Flat image vectors -> (batch, n_steps, n_inputs) sequences.
            x_batch = x_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_operation, feed_dict={x: x_batch, y: y_batch})
        # Train accuracy is measured on the last mini-batch of the epoch only;
        # test accuracy uses the whole test set.
        acc_train = sess.run(accuracy, feed_dict={x: x_batch, y: y_batch})
        acc_test = sess.run(accuracy, feed_dict={x: x_test, y: y_test})
        print('{} epoch, Train accuracy: {:.3f} - Test accuracy: {:.3f}'.format(epoch, acc_train, acc_test))

0 epoch, Train accuracy: 0.927 - Test accuracy: 0.920
1 epoch, Train accuracy: 0.973 - Test accuracy: 0.950
2 epoch, Train accuracy: 0.947 - Test accuracy: 0.959
3 epoch, Train accuracy: 0.973 - Test accuracy: 0.958
4 epoch, Train accuracy: 0.987 - Test accuracy: 0.965
5 epoch, Train accuracy: 0.973 - Test accuracy: 0.965
6 epoch, Train accuracy: 0.980 - Test accuracy: 0.971
7 epoch, Train accuracy: 0.980 - Test accuracy: 0.971
8 epoch, Train accuracy: 0.973 - Test accuracy: 0.960
9 epoch, Train accuracy: 0.987 - Test accuracy: 0.972
10 epoch, Train accuracy: 0.993 - Test accuracy: 0.970
11 epoch, Train accuracy: 0.980 - Test accuracy: 0.969
12 epoch, Train accuracy: 0.993 - Test accuracy: 0.969
13 epoch, Train accuracy: 0.987 - Test accuracy: 0.976
14 epoch, Train accuracy: 0.967 - Test accuracy: 0.974
15 epoch, Train accuracy: 0.993 - Test accuracy: 0.976
16 epoch, Train accuracy: 0.960 - Test accuracy: 0.965
17 epoch, Train accuracy: 0.953 - Test accuracy: 0.973
18 epoch, Train accu

# 2. Mofan's LSTM
[https://github.com/MorvanZhou/tutorials/blob/master/tensorflowTUT/tf20_RNN2/full_code.py](https://github.com/MorvanZhou/tutorials/blob/master/tensorflowTUT/tf20_RNN2/full_code.py)

In [None]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Clear the default graph before building the LSTM model of this section.
tf.reset_default_graph()
# one_hot=True: the labels are requested in one-hot form, which is what the
# float label placeholder and the argmax-based accuracy in this section expect.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

In [None]:
lr = 1e-3                # learning rate for the Adam optimizer
training_iters = 100000  # training stops once step * batch_size reaches this count
batch_size = 128         # examples drawn per training step

In [None]:
n_steps = 28          # time steps per sequence (one image row per step)
n_inputs = 28         # input features per time step (pixels in one row)
n_hidden_units = 128  # units in the hidden projection and in the LSTM cell
n_classes = 10        # number of output classes

In [None]:
# Input sequences: (batch, n_steps, n_inputs).
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
# One-hot labels: (batch, n_classes). The shape has to be an ordered sequence
# (list/tuple); a set literal has no defined element order, so the two
# dimensions could be swapped.
y = tf.placeholder(tf.float32, [None, n_classes])

In [None]:
# Projection matrices, drawn from tf.random_normal:
#   'in'  maps each time step's n_inputs features to n_hidden_units,
#   'out' maps the final hidden state to n_classes scores.
weights = {
    'in': tf.Variable(tf.random_normal(shape=[n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal(shape=[n_hidden_units, n_classes])),
}
# Matching bias vectors, all starting at the constant 0.1.
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes])),
}

In [None]:
def RNN(x, weights, biases):
    """Build the LSTM classifier graph and return its logits.

    Args:
        x: float tensor that reshapes to (batch, n_steps, n_inputs).
        weights: dict with an 'in' matrix (n_inputs, n_hidden_units) and an
            'out' matrix (n_hidden_units, n_classes).
        biases: dict with matching 'in' and 'out' bias vectors.

    Returns:
        Tensor of shape (batch, n_classes) holding unnormalized class scores.
    """
    # (batch, n_steps, n_inputs) -> (batch * n_steps, n_inputs) so a single
    # matmul applies the input projection to every time step.
    x = tf.reshape(x, [-1, n_inputs])
    x_in = tf.matmul(x, weights['in']) + biases['in']
    # Back to (batch, n_steps, n_hidden_units) for the recurrent layer.
    x_in = tf.reshape(x_in, [-1, n_steps, n_hidden_units])

    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden_units, forget_bias=1.0, state_is_tuple=True)

    # Passing dtype (rather than a zero_state built from the global batch_size)
    # lets dynamic_rnn create the all-zero initial state for whatever batch
    # size is fed at run time, so the graph is not tied to one batch size.
    _outputs, states = tf.nn.dynamic_rnn(lstm_cell, x_in, dtype=tf.float32, time_major=False)

    # With state_is_tuple=True the final state is the pair (c, h);
    # index 1 is h, the last hidden output.
    results = tf.matmul(states[1], weights['out']) + biases['out']
    return results

## LSTM cell
```python
tf.nn.rnn_cell.BasicLSTMCell(num_units, forget_bias=1.0, state_is_tuple=True, activation=None, reuse=None, name=None, dtype=None, **kwargs)
```
```python
tf.nn.dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None, dtype=None, parallel_iterations=None, swap_memory=False, time_major=False, scope=None)
```

In [None]:
# Logits, mean softmax cross-entropy against the one-hot labels, and one Adam step.
pred = RNN(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
train_op = tf.train.AdamOptimizer(lr).minimize(cost)

# Accuracy: fraction of examples whose arg-max logit matches the arg-max label.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# tf.initialize_all_variables() is deprecated; use the same initializer as the
# other sections of this notebook.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    step = 0
    # Each step consumes batch_size examples; stop after training_iters examples.
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Axis order must match placeholder x: (batch, n_steps, n_inputs).
        batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
        sess.run(train_op, feed_dict={x: batch_xs, y: batch_ys})
        if step % 20 == 0:
            # Accuracy on the mini-batch that was just trained on.
            print(sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys}))
        step += 1

# 3. aymericdamien/TensorFlow-Examples
[https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py)

In [None]:
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import rnn
# Clear the default graph before building the model of this section.
tf.reset_default_graph()
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
# Labels are requested one-hot to match the float label placeholder Y.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

'''
To classify images using a recurrent neural network, we consider every image
row as a sequence of pixels. Because MNIST image shape is 28*28px, we will then
handle 28 sequences of 28 steps for every sample.
'''
# In terms of the placeholders below: each sample is fed as `timesteps` rows,
# and every row contributes `num_input` pixel values.

# --- Training parameters ---
learning_rate = 1e-3
training_steps = 10000
batch_size = 128
display_step = 200   # report loss/accuracy every this many steps

# --- Network parameters ---
num_input = 28     # values per time step (one image row)
timesteps = 28     # rows per image
num_hidden = 128   # LSTM units
num_classes = 10   # output classes

# --- Graph inputs ---
X = tf.placeholder(tf.float32, shape=[None, timesteps, num_input])
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# --- Output-layer parameters ---
weights = {'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))}
biases = {'out': tf.Variable(tf.random_normal([num_classes]))}


def RNN(x, weights, biases):
    """Run a statically unrolled LSTM over `x` and return class logits.

    Args:
        x: tensor of shape (batch_size, timesteps, n_input).
        weights: dict whose 'out' entry projects the LSTM output to classes.
        biases: dict whose 'out' entry is the output bias.

    Returns:
        Linear projection of the last time step's LSTM output.
    """
    # static_rnn takes a Python list with one (batch_size, n_input) tensor per
    # time step, so split x along its time axis.
    step_inputs = tf.unstack(x, num=timesteps, axis=1)

    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    step_outputs, _final_state = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Only the output of the final time step feeds the classifier.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']

logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)

# Loss: mean softmax cross-entropy; optimizer: plain gradient descent.
loss_op = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Accuracy: share of samples whose most probable class equals the label's class.
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Op that assigns every variable its initial value.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for step in range(1, training_steps + 1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Flat image vectors -> (batch_size, timesteps, num_input) sequences.
        batch_x = batch_x.reshape((batch_size, timesteps, num_input))
        feed = {X: batch_x, Y: batch_y}

        # One optimization step (backprop).
        sess.run(train_op, feed_dict=feed)

        if step % display_step == 0 or step == 1:
            # Loss and accuracy on the mini-batch that was just used.
            loss, acc = sess.run([loss_op, accuracy], feed_dict=feed)
            print("Step " + str(step) + ", Minibatch Loss= " +
                  "{:.4f}".format(loss) + ", Training Accuracy= " +
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Accuracy on the first 128 MNIST test images.
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
    test_label = mnist.test.labels[:test_len]
    test_accuracy = sess.run(accuracy, feed_dict={X: test_data, Y: test_label})
    print("Testing Accuracy:", test_accuracy)

1. Init a tf  tensor

```python
tf.InteractiveSession()

a = tf.constant([[[1,2,3],[4,5,6]],[[7,8,9],[0,1,2]],[[3,4,5],[6,7,8]]])
print(a.shape)
a.eval()
```
[out]:

```
(3, 2, 3)
array([[[1, 2, 3],
        [4, 5, 6]],

       [[7, 8, 9],
        [0, 1, 2]],

       [[3, 4, 5],
        [6, 7, 8]]], dtype=int32)
```
2. tf.transpose(a, perm=None, name='transpose', conjugate=False)

 Reorders the dimensions of `a`: dimension `i` of the result is dimension `perm[i]` of the input.

```python
b = tf.transpose(a, perm=[1,0,2])   
print(b.shape)
b.eval()
```
[out]:

```
(2, 3, 3)
array([[[1, 2, 3],
        [7, 8, 9],
        [3, 4, 5]],

       [[4, 5, 6],
        [0, 1, 2],
        [6, 7, 8]]], dtype=int32)
```
3. tf.unstack(value, num=None, axis=0, name='unstack')

```python
c = tf.unstack(b)
for i in c:
    print(i.eval(),'\n-----')
```
[out]:

```
[[1 2 3]
 [7 8 9]
 [3 4 5]] 
-----
[[4 5 6]
 [0 1 2]
 [6 7 8]] 
-----
```
-----

**All-in-one**
```python
x = tf.reshape(a, [-1, 3])
x.eval()
```
[out]:

```
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]], dtype=int32)
```


In [None]:

# Open an interactive session; it installs itself as the default session, which
# is what lets the `.eval()` calls in the following cells run without passing one.
tf.InteractiveSession()

In [None]:
# Constant tensor of shape (3, 2, 3) used by the demo cells that follow.
a = tf.constant([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [0, 1, 2]],
    [[3, 4, 5], [6, 7, 8]],
])
print(a.shape)
a.eval()


In [None]:
# Swap the first two axes of a and keep the last one in place.
b = tf.transpose(a, perm=(1, 0, 2))
print(b.shape)
b.eval()

In [None]:
# Split b along axis 0 (the default axis) into a list of tensors and show each.
c = tf.unstack(b, axis=0)
for piece in c:
    print(piece.eval(), '\n-----')

In [None]:
# Split a directly along axis 1 into 2 tensors.
# Note: this rebinds the name `x` used for a placeholder earlier in the notebook.
x = tf.unstack(a, num=2, axis=1)
for piece in x:
    print(piece.eval(), '\n-----')

In [None]:
# Collapse all leading axes of a so that every row holds 3 values.
x = tf.reshape(a, shape=[-1, 3])
x.eval()