In [87]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "rnn"

def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)
    
import tensorflow as tf

In [88]:
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "b<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))

# Basic RNN
illustrates a layer of 5 recurrent neurons 

![LayerOfReuron.png](attachment:LayerOfReuron.png)

In [89]:
import time
d = np.int32(time.time())
reset_graph()

n_inputs = 3
n_neurons = 5

X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons],dtype=tf.float32))
Wy = tf.Variable(tf.random_normal(shape=[n_neurons,n_neurons],dtype=tf.float32))
b = tf.Variable(tf.zeros([n_neurons], dtype=tf.float32))

Y0 = tf.tanh(tf.matmul(X0, Wx) + b)
Y1 = tf.tanh(tf.matmul(Y0, Wy) + tf.matmul(X1, Wx) + b)

init = tf.global_variables_initializer()

input of four instances:
Each instance represents a sampled random process

In [90]:
import numpy as np

X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1

with tf.Session() as sess:
    init.run()
    Y0_val_, Y1_val_ = sess.run([Y0, Y1], feed_dict={X0: X0_batch, X1: X1_batch})

In [91]:
Y0_val_
#np.sum(Y0_val-Y0_val_,axis=0)

array([[-0.0664006 ,  0.9625767 ,  0.68105793,  0.7091854 , -0.898216  ],
       [ 0.9977755 , -0.719789  , -0.9965761 ,  0.9673924 , -0.9998972 ],
       [ 0.99999774, -0.99898803, -0.9999989 ,  0.9967762 , -0.9999999 ],
       [ 1.        , -1.        , -1.        , -0.99818915,  0.9995087 ]],
      dtype=float32)

In [92]:
Y1_val_
#Y1_val-Y1_val_

array([[ 1.        , -1.        , -1.        ,  0.4020025 , -0.9999998 ],
       [-0.12210421,  0.6280527 ,  0.9671843 , -0.9937122 , -0.25839362],
       [ 0.9999983 , -0.9999994 , -0.9999975 , -0.8594331 , -0.9999881 ],
       [ 0.99928284, -0.99999815, -0.9999058 ,  0.9857963 , -0.92205757]],
      dtype=float32)

# Using static_rnn()
run with fixed time steps

In [93]:
n_inputs = 3
n_neurons = 5
reset_graph()

X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, [X0, X1],
                                                dtype=tf.float32)
Y0, Y1 = output_seqs

In [94]:
init = tf.global_variables_initializer()
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1
with tf.Session() as sess:
    init.run()
    X1_size=X1.eval(feed_dict={X0: X0_batch, X1: X1_batch}).shape
    [Y0_valS, Y1_valS] = sess.run([Y0,Y1], feed_dict={X0: X0_batch, X1: X1_batch})
    pass

In [95]:
print(X1_size)
Y0_valS

(4, 3)


array([[ 0.30741334, -0.32884315, -0.6542847 , -0.9385059 ,  0.52089024],
       [ 0.99122757, -0.9542542 , -0.7518079 , -0.9995208 ,  0.9820235 ],
       [ 0.9999268 , -0.99783254, -0.8247353 , -0.9999963 ,  0.99947774],
       [ 0.996771  , -0.68750614,  0.8419969 ,  0.9303911 ,  0.8120684 ]],
      dtype=float32)

In [96]:
Y1_valS

array([[ 0.99998885, -0.9997605 , -0.06679298, -0.9999804 ,  0.99982214],
       [-0.6524944 , -0.51520866, -0.37968954, -0.59225935, -0.08968385],
       [ 0.998624  , -0.997152  , -0.03308626, -0.9991565 ,  0.9932902 ],
       [ 0.99681675, -0.9598194 ,  0.39660636, -0.8307605 ,  0.7967197 ]],
      dtype=float32)

In [97]:
#show_graph(tf.get_default_graph())

# Static rnn () extension - Packing sequences
run pre-programmable time steps

In [98]:
n_steps = 3
n_inputs = 3
n_neurons = 5
reset_graph()

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
X_seqs = tf.unstack(tf.transpose(X, perm=[1, 0, 2]))

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(basic_cell, X_seqs,
                                                dtype=tf.float32)
outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2])

In [99]:
init = tf.global_variables_initializer()
X_batch = np.array([
        # t = 0      t = 1 
        [[0, 1, 2], [9, 8, 7],[1,2,3]], # instance 1
        [[3, 4, 5], [0, 0, 0],[1,2,3]], # instance 2
        [[6, 7, 8], [6, 5, 4],[1,2,3]], # instance 3
        [[9, 0, 1], [3, 2, 1],[1,2,3]], # instance 4
    ])

with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict={X: X_batch})

In [100]:
outputs_val

array([[[-0.45652324, -0.68064123,  0.40938237,  0.63104504,
         -0.45732826],
        [-0.94288003, -0.9998869 ,  0.94055814,  0.9999985 ,
         -0.9999997 ],
        [-0.9135523 , -0.88120127,  0.8607582 ,  0.9710674 ,
         -0.9747271 ]],

       [[-0.8001535 , -0.9921827 ,  0.7817797 ,  0.9971031 ,
         -0.9964609 ],
        [-0.637116  ,  0.11300932,  0.5798437 ,  0.43105593,
         -0.63716984],
        [-0.8063481 , -0.9394014 ,  0.6785674 ,  0.9490792 ,
         -0.96972144]],

       [[-0.93605185, -0.9998379 ,  0.9308867 ,  0.9999815 ,
         -0.99998295],
        [-0.9165386 , -0.9945604 ,  0.89605415,  0.99987197,
         -0.9999751 ],
        [-0.9087715 , -0.8799446 ,  0.8602572 ,  0.96978194,
         -0.97464114]],

       [[ 0.9927369 , -0.9981933 , -0.55543643,  0.9989031 ,
         -0.9953323 ],
        [-0.02746334, -0.73191994,  0.7827872 ,  0.9525682 ,
         -0.97817713],
        [-0.88796663, -0.9282189 ,  0.8787439 ,  0.9757828 ,
         

As seen, the graph hard code all unrolled neuron layers

In [101]:
show_graph(tf.get_default_graph())

# Using dynamic_rnn()

In [102]:
n_steps = 3
n_inputs = 3
n_neurons = 5
reset_graph()

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,swap_memory=True)

In [103]:
init = tf.global_variables_initializer()
X_batch = np.array([
        [[0, 1, 2], [9, 8, 7],[1,2,3]], # instance 1
        [[3, 4, 5], [0, 0, 0],[0,0,0]], # instance 2
        [[6, 7, 8], [6, 5, 4],[1,2,3]], # instance 3
        [[9, 0, 1], [3, 2, 1],[1,2,3]], # instance 4
    ])

with tf.Session() as sess:
    init.run()
    outputs_val = outputs.eval(feed_dict={X: X_batch})

In [104]:
outputs_val

array([[[-0.0948875 , -0.7863541 , -0.75976855,  0.81456447,
         -0.8659667 ],
        [-0.9999981 , -0.99989897, -0.9999942 ,  0.9995981 ,
         -0.4085584 ],
        [-0.5340997 , -0.9176824 , -0.9512755 ,  0.96605945,
         -0.95755   ]],

       [[-0.9882368 , -0.9937984 , -0.99848306,  0.99407357,
         -0.9553898 ],
        [ 0.21658944, -0.20340651,  0.02591269,  0.44528607,
         -0.40116802],
        [ 0.14433643, -0.13857844,  0.05772887, -0.03391143,
          0.17604975]],

       [[-0.99991536, -0.99983823, -0.99999166,  0.9998272 ,
         -0.98561245],
        [-0.9998106 , -0.9926861 , -0.99952865,  0.9938598 ,
         -0.2555965 ],
        [-0.5135311 , -0.9037524 , -0.951425  ,  0.9623888 ,
         -0.9570052 ]],

       [[-0.9988372 ,  0.84198505, -0.97267133, -0.869843  ,
          0.99912703],
        [-0.97511095, -0.34840977, -0.9212813 ,  0.39821982,
          0.27953893],
        [-0.5399808 , -0.8658561 , -0.9558122 ,  0.94449866,
         

In [105]:
show_graph(tf.get_default_graph())

# Setting variable sequence length

In [106]:
n_steps = 2
n_inputs = 3
n_neurons = 5

reset_graph()

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)

seq_length = tf.placeholder(tf.int32, [None])
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32,
                                    sequence_length=seq_length,swap_memory=True)

In [107]:
init = tf.global_variables_initializer()
X_batch = np.array([
        [[0, 1, 2], [9, 8, 7]], # instance 1
        [[3, 4, 5],[0,0,0]], # instance 2
        [[6, 7, 8], [6, 5, 4]], # instance 3
        [[9, 0, 1], [3, 2, 1]], # instance 4
    ])
seq_length_batch = np.array([2, 1, 2, 2])
with tf.Session() as sess:
    init.run()
    outputs_val, states_val = sess.run(
        [outputs, states], feed_dict={X: X_batch, seq_length: seq_length_batch})

In [108]:
outputs_val

array([[[ 0.7315571 ,  0.34835717,  0.50582   , -0.22882833,
         -0.44022718],
        [-0.61832315,  0.99999994,  0.86584806,  0.9793377 ,
          0.9902246 ]],

       [[ 0.8438704 ,  0.9969755 ,  0.8478946 ,  0.4303219 ,
          0.09275495],
        [ 0.        ,  0.        ,  0.        ,  0.        ,
          0.        ]],

       [[ 0.9115922 ,  0.9999905 ,  0.95954454,  0.81892216,
          0.5774025 ],
        [-0.77545464,  0.99987745,  0.9784728 ,  0.7317201 ,
          0.888776  ]],

       [[-0.9995227 ,  0.9997784 ,  0.59744656,  0.99309695,
          0.9984741 ],
        [-0.6212    ,  0.5400085 ,  0.96938425,  0.19893228,
          0.1944804 ]]], dtype=float32)

In [109]:
states_val #contain the final state of output_val

array([[-0.61832315,  0.99999994,  0.86584806,  0.9793377 ,  0.9902246 ],
       [ 0.8438704 ,  0.9969755 ,  0.8478946 ,  0.4303219 ,  0.09275495],
       [-0.77545464,  0.99987745,  0.9784728 ,  0.7317201 ,  0.888776  ],
       [-0.6212    ,  0.5400085 ,  0.96938425,  0.19893228,  0.1944804 ]],
      dtype=float32)

In [110]:
outputs_val[:,-1,:]

array([[-0.61832315,  0.99999994,  0.86584806,  0.9793377 ,  0.9902246 ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [-0.77545464,  0.99987745,  0.9784728 ,  0.7317201 ,  0.888776  ],
       [-0.6212    ,  0.5400085 ,  0.96938425,  0.19893228,  0.1944804 ]],
      dtype=float32)

# Training a sequence classifier
Note: the book uses tensorflow.contrib.layers.fully_connected() rather than tf.layers.dense() (which did not exist when this chapter was written). It is now preferable to use tf.layers.dense(), because anything in the contrib module may change or be deleted without notice. The dense() function is almost identical to the fully_connected() function. The main differences relevant to this chapter are:

several parameters are renamed: scope becomes name, activation_fn becomes activation (and similarly the _fn suffix is removed from other parameters such as normalizer_fn), weights_initializer becomes kernel_initializer, etc.
the default activation is now None rather than tf.nn.relu.

In [111]:
reset_graph()

n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10

learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

logits = tf.layers.dense(states, n_outputs)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                          logits=logits)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

In [86]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [112]:
from pathlib import Path

my_file = Path("./my_model_RNN_final.ckpt")

n_epochs = 100
batch_size = 150
saver = tf.train.Saver()

acc_summary = tf.summary.scalar('Accuracy', accuracy)

file_writer = tf.summary.FileWriter("./model_reuse/RNN", tf.get_default_graph())

with tf.Session() as sess:
    init.run()
    if my_file.is_file():
        saver.restore(sess, my_file.absolute)
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        
        acc_test_str = acc_summary.eval(feed_dict={accuracy:acc_test})
        file_writer.add_summary(acc_test_str, epoch)
        
        
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
        
        save_path = saver.save(sess, "./my_model_RNN_{0}.ckpt".format(epoch))
    save_path = saver.save(sess, "./my_model_RNN_final.ckpt")

0 Train accuracy: 0.94666666 Test accuracy: 0.9366
1 Train accuracy: 0.96666664 Test accuracy: 0.9488
2 Train accuracy: 0.96 Test accuracy: 0.961
3 Train accuracy: 0.94666666 Test accuracy: 0.9556
4 Train accuracy: 0.9533333 Test accuracy: 0.9669
5 Train accuracy: 0.97333336 Test accuracy: 0.9679
6 Train accuracy: 0.97333336 Test accuracy: 0.9673
7 Train accuracy: 0.9866667 Test accuracy: 0.9722
8 Train accuracy: 0.9533333 Test accuracy: 0.9695
9 Train accuracy: 0.96 Test accuracy: 0.9701
10 Train accuracy: 0.98 Test accuracy: 0.9745
11 Train accuracy: 0.96 Test accuracy: 0.9725
12 Train accuracy: 0.98 Test accuracy: 0.9685
13 Train accuracy: 0.9866667 Test accuracy: 0.9746
14 Train accuracy: 0.97333336 Test accuracy: 0.9737
15 Train accuracy: 1.0 Test accuracy: 0.9736
16 Train accuracy: 1.0 Test accuracy: 0.9779
17 Train accuracy: 0.9866667 Test accuracy: 0.9733
18 Train accuracy: 0.9866667 Test accuracy: 0.9722
19 Train accuracy: 0.99333334 Test accuracy: 0.9759
20 Train accuracy: 0.

AttributeError: 'function' object has no attribute 'rfind'