# Recurrent Neural Networks

In [1]:
!cat ../book*

https://github.com/ageron/handson-ml


In [21]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
from IPython.display import clear_output, Image, display, HTML

%matplotlib inline
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

In [3]:
# Where to save the figures
PROJECT_ROOT_DIR = ".."

In [4]:
# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

def save_fig(fig_id, tight_layout=True):
    path = os.path.join(
        PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

## Basic RNNs in TensorFlow
### Manual RNN

In [8]:
reset_graph()

n_inputs = 3
n_neurons = 5

In [9]:
X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

In [10]:
Wx = tf.Variable(tf.random_normal(shape=[n_inputs, n_neurons], 
                                  dtype=tf.float32))
Wy = tf.Variable(tf.random_normal(shape=[n_neurons, n_neurons], 
                                  dtype=tf.float32))
b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))

Y0 = tf.tanh(tf.matmul(X0, Wx) + b)
Y1 = tf.tanh(tf.matmul(Y0, Wy) + b)

In [11]:
init = tf.global_variables_initializer()

In [12]:
# To run, we need to feed it inputs at both time steps:
# Mini batch:        inst0,     inst1,     inst2,     inst3
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]]) # t = 0
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]]) # t = 1

In [14]:
with tf.Session() as s:
    init.run()
    Y0_val, Y1_val = s.run([Y0, Y1], 
                           feed_dict={X0: X0_batch, X1: X1_batch})

In [15]:
print(Y0_val) # output at t=0
print(Y1_val) #           t=1

[[-0.0664006   0.96257669  0.68105787  0.70918542 -0.89821595]
 [ 0.9977755  -0.71978885 -0.99657625  0.9673925  -0.99989718]
 [ 0.99999774 -0.99898815 -0.99999893  0.99677622 -0.99999988]
 [ 1.         -1.         -1.         -0.99818915  0.99950868]]
[[-0.32789749  0.9033345   0.10371689 -0.97484881  0.023365  ]
 [-0.12210433  0.62805319  0.96718419 -0.99371207 -0.25839335]
 [-0.05078916  0.23659249  0.9691847  -0.99160379 -0.48125735]
 [ 0.40818208 -0.96800256 -0.13471517  0.97532058  0.38755944]]


## Static Unrolling through Time
Doing the same as above, using `static_rnn()`

In [16]:
# Same as above:
reset_graph()

X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(
    basic_cell, [X0, X1], dtype=tf.float32)

Y0, Y1 = output_seqs

In [17]:
init = tf.global_variables_initializer()

In [19]:
with tf.Session() as s:
    init.run()
    Y0_val, Y1_val = s.run([Y0, Y1], 
                           feed_dict={X0: X0_batch, X1: X1_batch})

In [20]:
print(Y0_val) # output at t=0
print(Y1_val) #           t=1

[[-0.81393629 -0.43182844 -0.40150994  0.7043609   0.89640522]
 [-0.9915663  -0.95103657  0.19996507  0.98335052  0.99998963]
 [-0.99965042 -0.99683058  0.68092704  0.99918783  1.        ]
 [ 0.64988363 -0.16740513  0.99994725  0.81680971  0.99995029]]
[[-0.99959785 -0.99861717  0.98714638  0.99745673  1.        ]
 [-0.72472596  0.17925572  0.53362155 -0.65215266 -0.08035918]
 [-0.9957462  -0.96851194  0.9874723   0.84106421  0.99999976]
 [-0.72859728 -0.27958852  0.80567408 -0.20587993  0.9995411 ]]


In [22]:
def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = "b<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))

In [23]:
show_graph(tf.get_default_graph())

### Packing Sequences

In [24]:
reset_graph()

n_steps = 2

In [25]:
perm = [1, 0, 2]
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
X_seqs = tf.unstack(tf.transpose(X, perm=perm))

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
output_seqs, states = tf.contrib.rnn.static_rnn(
    basic_cell, X_seqs, dtype=tf.float32)
outputs = tf.transpose(tf.stack(output_seqs), perm=perm)

In [26]:
init = tf.global_variables_initializer()

In [27]:
X_batch = np.array([
    # t=0       # t=1
    [[0, 1, 2], [9, 8, 7]], # instance 1
    [[3, 4, 5], [0, 0, 0]], #          2
    [[6, 7, 8], [6, 5, 4]], #          3
    [[9, 0, 1], [3, 2, 1]]  #          4
])

In [28]:
with tf.Session() as s:
    init.run()
    outputs_val = outputs.eval(feed_dict={X: X_batch})

In [29]:
print(outputs_val)

[[[-0.91279727  0.83698678 -0.89277941  0.80308062 -0.5283336 ]
  [-1.          1.         -0.99794829  0.99985468 -0.99273592]]

 [[-0.99994391  0.99951613 -0.9946925   0.99030769 -0.94413054]
  [ 0.48733309  0.93389565 -0.31362072  0.88573611  0.2424476 ]]

 [[-1.          0.99999875 -0.99975014  0.99956584 -0.99466234]
  [-0.99994856  0.99999434 -0.96058172  0.99784708 -0.9099462 ]]

 [[-0.95972425  0.99951482  0.96938795 -0.969908   -0.67668229]
  [-0.84596014  0.96288228  0.96856463 -0.14777924 -0.9119423 ]]]


In [30]:
print(np.transpose(outputs_val, axes=perm)[1])

[[-1.          1.         -0.99794829  0.99985468 -0.99273592]
 [ 0.48733309  0.93389565 -0.31362072  0.88573611  0.2424476 ]
 [-0.99994856  0.99999434 -0.96058172  0.99784708 -0.9099462 ]
 [-0.84596014  0.96288228  0.96856463 -0.14777924 -0.9119423 ]]


## Dynamic Unrolling through Time

In [34]:
reset_graph()

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)

In [35]:
seq_length = tf.placeholder(tf.int32, [None])
outputs, states = tf.nn.dynamic_rnn(
    basic_cell, X, dtype=tf.float32, sequence_length=seq_length)

In [36]:
init = tf.global_variables_initializer()

In [37]:
seq_length_batch = np.array([2, 1, 2, 2])

In [38]:
with tf.Session() as s:
    init.run()
    outputs_val, states_val = s.run(
        [outputs, states], 
        feed_dict={X: X_batch, seq_length: seq_length_batch})

In [39]:
print(outputs_val)

[[[-0.68579948 -0.25901747 -0.80249101 -0.18141513 -0.37491536]
  [-0.99996698 -0.94501185  0.98072106 -0.9689762   0.99966913]]

 [[-0.99099374 -0.64768541 -0.67801034 -0.7415446   0.7719509 ]
  [ 0.          0.          0.          0.          0.        ]]

 [[-0.99978048 -0.85583007 -0.49696958 -0.93838578  0.98505187]
  [-0.99951065 -0.89148796  0.94170523 -0.38407657  0.97499216]]

 [[-0.02052618 -0.94588047  0.99935204  0.37283331  0.9998163 ]
  [-0.91052347  0.05769409  0.47446665 -0.44611037  0.89394671]]]


In [40]:
print(states_val)

[[-0.99996698 -0.94501185  0.98072106 -0.9689762   0.99966913]
 [-0.99099374 -0.64768541 -0.67801034 -0.7415446   0.7719509 ]
 [-0.99951065 -0.89148796  0.94170523 -0.38407657  0.97499216]
 [-0.91052347  0.05769409  0.47446665 -0.44611037  0.89394671]]


## Training a Sequence Classifier