# Visualize RNN/NLP Flow
## With a toy example
### In Tensorflow :D

In [1]:
import tensorflow as tf
import numpy as np

### Mini Hyperparams

In [2]:
#For X
batch_size = 2
num_seq = 3
num_features = 1

#For Y
num_classes = 1

#For RNN Cell
state_size = 4 #Num hidden units

#For output
vocab_size = 6


### Data

In [3]:
data = np.array(['The','lazy','fox','jumps','over','the']).reshape(batch_size,num_seq,num_features) #For visualizing
data_int = np.array([0,1,2,3,4,5]).reshape(batch_size,num_seq,num_features) #When things doesn't work without numbers
data

array([[['The'],
        ['lazy'],
        ['fox']],

       [['jumps'],
        ['over'],
        ['the']]], 
      dtype='<U5')

In [30]:
target_data = np.array(['lazy','fox','jumps','over','the','brown']).reshape(batch_size,num_seq,num_features)
target_data_int = np.array([1,2,3,4,5,0]).reshape(batch_size,num_seq,num_features)
target_data

array([[['lazy'],
        ['fox'],
        ['jumps']],

       [['over'],
        ['the'],
        ['brown']]], 
      dtype='<U5')

### Placeholders

<img src="http://i66.tinypic.com/33m9c37.jpg" style="width:600px;">

In [5]:
batchX_placeholder = tf.placeholder(dtype=tf.string,shape=[batch_size,num_seq,num_features],name='x')
batchY_placeholder = tf.placeholder(dtype=tf.string,shape=[batch_size,num_seq,num_classes],name='y')

#When things doesn't work without numbers
batchX_placeholder_float = tf.placeholder(dtype=tf.float32,shape=[batch_size,num_seq,num_features],name='x_float')
batchY_placeholder_float = tf.placeholder(dtype=tf.int32,shape=[batch_size,num_seq,num_classes],name='y_float')

In [6]:
with tf.Session() as sess:
    feed = {batchX_placeholder : data, batchY_placeholder : target_data}
    
    x = batchX_placeholder.eval(feed_dict=feed)
    y = batchY_placeholder.eval(feed_dict=feed)

print('X:\n',x)
print('\nY:\n',y)

X:
 [[['The']
  ['lazy']
  ['fox']]

 [['jumps']
  ['over']
  ['the']]]

Y:
 [[['lazy']
  ['fox']
  ['jumps']]

 [['over']
  ['the']
  ['brown']]]


### Transpose the Y 

To get the labels series by sequence

<img src="http://i66.tinypic.com/2qszajs.jpg" style="width:600px;">

In [7]:
labels_series = tf.transpose(a=batchY_placeholder, perm=[1,0,2])

In [8]:
with tf.Session() as sess:    
    feed = {batchX_placeholder : data, batchY_placeholder : target_data}
    
    results = sess.run(fetches=[labels_series],feed_dict=feed)
    
    _labels_series = np.array(results[0])

In [9]:
print(_labels_series.shape)
print('The labels of the first sequence for all instances in the batch:\n %s\n' %_labels_series[0])

print('The labels of the second sequence for all instances in the batch:\n %s\n' %_labels_series[1])

print('The labels of the third sequence for all instances in the batch:\n %s\n' %_labels_series[2])

(3, 2, 1)
The labels of the first sequence for all instances in the batch:
 [[b'lazy']
 [b'over']]

The labels of the second sequence for all instances in the batch:
 [[b'fox']
 [b'the']]

The labels of the third sequence for all instances in the batch:
 [[b'jumps']
 [b'brown']]



### RNN Cell

We will get:
 - **states_series:** The output (and state because simple RNN) of the cell in each time step
 - **current_state:** The last state (output)

In [10]:
cell = tf.contrib.rnn.BasicRNNCell(num_units=state_size)

states_series, current_state = tf.nn.dynamic_rnn(cell=cell,inputs=batchX_placeholder_float,dtype=tf.float32)

In [11]:
with tf.Session() as sess:
    tf.global_variables_initializer().run()

    
    feed = {batchX_placeholder_float : data_int, batchY_placeholder_float : target_data_int}
    
    results = sess.run(fetches=[states_series,current_state],feed_dict=feed)
    
    _states_series = np.array(results[0])
    _current_state = np.array(results[1])

#### States series

<img src="http://i64.tinypic.com/24z9e83.jpg" style="width:700px">

Note: This is a Simple RNN -> The hidden state and output are same

In [12]:
print(_states_series.shape)
_states_series

(2, 3, 4)


array([[[ 0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.43238673, -0.4710772 ,  0.33191839,  0.37335438],
        [ 0.53553414, -0.96528101,  0.5895828 ,  0.76694363]],

       [[ 0.8828373 , -0.91116798,  0.77588522,  0.82648605],
        [ 0.82506567, -0.99947053,  0.88808799,  0.96316737],
        [ 0.91611695, -0.99985313,  0.93506902,  0.98136932]]], dtype=float32)

#### Current/Last state

<img src="http://i66.tinypic.com/sffjo1.jpg" style="width:700px">


In [13]:
print(_current_state.shape)
_current_state

(2, 4)


array([[ 0.53553414, -0.96528101,  0.5895828 ,  0.76694363],
       [ 0.91611695, -0.99985313,  0.93506902,  0.98136932]], dtype=float32)

#### Transpose state series

To get the states by sequence

In [14]:
states_series = tf.transpose(states_series,[1,0,2])

In [15]:
with tf.Session() as sess:    
    tf.global_variables_initializer().run()
    feed = {batchX_placeholder_float : data_int, batchY_placeholder_float : target_data_int}
    
    results = sess.run(fetches=[states_series],feed_dict=feed)
    
    _states_series = np.array(results[0])

In [16]:
print('(sequence_length,batch_size,state_size) <->',_states_series.shape,'\n')
print('The state/output of the first sequence for all instances in the batch:\n %s\n' %_states_series[0])

print('The state/output of the second sequence for all instances in the batch:\n %s\n' %_states_series[1])

print('The state/output of the third sequence for all instances in the batch:\n %s\n' %_states_series[2])

(sequence_length,batch_size,state_size) <-> (3, 2, 4) 

The state/output of the first sequence for all instances in the batch:
 [[ 0.          0.          0.          0.        ]
 [ 0.37008941 -0.93614703  0.37905324 -0.59634608]]

The state/output of the second sequence for all instances in the batch:
 [[ 0.12878965 -0.51436377  0.13220608 -0.22522394]
 [ 0.89266753 -0.9314726  -0.38482139 -0.13724595]]

The state/output of the third sequence for all instances in the batch:
 [[ 0.59113258 -0.71202987 -0.20717673 -0.02601799]
 [ 0.69573092 -0.98382539  0.16059849 -0.71726888]]



## Compute predictions - Last layer

With the outputs of the RNN cell let's make the computations for the prediction on the last layer. But before doing that we need to:

### Flatten labels series & states series


This is for compute the matmul

In summary, now **doesn't matter from which** observation/step **the prediction is**, so we **squash** into 1 minus dimension

**Flatten labels series**

<img src="http://i64.tinypic.com/214pjye.jpg">

In [17]:
flat_targets = tf.reshape(tensor=batchY_placeholder,shape=[-1])

In [18]:
with tf.Session() as sess:    
    tf.global_variables_initializer().run()
    feed = {batchY_placeholder : target_data}
    
    results = sess.run(fetches=[flat_targets],feed_dict=feed)
    
    _flat_targets = np.array(results[0])
    

In [19]:
print('From shape:',batchY_placeholder.get_shape(),' to shape: ',_flat_targets.shape)
_flat_targets

From shape: (2, 3, 1)  to shape:  (6,)


array([b'lazy', b'fox', b'jumps', b'over', b'the', b'brown'], dtype=object)

**Flatten outputs**

<img src="http://i63.tinypic.com/24b4w2a.jpg" style="width:600px;">

In [20]:
flat_outputs = tf.reshape(tensor=states_series,shape=[-1,state_size])

In [21]:
with tf.Session() as sess:    
    tf.global_variables_initializer().run()
    feed = {batchX_placeholder_float: data_int}
    
    results = sess.run(fetches=[flat_outputs],feed_dict=feed)
    
    _flat_outputs = np.array(results[0])
    

In [22]:
print('From shape:',states_series.get_shape(),' to shape: ',_flat_outputs.shape)
_flat_outputs

From shape: (3, 2, 4)  to shape:  (6, 4)


array([[ 0.        ,  0.        ,  0.        ,  0.        ],
       [-0.69274813, -0.98025954, -0.9797079 , -0.41708142],
       [-0.2769787 , -0.64579523, -0.64308214, -0.14697887],
       [-0.93733555, -0.99982619, -0.99946183, -0.32860491],
       [-0.67917567, -0.987589  , -0.96989709,  0.00420785],
       [-0.96817905, -0.99997431, -0.99989015, -0.51451689]], dtype=float32)

### Compute prediction

Matmul with flattened inputs

In [23]:
W = tf.Variable(tf.random_uniform(shape=[state_size,vocab_size]),dtype=tf.float32,name='W_out')
b = tf.Variable(tf.constant(0.1,shape=[vocab_size]),dtype=tf.float32,name='b_out')

In [24]:
logits = tf.matmul(flat_outputs,W) + b

In [25]:
with tf.Session() as sess:    
    tf.global_variables_initializer().run()
    feed = {batchX_placeholder_float: data_int, batchY_placeholder_float : target_data_int}
    
    results = sess.run(fetches=[logits],feed_dict=feed)
    
    _logits = np.array(results[0])

In [33]:
print(_logits.shape)
_logits

(6, 6)


array([[ 0.1       ,  0.1       ,  0.1       ,  0.1       ,  0.1       ,
         0.1       ],
       [-0.21786088, -1.3025943 , -1.25121176, -0.55337501, -0.96768951,
        -0.08923074],
       [-0.04639579, -0.63087821, -0.60588175, -0.2414602 , -0.50227517,
        -0.05099357],
       [-0.13417581, -1.40281415, -1.19780445, -0.63100964, -1.2592032 ,
        -0.45974395],
       [ 0.00769539, -0.89560258, -0.7360934 , -0.3729488 , -0.93226361,
        -0.32307422],
       [-0.15511206, -1.49623537, -1.27623796, -0.67309195, -1.32619226,
        -0.45543543]], dtype=float32)

### Probabilities with Softmax

In [27]:
#It's necessary numbers
flat_targets = tf.reshape(tensor=batchY_placeholder_float,shape=[-1])

In [28]:
probs = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=flat_targets)

In [31]:
with tf.Session() as sess:    
    tf.global_variables_initializer().run()
    feed = {batchX_placeholder_float: data_int, batchY_placeholder_float : target_data_int}
    
    results = sess.run(fetches=[probs],feed_dict=feed)
    
    _probs = np.array(results[0])

In [32]:
_probs

array([ 1.79175949,  2.04980779,  1.61992574,  1.51073301,  2.08370972,
        1.98567867], dtype=float32)