# Visualize RNN/NLP Flow
## With a toy example
### In Tensorflow :D

In [31]:
import tensorflow as tf
import numpy as np
import pandas as pd

### Mini Hyperparams

In [2]:
#For X
batch_size = 2
num_seq = 3
num_features = 1

#For Y
num_classes = 1

#For RNN Cell
state_size = 4 #Num hidden units

#For output
vocab_size = 6


### Data

In [3]:
data = np.array(['The','lazy','fox','jumps','over','the']).reshape(batch_size,num_seq,num_features) #For visualizing
data_int = np.array([0,1,2,3,4,5]).reshape(batch_size,num_seq,num_features) #When things doesn't work without numbers
data

array([[['The'],
        ['lazy'],
        ['fox']],

       [['jumps'],
        ['over'],
        ['the']]], 
      dtype='<U5')

In [4]:
target_data = np.array(['lazy','fox','jumps','over','the','brown']).reshape(batch_size,num_seq,num_features)
target_data_int = np.array([1,2,3,4,5,0]).reshape(batch_size,num_seq,num_features)
target_data

array([[['lazy'],
        ['fox'],
        ['jumps']],

       [['over'],
        ['the'],
        ['brown']]], 
      dtype='<U5')

### Placeholders

<img src="http://i66.tinypic.com/33m9c37.jpg" style="width:600px;">

In [5]:
batchX_placeholder = tf.placeholder(dtype=tf.string,shape=[batch_size,num_seq,num_features],name='x')
batchY_placeholder = tf.placeholder(dtype=tf.string,shape=[batch_size,num_seq,num_classes],name='y')

#When things doesn't work without numbers
batchX_placeholder_float = tf.placeholder(dtype=tf.float32,shape=[batch_size,num_seq,num_features],name='x_float')
batchY_placeholder_float = tf.placeholder(dtype=tf.int32,shape=[batch_size,num_seq,num_classes],name='y_float')

In [6]:
with tf.Session() as sess:
    feed = {batchX_placeholder : data, batchY_placeholder : target_data}
    
    x = batchX_placeholder.eval(feed_dict=feed)
    y = batchY_placeholder.eval(feed_dict=feed)

print('X:\n',x)
print('\nY:\n',y)

X:
 [[['The']
  ['lazy']
  ['fox']]

 [['jumps']
  ['over']
  ['the']]]

Y:
 [[['lazy']
  ['fox']
  ['jumps']]

 [['over']
  ['the']
  ['brown']]]


### Transpose the Y 

To get the labels series by sequence

<img src="http://i66.tinypic.com/2qszajs.jpg" style="width:600px;">

In [7]:
labels_series = tf.transpose(a=batchY_placeholder, perm=[1,0,2])

In [8]:
with tf.Session() as sess:    
    feed = {batchX_placeholder : data, batchY_placeholder : target_data}
    
    results = sess.run(fetches=[labels_series],feed_dict=feed)
    
    _labels_series = np.array(results[0])

In [9]:
print(_labels_series.shape)
print('The labels of the first sequence for all instances in the batch:\n %s\n' %_labels_series[0])

print('The labels of the second sequence for all instances in the batch:\n %s\n' %_labels_series[1])

print('The labels of the third sequence for all instances in the batch:\n %s\n' %_labels_series[2])

(3, 2, 1)
The labels of the first sequence for all instances in the batch:
 [[b'lazy']
 [b'over']]

The labels of the second sequence for all instances in the batch:
 [[b'fox']
 [b'the']]

The labels of the third sequence for all instances in the batch:
 [[b'jumps']
 [b'brown']]



### RNN Cell

We will get:
 - **states_series:** The output (and state because simple RNN) of the cell in each time step
 - **current_state:** The last state (output)

In [10]:
cell = tf.contrib.rnn.BasicRNNCell(num_units=state_size)

states_series, current_state = tf.nn.dynamic_rnn(cell=cell,inputs=batchX_placeholder_float,dtype=tf.float32)

In [11]:
with tf.Session() as sess:
    tf.global_variables_initializer().run()

    
    feed = {batchX_placeholder_float : data_int, batchY_placeholder_float : target_data_int}
    
    results = sess.run(fetches=[states_series,current_state],feed_dict=feed)
    
    _states_series = np.array(results[0])
    _current_state = np.array(results[1])

#### States series

<img src="http://i64.tinypic.com/24z9e83.jpg" style="width:700px">

Note: This is a Simple RNN -> The hidden state and output are same

In [12]:
print(_states_series.shape)
_states_series

(2, 3, 4)


array([[[ 0.        ,  0.        ,  0.        ,  0.        ],
        [-0.3828496 ,  0.17064542,  0.67230123,  0.52947646],
        [-0.92135811,  0.4982436 ,  0.94858086,  0.94305122]],

       [[-0.83673453,  0.47537675,  0.98506325,  0.94341761],
        [-0.99640185,  0.81296581,  0.99910688,  0.99777246],
        [-0.99906594,  0.88451481,  0.99987727,  0.99950737]]], dtype=float32)

#### Current/Last state

<img src="http://i66.tinypic.com/sffjo1.jpg" style="width:700px">


In [13]:
print(_current_state.shape)
_current_state

(2, 4)


array([[-0.92135811,  0.4982436 ,  0.94858086,  0.94305122],
       [-0.99906594,  0.88451481,  0.99987727,  0.99950737]], dtype=float32)

#### Transpose state series

To get the states by sequence

In [14]:
states_series = tf.transpose(states_series,[1,0,2])

In [15]:
with tf.Session() as sess:    
    tf.global_variables_initializer().run()
    feed = {batchX_placeholder_float : data_int, batchY_placeholder_float : target_data_int}
    
    results = sess.run(fetches=[states_series],feed_dict=feed)
    
    _states_series = np.array(results[0])

In [16]:
print('(sequence_length,batch_size,state_size) <->',_states_series.shape,'\n')
print('The state/output of the first sequence for all instances in the batch:\n %s\n' %_states_series[0])

print('The state/output of the second sequence for all instances in the batch:\n %s\n' %_states_series[1])

print('The state/output of the third sequence for all instances in the batch:\n %s\n' %_states_series[2])

(sequence_length,batch_size,state_size) <-> (3, 2, 4) 

The state/output of the first sequence for all instances in the batch:
 [[ 0.          0.          0.          0.        ]
 [ 0.98410064  0.98064345 -0.95231408 -0.96098006]]

The state/output of the second sequence for all instances in the batch:
 [[ 0.66651201  0.64771891 -0.55021465 -0.57358873]
 [ 0.98326427  0.9985764  -0.98046207 -0.99799967]]

The state/output of the third sequence for all instances in the batch:
 [[ 0.79220331  0.95351785 -0.83097667 -0.95784646]
 [ 0.99657929  0.99970174 -0.99398762 -0.99945265]]



## Compute predictions - Last layer

With the outputs of the RNN cell let's make the computations for the prediction on the last layer. But before doing that we need to:

### Flatten labels series & states series


This is for compute the matmul

In summary, now **doesn't matter from which** observation/step **the prediction is**, so we **squash** into 1 minus dimension

**Flatten labels series**

<img src="http://i64.tinypic.com/214pjye.jpg">

In [17]:
flat_targets = tf.reshape(tensor=batchY_placeholder,shape=[-1])

In [18]:
with tf.Session() as sess:    
    tf.global_variables_initializer().run()
    feed = {batchY_placeholder : target_data}
    
    results = sess.run(fetches=[flat_targets],feed_dict=feed)
    
    _flat_targets = np.array(results[0])
    

In [19]:
print('From shape:',batchY_placeholder.get_shape(),' to shape: ',_flat_targets.shape)
_flat_targets

From shape: (2, 3, 1)  to shape:  (6,)


array([b'lazy', b'fox', b'jumps', b'over', b'the', b'brown'], dtype=object)

**Flatten outputs**

<img src="http://i63.tinypic.com/24b4w2a.jpg" style="width:600px;">

In [20]:
flat_outputs = tf.reshape(tensor=states_series,shape=[-1,state_size])

In [21]:
with tf.Session() as sess:    
    tf.global_variables_initializer().run()
    feed = {batchX_placeholder_float: data_int}
    
    results = sess.run(fetches=[flat_outputs],feed_dict=feed)
    
    _flat_outputs = np.array(results[0])
    

In [22]:
print('From shape:',states_series.get_shape(),' to shape: ',_flat_outputs.shape)
_flat_outputs

From shape: (3, 2, 4)  to shape:  (6, 4)


array([[ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.78044307, -0.49088821, -0.97092193, -0.36441708],
       [ 0.3353413 , -0.17718661, -0.60608613, -0.12663972],
       [ 0.80831087, -0.86220902, -0.99502432, -0.55181724],
       [ 0.44298396, -0.5932681 , -0.90480542, -0.32963246],
       [ 0.91816044, -0.94902056, -0.99890029, -0.77325529]], dtype=float32)

### Compute prediction

Matmul with flattened inputs => **Logits**

<img src="http://i68.tinypic.com/6zaogi.jpg" style="width:600px;">

In [23]:
W = tf.Variable(tf.random_uniform(shape=[state_size,vocab_size]),dtype=tf.float32,name='W_out')
b = tf.Variable(tf.constant(0.1,shape=[vocab_size]),dtype=tf.float32,name='b_out')

In [24]:
logits = tf.matmul(flat_outputs,W) + b

In [25]:
with tf.Session() as sess:    
    tf.global_variables_initializer().run()
    feed = {batchX_placeholder_float: data_int, batchY_placeholder_float : target_data_int}
    
    results = sess.run(fetches=[logits],feed_dict=feed)
    
    _logits = np.array(results[0])

In [26]:
print(_logits.shape)
_logits

(6, 6)


array([[ 0.1       ,  0.1       ,  0.1       ,  0.1       ,  0.1       ,
         0.1       ],
       [ 1.49434376,  1.78906667,  0.89901924,  1.78935158,  1.72661102,
         1.89934814],
       [ 0.73228633,  0.92536426,  0.48823631,  0.97955954,  0.91697717,
         1.00008118],
       [ 1.4169637 ,  1.56162679,  0.66684574,  1.58952439,  1.62763298,
         1.72019303],
       [ 1.19641578,  1.24528551,  0.53684908,  1.20129764,  1.2815069 ,
         1.30902803],
       [ 1.64198267,  1.87490857,  0.88617158,  1.85626113,  1.84925163,
         2.0070045 ]], dtype=float32)

Which means...

Note: Random logits implies random probabilities ;) so don't expect a sense

In [33]:
df = pd.DataFrame.from_records(_logits)
df.columns = ['The','lazy','fox','jumps','over','brown']
df.index = ['The','lazy','fox','jumps','over','brown']
df

Unnamed: 0,The,lazy,fox,jumps,over,brown
The,0.1,0.1,0.1,0.1,0.1,0.1
lazy,1.494344,1.789067,0.899019,1.789352,1.726611,1.899348
fox,0.732286,0.925364,0.488236,0.97956,0.916977,1.000081
jumps,1.416964,1.561627,0.666846,1.589524,1.627633,1.720193
over,1.196416,1.245286,0.536849,1.201298,1.281507,1.309028
brown,1.641983,1.874909,0.886172,1.856261,1.849252,2.007004


### Loss with Sparse Softmax cross entropy

In [27]:
#It's necessary numbers
flat_targets = tf.reshape(tensor=batchY_placeholder_float,shape=[-1])

In [28]:
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=flat_targets)

In [29]:
with tf.Session() as sess:    
    tf.global_variables_initializer().run()
    feed = {batchX_placeholder_float: data_int, batchY_placeholder_float : target_data_int}
    
    results = sess.run(fetches=[loss],feed_dict=feed)
    
    _loss = np.array(results[0])

In [30]:
_loss

array([ 1.79175949,  2.55482769,  1.99774253,  1.72456992,  1.12336171,
        1.55354738], dtype=float32)