# RNN keras

Análisis de algunos de los parámetros de una RNN en Keras que determinan qué salida obtendremos.

In [75]:
import numpy as np

In [76]:
import tensorflow as tf

# Ejemplo básico

In [77]:
# Sample batch of shape (32, 1, 8) drawn uniformly from [0, 1).
# A seeded generator makes the batch identical on every run, and
# dtype=np.float32 produces float32 directly (no astype needed).
inputs = np.random.default_rng(0).random([32, 1, 8], dtype=np.float32)

In [78]:
# Display the generated input batch.
inputs

array([[[0.17511696, 0.4119784 , 0.81402165, 0.36833623, 0.20264131,
         0.19262253, 0.2940445 , 0.5437288 ]],

       [[0.7388204 , 0.85878253, 0.00802245, 0.3266959 , 0.6305261 ,
         0.36500242, 0.95706177, 0.9822079 ]],

       [[0.5976341 , 0.77481115, 0.5756128 , 0.87973356, 0.65232766,
         0.614466  , 0.30968425, 0.6690284 ]],

       [[0.1781885 , 0.7239204 , 0.7631887 , 0.5285136 , 0.26377898,
         0.9823421 , 0.1927358 , 0.6521435 ]],

       [[0.7596731 , 0.46793526, 0.85011816, 0.750995  , 0.48108232,
         0.09306982, 0.7124032 , 0.65434974]],

       [[0.7056643 , 0.5951643 , 0.916343  , 0.38598397, 0.27160344,
         0.6050002 , 0.17206696, 0.6246628 ]],

       [[0.68788683, 0.98595667, 0.13556135, 0.1625383 , 0.75774163,
         0.35276052, 0.9380096 , 0.3896037 ]],

       [[0.64150745, 0.09001349, 0.2858336 , 0.2201271 , 0.48425454,
         0.9189431 , 0.13198012, 0.65908074]],

       [[0.86812115, 0.31968412, 0.6373419 , 0.599216  , 0.43439

In [79]:
hidden_units = 4 # Number of hidden units (size of the RNN state vector)

In [80]:
# SimpleRNN layer with `hidden_units` units. activation=None means no
# activation function is applied, so the output can be reproduced with
# plain matrix products.
simple_rnn = tf.keras.layers.SimpleRNN(hidden_units, activation = None)

In [81]:
# Forward pass of the input batch through the layer.
output = simple_rnn(inputs)

In [82]:
# One vector of `hidden_units` values per input sequence.
output.shape

TensorShape([32, 4])

In [83]:
# Display the layer output, to be compared with the manual computation below.
output

<tf.Tensor: shape=(32, 4), dtype=float32, numpy=
array([[-7.68235564e-01,  5.16223133e-01,  3.22687477e-01,
        -6.62243366e-01],
       [-8.79832089e-01,  4.04417068e-01,  3.00171822e-01,
        -8.58621836e-01],
       [-8.39337051e-01,  5.97753346e-01, -4.38265800e-02,
        -1.00038421e+00],
       [-7.61894226e-01,  7.83567309e-01, -1.41228408e-01,
        -1.20052934e+00],
       [-1.01504302e+00,  6.27508938e-01,  3.51064295e-01,
        -8.85002494e-01],
       [-5.65414131e-01,  7.32667327e-01, -1.77659005e-01,
        -1.16646051e+00],
       [-5.74204266e-01,  3.39685857e-01,  1.36810631e-01,
        -1.36253929e+00],
       [-1.07595459e-01,  2.18886286e-01, -1.50306791e-01,
        -7.21326470e-01],
       [-9.29737806e-01,  6.41461015e-01,  2.71451801e-01,
        -9.83378291e-01],
       [-8.76539350e-01,  6.83961928e-01, -3.47850323e-02,
        -1.23138142e+00],
       [-9.95544255e-01,  2.55827039e-01,  7.33556449e-01,
        -7.67562628e-01],
       [-7.48249

<h3>Análisis</h3>

In [84]:
# Unpack the layer weights: input kernel (Wx), recurrent kernel (Wh), bias (bh).
Wx, Wh, bh = simple_rnn.weights

In [85]:
# Wx: (input features, hidden units); Wh: (hidden units, hidden units); bh: (hidden units,).
Wx.shape, Wh.shape, bh.shape

(TensorShape([8, 4]), TensorShape([4, 4]), TensorShape([4]))

In [86]:
import numpy as np

In [87]:
# Initial hidden state: all zeros, one entry per hidden unit.
# float32 matches the dtype of `inputs` and of the layer output, so the
# manual computation is not silently carried out in float64.
h0 = np.zeros(hidden_units, dtype=np.float32)

In [88]:
# Manual single-step recurrence with no activation:
#   h_next = x . Wx + h_prev . Wh + bh
# h0 is all zeros, so the recurrent term contributes nothing here.
result = np.dot(inputs, Wx) + np.dot(h0, Wh) + bh

In [89]:
# Display the manual result; values should match `output` up to float rounding.
result

<tf.Tensor: shape=(32, 1, 4), dtype=float32, numpy=
array([[[-7.68235564e-01,  5.16223073e-01,  3.22687507e-01,
         -6.62243307e-01]],

       [[-8.79832029e-01,  4.04417068e-01,  3.00171793e-01,
         -8.58621776e-01]],

       [[-8.39337111e-01,  5.97753346e-01, -4.38265204e-02,
         -1.00038421e+00]],

       [[-7.61894286e-01,  7.83567309e-01, -1.41228378e-01,
         -1.20052934e+00]],

       [[-1.01504314e+00,  6.27508998e-01,  3.51064265e-01,
         -8.85002613e-01]],

       [[-5.65414071e-01,  7.32667327e-01, -1.77659020e-01,
         -1.16646063e+00]],

       [[-5.74204266e-01,  3.39685857e-01,  1.36810601e-01,
         -1.36253929e+00]],

       [[-1.07595503e-01,  2.18886286e-01, -1.50306806e-01,
         -7.21326470e-01]],

       [[-9.29737687e-01,  6.41461015e-01,  2.71451771e-01,
         -9.83378351e-01]],

       [[-8.76539290e-01,  6.83961928e-01, -3.47850174e-02,
         -1.23138142e+00]],

       [[-9.95544195e-01,  2.55826980e-01,  7.33556509e-01

In [90]:
# The manual result keeps the length-1 time axis of `inputs`.
result.shape

TensorShape([32, 1, 4])

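Si consideramos solo la salida del último paso de tiempo, vemos que el estado en cada instante $t$ se calcula como $h_t = f(x_t W_x + h_{t-1} W_h + b_h)$, es decir, **h_next = act_f(x * Wx + h_prev * Wh + bh)**. En este ejemplo `activation = None`, por lo que `act_f` es la identidad, y `h_prev` es el estado inicial `h0` (ceros), porque cada secuencia tiene un solo paso de tiempo.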

# Considerando solo el h_next del último paso de tiempo

Con **return_state** (por defecto `False`) indicamos si la capa debe devolver el último estado además de la salida. Si es `False`, solo se retorna la salida; si es `True`, se retorna la salida y también el estado final, es decir, el `h_next` del último paso de tiempo.

In [124]:
# Fresh sample batch of shape (32, 1, 8) drawn uniformly from [0, 1).
# A seeded generator makes the batch identical on every run, and
# dtype=np.float32 produces float32 directly (no astype needed).
inputs = np.random.default_rng(1).random([32, 1, 8], dtype=np.float32)

In [125]:
# Display the new input batch.
inputs

array([[[7.77852118e-01, 3.57561499e-01, 8.75602126e-01, 7.42844522e-01,
         5.64376175e-01, 1.24173753e-01, 4.27971154e-01, 9.23465848e-01]],

       [[3.50049809e-02, 7.55835176e-01, 5.52000245e-04, 9.44924176e-01,
         5.05427480e-01, 1.72712281e-01, 7.59554744e-01, 5.35774112e-01]],

       [[2.55830348e-01, 3.87251645e-01, 3.98100793e-01, 2.98571020e-01,
         4.03187722e-01, 1.53156757e-01, 3.28493088e-01, 6.81258380e-01]],

       [[9.16783810e-01, 3.72185737e-01, 3.74732912e-01, 1.16763301e-01,
         8.66153181e-01, 9.35210824e-01, 4.45022017e-01, 7.46459246e-01]],

       [[7.71325648e-01, 8.10292959e-01, 1.65032536e-01, 8.26676607e-01,
         7.49447867e-02, 5.52654043e-02, 7.51440287e-01, 8.49115968e-01]],

       [[3.02555859e-01, 4.29183215e-01, 1.68812603e-01, 9.48250532e-01,
         5.69964767e-01, 7.43172348e-01, 9.56679702e-01, 1.46582425e-01]],

       [[9.27492857e-01, 7.06541836e-01, 4.07115281e-01, 7.55156279e-01,
         7.64244080e-01, 4.763604

In [126]:
# Same layer as before, but return_state=True also returns the final hidden
# state next to the regular output.
simple_rnn = tf.keras.layers.SimpleRNN(hidden_units, activation = None, return_state = True)

In [127]:
# With return_state=True the call returns two tensors: [output, final state].
output = simple_rnn(inputs)

In [128]:
# Display both returned tensors.
output

[<tf.Tensor: shape=(32, 4), dtype=float32, numpy=
 array([[-0.6380446 ,  0.26784998,  0.5744807 , -1.448044  ],
        [ 0.08555439,  0.8894242 ,  0.6188705 , -0.519729  ],
        [-0.45943782,  0.5151171 ,  0.35474652, -0.606335  ],
        [-0.46182218,  1.085551  , -0.5193808 , -0.39207533],
        [ 0.29962808,  0.30769864,  0.79823387, -1.2990851 ],
        [ 0.26013878,  1.0190787 ,  0.10977888, -0.05945751],
        [-0.3011789 ,  0.73945224,  0.12392277, -1.2071009 ],
        [-0.24366201,  1.1641486 ,  0.66019166, -0.88288933],
        [-0.4166726 ,  0.8383101 ,  0.38548052, -0.56428516],
        [-0.7219967 ,  0.62917936,  0.09701812, -0.51494646],
        [-0.31463173,  1.0054178 ,  0.15095647,  0.09007488],
        [-0.27457404,  1.184313  , -0.33355606,  0.11387933],
        [-0.3920726 ,  0.8132309 ,  0.32533816, -0.22994748],
        [ 0.02448089,  0.911199  ,  0.0170981 , -0.60501707],
        [-0.5907248 ,  0.76415277, -0.1466951 , -0.7951373 ],
        [-0.56285   

<h3>Análisis</h3>

In [129]:
# Element-wise comparison of the two returned tensors:
# output[0] is the layer output, output[1] is the final state.
output[0] == output[1]

<tf.Tensor: shape=(32, 4), dtype=bool, numpy=
array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True, 

In [130]:
# Weights of the new layer: input kernel (Wx), recurrent kernel (Wh), bias (bh).
Wx, Wh, bh = simple_rnn.weights

In [131]:
# Same manual recurrence as in the basic example, reusing the zero initial
# state `h0` defined earlier in the notebook.
result = np.dot(inputs, Wx) + np.dot(h0, Wh) + bh

In [132]:
# Display the manual result; values should match both tensors in `output`
# up to float rounding.
result

<tf.Tensor: shape=(32, 1, 4), dtype=float32, numpy=
array([[[-0.6380446 ,  0.26784998,  0.5744808 , -1.448044  ]],

       [[ 0.08555442,  0.8894242 ,  0.6188705 , -0.519729  ]],

       [[-0.45943785,  0.5151171 ,  0.35474652, -0.60633504]],

       [[-0.46182218,  1.0855509 , -0.5193808 , -0.39207536]],

       [[ 0.29962805,  0.30769864,  0.79823387, -1.2990851 ]],

       [[ 0.2601388 ,  1.0190787 ,  0.1097789 , -0.0594575 ]],

       [[-0.3011789 ,  0.73945224,  0.12392277, -1.2071009 ]],

       [[-0.243662  ,  1.1641486 ,  0.6601917 , -0.88288933]],

       [[-0.4166726 ,  0.8383101 ,  0.3854805 , -0.56428516]],

       [[-0.7219968 ,  0.62917936,  0.09701812, -0.51494646]],

       [[-0.31463176,  1.0054178 ,  0.15095651,  0.09007488]],

       [[-0.27457404,  1.184313  , -0.33355606,  0.11387931]],

       [[-0.39207262,  0.8132309 ,  0.32533816, -0.22994748]],

       [[ 0.02448089,  0.911199  ,  0.0170981 , -0.60501707]],

       [[-0.5907248 ,  0.7641527 , -0.1466951 , -0.7

Como vemos, indicar **return_state = True** hace que obtengamos también el estado final de la capa: ahora la salida es una lista de 2 elementos, donde la posición 0 es la salida de la capa y la posición 1 es el estado del último paso de tiempo. En este ejemplo ambas posiciones tienen el mismo valor, pues la capa tiene por defecto **return_sequences = False**, por lo que no retorna la salida de cada paso de tiempo sino solo la del último.

Sin embargo, hasta aquí no tenemos una forma de indicar qué queremos que sea la salida de la capa: con **return_state** pudimos indicar si nos interesa además el estado del último paso de tiempo, pero no qué debe contener la salida de nuestra RNN. Es aquí donde es importante **return_sequences**.

# Considerando el h_next de cada paso de tiempo

Con **return_sequences** (por defecto `False`) indicamos si se devuelve solo la última salida de la secuencia o la secuencia completa de salidas: si es `False`, devolvemos solo la salida del último paso de tiempo; si es `True`, devolvemos la salida de cada uno de los pasos de tiempo.

In [135]:
# Sample batch of shape (32, 5, 8) drawn uniformly from [0, 1):
# 32 sequences of 5 elements, 8 values per element.
# A seeded generator makes the batch identical on every run, and
# dtype=np.float32 produces float32 directly (no astype needed).
inputs = np.random.default_rng(2).random([32, 5, 8], dtype=np.float32)

In [136]:
# return_sequences=True makes the layer return the hidden state of every
# time step instead of only the last one.
simple_rnn = tf.keras.layers.SimpleRNN(hidden_units, activation = None, return_sequences = True)

In [137]:
# Forward pass; the result keeps the time axis of the input.
output = simple_rnn(inputs)

In [138]:
# Display the full sequence of hidden states.
output

<tf.Tensor: shape=(32, 5, 4), dtype=float32, numpy=
array([[[-1.98098570e-01, -5.89577138e-01,  7.63538420e-01,
          1.81630695e+00],
        [ 1.11444569e+00, -1.98155701e+00,  2.29305714e-01,
          2.07986045e+00],
        [ 1.02539909e+00, -2.10216546e+00, -1.76975071e+00,
          1.41783690e+00],
        [-6.30316973e-01, -2.62655783e+00, -1.36045730e+00,
         -2.65821338e-01],
        [-1.93138242e+00, -3.21646214e+00,  2.97973752e-02,
          1.45798254e+00]],

       [[ 4.55073059e-01, -4.40805435e-01,  8.08938384e-01,
          1.38472462e+00],
        [ 1.64553225e+00, -1.00179768e+00, -3.22291851e-02,
          1.50546622e+00],
        [ 1.10589647e+00, -9.56535220e-01, -1.46648097e+00,
          5.87047696e-01],
        [-7.18181491e-01, -1.47059619e+00, -1.13597071e+00,
          3.48926783e-02],
        [-1.70014882e+00, -1.84120333e+00,  1.96408600e-01,
          1.23015559e+00]],

       [[ 9.49297473e-02,  1.01092428e-01,  8.11566949e-01,
          1.08

<h3>Análisis</h3>

In [142]:
# (sequences, elements per sequence, hidden units).
output.shape

TensorShape([32, 5, 4])

Como vemos, obtenemos los 32 **grupos** (secuencias), y para cada uno de los 5 **elementos** que componen cada **secuencia** obtenemos 4 valores, que son el **h_next** calculado para ese elemento de la secuencia.

<h4>Ejemplo</h4>

In [143]:
# Weights of the return_sequences layer: input kernel, recurrent kernel, bias.
Wx, Wh, bh = simple_rnn.weights

Calculando el h_next para el primer elemento de cada grupo:

In [145]:
# Manual recurrence for the first element only: inputs[:, 0, :] selects
# element 0 of every sequence, and h0 (zeros) is the state before it.
result = np.dot(inputs[:,0,:], Wx) + np.dot(h0, Wh) + bh

In [146]:
# Display the manual result; it should match output[:, 0, :] up to float rounding.
result

<tf.Tensor: shape=(32, 4), dtype=float32, numpy=
array([[-1.98098540e-01, -5.89577138e-01,  7.63538420e-01,
         1.81630695e+00],
       [ 4.55073059e-01, -4.40805435e-01,  8.08938384e-01,
         1.38472474e+00],
       [ 9.49297547e-02,  1.01092435e-01,  8.11566949e-01,
         1.08681810e+00],
       [-6.16069399e-02,  4.86822128e-01,  2.70290315e-01,
         1.46846890e+00],
       [ 2.53683835e-01, -1.70819506e-01,  6.55435801e-01,
         7.93132186e-01],
       [-4.72389400e-01, -4.74379420e-01, -6.87169805e-02,
         1.35109353e+00],
       [-2.45640993e-01, -7.24026620e-01,  8.47934961e-01,
         1.84246385e+00],
       [-5.33467233e-01, -4.30674464e-01, -2.48203069e-01,
         1.09800828e+00],
       [-1.89864010e-01, -8.84647518e-02,  5.56699693e-01,
         1.50911140e+00],
       [-8.16857442e-02, -2.82138139e-01, -3.47553492e-01,
         1.00696123e+00],
       [-4.10857797e-01, -5.58885753e-01,  5.90471029e-01,
         1.45829141e+00],
       [ 6.14420