In [1]:
import numpy as np
from rnn_utils import *

In [4]:
def rnn_cell_forward(xt, a_prev, parameters):
    
    Waa = parameters["Waa"]
    Wax = parameters["Wax"]
    ba = parameters["ba"]
    Wya = parameters["Wya"]
    by = parameters["by"]
    
    
    a_next = np.tanh(np.dot(Waa, a_prev) + np.dot(Wax, xt) + ba)
    
    yt_pred = softmax(np.dot(Wya, a_next) + by)
    
    cache = (a_next, a_prev, xt, parameters)
    
    return a_next, yt_pred, cache

In [8]:
def rnn_forward(x, a0, parameters):
    
    caches = []
    
    n_x, m, T_x = x.shape
    n_a, m = a0.shape
    n_y, n_a = parameters["Wya"].shape
    
    a = np.zeros((n_a, m, T_x))
    y_pred = np.zeros((n_y, m, T_x))
    
    a_next = a0
    
    for t in range(T_x):
        
        a_next, yt_pred, cache = rnn_cell_forward(x[:, :, t], a_next, parameters)
        a[:, :, t] = a_next
        y_pred[:, :, t] = yt_pred
        
        caches.append(cache)
        
    caches = (caches, x)
    
    
    return a, y_pred, caches

In [10]:
##########################################LSTM###########################################

In [50]:
def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    
    Wc = parameters["Wc"]
    Wu = parameters["Wu"]
    Wf = parameters["Wf"]
    Wo = parameters["Wo"]
    Wy = parameters["Wy"]
    bc = parameters["bc"]
    bu = parameters["bu"]
    bf = parameters["bf"]
    bo = parameters["bo"]
    by = parameters["by"]
    
    n_x, m = xt.shape
    n_y, n_a = Wy.shape
    concat = np.zeros((n_a + n_x, m))
    
    concat[: n_a, :] = a_prev
    concat[n_a :, :] = xt
    
    cct = np.tanh(np.dot(Wc, concat) + bc)
    ut = sigmoid(np.dot(Wu, concat) + bu)
    ft = sigmoid(np.dot(Wf, concat) + bf)
    ot = sigmoid(np.dot(Wo, concat) + bo)
    c_next = ut * cct + ft * c_prev
    a_next = ot * np.tanh(c_next)
    
    yt_pred = softmax(np.dot(Wy, a_next) + by)
    
    
    cache = (a_next, c_next, a_prev, c_prev, cct, ut, ft, ot, xt, parameters)
    
    
    return a_next, c_next, yt_pred, cache

In [49]:
# Test lstm_cell_forward:
np.random.seed(1)
xt = np.random.randn(3,10)
a_prev = np.random.randn(5,10)
c_prev = np.random.randn(5,10)
Wf = np.random.randn(5, 5+3)
bf = np.random.randn(5,1)
Wu = np.random.randn(5, 5+3)
bu = np.random.randn(5,1)
Wo = np.random.randn(5, 5+3)
bo = np.random.randn(5,1)
Wc = np.random.randn(5, 5+3)
bc = np.random.randn(5,1)
Wy = np.random.randn(2,5)
by = np.random.randn(2,1)

parameters = {"Wf": Wf, "Wu": Wu, "Wo": Wo, "Wc": Wc, "Wy": Wy, "bf": bf, "bu": bu, "bo": bo, "bc": bc, "by": by}

a_next, c_next, yt, cache = lstm_cell_forward(xt, a_prev, c_prev, parameters)
print("a_next[4] = ", a_next[4])
print("a_next.shape = ", c_next.shape)
print("c_next[2] = ", c_next[2])
print("c_next.shape = ", c_next.shape)
print("yt[1] =", yt[1])
print("yt.shape = ", yt.shape)
print("cache[1][3]", cache[1][3])
print("len(cache) = ", len(cache))

a_next[4] =  [-0.66408471  0.0036921   0.02088357  0.22834167 -0.85575339  0.00138482
  0.76566531  0.34631421 -0.00215674  0.43827275]
a_next.shape =  (5, 10)
c_next[2] =  [ 0.63267805  1.00570849  0.35504474  0.20690913 -1.64566718  0.11832942
  0.76449811 -0.0981561  -0.74348425 -0.26810932]
c_next.shape =  (5, 10)
yt[1] = [0.79913913 0.15986619 0.22412122 0.15606108 0.97057211 0.31146381
 0.00943007 0.12666353 0.39380172 0.07828381]
yt.shape =  (2, 10)
cache[1][3] [-0.16263996  1.03729328  0.72938082 -0.54101719  0.02752074 -0.30821874
  0.07651101 -1.03752894  1.41219977 -0.37647422]
len(cache) =  10


In [30]:
# Test cache :
print(cache)

{'Wf': array([[-0.1809203 , -0.60392063, -1.23005814,  0.5505375 ,  0.79280687,
        -0.62353073,  0.52057634, -1.14434139],
       [ 0.80186103,  0.0465673 , -0.18656977, -0.10174587,  0.86888616,
         0.75041164,  0.52946532,  0.13770121],
       [ 0.07782113,  0.61838026,  0.23249456,  0.68255141, -0.31011677,
        -2.43483776,  1.0388246 ,  2.18697965],
       [ 0.44136444, -0.10015523, -0.13644474, -0.11905419,  0.01740941,
        -1.12201873, -0.51709446, -0.99702683],
       [ 0.24879916, -0.29664115,  0.49521132, -0.17470316,  0.98633519,
         0.2135339 ,  2.19069973, -1.89636092]]), 'Wu': array([[-0.22631424,  1.33145711, -0.28730786,  0.68006984, -0.3198016 ,
        -1.27255876,  0.31354772,  0.50318481],
       [ 1.29322588, -0.11044703, -0.61736206,  0.5627611 ,  0.24073709,
         0.28066508, -0.0731127 ,  1.16033857],
       [ 0.36949272,  1.90465871,  1.1110567 ,  0.6590498 , -1.62743834,
         0.60231928,  0.4202822 ,  0.81095167],
       [ 1.044442

In [53]:
def lstm_forward(x, a0, parameters):
    
    caches = []
    
    n_x, m, T_x = x.shape
    #n_a, m = a0.shape
    n_y, n_a = parameters["Wy"].shape
    
    a = np.zeros((n_a, m, T_x))
    c = np.zeros((n_a, m, T_x))
    y = np.zeros((n_y, m, T_x))
    
    a_next = a0
    #c_next = np.zeros_like(a_next)
    c_next = np.zeros(a_next.shape)
    
    for t in range(T_x):
        
        a_next, c_next, yt, cache = lstm_cell_forward(x[:, :, t], a_next, c_next, parameters)
        a[:, :, t] = a_next
       
        y[:, :, t] = yt
        
        c[:, :, t] = c_next
        
        caches.append(cache)
        
    
    caches = (caches, x)
    
    
    
    return a, y, c, caches

In [54]:
np.random.seed(1)
x = np.random.randn(3,10,7)
a0 = np.random.randn(5,10)
Wf = np.random.randn(5, 5+3)
bf = np.random.randn(5,1)
Wi = np.random.randn(5, 5+3)
bi = np.random.randn(5,1)
Wo = np.random.randn(5, 5+3)
bo = np.random.randn(5,1)
Wc = np.random.randn(5, 5+3)
bc = np.random.randn(5,1)
Wy = np.random.randn(2,5)
by = np.random.randn(2,1)

parameters = {"Wf": Wf, "Wu": Wu, "Wo": Wo, "Wc": Wc, "Wy": Wy, "bf": bf, "bu": bu, "bo": bo, "bc": bc, "by": by}

a, y, c, caches = lstm_forward(x, a0, parameters)
print("a[4][3][6] = ", a[4][3][6])
print("a.shape = ", a.shape)
print("y[1][4][3] =", y[1][4][3])
print("y.shape = ", y.shape)
print("caches[1][1[1]] =", caches[1][1][1])
print("c[1][2][1]", c[1][2][1])
print("len(caches) = ", len(caches))

a[4][3][6] =  0.07795688775862765
a.shape =  (5, 10, 7)
y[1][4][3] = 0.9566397110268028
y.shape =  (2, 10, 7)
caches[1][1[1]] = [ 0.82797464  0.23009474  0.76201118 -0.22232814 -0.20075807  0.18656139
  0.41005165]
c[1][2][1] -0.2091340029170874
len(caches) =  2
