In [2]:
import numpy as np
from rnn_utils import *

In [5]:
def rnn_cell_forward(xt, a_prev, parameters):
    """Compute one forward step of a basic RNN cell.

    Arguments:
    xt -- input for the current time step (right operand of the Wax product)
    a_prev -- hidden state of the previous time step (right operand of the Waa product)
    parameters -- dict holding "Wax", "Waa", "Wya", "ba" and "by"

    Returns:
    a_t -- new hidden state, tanh(Waa . a_prev + Wax . xt + ba)
    yt_pred -- softmax(Wya . a_t + by)
    cache -- tuple (a_t, a_prev, xt, parameters)
    """
    # Unpack weights and biases in one go.
    Wax, Waa, Wya, ba, by = (
        parameters[name] for name in ("Wax", "Waa", "Wya", "ba", "by")
    )

    # Hidden state: recurrent contribution plus input contribution plus bias.
    recurrent_term = np.dot(Waa, a_prev)
    input_term = np.dot(Wax, xt)
    a_t = np.tanh(recurrent_term + input_term + ba)

    # Prediction for this step. `softmax` is not defined in this cell;
    # presumably it comes from the star import of rnn_utils.
    logits = np.dot(Wya, a_t) + by
    yt_pred = softmax(logits)

    # Bundle the values of this step together with the parameters.
    return a_t, yt_pred, (a_t, a_prev, xt, parameters)


In [20]:
def rnn_forward(x, a0, parameters):
    """Run the basic RNN cell over every time step of x.

    Arguments:
    x -- input array of shape (n_x, m, T_x); the last axis is iterated as time
    a0 -- initial hidden state, used as the previous state for the first step
    parameters -- dict forwarded to rnn_cell_forward; "Wya" must have shape (n_y, n_a)

    Returns:
    a -- hidden states of all steps, shape (n_a, m, T_x)
    y_pred -- predictions of all steps, shape (n_y, m, T_x)
    caches -- tuple (list of the per-step caches, x)
    """
    caches = []
    # Retrieve dimensions from shapes of x and parameters["Wya"]
    _, m, T_x = x.shape
    n_y, n_a = parameters["Wya"].shape

    # 1. allocate the output arrays for hidden states and predictions
    a = np.zeros((n_a, m, T_x))
    y_pred = np.zeros((n_y, m, T_x))
    # 2. initialize the running hidden state
    a_t = a0
    # 3. loop over the time steps
    for t in range(T_x):
        # Pass the running state a_t, not a0: each step must start from the
        # hidden state produced by the previous step.
        a_t, yt_pred, cache = rnn_cell_forward(x[:, :, t], a_t, parameters)
        a[:, :, t] = a_t
        y_pred[:, :, t] = yt_pred
        caches.append(cache)
    caches = (caches, x)

    return a, y_pred, caches

In [21]:
def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    """Compute one forward step of an LSTM cell.

    Arguments:
    xt -- input for the current time step, shape (n_x, m)
    a_prev -- hidden state of the previous time step; written into the first
              n_a rows of the stacked input
    c_prev -- cell state of the previous time step
    parameters -- dict holding the gate weights "Wf", "Wi", "Wc", "Wo", the
                  output weight "Wy" of shape (n_y, n_a), and the biases
                  "bf", "bi", "bc", "bo", "by"

    Returns:
    a_next -- new hidden state, Go * tanh(c_next)
    c_next -- new cell state, Gf * c_prev + Gi * c_pred
    yt_pred -- softmax(Wy . a_next + by)
    cache -- tuple (a_next, c_next, a_prev, c_prev, Gf, Gi, Go, xt, c_pred, parameters)
    """
    Wf = parameters["Wf"]
    Wi = parameters["Wi"]
    Wc = parameters["Wc"]
    Wo = parameters["Wo"]
    Wy = parameters["Wy"]
    bf = parameters["bf"]
    bi = parameters["bi"]
    bc = parameters["bc"]
    bo = parameters["bo"]
    by = parameters["by"]

    # Stack a_prev on top of xt into a single (n_a + n_x, m) array.
    n_x, m = xt.shape
    n_y, n_a = Wy.shape
    concatenate = np.zeros((n_x + n_a, m))
    concatenate[: n_a, :] = a_prev  # rows 0 .. n_a-1, i.e. n_a rows in total
    concatenate[n_a:, :] = xt

    # Gates and candidate cell state. `sigmoid` and `softmax` are not defined
    # in this cell; presumably they come from the star import of rnn_utils.
    Gf = sigmoid(np.dot(Wf, concatenate) + bf)
    Gi = sigmoid(np.dot(Wi, concatenate) + bi)
    Go = sigmoid(np.dot(Wo, concatenate) + bo)
    c_pred = np.tanh(np.dot(Wc, concatenate) + bc)
    c_next = Gf * c_prev + Gi * c_pred
    a_next = Go * np.tanh(c_next)

    yt_pred = softmax(np.dot(Wy, a_next) + by)
    # The candidate is stored under its actual local name `c_pred`; the
    # previous spelling `C_pred` was an undefined name.
    cache = (a_next, c_next, a_prev, c_prev, Gf, Gi, Go, xt, c_pred, parameters)

    return a_next, c_next, yt_pred, cache

In [23]:
def lstm_forward(x, a0, parameters):
    """Run the LSTM cell over every time step of x.

    Arguments:
    x -- input array of shape (n_x, m, T_x); the last axis is iterated as time
    a0 -- initial hidden state, used as the previous state for the first step
    parameters -- dict forwarded to lstm_cell_forward; "Wy" must have shape (n_y, n_a)

    Returns:
    a -- hidden states of all steps, shape (n_a, m, T_x)
    y -- predictions of all steps, shape (n_y, m, T_x)
    c -- cell states of all steps, shape (n_a, m, T_x)
    caches -- tuple (list of the per-step caches, x)
    """
    caches = []

    _, m, T_x = x.shape
    # Use the same key as lstm_cell_forward ("Wy"); the LSTM parameter dict
    # read by the cell has no "Wya" entry.
    n_y, n_a = parameters["Wy"].shape

    a = np.zeros((n_a, m, T_x))
    y = np.zeros((n_y, m, T_x))
    c = np.zeros((n_a, m, T_x))

    a_next = a0
    # The cell state starts at zero.
    c_next = np.zeros((n_a, m))

    for t in range(T_x):
        # Slice the full input x at step t and carry both states forward.
        a_next, c_next, yt_pred, cache = lstm_cell_forward(x[:, :, t], a_next, c_next, parameters)
        a[:, :, t] = a_next
        y[:, :, t] = yt_pred
        c[:, :, t] = c_next
        caches.append(cache)

    # Return the list of all per-step caches, not just the last step's cache.
    caches = (caches, x)

    return a, y, c, caches
    