In [1]:
import numpy as np

In [2]:
def softmax(x):
    """
    Numerically stable softmax taken along axis 0.

    Arguments:
    x -- scores, array-like with at least one dimension. For a 2-D input of
         shape (n_y, m) every column is normalised independently.

    Returns:
    numpy array with the same shape as x; entries are non-negative and sum
    to 1 along axis 0.
    """
    x = np.asarray(x)
    # Shift by the maximum along the same axis that is normalised. Shifting by
    # the global maximum lets a column that sits far below it underflow to all
    # zeros, which then turns into 0/0 = NaN in the division below.
    shifted = x - np.max(x, axis=0, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=0, keepdims=True)

def sigmoid(x):
    """
    Element-wise logistic function 1 / (1 + exp(-x)).

    Arguments:
    x -- scalar or numpy array

    Returns:
    value(s) in the interval [0, 1], same shape as x
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

In [3]:
# A single RNN cell: the forward computation for one time step

def rnn_cell_forward(xt, a_prev, parameters):
    """
    Run one forward time step of a basic RNN cell.

    The new hidden state is tanh(Wax.xt + Waa.a_prev + ba) and the prediction
    is softmax(Wya.a_next + by).

    Arguments:
    xt -- input at time step "t", numpy array of shape (n_x, m)
    a_prev -- hidden state at time step "t-1", numpy array of shape (n_a, m)
    parameters -- python dictionary holding:
                        Wax -- input-to-hidden weights, shape (n_a, n_x)
                        Waa -- hidden-to-hidden weights, shape (n_a, n_a)
                        Wya -- hidden-to-output weights, shape (n_y, n_a)
                        ba -- hidden bias, shape (n_a, 1)
                        by -- output bias, shape (n_y, 1)

    Returns:
    a_next -- next hidden state, shape (n_a, m)
    yt_pred -- prediction at time step "t", shape (n_y, m)
    cache -- tuple (a_next, a_prev, xt, parameters) kept for the backward pass
    """
    # Unpack the weights and biases in one go
    Wax, Waa, Wya, ba, by = (
        parameters[key] for key in ("Wax", "Waa", "Wya", "ba", "by")
    )

    # Hidden-state update
    pre_activation = np.dot(Wax, xt) + np.dot(Waa, a_prev) + ba
    a_next = np.tanh(pre_activation)

    # Output distribution for this time step
    logits = np.dot(Wya, a_next) + by
    yt_pred = softmax(logits)

    return a_next, yt_pred, (a_next, a_prev, xt, parameters)

In [4]:
# Full RNN forward pass: RNN cells chained together across all time steps

def rnn_forward(x, a0, parameters):
    """
    Forward pass of the RNN over a whole sequence, applying rnn_cell_forward
    once per time step and feeding each hidden state into the next step.

    Arguments:
    x -- input data for every time step, shape (n_x, m, T_x)
    a0 -- initial hidden state, shape (n_a, m)
    parameters -- python dictionary holding:
                        Waa -- hidden-to-hidden weights, shape (n_a, n_a)
                        Wax -- input-to-hidden weights, shape (n_a, n_x)
                        Wya -- hidden-to-output weights, shape (n_y, n_a)
                        ba -- hidden bias, shape (n_a, 1)
                        by -- output bias, shape (n_y, 1)

    Returns:
    a -- hidden states for every time step, shape (n_a, m, T_x)
    y_pred -- predictions for every time step, shape (n_y, m, T_x)
    caches -- tuple (list of per-step caches, x) kept for the backward pass
    """
    step_caches = []

    # Sizes come from the input tensor and the output weight matrix
    _, batch, steps = x.shape
    n_out, n_hidden = parameters['Wya'].shape

    # Pre-allocated result tensors, filled one time slice per iteration
    hidden_states = np.zeros((n_hidden, batch, steps))
    predictions = np.zeros((n_out, batch, steps))

    hidden = a0
    for t in range(steps):
        hidden, pred_t, step_cache = rnn_cell_forward(x[:, :, t], hidden, parameters)
        hidden_states[:, :, t] = hidden
        predictions[:, :, t] = pred_t
        step_caches.append(step_cache)

    return hidden_states, predictions, (step_caches, x)
        
        
        
    

In [5]:
# Long Short-Term Memory (LSTM) network: a single LSTM cell (one time step)

def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    """
    Implement a single forward step of the LSTM cell.

    Arguments:
    xt -- your input data at timestep "t", numpy array of shape (n_x, m).
    a_prev -- Hidden state at timestep "t-1", numpy array of shape (n_a, m)
    c_prev -- Memory state at timestep "t-1", numpy array of shape (n_a, m)
    parameters -- python dictionary containing:
                        Wf -- Weight matrix of the forget gate, numpy array of shape (n_a, n_a + n_x)
                        bf -- Bias of the forget gate, numpy array of shape (n_a, 1)
                        Wi -- Weight matrix of the update gate, numpy array of shape (n_a, n_a + n_x)
                        bi -- Bias of the update gate, numpy array of shape (n_a, 1)
                        Wc -- Weight matrix of the first "tanh", numpy array of shape (n_a, n_a + n_x)
                        bc -- Bias of the first "tanh", numpy array of shape (n_a, 1)
                        Wo -- Weight matrix of the output gate, numpy array of shape (n_a, n_a + n_x)
                        bo -- Bias of the output gate, numpy array of shape (n_a, 1)
                        Wy -- Weight matrix relating the hidden-state to the output, numpy array of shape (n_y, n_a)
                        by -- Bias relating the hidden-state to the output, numpy array of shape (n_y, 1)

    Returns:
    a_next -- next hidden state, of shape (n_a, m)
    c_next -- next memory state, of shape (n_a, m)
    yt_pred -- prediction at timestep "t", numpy array of shape (n_y, m)
    cache -- tuple of values needed for the backward pass, contains
             (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters)

    Note: ft/it/ot stand for the forget/update/output gates, cct stands for the candidate value (c tilde),
          c stands for the memory value
    """

    # Retrieve parameters from "parameters"
    Wf = parameters["Wf"]
    bf = parameters["bf"]
    Wi = parameters["Wi"]
    bi = parameters["bi"]
    Wc = parameters["Wc"]
    bc = parameters["bc"]
    Wo = parameters["Wo"]
    bo = parameters["bo"]
    Wy = parameters["Wy"]
    by = parameters["by"]

    # Retrieve dimensions from xt and Wy
    n_x, m = xt.shape
    _, n_a = Wy.shape

    # Stack a_prev on top of xt so each gate needs a single matrix product.
    # Assigning into a pre-allocated buffer keeps the original broadcasting
    # behaviour for a_prev.
    concat = np.zeros([n_a + n_x, m])
    concat[:n_a, :] = a_prev
    concat[n_a:, :] = xt

    # Gates are squashed to (0, 1) with the sigmoid
    ft = sigmoid(np.dot(Wf, concat) + bf)
    it = sigmoid(np.dot(Wi, concat) + bi)
    # The candidate memory uses tanh, not sigmoid: it must be able to take
    # negative values so the cell state can decrease as well as increase.
    cct = np.tanh(np.dot(Wc, concat) + bc)
    c_next = ft * c_prev + it * cct
    ot = sigmoid(np.dot(Wo, concat) + bo)
    a_next = ot * np.tanh(c_next)

    # Prediction for this time step
    yt_pred = softmax(np.dot(Wy, a_next) + by)

    cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters)

    return a_next, c_next, yt_pred, cache
    
    

In [6]:
# Full LSTM forward pass: LSTM cells chained together across all time steps
def lstm_forward(x, a0, parameters):
    """
    Implement the forward propagation of the recurrent neural network using an LSTM-cell,
    calling lstm_cell_forward once per time step.

    Arguments:
    x -- Input data for every time-step, of shape (n_x, m, T_x).
    a0 -- Initial hidden state, of shape (n_a, m)
    parameters -- python dictionary containing:
                        Wf -- Weight matrix of the forget gate, numpy array of shape (n_a, n_a + n_x)
                        bf -- Bias of the forget gate, numpy array of shape (n_a, 1)
                        Wi -- Weight matrix of the update gate, numpy array of shape (n_a, n_a + n_x)
                        bi -- Bias of the update gate, numpy array of shape (n_a, 1)
                        Wc -- Weight matrix of the first "tanh", numpy array of shape (n_a, n_a + n_x)
                        bc -- Bias of the first "tanh", numpy array of shape (n_a, 1)
                        Wo -- Weight matrix of the output gate, numpy array of shape (n_a, n_a + n_x)
                        bo -- Bias of the output gate, numpy array of shape (n_a, 1)
                        Wy -- Weight matrix relating the hidden-state to the output, numpy array of shape (n_y, n_a)
                        by -- Bias relating the hidden-state to the output, numpy array of shape (n_y, 1)

    Returns (in this order):
    a -- Hidden states for every time-step, numpy array of shape (n_a, m, T_x)
    y -- Predictions for every time-step, numpy array of shape (n_y, m, T_x)
    c -- Memory states for every time-step, numpy array of shape (n_a, m, T_x)
    caches -- tuple of values needed for the backward pass, contains (list of all the caches, x)
    """
    caches = []

    # Retrieve dimensions from the input tensor and the output weight matrix
    n_x, m, Tx = x.shape
    n_y, n_a = parameters['Wy'].shape

    # Pre-allocate the per-time-step results
    a = np.zeros([n_a, m, Tx])
    c = np.zeros([n_a, m, Tx])
    y = np.zeros([n_y, m, Tx])

    a_next = a0
    # Start the memory state as an explicit (n_a, m) zero array rather than a
    # scalar 0.0, so the c_prev stored in the first step's cache has the same
    # shape as every later one.
    c_next = np.zeros((n_a, m))

    for t in range(Tx):
        a_next, c_next, yt_pred, cache = lstm_cell_forward(x[:, :, t], a_next, c_next, parameters)
        a[:, :, t] = a_next
        c[:, :, t] = c_next
        y[:, :, t] = yt_pred
        caches.append(cache)

    caches = (caches, x)
    return a, y, c, caches