In [1]:
import numpy as np

In [2]:
def softmax(x):
    """
    Column-wise softmax (normalises along axis 0), computed stably.

    x -> scores, classes along axis 0, e.g. (ny, m)

    returns an array with the same shape as x whose entries are
    non-negative and sum to 1 along axis 0
    """
    x = np.asarray(x)
    if x.size == 0:
        # nothing to shift; np.max would raise on a zero-length axis
        ex = np.exp(x)
    else:
        # softmax(x) == softmax(x - c) for any per-column constant c.
        # Using c = max(x) keeps every exponent <= 0, so np.exp cannot
        # overflow to inf and produce inf / inf = nan for large scores.
        ex = np.exp(x - np.max(x, axis=0, keepdims=True))
    return ex / np.sum(ex, axis=0)

In [6]:
def rnn_cell_forward(xt, a_prev, parameters):
    """
    One time step of a basic RNN cell.

    Computes
        a_next  = tanh(Wax . xt + Waa . a_prev + ba)
        yt_pred = softmax(Wya . a_next + by)

    Shapes (nx = vocabulary size, m = number of samples):
        xt     -> (nx, m)   input at this time step
        a_prev -> (na, m)   activations from the previous step
        Wax    -> (na, nx)
        Waa    -> (na, na)
        ba     -> (na, 1)
        Wya    -> (ny, na)
        by     -> (ny, 1)

    parameters is a dict holding 'Wax', 'Waa', 'Wya', 'ba' and 'by'.

    Returns
        a_next  -> (na, m)  new activations
        yt_pred -> (ny, m)  prediction for this time step
        cache   -> tuple (a_next, a_prev, xt, parameters)
    """
    # pull out weights and biases (same lookup order as the key tuple)
    Wax, Waa, Wya, ba, by = (
        parameters[key] for key in ('Wax', 'Waa', 'Wya', 'ba', 'by')
    )

    # hidden state: input term + recurrent term, then the bias
    input_term = np.dot(Wax, xt)
    recurrent_term = np.dot(Waa, a_prev)
    a_next = np.tanh(input_term + recurrent_term + ba)

    # output distribution for this step
    scores = np.dot(Wya, a_next) + by
    yt_pred = softmax(scores)

    return a_next, yt_pred, (a_next, a_prev, xt, parameters)

In [16]:
def rnn_forward(x, a0, parameters):
    """
    Forward propagation of the RNN over every time step.

    nx = vocabulary size
    m  = number of samples
    Tx = number of words/time steps

    x  -> (nx, m, Tx)  input data
    a0 -> (n_a, m)     initial activation state
    parameters -> dict passed through to rnn_cell_forward; the shape of
                  'Wya' is read here as (n_y, n_a)

    Returns
        a      -> (n_a, m, Tx)  activations for every time step
        y_pred -> (n_y, m, Tx)  predictions for every time step
        caches -> tuple (list of per-step caches, x)
    """
    _, m, Tx = x.shape
    n_y, n_a = parameters['Wya'].shape

    # storage for the results of every step
    y_pred = np.zeros((n_y, m, Tx))
    a = np.zeros((n_a, m, Tx))

    step_caches = []
    a_t = a0
    for t in range(Tx):
        # feed slice t, carrying the activation over from the previous step
        a_t, y_t, step_cache = rnn_cell_forward(x[:, :, t], a_t, parameters)
        step_caches.append(step_cache)
        a[:, :, t] = a_t
        y_pred[:, :, t] = y_t

    return a, y_pred, (step_caches, x)