In [1]:
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [101]:
def softmax(y):
    if(a.ndim==1):
        return np.exp(y)/np.sum(np.exp(y))
    r,c=y.shape
    y_softmax=np.zeros(r*c).reshape(r,c)
    for i in range(r):
        y_softmax[i,:]=np.exp(y[i,:])/np.sum(np.exp(y[i,:]))
    return y_softmax

In [40]:
np.random.seed(1)
da=5 ;dx=3;m=10;dy=2
parameters={
    'Waa':np.random.randn(da,da),
    'Wax':np.random.randn(da,dx),
    'Wya':np.random.randn(dy,da),
    'ba':np.random.randn(da,1),
    'by':np.random.randn(dy,1),
}

In [122]:
### Calculation At time step-t

def rnn_timeT(Xt,a_prev,parameters):
    Waa=parameters["Waa"]
    Wax=parameters["Wax"]
    Wya=parameters["Wya"]
    ba=parameters["ba"]
    by=parameters["by"]
 
    Wa=np.concatenate((Waa,Wax),axis=1)
    a=np.concatenate((a_prev,Xt),axis=0)
    
    a_next=np.tanh(np.dot(Wa,a)+ba)
    ypred_t=softmax(np.dot(Wya,a_next)+by)
    
    cache=(a_next,ypred_t,Xt,parameters)
    
    return a_next,ypred_t,cache

In [135]:
### Forward Propagation Of RNN

def rnn_forward(X,a_prev,parameters):
    nx,m,Tx=np.shape(X)    
    Wya=parameters["Wya"]
    
    dy,da=Wya.shape
    a_prev=np.random.randn(da,m)
    y_preds=np.zeros((dy,m,Tx))
    a_preds=np.zeros((da,m,Tx))
    caches=[]
    for t in range(Tx):
        a_next,ypred_t,cache=rnn_timeT(X[:,:,t],a_prev,parameters)
        a_prev=a_next
        y_preds[:,:,t]=ypred_t
        a_preds[:,:,t]=a_next
        caches.append(cache)
    caches=(caches,X)
    return a_preds,y_preds,caches
        

In [136]:
np.random.seed(1)
x = np.random.randn(3,10,4)
a0 = np.random.randn(5,10)
Waa = np.random.randn(5,5)
Wax = np.random.randn(5,3)
Wya = np.random.randn(2,5)
ba = np.random.randn(5,1)
by = np.random.randn(2,1)
parameters = {"Waa": Waa, "Wax": Wax, "Wya": Wya, "ba": ba, "by": by}

a, y_pred, caches = rnn_forward(x, a0, parameters)
print(a.shape,y_pred.shape)
print("a[4][1] = ", a[4][1])
print("a.shape = ", a.shape)
print("y_pred[1][3] =", y_pred[1][3])
print("y_pred.shape = ", y_pred.shape)
print("caches[1][1][3] =", caches[1][1][3])
print("len(caches) = ", len(caches))

(5, 10, 4) (2, 10, 4)
a[4][1] =  [ 0.76872174  0.99938678 -0.99845606 -0.99854009]
a.shape =  (5, 10, 4)
y_pred[1][3] = [0.07064457 0.0685364  0.02823762 0.10458415]
y_pred.shape =  (2, 10, 4)
caches[1][1][3] = [-1.1425182  -0.34934272 -0.20889423  0.58662319]
len(caches) =  2


In [119]:
np.random.seed(1)
xt = np.random.randn(3,10)
a_prev = np.random.randn(5,10)
Waa = np.random.randn(5,5)
Wax = np.random.randn(5,3)
Wya = np.random.randn(2,5)
ba = np.random.randn(5,1)
by = np.random.randn(2,1)
parameters = {"Waa": Waa, "Wax": Wax, "Wya": Wya, "ba": ba, "by": by}

#a_next, yt_pred, cache = rnn_timeT(xt,parameters,a_prev)
print("a_next[4] = ", a_next[4])
print("a_next.shape = ", a_next.shape)
print("yt_pred[1] =", yt_pred[1])
print("yt_pred.shape = ", yt_pred.shape)


a_next[4] =  [ 0.59584544  0.18141802  0.61311866  0.99808218  0.85016201  0.99980978
 -0.18887155  0.99815551  0.6531151   0.82872037]
a_next.shape =  (5, 10)
yt_pred[1] = 0.0006624948176613708
yt_pred.shape =  (10,)


In [150]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [156]:
def lstm_cell_forward(xt,a_prev,c_prev,parameters_lstm):
    
    Wf = parameters["Wf"]
    bf = parameters["bf"]
    Wi = parameters["Wi"]
    bi = parameters["bi"]
    Wc = parameters["Wc"]
    bc = parameters["bc"]
    Wo = parameters["Wo"]
    bo = parameters["bo"]
    Wy = parameters["Wy"]
    by = parameters["by"]
    
    a=np.concatenate((a_prev,xt),axis=0)
    
    ft=sigmoid(np.dot(Wf,a)+bf)
    it=sigmoid(np.dot(Wf,a)+bi)
    cct=np.tanh(np.dot(Wc,a)+bc)
    c_next=it*cct+ft*c_prev
    ot=sigmoid(np.dot(Wo,a)+bo)
    a_next=ot*np.tanh(c_next)
    
    yt_pred=softmax(np.dot(Wy,a_next)+by)
    cache=(a_next,c_next,a_prev,c_prev,ft,it,cct,ot,xt,parameters)
    
    return a_next,c_next,yt_pred,cache

In [157]:
np.random.seed(1)
xt = np.random.randn(3,10)
a_prev = np.random.randn(5,10)
c_prev = np.random.randn(5,10)
Wf = np.random.randn(5, 5+3)
bf = np.random.randn(5,1)
Wi = np.random.randn(5, 5+3)
bi = np.random.randn(5,1)
Wo = np.random.randn(5, 5+3)
bo = np.random.randn(5,1)
Wc = np.random.randn(5, 5+3)
bc = np.random.randn(5,1)
Wy = np.random.randn(2,5)
by = np.random.randn(2,1)

parameters = {"Wf": Wf, "Wi": Wi, "Wo": Wo, "Wc": Wc, "Wy": Wy, "bf": bf, "bi": bi, "bo": bo, "bc": bc, "by": by}

a_next, c_next, yt, cache = lstm_cell_forward(xt, a_prev, c_prev, parameters)
print("a_next[4] = ", a_next[4])
print("a_next.shape = ", c_next.shape)
print("c_next[2] = ", c_next[2])
print("c_next.shape = ", c_next.shape)
print("yt[1] =", yt[1])
print("yt.shape = ", yt.shape)
print("cache[1][3] =", cache[1][3])
print("len(cache) = ", len(cache))

a_next[4] =  [-6.61034665e-01  1.30132290e-06  1.99816789e-02  1.10472103e-01
 -9.02503892e-01  1.84321373e-04  6.96154932e-01  5.99254644e-01
 -4.82840527e-04 -1.14914385e-01]
a_next.shape =  (5, 10)
c_next[2] =  [-0.03108657  0.36873069 -0.46962771 -0.12764451 -1.65572232  0.0648662
  0.19927273 -0.3248586  -0.53008902 -0.37579692]
c_next.shape =  (5, 10)
yt[1] = [0.27558558 0.04996476 0.05020302 0.0809698  0.18870078 0.06502153
 0.02736078 0.06294208 0.09572258 0.10352908]
yt.shape =  (2, 10)
cache[1][3] = [-0.09674012  1.02595065  0.9909388  -0.53590455  0.11898752 -0.30119224
  0.19372256 -1.04846837  1.34412707 -0.26531237]
len(cache) =  10


In [163]:
def lstm_forward(x,a0,parameters_lstm):
    
    nx,m,Tx=x.shape
    dy=parameters["Wy"].shape[0]
    da=parameters["Wi"].shape[0]
    
    y_preds=np.zeros((dy,m,Tx))
    a_nexts=np.zeros((da,m,Tx))
    c_nexts=np.zeros((da,m,Tx))
    caches=[]
    
    # Initialise a_next and c_next
    a_next=a0
    c_next=np.zeros(a_next.shape)
    
    for t in range(Tx):
        a_next,c_next,yt_pred,cache=lstm_cell_forward(x[:,:,t],a_next,c_next,parameters_lstm)
        a_nexts[:,:,t]=a_next
        c_nexts[:,:,t]=c_next
        y_preds[:,:,t]=yt_pred
        caches.append(cache)
    
    caches=(cache,x)
    
    return a_nexts,c_nexts,y_preds,caches
        

In [164]:
np.random.seed(1)
x = np.random.randn(3,10,7)
a0 = np.random.randn(5,10)
Wf = np.random.randn(5, 5+3)
bf = np.random.randn(5,1)
Wi = np.random.randn(5, 5+3)
bi = np.random.randn(5,1)
Wo = np.random.randn(5, 5+3)
bo = np.random.randn(5,1)
Wc = np.random.randn(5, 5+3)
bc = np.random.randn(5,1)
Wy = np.random.randn(2,5)
by = np.random.randn(2,1)

parameters = {"Wf": Wf, "Wi": Wi, "Wo": Wo, "Wc": Wc, "Wy": Wy, "bf": bf, "bi": bi, "bo": bo, "bc": bc, "by": by}

a, y, c, caches = lstm_forward(x, a0, parameters)
print("a[4][3][6] = ", a[4][3][6])
print("a.shape = ", a.shape)
print("y[1][4][3] =", y[1][4][3])
print("y.shape = ", y.shape)
print("caches[1][1[1]] =", caches[1][1][1])
print("c[1][2][1]", c[1][2][1])
print("len(caches) = ", len(caches))

a[4][3][6] =  0.08534587220849396
a.shape =  (5, 10, 7)
y[1][4][3] = 0.083572488373044
y.shape =  (5, 10, 7)
caches[1][1[1]] = [ 0.82797464  0.23009474  0.76201118 -0.22232814 -0.20075807  0.18656139
  0.41005165]
c[1][2][1] 0.14763775832383502
len(caches) =  2
