In [128]:
import numpy as np

In [129]:
class Tanh(object):
    """Element-wise hyperbolic-tangent activation, Y = tanh(X)."""

    def forward(self, X_in):
        """Return tanh applied element-wise to ``X_in``."""
        activated = np.tanh(X_in)
        return activated

    def backward(self, X_in, dEdY):
        """Propagate the upstream gradient ``dEdY`` through the activation.

        Chain rule: dE/dX = dE/dY * dY/dX, where dY/dX = 1 - tanh(X)^2.
        ``X_in`` is the same input that was given to ``forward``.
        """
        local_grad = 1 - np.tanh(X_in) ** 2
        return local_grad * dEdY

In [130]:
class ReLu(object):
    """Element-wise rectified linear unit, Y = max(X, 0)."""

    def forward(self, X_in):
        """Return ``X_in`` with every negative entry replaced by zero."""
        rectified = np.maximum(X_in, 0)
        return rectified

    def backward(self, X_in, dEdY):
        """Propagate the upstream gradient ``dEdY`` through the activation.

        The local derivative is 1 where ``X_in`` is strictly positive and 0
        elsewhere (including at exactly 0), so the gradient is simply masked.
        """
        positive_mask = X_in > 0
        return positive_mask * dEdY

In [178]:
class RnnLayer(object):
    """Single-layer vanilla RNN with a tanh non-linearity.

    ``forward`` evaluates the recurrence

        H[:, t + 1] = tanh(X[:, t] . W_in^T + b_in + H[:, t] . W_hh^T + b_h)

    starting from an all-zero initial state ``H[:, 0]``.

    Attributes:
        input_weights:  W_in, shape (hidden_dim, input_dim).
        hidden_weights: W_hh, shape (hidden_dim, hidden_dim).
        input_bias:     b_in, shape (hidden_dim,).
        hidden_bias:    b_h,  shape (hidden_dim,).
    """

    def __init__(self, input_dim, hidden_dim, seq_len, batch_size, use_bias=True):
        """Create the layer and randomly initialise its parameters.

        Args:
            input_dim: number of features per time step.
            hidden_dim: size of the hidden state.
            seq_len: number of time steps ``forward`` expects.
            batch_size: nominal batch size; stored, but ``forward`` takes the
                actual batch size from its input.
            use_bias: when truthy both bias vectors are drawn at random,
                otherwise they are zero vectors.
        """
        # All parameters are drawn from U(-sqrt(1/hidden_dim), sqrt(1/hidden_dim)).
        bound = np.sqrt(1. / hidden_dim)

        self.use_bias = use_bias
        self.seq_len = seq_len
        self.batch_size = batch_size
        self.hidden_dim = hidden_dim
        self.input_dim = input_dim

        # The order of the random draws is kept stable so that seeded runs
        # produce the same parameters as before.
        self.input_weights = np.random.uniform(-bound, bound, (hidden_dim, input_dim))
        self.hidden_weights = np.random.uniform(-bound, bound, (hidden_dim, hidden_dim))

        if use_bias:
            self.hidden_bias = np.random.uniform(-bound, bound, hidden_dim)
            self.input_bias = np.random.uniform(-bound, bound, hidden_dim)
        else:
            self.hidden_bias = np.zeros(hidden_dim)
            self.input_bias = np.zeros(hidden_dim)

    def forward(self, X_in):
        """Run the recurrence over the whole sequence.

        Args:
            X_in: array-like of shape (batch, seq_len, input_dim).

        Returns:
            A tuple ``(H, last)`` where ``H`` has shape
            (batch, seq_len + 1, hidden_dim) and holds every hidden state
            (``H[:, 0]`` is the zero initial state), and ``last`` is
            ``H[:, seq_len]``, a view into ``H``.

        Raises:
            ValueError: if ``X_in`` is not 3-D or its last two dimensions are
                not (seq_len, input_dim).
        """
        X_in = np.asarray(X_in)
        if X_in.ndim != 3 or X_in.shape[1:] != (self.seq_len, self.input_dim):
            raise ValueError(
                "X_in must have shape (batch, %d, %d), got %s"
                % (self.seq_len, self.input_dim, X_in.shape)
            )

        # Use the batch size of the data actually passed in, so the layer is
        # not tied to the value given to the constructor.
        n_batch = X_in.shape[0]

        # The initial hidden state is assumed to be zero for every sample;
        # np.zeros already provides that for H[:, 0, :].
        H = np.zeros((n_batch, self.seq_len + 1, self.hidden_dim))

        for t in range(self.seq_len):
            input_part = np.dot(X_in[:, t, :], self.input_weights.T) + self.input_bias
            # Full matrix product with W_hh^T. The previous einsum subscripts
            # 'ij,jj->ij' only used the diagonal of the weight matrix.
            hidden_part = np.dot(H[:, t, :], self.hidden_weights.T) + self.hidden_bias
            # np.tanh is exactly what Tanh().forward computes.
            H[:, t + 1, :] = np.tanh(input_part + hidden_part)

        return H, H[:, self.seq_len, :]

    def backward(self, X_in, H, dEdY):
        """Back-propagation through time -- not implemented yet.

        Currently computes nothing useful and returns ``None``.

        Args:
            X_in: the input that was given to ``forward``.
            H: the hidden states returned by ``forward``.
            dEdY: upstream gradient (its exact shape/meaning is still to be
                decided by the implementation).
        """
        # With the indexing used in forward():
        #   H[:, k+1, :] = tanh(X_in[:, k, :] . W_in^T + B_in + H[:, k, :] . W_hh^T + B_h)
        # so, by the chain rule, each parameter gradient is dEdY times the
        # tanh derivative times the derivative of the pre-activation with
        # respect to that parameter, accumulated over the time steps.
        #
        # TODO: accumulate into the placeholders below and return them.
        dEdW_in = np.zeros_like(self.input_weights)
        dEdW_hh = np.zeros_like(self.hidden_weights)

        dEdB_in = np.zeros_like(self.input_bias)
        dEdB_h = np.zeros_like(self.hidden_bias)
        return None

In [170]:
# RNN forward checker: run one forward pass on a tiny hand-written batch
# (2 sequences, 3 time steps each, 4 input features) with 5 hidden units.
rnn = RnnLayer(input_dim=4, hidden_dim=5, seq_len=3, batch_size=2)

X_in = np.array([
    [[1, 2, 1, 3], [2, 2, 3, 1], [0, 2, 3, 1]],
    [[1, 3, 4, 3], [1, 2, 1, 1], [1, 0, 1, 2]],
])
H, last = rnn.forward(X_in)