In [7]:
import numpy as np

In [130]:
class Tanh(object):
    
    def forward(self, X_in):
        return np.tanh(X_in)
    
    def backward(self, X_in):
        #dEdX = dEdY * dYdX = dEdY * 1 - (tanh(X))^2
        return 1 - (np.tanh(X_in))**2

In [191]:
class ReLu(object):
    
    def forward(self, X_in):
        return np.maximum(X_in, 0)
    
    def backward(self, X_in):
        dYdX = (X_in > 0)  
        return dYdX

In [199]:
class RnnLayer(object):
    
    def __init__(self, input_dim, hidden_dim, seq_len, batch_size, use_bias=True, activation=Tanh):
        sq = np.sqrt(1. / hidden_dim)
        self.use_bias = use_bias
        self.seq_len = seq_len
        self.batch_size = batch_size
        self.hidden_dim = hidden_dim
        self.input_dim = input_dim
        self.activation = activation()
        self.input_weights = np.random.uniform(-sq, sq, (hidden_dim, input_dim))
        self.hidden_weights = np.random.uniform(-sq, sq, (hidden_dim, hidden_dim))
        
        if use_bias == True:
            self.hidden_bias = np.random.uniform(-sq, sq, hidden_dim)
            self.input_bias = np.random.uniform(-sq, sq, hidden_dim)
        else:
            self.hidden_bias = np.zeros((hidden_dim))
            self.input_bias = np.zeros((hidden_dim))
        
    def forward(self, X_in):        
        #treba li dodati provjeru je li X_in stvarno ima sekvencu jednaku seq_len?
        #treba li dodati provjeru je li X_in prva koordinata jednaka batch_size
        
        #u ovom slucaju sam pretpostavio da je za sve inpute, pocetno stanje 0 u 0. vremenskom trenutku
        H = np.zeros((self.batch_size, self.seq_len + 1, self.hidden_dim)) 
        
        for i in range(self.seq_len):
            
            input_part = np.einsum('ij,jk->ik', X_in[:,i,:], self.input_weights.T) + self.input_bias
            hidden_part = np.einsum('ij,jj->ij', H[:,i,:], self.hidden_weights.T) + self.hidden_bias
            
            H[:,i+1,:] = self.activation.forward(input_part + hidden_part)
       
        return H, H[:,self.seq_len,:]
    
    def book_forward(self, X_in):
        
        H = np.zeros((self.batch_size, self.seq_len + 1, self.hidden_dim)) 
        
        for i in range(self.seq_len):
            #ovdje dobivam transponirano iz mog forwarda, ali sam u einsum zamijenio vrijednosti, tako da zapravo dobijem isto
            input_part = np.einsum('ij,jk->ki',self.input_weights, X_in[:,i,:].T) + self.input_bias
            hidden_part = np.einsum('ii,ij->ji',self.hidden_weights, H[:,i,:].T) + self.hidden_bias
            
            H[:,i+1,:] = self.activation.forward(input_part + hidden_part)
       
        return H, H[:,self.seq_len,:]
        
            
    def backward(self, X, H, dEdY):
        dEdW_in = np.zeros_like(self.input_weights)
        dEdW_hh = np.zeros_like(self.hidden_weights)
        
        print(f'self.hiddan_bias={self.hidden_bias}')
        print(f'self.input_bias={self.input_bias}')
        
        
        dEdB_in = np.zeros_like(self.input_bias)
        dEdB_h = np.zeros_like(self.hidden_bias)
        
        H_grad = np.zeros((self.batch_size, self.seq_len + 1, self.hidden_dim))
        H_grad[:,self.seq_len,:] = dEdY[:,self.seq_len - 1,:]
        
        for i in range(self.seq_len, 0, -1):
            
            dEdW_in += np.einsum('bh,bi->hi', H_grad[:,i,:], X[:,i-1,:])
            dEdW_hh += np.einsum('bh,bk->hk', H_grad[:,i,:], H[:,i-1,:])
            
            #zapeo oko aktivacijske funkcije!!!, kako to derivirati i mnoziti s matricama
            
            if(self.use_bias == True):
                dEdB_in += np.sum(self.activation.backward(H[:,i,:]) * H_grad[:,i,:], axis=(0))
                #mislim da ovdje nije potrebno imati oba biasa, mislim na početku se random postave,
                #ali ovdje uvijek računamo iste vrijednosti
                dEdB_h = dEdB_in
            #ovo pitaj !!!!
            
            if i > 1:
                H_grad[:,i-1,:] = np.einsum('bh,hh->bh', H_grad[:,i,:], self.hidden_weights) * self.activation.backward(H[:,i,:]) + dEdY[:,i-2,:]
            else:
                H_grad[:,i-1,:] = np.einsum('bh,hh->bh', H_grad[:,i,:], self.hidden_weights) * self.activation.backward(H[:,i,:])
        
        return dEdW_in, dEdW_hh, dEdB_in, dEdB_h

In [196]:
#rnn forward checker

rnn = RnnLayer(4, 5, 3, 2)
#input dim 4
#hidden dim 5
#batch 2
#timestamps 3

X_in = np.array([[[1,2,1,3],[2,2,3,1],[0,2,3,1]],[[1,3,4,3],[1,2,1,1],[1,0,1,2]]])
H, last = rnn.forward(X_in)

In [194]:
dEdY = np.array([[[ 0.34545989,  0.07336296, -0.16346513, -0.06904482,
          0.0458759 ],
        [ 0.37271336,  0.07915059, -0.17636096, -0.07449179,
          0.04949507],
        [ 0.35166208,  0.07468007, -0.16639989, -0.07028441,
          0.04669953]],

       [[ 0.36616935,  0.07776088, -0.17326446, -0.07318388,
          0.04862605],
        [ 0.33954613,  0.07210709, -0.16066685, -0.06786287,
          0.04509058],
        [ 0.35872758,  0.07618053, -0.16974315, -0.07169654,
          0.04763781]]])

Win, Wh, Bin, Bh = rnn.backward(X_in, H, dEdY)    

self.hiddan_bias=[ 0.4316296  -0.1063735  -0.31191418  0.16940213  0.00493861]
self.input_bias=[ 0.35240786  0.36911693 -0.28135108  0.06668408 -0.35253175]


In [195]:
print(f'Bin={Bin}')
print(f'Bh={Bh}')

Bin=[ 1.86685287  0.46355134 -0.56416936 -0.40622725  0.        ]
Bh=[ 1.86685287  0.46355134 -0.56416936 -0.40622725  0.        ]
