In [1]:
import numpy as np

In [2]:
class Softmax(object):
    @staticmethod
    def forward(x_in):
        exps = np.exp(x_in-np.max(x_in, axis=-1, keepdims=True))
        return exps / np.sum(exps, axis=-1, keepdims=True)


class Tanh(object):

    @staticmethod
    def forward(x_in):
        return np.tanh(x_in)

    @staticmethod
    def backward(x_in):
        # dEdX = dEdY * dYdX = dEdY * 1 - (tanh(X))^2
        return 1 - (np.tanh(x_in)) ** 2


class ReLu(object):

    @staticmethod
    def forward(x_in):
        return np.maximum(x_in, 0)

    @staticmethod
    def backward(x_in):
        return x_in > 0

In [3]:
class DenseLayer(object):

    def __init__(self, input_dim, output_dim, use_bias=True):
        sq = np.sqrt(1. / input_dim)
        self.use_bias = use_bias
        self.weights = np.random.uniform(-sq, sq, (output_dim, input_dim))
        if use_bias:
            self.bias = np.random.uniform(-sq, sq, output_dim)
        else:
            self.bias = np.zeros(output_dim)

    def forward(self, x_in):
        return np.tensordot(x_in, self.weights.T, axes=((-1), 0)) + self.bias

    def backward(self, de_dy, x_in):
        # de_dw = de_dy * dYdW = de_dy * X
        # dEdb = de_dy * dYdb = de_dy
        # dEdX = de_dy * dYdX = de_dy * W
        axis = tuple(range(len(x_in.shape) - 1))
        de_dw = np.tensordot(de_dy, x_in, axes=(axis, axis))
        de_db = np.sum(de_dy, axis=axis)
        de_dx = np.tensordot(de_dy, self.weights, axes=(-1, 0))

        return de_dx, de_dw, de_db

    def refresh(self, de_dw, de_db, learning_rate):
        self.weights = self.weights - learning_rate * de_dw
        if self.use_bias:
            self.bias = self.bias - learning_rate * de_db

In [4]:
class CrossEntropyLoss(object):
    def __init__(self):
        self.y_pred = None

    def forward(self, y, o):
        self.y_pred = Softmax.forward(o)
        return np.sum(-y * np.log(self.y_pred + 1e-15))/y.shape[0]

    def backward(self, y):
        return self.y_pred - y

In [5]:
class RnnLayer(object):

    def __init__(self, input_dim, hidden_dim, seq_len, batch_size, use_bias=True, activation=Tanh):
        sq = np.sqrt(1. / hidden_dim)
        self.use_bias = use_bias
        self.seq_len = seq_len
        self.batch_size = batch_size
        self.hidden_dim = hidden_dim
        self.input_dim = input_dim
        self.activation = activation()
        self.input_weights = np.random.uniform(-sq, sq, (hidden_dim, input_dim))
        self.hidden_weights = np.random.uniform(-sq, sq, (hidden_dim, hidden_dim))

        if self.use_bias:
            self.bias = np.random.uniform(-sq, sq, hidden_dim)
        else:
            self.bias = np.zeros(hidden_dim)

    def forward(self, x_in):
        # treba li dodati provjeru je li X_in stvarno ima sekvencu jednaku seq_len?
        # treba li dodati provjeru je li X_in prva koordinata jednaka batch_size

        # u ovom slucaju sam pretpostavio da je za sve inpute, pocetno stanje 0 u 0. vremenskom trenutku
        H = np.zeros((self.batch_size, self.seq_len + 1, self.hidden_dim))

        for i in range(self.seq_len):
            input_part = np.einsum('ij,jk->ik', x_in[:, i, :], self.input_weights.T)
            hidden_part = np.einsum('ij,jk->ik', H[:, i, :], self.hidden_weights.T)

            H[:, i + 1, :] = self.activation.forward(input_part + hidden_part + self.bias)

        return H, H[:, self.seq_len, :]

    def book_forward(self, x_in):

        H = np.zeros((self.batch_size, self.seq_len + 1, self.hidden_dim))

        for i in range(self.seq_len):
            # ovdje dobivam transponirano iz mog forwarda, ali sam u einsum zamijenio vrijednosti, tako da zapravo dobijem isto
            input_part = np.einsum('ij,jk->ki', self.input_weights, x_in[:, i, :].T)
            hidden_part = np.einsum('ii,ij->ji', self.hidden_weights, H[:, i, :].T)

            H[:, i + 1, :] = self.activation.forward(input_part + hidden_part + self.bias)

        return H, H[:, self.seq_len, :]

    def backward(self, x, h, dEdY):
        dEdW_in = np.zeros_like(self.input_weights)
        dEdW_hh = np.zeros_like(self.hidden_weights)

        dEdB_in = np.zeros_like(self.bias)

        H_grad = np.zeros((self.batch_size, self.seq_len + 1, self.hidden_dim))
        H_grad[:, self.seq_len, :] = dEdY[:, self.seq_len - 1, :]

        for i in range(self.seq_len, 0, -1):
            activation_backward = self.activation.backward(h[:, i, :]).reshape(self.batch_size, self.hidden_dim, 1)

            dEdW_in += np.sum(activation_backward * (np.einsum('bh,bi->bhi', H_grad[:, i, :], x[:, i - 1, :])), axis=0)
            dEdW_hh += np.sum(activation_backward * (np.einsum('bh,bk->bhk', H_grad[:, i, :], h[:, i - 1, :])), axis=0)

            if self.use_bias:
                dEdB_in += np.sum(self.activation.backward(h[:, i, :]) * H_grad[:, i, :], axis=0)
            else:
                pass

            if i > 1:
                H_grad[:, i - 1, :] = np.einsum('bh,hk->bk', H_grad[:, i, :], self.hidden_weights) * self.activation.backward(
                    h[:, i, :]) + dEdY[:, i - 2, :]
            else:
                H_grad[:, i - 1, :] = np.einsum('bh,hk->bk', H_grad[:, i, :],
                                                self.hidden_weights) * self.activation.backward(h[:, i, :])

        return dEdW_in, dEdW_hh, dEdB_in

    def backward_checker(self, X, H, dEdY):
        dEdW_in = np.zeros_like(self.input_weights)
        dEdW_hh = np.zeros_like(self.hidden_weights)

        print(f'self.bias={self.bias}')

        dEdB_in = np.zeros_like(self.bias)

        H_grad = np.zeros((self.batch_size, self.seq_len + 1, self.hidden_dim))
        H_grad[:, self.seq_len, :] = dEdY[:, self.seq_len - 1, :]

        for i in range(self.seq_len, 0, -1):

            for k in range(self.batch_size):
                act_grad = np.diag(self.activation.backward(H[k, i, :]))
                h_grad = H_grad[k, i, :].reshape(self.hidden_dim, 1)

                dEdW_in += np.dot(act_grad, np.dot(h_grad, X[k, i - 1, :].reshape(1, self.input_dim)))
                dEdW_hh += np.dot(act_grad, np.dot(h_grad, H[k, i - 1, :].reshape(1, self.hidden_dim)))

            if self.use_bias:
                dEdB_in += np.sum(self.activation.backward(H[:, i, :]) * H_grad[:, i, :], axis=(0))
            else:
                pass

            if i > 1:
                H_grad[:, i - 1, :] = np.einsum('bh,hk->bk', H_grad[:, i, :],
                                                self.hidden_weights) * self.activation.backward(H[:, i, :]) + dEdY[:,
                                                                                                              i - 2, :]
            else:
                H_grad[:, i - 1, :] = np.einsum('bh,hk->bk', H_grad[:, i, :],
                                                self.hidden_weights) * self.activation.backward(H[:, i, :])

        return dEdW_in, dEdW_hh, dEdB_in


In [6]:
batch_size = 3  # Number of training samples
# Addition of 2 n-bit numbers can result in a n+1 bit number
sequence_len = 3  # Length of the binary sequence

rnn = RnnLayer(2, 3, sequence_len, batch_size)
dense = DenseLayer(3, 2)
clos = CrossEntropyLoss()

X = np.array([[[0., 1.], [1., 0.], [0., 1.]],
              [[0., 1.], [0., 1.], [0., 1.]],
              [[1., 0.], [1., 0.], [1., 0.]]])

T = np.array([[[0., 1.], [1., 0.], [0., 1.]],
              [[0., 1.], [0., 1.], [0., 1.]],
              [[1., 0.], [1., 0.], [1., 0.]]])

num_iter = 1000
learning_rate = 0.1

for i in range(num_iter):
    H, _ = rnn.forward(X)
    out = dense.forward(H[:, 1:, :])
    loss = clos.forward(T, out)

    print(f'{i + 1}. iteracija- loss: {loss}')

    dEdY = clos.backward(T)

    de_dx, de_dw, de_db_d = dense.backward(dEdY, H[:, 1:, :])
    dEdW_in, dEdW_hh, de_db_r = rnn.backward(X, H, de_dx)

    dense.weights = dense.weights - learning_rate * de_dw
    if dense.use_bias:
        dense.bias = dense.bias - learning_rate * de_db_d
    rnn.input_weights = rnn.input_weights - learning_rate * dEdW_in
    rnn.hidden_weights = rnn.hidden_weights - learning_rate * dEdW_hh
    if rnn.use_bias:
        rnn.bias = rnn.bias - learning_rate * de_db_r

1. iteracija- loss: 2.134557227594829
2. iteracija- loss: 1.7604465978493187
3. iteracija- loss: 1.4400414553051117
4. iteracija- loss: 1.1008742948261738
5. iteracija- loss: 0.7867404965296728
6. iteracija- loss: 0.5471438378604198
7. iteracija- loss: 0.3886903954031437
8. iteracija- loss: 0.28928272741813005
9. iteracija- loss: 0.22577152787507782
10. iteracija- loss: 0.18325295686907295
11. iteracija- loss: 0.15335152805322408
12. iteracija- loss: 0.1313964981514841
13. iteracija- loss: 0.1146886552925583
14. iteracija- loss: 0.10159498836640192
15. iteracija- loss: 0.09108243775470458
16. iteracija- loss: 0.08247072535656767
17. iteracija- loss: 0.07529588182485948
18. iteracija- loss: 0.06923168027447966
19. iteracija- loss: 0.06404259795007874
20. iteracija- loss: 0.05955463387484883
21. iteracija- loss: 0.05563661780892437
22. iteracija- loss: 0.052187893352896274
23. iteracija- loss: 0.0491299927226596
24. iteracija- loss: 0.04640088029678121
25. iteracija- loss: 0.043950890364

207. iteracija- loss: 0.0038917447822245325
208. iteracija- loss: 0.0038720235522779326
209. iteracija- loss: 0.0038525003123388094
210. iteracija- loss: 0.0038331721091493364
211. iteracija- loss: 0.003814036047707302
212. iteracija- loss: 0.0037950892898390635
213. iteracija- loss: 0.003776329052816486
214. iteracija- loss: 0.00375775260801146
215. iteracija- loss: 0.003739357279591753
216. iteracija- loss: 0.0037211404432527678
217. iteracija- loss: 0.003703099524985655
218. iteracija- loss: 0.003685231999881761
219. iteracija- loss: 0.0036675353909691763
220. iteracija- loss: 0.0036500072680841038
221. iteracija- loss: 0.003632645246772558
222. iteracija- loss: 0.0036154469872227216
223. iteracija- loss: 0.0035984101932276125
224. iteracija- loss: 0.0035815326111768263
225. iteracija- loss: 0.003564812029075047
226. iteracija- loss: 0.0035482462755889997
227. iteracija- loss: 0.0035318332191174996
228. iteracija- loss: 0.0035155707668912237
229. iteracija- loss: 0.00349945686409325

429. iteracija- loss: 0.001824759304604378
430. iteracija- loss: 0.0018204040240784212
431. iteracija- loss: 0.00181606952180187
432. iteracija- loss: 0.0018117556497023806
433. iteracija- loss: 0.0018074622611097563
434. iteracija- loss: 0.0018031892107377011
435. iteracija- loss: 0.001798936354668914
436. iteracija- loss: 0.0017947035503377712
437. iteracija- loss: 0.0017904906565156806
438. iteracija- loss: 0.0017862975332946177
439. iteracija- loss: 0.001782124042072254
440. iteracija- loss: 0.0017779700455367931
441. iteracija- loss: 0.0017738354076511007
442. iteracija- loss: 0.0017697199936396871
443. iteracija- loss: 0.0017656236699722088
444. iteracija- loss: 0.0017615463043507869
445. iteracija- loss: 0.0017574877656946167
446. iteracija- loss: 0.0017534479241268454
447. iteracija- loss: 0.0017494266509602936
448. iteracija- loss: 0.0017454238186839948
449. iteracija- loss: 0.0017414393009503705
450. iteracija- loss: 0.0017374729725606903
451. iteracija- loss: 0.0017335247094

639. iteracija- loss: 0.001215036815941277
640. iteracija- loss: 0.0012131093218531962
641. iteracija- loss: 0.001211187961316297
642. iteracija- loss: 0.0012092727051154727
643. iteracija- loss: 0.0012073635242207845
644. iteracija- loss: 0.0012054603897864254
645. iteracija- loss: 0.0012035632731481625
646. iteracija- loss: 0.0012016721458229674
647. iteracija- loss: 0.0011997869795063482
648. iteracija- loss: 0.0011979077460726436
649. iteracija- loss: 0.0011960344175720244
650. iteracija- loss: 0.0011941669662301213
651. iteracija- loss: 0.0011923053644458392
652. iteracija- loss: 0.0011904495847913192
653. iteracija- loss: 0.001188599600008864
654. iteracija- loss: 0.0011867553830107154
655. iteracija- loss: 0.0011849169068780154
656. iteracija- loss: 0.0011830841448584744
657. iteracija- loss: 0.001181257070365516
658. iteracija- loss: 0.0011794356569777222
659. iteracija- loss: 0.0011776198784365743
660. iteracija- loss: 0.0011758097086451917
661. iteracija- loss: 0.001174005121

852. iteracija- loss: 0.0009081649182681902
853. iteracija- loss: 0.0009070910568325417
854. iteracija- loss: 0.0009060197477928983
855. iteracija- loss: 0.0009049509820573672
856. iteracija- loss: 0.0009038847505767976
857. iteracija- loss: 0.0009028210443457808
858. iteracija- loss: 0.0009017598544000571
859. iteracija- loss: 0.0009007011718195533
860. iteracija- loss: 0.0008996449877246795
861. iteracija- loss: 0.0008985912932786237
862. iteracija- loss: 0.0008975400796859093
863. iteracija- loss: 0.0008964913381923931
864. iteracija- loss: 0.0008954450600847855
865. iteracija- loss: 0.000894401236691464
866. iteracija- loss: 0.0008933598593805851
867. iteracija- loss: 0.0008923209195613799
868. iteracija- loss: 0.0008912844086825249
869. iteracija- loss: 0.0008902503182334013
870. iteracija- loss: 0.0008892186397425386
871. iteracija- loss: 0.0008881893647782081
872. iteracija- loss: 0.0008871624849476449
873. iteracija- loss: 0.0008861379918976023
874. iteracija- loss: 0.000885115