### 임베딩 계층 테스트

In [9]:
import numpy as np
import sys
sys.path.append('..')
from common.np import *  # import numpy as np (or import cupy as np)
from common.layers import *
from common.time_layers import *

# Toy corpus of word ids. Inputs are every id but the last; the target for
# each input is simply the id that follows it in the corpus.
corpus = np.array([0, 1, 2, 3, 4, 1, 5, 6])
print(corpus)
xs = corpus[:-1]  # inputs
print(xs)
ts = corpus[1:]   # outputs (target labels)
print(ts)

batch_size = 1
time_size = 3

# Mini-batch of inputs: the first `time_size` ids, stored as C ints ('i').
batch_x = np.empty((batch_size, time_size), dtype='i')
print(batch_x.shape)

batch_x[0] = xs[:time_size]
print(batch_x)

# Matching mini-batch of targets.
batch_t = np.empty((batch_size, time_size), dtype='i')
print(batch_t.shape)

batch_t[0] = ts[:time_size]
print(batch_t)

# Embedding table with 7 rows (one per word id) and 4 columns; row r holds
# the values 4r .. 4r+3 so the looked-up rows are easy to recognise.
embed_W = np.arange(7 * 4).reshape(7, 4)
print(embed_W)

# Manual, per-time-step embedding lookup. This is the hand-unrolled version
# of what TimeEmbedding(embed_W).forward(batch_x) from common.time_layers is
# expected to produce (that call is not exercised here).
# The buffer shape and the loop bound are derived from batch_size, time_size
# and the table width instead of being repeated as literals, so they cannot
# drift apart when the sizes above are changed.
out = np.empty((batch_size, time_size, embed_W.shape[1]), dtype='f')

for t in range(time_size):
    # batch_x[:, t] holds one word id per batch row; indexing the table with
    # it selects one embedding row per batch row.
    out[:, t, :] = embed_W[batch_x[:, t]]

print(out)

[0 1 2 3 4 1 5 6]
[0 1 2 3 4 1 5]
[1 2 3 4 1 5 6]
(1, 3)
[[0 1 2]]
(1, 3)
[[1 2 3]]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]]
[[[ 0.  1.  2.  3.]
  [ 4.  5.  6.  7.]
  [ 8.  9. 10. 11.]]]


### RNN 계층 구현

In [10]:
import sys
sys.path.append('..')
from common.np import *  # import numpy as np (or import cupy as np)
from common.layers import *
from common.functions import sigmoid


class MyRNN:
    """Single time-step recurrent layer with the tanh activation disabled.

    The step is purely affine: ``h_next = h_prev . Wh + x . Wx + b``.
    Leaving out ``np.tanh`` keeps every intermediate value easy to verify by
    hand; ``backward`` additionally prints each gradient it computes.

    Attributes:
        params: ``[Wx, Wh, b]`` exactly as passed to the constructor.
        grads: zero-initialised buffers shaped like the entries of ``params``;
            ``backward`` overwrites them in place.
        cache: ``(x, h_prev, h_next)`` saved by the latest ``forward`` call.
    """

    def __init__(self, Wx, Wh, b):
        self.params = [Wx, Wh, b]
        self.grads = [np.zeros_like(p) for p in self.params]
        self.cache = None

    def forward(self, x, h_prev):
        """Compute the next hidden state and cache the values backward needs.

        Presumably ``x`` is (N, D) and ``h_prev`` is (N, H), matching how
        MyTimeRNN calls this layer -- confirm for other callers.
        """
        weight_x, weight_h, bias = self.params
        affine = np.dot(h_prev, weight_h) + np.dot(x, weight_x) + bias
        # Identity activation: np.tanh is intentionally not applied.
        h_next = affine

        self.cache = (x, h_prev, h_next)
        return h_next

    def backward(self, dh_next):
        """Back-propagate ``dh_next`` through the affine step.

        Prints db, dWh, dh_prev and dWx (in that order) as they are computed,
        stores dWx, dWh, db into ``self.grads`` and returns ``(dx, dh_prev)``.
        """
        weight_x, weight_h, _ = self.params
        x, h_prev, _ = self.cache

        # With the identity activation the upstream gradient passes through
        # unchanged (the tanh version would scale it by 1 - h_next ** 2).
        d_affine = dh_next

        grad_b = np.sum(d_affine, axis=0)
        print('db=', grad_b)
        grad_Wh = np.dot(h_prev.T, d_affine)
        print('dWh=', grad_Wh)
        dh_prev = np.dot(d_affine, weight_h.T)
        print('dh_prev=', dh_prev)
        grad_Wx = np.dot(x.T, d_affine)
        print('dWx=' , grad_Wx)
        dx = np.dot(d_affine, weight_x.T)

        # Write into the existing buffers so references to them stay valid.
        for buffer, value in zip(self.grads, (grad_Wx, grad_Wh, grad_b)):
            buffer[...] = value

        return dx, dh_prev

### Time RNN 계층 구현

In [63]:
class MyTimeRNN:
    """Applies a fresh MyRNN step to every time slice of a sequence batch.

    ``forward`` takes ``xs`` of shape (N, T, D) and returns the hidden states
    of shape (N, T, H), where ``Wx`` has shape (D, H).  ``backward`` walks the
    steps in reverse, sums the per-step parameter gradients into
    ``self.grads`` and prints them.

    The hidden state is carried over between ``forward`` calls only when
    ``stateful`` is true and a state already exists; otherwise it restarts
    from zeros.
    """

    def __init__(self, Wx, Wh, b, stateful=False):
        self.params = [Wx, Wh, b]
        self.grads = [np.zeros_like(p) for p in self.params]
        self.layers = None

        self.h = None
        self.dh = None
        self.stateful = stateful

    def forward(self, xs):
        """Run all T steps and return the stacked hidden states (float32)."""
        Wx = self.params[0]
        N, T, _ = xs.shape
        _, H = Wx.shape

        self.layers = []
        hs = np.empty((N, T, H), dtype='f')

        start_from_zeros = self.h is None or not self.stateful
        if start_from_zeros:
            self.h = np.zeros((N, H), dtype='f')

        for t in range(T):
            # Every step gets its own layer object (and cache) but shares the
            # same parameter arrays.
            step = MyRNN(*self.params)
            self.h = step.forward(xs[:, t, :], self.h)
            hs[:, t, :] = self.h
            self.layers.append(step)

        return hs

    def backward(self, dhs):
        """Back-propagate ``dhs`` (N, T, H) through time; return dxs (N, T, D).

        The gradient flowing into the hidden state before the first step is
        kept in ``self.dh``.
        """
        Wx = self.params[0]
        N, T, _ = dhs.shape
        D, _ = Wx.shape

        dxs = np.empty((N, T, D), dtype='f')
        dh = 0
        totals = [0, 0, 0]
        for t in range(T - 1, -1, -1):
            step = self.layers[t]
            # Gradient reaching h_t = gradient from the output at t plus the
            # gradient handed back by step t + 1.
            dx, dh = step.backward(dhs[:, t, :] + dh)
            dxs[:, t, :] = dx

            # The parameters are shared across steps, so their gradients add.
            totals = [acc + g for acc, g in zip(totals, step.grads)]

        for buffer, total in zip(self.grads, totals):
            buffer[...] = total
        self.dh = dh

        print('self.grads=',self.grads)
        return dxs

    def set_state(self, h):
        """Replace the stored hidden state with ``h``."""
        self.h = h

    def reset_state(self):
        """Discard the stored hidden state (the next forward starts from zeros)."""
        self.h = None

In [64]:
class MyEmbedding:
    """Embedding lookup: selects rows of the weight table ``W`` by index.

    Attributes:
        params: ``[W]``.
        grads: one zero-initialised buffer shaped like ``W``.
        idx: the indices given to the most recent ``forward`` call.
    """

    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.idx = None

    def forward(self, idx):
        """Remember ``idx`` and return ``W[idx]``."""
        (table,) = self.params
        self.idx = idx
        return table[idx]

    def backward(self, dout):
        """Scatter ``dout`` into the gradient buffer, print it, return None."""
        (grad_W,) = self.grads
        # Start from a clean buffer on every call.
        grad_W[...] = 0
        # np.add.at accumulates unbuffered, so an index that occurs several
        # times in self.idx receives the sum of all its rows of dout.
        np.add.at(grad_W, self.idx, dout)
        print("dW=", grad_W)
        return None

In [65]:
class MyTimeEmbedding:
    """Applies a MyEmbedding lookup to every time step of an index batch.

    ``forward`` takes ``xs`` of shape (N, T) and returns a float32 array of
    shape (N, T, D), where the table ``W`` has shape (V, D).  ``backward``
    sums the per-step table gradients into ``self.grads[0]`` and prints it.
    """

    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.layers = None
        self.W = W

    def forward(self, xs):
        """Look up the embeddings of ``xs`` one time step at a time."""
        N, T = xs.shape
        _, D = self.W.shape

        self.layers = []
        out = np.empty((N, T, D), dtype='f')

        for t in range(T):
            # One layer per step so that each remembers its own indices.
            step = MyEmbedding(self.W)
            out[:, t, :] = step.forward(xs[:, t])
            self.layers.append(step)

        return out

    def backward(self, dout):
        """Accumulate the table gradient over all time steps; return None."""
        _, T, _ = dout.shape

        total = 0
        for t in range(T):
            step = self.layers[t]
            step.backward(dout[:, t, :])
            # All steps share the same table, so their gradients are summed.
            total = total + step.grads[0]

        self.grads[0][...] = total
        print('self.grads=',self.grads)
        return None

### Time RNN 테스트

In [66]:
import numpy as np
import sys
sys.path.append('..')
from common.np import *  # import numpy as np (or import cupy as np)
from common.layers import *
from common.time_layers import *

# Same toy data as in the embedding test: word ids, with each input's target
# being the id that follows it.
corpus = np.array([0, 1, 2, 3, 4, 1, 5, 6])
xs = corpus[:-1]  # inputs
ts = corpus[1:]   # outputs (target labels)

batch_size = 1
time_size = 3

# One-row mini-batches holding the first three inputs / targets.
batch_x = np.empty((batch_size, time_size), dtype='i')
batch_t = np.empty((batch_size, time_size), dtype='i')
batch_x[0] = xs[:3]
batch_t[0] = ts[:3]

# Integer embedding table with 7 rows and 4 columns (values 0..27).
embed_W = np.arange(28).reshape(7, 4)

embed = MyTimeEmbedding(embed_W)
out = embed.forward(batch_x)
print(out)

# All-ones weights and a constant bias of 3 keep the RNN output easy to
# check by hand.
rnn_Wx = np.ones((4, 3), dtype='f')
print(rnn_Wx)
rnn_Wh = np.ones((3, 3), dtype='f')
print(rnn_Wh)
rnn_b = np.full((3,), 3, dtype='f')
print(rnn_b)

rnn = MyTimeRNN(rnn_Wx, rnn_Wh, rnn_b)
# `out` is rebound: from here on it holds the hidden states, not the embeddings.
out = rnn.forward(out)
print(out)

[[[ 0.  1.  2.  3.]
  [ 4.  5.  6.  7.]
  [ 8.  9. 10. 11.]]]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[3. 3. 3.]
[[[  9.   9.   9.]
  [ 52.  52.  52.]
  [197. 197. 197.]]]


In [67]:
# Hand-picked integer upstream gradient of shape (1, 3, 3): time step t
# receives the constant value t + 1 in every hidden unit.
dout = np.array([[[1] * 3, [2] * 3, [3] * 3]])
dxs = rnn.backward(dout)
print(dxs)

db= [3 3 3]
dWh= [[156. 156. 156.]
 [156. 156. 156.]
 [156. 156. 156.]]
dh_prev= [[9. 9. 9.]]
dWx= [[24. 24. 24.]
 [27. 27. 27.]
 [30. 30. 30.]
 [33. 33. 33.]]
db= [11. 11. 11.]
dWh= [[99. 99. 99.]
 [99. 99. 99.]
 [99. 99. 99.]]
dh_prev= [[33. 33. 33.]]
dWx= [[44. 44. 44.]
 [55. 55. 55.]
 [66. 66. 66.]
 [77. 77. 77.]]
db= [34. 34. 34.]
dWh= [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
dh_prev= [[102. 102. 102.]]
dWx= [[  0.   0.   0.]
 [ 34.  34.  34.]
 [ 68.  68.  68.]
 [102. 102. 102.]]
self.grads= [array([[ 68.,  68.,  68.],
       [116., 116., 116.],
       [164., 164., 164.],
       [212., 212., 212.]], dtype=float32), array([[255., 255., 255.],
       [255., 255., 255.],
       [255., 255., 255.]], dtype=float32), array([48., 48., 48.], dtype=float32)]
[[[102. 102. 102. 102.]
  [ 33.  33.  33.  33.]
  [  9.   9.   9.   9.]]]


In [68]:
# Feed the input gradient returned by rnn.backward (dxs) back into the time
# embedding layer. MyTimeEmbedding.backward returns None; what appears below
# is what it prints: one "dW=" per time step, then the summed "self.grads=".
embed.backward(dxs)

dW= [[102 102 102 102]
 [  0   0   0   0]
 [  0   0   0   0]
 [  0   0   0   0]
 [  0   0   0   0]
 [  0   0   0   0]
 [  0   0   0   0]]
dW= [[ 0  0  0  0]
 [33 33 33 33]
 [ 0  0  0  0]
 [ 0  0  0  0]
 [ 0  0  0  0]
 [ 0  0  0  0]
 [ 0  0  0  0]]
dW= [[0 0 0 0]
 [0 0 0 0]
 [9 9 9 9]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]
self.grads= [array([[102, 102, 102, 102],
       [ 33,  33,  33,  33],
       [  9,   9,   9,   9],
       [  0,   0,   0,   0],
       [  0,   0,   0,   0],
       [  0,   0,   0,   0],
       [  0,   0,   0,   0]])]


### TimeSoftmax 분석

In [8]:
import numpy as np
# Integer-array indexing: pairing row r with column ts[r] pulls exactly one
# element out of each row of ys.
ts = np.array([1, 2, 3])
ys = np.arange(3 * 7).reshape(3, 7)
print(ys[np.arange(len(ts)), ts])

[ 1  9 17]


In [9]:
import numpy as np
# Subtract 1 from the element in column ts[r] of every row r.
ts = np.array([1, 2, 3])
ys = np.arange(3 * 7).reshape(3, 7)
# dx is a second name for the same array, not a copy: the in-place update
# below changes ys as well.
dx = ys
dx[np.arange(len(ts)), ts] -= 1
print(dx)

[[ 0  0  2  3  4  5  6]
 [ 7  8  8 10 11 12 13]
 [14 15 16 16 18 19 20]]


In [6]:
# np.newaxis is just another name for None, which is what gets printed here.
print(np.newaxis)

None


In [43]:
# Row mask: rows whose mask entry is 0 get zeroed, rows with 1 are kept.
mask = np.array([1, 0, 1])
# Three identical gradient rows.
dx = np.array([[0, -0.3, 0.03, 0, 0, 2.66, 0]] * 3)

# reshape(3, 1) and [:, np.newaxis] both turn the mask into a (3, 1) column.
print(mask)
print(mask.reshape(3, 1))
print(mask[:, np.newaxis])

# The (3, 1) column broadcasts across all columns of dx, scaling whole rows.
dx *= mask[:, np.newaxis]
print(dx)

[1 0 1]
[[1]
 [0]
 [1]]
[[1]
 [0]
 [1]]
[[ 0.   -0.3   0.03  0.    0.    2.66  0.  ]
 [ 0.   -0.    0.    0.    0.    0.    0.  ]
 [ 0.   -0.3   0.03  0.    0.    2.66  0.  ]]
