In [14]:
import numpy as np
W = np.arange(21).reshape(7, 3)
W

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20]])

In [2]:
W[2]

array([6, 7, 8])

In [17]:
idx = np.array([1, 3, 0, 1])
W[idx] # 여러 행을 한꺼번에 추출

array([[ 3,  4,  5],
       [ 9, 10, 11],
       [ 0,  1,  2],
       [ 3,  4,  5]])

In [22]:
class Embedding:
    def __init__(self, W):
        self.params = [W] # list
        self.grads = [np.zeros_like(W)]
        self.idx = None

    def forward(self, idx): # np.array
        W, = self.params
        self.idx = idx
        out = W[idx]
        return out

In [19]:
emb = Embedding(W)
emb.forward(idx)

array([[ 3,  4,  5],
       [ 9, 10, 11],
       [ 0,  1,  2],
       [ 3,  4,  5]])

In [23]:
    def backward(self, dout):
        dW, = self.grads
        dW[...] = 0 # 0으로 덮어 씀
        dW[self.idx] = dout # 실은 나쁜 예 (기존 가중치 특정 행 갱신 가능함)
        return None

In [26]:
class Embedding:
    def __init__(self, W):
        self.params = [W] # list
        self.grads = [np.zeros_like(W)]
        self.idx = None

    def forward(self, idx): # np.array
        W, = self.params
        self.idx = idx
        out = W[idx]
        return out

    def backward(self, dout):
        dW, = self.grads
        dW[...] = 0

#       for i, word_id in enumerate(self.idx):
#           dW[word_id] += dout[i]

        np.add.at(dW, self.idx, dout) # dout를 dw의 idx 행에 더해줌

        return None

In [37]:
class EmbeddingDot:
    def __init__(self, W):
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None # 순전파 시 계산 결과를 잠시 유지하기 위함

    def forward(self, h, idx):
        '''
        W        : (hidden, vocab) -> 굳이 행/열 방향 안바꿔도 됨, 걍 뽑아낼 것
        idx      : (#mini_batch,) -> 정답 idx 각 각
        target_W : (#mini_batch, hidden)
        h        : (#mini_batch, hidden)
        
        target_W
        * h      : (#mini_batch, hidden) -> element-wise

        out      : (hidden,)
        '''
               
        target_W = self.embed.forward(idx) 
        out = np.sum(target_W * h, axis=1) # dot product : (h,)*(h,1)=(h,1)
                                           
        

In [29]:
W = np.arange(18).reshape(3, 6)
W

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17]])

In [31]:
new = [W]

In [34]:
new, = new
idx = np.array([1, 4])
new[idx]

IndexError: index 4 is out of bounds for axis 0 with size 3

In [36]:
W = np.arange(18).reshape(6, 3)
new = [W]
print(new)
new, = new
idx = np.array([1, 4])
new[idx]

[array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17]])]


array([[ 3,  4,  5],
       [12, 13, 14]])

In [24]:
import sys
sys.path.append('..')
import numpy as np
from common.layers import MatMul, SoftmaxWithLoss
from common.util import preprocess, create_contexts_target, convert_one_hot

class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # 1. 가중치 초기화
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # 2. 계층 생성 (contexts 2개)
        self.in_layer0 = MatMul(W_in) # context1
        self.in_layer1 = MatMul(W_in) # context2
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # 3. 모든 가중치와 기울기를 리스트에 모은다.
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # 4. 인스턴스 변수에 단어의 분산 표현을 저장한다.
        self.word_vecs = W_in
    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:,0]) # MatMul.forward()
        print(contexts.shape)
        print('h0',h0.shape)
        h1 = self.in_layer1.forward(contexts[:,1])
        h = (h0 + h1) * 0.5 # 평균
        print('h', h.shape)
        print(h)
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5 # 순전파 입력을 서로 바꿔 기울기에 곱
        self.in_layer1.backward(da) # 그대로 흘림
        self.in_layer0.backward(da) # 그대로 흘림
        return None   
        
model = SimpleCBOW(7, 3)
text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)

# 3. ont hot encoding
vocab_size = len(word_to_id)
contexts, target = create_contexts_target(corpus, 1)
target = convert_one_hot(target, vocab_size)
contexts = convert_one_hot(contexts, vocab_size)

model.forward(contexts, target)

(6, 2, 7)
h0 (6, 3)
h (6, 3)
[[-0.00215463  0.00203248 -0.01218489]
 [ 0.00158647  0.00582324  0.00845241]
 [ 0.00145745  0.00199188  0.00410468]
 [ 0.00158647  0.00582324  0.00845241]
 [-0.0011291   0.00438889  0.01679553]
 [-0.0058708  -0.00579223  0.01247176]]


1.945911603779739

In [25]:
corpus.shape

(8,)

In [26]:
vocab_size

7