In [3]:
import numpy as np

# Small integer matrix with 7 rows and 3 columns holding 0..20 in row-major
# order; it is used below as a stand-in weight matrix so that each row is
# easy to recognise in the printed results.
W = np.arange(7 * 3).reshape(7, 3)
W

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20]])

In [2]:
# A single integer index selects one row of W (row 2) as a 1-D array.
W[2]

array([6, 7, 8])

In [17]:
# Indexing with an integer array returns one row per entry of the array;
# an entry that appears twice (1 here) yields the same row twice.
idx = np.array([1, 3, 0, 1])
W[idx] # extract several rows in one go

array([[ 3,  4,  5],
       [ 9, 10, 11],
       [ 0,  1,  2],
       [ 3,  4,  5]])

In [4]:
class Embedding:
    """Lookup layer that returns the rows of a weight array chosen by index.

    Attributes:
        params: one-element list holding the weight array ``W`` itself
            (no copy is made).
        grads: one-element list holding a zero array shaped like ``W``.
        idx: the index given to the most recent ``forward`` call, or
            ``None`` while ``forward`` has not been called.
    """

    def __init__(self, W):
        """Store ``W`` and allocate a zero gradient buffer of the same shape."""
        self.idx = None
        self.grads = [np.zeros_like(W)]
        self.params = [W]

    def forward(self, idx):
        """Return ``W[idx]`` and remember ``idx`` on the instance.

        Args:
            idx: index applied to ``W``; the notebook passes a 1-D integer
                NumPy array.

        Returns:
            The result of ``W[idx]``.
        """
        (weight,) = self.params
        self.idx = idx
        return weight[idx]

In [19]:
# Wrap W in the layer; forward(idx) returns W[idx], i.e. the same rows as the
# direct lookup above.
emb = Embedding(W)
emb.forward(idx)

array([[ 3,  4,  5],
       [ 9, 10, 11],
       [ 0,  1,  2],
       [ 3,  4,  5]])

In [23]:
    def backward(self, dout):
        """Write ``dout`` into the gradient rows selected by ``self.idx``.

        This version is shown as a deliberately flawed example: the rows are
        filled with plain indexed assignment, so when ``self.idx`` names the
        same row more than once the contributions are not added together --
        only one of them is kept.

        Args:
            dout: values assigned to ``dW[self.idx]``.

        Returns:
            None.
        """
        dW, = self.grads
        dW[...] = 0 # overwrite the whole gradient buffer with zeros, in place
        dW[self.idx] = dout # actually a bad example: a row that was already written can be overwritten
        return None

In [5]:
class Embedding:
    """Lookup layer: forward picks rows of ``W``, backward scatters gradients back.

    Attributes:
        params: one-element list holding the weight array ``W`` (not copied).
        grads: one-element list holding the gradient buffer, created with
            ``np.zeros_like(W)`` and updated in place by ``backward``.
        idx: the index given to the most recent ``forward`` call, or ``None``
            while ``forward`` has not been called.

    NOTE(review): the gradient buffer inherits ``W``'s dtype. With an integer
    ``W`` (as in the demo arrays of this notebook) accumulating float
    gradients is likely to fail or lose precision -- confirm that real
    weights are floating point.
    """

    def __init__(self, W):
        """Store ``W`` and allocate a zero gradient buffer of the same shape."""
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.idx = None

    def forward(self, idx):
        """Return ``W[idx]`` and remember ``idx`` for the backward pass.

        Args:
            idx: index applied to ``W``; the notebook passes a 1-D integer
                NumPy array.

        Returns:
            The result of ``W[idx]``.
        """
        W, = self.params
        self.idx = idx
        return W[idx]

    def backward(self, dout):
        """Accumulate ``dout`` into the gradient rows selected by ``self.idx``.

        The gradient buffer is zeroed first and then updated in place.
        For a 1-D ``idx`` this is the vectorised form of::

            for i, word_id in enumerate(self.idx):
                dW[word_id] += dout[i]

        Args:
            dout: upstream gradient with one entry per element of ``idx``.

        Returns:
            None; the result is left in ``self.grads[0]``.

        Raises:
            RuntimeError: if called before ``forward`` (no index is stored).
        """
        if self.idx is None:
            # Without this check None would be handed to np.add.at as the
            # index instead of reporting the misuse.
            raise RuntimeError("Embedding.backward() called before forward()")

        dW, = self.grads
        dW[...] = 0  # reset in place so the array held in self.grads stays the same object

        # np.add.at is unbuffered: when idx contains the same row several
        # times every contribution is added, whereas dW[idx] = dout would
        # keep only one of them.
        np.add.at(dW, self.idx, dout)

        return None

In [34]:
class EmbeddingDot:
    """Embedding lookup followed by a row-wise dot product with ``h``.

    Attributes:
        embed: the inner ``Embedding`` layer built from ``W``.
        params: the inner layer's ``params`` list (shared, not copied).
        grads: the inner layer's ``grads`` list (shared, not copied).
        cache: ``(h, target_W)`` saved by ``forward`` for use in ``backward``;
            ``None`` until ``forward`` has run.
    """

    def __init__(self, W):
        """Build the inner ``Embedding`` layer and expose its params/grads."""
        inner = Embedding(W)
        self.embed = inner
        self.params = inner.params
        self.grads = inner.grads
        self.cache = None

    def forward(self, h, idx):
        """Dot each row of ``h`` with the weight row selected for it.

        Expected shapes (as used in this notebook):
            W        : (vocab, hidden)
            idx      : (#mini_batch,)        index of the target row per sample
            target_W : (#mini_batch, hidden)
            h        : (#mini_batch, hidden)
            out      : (#mini_batch,)        one scalar per sample

        ``target_W * h`` is an element-wise product; summing it over axis 1
        gives the dot product of each pair of rows.

        Args:
            h: array multiplied element-wise with the selected rows.
            idx: index passed on to ``self.embed.forward``.

        Returns:
            ``np.sum(target_W * h, axis=1)``.
        """
        target_W = self.embed.forward(idx)
        elementwise = target_W * h
        out = np.sum(elementwise, axis=1)
        # Saved only after the product succeeded, for use in backward().
        self.cache = (h, target_W)
        return out

    def backward(self, dout):
        """Propagate ``dout`` to the selected weight rows and to ``h``.

        Args:
            dout: upstream gradient; it is reshaped to
                ``(dout.shape[0], 1)`` so that it broadcasts across the
                columns of ``h`` and ``target_W``.

        Returns:
            ``dout * target_W``, the gradient with respect to ``h``.
            The gradient for the weights, ``dout * h``, is handed to
            ``self.embed.backward``.
        """
        h, target_W = self.cache
        n_samples = dout.shape[0]
        dout_column = dout.reshape(n_samples, 1)

        # (batch, 1) * (batch, hidden): gradient for the looked-up rows.
        self.embed.backward(dout_column * h)
        # (batch, 1) * (batch, hidden): gradient for h.
        return dout_column * target_W

In [31]:
# Rebind W to a smaller 6 x 3 integer matrix holding 0..17 in row-major order.
W = np.arange(6 * 3).reshape(6, 3)

In [29]:
# Six row indices for one mini-batch; note that index 1 appears twice.
idx = np.array([1, 3, 0, 2 ,4 ,1])
embed = Embedding(W)
target_W = embed.forward(idx) # the rows of W selected by idx, one per entry

In [30]:
embDot = EmbeddingDot(W)
# h has one row per entry of idx (6) and one column per column of W (3).
h = np.array([[0.0011865, 0.00054727, -0.00350523],
 [ 0.0080607,  0.00873808, -0.00930809],
 [ 0.00719375 ,-0.00618628,  0.00065774],
 [ 0.00806074 , 0.00873808 ,-0.00930809],
 [-0.00048219,-0.00481416, -0.00305934],
 [ 0.00745012, -0.00218385 ,-0.00796147]])
embDot.forward(h, idx)
# Disabled debugging prints:
#print(target_W)
#print(target_W * h)
#print(out)

[[ 3  4  5]
 [ 9 10 11]
 [ 0  1  2]
 [ 6  7  8]
 [12 13 14]
 [ 3  4  5]]


[[ 0.0035595   0.00218908 -0.01752615]
 [ 0.0725463   0.0873808  -0.10238899]
 [ 0.         -0.00618628  0.00131548]
 [ 0.04836444  0.06116656 -0.07446472]
 [-0.00578628 -0.06258408 -0.04283076]
 [ 0.02235036 -0.0087354  -0.03980735]]


[-0.01177757  0.05753811 -0.0048708   0.03506628 -0.11120112 -0.02619239]


In [35]:
dout = np.random.randn(6)
# A (6,) array cannot be broadcast against the (6, 3) array h: broadcasting
# aligns trailing axes, so 6 would have to match 3. The failure is the point
# of this cell, so it is caught and printed rather than left to abort a
# full "Run All".
try:
    dout * h
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (6,) (6,3) 

In [36]:
# Draw 6 values and shape them into a (6, 1) column right away, so that the
# product broadcasts across the 3 columns of h.
dout = np.random.randn(6).reshape(6, 1)
dout * h

array([[-0.00120147, -0.00055417,  0.00354946],
       [-0.00120087, -0.00130178,  0.0013867 ],
       [-0.0026712 ,  0.0022971 , -0.00024423],
       [ 0.0024513 ,  0.00265729, -0.00283063],
       [ 0.00029956,  0.00299074,  0.00190058],
       [-0.00189828,  0.00055644,  0.00202857]])

In [55]:
# np.random.choice()
words = ['you', 'say', 'goodbye', 'I', 'hello', '.']
print(np.random.choice(words))
print(np.random.choice(words, size=5)) # drawn with replacement: duplicates can occur
print(np.random.choice(words, size=5, replace=False)) # drawn without replacement: no duplicates

# sampling according to a probability distribution
p = [0.5, 0.1, 0.05, 0.2, 0.05, 0.1] # must sum to 1!!
print(np.random.choice(words, p=p)) # entries with higher probability are drawn more often

say
['I' 'say' '.' 'I' 'you']
['hello' 'goodbye' 'you' 'say' '.']
you


In [45]:
# Draw two entries from words (with replacement, the default).
print(np.random.choice(words, size=2))

['hello' 'goodbye']


In [57]:
# Raise every probability to the power 0.75 and renormalise so the result
# sums to 1 again. The smallest entry gains relative weight (0.01 -> ~0.027).
p = [0.7, 0.29, 0.01]
new_p = np.asarray(p) ** 0.75
new_p = new_p / new_p.sum()
print(new_p)

[0.64196878 0.33150408 0.02652714]


In [None]:
corpus = np.array([0, 1, 2, 3, 4, 1, 2, 3])
power = 0.75
sample_size = 2

# NOTE(review): UnigramSampler is not defined in the cells visible here; it
# has to be defined or imported before this cell can run.
sampler = UnigramSampler(corpus, power, sample_size) # (list of word IDs, exponent applied to the probability distribution, number of samples to draw)
target = np.array([1, 3, 0]) # mini-batch of 3 positive examples
negative_sample = sampler.get_negative_sample(target)
print(negative_sample) # negatives for each target, i.e. IDs other than 1 / 3 / 0 respectively
# Example output:
# [[0 3]
#  [1 2]
#  [2 3]]

In [None]:
class NegativeSamplingLoss:
    def __init__(self, W, corpus, power=0.75, sample_size=5):
        self.sample_size = 