### 다중 분류에서 이진 분류로(구현)

In [17]:
import sys
sys.path.append('..')
from common.np import *  # import numpy as np
from common.layers import Embedding, SigmoidWithLoss
import collections


class EmbeddingDot:
    """Embedding lookup followed by a per-sample dot product with ``h``.

    Wraps a single ``Embedding`` layer built from ``W`` and exposes that
    layer's ``params`` and ``grads`` lists as its own.
    """

    def __init__(self, W):
        embed = Embedding(W)
        self.embed = embed
        self.params = embed.params
        self.grads = embed.grads
        # Filled by forward() with (h, looked-up rows) for use in backward().
        self.cache = None

    def forward(self, h, idx):
        """Return, for each sample, the dot product of ``h`` and the row for ``idx``.

        Args:
            h: array multiplied element-wise with the looked-up rows.
            idx: indices handed to the wrapped Embedding layer.

        Returns:
            The element-wise product summed over axis 1.
        """
        rows = self.embed.forward(idx)
        # Trace output: the notebook's recorded cell outputs include it.
        print(rows)
        scores = np.sum(rows * h, axis=1)

        self.cache = (h, rows)
        return scores

    def backward(self, dout):
        """Propagate ``dout`` to the embedding layer and return the gradient for ``h``.

        Args:
            dout: upstream gradient with one entry per sample.

        Returns:
            ``dout`` (as a column) multiplied by the rows cached in forward().
        """
        h, rows = self.cache
        # Turn dout into a column so it broadcasts across each row.
        dcol = dout.reshape(dout.shape[0], 1)
        print(dcol.shape)
        drows = dcol * h
        print(drows)
        self.embed.backward(drows)
        return dcol * rows

In [18]:
# Toy weight matrix: the integers 0..20 arranged as 7 rows of 3 values.
W = np.arange(21).reshape(7,3)

embed_dot = EmbeddingDot(W)
# Batch of one sample: a single hidden vector and the row index paired with it.
h = np.array([[7,7,7]])
idx = np.array([1])
out = embed_dot.forward(h, idx)
print(out)
# Upstream gradient: one value per sample.
dout = np.array([33])
dh = embed_dot.backward(dout)
print(embed_dot.grads)
print(dh)

[[3 4 5]]
[84]
(1, 1)
[[231 231 231]]
[array([[  0,   0,   0],
       [231, 231, 231],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0]])]
[[ 99 132 165]]


In [20]:
# Same toy weight matrix as before, rebuilt so the layer starts fresh.
W = np.arange(21).reshape(7,3)

embed_dot = EmbeddingDot(W)
# Batch of three samples, each paired with a different row index.
h = np.array([[7,7,7],[8,8,8],[9,9,9]])
idx = np.array([1,2,3])
out = embed_dot.forward(h, idx)
print(out)
# Upstream gradient: one value per sample.
dout = np.array([33,44,55])
dh = embed_dot.backward(dout)
print(embed_dot.grads)
print(dh)

[[ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
[ 84 168 270]
(3, 1)
[[231 231 231]
 [352 352 352]
 [495 495 495]]
[array([[  0,   0,   0],
       [231, 231, 231],
       [352, 352, 352],
       [495, 495, 495],
       [  0,   0,   0],
       [  0,   0,   0],
       [  0,   0,   0]])]
[[ 99 132 165]
 [264 308 352]
 [495 550 605]]


### 네거티브 샘플링의 샘플링 기법

In [36]:
import numpy as np

# Draw one random integer from 0..9 (an int argument is treated as np.arange(10)).
np.random.choice(10)

6

In [40]:
# Repeat the draw; every call samples independently.
np.random.choice(10)

6

In [48]:
# Pick a single random element from a list of words.
words = ['you','say','goodbye','I','hello','.']
np.random.choice(words)

'I'

In [51]:
# Draw five elements; sampling is with replacement by default, so repeats can occur.
np.random.choice(words, size=5)

array(['.', 'say', '.', 'I', 'I'], dtype='<U7')

In [64]:
# Draw five elements without replacement, so no word appears twice.
np.random.choice(words, size=5, replace=False)

array(['hello', 'you', 'say', '.', 'I'], dtype='<U7')

In [83]:
# p[i] is the probability of selecting words[i]; the values sum to 1.
p = [0.5, 0.1, 0.05, 0.2, 0.05, 0.1]
np.random.choice(words, p=p)

'say'

In [27]:
# Raise each probability to the 0.75 power and renormalise so they sum to 1 again.
# In the printed result the smallest entry (0.01) grows while the largest (0.7) shrinks.
p = [0.7, 0.29, 0.01]
new_p = np.power(p, 0.75)
new_p /= np.sum(new_p)
print(new_p)

[0.64196878 0.33150408 0.02652714]


In [84]:
class UnigramSampler:
    """Sample negative word ids from a power-smoothed unigram distribution.

    The distribution is derived from how often each id occurs in ``corpus``:
    the counts are raised to ``power`` and normalised so they sum to 1.
    """

    def __init__(self, corpus, power, sample_size):
        """Build the sampling distribution.

        Args:
            corpus: iterable of word ids. Counts are read back for ids
                0..vocab_size-1, so ids are presumably contiguous from 0
                (confirm against the caller).
            power: exponent applied to the raw counts.
            sample_size: number of negative ids drawn per target.
        """
        self.sample_size = sample_size

        # Occurrence count of every id in the corpus.
        freq = collections.Counter(corpus)
        n_ids = len(freq)
        self.vocab_size = n_ids

        raw = np.zeros(n_ids)
        for word_id in range(n_ids):
            raw[word_id] = freq[word_id]

        smoothed = np.power(raw, power)
        smoothed /= np.sum(smoothed)
        self.word_p = smoothed

    def get_negative_sample(self, target):
        """Draw ``sample_size`` negative ids for each entry of ``target``.

        Args:
            target: array of target ids; its first dimension is the batch size.

        Returns:
            Array of shape (batch_size, sample_size) holding the sampled ids.
        """
        n_rows = target.shape[0]

        if GPU:
            # On the GPU (cupy) speed takes priority:
            # the negative samples may include the target itself.
            return np.random.choice(self.vocab_size, size=(n_rows, self.sample_size),
                                    replace=True, p=self.word_p)

        samples = np.zeros((n_rows, self.sample_size), dtype=np.int32)
        for row in range(n_rows):
            # Zero out the target's probability and renormalise so the
            # target can never be drawn as its own negative.
            probs = self.word_p.copy()
            probs[target[row]] = 0
            probs /= probs.sum()
            samples[row, :] = np.random.choice(self.vocab_size, size=self.sample_size, replace=False, p=probs)

        return samples

In [95]:
# Tiny corpus of word ids: ids 1, 2 and 3 occur twice, ids 0 and 4 once.
corpus = np.array([0, 1, 2, 3, 4, 1, 2, 3])
power = 0.75
sample_size = 2

sampler = UnigramSampler(corpus, power, sample_size)
# Three targets -> one row of sample_size negative ids per target.
target = np.array([1, 3, 0])
negative_sample = sampler.get_negative_sample(target)
print(negative_sample)

[[3 0]
 [4 1]
 [2 1]]


### 네거티브 샘플링 구현

In [18]:
class NegativeSamplingLoss:
    """Loss built from one positive example plus ``sample_size`` negative examples.

    Holds ``sample_size + 1`` pairs of (EmbeddingDot, SigmoidWithLoss) layers:
    pair 0 scores the true target, the remaining pairs score sampled negatives.
    Every EmbeddingDot layer is constructed from the same ``W``.
    """

    def __init__(self, W, corpus, power=0.75, sample_size=5):
        self.sample_size = sample_size
        self.sampler = UnigramSampler(corpus, power, sample_size)

        n_layers = sample_size + 1
        self.loss_layers = [SigmoidWithLoss() for _ in range(n_layers)]
        self.embed_dot_layers = [EmbeddingDot(W) for _ in range(n_layers)]

        # Gather the parameters and gradients of all EmbeddingDot layers.
        self.params, self.grads = [], []
        for embed_dot in self.embed_dot_layers:
            self.params.extend(embed_dot.params)
            self.grads.extend(embed_dot.grads)

    def forward(self, h, target):
        """Return the summed loss over the positive and all negative examples.

        Args:
            h: hidden-layer activations passed to every EmbeddingDot layer.
            target: array of correct ids; its first dimension is the batch size.
        """
        n_rows = target.shape[0]
        negatives = self.sampler.get_negative_sample(target)

        # Forward pass for the positive example (label 1).
        ones = np.ones(n_rows, dtype=np.int32)
        pos_score = self.embed_dot_layers[0].forward(h, target)
        loss = self.loss_layers[0].forward(pos_score, ones)

        # Forward pass for the negative examples (label 0), one column per layer pair.
        zeros = np.zeros(n_rows, dtype=np.int32)
        for col in range(self.sample_size):
            layer_no = col + 1
            neg_score = self.embed_dot_layers[layer_no].forward(h, negatives[:, col])
            loss += self.loss_layers[layer_no].forward(neg_score, zeros)

        return loss

    def backward(self, dout=1):
        """Back-propagate through every layer pair and return the summed gradient for ``h``."""
        dh = 0
        for loss_layer, embed_dot in zip(self.loss_layers, self.embed_dot_layers):
            dh += embed_dot.backward(loss_layer.backward(dout))

        return dh
