---
# CBOW
---

In [None]:
## using pytorch
import torch
import torch.nn as nn
from tqdm.notebook import tqdm
# Width of each learned word-embedding vector; passed to the CBOW constructor below.
EMBEDDING_DIM = 128
# NOTE(review): the CBOW training cell defines and uses its own `epoch = 100`,
# and EPOCHS is reassigned in the skip-gram section before it is read there,
# so this particular value is not read anywhere in the visible cells.
EPOCHS = 100

# Toy corpus used to train both the CBOW and the skip-gram models in this notebook.
sentence = '''In the case of CBOW, one word is eliminated, and the word is predicted from surrounding words.
Therefore, it takes multiple input vectors as inputs to the model and creates one output vector.
In contrast, Skip-Gram learns by removing all words except one word and predicting the surrounding words in the context through one word. 
So, it takes a vector as input and produces multiple output vectors.
CBOW and Skip-Gram are different.'''

# Whitespace tokenization only: punctuation stays attached to the word,
# so e.g. 'CBOW,' and 'CBOW' end up as two distinct tokens.
ex_sample = sentence.split()
print(ex_sample)

['In', 'the', 'case', 'of', 'CBOW,', 'one', 'word', 'is', 'eliminated,', 'and', 'the', 'word', 'is', 'predicted', 'from', 'surrounding', 'words.', 'Therefore,', 'it', 'takes', 'multiple', 'input', 'vectors', 'as', 'inputs', 'to', 'the', 'model', 'and', 'creates', 'one', 'output', 'vector.', 'In', 'contrast,', 'Skip-Gram', 'learns', 'by', 'removing', 'all', 'words', 'except', 'one', 'word', 'and', 'predicting', 'the', 'surrounding', 'words', 'in', 'the', 'context', 'through', 'one', 'word.', 'So,', 'it', 'takes', 'a', 'vector', 'as', 'input', 'and', 'produces', 'multiple', 'output', 'vectors.', 'CBOW', 'and', 'Skip-Gram', 'are', 'different.']


In [None]:
###### Split the input sentence into words and remove duplicates. ######

vocab = set(ex_sample) # unique tokens only
# The vocabulary size must count UNIQUE tokens. Using len(ex_sample) (the total
# token count) made the embedding table and the output layer larger than the
# index range ever produced by word_to_idx, leaving rows/classes that can never
# be looked up or be a training target.
vocab_size = len(vocab)

###### Build the word -> index and index -> word lookup dictionaries. ######

# Iteration order of a set of strings can change between interpreter runs
# (string hashing is randomized), so enumerate a sorted copy to make the
# word <-> index mapping reproducible across kernel restarts.
ordered_vocab = sorted(vocab)
word_to_idx = {word : index for index, word in enumerate(ordered_vocab)}
idx_to_word = {index : word for index, word in enumerate(ordered_vocab)}

print(len(word_to_idx))

48


In [None]:
##### Build the training data #####

def make_context_vector(context, word_to_idx):
  """Turn a sequence of context words into a 1-D tensor of vocabulary indices.

  Args:
    context: iterable of words; every word must be a key of ``word_to_idx``.
    word_to_idx: mapping from word to integer index.

  Returns:
    A ``torch.long`` tensor holding one index per word, in the same order.

  Raises:
    KeyError: if a word is missing from ``word_to_idx``.
  """
  return torch.tensor([word_to_idx[word] for word in context], dtype=torch.long)

def make_data(sentence):
  """Build CBOW training pairs from a tokenized sentence.

  Args:
    sentence: indexable sequence of tokens (e.g. the list returned by
      ``str.split()``). Passing a raw string would index single characters.

  Returns:
    A list of ``(context, target)`` tuples. For every position ``i`` that has
    two neighbours on each side, ``context`` is the list
    ``[tok[i-2], tok[i-1], tok[i+1], tok[i+2]]`` and ``target`` is ``tok[i]``.
    Sequences with fewer than five tokens yield an empty list.
  """
  # Read from the argument, not from the notebook-global `ex_sample`, so the
  # function works for any token list it is given.
  return [
    ([sentence[i-2], sentence[i-1], sentence[i+1], sentence[i+2]], sentence[i])
    for i in range(2, len(sentence)-2)
  ]

# (context, target) training pairs for CBOW, built from the tokenized corpus.
data = make_data(ex_sample)

In [None]:
##### CBOW Model #####

class CBOW(nn.Module):
  """Continuous bag-of-words model.

  The embeddings of the context words are summed, passed through a 64-unit
  ReLU hidden layer, and projected to log-probabilities over the vocabulary.

  Args:
    vocab_size: number of distinct word indices (embedding rows / output classes).
    embedding_dim: size of each embedding vector.
  """
  def __init__(self, vocab_size, embedding_dim):
    super().__init__()

    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.layer1 = nn.Linear(embedding_dim,64)
    self.relu = nn.ReLU()

    self.layer2 = nn.Linear(64,vocab_size)
    self.softmax = nn.LogSoftmax(dim = -1)

  def forward(self,inputs):
    """Compute log-probabilities of the target word.

    Args:
      inputs: long tensor of context word indices, either ``(context,)`` for a
        single example or ``(batch, context)`` for several examples.

    Returns:
      Log-probabilities of shape ``(1, vocab_size)`` for a single example, or
      ``(batch, vocab_size)`` for a batch.
    """
    embedded = self.embedding(inputs)
    # Sum over the context axis with a tensor op rather than Python's built-in
    # sum() (which iterates row by row); reshaping to (-1, embedding_dim) keeps
    # the original (1, embedding_dim) result for a 1-D input and yields one row
    # per example for a 2-D input.
    out = embedded.sum(dim=-2).view(-1, embedded.size(-1))
    out = self.layer1(out)
    out = self.relu(out)
    out = self.layer2(out)
    out = self.softmax(out)
    return out

In [None]:
##### Declare the model, loss function, and optimizer #####
model = CBOW(vocab_size, EMBEDDING_DIM)
# CBOW.forward ends in LogSoftmax, so the matching criterion is NLLLoss.
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.001)

In [None]:
##### Train #####
epoch = 100

for i in tqdm(range(epoch)):
  # Reset gradients once per epoch; the optimizer takes a single step on the
  # loss summed over every training pair.
  optimizer.zero_grad()
  sample_losses = []
  for context, target in data:
    targets = torch.tensor([word_to_idx[target]])
    output = model(make_context_vector(context, word_to_idx))
    sample_losses.append(loss_function(output, targets))
  # Built-in sum starts from 0 and adds the losses in order.
  total_loss = sum(sample_losses)
  print(f'epoch = {i}, loss = {total_loss}')
  total_loss.backward()
  optimizer.step()


  0%|          | 0/100 [00:00<?, ?it/s]

epoch = 0, loss = 297.2266845703125
epoch = 1, loss = 288.53582763671875
epoch = 2, loss = 280.43475341796875
epoch = 3, loss = 272.7822570800781
epoch = 4, loss = 265.3898010253906
epoch = 5, loss = 258.0736389160156
epoch = 6, loss = 250.82208251953125
epoch = 7, loss = 243.57269287109375
epoch = 8, loss = 236.2449951171875
epoch = 9, loss = 228.90196228027344
epoch = 10, loss = 221.5223846435547
epoch = 11, loss = 214.08372497558594
epoch = 12, loss = 206.60882568359375
epoch = 13, loss = 199.13983154296875
epoch = 14, loss = 191.69189453125
epoch = 15, loss = 184.33592224121094
epoch = 16, loss = 177.0757293701172
epoch = 17, loss = 169.92478942871094
epoch = 18, loss = 162.8783721923828
epoch = 19, loss = 155.92266845703125
epoch = 20, loss = 149.09378051757812
epoch = 21, loss = 142.3462371826172
epoch = 22, loss = 135.72491455078125
epoch = 23, loss = 129.21017456054688
epoch = 24, loss = 122.85318756103516
epoch = 25, loss = 116.64346313476562
epoch = 26, loss = 110.60317993164

In [None]:
##### Inference #####

# Context words surrounding the word to be predicted; each must be in word_to_idx.
test_data = ['case', 'of', 'one', 'word']
test_vector = make_context_vector(test_data, word_to_idx)
# Prediction only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
  result = model(test_vector)
# result has one row of log-probabilities; the arg-max index is the predicted word.
print('Prediction : ', idx_to_word[torch.argmax(result[0]).item()])

Prediction :  CBOW,


In [None]:
# Show the direct sub-modules registered on the CBOW model.
for child in model.children():
  print(child)

# Unpacking into exactly five names doubles as a check that the model exposes
# five parameter tensors (embedding weight, then weight and bias of each
# linear layer, in registration order).
a,b,c,d,e = model.parameters()
for param in (a, b, c, d, e):
  print(param.shape)

Embedding(72, 128)
Linear(in_features=128, out_features=64, bias=True)
ReLU()
Linear(in_features=64, out_features=72, bias=True)
LogSoftmax(dim=-1)
torch.Size([72, 128])
torch.Size([64, 128])
torch.Size([64])
torch.Size([72, 64])
torch.Size([72])


---
# Skip-gram
----

In [None]:
def make_context_vector_skip(context, word_to_idx):
  """Look up a single word and return its index as a 0-dim ``torch.long`` tensor.

  Args:
    context: the (single) input word; must be a key of ``word_to_idx``.
    word_to_idx: mapping from word to integer index.

  Raises:
    KeyError: if ``context`` is missing from ``word_to_idx``.
  """
  return torch.tensor(word_to_idx[context], dtype=torch.long)

def make_data_skip(sentence):
  """Build skip-gram training pairs from a tokenized sentence.

  Args:
    sentence: indexable sequence of tokens (e.g. the list returned by
      ``str.split()``). Passing a raw string would index single characters.

  Returns:
    A list of ``(context, target)`` tuples. For every position ``i`` that has
    two neighbours on each side, ``context`` is the centre token ``tok[i]``
    and ``target`` is the list ``[tok[i-2], tok[i-1], tok[i+1], tok[i+2]]``.
    Sequences with fewer than five tokens yield an empty list.
  """
  # Read from the argument, not from the notebook-global `ex_sample`, so the
  # function works for any token list it is given.
  return [
    (sentence[i], [sentence[i-2], sentence[i-1], sentence[i+1], sentence[i+2]])
    for i in range(2, len(sentence)-2)
  ]

# (centre word, surrounding words) training pairs for skip-gram, built from the tokenized corpus.
data_skip = make_data_skip(ex_sample)

In [None]:
# Hyperparameters for the skip-gram section. EMBEDDING_DIM and EPOCHS rebind
# the constants defined in the first cell (EPOCHS changes from 100 to 200).
EMBEDDING_DIM = 128
EPOCHS = 200
# Number of surrounding words predicted per input word; matches the four
# neighbours collected by make_data_skip.
CONTEXT_SIZE = 4

class Skip_gram(nn.Module):
  """Skip-gram model: predicts ``context_size`` surrounding words from one word.

  The input word is embedded, passed through a 64-unit ReLU hidden layer, and
  projected to ``context_size * vocab_size`` logits, which are reshaped to one
  row per context position and normalized per row.

  Args:
    vocab_size: number of distinct word indices.
    embedding_dim: size of each embedding vector.
    context_size: number of surrounding words predicted per input word.
  """
  def __init__(self,vocab_size, embedding_dim,context_size):
    super().__init__()

    # Keep the sizes on the instance so forward() does not depend on a
    # notebook-global `vocab_size`.
    self.vocab_size = vocab_size
    self.context_size = context_size
    self.layer = nn.Sequential(
                        nn.Embedding(vocab_size,embedding_dim),
                        nn.Linear(embedding_dim, 64),
                        nn.ReLU(),
                        nn.Linear(64, vocab_size * context_size),
                               )
    # Applied AFTER the reshape in forward(): normalizing the flat
    # (context_size * vocab_size) vector would spread one distribution over all
    # positions, so no single row would be a valid log-probability vector.
    self.log_softmax = nn.LogSoftmax(dim = -1)

  def forward(self,inputs):
    """Compute per-position log-probabilities over the vocabulary.

    Args:
      inputs: long tensor holding a word index; 0-dim for a single word, or
        any shape for several words at once.

    Returns:
      Tensor of shape ``(*inputs.shape, context_size, vocab_size)``; for a
      0-dim input that is ``(context_size, vocab_size)``. Every row along the
      last axis is a normalized log-probability distribution.
    """
    logits = self.layer(inputs)
    logits = logits.view(*inputs.shape, self.context_size, self.vocab_size)
    return self.log_softmax(logits)

In [None]:
# Skip-gram model plus its criterion and optimizer.
# Note: this rebinds `loss_function` and `optimizer`, which the CBOW cells
# above also use; after this cell runs, `optimizer` updates model_skip only.
model_skip = Skip_gram(vocab_size, EMBEDDING_DIM, CONTEXT_SIZE)
loss_function = nn.NLLLoss()
optimizer = torch.optim.Adam(model_skip.parameters(), lr=0.001)

In [None]:
for i in range(EPOCHS):
  # Reset gradients once per epoch; the optimizer takes a single step on the
  # loss summed over every training pair.
  optimizer.zero_grad()
  step_losses = []
  for center_word, neighbour_words in data_skip:
    # One target index per output row of the model.
    neighbour_ids = torch.tensor([word_to_idx[w] for w in neighbour_words])
    output = model_skip(make_context_vector_skip(center_word, word_to_idx))
    step_losses.append(loss_function(output, neighbour_ids))
  # Built-in sum starts from 0 and adds the losses in order.
  total_loss = sum(step_losses)
  print(f'epoch = {i}, loss = {total_loss}')
  total_loss.backward()
  optimizer.step()

epoch = 0, loss = 387.027099609375
epoch = 1, loss = 384.19879150390625
epoch = 2, loss = 381.4392395019531
epoch = 3, loss = 378.72174072265625
epoch = 4, loss = 376.05694580078125
epoch = 5, loss = 373.4172668457031
epoch = 6, loss = 370.78033447265625
epoch = 7, loss = 368.1426086425781
epoch = 8, loss = 365.4821472167969
epoch = 9, loss = 362.7930603027344
epoch = 10, loss = 360.06988525390625
epoch = 11, loss = 357.3033447265625
epoch = 12, loss = 354.48583984375
epoch = 13, loss = 351.6040344238281
epoch = 14, loss = 348.6495056152344
epoch = 15, loss = 345.6238708496094
epoch = 16, loss = 342.52117919921875
epoch = 17, loss = 339.3347473144531
epoch = 18, loss = 336.0695495605469
epoch = 19, loss = 332.7181701660156
epoch = 20, loss = 329.28692626953125
epoch = 21, loss = 325.7747497558594
epoch = 22, loss = 322.1839904785156
epoch = 23, loss = 318.5202331542969
epoch = 24, loss = 314.78448486328125
epoch = 25, loss = 310.9805603027344
epoch = 26, loss = 307.1161804199219
epoch 

In [None]:
# Input word whose surrounding words are to be predicted; must be in word_to_idx.
test_data = 'Skip-Gram'
test_vector = make_context_vector_skip(test_data, word_to_idx)
# Prediction only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
  result_skip = model_skip(test_vector)
# One row per context position; take the arg-max word of each row.
print('Prediction : ', [idx_to_word[torch.argmax(r).item()] for r in result_skip])

Prediction :  ['CBOW', 'contrast,', 'learns', 'by']


In [None]:
# Show the direct sub-modules registered on the skip-gram model.
for child in model_skip.children():
  print(child)

# Unpacking into exactly five names doubles as a check that the model exposes
# five parameter tensors (embedding weight, then weight and bias of each
# linear layer, in registration order).
a,b,c,d,e = model_skip.parameters()
for param in (a, b, c, d, e):
  print(param.shape)

Sequential(
  (0): Embedding(72, 128)
  (1): Linear(in_features=128, out_features=64, bias=True)
  (2): ReLU()
  (3): Linear(in_features=64, out_features=288, bias=True)
  (4): LogSoftmax(dim=-1)
)
torch.Size([72, 128])
torch.Size([64, 128])
torch.Size([64])
torch.Size([288, 64])
torch.Size([288])
