In [6]:
import torch
import torch.nn as nn
import numpy as np

def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)

CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
EMDEDDING_DIM = 100

word_to_ix = {}
ix_to_word = {}

raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()


# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)

for i, word in enumerate(vocab):
    word_to_ix[word] = i
    ix_to_word[i] = word

data = []
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))



class CBOW(torch.nn.Module):

    def __init__(self, vocab_size, embedding_dim):
        super(CBOW, self).__init__()

        #out: 1 x emdedding_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(embedding_dim, 128)
        self.activation_function1 = nn.ReLU()
        
        #out: 1 x vocab_size
        self.linear2 = nn.Linear(128, vocab_size)
        self.activation_function2 = nn.LogSoftmax(dim = -1)
        

    def forward(self, inputs):
        embeds = sum(self.embeddings(inputs)).view(1,-1)
        out = self.linear1(embeds)
        out = self.activation_function1(out)
        out = self.linear2(out)
        out = self.activation_function2(out)
        return out

    def get_word_emdedding(self, word):
        word = torch.LongTensor([word_to_ix[word]])
        return self.embeddings(word).view(1,-1)


model = CBOW(vocab_size, EMDEDDING_DIM)
losses = []
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)


for epoch in range(50):
    total_loss = 0
    for context, target in data:
        context_vector = make_context_vector(context, word_to_ix)  
        model.zero_grad()
        log_probs = model(context_vector)
        loss = loss_function(log_probs, torch.tensor([word_to_ix[target]], dtype=torch.long))
        loss.backward()
        optimizer.step()

        total_loss += loss.data
    losses.append(total_loss)
    
print(losses)

[tensor(228.4854), tensor(221.1176), tensor(214.0223), tensor(207.1573), tensor(200.5137), tensor(194.0350), tensor(187.7185), tensor(181.5569), tensor(175.5262), tensor(169.5857), tensor(163.7313), tensor(157.9508), tensor(152.2783), tensor(146.6813), tensor(141.1694), tensor(135.7576), tensor(130.4146), tensor(125.1842), tensor(120.0377), tensor(115.0035), tensor(110.0562), tensor(105.2087), tensor(100.4591), tensor(95.8237), tensor(91.3065), tensor(86.9287), tensor(82.7003), tensor(78.5927), tensor(74.6298), tensor(70.8077), tensor(67.1307), tensor(63.6103), tensor(60.2401), tensor(57.0204), tensor(53.9629), tensor(51.0376), tensor(48.2875), tensor(45.6821), tensor(43.2152), tensor(40.9000), tensor(38.7191), tensor(36.6825), tensor(34.7641), tensor(32.9756), tensor(31.2977), tensor(29.7390), tensor(28.2740), tensor(26.9161), tensor(25.6351), tensor(24.4498)]
