In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [2]:
def make_context_vector(context, word_to_idx):
    """Turn a sequence of context words into a tensor of vocabulary indices.

    Args:
        context: iterable of words to look up.
        word_to_idx: mapping from word to its integer index.

    Returns:
        A 1-D tensor of dtype ``torch.long`` containing ``word_to_idx[w]`` for
        every word of ``context``, in the order given.

    Raises:
        Whatever ``word_to_idx[w]`` raises for a missing word (``KeyError``
        for a plain dict).
    """
    indices = []
    for word in context:
        indices.append(word_to_idx[word])
    return torch.tensor(indices, dtype=torch.long)

In [3]:
# CBOW exercise from the PyTorch word-embeddings tutorial:
# pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html

# Hyper-parameters
context_size = 2    # words on each side of the target: {w_i-2 ... w_i ... w_i+2}
embedding_dim = 10  # size passed to the model's embedding layer

# Seed PyTorch's random number generator.
torch.manual_seed(1)

# Toy corpus; it is tokenized on whitespace only, so punctuation stays
# attached to the neighbouring word (e.g. "process.").
_corpus_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells."""
raw_text = _corpus_text.split()

In [4]:
class CBOW(nn.Module):
    """Continuous bag-of-words model.

    The embeddings of the context words are summed, passed through one
    ReLU hidden layer, and projected to log-probabilities over the vocabulary.

    Args:
        vocab_size: number of distinct words (rows of the embedding table and
            size of the output distribution).
        embedding_dim: size of each word embedding.
        hidden_dim: width of the hidden layer (defaults to 128, the value that
            was previously hard-coded).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim=128):
        super(CBOW, self).__init__()
        # Layers are created in the same order as before so that a fixed RNG
        # seed yields the same initial weights.
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.proj = nn.Linear(embedding_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, vocab_size)

    def forward(self, inputs):
        """Return log-probabilities of the target word.

        Args:
            inputs: long tensor of word indices, either ``(context,)`` for a
                single example or ``(batch, context)`` for a batch.

        Returns:
            Log-probabilities of shape ``(1, vocab_size)`` for a single
            example, or ``(batch, vocab_size)`` for a batch. Suitable for
            ``nn.NLLLoss``.
        """
        # Reduce over the context axis with a tensor op. The builtin sum()
        # iterates the first axis in Python, which is the batch axis for
        # batched input, and returns the int 0 for an empty context.
        embeds = self.embeddings(inputs).sum(dim=-2)
        if embeds.dim() == 1:
            # Single example: add the batch axis the loss function expects.
            embeds = embeds.unsqueeze(0)
        hidden = F.relu(self.proj(embeds))
        logits = self.output(hidden)
        return F.log_softmax(logits, dim=-1)

In [5]:

# --- Vocabulary -------------------------------------------------------------
vocab = set(raw_text)
vocab_size = len(vocab)

# Assign indices from the *sorted* vocabulary. Iteration order of a set of
# strings changes between interpreter runs (string hashing is randomized), so
# enumerating the set directly makes the word -> index mapping, and therefore
# the training results, irreproducible even with a fixed torch seed.
word_to_idx = {word: i for i, word in enumerate(sorted(vocab))}
idx_to_word = {i: word for word, i in word_to_idx.items()}

# --- Training pairs ---------------------------------------------------------
# For every position with a full window, the context is the `context_size`
# words before and after the target word.
data = []

for i in range(context_size, len(raw_text) - context_size):
    context = raw_text[i - context_size:i] + raw_text[i + 1:i + context_size + 1]
    target = raw_text[i]
    data.append((context, target))

# --- Model, optimizer, loss -------------------------------------------------
model = CBOW(vocab_size, embedding_dim)
optimizer = optim.SGD(model.parameters(), lr=0.001)

losses = []
# The model outputs log-probabilities, which is what NLLLoss expects.
loss_function = nn.NLLLoss()

# The index tensors do not change between epochs, so build them once instead
# of rebuilding them on every step.
training_examples = [
    (make_context_vector(ctx_words, word_to_idx),
     torch.tensor([word_to_idx[target_word]], dtype=torch.long))
    for ctx_words, target_word in data
]

# --- Training loop ----------------------------------------------------------
for epoch in range(100):
    total_loss = 0
    for context_vector, target_idx in training_examples:
        # Remember PyTorch accumulates gradients; zero them out
        model.zero_grad()

        log_probs = model(context_vector)
        loss = loss_function(log_probs, target_idx)

        # backpropagation
        loss.backward()
        # update the parameters
        optimizer.step()

        total_loss += loss.item()

    # Sum of the per-example losses for this epoch.
    losses.append(total_loss)

print(losses)


[231.01988804340363, 225.78056466579437, 220.84456384181976, 216.17986977100372, 211.76183533668518, 207.5667371749878, 203.57226872444153, 199.75444757938385, 196.09503555297852, 192.57728600502014, 189.18526601791382, 185.9053567647934, 182.72436380386353, 179.63405764102936, 176.62561213970184, 173.69162046909332, 170.8267730474472, 168.02760422229767, 165.28968572616577, 162.6090499162674, 159.98347198963165, 157.4135344028473, 154.89260923862457, 152.42107915878296, 149.9935007095337, 147.61142098903656, 145.2743947505951, 142.97936463356018, 140.72442543506622, 138.50938284397125, 136.33307874202728, 134.19420790672302, 132.08992570638657, 130.0212351679802, 127.98459303379059, 125.98146688938141, 124.0095751285553, 122.06960767507553, 120.15975046157837, 118.27977484464645, 116.43024837970734, 114.60841220617294, 112.81598377227783, 111.05099505186081, 109.31064599752426, 107.59685772657394, 105.9103861451149, 104.24831330776215, 102.61256778240204, 101.0044537782669, 99.4193871

In [6]:

# Sanity check: ask the trained model for the word in the middle of a context
# taken from the training text.

print("*************************************************************************")

context = ['process.','Computational','are', 'abstract']
context_vector = make_context_vector(context, word_to_idx)
# Inference only: run under no_grad so no autograd graph is recorded. This
# replaces the legacy `.data` access and lets the output convert to numpy.
with torch.no_grad():
    a = model(context_vector).numpy()
print('Raw text: {}\n'.format(' '.join(raw_text)))
print('Test Context: {}\n'.format(context))
# Index of the highest log-probability, as a plain int for the dict lookup.
max_idx = int(np.argmax(a))
print('Prediction: {}'.format(idx_to_word[max_idx]))

*************************************************************************
Raw text: We are about to study the idea of a computational process. Computational processes are abstract beings that inhabit computers. As they evolve, processes manipulate other abstract things called data. The evolution of a process is directed by a pattern of rules called a program. People create programs to direct processes. In effect, we conjure the spirits of the computer with our spells.

Test Context: ['process.', 'Computational', 'are', 'abstract']

Prediction: processes
