In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np

In [2]:
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

In [3]:
EMBED_DIM = 10
CONTEXT_SIZE = 2 # on either side of the center word

In [4]:
vocab = set(raw_text)

In [5]:
vocab_len = len(vocab)

In [6]:
words_to_idx = {word: i for i, word in enumerate(vocab)}

In [7]:
def invert_dict(d):
    return dict([ (v, k) for k, v in d.items() ])

In [8]:
idx_to_words = invert_dict(words_to_idx)

In [9]:
context_word = [([raw_text[i-2], raw_text[i-1], raw_text[i+1], raw_text[i+2]], raw_text[i]) 
                for i in range(2, len(raw_text)-2)]

In [10]:
def words_to_numbers(io, mapping):
    i_ret = []
    o_ret = None
    
    for word in io[0]:
        i_ret.append(words_to_idx[word])
    o_ret = words_to_idx[io[1]]
    
    return (i_ret, o_ret)

In [11]:
class CBOW(nn.Module):
    
    def __init__(self, vocab_size, embed_dim):
        
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.linear1 = nn.Linear(embed_dim,128)
        self.linear2 = nn.Linear(128,256)
        self.linear3 = nn.Linear(256, vocab_size)
        
    def forward(self, the_input):
        out = self.embed(the_input).sum(dim=0).view((1, -1))
        out = F.relu(self.linear1(out))
        out = F.relu(self.linear2(out))
        out = self.linear3(out)
        log_prob = F.log_softmax(out, dim=1)
        return log_prob

In [20]:
model = CBOW(vocab_len, EMBED_DIM).cuda()
loss_fn = nn.NLLLoss().cuda()
opt = optim.SGD(model.parameters(), lr=0.001)

In [21]:
for epoch in range(2000):
    
    for cw in context_word:
        c, w = words_to_numbers(cw, words_to_idx)
        cvar = autograd.Variable(torch.cuda.LongTensor(c), requires_grad=False)
        wvar = autograd.Variable(torch.cuda.LongTensor([w]), requires_grad=False)
        
        model.zero_grad()
        log_prob = model(cvar)
        loss = loss_fn(log_prob, wvar)
        loss.backward()
        opt.step()

In [None]:
"""
model.eval()
for context, target in context_word:
        c, w = words_to_numbers(cw, words_to_idx)
        cvar = autograd.Variable(torch.LongTensor(c))
        wvar = autograd.Variable(torch.LongTensor([w]))
        lp = model(cvar)
        
        # val, idx= torch.max(lp, -1)
        # print(context, target, words_to_idx[target],idx.data[0])
        
        n = lp.data.numpy()[0]
        print(n)
        print(context, target, words_to_idx[target], np.argmax(n))
        
"""

In [23]:
for context, target in context_word:

        # Step 1. Prepare the inputs to be passed to the model (i.e, turn the words
        # into integer indices and wrap them in variables)
        context_idxs = [words_to_idx[w] for w in context]
        context_var = autograd.Variable(torch.cuda.LongTensor(context_idxs))

        # Step 3. Run the forward pass, getting log probabilities over next
        # words
        log_probs = model(context_var)
        _,idx= torch.min(log_probs,-1)
        print (context,target, idx_to_words[idx.data[0]])

['We', 'are', 'to', 'study'] about As
['are', 'about', 'study', 'the'] to abstract
['about', 'to', 'the', 'idea'] study the
['to', 'study', 'idea', 'of'] the abstract
['study', 'the', 'of', 'a'] idea abstract
['the', 'idea', 'a', 'computational'] of abstract
['idea', 'of', 'computational', 'process.'] a abstract
['of', 'a', 'process.', 'Computational'] computational abstract
['a', 'computational', 'Computational', 'processes'] process. directed
['computational', 'process.', 'processes', 'are'] Computational is
['process.', 'Computational', 'are', 'abstract'] processes directed
['Computational', 'processes', 'abstract', 'beings'] are directed
['processes', 'are', 'beings', 'that'] abstract the
['are', 'abstract', 'that', 'inhabit'] beings things
['abstract', 'beings', 'inhabit', 'computers.'] that of
['beings', 'that', 'computers.', 'As'] inhabit directed
['that', 'inhabit', 'As', 'they'] computers. directed
['inhabit', 'computers.', 'they', 'evolve,'] As directed
['computers.', 'As', '