<a href="https://colab.research.google.com/github/adithya-tp/PyTorch-Notebooks/blob/master/03_CBOW_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Setting up imports and training data***

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(2)

<torch._C.Generator at 0x7f2d22229c70>

In [12]:
# Number of words taken on EACH side of the target word.
CONTEXT_SIZE = 2
# Width of each word's embedding vector.
EMBEDDING_DIM = 10
raw_text_list = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

vocab = set(raw_text_list)
# Enumerate the vocabulary in sorted order: iterating a set of strings directly
# can yield a different order in every interpreter session (hash randomization),
# which would silently change every word's index between runs.
vocab_dict = {word: i for i, word in enumerate(sorted(vocab))}

# Each example pairs the CONTEXT_SIZE words before and after a position with the
# word at that position. Positions too close to either end of the text to have a
# full window are skipped.
training_data = []
for index in range(CONTEXT_SIZE, len(raw_text_list) - CONTEXT_SIZE):
  context = (raw_text_list[index - CONTEXT_SIZE:index]
             + raw_text_list[index + 1:index + CONTEXT_SIZE + 1])
  target = raw_text_list[index]
  training_data.append((context, target))

# let us try printing out the first three and the last three examples of the training data
print(training_data[:3])
print(training_data[-3:])

[(['We', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study')]
[(['spirits', 'of', 'computer', 'with'], 'the'), (['of', 'the', 'with', 'our'], 'computer'), (['the', 'computer', 'our', 'spells.'], 'with')]


# ***Defining some utility functions and the CBOW model***

In [0]:
# Convert the context words into the embedding-table row indices the model should look up
def get_lookup_tensor(context, vocab_dict):
  """Translate context words into a 1-D tensor of vocabulary indices.

  Args:
    context: iterable of words; each one must be a key of `vocab_dict`.
    vocab_dict: mapping from word to integer index.

  Returns:
    A `torch.long` tensor holding one index per word, in the order given.

  Raises:
    KeyError: if a word is missing from `vocab_dict`.
  """
  row_indices = list(map(vocab_dict.__getitem__, context))
  return torch.tensor(row_indices, dtype=torch.long)

In [0]:
class CBOW(nn.Module):
  """Continuous bag-of-words model.

  The embeddings of the context words are summed into a single vector, passed
  through a ReLU hidden layer, and projected to log-probabilities over the
  vocabulary.
  """

  def __init__(self, len_vocab, CONTEXT_SIZE, EMBEDDING_DIM):
    """Create the embedding table and the two linear layers.

    Args:
      len_vocab: number of distinct words (embedding rows and output classes).
      CONTEXT_SIZE: accepted for the caller's convenience but not used here;
        summing the embeddings makes the network independent of window size.
      EMBEDDING_DIM: length of each word's embedding vector.
    """
    super().__init__()
    hidden_width = 128
    self.embeddings = nn.Embedding(len_vocab, EMBEDDING_DIM)
    self.linear1 = nn.Linear(EMBEDDING_DIM, hidden_width)
    self.linear2 = nn.Linear(hidden_width, len_vocab)

  def forward(self, lookup_tensor):
    """Score every vocabulary word as the centre of the given context.

    Args:
      lookup_tensor: 1-D tensor of word indices for one context window.

    Returns:
      Tensor of shape (1, len_vocab) containing log-probabilities.
    """
    context_vectors = self.embeddings(lookup_tensor)
    # Collapse the per-word vectors into one row so the linear layers see a
    # batch of size one.
    bag = context_vectors.sum(dim=0).view(1, -1)
    hidden = torch.relu(self.linear1(bag))
    logits = self.linear2(hidden)
    return torch.log_softmax(logits, dim=-1)

In [35]:
# Training hyperparameters, named so they can be tuned in one place.
NUM_EPOCHS = 100
LEARNING_RATE = 0.01

model = CBOW(len(vocab), CONTEXT_SIZE, EMBEDDING_DIM)
losses = []  # one entry per epoch: the loss summed over every training example
loss_function = nn.NLLLoss()  # takes log-probabilities, which CBOW.forward returns
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

# The index tensors are identical in every epoch, so build them once up front
# rather than re-creating them for each example on each pass.
training_tensors = [
  (get_lookup_tensor(context, vocab_dict),
   torch.tensor([vocab_dict[target]], dtype=torch.long))
  for context, target in training_data
]

for epoch in range(NUM_EPOCHS):
  total_loss = 0
  for lookup_tensor, target_tensor in training_tensors:
    # Clear gradients left over from the previous example before backprop.
    optimizer.zero_grad()
    out = model(lookup_tensor)
    loss = loss_function(out, target_tensor)
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
  losses.append(total_loss)
print(losses)

[237.258868932724, 203.09004044532776, 177.6140923500061, 156.5703957080841, 138.30288171768188, 122.156085729599, 107.72539806365967, 94.76788902282715, 83.21345710754395, 72.86995792388916, 63.681771993637085, 55.542712926864624, 48.380168437957764, 42.11221408843994, 36.652937173843384, 31.941946268081665, 27.900954246520996, 24.45238447189331, 21.522731065750122, 19.040234088897705, 16.932087421417236, 15.14035153388977, 13.618410110473633, 12.316611289978027, 11.198600769042969, 10.23484754562378, 9.400622844696045, 8.674383163452148, 8.037551879882812, 7.474445819854736, 6.977100372314453, 6.533580780029297, 6.137628078460693, 5.782089710235596, 5.459830284118652, 5.1691389083862305, 4.90471887588501, 4.661912441253662, 4.441378593444824, 4.238258361816406, 4.050832271575928, 3.8771862983703613, 3.7175607681274414, 3.568871021270752, 3.4299850463867188, 3.300971031188965, 3.180060386657715, 3.067263603210449, 2.9617419242858887, 2.8621020317077637, 2.7687530517578125, 2.680490970

# ***Sample Test for the Trained Model***

In [0]:
def get_predicted_word(pred_tensor, word_to_index=None):
  """Return the vocabulary word with the highest score in `pred_tensor`.

  Args:
    pred_tensor: model output for one context, e.g. shape (1, vocab_size).
      If several rows are supplied, only the first row is decoded.
    word_to_index: mapping from word to integer index. Defaults to the
      notebook-level `vocab_dict` when omitted.

  Returns:
    The word whose stored index equals the position of the largest score, or
    None if no word in the mapping has that index.
  """
  if word_to_index is None:
    word_to_index = vocab_dict
  # argmax selects the true maximum; a tolerance comparison against the max
  # could pick an earlier entry that is merely close to it.
  pred_index = pred_tensor.argmax(dim=-1).reshape(-1)[0].item()
  # Match on the stored index rather than on iteration position, so the result
  # does not depend on the order in which the mapping was built.
  for word, index in word_to_index.items():
    if index == pred_index:
      return word
  return None

In [51]:
# Probe the trained model with a window that occurs in the training text:
# two words to the left and two to the right of the slot to be filled.
sample_context = ["a", "process", "directed", "by"]
pred_tensor = model(get_lookup_tensor(sample_context, vocab_dict))
word = get_predicted_word(pred_tensor)
print("Prediction of central word: ", word)

# Re-insert the predicted word between the left and right halves of the window.
left_words, right_words = sample_context[:2], sample_context[2:]
print("Predicted sentence : {} {} {} {} {}".format(*left_words, word, *right_words))

Prediction of central word:  is
Predicted sentence : a process is directed by
