# Continuous Bag of Words

In [None]:
import torch
from torch import nn

In [None]:
context_size = 2  # 2 words to the left, 2 to the right

# Tokenise on whitespace, then strip sentence punctuation from EACH token.
# Stripping the passage as a whole would only remove its final full stop and
# leave tokens such as "process." and "evolve," distinct from "process" and
# "evolve", needlessly inflating the vocabulary.
raw_text = [
    token.strip(".,")
    for token in """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()
]

In [None]:
# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)

# Assign indices from the SORTED vocabulary: the iteration order of a set of
# strings can change between interpreter sessions (string hash randomisation),
# which would make the word <-> index mapping differ from run to run.
ordered_vocab = sorted(vocab)
word_to_idx = {word: i for i, word in enumerate(ordered_vocab)}
idx_to_word = dict(enumerate(ordered_vocab))

# Build (context, target) training pairs. The context keeps the words in
# reading order -- `context_size` words before the target followed by
# `context_size` words after it. Order matters because the model concatenates
# the embeddings position by position, and the prediction example in this
# notebook supplies its context in reading order ("We are to study").
data = []
for i in range(context_size, len(raw_text) - context_size):
    context = raw_text[i - context_size:i] + raw_text[i + 1:i + context_size + 1]
    target = raw_text[i]
    data.append((context, target))

data[:5]

In [None]:
# Inspect the word -> index lookup table built above.
word_to_idx

In [None]:
class CBOW(nn.Module):
    """Predict a centre word from the words surrounding it.

    The indices of the context words are embedded, the embeddings are
    concatenated in order, and a two-layer feed-forward network maps the
    result to log-probabilities over the vocabulary.

    Args:
        vocab_size: Number of distinct words; size of the output distribution.
        context_size: Number of context words on EACH side of the target, so
            the model expects ``2 * context_size`` indices per sample.
        embedding_dim: Dimensionality of each word embedding.
    """

    def __init__(self, vocab_size: int, context_size: int, embedding_dim: int):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        # The window spans `context_size` words to the left and to the right,
        # and their embeddings are concatenated before the first projection.
        self.projection1 = nn.Linear(context_size * 2 * embedding_dim, 128)
        self.activation = nn.ReLU()
        self.projection2 = nn.Linear(128, vocab_size)
        # Normalise over the last (vocabulary) axis so the layer works for
        # batched and unbatched inputs alike; for 2-D input this equals dim=1.
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        """Return log-probabilities over the vocabulary.

        Args:
            inputs: Integer word indices whose last axis has length
                ``2 * context_size``. Any leading axes (for example a batch
                axis) are preserved; a plain 1-D index vector is accepted too.

        Returns:
            A tensor with the same leading axes as ``inputs`` and a final axis
            of length ``vocab_size`` holding log-probabilities.
        """
        # Merge the (window position, embedding) axes into one feature axis.
        e = self.embed(inputs).flatten(start_dim=-2)
        p1 = self.activation(self.projection1(e))
        p2 = self.projection2(p1)

        return self.softmax(p2)

def make_context_vector(context, word_to_idx):
    """Encode a sequence of context words as a 1-D ``torch.long`` index tensor.

    Each word is looked up in ``word_to_idx``; a word missing from the
    mapping raises ``KeyError``.
    """
    indices = list(map(word_to_idx.__getitem__, context))
    return torch.tensor(indices, dtype=torch.long)


# Example: encode the context words of the first training pair as an index tensor.
make_context_vector(data[0][0], word_to_idx)  # example

In [None]:
# Instantiate the network with 64-dimensional word embeddings, then echo its layers.
model = CBOW(
    vocab_size=vocab_size,
    context_size=context_size,
    embedding_dim=64,
)
model

In [None]:
# Unpack the first training pair: `x` is the list of context words, `y` the target word.
x, y = data[0]
x, y

In [None]:
from torch import optim

losses = []  # mean training loss of each epoch
loss_function = nn.NLLLoss()  # the model already outputs log-probabilities
optimizer = optim.SGD(model.parameters(), lr=0.001)
model.train()

# The training pairs never change, so encode them as tensors once instead of
# rebuilding the same tensors on every one of the epochs below. Each context
# gets a leading batch axis of size 1; each target is a 1-element index tensor.
encoded_data = [
    (
        make_context_vector(context, word_to_idx).unsqueeze(0),
        torch.tensor([word_to_idx[target]], dtype=torch.long),
    )
    for context, target in data
]

for epoch in range(500):
    total_loss = 0
    for x, y_true in encoded_data:
        model.zero_grad()  # gradients accumulate by default; reset per sample
        y_hat = model(x)

        loss = loss_function(y_hat, y_true)
        loss.backward()
        total_loss += loss.item()  # .item() already yields a detached Python float
        optimizer.step()
    losses.append(total_loss / len(data))
    print(losses[-1])

In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch mean training loss collected in `losses`.
plt.plot(losses)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.show()

In [None]:
# (argmax_idx, [(word, word_idx, log_prob), ...]) -- scores are log-probabilities.
PredictionResult = tuple[torch.Tensor, list[tuple[str, torch.Tensor, torch.Tensor]]]


def predict(cbow_model: CBOW, input: list[str], top_n: int = 3) -> PredictionResult:
    """Predict the centre word for a window of context words.

    Args:
        cbow_model: Model to query; it is switched to evaluation mode.
        input: Context words, in the order the model was trained on. (The
            name shadows the builtin but is kept so keyword callers still work.)
        top_n: Number of highest-scoring candidates to return.

    Returns:
        A pair ``(y_hat, ranked)`` where ``y_hat`` is a 0-d tensor holding the
        index of the most likely word and ``ranked`` is a list of at most
        ``top_n`` tuples ``(word, word_idx, log_prob)`` ordered from most to
        least likely.

    Raises:
        KeyError: If a context word is not present in ``word_to_idx``.
    """
    cbow_model.eval()
    # Reuse the shared encoder and add the batch axis the model is trained with.
    context_idxs = make_context_vector(input, word_to_idx).unsqueeze(0)
    # Inference only: skip autograd bookkeeping, and call the module itself
    # (not `.forward`) so that any registered hooks run.
    with torch.no_grad():
        log_probs = cbow_model(context_idxs)
    y_hat = torch.argmax(log_probs)
    ranked_vals, ranked_idxs = log_probs.sort(descending=True)
    # Row 0 is the single sample in the batch; slicing tolerates top_n > vocab size.
    top_n_ranked = [
        (idx_to_word[idx.item()], idx, val)
        for idx, val in zip(ranked_idxs[0][:top_n], ranked_vals[0][:top_n])
    ]

    return y_hat, top_n_ranked


In [None]:
# Ask the model for the word that belongs in the middle of a four-word window.
x_words = ["We", "are", "to", "study"]
y_word = "about"
n = 3

y_hat, top_n = predict(model, x_words, top_n=n)
y_true = torch.tensor([word_to_idx[y_word]], dtype=torch.long)

# Predicted index, expected index, then the ranked candidates -- one per line.
print(y_hat, y_true, top_n, sep="\n")

In [None]:
# Show the model's best guesses for the last ten training pairs. `n` (set in
# the single-prediction cell) is passed through to `predict` so the number of
# candidates returned always matches the "Top {n}" label that is printed.
for i, (x_words, y_word) in enumerate(data[-10:]):
    y_hat, top_n = predict(model, x_words, top_n=n)
    print(f"Example {i}, Top {n} predictions", top_n)
    print(f"Target word '{y_word}' \n")