5. Implement the Continuous Bag of Words (CBOW) Model. Stages can be 

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

In [11]:
# a. Data Preparation 

In [4]:
# Provided raw text data
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()

In [5]:
# Create vocabulary and word-to-index mapping
vocab = set(raw_text)
word_to_ix = {word: i for i, word in enumerate(vocab)}
vocab_size = len(vocab)

In [13]:
# b. Generate training data 

In [6]:
# Generate context-target pairs
context_size = 2
data = [(raw_text[i - context_size:i] + raw_text[i + 1:i + context_size + 1], target)
        for i, target in enumerate(raw_text[context_size:-context_size])]

In [14]:
#c. Train model 

In [7]:
# CBOW Model
class CBOW(nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear = nn.Linear(embedding_dim, vocab_size)

    def forward(self, inputs):
        embeds = self.embeddings(inputs).sum(dim=0)
        out = self.linear(embeds)
        return out

model = CBOW(vocab_size, embedding_dim=100)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)


In [8]:
for epoch in range(50):
    total_loss = 0
    for context, target in data:
        context_idxs = torch.tensor([word_to_ix[word] for word in context], dtype=torch.long)
        target_idx = torch.tensor(word_to_ix[target], dtype=torch.long)

        optimizer.zero_grad()
        log_probs = model(context_idxs)
        loss = loss_function(log_probs.unsqueeze(0), target_idx.unsqueeze(0))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

In [9]:
test_context = ['People', 'create', 'to', 'direct']
context_idx = torch.tensor([word_to_ix[word] for word in test_context], dtype=torch.long)
output = model(context_idx)
predicted_idx = torch.argmax(output).item()
predicted_word = next(word for word, idx in word_to_ix.items() if idx == predicted_idx)

In [15]:
#d. Output

In [10]:
# Print result
print(f'Raw text: {" ".join(raw_text)}\n')
print(f'Context: {test_context}\n')
print(f'Prediction: {predicted_word}')

Raw text: We are about to study the idea of a computational process. Computational processes are abstract beings that inhabit computers. As they evolve, processes manipulate other abstract things called data. The evolution of a process is directed by a pattern of rules called a program. People create programs to direct processes. In effect, we conjure the spirits of the computer with our spells.

Context: ['People', 'create', 'to', 'direct']

Prediction: direct
