In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x7ce96f1bb090>

In [2]:
word_to_ix = {"hello": 0, "world": 1}
embeds = nn.Embedding(2, 5)  # 2 words in vocab, 5 dimensional embeddings
lookup_tensor = torch.tensor([word_to_ix["hello"]], dtype=torch.long)
hello_embed = embeds(lookup_tensor)
print(hello_embed)

tensor([[ 0.6614,  0.2669,  0.0617,  0.6213, -0.4519]],
       grad_fn=<EmbeddingBackward0>)


In [3]:
CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
EMBEDDING_DIM = 10
raw_text = """Prince Hamlet of Denmark is the son of the recently deceased King Hamlet, 
and nephew of King Claudius, his father's brother and successor. Claudius hastily married
 King Hamlet's widow, Gertrude, Hamlet's mother, and took the throne for himself. Denmark 
 has a long-standing feud with neighbouring Norway, in which King Hamlet slew King Fortinbras 
 of Norway in a battle some years ago. Although Denmark defeated Norway and the Norwegian 
 throne fell to King Fortinbras's infirm brother, Denmark fears that an invasion led by the 
 dead Norwegian king's son, Prince Fortinbras, is imminent.
On a cold night on the ramparts of Elsinore, the Danish royal castle, the sentries Bernardo 
and Marcellus discuss a ghost resembling the late King Hamlet which they have recently seen, 
and bring Prince Hamlet's friend Horatio as a witness. After the ghost appears again, the 
three vow to tell Prince Hamlet what they have witnessed.
The court gathers the next day, and King Claudius and Queen Gertrude discuss affairs of 
state with their elderly adviser Polonius. Claudius grants permission for Polonius's son 
Laertes to return to school in France, and he sends envoys to inform the King of Norway 
about Fortinbras. Claudius also questions Hamlet regarding his continuing to grieve for 
his father, and forbids him to return to his university in Wittenberg. After the court 
exits, Hamlet despairs of his father's death and his mother's hasty remarriage. Learning 
of the ghost from Horatio, Hamlet resolves to see it himself.
As Polonius's son Laertes prepares to depart for France, Polonius offers him advice that 
culminates in the maxim "to thine own self be true."[6] Polonius's daughter, Ophelia, 
admits her interest in Hamlet, but Laertes warns her against seeking the prince's attention, 
and Polonius orders her to reject his advances. That night on the rampart, the ghost appears 
to Hamlet, tells the prince that he was poisoned by Claudius, and demands that Hamlet avenge 
the murder. Hamlet agrees, and the ghost vanishes. The prince confides to Horatio and the 
sentries that from now on he plans to "put an antic disposition on", or act as though he 
has gone mad. Hamlet forces them to swear to keep his plans for revenge secret; however, 
he remains uncertain of the ghost's reliability.""".split()

In [4]:
# By deriving a set from `raw_text`, we deduplicate the array
vocab = set(raw_text)
vocab_size = len(vocab)

word_to_ix = {word: i for i, word in enumerate(vocab)}
ix_to_word = {i: word for i, word in enumerate(vocab)}

In [5]:
data = []
for i in range(CONTEXT_SIZE, len(raw_text) - CONTEXT_SIZE):
    context = (
        [raw_text[i - j - 1] for j in range(CONTEXT_SIZE)]
        + [raw_text[i + j + 1] for j in range(CONTEXT_SIZE)]
    )
    target = raw_text[i]
    data.append((context, target))
print(data[:5])


[(['Hamlet', 'Prince', 'Denmark', 'is'], 'of'), (['of', 'Hamlet', 'is', 'the'], 'Denmark'), (['Denmark', 'of', 'the', 'son'], 'is'), (['is', 'Denmark', 'son', 'of'], 'the'), (['the', 'is', 'of', 'the'], 'son')]


In [6]:
class CBOW(nn.Module):

    def __init__(self,vocab_size, embedding_dim):
        super(CBOW, self).__init__()
        # output : 1 x embedding_dim
        self.embed = nn.Embedding(vocab_size, embedding_dim,padding_idx=0)
        self.fc1 = nn.Linear(embedding_dim, 128)
        self.activation1 = nn.ReLU()

        #output : 1 x vocab_size
        self.fc2 = nn.Linear(128, vocab_size)
        self.activation2 = nn.LogSoftmax(dim = -1)

    def forward(self, inputs):
        x = sum(self.embed(inputs)).view(1,-1)
        x = self.fc1(x)
        x = self.activation1(x)
        x = self.fc2(x)
        x = self.activation2(x)
        return x
    
    def get_word_embedding(self, word):
        word = torch.tensor([word_to_ix[word]], dtype=torch.long)
        return self.embed(word).view(1,-1)

In [7]:
def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)

In [8]:
make_context_vector(data[0][0], word_to_ix)  # example

model = CBOW(vocab_size, EMBEDDING_DIM)

loss_fn = nn.NLLLoss()
optim = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(1000):
    total_loss = 0
    for context, target in data:
        context_idxs = make_context_vector(context, word_to_ix)
        # model.zero_grad()
        log_probs = model(context_idxs)
        loss = loss_fn(log_probs, torch.tensor([word_to_ix[target]], dtype=torch.long))
        total_loss += loss
    if epoch%100==0:
        print(f' {epoch}-th epoch\'s loss is {total_loss.item()}')
              
    optim.zero_grad()
    total_loss.backward()
    optim.step()

context = ['late', 'King',  'which', 'they']
context_idxs = make_context_vector(context, word_to_ix)
a = model(context_idxs)

print(f'text {raw_text}')
print(f'context {context}')
print(f'prediction : "{ix_to_word[torch.argmax(a).item()]}"')


 0-th epoch's loss is 2017.4041748046875
 100-th epoch's loss is 478.4582824707031
 200-th epoch's loss is 111.26284790039062
 300-th epoch's loss is 33.59030532836914
 400-th epoch's loss is 14.087678909301758
 500-th epoch's loss is 7.677165508270264
 600-th epoch's loss is 4.857917785644531
 700-th epoch's loss is 3.366039514541626
 800-th epoch's loss is 2.475390911102295
 900-th epoch's loss is 1.8973357677459717
text ['Prince', 'Hamlet', 'of', 'Denmark', 'is', 'the', 'son', 'of', 'the', 'recently', 'deceased', 'King', 'Hamlet,', 'and', 'nephew', 'of', 'King', 'Claudius,', 'his', "father's", 'brother', 'and', 'successor.', 'Claudius', 'hastily', 'married', 'King', "Hamlet's", 'widow,', 'Gertrude,', "Hamlet's", 'mother,', 'and', 'took', 'the', 'throne', 'for', 'himself.', 'Denmark', 'has', 'a', 'long-standing', 'feud', 'with', 'neighbouring', 'Norway,', 'in', 'which', 'King', 'Hamlet', 'slew', 'King', 'Fortinbras', 'of', 'Norway', 'in', 'a', 'battle', 'some', 'years', 'ago.', 'Alth