In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

from torch.autograd import Variable

In [2]:
X = '''
it was the best of times it was the worst of times 
it was the age of wisdom it was the age of foolishness
it was the epoch of belief it was the epoch of credulity
'''.split()

In [3]:
vocab = list(set(X))
word_to_ix ={ word: i for i, word in enumerate(vocab) }

In [4]:
def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    return idxs

In [5]:
CONTEXT_SIZE = 2

data = []
X_data, y = [], []
# look at every window in the data
for i in range(2, len(X)-2):
    context = [X[i - 2], X[i - 1],
               X[i + 1], X[i + 2]]
    target = X[i]
    data.append((context, target))
    X_data.append(make_context_vector(context, word_to_ix)) ; y.append(word_to_ix[target])

print(X_data)

[[4, 0, 10, 6], [0, 12, 6, 8], [12, 10, 8, 4], [10, 6, 4, 0], [6, 8, 0, 12], [8, 4, 12, 9], [4, 0, 9, 6], [0, 12, 6, 8], [12, 9, 8, 4], [9, 6, 4, 0], [6, 8, 0, 12], [8, 4, 12, 11], [4, 0, 11, 6], [0, 12, 6, 3], [12, 11, 3, 4], [11, 6, 4, 0], [6, 3, 0, 12], [3, 4, 12, 11], [4, 0, 11, 6], [0, 12, 6, 5], [12, 11, 5, 4], [11, 6, 4, 0], [6, 5, 0, 12], [5, 4, 12, 7], [4, 0, 7, 6], [0, 12, 6, 2], [12, 7, 2, 4], [7, 6, 4, 0], [6, 2, 0, 12], [2, 4, 12, 7], [4, 0, 7, 6], [0, 12, 6, 1]]


In [6]:
# cast our data and targets as torch tensors
X_data = torch.LongTensor(X_data)
y = torch.LongTensor(y)

In [7]:
data[:10]

[(['it', 'was', 'best', 'of'], 'the'),
 (['was', 'the', 'of', 'times'], 'best'),
 (['the', 'best', 'times', 'it'], 'of'),
 (['best', 'of', 'it', 'was'], 'times'),
 (['of', 'times', 'was', 'the'], 'it'),
 (['times', 'it', 'the', 'worst'], 'was'),
 (['it', 'was', 'worst', 'of'], 'the'),
 (['was', 'the', 'of', 'times'], 'worst'),
 (['the', 'worst', 'times', 'it'], 'of'),
 (['worst', 'of', 'it', 'was'], 'times')]

In [8]:
class CBOW(nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, sparse=True)
        self.z = nn.Linear(embedding_dim, vocab_size)

    def forward(self, x):
        embs = self.embeddings(x).sum(dim=1)
        return F.log_softmax(self.z(embs), dim=1)

In [9]:
def batch_data(X, y, batch_size=8):
    indices = np.arange(X.shape[0])
    np.random.shuffle(indices)
    count = 0
    
    while count < X.shape[0]:
        yield X[count:(count+batch_size), :], y[count:(count+batch_size)]
        count += batch_size

In [10]:
model = CBOW(len(vocab), 5)

In [11]:
losses = []
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.05)

for epoch in range(1000):
    for X_batch, y_batch in batch_data(X_data, y):
        model.zero_grad()
        
        log_probs = model(Variable(X_batch))
        loss = loss_function(log_probs, Variable(y_batch))

        loss.backward()
        optimizer.step()

In [12]:
embeddings = dict(model.named_parameters())['embeddings.weight']

In [13]:
w1 = embeddings[word_to_ix['best'], :]
w2 = embeddings[word_to_ix['worst'], :]

w3 = embeddings[word_to_ix['wisdom'], :]
w4 = embeddings[word_to_ix['foolishness'], :]

In [14]:
cos = nn.CosineSimilarity(dim=0, eps=1e-6)

In [15]:
print(cos(w1, w2))
print(cos(w3, w4))

Variable containing:
 0.5196
[torch.FloatTensor of size 1]

Variable containing:
 0.7928
[torch.FloatTensor of size 1]

