In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

dtype = torch.FloatTensor
sentences = ["i like dog","i love milk","i hate coffee"]

In [2]:
word_list = " ".join(sentences).split()
word_list

['i', 'like', 'dog', 'i', 'love', 'milk', 'i', 'hate', 'coffee']

In [3]:
word_list = list(set(word_list))
word_list

['i', 'dog', 'hate', 'love', 'milk', 'coffee', 'like']

In [4]:
word2index = {w: i for i,w in enumerate(word_list)}
word2index

{'i': 0, 'dog': 1, 'hate': 2, 'love': 3, 'milk': 4, 'coffee': 5, 'like': 6}

In [5]:
index2word = {i:w for i,w in enumerate(word_list)}
index2word

{0: 'i', 1: 'dog', 2: 'hate', 3: 'love', 4: 'milk', 5: 'coffee', 6: 'like'}

In [6]:
n_class = len(word2index)
n_class

7

In [7]:
# NNLM Parameter
n_step = 2 # n-1
n_hidden = 2 # h
m=2 # m in paper

def make_batch(senteces):
    input_batch=[]
    target_batch=[]
    
    for sen in sentences:
        word = sen.split()
        input = [word2index[n] for n in word[:-1]]
        target = word2index[word[-1]]
        
        input_batch.append(input)
        target_batch.append(target)
    return input_batch,target_batch
        
        

In [8]:
# Model
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__()
        self.C = nn.Embedding(n_class, m)
        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))
        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))

    def forward(self, X):
        X = self.C(X)
        X = X.view(-1, n_step * m) 
        tanh = torch.tanh(self.d + torch.mm(X, self.H)) 
        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U) 
        return output

model = NNLM()

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.001)

input_batch, target_batch=make_batch(sentences)

input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))

print(input_batch,target_batch)
print(input_batch.shape)

tensor([[0, 6],
        [0, 3],
        [0, 2]]) tensor([1, 4, 5])
torch.Size([3, 2])


In [10]:
for epoch in range(500):
    
    optimizer.zero_grad()
    output = model(input_batch)
    
    # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
    
    loss = criterion(output, target_batch)
    if (epoch + 1)%100 ==0:
        print('Epoch:','%03d' % (epoch+1),'cost = ','{:.6f}'.format(loss))
    
    loss.backward()
    optimizer.step()

Epoch: 100 cost =  2.715838
Epoch: 200 cost =  1.645174
Epoch: 300 cost =  0.941450
Epoch: 400 cost =  0.553835
Epoch: 500 cost =  0.349296


In [11]:
# Predict
predict = model(input_batch).data.max(1, keepdim=True)[1]
print(predict.squeeze())
# Test

print([sen.split()[:2] for sen in sentences], '->', [index2word[n.item()] for n in predict.squeeze()])


tensor([1, 4, 5])
[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'milk', 'coffee']
