In [11]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

In [12]:
dtype = torch.FloatTensor
#데이터 타입 = Float형

sentences = ["i like dog", "i love coffee","i hate milk"]

word_list = " ".join(sentences).split()
#['i', 'like', 'dog', 'i', 'love', 'coffee', 'i', 'hate', 'milk']
# join으로 리스트의 원소들을 묶고 각 단어별로 자른다.

word_list  = list(set(word_list))
# set -> list 중복 원소 제거, 순서 바뀜
#['coffee', 'dog', 'i', 'love', 'milk', 'like', 'hate']

word_dict ={w:i for i ,w in enumerate(word_list)}
#{'coffee': 0, 'dog': 1, 'i': 2, 'love': 3, 'milk': 4, 'like': 5, 'hate': 6}

number_dict = {i:w for i,w in enumerate(word_list)}
#{0: 'coffee', 1: 'dog', 2: 'i', 3: 'love', 4: 'milk', 5: 'like', 6: 'hate'}

n_class = len(word_dict) #number of Vocabulary

# NNLM Parameter
n_step = 2 #n-1 in paper
n_hidden = 2 # h in paper
m = 2 # m in paper


def make_batch(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input = [word_dict[n] for n in word[:-1]]
        target = word_dict[word[-1]]

        input_batch.append(input)
        target_batch.append(target)

    return input_batch, target_batch

# Model
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__()
        self.C = nn.Embedding(n_class, m)
        self.H = nn.Parameter(torch.randn(n_step * m, n_hidden).type(dtype))
        self.W = nn.Parameter(torch.randn(n_step * m, n_class).type(dtype))
        self.d = nn.Parameter(torch.randn(n_hidden).type(dtype))
        self.U = nn.Parameter(torch.randn(n_hidden, n_class).type(dtype))
        self.b = nn.Parameter(torch.randn(n_class).type(dtype))

    def forward(self, X):
        X = self.C(X)
        X = X.view(-1, n_step * m) # [batch_size, n_step * n_class]
        tanh = torch.tanh(self.d + torch.mm(X, self.H)) # [batch_size, n_hidden]
        output = self.b + torch.mm(X, self.W) + torch.mm(tanh, self.U) # [batch_size, n_class]
        return output

In [13]:
model = NNLM()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)

input_batch,target_batch = make_batch(sentences)
input_batch = Variable(torch.LongTensor(input_batch))
target_batch = Variable(torch.LongTensor(target_batch))


In [14]:

# Training
for epoch in range(5000):

    optimizer.zero_grad()
    output = model(input_batch)

    # output : [batch_size, n_class], target_batch : [batch_size] (LongTensor, not one-hot)
    loss = criterion(output, target_batch)
    if (epoch + 1)%1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))

    loss.backward()
    optimizer.step()


Epoch: 1000 cost = 0.265721
Epoch: 2000 cost = 0.032544
Epoch: 3000 cost = 0.009111
Epoch: 4000 cost = 0.003589
Epoch: 5000 cost = 0.001638


In [5]:

# Predict
predict = model(input_batch).data.max(1, keepdim=True)[1]

# Test
print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])


[['i', 'like'], ['i', 'love'], ['i', 'hate']] -> ['dog', 'coffee', 'milk']


In [23]:
predict = model(input_batch).data.max(1, keepdim=True)


In [29]:
model(input_batch).data

tensor([[-5.2875, -5.8460, -3.9452, 10.3330,  3.5947,  1.5534,  0.6589],
        [ 0.0132, -6.0936, -2.5246,  1.3111,  8.0863,  1.1922, -0.4434],
        [-5.2369, -5.8879, -2.6417,  0.8454,  4.6013, 11.7264, -9.5941]])

In [27]:
model(input_batch).data.max(1)

(tensor([10.3330,  8.0863, 11.7264]), tensor([3, 4, 5]))

In [30]:
model(input_batch).data.max(1,keepdim = True)

(tensor([[10.3330],
         [ 8.0863],
         [11.7264]]), tensor([[3],
         [4],
         [5]]))

In [31]:
model(input_batch).data.max(1,keepdim = True)[1]

tensor([[3],
        [4],
        [5]])