In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
def make_batch():
    input_batch = []
    target_batch = []

    for sen in sentences:
        # ex) I like dog
        word = sen.split()  # space tokenizer
        input = [word_dict[n] for n in word[:-1]]   # 앞에 N개의 단어 (I - 3, like - 0)
        target = word_dict[word[-1]]       # 예측할 단어 (dog - 6)

        input_batch.append(input)
        target_batch.append(target)

    return input_batch, target_batch

In [3]:
# Model
class NNLM(nn.Module):
    def __init__(self):
        super(NNLM, self).__init__()
        self.C = nn.Embedding(n_class, m)
        self.H = nn.Linear(n_step*m, n_hidden, bias=False)
        self.d = nn.Parameter(torch.ones(n_hidden))
        self.U = nn.Linear(n_hidden, n_class, bias=False)
        self.W = nn.Linear(n_step*m, n_class, bias=False)
        self.b = nn.Parameter(torch.ones(n_class))

    def forward(self, X):
        X = self.C(X)   # X : [batch_size, n_step, m]
        X = X.view(-1, n_step*m)    # [batch_size, n_step*m]
        tanh = torch.tanh(self.d + self.H(X))   # [batch_size, n_hidden]
        output = self.b + self.W(X) + self.U(tanh)  # [batch_size, n_class]
        return output



In [9]:
n_step = 2
n_hidden = 2
m = 2

sentences = ["I like dog", "I love coffee", "I hate milk"]

word_list = " ".join(sentences).split()
word_list = list(set(word_list))    # ['like', 'milk', 'coffee', 'I', 'hate', 'love', 'dog']
word_dict = {w:i for i,w in enumerate(word_list)}   # ex) 'like' : 0
number_dict = {i:w for i,w in enumerate(word_list)} # ex) 0 : 'like'
n_class = len(word_dict)    # number of Vocabulary
print('V :', n_class)


V : 7


In [10]:
# model 선언
model = NNLM()

In [12]:
criterion = nn.CrossEntropyLoss()   # cost function
optimizer = optim.Adam(model.parameters(), lr=0.001)    # 최적화

In [16]:
input_batch, target_batch = make_batch()
print('input_batch :', input_batch)
print('target_batch :', target_batch)

input_batch = torch.LongTensor(input_batch)
target_batch = torch.LongTensor(target_batch)

input_batch : [[3, 0], [3, 5], [3, 4]]
target_batch : [6, 2, 1]


In [17]:
print('After input : {}, target : {}'.format(input_batch.shape, output_batch.shape))

After input : torch.Size([3, 2]), target : torch.Size([3])


In [18]:
# Training
for epoch in range(5000):
    optimizer.zero_grad()
    output = model(input_batch)     # 순전파

    # output : [batch_size, n_class]
    # target : [batch_size]
    loss = criterion(output, target_batch)  # 손실함수
    if (epoch+1)%1000==0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
    
    loss.backward()     # 역전파
    optimizer.step()    # learning rate만큼 이동

Epoch: 1000 cost = 0.079087
Epoch: 2000 cost = 0.012405
Epoch: 3000 cost = 0.004127
Epoch: 4000 cost = 0.001769
Epoch: 5000 cost = 0.000858


In [25]:
# Predict
predict = model(input_batch).data.max(1, keepdim=True)[1]

In [29]:
print('predict :', predict, ', shape=', predict.shape)

sqz = predict.squeeze()
print('After squeeze() :', sqz, sqz.shape)

print('Tensor ', sqz[0],'의 item : ', sqz[0].item())

predict : tensor([[6],
        [2],
        [1]]) , shape= torch.Size([3, 1])
After squeeze() : tensor([6, 2, 1]) torch.Size([3])
Tensor  tensor(6) 의 item :  6


In [30]:
# Test
print([sen.split()[:2] for sen in sentences], '→', [number_dict[n.item()] for n in predict.squeeze()])

[['I', 'like'], ['I', 'love'], ['I', 'hate']] → ['dog', 'coffee', 'milk']
