In [33]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [34]:
sentence = "Repeat is the best medicine for memory".split()
vocab = list(set(sentence))
print(vocab)

['memory', 'medicine', 'is', 'for', 'best', 'Repeat', 'the']


In [35]:
word2index={tkn:i for i, tkn in enumerate(vocab,1)}
word2index['<unk>']=0
print(word2index)

{'memory': 1, 'medicine': 2, 'is': 3, 'for': 4, 'best': 5, 'Repeat': 6, 'the': 7, '<unk>': 0}


In [36]:
print(word2index['memory'])

1


In [37]:
print(word2index['for'])

4


In [38]:
index2word={v:k for k,v in word2index.items()}
print(index2word)
print(index2word[1])

{1: 'memory', 2: 'medicine', 3: 'is', 4: 'for', 5: 'best', 6: 'Repeat', 7: 'the', 0: '<unk>'}
memory


In [39]:
def build_data(sentence, word2index):
    encoded=[word2index[token] for token in sentence]
    # print(encoded)
    input_seq, label_seq = encoded[:-1] , encoded[1:] # [:-1] 맨마지막까지, [1:] 첫번째부터
    # print(input_seq)
    # print(label_seq)
    input_seq=torch.LongTensor(input_seq).unsqueeze(0) # 차원 하나 추가
    label_seq=torch.LongTensor(label_seq).unsqueeze(0)
    return input_seq, label_seq

In [40]:
X, Y=build_data(sentence, word2index)

In [41]:
print(X)
print(Y)

tensor([[6, 3, 7, 5, 2, 4]])
tensor([[3, 7, 5, 2, 4, 1]])


In [42]:
class Net(nn.Module):
    def __init__(self, vocab_size, input_size, hidden_size):
        super(Net, self).__init__()
        self.embedding_layer=nn.Embedding(num_embeddings=vocab_size,
                                          embedding_dim=input_size)
        self.rnn_layer=nn.RNN(input_size, hidden_size, batch_first=True)
        self.linear=nn.Linear(hidden_size, vocab_size)
    
    def forward(self, x):
        # 1. 임베딩 층
        # 크기변화: (배치크기, 시퀀스 길이, 임베딩 차원)
        output=self.embedding_layer(x)
        # 2. RNN 층
        # 크기변화 : (배치크기, 시퀀스 길이, 임베팅 차원)
        # => output (배치 크기, 시퀀스 길이, 은닉층 크기), hidden (1, 배치크기, 은닉층 크기)
        output, hidden=self.rnn_layer(output)
        # 3. 최종 출력층
        # 크기변화: (배치크기, 시퀀스 길이, 은닉층 크기) => (배치크기, 시퀀스 길이, 단어장 크기)
        output=self.linear(output)
        # 4. view를 통해서 배치 차원 제거
        # 크기변화 : (배치 크기, 시퀀스 길이, 단어장 크기) => (배치크기*시퀀스 길이, 단어장 크기)
        return output.view(-1, output.size(2))

In [43]:
vocab_size=len(word2index)
input_size=5
hidden_size=20

In [44]:
model=Net(vocab_size, input_size, hidden_size)
loss_function=nn.CrossEntropyLoss()
optimizer=optim.Adam(params=model.parameters())

In [45]:
output=model(X)
print(output)

tensor([[-0.1709, -0.0914, -0.2755, -0.1728,  0.5151, -0.0241,  0.2234, -0.0221],
        [-0.2367,  0.0046, -0.2177, -0.1032,  0.5179,  0.0145,  0.3204, -0.3410],
        [-0.1684,  0.1926, -0.4853, -0.1542,  0.3133, -0.0503,  0.2816, -0.2540],
        [-0.1452, -0.0495, -0.2307, -0.1508,  0.3616,  0.1729,  0.1890, -0.1250],
        [-0.0276,  0.0911, -0.5461, -0.0565, -0.3087, -0.4496, -0.0253, -0.5625],
        [-0.1683, -0.2321, -0.3446, -0.0212,  0.3580, -0.0806,  0.1952,  0.1104]],
       grad_fn=<ViewBackward0>)


In [46]:
decoded=lambda y: [index2word.get(x) for x in y]
print(decoded)

<function <lambda> at 0x000001EFE38BB9A0>


In [47]:
for step in range(201):
    optimizer.zero_grad()
    outputs=model(X)
    loss=loss_function(outputs, Y.view(-1))

    loss.backward()
    optimizer.step()

    if step % 40 ==0:
        print("step:", step, "loss:", loss.item())
        pred=outputs.softmax(-1).argmax(-1).tolist()
        print(" ".join(['Repeat']+decoded(pred)))
        print()

step: 0 loss: 2.281268835067749
Repeat for for for for memory for

step: 40 loss: 1.6887378692626953
Repeat for for best medicine for memory

step: 80 loss: 1.0410583019256592
Repeat is the best medicine for memory

step: 120 loss: 0.5746766924858093
Repeat is the best medicine for memory

step: 160 loss: 0.31949421763420105
Repeat is the best medicine for memory

step: 200 loss: 0.1859656423330307
Repeat is the best medicine for memory

