In [79]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [80]:
# Vocabulary lookup table: position k holds the character for class index k
# (0 -> 'h', 1 -> 'i', 2 -> 'e', 3 -> 'l', 4 -> 'o').
idx2char = list("hielo")

In [81]:
# Goal: train the network so that feeding "hihell" produces "ihello".

# Character indices of the input string "hihell" (a batch holding one sequence).
x_data = [[0, 1, 0, 2, 3, 3]]

# One-hot encoding of x_data over the 5-character vocabulary: the row for
# each time step has a single 1 in the column given by its character index.
x_one_hot = [
    [[int(column == char_index) for column in range(5)] for char_index in sequence]
    for sequence in x_data
]

# Target character indices: the string "ihello".
y_data = [1, 0, 2, 3, 3, 4]

In [82]:
# Build the model input and the training targets as tensors.
# The labels are an integer (int64 / "long") tensor because CrossEntropyLoss
# takes class indices as targets rather than one-hot vectors.
# Tensors track gradients themselves, so the deprecated Variable wrapper is
# not needed; dtypes are stated explicitly instead of relying on the legacy
# torch.Tensor / torch.LongTensor constructors.

inputs = torch.tensor(x_one_hot, dtype=torch.float32)
labels = torch.tensor(y_data, dtype=torch.long)

In [83]:
# Dimensions of the toy "hihell" -> "ihello" task.
num_layers = 1                   # one-layer RNN
batch_size = 1                   # a single sentence per batch
sequence_length = len("ihello")  # 6 time steps
input_size = 5                   # width of a one-hot vector, e.g. h = [1, 0, 0, 0, 0]
hidden_size = 5                  # size of the hidden layer
num_classes = 5                  # predict one of h, i, e, l, o

In [84]:
class RNN(nn.Module):
    """Recurrent network that emits one score vector per time step.

    The hidden state produced by ``nn.RNN`` is used directly as the class
    scores (there is no projection layer), so ``hidden_size`` must equal
    ``num_classes``.

    Args:
        num_classes: Number of output classes.
        input_size: Width of each input vector.
        hidden_size: Width of the hidden state; must equal ``num_classes``.
        num_layers: Number of stacked recurrent layers.
        sequence_length: Optional fixed number of time steps. When ``None``
            (the default) it is inferred from the input in ``forward``.

    Raises:
        ValueError: If ``hidden_size`` differs from ``num_classes``.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 sequence_length=None):
        super(RNN, self).__init__()

        # Without a projection layer the flattened hidden states can only be
        # interpreted as class scores when the two widths agree.
        if hidden_size != num_classes:
            raise ValueError(
                "hidden_size (%d) must equal num_classes (%d): the hidden "
                "state is used directly as the class scores"
                % (hidden_size, num_classes))

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        # Taken from the argument rather than from a notebook-level global.
        self.sequence_length = sequence_length

        # Built from the constructor arguments so that the layer always
        # agrees with the h_0 shape created in forward().
        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True)

    def forward(self, x):
        """Run the RNN over ``x`` and return the flattened per-step outputs.

        Args:
            x: Tensor whose first dimension is the batch and whose remaining
                elements can be arranged as ``(seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch * seq_len, num_classes)``.
        """
        batch = x.size(0)

        # Reshape the input to (batch, seq_len, input_size). The result must
        # be assigned: view/reshape do not modify the tensor in place.
        seq_len = -1 if self.sequence_length is None else self.sequence_length
        x = x.reshape(batch, seq_len, self.input_size)

        # Initial hidden state, all zeros.
        # Shape: (num_layers * num_directions, batch, hidden_size); this
        # layout does not depend on batch_first.
        h_0 = torch.zeros(self.num_layers, batch, self.hidden_size,
                          dtype=x.dtype, device=x.device)

        # out: (batch, seq_len, hidden_size) because batch_first=True.
        out, _ = self.rnn(x, h_0)

        # Flatten batch and time so each row is one time step's scores.
        return out.reshape(-1, self.num_classes)

In [85]:
# Build the model from the hyperparameters defined above and show its layers.
rnn = RNN(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)
print(rnn)

RNN(
  (rnn): RNN(5, 5, batch_first=True)
)


In [86]:
# Optimizer and loss function.
# Adam updates every parameter of the model with a learning rate of 0.1.
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.1)
# CrossEntropyLoss = LogSoftmax + NLLLoss
criterion = nn.CrossEntropyLoss()


In [87]:
# Train the model for 100 epochs on the single "hihell" -> "ihello" example.
for epoch in range(100):
    # Forward pass and loss.
    # outputs: (6, 5) = (seq_len, output_size)
    outputs = rnn(inputs)
    loss = criterion(outputs, labels)

    # Backward pass: clear old gradients, back-propagate, update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Row-wise maximum: the value is discarded, its position is the
    # predicted character index for that time step.
    _, idx = outputs.max(1)
    idx = idx.data.numpy()  # tensor -> numpy array

    # Look up the character for each predicted index.
    result_str = [idx2char[char_index] for char_index in idx.squeeze()]

    print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.data))
    print("Predicted string: ", ''.join(result_str))

print("Learning finished!")

# Because the parameters are randomly initialised, the prediction starts out
# as "lolioe"; from epoch 8 onwards the model prints "ihello".

epoch: 1, loss: 1.689
Predicted string:  lolioe
epoch: 2, loss: 1.431
Predicted string:  lhllle
epoch: 3, loss: 1.299
Predicted string:  lhlllo
epoch: 4, loss: 1.199
Predicted string:  lhlllo
epoch: 5, loss: 1.097
Predicted string:  lhlllo
epoch: 6, loss: 0.995
Predicted string:  lhlllo
epoch: 7, loss: 0.909
Predicted string:  lhello
epoch: 8, loss: 0.845
Predicted string:  ihello
epoch: 9, loss: 0.804
Predicted string:  ihello
epoch: 10, loss: 0.760
Predicted string:  ihello
epoch: 11, loss: 0.705
Predicted string:  ihello
epoch: 12, loss: 0.659
Predicted string:  ihello
epoch: 13, loss: 0.625
Predicted string:  ihello
epoch: 14, loss: 0.600
Predicted string:  ihello
epoch: 15, loss: 0.580
Predicted string:  ihello
epoch: 16, loss: 0.562
Predicted string:  ihello
epoch: 17, loss: 0.547
Predicted string:  ihello
epoch: 18, loss: 0.534
Predicted string:  ihello
epoch: 19, loss: 0.525
Predicted string:  ihello
epoch: 20, loss: 0.521
Predicted string:  ihello
epoch: 21, loss: 0.514
Predic