In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [2]:
# Vocabulary: position in the list is the integer id of that character.
idx2char = list('hielo')

In [3]:
# Goal: feed "hihell" and train the network to emit "ihello"
# (each target is the character that follows the corresponding input).

# Input character ids (see idx2char): h i h e l l
x_data = [[0, 1, 0, 2, 3, 3]]

# One-hot encoding of x_data, width 5 (one slot per character id);
# resulting nesting is batch x sequence x 5.
x_one_hot = [
    [[1 if slot == char_id else 0 for slot in range(5)] for char_id in sequence]
    for sequence in x_data
]

# Target character ids: i h e l l o
y_data = [1, 0, 2, 3, 3, 4]

In [4]:
# Convert the Python lists to tensors.
# `Variable` has been a no-op wrapper since PyTorch 0.4 (tensors track
# gradients themselves), so plain tensors are used here.
#
# inputs: float32 one-hot vectors, shape (batch=1, seq_len=6, input_size=5).
# labels: int64 class ids, one per time step. CrossEntropyLoss takes class
#         indices (not one-hot rows) as targets and requires them to be a
#         LongTensor -- that is why the labels are built with an integer dtype.
inputs = torch.tensor(x_one_hot, dtype=torch.float32)
labels = torch.tensor(y_data, dtype=torch.long)

In [5]:
# --- data dimensions ---
input_size = 5       # width of a one-hot input vector, e.g. h = [1,0,0,0,0]
num_classes = 5      # the prediction is one of h, i, e, l, o
sequence_length = 6  # |ihello| == 6
batch_size = 1       # a single sentence

# --- network dimensions ---
hidden_size = 5      # size of the RNN hidden state
num_layers = 1       # one-layer RNN

In [6]:
class RNN(nn.Module):
    """Minimal sequence model that uses the RNN hidden state directly as class scores.

    There is no output projection layer: the per-step output of ``nn.RNN``
    (width ``hidden_size``) is returned as the score vector, so
    ``hidden_size`` should equal ``num_classes`` for the result to contain
    exactly one row per time step.

    Args:
        num_classes: Width of each row of the returned score matrix.
        input_size: Size of each input feature vector (one-hot width).
        hidden_size: Size of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        sequence_length: Optional fixed sequence length used when reshaping
            the input. When ``None`` (the default) it is inferred from the
            input tensor.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 sequence_length=None):
        super(RNN, self).__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        # Taken from the argument rather than from a notebook-level global so
        # the class does not depend on hidden state.
        self.sequence_length = sequence_length

        # Build the recurrent layer from the constructor arguments instead of
        # hard-coded sizes, so that they are actually honoured.
        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True)

    def forward(self, x):
        """Run the RNN over ``x`` and return one score row per time step.

        Args:
            x: Tensor whose first dimension is the batch and whose remaining
                elements can be arranged as (seq_len, input_size).

        Returns:
            Tensor of shape (batch * seq_len, num_classes).
        """
        batch = x.size(0)

        # Reshape to (batch, seq_len, input_size). The result must be
        # assigned: reshape/view return a new tensor and leave `x` untouched.
        seq_len = -1 if self.sequence_length is None else self.sequence_length
        x = x.reshape(batch, seq_len, self.input_size)

        # Initial hidden state is all zeros, shaped
        # (num_layers * num_directions, batch, hidden_size); created with the
        # same dtype and device as the input.
        h_0 = x.new_zeros(self.num_layers, batch, self.hidden_size)

        # nn.RNN returns (output, final_hidden); only the per-step output is used.
        out, _ = self.rnn(x, h_0)

        # (batch, seq_len, hidden_size) -> (batch * seq_len, num_classes)
        return out.reshape(-1, self.num_classes)

In [7]:
# Stand-alone recurrent layer used to inspect raw nn.RNN outputs step by step.
rnn = nn.RNN(
    input_size=5,
    hidden_size=5,
    num_layers=1,
    batch_first=True,
)

In [8]:
# Alias the prepared input tensor under the name used by the forward-pass walkthrough.
x = inputs

In [9]:
# Initial hidden state, all zeros: (num_layers, batch, hidden_size).
# `Variable` is a no-op wrapper in PyTorch >= 0.4, so a plain tensor is used.
h_0 = torch.zeros(num_layers, x.size(0), hidden_size)

# Tensor.view returns a new tensor and does not modify `x`, so the result has
# to be assigned for the reshape to take effect.
x = x.view(x.size(0), sequence_length, input_size)

# out: output at every time step; hidden: final hidden state.
out, hidden = rnn(x, h_0)

In [10]:
# Shape of the per-step RNN output (batch_first layout: batch, seq_len, hidden_size).
out.shape

torch.Size([1, 6, 5])

In [11]:
# Display the final hidden state returned by the RNN call.
hidden

tensor([[[ 0.0586, -0.1359,  0.8714,  0.4888,  0.6944]]],
       grad_fn=<StackBackward>)

In [22]:
# Display the final hidden state again.
# NOTE(review): this cell's execution count is out of sequence and the values
# differ from the previous display, which suggests the layer was re-created
# and re-run in between -- confirm with a Restart & Run All.
hidden

tensor([[[-0.0114,  0.1040, -0.2398, -0.2624,  0.3381]]],
       grad_fn=<StackBackward>)

In [27]:
# Build the model from the hyperparameters defined earlier and show its layers.
# Note: this rebinds `rnn`, which previously referred to the bare nn.RNN layer.
rnn = RNN(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)
print(rnn)

RNN(
  (rnn): RNN(5, 5, batch_first=True)
)


In [28]:
# Loss function and optimizer.
# CrossEntropyLoss = LogSoftmax + NLLLoss, so the model returns raw scores.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=rnn.parameters(), lr=0.1)


In [29]:
# Single forward pass, no parameter update: inspect the raw model output.
for epoch in range(1):
    outputs = rnn(inputs)
    print(outputs)   # 6 x 5: sequence_length x out_size

tensor([[ 0.1215,  0.6467,  0.2845,  0.4228,  0.5358],
        [ 0.1898,  0.6626,  0.7235, -0.0828,  0.3859],
        [ 0.0160,  0.5977,  0.5046,  0.0944, -0.0965],
        [ 0.6141,  0.6255,  0.4698,  0.1151, -0.2778],
        [ 0.2353, -0.0698,  0.2430,  0.2054,  0.0427],
        [ 0.4891,  0.3180,  0.3902,  0.3800,  0.3246]], grad_fn=<ViewBackward>)


In [30]:
# One optimisation step, then show the row-wise maxima of the scores.
for epoch in range(1):
    optimizer.zero_grad()
    outputs = rnn(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    # For each row: the largest value and the position of that value.
    print(outputs.max(dim=1))
 

(tensor([0.6467, 0.7235, 0.5977, 0.6255, 0.2430, 0.4891],
       grad_fn=<MaxBackward0>), tensor([1, 2, 1, 1, 2, 0]))


In [31]:
# One optimisation step, then show the predicted class id for each time step.
for epoch in range(1):
    optimizer.zero_grad()
    outputs = rnn(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    # Keep only the positions of the row-wise maxima, as a NumPy array.
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()
    print(idx)

[1 0 1 0 3 3]


In [32]:
# One optimisation step, then decode the predicted ids into characters.
for epoch in range(1):
    optimizer.zero_grad()
    outputs = rnn(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()
    # Map every predicted id to its character via the vocabulary list.
    result_str = [idx2char[char_id] for char_id in idx]

In [33]:
# Concatenate the predicted characters into a single string for display.
''.join(result_str)

'ihlhlo'

In [34]:
# Train the model
for epoch in range(100):
    outputs = rnn(inputs)              # (seq_len, output_size) = (6, 5)
    optimizer.zero_grad()
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # For each row, the position of the largest score is the predicted char id.
    _, idx = outputs.max(1)
    # The index tensor carries no gradient, so it converts to NumPy directly
    # (no legacy `.data` access needed).
    idx = idx.numpy()

    # Look up the character for every predicted id. No squeeze(): `idx` is
    # already 1-D, and squeezing a length-1 sequence would produce a 0-d array
    # that cannot be iterated.
    result_str = [idx2char[c] for c in idx]
    # loss.item() extracts the Python float from the 0-dim loss tensor.
    print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.item()))
    print("Predicted string: ", ''.join(result_str))

print("Learning finished!")

# The first predictions depend on the randomly initialised parameters (and on
# any update steps already taken in earlier cells), so they vary between runs.
# In the run recorded below, "ihello" is first produced at epoch 10.

epoch: 1, loss: 1.178
Predicted string:  ihlhlo
epoch: 2, loss: 1.077
Predicted string:  ihehlo
epoch: 3, loss: 1.018
Predicted string:  ihehlo
epoch: 4, loss: 0.979
Predicted string:  ihehlo
epoch: 5, loss: 0.941
Predicted string:  ihehlo
epoch: 6, loss: 0.904
Predicted string:  ihehlo
epoch: 7, loss: 0.869
Predicted string:  ehehlo
epoch: 8, loss: 0.839
Predicted string:  ehehlo
epoch: 9, loss: 0.814
Predicted string:  ehello
epoch: 10, loss: 0.787
Predicted string:  ihello
epoch: 11, loss: 0.762
Predicted string:  ihello
epoch: 12, loss: 0.757
Predicted string:  ihello
epoch: 13, loss: 0.761
Predicted string:  ihello
epoch: 14, loss: 0.756
Predicted string:  ihello
epoch: 15, loss: 0.739
Predicted string:  ihello
epoch: 16, loss: 0.721
Predicted string:  ihello
epoch: 17, loss: 0.711
Predicted string:  ihello
epoch: 18, loss: 0.702
Predicted string:  ihello
epoch: 19, loss: 0.695
Predicted string:  ihello
epoch: 20, loss: 0.686
Predicted string:  ihello
epoch: 21, loss: 0.678
Predic