In [None]:
rnn = nn.RNN(input_size, hidden_size) # cell 을 선언하는 과정
outputs, _status = rnn(input_data) # 입력, 출력을 구성하는 과정
# outputs.shape == (batch_size, sequence_length, hidden_size)
# input_data.shape == (batch_size, sequence_length, input_size)

## RNN run example

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
h = [1,0,0,0]
e = [0,1,0,0]
l = [0,0,1,0]
o = [0,0,0,1]
input_size = 4 # embedding vector 의 dimension
hidden_size = 2 # desirable output size
# sequence_length 는 자동으로 인식한다 (별도로 길이 정보를 전달할 필요가 없음).
# batch_size 또한 자동으로 인식한다 (별도로 배치크기 정보를 전달할 필요가 없음).

In [3]:
input_data_np = np.array([[h,e,l,l,o],
                          [e,o,l,l,l],
                          [l,l,e,e,l]], dtype=np.float32)

input_data = torch.Tensor(input_data_np)
rnn = nn.RNN(input_size, hidden_size)
outputs, _status = rnn(input_data)

In [None]:
criterion = nn.CrossEntropyLoss()
# loss = criterion(outputs.view(-1, input_size), Y)

## 'hihello' problem: predict next character

In [None]:
char_set = ['h','i','e','l','o']
# hyper parameters
input_size = len(char_set)
hidden_size = len(char_set)
learning_rate = 0.1
# data setting
x_data = [[0,1,0,2,3,3]] # h i h e l l
x_one_hot = [[[1,0,0,0,0],
              [0,1,0,0,0],
              [1,0,0,0,0],
              [0,0,1,0,0],
              [0,0,0,1,0],
              [0,0,0,1,0]]]
y_data = [[1,0,2,3,3,4]] # i h e l l o
# transform as torch tensor variable
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

## generalized version: (charseq) code run through

In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [8]:
sample = " if you want you"
# make dictionary
char_set = list(set(sample))
char_dict = {c: i for i, c in enumerate(char_set)}
# hyper parameters
input_size = len(char_dict)
hidden_size = len(char_dict)
learning_rate = 0.1
# data setting
sample_idx = [char_dict[c] for c in sample]
x_data = [sample_idx[:-1]]
y_data = [sample_idx[1:]]
x_one_hot = [np.eye(input_size)[x] for x in x_data]
# transform as torch tensor variable
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

In [11]:
# declare RNN
rnn = nn.RNN(input_size, hidden_size, batch_first=True)
# loss & optimizer setting
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(rnn.parameters(), lr=learning_rate)

for i in range(100):
    optimizer.zero_grad()
    outputs, _status = rnn(X)
    loss = criterion(outputs.view(-1, input_size), Y.view(-1))
    loss.backward()
    optimizer.step()
    
    result = outputs.data.numpy().argmax(axis=2)
    result_str = ''.join([char_set[c] for c in np.squeeze(result)])
    print(f"[Epoch: {i}]", "loss: {:.4f}".format(loss.item()), "\nprediction:", result, "\ntreu Y:", y_data, "\nprediction str: ", result_str)

[Epoch: 0] loss: 2.4586 
prediction: [[5 6 9 9 9 6 9 5 6 3 5 5 9 9 6]] 
treu Y: [[4, 3, 2, 0, 8, 1, 2, 5, 9, 7, 6, 2, 0, 8, 1]] 
prediction str:  wtaaatawtfwwaat
[Epoch: 1] loss: 2.1407 
prediction: [[7 7 7 7 7 1 7 7 9 1 7 7 7 7 1]] 
treu Y: [[4, 3, 2, 0, 8, 1, 2, 5, 9, 7, 6, 2, 0, 8, 1]] 
prediction str:  nnnnnunnaunnnnu
[Epoch: 2] loss: 1.9109 
prediction: [[7 2 2 2 2 1 3 1 9 1 2 2 2 2 1]] 
treu Y: [[4, 3, 2, 0, 8, 1, 2, 5, 9, 7, 6, 2, 0, 8, 1]] 
prediction str:  n    ufuau    u
[Epoch: 3] loss: 1.7009 
prediction: [[5 3 2 9 2 1 2 5 9 1 2 2 3 2 1]] 
treu Y: [[4, 3, 2, 0, 8, 1, 2, 5, 9, 7, 6, 2, 0, 8, 1]] 
prediction str:  wf a u wau  f u
[Epoch: 4] loss: 1.5401 
prediction: [[5 3 2 9 2 1 3 5 9 1 6 2 0 8 1]] 
treu Y: [[4, 3, 2, 0, 8, 1, 2, 5, 9, 7, 6, 2, 0, 8, 1]] 
prediction str:  wf a ufwaut you
[Epoch: 5] loss: 1.4352 
prediction: [[0 3 2 0 8 1 3 5 9 7 6 2 0 8 1]] 
treu Y: [[4, 3, 2, 0, 8, 1, 2, 5, 9, 7, 6, 2, 0, 8, 1]] 
prediction str:  yf youfwant you
[Epoch: 6] loss: 1.3668 
pre