### 1) 문자 단위 RNN(Char RNN)
* 다대다 RNN은 대표적으로 품사 태깅, 개체명 인식 등에서 사용됨

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

import numpy as np

In [None]:
input_str = 'apple'
label_str = 'pple!'

In [None]:
char_vocab = sorted(list(set(input_str+label_str)))
char_vocab

['!', 'a', 'e', 'l', 'p']

In [None]:
vocab_size = len(char_vocab)
print("문자 집합의 크기 : {}".format(vocab_size))

문자 집합의 크기 : 5


In [None]:
input_size = vocab_size # 입력의 크기는 문자 집합의 크기
hidden_size = 5
output_size = 5
learning_rate = 0.1

In [None]:
char_to_index = {char: i for i, char in enumerate(char_vocab)}
char_to_index

{'!': 3, 'a': 1, 'e': 4, 'l': 2, 'p': 0}

In [None]:
index_to_char = {}
for key, value in char_to_index.items():
  index_to_char[value] = key
print(index_to_char)

{0: 'p', 1: 'a', 2: 'l', 3: '!', 4: 'e'}


In [None]:
x = [char_to_index[char] for char in input_str]
y = [char_to_index[char] for char in label_str]
print(x)
print(y)

[1, 0, 0, 2, 4]
[0, 0, 2, 4, 3]


In [None]:
# 배치 차원 추가
x = [x]
y = [y]
print(x)
print(y)

[[1, 0, 0, 2, 4]]
[[0, 0, 2, 4, 3]]


In [None]:
x_one_hot = [np.eye(vocab_size)[x_data] for x_data in x]
print(x_one_hot)

[array([[0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1.]])]


In [None]:
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y)

In [None]:
print('훈련 데이터의 크기 : {}'.format(X.shape))
print('레이블의 크기 : {}'.format(Y.shape))

훈련 데이터의 크기 : torch.Size([1, 5, 5])
레이블의 크기 : torch.Size([1, 5])


In [None]:
X

tensor([[[0., 1., 0., 0., 0.],
         [1., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 1.]]])

In [None]:
Y

tensor([[0, 0, 2, 4, 3]])

In [None]:
class Net(nn.Module):
  def __init__(self, input_size, hidden_size, output_size):
    super(Net, self).__init__()
    self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
    self.fc = nn.Linear(hidden_size, output_size, bias=True)

  def forward(self, x):
    net, _status = self.rnn(x)
    net = self.fc(net)
    return net

In [None]:
net = Net(input_size, hidden_size, output_size)

In [None]:
outputs = net(X)
print(outputs.shape)
#(1, 5, 5)의 크기를 가지는데 각각 배치 차원, 시점(timesteps), 출력의 크기(단어 벡터)

torch.Size([1, 5, 5])


In [None]:
outputs

tensor([[[-0.1729,  0.3794, -0.0231, -0.4917,  0.4815],
         [-0.1583,  0.4793, -0.0176, -0.5513,  0.4017],
         [-0.2229,  0.5462, -0.0015, -0.5580,  0.3624],
         [-0.3332,  0.4757, -0.1672, -0.7412,  0.8428],
         [-0.0374,  0.3287, -0.0819, -0.6038,  0.5582]]],
       grad_fn=<AddBackward0>)

In [None]:
print(outputs.view(-1, input_size).shape)

torch.Size([5, 5])


In [None]:
print(Y.shape)
print(Y.view(-1).shape)

torch.Size([1, 5])
torch.Size([5])


In [None]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)

In [None]:
for i in range(100):
  optimizer.zero_grad()
  outputs = net(X)
  loss = loss_fn(outputs.view(-1,input_size), Y.view(-1))
  
  loss.backward()
  optimizer.step()

  result = outputs.data.numpy().argmax(axis=2)
  result_str = ''.join([index_to_char[c] for c in np.squeeze(result)])
  print(i, "loss: ", loss.item(), "prediction: ", result, "true Y: ", y, "prediction str: ", result_str)

0 loss:  1.744762659072876 prediction:  [[4 1 1 4 4]] true Y:  [[0, 0, 2, 4, 3]] prediction str:  eaaee
1 loss:  1.3953529596328735 prediction:  [[0 0 0 4 0]] true Y:  [[0, 0, 2, 4, 3]] prediction str:  pppep
2 loss:  1.2456333637237549 prediction:  [[0 0 0 0 0]] true Y:  [[0, 0, 2, 4, 3]] prediction str:  ppppp
3 loss:  1.106453776359558 prediction:  [[0 0 2 0 0]] true Y:  [[0, 0, 2, 4, 3]] prediction str:  pplpp
4 loss:  0.9420631527900696 prediction:  [[0 0 2 4 0]] true Y:  [[0, 0, 2, 4, 3]] prediction str:  pplep
5 loss:  0.7849623560905457 prediction:  [[0 0 2 4 0]] true Y:  [[0, 0, 2, 4, 3]] prediction str:  pplep
6 loss:  0.6624113321304321 prediction:  [[0 0 2 4 3]] true Y:  [[0, 0, 2, 4, 3]] prediction str:  pple!
7 loss:  0.5566275715827942 prediction:  [[0 0 2 4 3]] true Y:  [[0, 0, 2, 4, 3]] prediction str:  pple!
8 loss:  0.4616056978702545 prediction:  [[0 0 2 4 3]] true Y:  [[0, 0, 2, 4, 3]] prediction str:  pple!
9 loss:  0.38467228412628174 prediction:  [[0 0 2 4 3]] t

### 2) 문자 단위 RNN(Char RNN) - 더 많은 데이터



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

In [None]:
len(sentence)

180

In [None]:
char_set = list(set(sentence))

In [None]:
char_set[:10]

['m', 'a', "'", 'n', '.', 'p', ',', 'g', 'b', 'f']

In [None]:
char_dic = {char: i for i, char in enumerate(char_set)}
print(char_dic)

{'m': 0, 'a': 1, "'": 2, 'n': 3, '.': 4, 'p': 5, ',': 6, 'g': 7, 'b': 8, 'f': 9, 'u': 10, 'c': 11, 's': 12, 't': 13, 'k': 14, 'o': 15, 'y': 16, 'w': 17, 'h': 18, 'd': 19, 'e': 20, ' ': 21, 'r': 22, 'i': 23, 'l': 24}


In [None]:
dic_size = len(char_dic)
print('문자 집합의 크기 : {}'.format(dic_size))

문자 집합의 크기 : 25


In [None]:
hidden_size = dic_size
sequence_length = 10  # 임의 숫자 지정
learning_rate = 0.1

In [None]:
x_data = []
y_data = []

for i in range(0, len(sentence) - sequence_length):
  x_str = sentence[i : i + sequence_length]
  y_str = sentence[i+1 : i+1+ sequence_length]
  print(i, x_str, "->", y_str)

  x_data.append([char_dic[char] for char in x_str])
  y_data.append([char_dic[char] for char in y_str])

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [None]:
print(x_data[0])  # if you wan에 해당됨.
print(y_data[0])  # f you want에 해당됨.

[23, 9, 21, 16, 15, 10, 21, 17, 1, 3]
[9, 21, 16, 15, 10, 21, 17, 1, 3, 13]


In [None]:
x_one_hot = [np.eye(dic_size)[x] for x in x_data] # x 데이터는 원-핫 인코딩
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

In [None]:
print('훈련 데이터의 크기 : {}'.format(X.shape))
print('레이블의 크기 : {}'.format(Y.shape))


훈련 데이터의 크기 : torch.Size([170, 10, 25])
레이블의 크기 : torch.Size([170, 10])


In [None]:
print(X[0])

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,

In [None]:
print(Y[0])

tensor([ 9, 21, 16, 15, 10, 21, 17,  1,  3, 13])


In [None]:
class Net(nn.Module):
  def __init__(self, input_dim, hidden_dim, layers):
    super(Net, self).__init__()
    self.rnn = nn.RNN(input_dim, hidden_dim, num_layers=layers, batch_first=True)
    self.fc = nn.Linear(hidden_dim, hidden_dim, bias=True)
  
  def forward(self, x):
    net, _status = self.rnn(x)
    net = self.fc(net)
    return net


In [None]:
net = Net(dic_size, hidden_size, 2)

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)

In [None]:
outputs = net(X)
print(outputs.shape)

torch.Size([170, 10, 25])


In [None]:
print(outputs.view(-1, dic_size).shape) # 2차원 텐서로 변환.

torch.Size([1700, 25])


In [None]:
print(Y.shape)
print(Y.view(-1).shape)

torch.Size([170, 10])
torch.Size([1700])


In [None]:
for i in range(100):
  optimizer.zero_grad()
  outputs = net(X)

  loss = loss_fn(outputs.view(-1, dic_size), Y.view(-1))
  loss.backward()
  optimizer.step()

  results = outputs.argmax(dim=2)
  predict_str = ""
  for j, result in enumerate(results):
      if j == 0: # 처음에는 예측 결과를 전부 가져오지만
          predict_str += ''.join([char_set[t] for t in result])
      else: # 그 다음에는 마지막 글자만 반복 추가
          predict_str += char_set[result[-1]]

  print(predict_str)

fooooooooooo'oooooooooo'ooooo'ooo'''ooooooooooo'oo'o'o'fooo'ooooooooooooo'ooo'o'ooo''o''f'oo'oooo'o'o''oo'oooooooooooooofooo'ooo'o'oooo'ooooooooooo'ooooooooo''oo'oooo'ooooooooo''f
  t   t    t  t  t  t t tt  t   t t  t t  t      t        t  t       t    t   t   t t   t  t    t     t   t     t   t t    t     t    t  t t  t   t   t       tt     t  t  t t t   
ii..noik.a...a.a..a...aaa..aaa.aa..........a..aa...an..asa..n....a..aa..aa..aaa..aa..aa........a.ans...aa...aan..aaa...aas..ans...a..a..a....a.aa...as..aaa...aaa....an.....aa.aa.a
 oooo ooooooooooooooooo ooooooooooooo oooooooooooooooooooooooooo oo oooo ooooooooooooooo oo oo oooooooooooooo oooooo oo ooooooooo oo oooooooooooooooo ooooooooooo ooo oo oo oo oo o
                                                                                                                                                                                   
 ueeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeueeeeeeeeeeeeeeeeeeeeeeeeeeueeeee

### 3) 단어 단위 RNN - 임베딩 사용

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
sentence = "Repeat is the best medicine for memory".split()

In [3]:
vocab = list(set(sentence))
print(vocab)

['for', 'Repeat', 'is', 'the', 'medicine', 'best', 'memory']


In [4]:
word2index = {word: i+1 for i, word in enumerate(vocab)}
word2index['<unk'] = 0

In [5]:
print(word2index)

{'for': 1, 'Repeat': 2, 'is': 3, 'the': 4, 'medicine': 5, 'best': 6, 'memory': 7, '<unk': 0}


In [6]:
print(word2index['memory'])

7


In [11]:
index2word = {value: key for key,value in word2index.items()}
print(index2word)

{1: 'for', 2: 'Repeat', 3: 'is', 4: 'the', 5: 'medicine', 6: 'best', 7: 'memory', 0: '<unk'}


In [13]:
print(index2word[7])

memory


In [20]:
def build_data(sentence, word2index):
  encoded = [word2index[word] for word in sentence]
  input_seq, label_seq = encoded[:-1], encoded[1:]

  input_seq = torch.LongTensor(input_seq).unsqueeze(0) # 배치 차원 추가
  label_seq = torch.LongTensor(label_seq).unsqueeze(0) # 배치 차원 추가
  return input_seq, label_seq

In [21]:
X, Y = build_data(sentence, word2index)

In [22]:
print(X)
print(Y)

tensor([[2, 3, 4, 6, 5, 1]])
tensor([[3, 4, 6, 5, 1, 7]])


In [28]:

class Net(nn.Module):
    def __init__(self, vocab_size, input_size, hidden_size, batch_first=True):
        super(Net, self).__init__()
        self.embedding_layer = nn.Embedding(num_embeddings=vocab_size, # 워드 임베딩
                                            embedding_dim=input_size)
        self.rnn_layer = nn.RNN(input_size, hidden_size, # 입력 차원, 은닉 상태의 크기 정의
                                batch_first=batch_first)
        self.linear = nn.Linear(hidden_size, vocab_size) # 출력은 원-핫 벡터의 크기를 가져야함. 또는 단어 집합의 크기만큼 가져야함.

    def forward(self, x):
        # 1. 임베딩 층
        # 크기변화: (배치 크기, 시퀀스 길이) => (배치 크기, 시퀀스 길이, 임베딩 차원)
        output = self.embedding_layer(x)
        # 2. RNN 층
        # 크기변화: (배치 크기, 시퀀스 길이, 임베딩 차원)
        # => output (배치 크기, 시퀀스 길이, 은닉층 크기), hidden (1, 배치 크기, 은닉층 크기)
        output, hidden = self.rnn_layer(output)
        # 3. 최종 출력층
        # 크기변화: (배치 크기, 시퀀스 길이, 은닉층 크기) => (배치 크기, 시퀀스 길이, 단어장 크기)
        output = self.linear(output)
        # 4. view를 통해서 배치 차원 제거
        # 크기변화: (배치 크기, 시퀀스 길이, 단어장 크기) => (배치 크기*시퀀스 길이, 단어장 크기)
        return output.view(-1, output.size(2))

In [29]:
# 하이퍼 파라미터
vocab_size = len(word2index)  # 단어장의 크기는 임베딩 층, 최종 출력층에 사용된다. <unk> 토큰을 크기에 포함한다.
input_size = 5  # 임베딩 된 차원의 크기 및 RNN 층 입력 차원의 크기
hidden_size = 20  # RNN의 은닉층 크기

In [30]:
model = Net(vocab_size, input_size, hidden_size, batch_first=True)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [31]:
output = model(X)
print(output)

tensor([[-0.0951,  0.0305, -0.3060, -0.4093,  0.0210, -0.0334,  0.2216,  0.4513],
        [-0.1546,  0.0888,  0.0169, -0.5814,  0.2376, -0.2113,  0.0717,  0.2380],
        [-0.3460,  0.1971, -0.2975, -0.3280,  0.2435, -0.2133, -0.2123,  0.2977],
        [-0.2902,  0.0031, -0.1246, -0.4361,  0.2483, -0.0782,  0.1004,  0.3086],
        [-0.1777,  0.0747, -0.0445, -0.2044,  0.0917, -0.1716, -0.1128,  0.2134],
        [-0.2373,  0.3339, -0.1757, -0.4314,  0.1681,  0.0178,  0.3352,  0.3855]],
       grad_fn=<ViewBackward>)


In [32]:
output.shape

torch.Size([6, 8])

In [43]:
decode = lambda y: [index2word.get(x) for x in y]

In [50]:
decode(output.softmax(-1).argmax(-1).tolist())

['memory', 'memory', 'memory', 'memory', 'memory', 'memory']

In [54]:
for step in range(201):
    # 경사 초기화
    optimizer.zero_grad()
    # 순방향 전파
    output = model(X)
    # 손실값 계산
    loss = loss_fn(output, Y.view(-1))
    # 역방향 전파
    loss.backward()
    # 매개변수 업데이트
    optimizer.step()
    # 기록
    if step % 40 == 0:
        print("[{:02d}/201] {:.4f} ".format(step+1, loss))
        pred = output.softmax(-1).argmax(-1).tolist()
        print(" ".join(["Repeat"] + decode(pred)))
        print()

[01/201] 2.0824 
Repeat memory memory memory memory memory memory

[41/201] 1.5544 
Repeat memory the best medicine for memory

[81/201] 0.9084 
Repeat memory the best medicine for memory

[121/201] 0.4893 
Repeat is the best medicine for memory

[161/201] 0.2596 
Repeat is the best medicine for memory

[201/201] 0.1466 
Repeat is the best medicine for memory

