# Many-to-many 문자 단위 RNN 구현

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np


apple을 입력받으면 pple!을 출력하는 RNN

In [None]:
# Input sequence and its one-character-shifted target.
input_str = 'apple'
label_str = 'pple!'

# Sorted list of the distinct characters seen in either string.
char_vocab = sorted(set(input_str + label_str))
vocab_size = len(char_vocab)
print(f"vocabulary size: {vocab_size}")

vocabulary size: 5


In [None]:
# Show the sorted character vocabulary.
print(char_vocab)

['!', 'a', 'e', 'l', 'p']


입력으로 one-hot vector를 사용하기 때문에 입력의 크기(input_size)는 vocabulary size와 같다.

In [None]:
# Hyperparameters
input_size = vocab_size   # one-hot inputs have one slot per vocabulary character
hidden_size = 5
# One output score per vocabulary character; derived from vocab_size instead
# of a hard-coded 5 so it stays correct if the input strings change.
output_size = vocab_size
learning_rate = 0.1


문자 집합의 각 문자에 고유한 정수를 부여하자.

In [None]:
# Map each vocabulary character to its position in char_vocab.
char_to_index = {ch: idx for idx, ch in enumerate(char_vocab)}
print(char_to_index)

{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}


In [None]:
# Inverse lookup: integer index -> character.
index_to_char = {idx: ch for ch, idx in char_to_index.items()}
print(index_to_char)

{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}


In [None]:
# Encode the input and label strings as lists of vocabulary indices.
x_data = [char_to_index[c] for c in input_str]
y_data = [char_to_index[c] for c in label_str]
# Print the source variables rather than duplicated literals so the labels
# cannot drift out of sync with the data.
print(x_data, '=', input_str)
print(y_data, '=', label_str)

[1, 4, 4, 3, 2] = apple
[4, 4, 3, 2, 0] = pple!


In [None]:
# The RNN is fed a 3-D tensor, so wrap each sequence in an outer list to add
# a leading (batch) dimension.
x_data, y_data = [x_data], [y_data]
print(x_data, y_data, sep='\n')

[[1, 4, 4, 3, 2]]
[[4, 4, 3, 2, 0]]


In [None]:
# Row k of the identity matrix is the one-hot vector for index k, so indexing
# it with a sequence of indices yields that sequence's one-hot encoding.
identity = np.eye(vocab_size)
x_one_hot = [identity[seq] for seq in x_data]
print(x_one_hot)

[array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.]])]


In [None]:
# Stack the list of ndarrays into a single ndarray first: building a tensor
# directly from a list of ndarrays is very slow and emits a UserWarning.
X = torch.tensor(np.array(x_one_hot), dtype=torch.float32)
# Class indices for CrossEntropyLoss must be integer (long) typed.
Y = torch.tensor(y_data, dtype=torch.long)

  X = torch.FloatTensor(x_one_hot)


In [None]:
# Report the shapes of the training inputs and the labels.
print(f'훈련 데이터의 크기 : {X.shape}')
print(f'레이블의 크기 : {Y.shape}')

훈련 데이터의 크기 : torch.Size([1, 5, 5])
레이블의 크기 : torch.Size([1, 5])


In [None]:
class Net(torch.nn.Module):
    """Single-layer RNN followed by a linear layer applied at every time step.

    Args:
        input_size: number of features in each input vector.
        hidden_size: size of the RNN hidden state.
        output_size: number of output scores produced per time step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # batch_first=True: inputs/outputs are laid out (batch, seq, feature).
        self.rnn = torch.nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, output_size, bias=True)

    def forward(self, x):
        """Return per-time-step scores for the input batch ``x``."""
        # The RNN also returns its final hidden state, which is not needed here.
        hidden_seq, _ = self.rnn(x)
        return self.fc(hidden_seq)

In [None]:
# Build the model from the hyperparameters defined above.
net = Net(input_size, hidden_size, output_size)

In [None]:
# Run one forward pass and inspect the shape of the result.
outputs = net(X)
print(outputs.shape)

torch.Size([1, 5, 5])


In [None]:
# Merge the batch and time axes, keeping the class axis. The last axis of the
# model output is the output dimension, so take its size from the tensor
# itself rather than from input_size (the two only coincide here).
print(outputs.view(-1, outputs.size(-1)).shape)

torch.Size([5, 5])


In [None]:
# Cross-entropy over the per-step class scores, optimised with Adam.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [None]:
# Train for 100 steps on the single input/label sequence, printing the loss
# and the decoded prediction after every step.
for i in range(100):
    optimizer.zero_grad()
    outputs = net(X)
    # Flatten (batch, seq, classes) -> (batch * seq, classes). The class count
    # is the model's output dimension, so read it from the tensor rather than
    # reusing input_size.
    loss = criterion(outputs.view(-1, outputs.size(-1)), Y.view(-1))
    loss.backward()
    optimizer.step()

    # detach() is the supported way to drop the autograd graph before
    # converting to NumPy (the .data attribute is discouraged).
    result = outputs.detach().numpy().argmax(axis=2)
    # Decode the first (only) sequence in the batch; indexing row 0 stays 1-D
    # even for a length-1 sequence, which np.squeeze would turn into a scalar.
    result_str = ''.join(index_to_char[c] for c in result[0])
    print(i, "loss: ", loss.item(), "prediction: ", result, "true Y: ", y_data, "prediction str: ", result_str)


0 loss:  1.5996636152267456 prediction:  [[3 2 2 0 2]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  lee!e
1 loss:  1.3504164218902588 prediction:  [[4 4 4 3 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppl!
2 loss:  1.1393619775772095 prediction:  [[4 4 4 4 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppp!
3 loss:  0.9507560729980469 prediction:  [[4 4 4 0 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  ppp!!
4 loss:  0.7709124088287354 prediction:  [[4 4 4 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppe!
5 loss:  0.605263352394104 prediction:  [[4 4 4 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppe!
6 loss:  0.4821853041648865 prediction:  [[4 4 4 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pppe!
7 loss:  0.387665331363678 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
8 loss:  0.3024658262729645 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
9 loss:  0.23931105434894562 prediction:  [[4 4 3 2 0]] t

# 더 많은 데이터에 적용해 보기

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim


In [None]:
# Training text for the character-level model (adjacent string literals are
# concatenated into one sentence).
sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")


문자 vocabulary를 만들고, 각 문자를 고유한 정수에 대응시키기

In [None]:
# Distinct characters of the sentence. Sorting makes the character -> index
# mapping reproducible: the iteration order of a plain set of strings can
# change from one interpreter run to the next, so indices (and any printed
# mapping) would otherwise differ between runs.
char_set = sorted(set(sentence))
# Character -> integer index lookup.
char_dic = {c: i for i, c in enumerate(char_set)}

In [None]:
# Show the character -> index mapping.
print(char_dic)

{',': 0, 'e': 1, 't': 2, 'i': 3, 'u': 4, 'k': 5, 'f': 6, 'l': 7, 'o': 8, 'a': 9, 'n': 10, '.': 11, 'g': 12, 'p': 13, 'r': 14, ' ': 15, 'c': 16, 'b': 17, 'y': 18, 'w': 19, 'd': 20, 's': 21, 'h': 22, "'": 23, 'm': 24}


In [None]:
# Number of distinct characters in the sentence.
dic_size = len(char_dic)

In [None]:
# Hyperparameters
hidden_size = dic_size
sequence_length = 10  # split the text into windows of 10 characters
learning_rate = 0.1

In [None]:
# Build sliding-window training pairs: each input is `sequence_length`
# characters and its target is the same window shifted one character ahead.
x_data, y_data = [], []

for i in range(len(sentence) - sequence_length):
    window = sentence[i:i + sequence_length]
    shifted = sentence[i + 1:i + 1 + sequence_length]
    print(i, window, '->', shifted)

    # Store both windows as lists of character indices.
    x_data.append([char_dic[ch] for ch in window])
    y_data.append([char_dic[ch] for ch in shifted])


0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [None]:
# Inspect the first encoded input window and its target.
print(x_data[0])
print(y_data[0])

[3, 6, 15, 18, 8, 4, 15, 19, 9, 10]
[6, 15, 18, 8, 4, 15, 19, 9, 10, 2]


In [None]:
# One-hot encode every window: row k of the identity matrix is the one-hot
# vector for index k. The identity matrix is built once instead of once per
# window.
identity = np.eye(dic_size)
x_one_hot = [identity[x] for x in x_data]
# Stack into a single ndarray before handing it to torch: building a tensor
# from a list of ndarrays is very slow and emits a UserWarning.
X = torch.tensor(np.array(x_one_hot), dtype=torch.float32)
# Class indices for CrossEntropyLoss must be integer (long) typed.
Y = torch.tensor(y_data, dtype=torch.long)

In [None]:
class Net(torch.nn.Module):
    """Stacked RNN followed by a linear layer applied at every time step.

    Args:
        input_dim: number of features in each input vector.
        hidden_dim: size of the RNN hidden state.
        layers: number of stacked RNN layers.
        output_dim: number of output scores per time step. Defaults to
            ``hidden_dim``, which reproduces the original behaviour; pass it
            explicitly when the hidden size differs from the number of classes.
    """

    def __init__(self, input_dim, hidden_dim, layers, output_dim=None):
        super().__init__()
        if output_dim is None:
            output_dim = hidden_dim
        # batch_first=True: inputs/outputs are laid out (batch, seq, feature).
        self.rnn = torch.nn.RNN(input_dim, hidden_dim, num_layers=layers, batch_first=True)
        self.fc = torch.nn.Linear(hidden_dim, output_dim, bias=True)

    def forward(self, x):
        """Return per-time-step scores for the input batch ``x``."""
        # The RNN also returns its final hidden state, which is not needed here.
        hidden_seq, _ = self.rnn(x)
        return self.fc(hidden_seq)

In [None]:
# Two stacked RNN layers; input and hidden sizes both come from the vocabulary size.
net = Net(dic_size, hidden_size, 2)

In [None]:
# Run one forward pass and inspect the shape of the result.
outputs = net(X)
print(outputs.shape)

torch.Size([170, 10, 25])


In [None]:
# Shape after merging the batch and time axes (the form passed to the loss).
print(outputs.view(-1, dic_size).shape)

torch.Size([1700, 25])


In [None]:
# Label shape before and after flattening to one index per time step.
print(Y.shape)
print(Y.view(-1).shape)

torch.Size([170, 10])
torch.Size([1700])


In [None]:
# Cross-entropy over the per-step class scores, optimised with Adam.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)


In [None]:
# Train for 100 epochs on the full batch, printing the reconstructed text
# predicted after each epoch.
for i in range(100):
    optimizer.zero_grad()
    outputs = net(X)  # the whole (170, 10, 25) batch is fed to the model every epoch
    loss = criterion(outputs.view(-1, dic_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    # results has shape (170, 10): the predicted character index per time step.
    results = outputs.argmax(dim=2)
    rows = results.tolist()
    # Consecutive windows overlap by all but one character, so take every
    # prediction of the first window and only the last one of each later window.
    pieces = [char_set[idx] for row in rows[:1] for idx in row]
    pieces.extend(char_set[row[-1]] for row in rows[1:])
    predict_str = ''.join(pieces)

    print(predict_str)


,,,,,,,,,,,,,,,e,,,,,,,,,,,,,,,,e,,,,,n,,,,,,,,,,,,,,,,e,e,,,,e,,,,e,,,,,,,,,,,,,,e,,,,,,,,,e,,e,,,,,,,,,,,,,,,,,e,e,e,,e,e,,,,,,,,,,e,,,,,,,,,,,,,,e,,,,,,,,,,,,,,,,,e,,,,,,n,,,,,
tt                                                                                                                                                                                 
                                                                                                                                                                                   
l,g  ygu ttttttttttrtrlrttt,ttttt,lttttlttrttttttttttttl,ttylttr,ttt,tttt,,ttrttttttt,ttt,ttlttttttttr,ttt,tttttt,tttyttl,ttylt l,ttt,ttt,tr,ttt,tt,llttrttt,rttttt,,ttt,tttrtttttt
ltaeittaoeeee eeeoeee u ee  eeee    eee  e eeeeee  ee uu ee  ee  eoe  ee   e eee  eeeeeee  e eo e eee  eee ee eeoee e eee oe eod  eoe eoeee  eee  e   ee  ee  eeeee  eeeeeeeuee e  
tttorietosoooosostooossoooostoosooooosossosoooooosooosoososoososoooooooossooososostotsonsossoooosooo