# PyTorch로 시작하는 딥러닝 입문
## 10. 다대다 RNN을 이용한 텍스트 생성

### 문자 단위 RNN

#### 문자 단위 RNN
RNN의 입출력의 단위가 단어 레벨(word-level)이 아니라 문자 레벨(character-level)로 하여 RNN을 구현한 것

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [None]:
# 훈련 데이터 전처리하기
input_str = 'apple'
label_str = 'pple!'
char_vocab = sorted(list(set(input_str + label_str)))
vocab_size = len(char_vocab)
print('문자 집합의 크기 : {}'.format(vocab_size))

문자 집합의 크기 : 5


In [None]:
input_size = vocab_size
hidden_size = 5
output_size = 5
learning_rate = 0.1

In [None]:
char_to_index = dict((c, i) for i, c in enumerate(char_vocab))
print(char_to_index)

{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}


In [None]:
index_to_char = {}
for key, value in char_to_index.items():
    index_to_char[value] = key
    
print(index_to_char)

{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}


In [None]:
x_data = [char_to_index[c] for c in input_str]
y_data = [char_to_index[c] for c in label_str]
print(x_data)
print(y_data)

[1, 4, 4, 3, 2]
[4, 4, 3, 2, 0]


파이토치의 nn.RNN()은 기본적으로 3차원 텐서를 입력 받음

In [None]:
x_data = [x_data]
y_data = [y_data]
print(x_data, np.shape(x_data))
print(y_data, np.shape(y_data))

[[1, 4, 4, 3, 2]] (1, 5)
[[4, 4, 3, 2, 0]] (1, 5)


In [None]:
x_one_hot = [np.eye(vocab_size)[x] for x in x_data]
print(x_one_hot)

[array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.]])]


In [None]:
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

In [None]:
print('훈련 데이터의 크기 : {}'.format(X.shape))
print('레이블의 크기 : {}'.format(Y.shape))

훈련 데이터의 크기 : torch.Size([1, 5, 5])
레이블의 크기 : torch.Size([1, 5])


In [None]:
# 모델 구현하기
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Net, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size, bias=True)

    def forward(self, x):
        x, _status = self.rnn(x)
        x = self.fc(x)
        return x

In [None]:
net = Net(input_size, hidden_size, output_size)

In [None]:
outputs = net(X)
print(outputs.shape)

torch.Size([1, 5, 5])


In [None]:
print(outputs.view(-1, input_size).shape) 

torch.Size([5, 5])


In [None]:
print(Y.shape)
print(Y.view(-1).shape)

torch.Size([1, 5])
torch.Size([5])


In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [None]:
for i in range(100):
    optimizer.zero_grad()
    outputs = net(X)
    loss = criterion(outputs.view(-1, input_size), Y.view(-1)) # view를 하는 이유는 배치 차원을 제거하기 위함
    loss.backward()
    optimizer.step()

    result = outputs.data.numpy().argmax(axis=2) # 최종 예측값인 각 time-step 별 5차원 벡터에서 가장 큰 값의 인덱스 선택
    result_str = ''.join([index_to_char[c] for c in np.squeeze(result)])
    print(i, "loss: ", loss.item(), "prediction: ", result, "true Y: ", y_data, "prediction str: ", result_str)

0 loss:  1.703106164932251 prediction:  [[0 2 2 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  !eee!
1 loss:  1.4952465295791626 prediction:  [[0 0 3 0 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  !!l!!
2 loss:  1.3186657428741455 prediction:  [[0 3 3 4 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  !llp!
3 loss:  1.1146703958511353 prediction:  [[4 3 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  plle!
4 loss:  0.8867151141166687 prediction:  [[4 3 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  plle!
5 loss:  0.6739281415939331 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
6 loss:  0.476294606924057 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
7 loss:  0.31972306966781616 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
8 loss:  0.21356132626533508 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
9 loss:  0.14380432665348053 prediction:  [[4 4 3 2 0]]

### 문자 단위 RNN - 더 많은 데이터

#### 문자 단위 RNN

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# 훈련 데이터 전처리하기
sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

In [11]:
char_set = list(set(sentence))
char_dic = {c: i for i, c in enumerate(char_set)} 

In [12]:
print(char_dic)

{' ': 0, 'y': 1, 'l': 2, 'b': 3, ',': 4, 'a': 5, 'i': 6, 'g': 7, 'd': 8, 'f': 9, 'o': 10, 'p': 11, 'r': 12, 't': 13, 'm': 14, 'k': 15, "'": 16, 's': 17, '.': 18, 'e': 19, 'n': 20, 'h': 21, 'w': 22, 'u': 23, 'c': 24}


In [8]:
dic_size = len(char_dict)
print('문자 집합의 크기 : {}'.format(dic_size))

문자 집합의 크기 : 25


In [13]:
hidden_size = dic_size
sequence_length = 10 
learning_rate = 0.1

In [14]:
x_data = []
y_data = []

for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1 : i + sequence_length + 1]
    print(i, x_str, "->", y_str)

    x_data.append([char_dic[c] for c in x_str])
    y_data.append([char_dic[c] for c in y_str]) 

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [15]:
print(x_data[0])
print(y_data[0])

[6, 9, 0, 1, 10, 23, 0, 22, 5, 20]
[9, 0, 1, 10, 23, 0, 22, 5, 20, 13]


In [17]:
import numpy as np

x_one_hot = [np.eye(dic_size)[x] for x in x_data]
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

In [18]:
print('훈련 데이터의 크기 : {}'.format(X.shape))
print('레이블의 크기 : {}'.format(Y.shape))

훈련 데이터의 크기 : torch.Size([170, 10, 25])
레이블의 크기 : torch.Size([170, 10])


In [19]:
print(X[0])

tensor([[0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,

In [20]:
print(Y[0])

tensor([ 9,  0,  1, 10, 23,  0, 22,  5, 20, 13])


In [21]:
# 모델 구현하기
class Net(nn.Module):
    def __init__(self, input_dim, hidden_dim, layers):
        super(Net, self).__init__()
        self.rnn = nn.RNN(input_dim, hidden_dim, num_layers=layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, hidden_dim, bias=True)

    def forward(self, x):
        x, _status = self.rnn(x)
        x = self.fc(x)
        return x

In [22]:
net = Net(dic_size, hidden_size, 2)

In [23]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)

In [25]:
outputs = net(X)
print(outputs.shape) # 배치 차원, 시점, 출력

torch.Size([170, 10, 25])


정확도를 측정할 땐  
모두 펼쳐서 계산해야 하므로  
배치 차원과 시점 차원을 하나로 만듦

In [27]:
print(outputs.view(-1, outputs.size(-1)).shape)

torch.Size([1700, 25])


In [29]:
print(Y.shape)
print(Y.view(-1).shape)

torch.Size([170, 10])
torch.Size([1700])


In [31]:
for i in range(100):
    optimizer.zero_grad()
    outputs = net(X)

    loss = criterion(outputs.view(-1, dic_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    results = outputs.argmax(dim=2)
    predict_str = ""
    for j, result in enumerate(results):
        if j == 0:
            predict_str += ''.join([char_set[t] for t in result])
        else:
            predict_str += char_set[result[-1]]
    print(predict_str)

lunnuunnuuunuhnuunuunuunuuuhnnuuunnnnunnnnnnuuuuunnuuunnunnnhnuuuuununnuuunuuunuuuunnuunuunnuhupnnhuuunuuunuuuuunuuunnuupunnnununnulupnnhnuhuunnunnnuununununnnuunununuunuunnnunnuu
 oiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
                                                                                                                                                                                   
                       a     a     n                                                                                 n            n                    a                           
 o    e o    e o    e e e e e e e e e o  e oo o   o  e e   oe o  e          e   e e e e e   oe e oo   e e e e o e e e e e   oe eoooe e  e oo    e e e e e e     e e o o      e e e 
 huhuhthuhu huhhhhhhuhehehuhthuhthehehu hu hhuheh huhuhuhu huhhuh hhhththu h uhhuhthehehuhuhhuhu huh

### 단어 단위 RNN - 임베딩 사용

#### 단어 단위 RNN - 임베딩 사용

In [32]:
import torch
import torch.nn as nn
import torch.optim as optim

In [33]:
# 훈련 데이터 전처리하기
sentence = "Repeat is the best medicine for memory".split()

In [34]:
vocab = list(set(sentence))
print(vocab)

['memory', 'the', 'best', 'for', 'Repeat', 'medicine', 'is']


In [35]:
word2index = {tkn:i for i, tkn in enumerate(vocab, 1)}
word2index['<unk>'] = 0

In [36]:
print(word2index)

{'memory': 1, 'the': 2, 'best': 3, 'for': 4, 'Repeat': 5, 'medicine': 6, 'is': 7, '<unk>': 0}


In [37]:
print(word2index['memory'])

1


In [38]:
index2word = {v:k for k, v in word2index.items()}
print(index2word)

{1: 'memory', 2: 'the', 3: 'best', 4: 'for', 5: 'Repeat', 6: 'medicine', 7: 'is', 0: '<unk>'}


In [40]:
print(index2word[1])

memory


In [41]:
def build_data(sentence, word2index):
    encoded = [word2index[token] for token in sentence]
    input_seq, label_seq = encoded[:-1], encoded[1:]
    input_seq = torch.LongTensor(input_seq).unsqueeze(0)
    label_seq = torch.LongTensor(label_seq).unsqueeze(0)
    return input_seq, label_seq

In [42]:
X, Y = build_data(sentence, word2index)

In [43]:
print(X)
print(Y)

tensor([[5, 7, 2, 3, 6, 4]])
tensor([[7, 2, 3, 6, 4, 1]])


In [44]:
# 모델 구현하기
class Net(nn.Module):
    def __init__(self, vocab_size, input_size, hidden_size, batch_first=True):
        super(Net, self).__init__()
        self.embedding_layer = nn.Embedding(num_embeddings=vocab_size,
                                            embedding_dim=input_size)
        self.rnn_layer = nn.RNN(input_size, hidden_size, # 입력 차원, 은닉 상태의 크기 정의
                                batch_first=batch_first)
        self.linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        # 1. 임베딩 층
        # 크기변화: (배치 크기, 시퀀스 길이) => (배치 크기, 시퀀스 길이, 임베딩 차원)
        output = self.embedding_layer(x)

        # 2. RNN 층
        # 크기변화: (배치 크기, 시퀀스 길이, 임베딩 차원)
        # => output (배치 크기, 시퀀스 길이, 은닉층 크기), hidden (1, 배치 크기, 은닉층 크기)
        output, hidden = self.rnn_layer(output)

        # 3. 최종 출력층
        # 크기변화: (배치 크기, 시퀀스 길이, 은닉층 크기) => (배치 크기, 시퀀스 길이, 단어장 크기)
        output = self.linear(output)
        
        # 4. view를 통해서 배치 차원 제거
        # 크기변화: (배치 크기, 시퀀스 길이, 단어장 크기) => (배치 크기*시퀀스 길이, 단어장 크기)
        return output.view(-1, output.size(2))

In [45]:
vocab_size = len(word2index)
input_size = 5
hidden_size = 20

In [46]:
model = Net(vocab_size, input_size, hidden_size, batch_first=True)
loss_function = nn.CrossEntropyLoss() 
optimizer = optim.Adam(params=model.parameters())

In [47]:
output = model(X)
print(output)

tensor([[-0.0223, -0.3678,  0.0118,  0.0385,  0.0727,  0.3478, -0.2820,  0.2321],
        [ 0.2806,  0.0083, -0.0196, -0.1517,  0.1291, -0.0515, -0.5104,  0.0931],
        [ 0.2980, -0.2586,  0.1551, -0.0499, -0.0590, -0.0253, -0.5308,  0.0611],
        [ 0.1112, -0.1422,  0.0713,  0.0986, -0.0247,  0.0510, -0.4622, -0.0037],
        [-0.3756, -0.2720, -0.3380,  0.1547,  0.1712,  0.3320, -0.3808,  0.0144],
        [ 0.1827, -0.2132,  0.1569,  0.1194, -0.0029,  0.0582, -0.5674,  0.0947]],
       grad_fn=<ViewBackward>)


In [48]:
print(output.shape)

torch.Size([6, 8])


In [49]:
decode = lambda y: [index2word.get(x) for x in y]

In [50]:
for step in range(201):
    optimizer.zero_grad()
    output = model(X)

    loss = loss_function(output, Y.view(-1))
    loss.backward()
    optimizer.step()

    if step % 40 == 0:
        print("[{:02d}/201] {:.4f} ".format(step+1, loss))
        pred = output.softmax(-1).argmax(-1).tolist()
        print(" ".join(["Repeat"] + decode(pred)))
        print()

[01/201] 2.1245 
Repeat Repeat <unk> <unk> <unk> Repeat <unk>

[41/201] 1.5159 
Repeat is the best medicine for memory

[81/201] 0.8521 
Repeat is the best medicine for memory

[121/201] 0.4088 
Repeat is the best medicine for memory

[161/201] 0.2169 
Repeat is the best medicine for memory

[201/201] 0.1318 
Repeat is the best medicine for memory

