### **Char RNN**

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

---
**훈련 데이터 전처리**

In [2]:
input_str = 'apple'
label_str = 'pple!'
char_vocab = sorted(list(set(input_str + label_str)))
vocab_size = len(char_vocab)
print('문자 집합의 크기 : {}'.format(vocab_size))

문자 집합의 크기 : 5


In [3]:
input_size = vocab_size #  입력은 원-핫 벡터를 사용할 것이므로 입력의 크기는 문자 집합의 크기여야만 함
hidden_size = 5
output_size = 5
learning_rate = 0.1

char_to_index = dict((c, i) for i, c in enumerate(char_vocab)) # 문자에 고유한 정수 인덱스 부여

print(char_to_index)

{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}


In [4]:
index_to_char={}
for key, value in char_to_index.items():
    index_to_char[value] = key
print(index_to_char)

{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}


In [5]:
x_data = [char_to_index[c] for c in input_str]
y_data = [char_to_index[c] for c in label_str]

In [6]:
# nn.RNN()은 3차원 텐서를 입력받기 때문에 배치차원 추가

x_data = [x_data]
y_data = [y_data]
print(x_data)
print(y_data)

[[1, 4, 4, 3, 2]]
[[4, 4, 3, 2, 0]]


In [7]:
# 원-핫 벡터로 바꿔주기

x_one_hot = [np.eye(vocab_size)[x] for x in x_data]
print(x_one_hot)

[array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.]])]


In [8]:
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

  X = torch.FloatTensor(x_one_hot)


In [9]:
print('훈련 데이터의 크기 : {}'.format(X.shape))
print('레이블의 크기 : {}'.format(Y.shape))

훈련 데이터의 크기 : torch.Size([1, 5, 5])
레이블의 크기 : torch.Size([1, 5])


---
**모델 구현하기**

In [10]:
class Net(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Net,self).__init__()
        self.rnn = torch.nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, output_size, bias=True)

    def forward(self, x):
        x, _status = self.rnn(x)
        x = self.fc(x)
        return(x)

In [11]:
net = Net(input_size, hidden_size, output_size)

In [12]:
outputs = net(X)
print(outputs.shape) # 배치 차원, 시점, 출력의 크기

torch.Size([1, 5, 5])


In [13]:
print(outputs.view(-1, input_size).shape)

torch.Size([5, 5])


In [14]:
print(Y.shape)
print(Y.view(-1).shape) # 정확도 측정 시에는 이 형태로 계산산

torch.Size([1, 5])
torch.Size([5])


In [15]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)

In [16]:
epochs = 100
for i in range(epochs):
    optimizer.zero_grad()
    outputs = net(X)
    loss = criterion(outputs.view(-1, input_size), Y.view(-1)) # Batch 차원 제거를 위해 view 사용
    loss.backward()
    optimizer.step() # optimizer의 파라미터 업데이트

    result = outputs.data.numpy().argmax(axis=2) # 각 시점별 5차원 벡터에 대해서 가장 높은 값의 인덱스 선택
    result_str = ''.join([index_to_char[c] for c in np.squeeze(result)])
    print(i, "loss: ", loss.item(), "prediction: ", result, "true Y: ", y_data, "prediction str: ", result_str)

0 loss:  1.6163734197616577 prediction:  [[2 2 2 3 3]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  eeell
1 loss:  1.3258720636367798 prediction:  [[4 3 3 3 3]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pllll
2 loss:  1.10878586769104 prediction:  [[4 4 3 3 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  ppll!
3 loss:  0.9173469543457031 prediction:  [[4 4 3 0 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  ppl!!
4 loss:  0.7490520477294922 prediction:  [[4 4 3 0 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  ppl!!
5 loss:  0.6088422536849976 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
6 loss:  0.5051730275154114 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
7 loss:  0.4215237498283386 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
8 loss:  0.3378075957298279 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
9 loss:  0.2580251395702362 prediction:  [[4 4 3 2 0]] tr

---
### **Char RNN(More Data)**

In [17]:
import torch
import torch.nn as nn
import torch.optim as optim

**훈련 데이터 전처리하기**

In [18]:
sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

In [19]:
char_set = list(set(sentence))
char_dic = {c : i for i, c in enumerate(char_set)}
print(char_dic)

{'e': 0, 's': 1, ' ': 2, 'l': 3, ',': 4, 'f': 5, 'g': 6, 'u': 7, 'a': 8, 'k': 9, 'w': 10, 'n': 11, 'b': 12, 'i': 13, 'y': 14, "'": 15, 'p': 16, 'c': 17, 't': 18, 'd': 19, 'o': 20, 'r': 21, 'h': 22, '.': 23, 'm': 24}


In [20]:
dic_size = len(char_dic)
print('문자 집합의 크기 : {}'.format(dic_size))

문자 집합의 크기 : 25


In [21]:
# 하이퍼파라미터 설정

hidden_size = dic_size
sequence_length = 10 # 임의 숫자(10의 단위로 샘플을 잘라서 데이터를 만듦)
learning_rate = 0.1

In [22]:
# 데이터 구성

x_data = []
y_data = []

for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1 : i + sequence_length + 1]
    print(i, x_str, ' -> ', y_str)

    x_data.append([char_dic[c] for c in x_str])
    y_data.append([char_dic[c] for c in y_str])


0 if you wan  ->  f you want
1 f you want  ->   you want 
2  you want   ->  you want t
3 you want t  ->  ou want to
4 ou want to  ->  u want to 
5 u want to   ->   want to b
6  want to b  ->  want to bu
7 want to bu  ->  ant to bui
8 ant to bui  ->  nt to buil
9 nt to buil  ->  t to build
10 t to build  ->   to build 
11  to build   ->  to build a
12 to build a  ->  o build a 
13 o build a   ->   build a s
14  build a s  ->  build a sh
15 build a sh  ->  uild a shi
16 uild a shi  ->  ild a ship
17 ild a ship  ->  ld a ship,
18 ld a ship,  ->  d a ship, 
19 d a ship,   ->   a ship, d
20  a ship, d  ->  a ship, do
21 a ship, do  ->   ship, don
22  ship, don  ->  ship, don'
23 ship, don'  ->  hip, don't
24 hip, don't  ->  ip, don't 
25 ip, don't   ->  p, don't d
26 p, don't d  ->  , don't dr
27 , don't dr  ->   don't dru
28  don't dru  ->  don't drum
29 don't drum  ->  on't drum 
30 on't drum   ->  n't drum u
31 n't drum u  ->  't drum up
32 't drum up  ->  t drum up 
33 t drum up   ->   

In [23]:
print(x_data)
print(y_data)

[[13, 5, 2, 14, 20, 7, 2, 10, 8, 11], [5, 2, 14, 20, 7, 2, 10, 8, 11, 18], [2, 14, 20, 7, 2, 10, 8, 11, 18, 2], [14, 20, 7, 2, 10, 8, 11, 18, 2, 18], [20, 7, 2, 10, 8, 11, 18, 2, 18, 20], [7, 2, 10, 8, 11, 18, 2, 18, 20, 2], [2, 10, 8, 11, 18, 2, 18, 20, 2, 12], [10, 8, 11, 18, 2, 18, 20, 2, 12, 7], [8, 11, 18, 2, 18, 20, 2, 12, 7, 13], [11, 18, 2, 18, 20, 2, 12, 7, 13, 3], [18, 2, 18, 20, 2, 12, 7, 13, 3, 19], [2, 18, 20, 2, 12, 7, 13, 3, 19, 2], [18, 20, 2, 12, 7, 13, 3, 19, 2, 8], [20, 2, 12, 7, 13, 3, 19, 2, 8, 2], [2, 12, 7, 13, 3, 19, 2, 8, 2, 1], [12, 7, 13, 3, 19, 2, 8, 2, 1, 22], [7, 13, 3, 19, 2, 8, 2, 1, 22, 13], [13, 3, 19, 2, 8, 2, 1, 22, 13, 16], [3, 19, 2, 8, 2, 1, 22, 13, 16, 4], [19, 2, 8, 2, 1, 22, 13, 16, 4, 2], [2, 8, 2, 1, 22, 13, 16, 4, 2, 19], [8, 2, 1, 22, 13, 16, 4, 2, 19, 20], [2, 1, 22, 13, 16, 4, 2, 19, 20, 11], [1, 22, 13, 16, 4, 2, 19, 20, 11, 15], [22, 13, 16, 4, 2, 19, 20, 11, 15, 18], [13, 16, 4, 2, 19, 20, 11, 15, 18, 2], [16, 4, 2, 19, 20, 11, 15, 18,

In [24]:
x_one_hot = [np.eye(dic_size)[x] for x in x_data]
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

In [25]:
print(X[0])

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,

In [26]:
print(Y[0])

tensor([ 5,  2, 14, 20,  7,  2, 10,  8, 11, 18])


In [27]:
class Net(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, layers):
        super(Net, self).__init__()
        self.rnn = torch.nn.RNN(input_dim, hidden_dim, num_layers=layers, batch_first=True)
        self.fc = torch.nn.Linear(hidden_dim, hidden_dim, bias=True)
    
    def forward(self, x):
        x, _status = self.rnn(x)
        x = self.fc(x)
        return x

In [28]:
net = Net(dic_size, hidden_size, 2) # 층을 두 개 쌓음

In [29]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)

In [30]:
outputs = net(X)
print(outputs.shape)

torch.Size([170, 10, 25])


In [31]:
print(outputs.view(-1, dic_size).shape)

torch.Size([1700, 25])


In [32]:
print(Y.shape)
print(Y.view(-1).shape) # 정확도 측정 시에는 (170, 10) -> (1700)

torch.Size([170, 10])
torch.Size([1700])


In [33]:
epochs = 100
for i in range(epochs):
    optimizer.zero_grad()
    outputs = net(X)
    loss = criterion(outputs.view(-1, dic_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    results = outputs.argmax(dim=2)
    predict_str = ""
    for j, result in enumerate(results):
        if j == 0:
            predict_str += ''.join([char_set[t] for t in result])
        else:
            predict_str += char_set[result[-1]]

    print(predict_str)

ragaaaagralagaaaaaaaagagaakggaagaaaaraakakagaagaaagaaagaaraaaagagaaloarakaagraaaagaaaaggaagagaaaaargggaagaagaagagaaaaagaaaaaaaggaagaaaaakagagaaagaaaaakagaaaggaaaaaggaaaagaagaakgaa
                                                                                                                                                                                   


                                                                                                                                                                                   
tlyonp.oipypppppoybpblspypyp.p.pyaypypppplppayl.pbpppyllp.plppylppyllpppppp.ppbyypyaylsppallplyplyaylpplsppoopyaypspyasp.aypypyylppypppylplpll.pppppbpsp.p.l.l.pyp.lylyplyyplpp.ppp
teeosashoteeeeeoheteeheoseeoeoesheeoeoeeoeeteeieoeeoeeeeteseeeeeeeeseeoeeoteteeeeeeaeheaeeeseeeeeeeeteeheaeeeteaeheaseeteoeeeeeteeeeoeeeeeseeleoeeseeheoheeoeaeoeleteaeeoheeheaeeoe
t eoeototototee otooeototoeotototoeotoeetoeootoeotoooeteototeetestee eoeototsteototoeoeototoeeoetooeseeotoeototoeoeototoeoeototsetoeoeteoeotsseoeteotototoeoeoeoeototseootetoeototo
t toe e t t oteogt oe e e e e e t t e edt t ee t eot eee e ttete t ete e e oett e t e e t e oe ee egttt e e e t e e e t e t t t tt e eee et e toeee e e e e e e e e t t  oee e e e 
 ot t o o  eoe o  t o oo    uoot t o o wo  o 't o oeoooot o e'o oo t oooootrt o oo to  e te  et   o 