In [27]:
import torch
import torch.nn as nn
import torch.optim as optim

sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

In [28]:
print(sentence)

if you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.


In [29]:
char_set = list(set(sentence)) # 중복을 제거한 문자 집합 생성
char_dic = {c: i for i, c in enumerate(char_set)} # 각 문자에 정수 인코딩

In [30]:
print(char_dic)

{'o': 0, 'i': 1, 'n': 2, '.': 3, 'u': 4, 'b': 5, 'e': 6, 'p': 7, 'g': 8, 'k': 9, 'r': 10, 'f': 11, 'c': 12, 's': 13, 'm': 14, 'a': 15, 't': 16, 'h': 17, ' ': 18, 'w': 19, 'l': 20, 'd': 21, 'y': 22, "'": 23, ',': 24}


In [31]:
dic_size = len(char_dic)
print('문자 집합의 크기 : {}'.format(dic_size))

문자 집합의 크기 : 25


문자 집합의 크기는 25이며, 입력을 원-핫 벡터로 사용할 것이므로 이는 매 시점마다 들어갈 입력의 크기이기도 합니다. 이제 하이퍼파라미터를 설정합니다. hidden_size(은닉 상태의 크기)를 입력의 크기와 동일하게 줬는데, 이는 사용자의 선택으로 다른 값을 줘도 무방합니다.

그리고 sequence_length라는 변수를 선언했는데, 우리가 앞서 만든 샘플을 10개 단위로 끊어서 샘플을 만들 예정이기 때문입니다. 이는 뒤에서 더 자세히 보겠습니다.

In [32]:
# 하이퍼파라미터 설정
hidden_size = dic_size
sequence_length = 10  # 임의 숫자 지정
learning_rate = 0.1

In [33]:
# 데이터 구성
x_data = []
y_data = []

for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]
    print(i, x_str, '->', y_str)

    x_data.append([char_dic[c] for c in x_str])  # x str to index
    y_data.append([char_dic[c] for c in y_str])  # y str to index

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [34]:
print(x_data[0])
print(y_data[0])

[1, 11, 18, 22, 0, 4, 18, 19, 15, 2]
[11, 18, 22, 0, 4, 18, 19, 15, 2, 16]


한 칸씩 쉬프트 된 시퀀스가 정상적으로 출력되는 것을 볼 수 있습니다. 이제 입력 시퀀스에 대해서 원-핫 인코딩을 수행하고, 입력 데이터와 레이블 데이터를 텐서로 변환합니다.

In [35]:
import numpy as np
x_one_hot = [np.eye(dic_size)[x] for x in x_data] # x 데이터는 원-핫 인코딩
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

In [36]:
print(X)
print(Y)

tensor([[[0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 1., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 1., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [1., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0., 

In [37]:
print(np.shape(X),np.shape(Y))

torch.Size([170, 10, 25]) torch.Size([170, 10])


In [38]:
class Net(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, layers): # 현재 hidden_size는 dic_size와 같음.
        super(Net, self).__init__()
        self.rnn = torch.nn.RNN(input_dim, hidden_dim, num_layers=layers, batch_first=True)
        self.fc = torch.nn.Linear(hidden_dim, hidden_dim, bias=True)
        # hidden = 25 / input = 125 
    def forward(self, x):
        x, _status = self.rnn(x)
        x = self.fc(x)
        return x
net = Net(dic_size, hidden_size, 2) # 이번에는 층을 두 개 쌓습니다.

In [39]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), learning_rate)

In [40]:
outputs = net(X)
print(outputs.shape) # 3차원 텐서

torch.Size([170, 10, 25])


In [41]:
print(outputs[0])

tensor([[ 0.0672, -0.0035,  0.1353,  0.3559,  0.0439,  0.1685, -0.2470, -0.1486,
          0.2087,  0.0071, -0.1106, -0.0976,  0.0413, -0.0526,  0.0958, -0.0764,
         -0.2462,  0.0349, -0.0203, -0.2517, -0.0939, -0.1188,  0.1210, -0.0158,
          0.0631],
        [ 0.0262, -0.2353,  0.1650,  0.2976,  0.0392,  0.1865, -0.3375, -0.0171,
          0.1197, -0.0189, -0.1874,  0.0626,  0.1534, -0.0837,  0.0270,  0.0265,
         -0.3208,  0.1438,  0.1312, -0.2729, -0.1641, -0.0426,  0.0264,  0.0217,
         -0.0493],
        [ 0.0043, -0.0875,  0.1955,  0.3847,  0.1050,  0.2256, -0.3407, -0.0712,
          0.1497,  0.0467, -0.1798, -0.0345,  0.1294, -0.0200,  0.0055, -0.0220,
         -0.3402,  0.0550,  0.0361, -0.3336, -0.0866, -0.1364,  0.0228, -0.1603,
          0.0646],
        [ 0.0280, -0.1433,  0.1159,  0.2903, -0.0810,  0.1515, -0.2351, -0.0556,
          0.1909,  0.0139, -0.2100, -0.0998,  0.0264, -0.0438,  0.0401, -0.0965,
         -0.3098, -0.0915,  0.0742, -0.3058, -0.0483

In [42]:
print(outputs.view(-1, dic_size).shape) # 2차원 텐서로 변환.

torch.Size([1700, 25])


In [43]:
print(Y.shape)
print(Y.view(-1).shape)

torch.Size([170, 10])
torch.Size([1700])


In [44]:
for i in range(300+1):
    optimizer.zero_grad()
    outputs = net(X) # (170, 10, 25) 크기를 가진 텐서를 매 에포크마다 모델의 입력으로 사용
    loss = criterion(outputs.view(-1, dic_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    # results의 텐서 크기는 (170, 10)
    results = outputs.argmax(dim=2)
    predict_str = ""
    if i%100 == 0:
        print("%d 번 epoch" % i)
        for j, result in enumerate(results):
            if j == 0: # 처음에는 예측 결과를 전부 가져오지만
                predict_str += ''.join([char_set[t] for t in result])
            else: # 그 다음에는 마지막 글자만 반복 추가
                predict_str += char_set[result[-1]]

        print(predict_str)

0 번 epoch
...................................................................................................................................................................................
100 번 epoch
t you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
200 번 epoch
t you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
300 번 epoch
l you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
