<a href="https://colab.research.google.com/github/freud-sensei/imfine_torch/blob/main/%5B%EB%B1%80%EA%B3%BC%ED%9A%83%EB%B6%88%5D%EB%AC%B8%EC%9E%90%EB%8B%A8%EC%9C%84_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 간단한 예제

In [None]:
# 얘네 기능은 이제 다 알 거라고 믿음
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

## 훈련데이터 전처리하기

목표: `apple`을 입력받으면 `pple!`을 출력한다.

In [None]:
# Build the character vocabulary from every character in the input and label.
input_str, label_str = 'apple', 'pple!'
unique_chars = set(input_str) | set(label_str)
char_vocab = sorted(unique_chars)
vocab_size = len(char_vocab)  # size of the character vocabulary
print(vocab_size)

5


In [None]:
# One-hot vectors are used, so the input width equals the vocabulary size
# (it is NOT the number of time steps).
input_size = vocab_size
hidden_size = 5
# One score per vocabulary character is predicted at each step, so the output
# width follows the vocabulary size instead of a hard-coded 5.
output_size = vocab_size
learning_rate = 0.1

In [None]:
# Indexing: map each character to its position in char_vocab.
char_to_index = {ch: position for position, ch in enumerate(char_vocab)}
print(char_to_index)

{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}


In [None]:
# Inverse lookup: integer index -> character.
index_to_char = {position: ch for ch, position in char_to_index.items()}
print(index_to_char)

{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}


In [None]:
# Encode the input and label strings as lists of vocabulary indices.
x_data = [char_to_index[ch] for ch in input_str]
y_data = [char_to_index[ch] for ch in label_str]
print(x_data, y_data, sep='\n')

[1, 4, 4, 3, 2]
[4, 4, 3, 2, 0]


In [None]:
# nn.RNN() takes a 3-D tensor by default, so wrap each sequence in an outer
# list to add one dimension here (the remaining dimension will be added by
# the embedding step).
x_data, y_data = [x_data], [y_data]
print(x_data, y_data, sep='\n')

[[1, 4, 4, 3, 2]]
[[4, 4, 3, 2, 0]]


In [None]:
# 원핫 벡터로 바꿔주기
x_one_hot = [np.eye(vocab_size)[x] for x in x_data]
print(x_one_hot)

[array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.]])]


In [None]:
# Stack the per-sequence arrays into a single ndarray before building the
# tensor: constructing a tensor straight from a Python list of ndarrays is
# very slow and makes PyTorch emit a UserWarning.
X = torch.FloatTensor(np.array(x_one_hot))
y = torch.LongTensor(y_data)
print(X.shape)
print(y.shape)

torch.Size([1, 5, 5])
torch.Size([1, 5])


  X = torch.FloatTensor(x_one_hot)


## 모델 구현하기

In [None]:
class Net(nn.Module):
  """RNN followed by a linear layer that is applied at every time step."""

  def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    # batch_first=True: tensors are laid out as (batch, time steps, features).
    self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, batch_first=True)
    self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

  def forward(self, x):
    """Return per-time-step scores of shape (batch, time steps, output_size)."""
    # The RNN returns a pair:
    #   hidden_states: hidden state at every time step,
    #                  (batch size, number of time steps, hidden size)
    #   second item:   hidden state at the last time step, but for every layer,
    #                  (number of layers, batch size, hidden size)
    hidden_states, _ = self.rnn(x)
    return self.fc(hidden_states)

In [None]:
# Instantiate the model and run one forward pass to inspect the output shape.
net = Net(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
y_pred = net(X)
print(y_pred.shape)  # (batch size, number of time steps, output size)

torch.Size([1, 5, 5])


In [None]:
# When measuring accuracy, the batch and time-step dimensions have to be
# merged into a single dimension.
flat_pred = y_pred.view(-1, input_size)
print(flat_pred.shape)

torch.Size([5, 5])


In [None]:
# Shape of the label data, before and after flattening it into a 1-D vector.
print(y.shape, y.view(-1).shape, sep='\n')

torch.Size([1, 5])
torch.Size([5])


In [None]:
# Cross-entropy loss over the character classes, optimized with Adam.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

## 모델 학습하기

In [None]:
# Train for 100 epochs.
for i in range(100):
  optimizer.zero_grad()
  y_pred = net(X)
  # Merge the batch and time-step dimensions so the scores are (N, classes)
  # and the targets are (N,). The class count is read from the prediction
  # itself so it always matches the model's output width.
  loss = loss_function(y_pred.view(-1, y_pred.size(-1)), y.view(-1))
  loss.backward()
  optimizer.step()

  # Inspect the current prediction: at every time step, pick the index with
  # the highest score.
  result = y_pred.detach().numpy().argmax(axis=2)
  result_str = ''.join(index_to_char[idx] for idx in result.reshape(-1))
  print(f"epoch {i}, loss {loss.item()}")
  # Print the real labels in the "true y" slot, not the prediction again.
  print(f"prediction {result}, true y {y.numpy()}")
  print(f"prediction str {result_str}")

epoch 0, loss 1.3217504024505615
predicton [[4 4 4 0 0]], true y [[4 4 4 0 0]]
prediction str ppp!!
epoch 1, loss 1.1565921306610107
predicton [[4 4 4 4 0]], true y [[4 4 4 4 0]]
prediction str pppp!
epoch 2, loss 1.0058705806732178
predicton [[4 4 4 2 0]], true y [[4 4 4 2 0]]
prediction str pppe!
epoch 3, loss 0.8865023851394653
predicton [[4 4 4 2 2]], true y [[4 4 4 2 2]]
prediction str pppee
epoch 4, loss 0.8021324276924133
predicton [[4 4 4 2 2]], true y [[4 4 4 2 2]]
prediction str pppee
epoch 5, loss 0.7323900461196899
predicton [[4 4 4 2 2]], true y [[4 4 4 2 2]]
prediction str pppee
epoch 6, loss 0.6769901514053345
predicton [[4 4 4 2 2]], true y [[4 4 4 2 2]]
prediction str pppee
epoch 7, loss 0.6306073665618896
predicton [[4 4 4 2 0]], true y [[4 4 4 2 0]]
prediction str pppe!
epoch 8, loss 0.5960450172424316
predicton [[4 4 4 0 0]], true y [[4 4 4 0 0]]
prediction str ppp!!
epoch 9, loss 0.5526713728904724
predicton [[4 4 4 0 0]], true y [[4 4 4 0 0]]
prediction str ppp!!


# 복잡한? 예제

## 훈련 데이터 전처리하기

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Training text, split at clause boundaries for readability; the pieces
# concatenate into one single-line string.
sentence = (
    "if you want to build a ship, "
    "don't drum up people together to collect wood "
    "and don't assign them tasks and work, "
    "but rather teach them to long for the endless immensity of the sea."
)

In [None]:
# Sort the unique characters so the char -> index mapping is the same on
# every run; iterating a bare set of strings can yield a different order
# from one interpreter session to the next.
char_set = sorted(set(sentence))
char_to_idx = {char: idx for idx, char in enumerate(char_set)}
print(char_to_idx)

{'y': 0, 's': 1, ' ': 2, "'": 3, '.': 4, 'o': 5, 'c': 6, 'k': 7, 'f': 8, 'n': 9, ',': 10, 'i': 11, 'l': 12, 'a': 13, 'b': 14, 'm': 15, 'h': 16, 'r': 17, 'e': 18, 'u': 19, 't': 20, 'w': 21, 'p': 22, 'g': 23, 'd': 24}


In [None]:
dict_size = len(char_to_idx) # size of the character vocabulary
print(dict_size)

25


In [None]:
# Hyperparameter settings
sequence_length = 10  # cut the text into samples of 10 characters each
hidden_size = dict_size
learning_rate = 0.1

In [None]:
# Build the dataset: every window of `sequence_length` characters is an input,
# and the same window shifted right by one character is its label.
X_data, y_data = [], []

i = 0
last_start = len(sentence) - sequence_length
while i < last_start:
  # Take one extra character so input and label come from a single slice.
  window = sentence[i:i + sequence_length + 1]
  X_str, y_str = window[:-1], window[1:]
  print(i, X_str, '->', y_str)

  X_data.append([char_to_idx[ch] for ch in X_str])
  y_data.append([char_to_idx[ch] for ch in y_str])
  i += 1

0 if you wan -> f you want
1 f you want ->  you want 
2  you want  -> you want t
3 you want t -> ou want to
4 ou want to -> u want to 
5 u want to  ->  want to b
6  want to b -> want to bu
7 want to bu -> ant to bui
8 ant to bui -> nt to buil
9 nt to buil -> t to build
10 t to build ->  to build 
11  to build  -> to build a
12 to build a -> o build a 
13 o build a  ->  build a s
14  build a s -> build a sh
15 build a sh -> uild a shi
16 uild a shi -> ild a ship
17 ild a ship -> ld a ship,
18 ld a ship, -> d a ship, 
19 d a ship,  ->  a ship, d
20  a ship, d -> a ship, do
21 a ship, do ->  ship, don
22  ship, don -> ship, don'
23 ship, don' -> hip, don't
24 hip, don't -> ip, don't 
25 ip, don't  -> p, don't d
26 p, don't d -> , don't dr
27 , don't dr ->  don't dru
28  don't dru -> don't drum
29 don't drum -> on't drum 
30 on't drum  -> n't drum u
31 n't drum u -> 't drum up
32 't drum up -> t drum up 
33 t drum up  ->  drum up p
34  drum up p -> drum up pe
35 drum up pe -> rum up peo
36

In [None]:
# Show the first encoded input window and its label window.
print(X_data[0], y_data[0], sep='\n')

[11, 8, 2, 0, 5, 19, 2, 21, 13, 9]
[8, 2, 0, 5, 19, 2, 21, 13, 9, 20]


In [None]:
# One-hot encoding
import numpy as np
X_one_hot = [np.eye(dict_size)[indices] for indices in X_data]
# Stack the per-sample arrays into a single ndarray before building the
# tensor: constructing a tensor straight from a Python list of ndarrays is
# very slow and makes PyTorch emit a UserWarning.
X = torch.FloatTensor(np.array(X_one_hot))
y = torch.LongTensor(y_data)

  X = torch.FloatTensor(X_one_hot)


In [None]:
# Shape of the training data, then shape of the labels.
print(X.shape, y.shape, sep='\n')

torch.Size([170, 10, 25])
torch.Size([170, 10])


## 모델 구현하기

In [None]:
class Net(nn.Module):
  """Stacked RNN followed by a linear layer applied at every time step.

  Args:
    input_dim: size of each input vector.
    hidden_dim: size of the RNN hidden state.
    layers: number of stacked RNN layers.
    output_dim: size of each output vector. Defaults to ``hidden_dim``, which
      keeps the behavior of calling with three arguments unchanged.
  """

  def __init__(self, input_dim, hidden_dim, layers, output_dim=None):
    super().__init__()
    if output_dim is None:
      # Historical default: the output width is tied to the hidden width.
      output_dim = hidden_dim
    self.rnn = nn.RNN(input_dim, hidden_dim, num_layers=layers, batch_first=True)
    self.fc = nn.Linear(hidden_dim, output_dim)

  def forward(self, x):
    """Return per-time-step scores of shape (batch, time steps, output_dim)."""
    # The second return value of the RNN (final hidden state) is not needed.
    x, _status = self.rnn(x)
    x = self.fc(x)
    return x

In [None]:
# Stack two RNN layers.
net = Net(dict_size, hidden_size, layers=2)
loss_f = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [None]:
y_pred = net(X)
print(y_pred.shape) # (batch dimension, time steps, output size)

torch.Size([170, 10, 25])


In [None]:
# Merge the batch and time-step dimensions and show the resulting shape.
flat_pred = y_pred.view(-1, dict_size)
print(flat_pred.shape)

torch.Size([1700, 25])


In [None]:
# Label shape before and after flattening to one dimension.
print(y.shape, y.view(-1).shape, sep='\n')

torch.Size([170, 10])
torch.Size([1700])


## 모델 학습하기

In [None]:
# Train for 100 epochs, printing the text reconstructed from the predictions
# after every optimizer step.
for i in range(100):
  optimizer.zero_grad()
  y_pred = net(X)
  loss = loss_f(y_pred.view(-1, dict_size), y.view(-1))
  loss.backward()
  optimizer.step()

  results = y_pred.argmax(dim=2)
  # Use every character of the first window, then only the last character of
  # each later window, and join the pieces once at the end.
  pieces = []
  for idx, result in enumerate(results):
    if idx == 0:
      pieces.extend(char_set[t] for t in result)
    else:
      pieces.append(char_set[result[-1]])
  predict_str = ''.join(pieces)
  print(predict_str)

lsco p,end to puild wod eps d r't d ut d' e  dle thgether to co le t wood tnd w g't dns gm them to ks wnd wo k, duthdether tonch toem to to gstorhtoemsnd ensetp ers tk wo toems r 
lsco p,and to poild wod eps d rkt enut dp eendle togethem to to le t woom tnd w gpt ens gm them to ks wnd wo k, duthdather tonch toem to tong tor toemsnd es  tn ins t  wo toe sor 
l co p,and to tuild wnd eps d rkt enut dp eesgle to ethem to co le t aood tnd w gpt ens gr them to ks wnd wook, duthdather tanch toem to to k tor toemsnd ec  tn ins tk ao toe sor 
l corp,and to tui d dnd eps don't dnut da ee,gle to ethem to co le t aoop tnd a gpt ens gp toem to ks dnd aook, dutodather tonch toem to tonk tor toemsnd ec  tn in  tk ao toe sorc
l corp,and to puild and ip, do 't enut dp eefgle to ethem to co le t aook tnd aog't ens gp them tooks and dook, dut dather tonch toem to tong for toe snd ess tn insitk ao toe sor 
l corp,and to build aodhip, don't dnut du eefgle th ethec to co lect aook and aon't dns gp them to k