# [순환신경망 실습: Hi HELLO 예제로 시작하기]

#### jupyter notebook 단축키
- ctrl+enter: 셀 실행
- shift+enter: 셀 실행 및 다음 셀 이동
- alt+enter: 셀 실행, 다음 셀 이동, 새로운 셀 생성
- a: 상단에 새로운 셀 만들기
- b: 하단에 새로운 셀 만들기
- dd: 셀 삭제(x: 셀 잘라내기)
- esc: 셀 나가기
- m: 마크다운 셀로 바꾸기

### 1. 모듈 불러오기

In [2]:
import torch
import torch.optim as optim
import numpy as np
# Seed PyTorch's random number generator so the randomly initialised RNN
# weights (and therefore the training run) are reproducible.
torch.manual_seed(0)

<torch._C.Generator at 0x27c8d7ee5d0>

### 2. 데이터 생성하기

In [3]:
# Vocabulary: a character's position in this list is its class index.
char_set = list('hielo')

In [4]:
# Hyperparameters
# input_size: width of the vector representing one character (one-hot over char_set).
# hidden_size: size of the RNN hidden state; set to the vocabulary size as well.
input_size = hidden_size = len(char_set)
learning_rate = 1e-1

In [5]:
# Data setting
# Input "hihell" and target "ihello", both encoded as indices into char_set;
# the target is the input shifted one character ahead.
x_data = [[0, 1, 0, 2, 3, 3]]
y_data = [[1, 0, 2, 3, 3, 4]]
# One-hot encode every input index: a row of width input_size with a single 1
# at the character's index. Resulting nested list is (batch, seq, input_size).
x_one_hot = [[[int(col == idx) for col in range(input_size)] for idx in seq]
             for seq in x_data]

#### pytorch 에서는 input 데이터를 Tensor형태로 바꾸어 적용

In [6]:
# Convert the Python lists to tensors: float32 one-hot inputs for the RNN and
# int64 class indices as targets for the cross-entropy loss.
# torch.tensor with an explicit dtype is used instead of the legacy
# torch.FloatTensor / torch.LongTensor type constructors.
X = torch.tensor(x_one_hot, dtype=torch.float32)
Y = torch.tensor(y_data, dtype=torch.int64)

### 3. RNN 모델 생성하기
- RNN 모델 선언: rnn = torch.nn.RNN(input_size, hidden_size, batch_first=True)

In [7]:
# Declare the RNN.
# batch_first=True makes the input and output tensors batch-major:
# (batch, sequence, feature).
rnn = torch.nn.RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    batch_first=True,
)

In [8]:
# Show the freshly initialised parameters of the single RNN layer:
# input-hidden weights, hidden-hidden weights, then the two bias vectors.
rnn.all_weights

[[Parameter containing:
  tensor([[-0.0033,  0.2399, -0.3681, -0.3291, -0.1722],
          [ 0.1199, -0.0089,  0.3546, -0.0397,  0.1183],
          [-0.1352, -0.0879, -0.4272, -0.2962, -0.1844],
          [ 0.0166,  0.1768,  0.2683, -0.3032, -0.1947],
          [ 0.1624,  0.3714, -0.0920,  0.3347, -0.0721]], requires_grad=True),
  Parameter containing:
  tensor([[ 0.0473,  0.4049, -0.4149, -0.2815, -0.1132],
          [-0.1743,  0.3864, -0.2899, -0.2059, -0.3124],
          [-0.4188, -0.2611,  0.3844,  0.1996,  0.2168],
          [ 0.0235, -0.2293,  0.0757, -0.4176, -0.3231],
          [-0.2306,  0.2822,  0.2622, -0.1983, -0.0161]], requires_grad=True),
  Parameter containing:
  tensor([0.2860, 0.4446, 0.1775, 0.0604, 0.2999], requires_grad=True),
  Parameter containing:
  tensor([-0.2633,  0.0833, -0.3467, -0.3100, -0.2310], requires_grad=True)]]

In [9]:
# Loss and optimizer
# Cross-entropy over the character classes; Adam updates every RNN parameter.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(rnn.parameters(), lr=learning_rate)

- RNN 모델 학습

In [12]:
# Display the input tensor: one batch of six one-hot vectors of width five.
X

tensor([[[1., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [1., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0.],
         [0., 0., 0., 1., 0.],
         [0., 0., 0., 1., 0.]]])

In [11]:
# Train for 100 steps on the single (input, target) sequence pair.
# NOTE: the RNN outputs are passed to CrossEntropyLoss directly as logits.
# With nn.RNN's default tanh nonlinearity they are bounded in [-1, 1], so the
# loss cannot approach 0; a linear output layer on top of the RNN would lift
# that limit.
for i in range(100):
    optimizer.zero_grad()
    # outputs: hidden state at every time step, (batch, seq, hidden_size);
    # _status: hidden state after the last time step.
    outputs, _status = rnn(X)
    # Flatten batch and time so every time step is one classification sample.
    # The last dimension of `outputs` is hidden_size (equal to input_size in
    # this example, but hidden_size is the dimension that actually applies).
    loss = criterion(outputs.view(-1, hidden_size), Y.view(-1))
    loss.backward()
    optimizer.step()

    # Greedy decode: pick the highest-scoring character index per time step.
    # detach() is the supported replacement for the discouraged `.data`.
    result = outputs.detach().numpy().argmax(axis=2)
    result_str = ''.join(char_set[c] for c in np.squeeze(result))
    print(i, "loss: ", loss.item(), "prediction: ", result, "true Y: ", y_data, "prediction str: ", result_str)

0 loss:  1.7802648544311523 prediction:  [[1 1 1 1 1 1]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  iiiiii
1 loss:  1.4931954145431519 prediction:  [[1 4 1 1 4 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ioiioo
2 loss:  1.3337129354476929 prediction:  [[1 3 2 3 1 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ilelio
3 loss:  1.2152955532073975 prediction:  [[2 3 2 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  elelll
4 loss:  1.1131411790847778 prediction:  [[2 3 2 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  elelll
5 loss:  1.024188756942749 prediction:  [[2 3 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  elello
6 loss:  0.9573155045509338 prediction:  [[2 3 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  elello
7 loss:  0.9102011322975159 prediction:  [[2 0 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ehello
8 loss:  0.8731772303581238 prediction:  [[1 0 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ihello
9 

73 loss:  0.5379175543785095 prediction:  [[1 3 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ilello
74 loss:  0.5376288890838623 prediction:  [[1 3 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ilello
75 loss:  0.5372945666313171 prediction:  [[1 3 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ilello
76 loss:  0.5369361639022827 prediction:  [[1 3 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ilello
77 loss:  0.5366637110710144 prediction:  [[1 3 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ilello
78 loss:  0.5363659858703613 prediction:  [[1 3 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ilello
79 loss:  0.53604656457901 prediction:  [[1 3 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ilello
80 loss:  0.5357930064201355 prediction:  [[1 3 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  ilello
81 loss:  0.5355222821235657 prediction:  [[1 3 2 3 3 4]] true Y:  [[1, 0, 2, 3, 3, 4]] prediction str:  i

In [30]:
# Print the last forward pass: per-step outputs, the final hidden state,
# and the shape of the output tensor, one item per line.
print(outputs, _status, outputs.size(), sep="\n")


tensor([[[-0.9956,  0.9884, -0.9740, -0.9979, -0.9973],
         [ 0.9995, -0.9915, -1.0000,  1.0000, -0.9663],
         [-1.0000, -0.9848,  0.9948, -0.9702, -1.0000],
         [-0.9948, -0.9770, -1.0000,  0.9999, -1.0000],
         [-0.9996, -1.0000, -0.9994,  0.9704, -0.2286],
         [-0.9998, -1.0000, -0.9872, -0.8719,  0.9764]]],
       grad_fn=<TransposeBackward0>)
tensor([[[-0.9998, -1.0000, -0.9872, -0.8719,  0.9764]]],
       grad_fn=<StackBackward>)
torch.Size([1, 6, 5])


In [31]:
# Show the RNN parameters again, now after training.
rnn.all_weights

[[Parameter containing:
  tensor([[-1.8023,  2.1666, -2.1525, -1.8211, -0.1722],
          [ 3.3140, -2.3990, -2.9232, -1.9561,  0.1183],
          [-0.0200, -0.0612, -1.4873, -1.9349, -0.1844],
          [-2.5699,  0.0170,  1.8192,  1.0027, -0.1947],
          [-1.6933,  0.7727, -2.1081,  1.6618, -0.0721]], requires_grad=True),
  Parameter containing:
  tensor([[-0.6612,  1.4753,  0.0787, -0.7595, -0.3995],
          [-0.3432,  1.1279,  1.6676, -1.2401,  0.6755],
          [ 2.5378, -0.8079, -1.3030,  2.5379,  2.0835],
          [-0.2380,  1.6284,  0.0138, -1.1416, -4.4735],
          [-1.5921, -0.3177, -0.8340,  0.1494,  3.1745]], requires_grad=True),
  Parameter containing:
  tensor([-0.3542, -0.1867, -0.8129, -0.2463, -0.5376], requires_grad=True),
  Parameter containing:
  tensor([-0.9036, -0.5480, -1.3371, -0.6167, -1.0685], requires_grad=True)]]

In [32]:
# Hidden-to-hidden weight matrix of layer 0.
rnn.weight_hh_l0

Parameter containing:
tensor([[-0.6612,  1.4753,  0.0787, -0.7595, -0.3995],
        [-0.3432,  1.1279,  1.6676, -1.2401,  0.6755],
        [ 2.5378, -0.8079, -1.3030,  2.5379,  2.0835],
        [-0.2380,  1.6284,  0.0138, -1.1416, -4.4735],
        [-1.5921, -0.3177, -0.8340,  0.1494,  3.1745]], requires_grad=True)

In [33]:
# Input-to-hidden weight matrix of layer 0.
rnn.weight_ih_l0

Parameter containing:
tensor([[-1.8023,  2.1666, -2.1525, -1.8211, -0.1722],
        [ 3.3140, -2.3990, -2.9232, -1.9561,  0.1183],
        [-0.0200, -0.0612, -1.4873, -1.9349, -0.1844],
        [-2.5699,  0.0170,  1.8192,  1.0027, -0.1947],
        [-1.6933,  0.7727, -2.1081,  1.6618, -0.0721]], requires_grad=True)

In [34]:
# Name -> Parameter mapping of the module.
# NOTE: `_parameters` is a private attribute of nn.Module; the public way to
# get the same (name, parameter) pairs is `rnn.named_parameters()`.
rnn._parameters

OrderedDict([('weight_ih_l0', Parameter containing:
              tensor([[-1.8023,  2.1666, -2.1525, -1.8211, -0.1722],
                      [ 3.3140, -2.3990, -2.9232, -1.9561,  0.1183],
                      [-0.0200, -0.0612, -1.4873, -1.9349, -0.1844],
                      [-2.5699,  0.0170,  1.8192,  1.0027, -0.1947],
                      [-1.6933,  0.7727, -2.1081,  1.6618, -0.0721]], requires_grad=True)),
             ('weight_hh_l0', Parameter containing:
              tensor([[-0.6612,  1.4753,  0.0787, -0.7595, -0.3995],
                      [-0.3432,  1.1279,  1.6676, -1.2401,  0.6755],
                      [ 2.5378, -0.8079, -1.3030,  2.5379,  2.0835],
                      [-0.2380,  1.6284,  0.0138, -1.1416, -4.4735],
                      [-1.5921, -0.3177, -0.8340,  0.1494,  3.1745]], requires_grad=True)),
             ('bias_ih_l0', Parameter containing:
              tensor([-0.3542, -0.1867, -0.8129, -0.2463, -0.5376], requires_grad=True)),
             ('bias_