In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn, optim

In [3]:
# Matplotlib defaults shared by every figure in this notebook.
# The font family override is kept disabled, as in the original cell:
# plt.rcParams['font.family'] = font_name
plt.rcParams.update({
    'font.size': 14,              # default font size
    'figure.figsize': (6, 6),     # default figure size
    'axes.grid': True,            # show a grid by default (use plt.grid() to set it per plot)
    'grid.linestyle': ':',        # dotted grid lines
    'axes.unicode_minus': False,  # render the minus sign correctly
})

# NumPy printing: 4 digits after the decimal point, no scientific notation.
np.set_printoptions(precision=4, suppress=True)

In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cpu


# Vanilla RNN

In [17]:
# Seed the global RNG so the sampled toy input is reproducible.
torch.manual_seed(326)

input_size, hidden_size = 2, 6

# One batch holding a 5-step sequence of `input_size` features, drawn from Uniform(0, 1).
inputs = torch.rand(1, 5, input_size)
print(inputs, inputs.shape, sep='\n')

tensor([[[0.9976, 0.2634],
         [0.4181, 0.2633],
         [0.6259, 0.1629],
         [0.2165, 0.4565],
         [0.9599, 0.9657]]])
torch.Size([1, 5, 2])


In [18]:
# Single-layer RNN; batch_first=True makes it accept (batch, seq, feature) tensors.
rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, batch_first=True)

# Dump every learnable parameter, each followed by a separator line.
for param in rnn.parameters():
    print(param, '=' * 80, sep='\n')

Parameter containing:
tensor([[ 0.2979, -0.2266],
        [ 0.4065,  0.2677],
        [-0.0377,  0.1725],
        [ 0.3276, -0.3616],
        [ 0.0695,  0.2299],
        [-0.0335,  0.0257]], requires_grad=True)
Parameter containing:
tensor([[ 0.0264, -0.1758, -0.3677, -0.1028, -0.3662,  0.1997],
        [ 0.1845,  0.0868, -0.2008, -0.1158,  0.2391, -0.3231],
        [-0.3498,  0.1072, -0.2975,  0.1900,  0.0980,  0.2682],
        [ 0.1652,  0.2674, -0.1719,  0.3045,  0.1581, -0.3096],
        [ 0.1032, -0.1436, -0.1081,  0.1853,  0.2689,  0.3750],
        [ 0.3654, -0.2347, -0.2012, -0.3461, -0.3378, -0.1302]],
       requires_grad=True)
Parameter containing:
tensor([-0.0507,  0.2363,  0.2587, -0.0978, -0.1845, -0.3035],
       requires_grad=True)
Parameter containing:
tensor([ 0.2220,  0.2435, -0.3848, -0.0531,  0.2734,  0.1387],
       requires_grad=True)


In [13]:
# Run the toy sequence through the RNN. The call returns two tensors: the
# hidden state at every time step (`outputs`) and the final hidden state (`_status`).
# NOTE(review): the saved output of this cell shows 4 values per time step although
# hidden_size is 6 in the cell that builds `rnn`; this cell's execution count (13) is
# lower than that cell's (18), so the stored output is stale. Re-run after "Restart & Run All".
outputs, _status = rnn(inputs)

print('outputs = \n', outputs)
print('terminal = \n', _status)

outputs = 
 tensor([[[ 0.4559, -0.4335,  0.4892,  0.1835],
         [ 0.0194, -0.2502,  0.4768, -0.2028],
         [ 0.4262, -0.3787,  0.3782, -0.2251],
         [ 0.1214, -0.1125,  0.4399, -0.3448],
         [ 0.3444, -0.2138,  0.5368, -0.2991]]], grad_fn=<TransposeBackward1>)
terminal = 
 tensor([[[ 0.3444, -0.2138,  0.5368, -0.2991]]], grad_fn=<StackBackward0>)


## 문자단위 RNN

In [31]:
# Training pair for "apple!": the target is the input shifted left by one
# character, with '!' appended as the end marker.
input_str = 'apple'
output_str = input_str[1:] + '!'

In [32]:
# 1. Vocabulary set: every distinct character of the input and target strings, sorted.

unique_chars = set(input_str + output_str)
char_vocab = sorted(unique_chars)
vocab_size = len(char_vocab)
print("vocab size =", vocab_size)
print('char_vocab =', char_vocab)

vocab size = 5
char_vocab = ['!', 'a', 'e', 'l', 'p']


In [33]:
# 2. Indexing: build the character -> index table and its inverse.

char_to_index = {ch: idx for idx, ch in enumerate(char_vocab)}
print(char_to_index)

index_to_char = dict(enumerate(char_vocab))
print(index_to_char)

{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}
{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}


In [35]:
# 3. label encoding

x_data = [char_to_index[c] for c in input_str]
y_data = [char_to_index[c] for c in output_str]

print(x_data)
print(y_data)

[1, 4, 4, 3, 2]
[4, 4, 3, 2, 0]


In [36]:
# Add a leading batch dimension: [i0, i1, ...] -> [[i0, i1, ...]].
# The wrap is guarded so that re-running this cell does not nest the lists again
# (an unconditional `x_data = [x_data]` adds one more level on every execution).
if not (x_data and isinstance(x_data[0], list)):
    x_data = [x_data]
if not (y_data and isinstance(y_data[0], list)):
    y_data = [y_data]

print(x_data)
print(y_data)

[[1, 4, 4, 3, 2]]
[[4, 4, 3, 2, 0]]


In [45]:
# 4. One-hot encoding
# Row i of the identity matrix is the one-hot vector of index i, so indexing the
# identity matrix with the index lists yields one one-hot row per character.
# The matrix is sized by vocab_size rather than a hard-coded 5, so the cell keeps
# working if the strings (and therefore the vocabulary) change.

x_one_hot = np.eye(vocab_size)[x_data]

print(x_one_hot)

[[[0. 1. 0. 0. 0.]
  [0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 1.]
  [0. 0. 0. 1. 0.]
  [0. 0. 1. 0. 0.]]]


In [50]:
## Tensor input
# Inputs are float32 one-hot vectors; targets are int64 class indices.

X = torch.tensor(x_one_hot, dtype=torch.float32)
Y = torch.tensor(y_data, dtype=torch.int64)

print("X = \n", X)
print(X.dtype)
print("Y = \n", Y)
print(Y.dtype)

X = 
 tensor([[[0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 1.],
         [0., 0., 0., 1., 0.],
         [0., 0., 1., 0., 0.]]])
torch.float32
Y = 
 tensor([[4, 4, 3, 2, 0]])
torch.int64


In [51]:
# Model hyperparameters.
# The network reads one-hot characters and predicts one class per character, so
# both the input and the output width equal the vocabulary size (previously the
# output width was a hard-coded 5, which only matched this particular string).
input_size = vocab_size
hidden_size = 6
output_size = vocab_size

# Learning rate passed to the optimizer.
lr = 0.1

In [52]:
class VanillaRNN(nn.Module):
  """Single-layer ``nn.RNN`` followed by a linear output layer applied at every time step.

  Args:
    input_size: Number of features in each input step.
    hidden_size: Width of the recurrent hidden state.
    output_size: Number of values the output layer produces per step.
  """

  def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    # Recurrent layer; batch_first=True means tensors are laid out as (batch, seq, feature).
    self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, batch_first=True)
    # Output layer (nn.Linear includes a bias term by default).
    self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

  def forward(self, x):
    """Return the output-layer values for every time step of ``x``."""
    hidden_states, _ = self.rnn(x)  # the final hidden state is not needed here
    return self.fc(hidden_states)

In [53]:
# Build the character-level model from the hyperparameters defined above.
# NOTE(review): no seed is set in this cell, so the initial weights (and the
# training log further down) differ between runs unless the RNG state carried over
# from earlier cells happens to be the same.
net = VanillaRNN(input_size, hidden_size, output_size)
# Bare last expression: the notebook displays the list of learnable parameters.
list(net.parameters())

[Parameter containing:
 tensor([[ 0.0855, -0.1944,  0.1208,  0.1701, -0.2244],
         [-0.2056,  0.1138, -0.0203,  0.2325, -0.2377],
         [ 0.1902, -0.0559,  0.1682,  0.3604,  0.3763],
         [-0.3584,  0.1864, -0.3756,  0.3567, -0.1144],
         [-0.3972, -0.2592, -0.3021, -0.2059,  0.3712],
         [ 0.2714, -0.1271,  0.0036, -0.1628,  0.2676]], requires_grad=True),
 Parameter containing:
 tensor([[-0.3357,  0.2503,  0.2755,  0.0870,  0.2406,  0.0080],
         [ 0.4007, -0.0640, -0.1714,  0.0357, -0.2510, -0.0692],
         [ 0.0937,  0.3574,  0.2292,  0.2233, -0.0803, -0.2404],
         [-0.2833, -0.0771,  0.2963,  0.2137, -0.3534, -0.2300],
         [-0.1297, -0.1622, -0.2788,  0.0945,  0.2847, -0.2184],
         [-0.2772, -0.0938, -0.1943,  0.0298,  0.1327,  0.1385]],
        requires_grad=True),
 Parameter containing:
 tensor([-0.2552, -0.0161,  0.0310, -0.1643, -0.1642,  0.1655],
        requires_grad=True),
 Parameter containing:
 tensor([ 0.2669,  0.0515,  0.2784, -

In [58]:
# Forward pass with the untrained network: show the output shape, the raw scores,
# the target indices, and the highest-scoring class at every time step.
outputs = net(X)
predicted = outputs.argmax(-1)
print(outputs.shape, outputs, Y, predicted, sep='\n')

torch.Size([1, 5, 5])
tensor([[[ 0.0955,  0.1396,  0.1584, -0.4805,  0.2887],
         [ 0.2448,  0.1923,  0.2506, -0.2664,  0.3321],
         [ 0.2403,  0.2216,  0.1846, -0.2615,  0.3001],
         [ 0.2135,  0.1944,  0.1771, -0.5327,  0.2537],
         [ 0.1928,  0.1667,  0.1754, -0.3409,  0.2941]]],
       grad_fn=<ViewBackward0>)
tensor([[4, 4, 3, 2, 0]])
tensor([[4, 4, 4, 4, 4]])


In [61]:
# Flattened forms handed to the loss function in the training loop:
# scores reshaped to (-1, output_size) and targets reshaped to a 1-D vector.
# Only the last bare expression is displayed by the notebook, so the first
# line's result is computed and then discarded.
outputs.view(-1, output_size)
Y.view(-1)

tensor([4, 4, 3, 2, 0])

In [59]:
# Cross-entropy over the per-character class scores.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters, using the learning rate from the hyperparameter cell.
optimizer = optim.Adam(params=net.parameters(), lr=lr)

In [69]:
## Training loop

for epoch in range(100):
  # Forward pass, then flatten the batch and time axes together so the loss sees
  # one row of class scores per character and one target index per character.
  outputs = net(X)
  loss = criterion(outputs.view(-1, output_size), Y.view(-1))

  # Standard update: clear old gradients, back-propagate, apply the step.
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  # The reported prediction comes from the scores computed before this step's update.
  result = outputs.argmax(-1) # (1, 5)
  # reshape(-1).tolist() gives plain Python ints. Unlike squeeze().numpy(), it still
  # yields a list for a length-1 sequence and does not require the tensor to be on the CPU.
  result_str = ''.join(index_to_char[idx] for idx in result.reshape(-1).tolist())
  print(f"epoch = {epoch}, loss = {loss.item():.3f}, prediction = {result}, y_data = {y_data}, prediction_str = {result_str}")


epoch = 0, loss = 1.587, prediction = tensor([[4, 4, 4, 4, 4]]), y_data = [[4, 4, 3, 2, 0]], prediction_str = ppppp
epoch = 1, loss = 1.324, prediction = tensor([[4, 4, 4, 4, 4]]), y_data = [[4, 4, 3, 2, 0]], prediction_str = ppppp
epoch = 2, loss = 1.135, prediction = tensor([[4, 4, 4, 4, 0]]), y_data = [[4, 4, 3, 2, 0]], prediction_str = pppp!
epoch = 3, loss = 0.945, prediction = tensor([[4, 4, 4, 2, 0]]), y_data = [[4, 4, 3, 2, 0]], prediction_str = pppe!
epoch = 4, loss = 0.784, prediction = tensor([[4, 4, 4, 2, 0]]), y_data = [[4, 4, 3, 2, 0]], prediction_str = pppe!
epoch = 5, loss = 0.625, prediction = tensor([[4, 4, 4, 2, 0]]), y_data = [[4, 4, 3, 2, 0]], prediction_str = pppe!
epoch = 6, loss = 0.494, prediction = tensor([[4, 4, 3, 2, 0]]), y_data = [[4, 4, 3, 2, 0]], prediction_str = pple!
epoch = 7, loss = 0.386, prediction = tensor([[4, 4, 3, 2, 0]]), y_data = [[4, 4, 3, 2, 0]], prediction_str = pple!
epoch = 8, loss = 0.290, prediction = tensor([[4, 4, 3, 2, 0]]), y_data 

In [71]:
# text = "공복에 드셔야 흡수가 잘되기 때문입니다. 공복에 드셨을 때 소화가 잘 되지 않는 분들은 식후에 드시는 것이 좋아요. 생들기름은 성인 기준으로 하루 5g정도 드시면 충분합니다. 숟가락에 따랐을 때 반절이면 5g이에요."
# Korean example: the target is the input shifted left by one character,
# with "!" appended as the end marker.
text = "안녕하세요 여러분"
input_str_kr = text
label_str_kr = f"{text[1:]}!"

print(input_str_kr, label_str_kr, sep='\n')

안녕하세요 여러분
녕하세요 여러분!


In [78]:
## Vocabulary set
# Distinct characters of the input and label strings; the sorted list fixes the
# index order (the raw set itself has no guaranteed order when printed).
voc_set = set(input_str_kr + label_str_kr)
char_vocab = sorted(voc_set)

vocab_size = len(char_vocab)

print('vocab_size :', vocab_size)
print('vocab_set :', voc_set)

vocab_size : 10
vocab_set : {'분', '녕', '!', '하', '러', ' ', '여', '세', '요', '안'}


In [80]:
## Character <-> index lookup tables

char_to_index = {ch: idx for idx, ch in enumerate(char_vocab)}
index_to_char = dict(enumerate(char_vocab))

print('Char to index = \n', char_to_index)
print('Index to char = \n', index_to_char)

Char to index = 
 {' ': 0, '!': 1, '녕': 2, '러': 3, '분': 4, '세': 5, '안': 6, '여': 7, '요': 8, '하': 9}
Index to char = 
 {0: ' ', 1: '!', 2: '녕', 3: '러', 4: '분', 5: '세', 6: '안', 7: '여', 8: '요', 9: '하'}


In [84]:
## Label encoding
# Map each character to its vocabulary index and wrap the result in an outer
# list, which serves as the batch dimension (a single sequence).

x_data = [[char_to_index[ch] for ch in input_str_kr]]
y_data = [[char_to_index[ch] for ch in label_str_kr]]

print(x_data, y_data, sep='\n')

[[6, 2, 9, 5, 8, 0, 7, 3, 4]]
[[2, 9, 5, 8, 0, 7, 3, 4, 1]]


In [86]:
## One-hot encoding
# Row i of the identity matrix is the one-hot vector for index i.

identity = np.identity(vocab_size)
x_one_hot = identity[x_data]
print('x_one_hot \n', x_one_hot)

x_one_hot 
 [[[0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
  [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]]


In [91]:
## Tensors
# Inputs are float32 one-hot vectors; targets are int64 class indices.

X = torch.tensor(x_one_hot, dtype=torch.float32)
Y = torch.tensor(y_data, dtype=torch.int64)

print(X, Y, sep='\n')

tensor([[[0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]]])
tensor([[2, 9, 5, 8, 0, 7, 3, 4, 1]])


In [92]:
## Model hyperparameters

vocab_size = len(char_vocab)
# One-hot inputs and per-character class scores both span the whole vocabulary.
input_size = output_size = vocab_size
hidden_size = 20
lr = 0.1

In [None]:
class VanillaRNN_Kor(nn.Module):
  """Single-layer ``nn.RNN`` followed by a linear output layer applied at every time step.

  Same architecture as ``VanillaRNN``; used for the Korean example.

  Args:
    input_size: Number of features in each input step.
    hidden_size: Width of the recurrent hidden state.
    ouput_size: Number of values the output layer produces per step. The
      misspelled name is kept so existing keyword callers keep working.
  """

  def __init__(self, input_size, hidden_size, ouput_size):
    super().__init__()
    # Recurrent layer; batch_first=True means tensors are laid out as (batch, seq, feature).
    self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
    # Size the output layer from the constructor argument. The earlier code read the
    # notebook-level global `output_size`, silently ignoring the argument (and raising
    # NameError wherever that global does not exist).
    self.fc = nn.Linear(hidden_size, ouput_size, bias=True)

  def forward(self, x):
    """Return the output-layer values for every time step of ``x``."""
    x, _status = self.rnn(x)  # the final hidden state is not used
    x = self.fc(x)
    return x