**Table of contents**<a id='toc0_'></a>    
- [기본 조작](#toc1_1_) : RNN, bidrectional, LSTM, GRU
- [EX1 : 문자단위 RNN(char RNN)](#toc1_2_)    
- [EX2 : 문장단위 RNN](#toc1_3_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

## <a id='toc1_1_'></a>[기본 조작](#toc0_)

In [None]:
# python 3.8.19
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [4]:
# 입력차원
#---------------
# 3D (batch, time, vocab)
# 입력데이터는 임베딩층을 통과한 상태라고 하자
# 그러므로 input 크기를 dense 크기로 간주한다

input_size=5    # dense 크기(임베딩 결과)
hidden_size=8   # hidden 크기
inputs = torch.Tensor(1,10,5) # 입력텐서(batch,time,dense)

In [None]:
# rnn
# -----------------
# RNN 셀은 두 개의 입력을 리턴하는데, 
# 첫번째 리턴값은 모든 시점(timesteps)의 은닉 상태들 
# 두번째 리턴값은 마지막 시점(timestep)의 은닉 상태

cell = nn.RNN(input_size, hidden_size, batch_first=True, num_layers = 2)
outputs, _status = cell(inputs)
print(outputs.shape)
print(_status.shape)
 


torch.Size([1, 10, 8])
torch.Size([2, 1, 8])


In [5]:
# 양방향RNN(Bidirectional)

cell = nn.RNN(input_size = 5, hidden_size = 8, num_layers = 2, batch_first=True, bidirectional = True)
outputs, _status = cell(inputs)
print(outputs.shape)
print(_status.shape)

torch.Size([1, 10, 16])
torch.Size([4, 1, 8])


In [None]:
# LSTM(Long short term memory)

cell = nn.LSTM(input_size = 5, hidden_size = 8, num_layers = 2, batch_first=True)
outputs, _state = cell(inputs) # _state는 튜플형태태
print(outputs.shape) # (batch, time, hidden)
print(_state[0].shape) # 마지막 hidden_state
print(_state[1].shape) # 마지막 cell_state
                 # (num_layers * num_directions, batch_size, hidden_size)


torch.Size([1, 10, 8])
torch.Size([2, 1, 8])
torch.Size([2, 1, 8])


In [20]:
# GRU

cell = nn.GRU(input_size = 5, hidden_size = 8, num_layers = 2, batch_first=True)
outputs, _state = cell(inputs)
print(outputs.shape) # (batch, time, hidden)
print(_state.shape) 


torch.Size([1, 10, 8])
torch.Size([2, 1, 8])


## <a id='toc1_2_'></a>[EX1 : 문자단위 RNN(char RNN)](#toc0_)
---
- apple 입력 -> pple! 출력

In [None]:
# 문자집합 생성
input = 'apple'
label = 'pple!'
vocab = sorted(list(set(input+label)))
vocab_size = len(vocab)
print('문자집합 크기 : {}'.format(vocab_size))

문자집합 크기 : 5


In [None]:
# 하이퍼파라미터
input_size = vocab_size # embedding 단계 이후이므로 input은 dense 의미
hidden_size = 10        # 여기서 embedding을 원핫인코딩하면, dense차원은 vocab크기
output_size = 5
LR = 0.1

In [None]:
# 데이터 구성: 정수인코딩 - 정수맵핑 - 텐서변환
# ------------------------------------------

# 정수인코딩
char_to_index = dict((c,i) for i,c in enumerate(vocab))

index_to_char = {}
for key, value in char_to_index.items():
    index_to_char[value] = key

print(char_to_index)
print(index_to_char)

{'!': 0, 'a': 1, 'e': 2, 'l': 3, 'p': 4}
{0: '!', 1: 'a', 2: 'e', 3: 'l', 4: 'p'}


In [None]:
# 정수맵핑
x_data = [char_to_index[c] for c in input]
y_data = [char_to_index[c] for c in label]

print(x_data)
print(y_data)

[1, 4, 4, 3, 2]
[4, 4, 3, 2, 0]


In [None]:
# 원핫인코딩(embedding) 및 3D 텐서변환

# 배치차원 추가
x_data = [x_data]
y_data = [y_data]
print(x_data)

[[1, 4, 4, 3, 2]]


In [45]:
# 원핫벡터 변환
x_one_hot = [np.eye(vocab_size)[x] for x in x_data]
print(x_one_hot)

# 텐서 변환
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)
X.shape, Y.shape

[array([[0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.]])]


(torch.Size([1, 5, 5]), torch.Size([1, 5]))

- 모델구현
    - $rnn : tanh(XW_x+H_{t-1}W_h + b)=H_t$
    - $출력층 : f(H_tWy+b)=Y_t$

In [None]:
# 신경망 구조
class Net(torch.nn.Module):
    def __init__(self,input_size,hidden_size,output_size):
        super(Net, self).__init__()
        self.rnn = torch.nn.RNN(input_size, hidden_size, batch_first=True) # RNN층
        self.fc = torch.nn.Linear(hidden_size,output_size,bias=True) # 출력층

    def forward(self,x):
        x, _status = self.rnn(x) # (N,T,H)
        x = self.fc(x)  # (N,T,V)
        return x

In [None]:
# 선언
net = Net(input_size, hidden_size, output_size) # 신경망
criterion = torch.nn.CrossEntropyLoss()         # 손실함수
optimizer = optim.Adam(net.parameters(), LR)    # 최적화함수수


# output 사이즈 확인
outputs = net(X) 
print(outputs.shape) # 3D (N,T,V)
print(outputs.view(-1,vocab_size).shape)  # 2D (NxT,V)
print(outputs.data.numpy().argmax(axis=2).shape)  #1D (N,T)

torch.Size([1, 5, 5])


In [None]:
# 학습
epochs=100
for i in range(epochs):
    outputs = net(X)
    loss = criterion(outputs.view(-1, vocab_size), Y.view(-1)) # Batch 차원 제거 => (NxT,vocab), (,NxT)
    
    optimizer.zero_grad() # 초기화
    loss.backward()       # 기울기
    optimizer.step()      # 파라미터 업데이트

    # 알림
    result = outputs.data.numpy().argmax(axis=2) # 단어별 확률분포(vocab size)에서 가장 높은 값의 인덱스 선택
    result_str = ''.join([index_to_char[c] for c in np.squeeze(result)]) # 인덱스->char
    print(i, "loss: ", loss.item(), "prediction: ", result, "true Y: ", y_data, "prediction str: ", result_str)




0 loss:  1.5324671268463135 prediction:  [[4 4 4 4 4]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  ppppp
1 loss:  1.1871622800827026 prediction:  [[4 4 4 4 4]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  ppppp
2 loss:  0.8183690309524536 prediction:  [[4 4 3 3 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  ppll!
3 loss:  0.4565443992614746 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
4 loss:  0.21796676516532898 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
5 loss:  0.10103382170200348 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
6 loss:  0.05083095282316208 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
7 loss:  0.02786228619515896 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
8 loss:  0.01661837473511696 prediction:  [[4 4 3 2 0]] true Y:  [[4, 4, 3, 2, 0]] prediction str:  pple!
9 loss:  0.010565690696239471 prediction:  [[4 4 3

In [None]:
# -----------------------
# argmax(dim=n)
# ------------------------
# 을 하면 그 차원(n)이 사라지고, 최댓값의 인덱스 배열이 남는다다
# 3D 텐서 생성 (크기: 2x3x4)
tensor_3d = torch.tensor([
    [[1, 5, 2, 3],
     [4, 8, 6, 7],
     [9, 3, 1, 0]],

    [[2, 3, 7, 1],
     [5, 1, 4, 8],
     [0, 6, 2, 9]]
])

# dim=0 (첫 번째 차원에서 최댓값의 인덱스)
# 첫첫 번째 차원(배치)**을 따라 최댓값의 인덱스를 반환 → 크기 (3, 4)
# 배치가 2개이니까 인덱스는 0과1만 할당당
print(torch.argmax(tensor_3d, dim=0)) 

# dim=1 (두 번째 차원에서 최댓값의 인덱스)
# 두 번째 차원(행)을 따라 최댓값의 인덱스를 반환 → 크기 (2, 4)
# T가 3개이니까 인덱스는 0~2만 할당
print(torch.argmax(tensor_3d, dim=1))

# dim=2 (세 번째 차원에서 최댓값의 인덱스)
# 세 번째 차원(열)을 따라 최댓값의 인덱스를 반환 → 크기 (2, 3)
# D가 4개이니까 인덱스는 0~3만 할당당
print(torch.argmax(tensor_3d, dim=2))

tensor([[1, 0, 1, 0],
        [1, 0, 0, 1],
        [0, 1, 1, 1]])
tensor([[2, 1, 1, 1],
        [1, 2, 0, 2]])
tensor([[1, 1, 0],
        [2, 3, 3]])


## <a id='toc1_3_'></a>[EX2 : 문장단위 RNN](#toc0_)

In [None]:
sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

In [None]:
# 하이퍼파라미터
# ----------------
# embedding : 원핫
# data = (batch, time, dense)
# rnn output = (batch, time, hidden)
# affine output = (batch, time, vocab)
input_size = vocab_size 
hidden_size = 25
time_size = 10
LR = 0.1
EPOCHS = 100


In [None]:
# 데이터구성 : 정수인코딩 - 정수맵핑 - 텐서변환
# -------------------------------------------

# 정수인코딩
vocab = list(set(sentence))
char_dict = {c:i for i, c in enumerate(vocab)}
vocab_size = len(vocab) # 25

In [None]:
# 정수맵핑
x_data = []
y_data = []

for i in range(0, len(sentence) - time_size):
    x_str = sentence[i:i+time_size]
    y_str = sentence[i+1:i+1+time_size]
    print(x_str,'--->', y_str)

    x_data.append([char_dict[c] for c in x_str])
    y_data.append([char_dict[c] for c in y_str])

if you wan ---> f you want
f you want --->  you want 
 you want  ---> you want t
you want t ---> ou want to
ou want to ---> u want to 
u want to  --->  want to b
 want to b ---> want to bu
want to bu ---> ant to bui
ant to bui ---> nt to buil
nt to buil ---> t to build
t to build --->  to build 
 to build  ---> to build a
to build a ---> o build a 
o build a  --->  build a s
 build a s ---> build a sh
build a sh ---> uild a shi
uild a shi ---> ild a ship
ild a ship ---> ld a ship,
ld a ship, ---> d a ship, 
d a ship,  --->  a ship, d
 a ship, d ---> a ship, do
a ship, do --->  ship, don
 ship, don ---> ship, don'
ship, don' ---> hip, don't
hip, don't ---> ip, don't 
ip, don't  ---> p, don't d
p, don't d ---> , don't dr
, don't dr --->  don't dru
 don't dru ---> don't drum
don't drum ---> on't drum 
on't drum  ---> n't drum u
n't drum u ---> 't drum up
't drum up ---> t drum up 
t drum up  --->  drum up p
 drum up p ---> drum up pe
drum up pe ---> rum up peo
rum up peo ---> um up peop
u

In [114]:
print(x_data[0])  # if you wan에 해당됨.
print(y_data[0])  # f you want에 해당됨.

[7, 2, 17, 12, 8, 6, 17, 4, 24, 1]
[2, 17, 12, 8, 6, 17, 4, 24, 1, 5]


In [None]:
# 원핫인코딩(embedding) 및 텐서변환

x_one_hot = [np.eye(vocab_size)[x] for x in x_data]
X = torch.FloatTensor(x_one_hot)  # (170,10,25)
Y = torch.LongTensor(y_data)  # (170,10,)


- 모델

In [None]:
# 신경망 구조

class Net(torch.nn.Module):
    def __init__(self, input_size, hidden_size, layers):
        super(Net, self).__init__()
        self.rnn = torch.nn.RNN(input_size, hidden_size, num_layers=layers, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, vocab_size, bias=True)

    def forward(self,x):
        x, _status = self.rnn(x) # x: output(모든 타임스텝의 은닉상태), _status: 마지막 타임스텝의 은닉상태
        x = self.fc(x)
        return x 

In [None]:
# 선언언
net = Net(input_size, hidden_size, 2)   # 신경망
criterion = torch.nn.CrossEntropyLoss() # 손실함수
optimizer = optim.Adam(net.parameters(), LR)  # 최적화함수수

In [None]:
# 결과과 차원확인
outputs = net(X)
print(outputs.shape)  # 3D (NL170,T:10,V:25)
print(outputs.view(-1,vocab_size).shape) # 2D (NxT:1700,V:25)
print(Y.view(-1).shape) # 1D (,NxT:1700)

torch.Size([170, 10, 25])
torch.Size([1700, 25])
torch.Size([1700])


In [119]:
# 학습
epochs=100
for i in range(epochs):
    outputs = net(X)  # 입력(170,10,25)=>출력(170,10,25)
    loss = criterion(outputs.view(-1,vocab_size),Y.view(-1))
    
 
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # 알림
    results = outputs.argmax(axis=2)
    predict_str = ''
    for j, result in enumerate(results):
        if j == 0:  # 처음에는 예측 결과를 모두 가져오지만
            predict_str += ''.join([vocab[t] for t in result])
        else: # 그 다음에는 마지막 글자만 반복 추가
            predict_str += vocab[result[-1]]

    print(predict_str)
    

cbbbbbbbcbbbbbbbbcbbbbbbcbbbbbbbbbbbbbbbbbbbcbbbbbbb.bbcbbbbcbbcbbbbbbbcbbbbbbbbbbbbbbbbcbbbb.bbbbbbbbbbbbbbcbbbbbbbbbbb.bbbbcbb.bb.bbbbcbbbbcbbcbbb.bbcbbbbbbbcbbbbbcbbbbbbb.bbbcb
                        p                              p                                 p                              p       p                                                  
                                                                                                                                                                                   
eddedddeddededdodddddododdd dodod doddededdedddddde eeddeeddddeddeededdddededddodddsdodtsddde dededdeodddododdd dedoddddddedddddddeedededdeddedddeddddd ded dedodeddddd deddededdde
 rr rr  rr ur rr rr ur rrhr rr r  rr rr rrrr rr rr rrr rr rr    r  rr rr r  rrrrrr rrr urerr rr  r  r rh rrhrr rrr rr rr rr  rr  r  rr rrrr rr rrrr rr rr  rr  rrrr rr rr  r  r  u 
           t  tlt   t t w w t     t  t t     w      w t   t  w t     t    t   w t t t  whtt     t  m