### 7 순환신경망

#### 7.1 기본 순환 신경망

In [17]:
# 라이브러리 로드
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader

In [12]:
import IPython.display as disp

<h5>순환신경망 개념 설명</h5>
<a href="https://wikidocs.net/60690">https://wikidocs.net/60690</a>
<br>
<img src="./images/rnn_image4_ver2.png">

##### 파이썬으로 RNN 구현
$h_t=\tanh(W_x x_t + W_h h_{t-1} + b_h)$
<br>
$y_t=f(W_y h_t + b_y)$

In [21]:
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    Accepts a scalar or a NumPy array and returns values between 0 and 1
    with the same shape as the input.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator


# Quick check on a random 3x4 matrix drawn from a standard normal distribution.
x = np.random.randn(3, 4)
sigmoid(x)

array([[0.27073798, 0.37646874, 0.77600677, 0.44690026],
       [0.44388026, 0.77298242, 0.73875228, 0.32308485],
       [0.66897255, 0.34497565, 0.39840769, 0.82127011]])

In [27]:
# Dimensions of the toy RNN.
timesteps = 10   # number of time steps (length of the price sequence in the stock example)
input_size = 4   # input features per step (Open/High/Low/Volume in the stock example)
hidden_size = 8  # width of the hidden state, i.e. the capacity of the memory cell
out_size = 1     # number of output units

inputs = np.random.random((timesteps, input_size))  # input sequence
hidden_state_t = np.zeros((hidden_size,))  # the initial hidden state h_0 is all zeros

# Every weight matrix is laid out as (in_features, out_features), so a row
# vector is always multiplied from the left: np.dot(v, W).
Wx = np.random.randn(input_size, hidden_size)   # input -> hidden weights
Wh = np.random.randn(hidden_size, hidden_size)  # hidden -> hidden weights
Bh = np.random.randn(hidden_size)               # hidden-state bias
Bo = np.random.randn(out_size)                  # output-layer bias
Wy = np.random.randn(hidden_size, out_size)     # hidden -> output weights

hidden_state_stack = []  # hidden state of every step
output_stack = []        # output of every step

# Recurrence:
#   h_t = tanh(x_t Wx + h_{t-1} Wh + Bh)
#   y_t = sigmoid(h_t Wy + Bo)
for t in range(timesteps):
    ht = np.tanh(np.dot(inputs[t], Wx) + np.dot(hidden_state_t, Wh) + Bh)
    yt = sigmoid(np.dot(ht, Wy) + Bo)
    print(ht.shape)
    hidden_state_stack.append(ht)
    output_stack.append(yt)
    # Carry the new state into the next step. Without this assignment every
    # step would see the zero initial state and nothing would be recurrent.
    hidden_state_t = ht

print("은닉상태 값:")
print(np.stack(hidden_state_stack))
print("출력 값:")
print(np.stack(output_stack))



(8,)
(8,)
(8,)
(8,)
(8,)
(8,)
(8,)
(8,)
(8,)
(8,)
은닉상태 값:
[[-0.57166436 -0.98670248 -0.82612591  0.04004308 -0.84764638 -0.79364865
   0.98894637 -0.98986762]
 [-0.36904716 -0.77255695 -0.83953156  0.46763686 -0.65965302 -0.52689072
   0.99766625 -0.97933114]
 [-0.91996562 -0.92072417 -0.98119166  0.67690757 -0.91332098 -0.57814044
   0.99972124 -0.99557965]
 [-0.65475786 -0.87267675 -0.85722462  0.50980551 -0.86526455 -0.26168189
   0.98597222 -0.96966675]
 [-0.96999724 -0.64508226 -0.99204274  0.80738749 -0.96299903 -0.38187911
   0.99869592 -0.99022754]
 [-0.82143297 -0.98291944 -0.87101403  0.48743911 -0.94001867 -0.07285294
   0.98502478 -0.98252842]
 [-0.64666021 -0.94224911 -0.86989271  0.42028443 -0.84359513 -0.48611411
   0.9949679  -0.98445717]
 [-0.57765586 -0.70746655 -0.85817263  0.35010523 -0.85391536 -0.61993201
   0.92850633 -0.95061561]
 [-0.97049019 -0.9918288  -0.97860413  0.49816004 -0.98354929 -0.55093509
   0.98846221 -0.9939664 ]
 [-0.76104388 -0.97970669 -0.8931

##### 7.1.5 기본 순환 신경망 구현

In [2]:
# Load the KOSPI price data.
df = pd.read_csv("./data/kospi.csv")

# Rescale the price and volume columns to the 0~1 range with a min-max scaler.
# NOTE(review): the scaler is fit on every row, including rows that later end
# up in the test split - confirm that this is acceptable for the experiment.
scaled_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
scaler = MinMaxScaler()
df[scaled_columns] = scaler.fit_transform(df[scaled_columns])
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2019-01-30,0.722898,0.732351,0.745525,0.759235,2206.199951,0.242113
1,2019-01-31,0.763058,0.750069,0.769089,0.757866,2204.850098,0.274771
2,2019-02-01,0.751894,0.745714,0.76928,0.756456,2203.459961,0.241609
3,2019-02-07,0.755809,0.742538,0.764596,0.756415,2203.419922,0.215603
4,2019-02-08,0.731584,0.717777,0.739548,0.729669,2177.050049,0.197057


In [3]:
# Pick the compute device: the first CUDA GPU when available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Model inputs are four scaled columns; the target is the scaled closing price.
feature_columns = ['Open', 'High', 'Low', 'Volume']
inputs = df[feature_columns].values  # 2-D array, one row per record
targets = df[['Close']].values       # 2-D array with a single column

In [6]:
# 시퀀스 데이터 만들기
def seq_data(x, y, seq_length):
    """Cut two aligned series into sliding windows and next-step targets.

    Window i is x[i : i + seq_length] and its target is y[i + seq_length],
    so len(x) - seq_length pairs are produced.

    Example with x = y = [1, 2, 3, 4, 5] and seq_length = 3:
        first pair:  x_seq -> [1, 2, 3], y_seq -> 4
        second pair: x_seq -> [2, 3, 4], y_seq -> 5

    Returns:
        A (windows, targets) pair of float tensors placed on the module-level
        `device`; the targets are reshaped into a single column.
    """
    n_windows = len(x) - seq_length
    windows = [x[start:start + seq_length] for start in range(n_windows)]
    labels = [y[start + seq_length] for start in range(n_windows)]

    x_tensor = torch.FloatTensor(np.array(windows)).to(device)
    # Targets are reshaped to 2-D with view(-1, 1); per the original author's
    # note this is the shape the MSE loss used as the error function expects.
    y_tensor = torch.FloatTensor(np.array(labels)).to(device).view(-1, 1)
    return x_tensor, y_tensor

# The first `split_num` windows form the training set; the rest is the test set.
split_num = 200
sequence_length = 5

x_seq, y_seq = seq_data(inputs, targets, sequence_length)
x_train_seq, x_test_seq = x_seq[:split_num], x_seq[split_num:]
y_train_seq, y_test_seq = y_seq[:split_num], y_seq[split_num:]

print(x_train_seq.shape, y_train_seq.shape)
print(x_test_seq.shape, y_test_seq.shape)


torch.Size([200, 5, 4]) torch.Size([200, 1])
torch.Size([226, 5, 4]) torch.Size([226, 1])


In [7]:
# Wrap the tensors in datasets and mini-batch loaders.
batch_size = 20
trainset = TensorDataset(x_train_seq, y_train_seq)
testset = TensorDataset(x_test_seq, y_test_seq)

# Only the training loader shuffles; test batches keep their original order.
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

In [8]:
# Hyperparameters needed to build the RNN.
input_size = x_seq.shape[2]  # size of the last axis of the windowed inputs
num_layers = 2
hidden_layer = 8  # presumably the hidden-state width - confirm where it is used

In [9]:

# Inspect the full shape of the windowed input tensor.
size = x_seq.shape
size

torch.Size([426, 5, 4])

In [None]:
# RNN 클래스 
class VanillaRNN(nn.Module):
    """Stacked `nn.RNN` followed by a linear + sigmoid head.

    The RNN outputs of all time steps are flattened and fed to a single linear
    layer, so the model emits one sigmoid-activated value per sample and only
    accepts sequences of exactly `sequence_length` steps.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the hidden state of each RNN layer.
        sequence_length: number of time steps in every input sequence.
        num_layers: number of stacked RNN layers.
        device: stored on the instance for backward compatibility; the forward
            pass follows the device of its input tensor instead.
    """

    def __init__(self, input_size, hidden_size, sequence_length, num_layers, device):
        super().__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # The head sees every time step at once: hidden_size * sequence_length inputs.
        self.fc = nn.Sequential(nn.Linear(hidden_size * sequence_length, 1), nn.Sigmoid())

    def forward(self, x):
        """Run a batch of sequences through the network.

        Args:
            x: tensor indexed as (batch, step, feature), because the RNN is
                built with batch_first=True.

        Returns:
            Tensor with one row per sample and a single column.
        """
        # Allocate the zero initial state from `x` so it always shares the
        # input's device and dtype, instead of building a float32 CPU tensor
        # and copying it to the device given at construction time.
        h0 = x.new_zeros((self.num_layers, x.size(0), self.hidden_size))
        out, _ = self.rnn(x, h0)
        # Flatten (batch, step, hidden) into (batch, step * hidden) for the head.
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)