# [순환신경망 실습: 다변량 시계열 예측 모델링]

### 1. 모듈 불러오기

In [None]:
import numpy as np
import pandas as pd
import pandas_datareader.data as pdr
import datetime

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

from sklearn.metrics import mean_absolute_error

import seaborn as sns 
import matplotlib.pyplot as plt

#from google.colab import drive
#drive.mount('/content/gdrive/')
#import os
#os.chdir('/content/gdrive/My Drive/Day3/hands-on/3일차_RNN1/')

### 2. 데이터 불러오기: 저장된 CSV 파일에서 한화 주식 데이터 불러오기 (원본 출처: YAHOO)

In [None]:
# Load the stock price table from a local CSV file.
# NOTE(review): engine='python' selects pandas' slower pure-Python parser — confirm it is actually required for this file.
hanhwa_stock = pd.read_csv('Data/regression_multivariate_hanhwa_stock.csv', engine='python')

In [None]:
# Preview the first rows of the raw table.
hanhwa_stock.head()

In [None]:
# Preview the last rows of the raw table.
hanhwa_stock.tail()

In [None]:
# Draw the `Low` column against the row index on a wide canvas.
plt.figure(figsize=(16, 8))
hanhwa_stock["Low"].plot(grid=True)
plt.show()

In [None]:
# Count missing values per column.
print(hanhwa_stock.isna().sum())

In [None]:
# Show (number of rows, number of columns) of the raw table.
hanhwa_stock.shape

### 3. 데이터 전처리: sequence 길이에 맞게 RNN Input 데이터 만들기

In [None]:
# Move 'Date' into the row index so that only the remaining columns are used as model features.
hanhwa_stock = hanhwa_stock.set_index('Date')

In [None]:
# Preview the table after 'Date' became the index.
hanhwa_stock.head()

In [None]:
# Flip the row order of the table.
# NOTE(review): presumably the CSV is stored newest-first and this makes the rows chronological — confirm against the file,
# otherwise the model would be trained to predict backwards in time.
data = hanhwa_stock[::-1]  # reverse order

In [None]:
# Preview the first rows after reversing the order.
data.head()

In [None]:
# Number of consecutive rows that make up one input window.
seq_length = 7 
# Number of leading rows used for training (70% of the data, rounded down).
train_size = int(len(data) * 0.7)

In [None]:
# The first train_size rows form the training split.
train_set = data[0:train_size]
# The test split starts seq_length rows early so that its first window predicts the row at position train_size.
test_set = data[train_size - seq_length:]

In [None]:
# scaling function for input data
def minmax_scaler(data):
    """Rescale every column of ``data`` to approximately the [0, 1] range.

    The minimum and maximum are taken along axis 0 (per column). A small
    constant is added to the range so that a constant column yields zeros
    instead of a division by zero.

    Args:
        data: 2-D array-like (NumPy array or DataFrame) of numeric values.

    Returns:
        Object of the same shape as ``data`` holding the scaled values.
    """
    column_min = np.min(data, 0)
    column_range = np.max(data, 0) - column_min
    shifted = data - column_min
    return shifted / (column_range + 1e-7)

In [None]:
# Scale both splits with the TRAINING split's column-wise min/max.
# Scaling the test split with its own statistics would use information that is
# not available at prediction time and would put the two splits on different
# scales, so the model would be evaluated on inputs inconsistent with what it
# was fit on. Test values may therefore fall slightly outside [0, 1].
train_min = np.min(train_set, 0)
train_range = np.max(train_set, 0) - train_min
# The statistics must be taken before train_set itself is overwritten below.
test_set = (test_set - train_min) / (train_range + 1e-7)
train_set = minmax_scaler(train_set)

In [None]:
# Preview the scaled training split.
train_set.head()

In [None]:
# Preview the scaled test split (the previous cell already shows the training split).
test_set.head()

In [None]:
# Number of rows in the training split.
len(train_set)

In [None]:
def build_dataset(time_series, seq_length, verbose=False):
    """Slice a multivariate series into sliding-window samples.

    Sample ``k`` uses rows ``k .. k + seq_length - 1`` (all columns) as input
    and the LAST column of row ``k + seq_length`` as its target.

    Args:
        time_series: pandas DataFrame indexed by position via ``.iloc``; rows
            are time steps and columns are variables. The last column is the
            prediction target (the close price according to the original
            comment — confirm the column order of the data).
        seq_length: Number of consecutive rows in each input window.
        verbose: When True, print every window and its target. Off by default
            because printing one DataFrame per sample floods the output and
            dominates the runtime on real data.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays with shapes
        ``(n_samples, seq_length, n_columns)`` and ``(n_samples, 1)``.

    Raises:
        ValueError: If ``time_series`` has no more than ``seq_length`` rows,
            so that not a single (window, target) pair can be formed.
    """
    n_samples = len(time_series) - seq_length
    if n_samples <= 0:
        raise ValueError(
            f"time_series has {len(time_series)} rows; need more than "
            f"seq_length={seq_length} rows to build at least one sample"
        )

    dataX = []
    dataY = []
    for start in range(n_samples):
        stop = start + seq_length
        window = time_series.iloc[start:stop, :]
        # A list selector keeps the target 1-D so the stacked Y is (n_samples, 1).
        target = time_series.iloc[stop, [-1]]
        if verbose:
            print(window, "->", target)
        dataX.append(window)
        dataY.append(target)
    return np.stack(dataX), np.stack(dataY)

In [None]:
# make train-test dataset to input
# Each X is stacked as (samples, seq_length, n_columns); each Y holds the last column of the row that follows its window.
trainX, trainY = build_dataset(train_set, seq_length)
testX, testY = build_dataset(test_set, seq_length)


In [None]:
# Turn the NumPy arrays into float32 tensors for the model.
trainX_tensor = torch.tensor(trainX, dtype=torch.float32)
trainY_tensor = torch.tensor(trainY, dtype=torch.float32)

testX_tensor = torch.tensor(testX, dtype=torch.float32)
testY_tensor = torch.tensor(testY, dtype=torch.float32)

#### 사용자가 설정하는 Hyperparameters
- seq_length: 시퀀스 길이
- input_dim: 변수 개수
- hidden_dim: hidden state 차원(=하나의 hidden state 내 특징을 어느 크기로 추출할 것인지)
- layer_dim: 쌓아 올리는 RNN layer 개수(= hidden state를 몇 개의 층으로 쌓아 구성할 것인지)
- output_dim: 출력 크기(회귀에서는 1, 분류에서는 클래스 개수)
- learning_rate: 학습률
- n_epochs: 학습 반복 횟수

In [None]:
# Number of input features per time step.
# NOTE(review): must equal the number of columns in trainX — confirm the CSV has exactly 5 columns once 'Date' is the index.
input_dim = 5 
# Size of the RNN hidden state.
hidden_dim = 5
# Size of the model output (a single regression target).
output_dim = 1
# Number of stacked RNN layers.
layer_dim = 1
# Step size passed to the Adam optimizer.
learning_rate = 0.01
# Number of full-batch training iterations.
n_epochs = 200 

In [None]:
class RNNRegressor(nn.Module):
    """Vanilla RNN followed by a linear head for sequence-to-one regression.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the RNN hidden state.
        output_dim: Size of the prediction produced by the linear head.
        layer_dim: Number of stacked RNN layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, layer_dim):
        super().__init__()
        # batch_first=True: tensors are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=layer_dim,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_dim, output_dim, bias=True)

    def forward(self, x):
        """Predict from the hidden state of the final time step.

        Args:
            x: Tensor of shape (batch, seq, input_dim).

        Returns:
            Tensor of shape (batch, output_dim).
        """
        hidden_seq, _ = self.rnn(x)
        # Only the last time step of the top layer feeds the regression head.
        last_step = hidden_seq[:, -1]
        return self.fc(last_step)

In [None]:
# Instantiate the network with the hyperparameters chosen above.
model = RNNRegressor(input_dim, hidden_dim, output_dim, layer_dim)
# Mean squared error between predictions and targets is the training objective.
criterion = torch.nn.MSELoss()
# Adam updates every parameter of the model with the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# start training
# Full-batch training: every iteration feeds the whole training tensor through the model once.
for i in range(n_epochs):
    optimizer.zero_grad()  # clear gradients left over from the previous iteration
    outputs = model(trainX_tensor)
    loss = criterion(outputs, trainY_tensor)  # MSE between predictions and targets
    loss.backward()
    optimizer.step()
    print(i, np.round(loss.item(),7))  # log the iteration index and the rounded loss

In [None]:
# Compare the targets with the model's predictions on the test split.
# Inference only: run the forward pass without autograd instead of reading the
# discouraged `.data` attribute of a graph-tracking tensor.
with torch.no_grad():
    test_pred = model(testX_tensor).numpy()
plt.plot(testY)
plt.plot(test_pred)
plt.legend(['original', 'prediction'])
plt.show()

### 4. RNN 모델 평가하기

In [None]:
# Ground-truth targets of the test split.
y_true = testY
# Predictions for the test split; autograd is disabled because this is
# inference only (replaces the discouraged `.data` access).
with torch.no_grad():
    y_pred = model(testX_tensor).numpy()

In [None]:
# Mean absolute error on the test split, reusing the arrays computed in the
# previous cell instead of running the model again. Arguments follow the
# documented (y_true, y_pred) order.
mean_absolute_error(y_true, y_pred)