<a href="https://colab.research.google.com/github/jiwoong2/deeplearning/blob/main/RNNNNNNNNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler
from torch import nn, optim
import numpy as np
from torch.utils.data import Dataset, DataLoader, random_split
from tqdm import tqdm
import matplotlib.pyplot as plt
import yfinance as yf
# Device string used when moving tensors/modules: "cuda" if torch reports CUDA as available, else "cpu".
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# hyperparameters

In [None]:
# Learning rate handed to the Adam optimizer.
# Author's note (translated): with "-1" (presumably a rate of 1e-1 — TODO confirm) training
# converges at once to a local minimum where the model only ever outputs 0 or 1.
LR = 1e-6
# Number of epochs passed to Train().
EPOCH = 200
# Mini-batch size used by every DataLoader below.
batch_size = 150

# data


In [None]:
def load_stock_data(Ticker, peri):
    """Download the price history of one ticker and return it as a NumPy array.

    Args:
        Ticker: Ticker symbol handed to ``yf.Ticker`` (e.g. ``"AAPL"``).
        peri: Look-back period handed to ``history(period=...)`` (e.g. ``"25y"``).

    Returns:
        A 2-D array with one row per row of the downloaded history and exactly
        five columns, in the order Open, High, Low, Close, Volume.

    Raises:
        KeyError: If the downloaded frame lacks one of the five price columns.
    """
    price_columns = ["Open", "High", "Low", "Close", "Volume"]

    history = yf.Ticker(Ticker).history(period=peri)

    # Pick the wanted columns by name instead of dropping the unwanted ones.
    # Downstream code relies on exactly five features with Close at column 3,
    # and that layout must not depend on which auxiliary columns (dividends,
    # splits, ...) the download happens to contain.
    return history[price_columns].to_numpy()

# AAPL history for the "25y" period, as a NumPy array.
apple = load_stock_data("AAPL", "25y")

In [None]:
def data_generator(Size:int, data, interval:int):
    """Sample random look-back windows and label each by a later price move.

    For every sampled anchor row ``i`` the features are rows ``[i-interval, i)``
    (all columns) and the label is derived from the percent change of column 3
    between row ``i+1`` and row ``i+6``. Column 3 is the Close price for arrays
    built by ``load_stock_data``.

    Label classes:
        0: change < -5
        1: -5 <= change < 0
        2: 0 <= change < 5
        3: change >= 5

    Args:
        Size: Number of anchor rows to draw (without replacement).
        data: 2-D NumPy array, one row per time step.
        interval: Number of rows in each look-back window.

    Returns:
        ``(g_data, g_label)``: a float32 array of windows and an int64 array of
        class ids, always of equal length. Anchors whose percent change is NaN
        are dropped, so fewer than ``Size`` samples may be returned.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when ``Size`` exceeds
            the number of valid anchor rows.
    """
    # A valid anchor needs `interval` rows behind it and 6 rows ahead of it.
    idx = np.asarray(
        np.random.choice(range(interval, len(data)-6), size=Size, replace=False)
    )

    entry = data[idx + 1, 3]
    change = ((data[idx + 6, 3] - entry) / entry * 100).astype(np.float64)

    # A NaN change satisfies none of the class conditions. Keeping its window
    # without a label would shift every later label by one position, so the
    # whole sample is discarded instead.
    keep = ~np.isnan(change)
    idx = idx[keep]
    change = change[keep]

    # Bin edges [-5, 0, 5] give: <-5 -> 0, [-5, 0) -> 1, [0, 5) -> 2, >=5 -> 3.
    g_label = np.digitize(change, [-5, 0, 5]).astype(np.int64)
    g_data = np.array([data[i - interval:i, :] for i in idx]).astype(np.float32)

    return g_data, g_label

In [None]:
# Draw 5000 random windows of 20 rows each, with their class labels, from the AAPL array.
apple_short_term_data, apple_short_term_label = data_generator(5000, apple, 20)

In [None]:
# Plain name bindings (no copy): both names refer to the same objects as the apple_* variables,
# so in-place changes made through one name are visible through the other.
short_term_data = apple_short_term_data
short_term_label = apple_short_term_label

In [None]:
def data_scaler(data):
    """Standardize ``data`` in place, one sample at a time.

    A fresh ``StandardScaler`` is fitted on each ``data[k]`` slice, so every
    sample is scaled using only its own statistics. The input array is
    overwritten and nothing is returned.
    """
    for k, window in enumerate(data):
        data[k, :, :] = StandardScaler().fit_transform(window)

# Standardize the sampled windows in place (data_scaler returns nothing).
data_scaler(short_term_data)

In [None]:
class CustomDataset(Dataset):
    """Pair a feature container with its labels so a ``DataLoader`` can index both."""

    def __init__(self, data, labels):
        """Store ``data`` and ``labels`` as given (no copy, no validation)."""
        self.data, self.labels = data, labels

    def __len__(self):
        """Return the number of samples, taken from ``data`` alone."""
        sample_total = len(self.data)
        return sample_total

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        features = self.data[idx]
        target = self.labels[idx]
        return features, target

# Create the dataset instance.
short_term_dataset = CustomDataset(short_term_data, short_term_label)

# Split the data into training, validation and test sets. Splitting the dataset instance
# keeps every label attached to its sample.
# First split: 90% training, remaining 10% held out.
train_size = int(0.9 * len(short_term_dataset))
test_size = len(short_term_dataset) - train_size

short_term_train_dataset, short_term_test_dataset = random_split(short_term_dataset, [train_size, test_size])

# Second split: halve the held-out part into test and validation (about 5% of all data each).
test_size = int(0.5 * len(short_term_test_dataset))
val_size = len(short_term_test_dataset) - test_size

short_term_test_dataset, short_term_val_dataset = random_split(short_term_test_dataset, [test_size, val_size])

# Create the data loaders (all three shuffle).
short_term_train_DL = DataLoader(short_term_train_dataset, batch_size=batch_size, shuffle=True)
short_term_val_DL = DataLoader(short_term_val_dataset, batch_size=batch_size, shuffle=True)
short_term_test_DL = DataLoader(short_term_test_dataset, batch_size=batch_size, shuffle=True)

# Inspect one batch from the training loader: feature shape and raw labels.
for features, labels in short_term_train_DL:
    print(features.shape)
    print(labels)
    break

In [None]:
# Inverse-frequency class weights computed from the training split only.
# The labels are fetched once and counted in a single pass over the four classes.
train_labels = np.asarray(short_term_train_DL.dataset[:][1])
class_counts = np.bincount(train_labels, minlength=4)
total = class_counts.sum()
# Floor the counts at 1: a class missing from the training split would otherwise
# divide by zero and give an infinite weight, which turns the loss into inf/NaN
# as soon as that class shows up in validation or test data.
class_weights = total / np.maximum(class_counts, 1)
class_weights = torch.tensor(class_weights, dtype=torch.float32)
print(class_weights)

criterion = nn.CrossEntropyLoss(weight=class_weights)
# A weighted loss holds a tensor, so the loss module must be moved to DEVICE as well.
criterion = criterion.to(DEVICE)

# model

In [None]:
class RNN(nn.Module):
    """Many-to-one RNN classifier: stacked recurrent layers and a linear head.

    The defaults reproduce the original fixed architecture (5 input features,
    30 hidden units, 2 layers, 4 classes).

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each recurrent layer.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of logits produced by the linear head.
    """

    def __init__(self, input_size=5, hidden_size=30, num_layers=2, num_classes=4):
        super().__init__()

        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: Batch-first input, ``(batch, seq_len, input_size)``.

        Returns:
            Logits of shape ``(batch, num_classes)``.
        """
        # Only the final hidden states are needed; the per-step outputs are discarded.
        _, h_n = self.rnn(x)
        # h_n stacks one final state per layer; the last entry belongs to the top layer.
        return self.linear(h_n[-1])

# Build the network on DEVICE and create an Adam optimizer over this instance's parameters.
model = RNN().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr = LR)

In [None]:
# Keep the features (element 0) of the first training batch for a quick check, then stop iterating.
for i in short_term_train_DL:
    test_data = i[0]
    break

# Bare expression: evaluates the batch shape (shown as cell output when run in a notebook).
test_data.shape

In [None]:
# Forward-pass shape check on a throwaway instance.
# This deliberately does NOT rebind `model`: `optimizer` was built from the parameters of the
# existing `model`, so replacing that name with a new RNN() would leave the optimizer updating
# an instance that is never trained on, while the instance that is trained never changes
# (and would sit on the CPU instead of DEVICE).
shape_check_model = RNN()
a = shape_check_model(test_data)
print(a.shape)

# trainer

In [None]:
# One epoch of training (or evaluation when no optimizer is given).
def loss_epoch(model, DL, criterion, optimizer = None):
    """Run one pass over ``DL`` and return loss and accuracy statistics.

    Args:
        model: Callable mapping a feature batch to class logits.
        DL: Iterable of ``(x_batch, y_batch)`` pairs exposing ``.dataset``
            (its length is used as the sample count).
        criterion: Loss callable taking ``(logits, targets)``.
        optimizer: If given, a backward pass and an optimizer step are run
            for every batch; if ``None`` the pass is evaluation only.

    Returns:
        ``(loss_e, accuracy_e, rcorrect)``: the sample-weighted mean loss, the
        accuracy in percent, and the number of correct predictions.

    Raises:
        ZeroDivisionError: If ``DL.dataset`` is empty.
    """
    N = len(DL.dataset) # the number of data

    # Move batches to wherever the model's parameters live rather than always to
    # the global DEVICE; a model left on another device would otherwise fail
    # with a device mismatch. Fall back to DEVICE for models without parameters.
    params = model.parameters() if hasattr(model, "parameters") else ()
    first_param = next(iter(params), None)
    device = first_param.device if first_param is not None else DEVICE

    rloss = 0.0
    rcorrect = 0

    for x_batch, y_batch in DL:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        # inference
        y_hat = model(x_batch)

        # loss
        loss = criterion(y_hat, y_batch)

        # update
        if optimizer is not None:
            optimizer.zero_grad() # reset gradients so they do not accumulate across batches
            loss.backward() # backpropagation
            optimizer.step() # weight update

        # Weight the batch loss by the actual batch length: multiplying by the nominal
        # batch size would over-count the last, smaller batch.
        rloss += loss.item() * x_batch.shape[0]

        # accuracy accumulation
        pred = y_hat.argmax(dim=1)
        rcorrect += torch.sum(pred == y_batch).item()

    loss_e = rloss / N # epoch loss
    accuracy_e = rcorrect / N * 100

    return loss_e, accuracy_e, rcorrect

def Train(model, train_DL, val_DL, criterion, optimizer, EPOCH):
    """Train ``model`` for ``EPOCH`` epochs, validating after every epoch.

    Each epoch runs one optimisation pass over ``train_DL`` in train mode,
    followed by one gradient-free pass over ``val_DL`` in eval mode.

    Returns:
        ``(loss_history, acc_history)``: two dicts with ``"train"`` and
        ``"val"`` keys, each holding one value per epoch.
    """
    loss_history = {"train":[], "val":[]}
    acc_history = {"train":[], "val":[]}

    for _ in tqdm(range(EPOCH), leave=False):

        # Optimisation pass.
        model.train()
        train_stats = loss_epoch(model, train_DL, criterion, optimizer)
        loss_history["train"].append(train_stats[0])
        acc_history["train"].append(train_stats[1])

        # Validation pass: no optimizer, no gradient tracking.
        model.eval()
        with torch.no_grad():
            val_stats = loss_epoch(model, val_DL, criterion)
        loss_history["val"].append(val_stats[0])
        acc_history["val"].append(val_stats[1])

    return loss_history, acc_history

def Test(model, test_DL, criterion):
    """Evaluate ``model`` on ``test_DL``, print a short report, return the accuracy.

    The model is switched to eval mode and the pass runs without gradient
    tracking. The returned accuracy is in percent, rounded to one decimal.
    """
    model.eval()
    with torch.no_grad():
        stats = loss_epoch(model, test_DL, criterion)
    test_loss, test_acc, rcorrect = stats

    report = (
        "",
        f"Test loss: {round(test_loss,5)}",
        f"Test accuracy: {rcorrect}/{len(test_DL.dataset)} ({round(test_acc,1)} %)",
    )
    for line in report:
        print(line)

    return round(test_acc,1)

def graph(loss, acc, epoch):
    """Plot train/validation loss and accuracy curves side by side.

    Args:
        loss: Dict with ``'train'`` and ``'val'`` sequences of per-epoch loss.
        acc: Dict with ``'train'`` and ``'val'`` sequences of per-epoch accuracy.
        epoch: Number of epochs; the x axis runs from 1 to ``epoch``.
    """
    # One figure, two panels in a single row.
    fig, axs = plt.subplots(1, 2, figsize=(10, 4))
    epochs = range(1, epoch + 1)

    # (axis, history, train legend, validation legend, y label, title) per panel.
    panels = (
        (axs[0], loss, 'Train Loss', 'Validation Loss', 'Loss',
         'Training and Validation Loss'),
        (axs[1], acc, 'Train acc', 'Validation acc', 'Accuracy',
         'Training and Validation Accuracy'),
    )

    for ax, history, train_label, val_label, y_label, title in panels:
        ax.plot(epochs, history['train'], label=train_label)
        ax.plot(epochs, history['val'], label=val_label)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()

    # Adjust the spacing between panels, then display the figure.
    plt.tight_layout()
    plt.show()

# experiment

In [None]:
# Train for EPOCH epochs; `loss` and `acc` receive the per-epoch train/val histories.
loss, acc = Train(model, short_term_train_DL, short_term_val_DL, criterion, optimizer, EPOCH)

In [None]:
# Echo the run's hyperparameters, then plot the loss and accuracy curves.
# (The stray closing bracket that used to end the printed line has been removed.)
print(f"EPOCH:{EPOCH}, batch:{batch_size}, LR:{LR}")
graph(loss, acc, EPOCH)

In [None]:
# Evaluate on the test loader; Test() prints the loss/accuracy report and returns the rounded accuracy.
Test(model, short_term_test_DL, criterion)

# LSTM