In [17]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from datetime import datetime
import os
import wandb
from pathlib import Path

# The author works inside an Ubuntu Docker container, so the repository root
# is pinned to the path below.
BASE_PATH="/home/Deep-Learning-study"
import sys
# Re-running this cell must not keep appending duplicate entries to sys.path.
if BASE_PATH not in sys.path:
  sys.path.append(BASE_PATH)

# Checkpoints are stored next to the notebook's working directory.
CURRENT_FILE_PATH = os.getcwd()
CHECKPOINT_FILE_PATH = os.path.join(CURRENT_FILE_PATH, "checkpoints")

# exist_ok=True replaces the check-then-create pattern and reuses the path
# computed above instead of joining it a second time.
os.makedirs(CHECKPOINT_FILE_PATH, exist_ok=True)

In [18]:
from _01_code._15_lstm_and_its_application.f_arg_parser import get_parser
from _01_code._14_rnn.g_rnn_trainer import RegressionTrainer
#from _01_code._03_real_world_data_to_tensors.p_cryptocurrency_dataset_dataloader import get_cryptocurrency_data, \
#  CryptoCurrencyDataset

In [19]:
class CryptoCurrencyDataset(Dataset):
  """Map-style dataset that pairs each input sample with its target.

  ``X`` and ``y`` are stored as given and indexed with the same ``idx``;
  they must have the same length. ``is_regression`` is accepted but is not
  used by this class.
  """

  def __init__(self, X, y, is_regression=True):
    self.X, self.y = X, y

    # Every input needs exactly one target.
    assert len(self.X) == len(self.y)

  def __len__(self):
    """Number of samples."""
    return len(self.X)

  def __getitem__(self, idx):
    """Return the ``(input, target)`` pair stored at ``idx``."""
    return self.X[idx], self.y[idx]

  def __str__(self):
    """One-line summary of the sample count and the input/target shapes."""
    summary_template = "Data Size: {0}, Input Shape: {1}, Target Shape: {2}"
    return summary_template.format(len(self.X), self.X.shape, self.y.shape)

In [20]:
import pandas as pd

def _build_split(features, targets, dates, start, stop, sequence_size):
    """Build the sliding windows whose first row index lies in ``[start, stop)``.

    Window ``i`` covers rows ``[i, i + sequence_size)`` of ``features`` and is
    labelled with row ``i + sequence_size`` of ``targets`` / ``dates``.

    Returns:
        (X, y_regression, y_classification, y_dates) where ``y_regression`` is
        the un-normalised target value, ``y_classification`` is 1 when the
        target is greater than or equal to the previous row's target (else 0),
        and ``y_dates`` is the list of dates of the target rows.
    """
    X = torch.stack([features[i: i + sequence_size] for i in range(start, stop)], dim=0)

    target_rows = slice(start + sequence_size, stop + sequence_size)
    previous_rows = slice(start + sequence_size - 1, stop + sequence_size - 1)

    y_regression = targets[target_rows]
    y_classification = (y_regression >= targets[previous_rows]).to(torch.int64)
    return X, y_regression, y_classification, dates[target_rows]


def get_cryptocurrency_data(
    sequence_size=10, validation_size=100, test_size=10,
    target_column='Close', y_normalizer=1.0e7,
    is_regression=True, use_next_open=True
):
    """Load BTC_KRW.csv and split it chronologically into train/validation/test windows.

    Args:
        sequence_size: number of consecutive rows in each input window.
        validation_size: number of windows in the validation split.
        test_size: number of windows in the test split.
        target_column: column whose value in the row right after a window is
            that window's target.
        y_normalizer: divisor applied to the regression targets.
        is_regression: return regression targets when True, otherwise the
            0/1 "did not go down" classification targets.
        use_next_open: when True, add a ``Next_Open`` feature (the following
            row's ``Open``) and drop rows that contain NaN afterwards.

    Returns:
        A 9-tuple ``(X_train, X_validation, X_test, y_train, y_validation,
        y_test, y_train_date, y_validation_date, y_test_date)``. The X tensors
        are float tensors standardised with training-split statistics; the
        date entries are lists aligned with the targets.

    Raises:
        ValueError: if ``sequence_size`` is smaller than 1 or any split would
            contain no windows.
    """
    btc_krw_path = os.path.join(BASE_PATH, "_00_data", "k_cryptocurrency", "BTC_KRW.csv")
    df = pd.read_csv(btc_krw_path)
    date_list = df['Date']

    # Add the Next_Open column (next row's Open).
    if use_next_open:
        df['Next_Open'] = df['Open'].shift(-1)

    df = df.drop(columns=['Date'])

    # The last row has no Next_Open, so it (and any other NaN row) is removed.
    if use_next_open:
        df = df.dropna()

    # Keep the dates aligned with the rows that survived dropna(): features are
    # accessed by position, so the dates must be positional over the same rows.
    dates = date_list.loc[df.index].tolist()

    row_size = len(df)
    data_size = row_size - sequence_size
    train_size = data_size - (validation_size + test_size)

    if sequence_size < 1:
        raise ValueError("sequence_size must be at least 1, got {0}".format(sequence_size))
    if min(train_size, validation_size, test_size) < 1:
        raise ValueError(
            "Every split needs at least one window "
            "(train={0}, validation={1}, test={2}) for {3} rows and sequence_size={4}".format(
                train_size, validation_size, test_size, row_size, sequence_size
            )
        )

    # Convert once instead of slicing the DataFrame row by row.
    features = torch.tensor(df.to_numpy(), dtype=torch.float)
    targets = torch.tensor(df[target_column].to_numpy(), dtype=torch.float64)

    #################################################################################################
    # Chronological split boundaries, expressed as window start indices.
    validation_start = train_size
    test_start = validation_start + validation_size
    test_stop = test_start + test_size

    X_train, y_train_raw, y_train_classification, y_train_date = _build_split(
        features, targets, dates, 0, validation_start, sequence_size
    )
    X_validation, y_validation_raw, y_validation_classification, y_validation_date = _build_split(
        features, targets, dates, validation_start, test_start, sequence_size
    )
    # The test labels now use the same ">=" rule as the other two splits.
    X_test, y_test_raw, y_test_classification, y_test_date = _build_split(
        features, targets, dates, test_start, test_stop, sequence_size
    )

    #################################################################################################
    # Standardise every split with statistics of the training windows only
    # (computed over the sample dimension, i.e. per window position and feature).
    m = X_train.mean(dim=0, keepdim=True)
    s = X_train.std(dim=0, keepdim=True)
    X_train = (X_train - m) / s
    X_validation = (X_validation - m) / s
    X_test = (X_test - m) / s

    if is_regression:
        y_train_regression = y_train_raw.to(torch.float32) / y_normalizer
        y_validation_regression = y_validation_raw.to(torch.float32) / y_normalizer
        y_test_regression = y_test_raw.to(torch.float32) / y_normalizer
        return (
            X_train, X_validation, X_test,
            y_train_regression, y_validation_regression, y_test_regression,
            y_train_date, y_validation_date, y_test_date
        )
    else:
        return (
            X_train, X_validation, X_test,
            y_train_classification, y_validation_classification, y_test_classification,
            y_train_date, y_validation_date, y_test_date
        )

In [21]:
def get_btc_krw_data(sequence_size=21, validation_size=150, test_size=30, is_regression=True, use_next_open=True):
    """Create the train/validation/test DataLoaders for the BTC_KRW data.

    Args:
        sequence_size: number of rows in each input window.
        validation_size: number of windows in the validation split.
        test_size: number of windows in the test split.
        is_regression: forwarded to ``get_cryptocurrency_data`` to choose
            regression or classification targets.
        use_next_open: forwarded to ``get_cryptocurrency_data`` to toggle the
            ``Next_Open`` feature.

    Returns:
        ``(train_data_loader, validation_data_loader, test_data_loader)``.
        The train and validation batch size is read from
        ``wandb.config.batch_size``, so ``wandb.init`` must have been called
        first. The test loader yields the whole split as one batch.
    """
    # The date lists are returned by the loader function but not needed here.
    X_train, X_validation, X_test, y_train, y_validation, y_test, _, _, _ \
        = get_cryptocurrency_data(
            sequence_size=sequence_size,
            validation_size=validation_size,
            test_size=test_size,
            target_column='Close',
            y_normalizer=1.0e7,
            is_regression=is_regression,
            use_next_open=use_next_open  # whether the Next_Open feature is used
        )

    # Wrap the tensors in PyTorch Dataset objects.
    train_crypto_currency_dataset = CryptoCurrencyDataset(X=X_train, y=y_train)
    validation_crypto_currency_dataset = CryptoCurrencyDataset(X=X_validation, y=y_validation)
    test_crypto_currency_dataset = CryptoCurrencyDataset(X=X_test, y=y_test)

    # Only the training data is shuffled.
    train_data_loader = DataLoader(
        dataset=train_crypto_currency_dataset,
        batch_size=wandb.config.batch_size,
        shuffle=True
    )

    # Evaluation loaders keep chronological order: batch composition (and any
    # batch-averaged metric) stays reproducible, and samples stay in the same
    # order as the date lists produced by get_cryptocurrency_data.
    validation_data_loader = DataLoader(
        dataset=validation_crypto_currency_dataset,
        batch_size=wandb.config.batch_size,
        shuffle=False
    )

    test_data_loader = DataLoader(
        dataset=test_crypto_currency_dataset,
        batch_size=len(test_crypto_currency_dataset),
        shuffle=False
    )

    return train_data_loader, validation_data_loader, test_data_loader

In [22]:
def get_model(n_input=6, n_output=1, hidden_size=1024):
    """Build the bidirectional-LSTM regression model.

    Args:
        n_input: number of features per time step. The default of 6 matches
            the data with the ``Next_Open`` feature enabled; pass the actual
            feature count when that feature is turned off.
        n_output: number of output values (1 for regression).
        hidden_size: hidden size of each LSTM direction.

    Returns:
        A freshly initialised ``nn.Module`` that takes a batch-first tensor
        whose last dimension is ``n_input`` and returns ``n_output`` values
        per sample.
    """
    class MyModel(nn.Module):
        def __init__(self, n_input=6, n_output=1, hidden_size=1024):
            super().__init__()

            self.lstm = nn.LSTM(
                input_size=n_input,
                hidden_size=hidden_size,
                num_layers=3,
                dropout=0.1,
                batch_first=True,
                bidirectional=True
            )

            # A bidirectional LSTM concatenates both directions, so the head
            # sees twice the hidden size; derive it instead of hard-coding it.
            lstm_output_size = 2 * hidden_size

            self.fc_layers = nn.Sequential(
                nn.LayerNorm(lstm_output_size),
                nn.Linear(lstm_output_size, 512),
                nn.GELU(),
                nn.Dropout(0.1),

                nn.LayerNorm(512),
                nn.Linear(512, n_output)
            )

        def forward(self, x):
            self.lstm.flatten_parameters()
            x, _ = self.lstm(x)
            # Keep only the features of the last time step.
            # NOTE(review): with bidirectional=True the reverse direction's
            # features at this step have only processed the final input;
            # confirm this is intended.
            x = x[:, -1, :]
            x = self.fc_layers(x)
            return x

    my_model = MyModel(n_input=n_input, n_output=n_output, hidden_size=hidden_size)
    return my_model

class Args:
    """Default run settings used when the notebook is executed interactively.

    The attributes mirror what ``main`` reads from its ``args`` parameter.
    """

    def __init__(self):
        # Experiment tracking: True runs wandb in "online" mode.
        self.wandb = True

        # Optimisation settings.
        self.batch_size = 16       # reduced batch size
        self.epochs = 500          # increased number of epochs
        self.learning_rate = 5e-4  # tuned learning rate
        self.weight_decay = 1e-5   # reduced weight decay

        # Validation and early-stopping settings.
        self.validation_intervals = 1
        self.early_stop_patience = 50  # increased patience
        self.early_stop_delta = 1e-6   # reduced delta

In [25]:
def main(args):
    """Run one wandb-tracked LSTM regression training session on BTC_KRW.

    Args:
        args: object exposing ``wandb``, ``epochs``, ``batch_size``,
            ``validation_intervals``, ``learning_rate``, ``early_stop_patience``,
            ``early_stop_delta`` and ``weight_decay``.
    """
    # The local-time timestamp names both the wandb run and the checkpoint folder.
    run_time_str = datetime.now().astimezone().strftime('%Y-%m-%d_%H-%M-%S')

    # Hyper-parameters recorded in (and later read back from) wandb.config.
    run_config = dict(
        epochs=args.epochs,
        batch_size=args.batch_size,
        validation_intervals=args.validation_intervals,
        learning_rate=args.learning_rate,
        early_stop_patience=args.early_stop_patience,
        early_stop_delta=args.early_stop_delta,
        weight_decay=args.weight_decay,
    )

    # Project name for the regression experiments.
    project_name = "lstm_regression_btc_krw_with_next_open"
    wandb_mode = "online" if args.wandb else "disabled"
    wandb.init(
        mode=wandb_mode,
        project=project_name,
        notes="btc_krw experiment with lstm and next_open feature",
        tags=["lstm", "regression", "btc_krw", "next_open"],
        name=run_time_str,
        config=run_config
    )

    # Regression loaders with the Next_Open feature; the test loader is unused
    # here. This must come after wandb.init because the loaders read
    # wandb.config.batch_size.
    train_data_loader, validation_data_loader, _ = get_btc_krw_data(
        is_regression=True,
        use_next_open=True
    )

    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    print(f"Training on device {device}.")

    model = get_model()
    model.to(device)
    wandb.watch(model)

    optimizer = optim.Adam(
        model.parameters(),
        lr=wandb.config.learning_rate,
        weight_decay=wandb.config.weight_decay
    )

    def identity_transform(x):
        # Inputs are passed to the trainer's transform hook unchanged.
        return x

    # Make sure the per-run checkpoint directory exists.
    checkpoint_path = os.path.join(CHECKPOINT_FILE_PATH, project_name, run_time_str)
    os.makedirs(checkpoint_path, exist_ok=True)

    trainer = RegressionTrainer(
        project_name=project_name,
        model=model,
        optimizer=optimizer,
        train_data_loader=train_data_loader,
        validation_data_loader=validation_data_loader,
        run_time_str=run_time_str,
        wandb=wandb,
        device=device,
        checkpoint_file_path=checkpoint_path,
        transforms=identity_transform
    )

    trainer.train_loop()
    wandb.finish()

In [26]:
if __name__ == "__main__":
    import sys

    # A loaded ipykernel module means the code is running inside Jupyter:
    # use the in-notebook defaults there, otherwise parse the command line.
    running_in_notebook = 'ipykernel' in sys.modules
    args = Args() if running_in_notebook else get_parser().parse_args()

    # main() is called inside the guard so it only runs on direct execution.
    main(args)

0,1
Epoch,▁▃▅▆█
Training loss,█▁▁▁▁
Training speed (epochs/sec.),▁▂▃▄█
Validation loss,▁▁▄▃█

0,1
Epoch,5.0
Training loss,0.02749
Training speed (epochs/sec.),0.01111
Validation loss,1.33968


Training on device cuda:0.
[Epoch   1] T_loss: 0.42001, V_loss: 0.26985, Early stopping is stated! | T_time: 00:00:11, T_speed: 0.001
[Epoch   2] T_loss: 0.04868, V_loss: 1.35451, Early stopping counter: 1 out of 50 | T_time: 00:00:23, T_speed: 0.003
[Epoch   3] T_loss: 0.03779, V_loss: 0.84451, Early stopping counter: 2 out of 50 | T_time: 00:00:35, T_speed: 0.009
[Epoch   4] T_loss: 0.05168, V_loss: 0.34090, Early stopping counter: 3 out of 50 | T_time: 00:00:47, T_speed: 0.097
[Epoch   5] T_loss: 0.03451, V_loss: 1.77300, Early stopping counter: 4 out of 50 | T_time: 00:00:58, T_speed: 0.006
[Epoch   6] T_loss: 0.02971, V_loss: 0.28165, Early stopping counter: 5 out of 50 | T_time: 00:01:10, T_speed: 0.014
[Epoch   7] T_loss: 0.03252, V_loss: 0.72564, Early stopping counter: 6 out of 50 | T_time: 00:01:22, T_speed: 0.030
[Epoch   8] T_loss: 0.03622, V_loss: 0.07710, V_loss decreased (0.26985 --> 0.07710). Saving model... | T_time: 00:01:33, T_speed: 0.010
[Epoch   9] T_loss: 0.02732

0,1
Epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇██
Training loss,█▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Training speed (epochs/sec.),▁▁▁▁▁▁▁▂▁▁▁▁▁▂▁▃▂▁▁▂▁▂▁▂▁▂▂▂▄▂▂▂▂▂▂▄▂█▃▂
Validation loss,▂▆▄█▂▁▄▄▂▄▁▁▃▅▄▅▆▅▄▂▃▂▁▄▂▃▄▂▁▄▁▂▃▅▄▄▂▂▂▄

0,1
Epoch,58.0
Training loss,0.02388
Training speed (epochs/sec.),0.11395
Validation loss,0.94293
