In [1]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from datetime import datetime
import os
import wandb
from pathlib import Path

# The author works inside an Ubuntu Docker container, so the project root is set explicitly.
BASE_PATH="/home/Deep-Learning-study"
import sys
# Make the project packages (e.g. `_01_code`) importable. The membership check keeps
# sys.path free of duplicate entries when this cell is executed more than once.
if BASE_PATH not in sys.path:
  sys.path.append(BASE_PATH)

CURRENT_FILE_PATH = os.getcwd()
CHECKPOINT_FILE_PATH = os.path.join(CURRENT_FILE_PATH, "checkpoints")

# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(CHECKPOINT_FILE_PATH, exist_ok=True)

In [2]:
from _01_code._08_fcn_best_practice.c_trainer import ClassificationTrainer
from _01_code._15_lstm_and_its_application.f_arg_parser import get_parser
#from _01_code._15_lstm_and_its_application.g_crypto_currency_regression_train_lstm import get_btc_krw_data

In [3]:
class CryptoCurrencyDataset(Dataset):
  """Map-style dataset that pairs each input in ``X`` with the target at the same index in ``y``."""

  def __init__(self, X, y, is_regression=True):
    # `is_regression` is accepted for interface compatibility; it is not used by this class.
    self.X, self.y = X, y

    # Inputs and targets must line up one-to-one.
    assert len(self.X) == len(self.y)

  def __len__(self):
    """Number of (input, target) pairs."""
    return len(self.X)

  def __getitem__(self, idx):
    """Returns the ``(X[idx], y[idx])`` pair."""
    return self.X[idx], self.y[idx]

  def __str__(self):
    """One-line summary with the sample count and the shapes of ``X`` and ``y``."""
    template = "Data Size: {0}, Input Shape: {1}, Target Shape: {2}"
    return template.format(len(self.X), self.X.shape, self.y.shape)

In [4]:
import pandas as pd

def _build_crypto_split(feature_rows, target_values, date_values, start, count, sequence_size, y_normalizer):
    """Builds the input windows, targets and target dates of one split (train/validation/test)."""
    window_list = []
    regression_list = []
    classification_list = []
    split_dates = []
    for idx in range(start, start + count):
        target_row = idx + sequence_size
        # Input window: the `sequence_size` rows immediately before the target row.
        window_list.append(torch.from_numpy(feature_rows[idx:target_row]))
        regression_list.append(target_values[target_row])
        # Label 1 when the target value did not fall relative to the previous row.
        # The same `>=` rule is applied to every split so labels are comparable.
        classification_list.append(
            1 if target_values[target_row] >= target_values[target_row - 1] else 0
        )
        split_dates.append(date_values[target_row])

    X = torch.stack(window_list, dim=0).to(torch.float)
    y_regression = torch.tensor(regression_list, dtype=torch.float32) / y_normalizer
    y_classification = torch.tensor(classification_list, dtype=torch.int64)
    return X, y_regression, y_classification, split_dates


def get_cryptocurrency_data(
    sequence_size=10, validation_size=100, test_size=10, 
    target_column='Close', y_normalizer=1.0e7, 
    is_regression=True, use_next_open=True
):
    """Loads BTC_KRW.csv and builds sliding-window train/validation/test tensors.

    Each sample is a window of `sequence_size` consecutive rows (all columns except
    'Date'); its target is taken from the row right after the window. The splits are
    chronological: the first windows form the training set, followed by
    `validation_size` validation windows and `test_size` test windows.

    Args:
        sequence_size: number of rows in each input window.
        validation_size: number of validation samples.
        test_size: number of test samples.
        target_column: column the targets are read from.
        y_normalizer: divisor applied to the regression targets.
        is_regression: return regression targets when True, otherwise 0/1 class labels
            (1 when the target is >= the previous row's value).
        use_next_open: when True, adds a 'Next_Open' column holding the next row's
            'Open' value and drops the rows left with NaN by that shift.

    Returns:
        A 9-tuple ``(X_train, X_validation, X_test, y_train, y_validation, y_test,
        y_train_date, y_validation_date, y_test_date)``. The X tensors are float
        tensors of shape (n_samples, sequence_size, n_columns), standardized with the
        training-set mean and standard deviation. The date entries are lists holding
        the 'Date' value of each target row.

    Raises:
        ValueError: if the requested sizes leave no samples for one of the splits.
    """
    btc_krw_path = os.path.join(BASE_PATH, "_00_data", "k_cryptocurrency", "BTC_KRW.csv")
    df = pd.read_csv(btc_krw_path)
    date_series = df['Date']

    # Add the Next_Open column (the following row's Open value).
    if use_next_open:
        df['Next_Open'] = df['Open'].shift(-1)

    df = df.drop(columns=['Date'])

    # Drop rows with NaN, i.e. at least the last row, whose Next_Open is undefined.
    if use_next_open:
        df = df.dropna()

    row_size = len(df)

    # Keep the dates aligned with the surviving rows so that positional lookups
    # below stay correct even if dropna() removed rows other than the last one.
    date_list = date_series.loc[df.index].tolist()
    # Convert once instead of slicing the DataFrame on every loop iteration;
    # copy=True guarantees a writable array for torch.from_numpy.
    feature_rows = df.to_numpy(copy=True)
    target_values = df[target_column].tolist()

    data_size = row_size - sequence_size
    train_size = data_size - (validation_size + test_size)
    if min(train_size, validation_size, test_size) <= 0:
        raise ValueError(
            "Not enough rows for the requested split: rows={0}, sequence_size={1}, "
            "train_size={2}, validation_size={3}, test_size={4}".format(
                row_size, sequence_size, train_size, validation_size, test_size
            )
        )

    #################################################################################################

    X_train, y_train_regression, y_train_classification, y_train_date = _build_crypto_split(
        feature_rows, target_values, date_list, 0, train_size, sequence_size, y_normalizer
    )

    # Standardization statistics come from the training windows only and are reused
    # for validation and test so that no information from those splits leaks in.
    # NOTE(review): a feature that is constant across the training windows gives
    # s == 0 and therefore non-finite inputs.
    m = X_train.mean(dim=0, keepdim=True)
    s = X_train.std(dim=0, keepdim=True)
    X_train = (X_train - m) / s

    #################################################################################################

    X_validation, y_validation_regression, y_validation_classification, y_validation_date = _build_crypto_split(
        feature_rows, target_values, date_list, train_size, validation_size, sequence_size, y_normalizer
    )
    X_validation = (X_validation - m) / s

    #################################################################################################

    X_test, y_test_regression, y_test_classification, y_test_date = _build_crypto_split(
        feature_rows, target_values, date_list, train_size + validation_size, test_size,
        sequence_size, y_normalizer
    )
    X_test = (X_test - m) / s

    if is_regression:
        return (
            X_train, X_validation, X_test,
            y_train_regression, y_validation_regression, y_test_regression,
            y_train_date, y_validation_date, y_test_date
        )
    else:
        return (
            X_train, X_validation, X_test,
            y_train_classification, y_validation_classification, y_test_classification,
            y_train_date, y_validation_date, y_test_date
        )

In [5]:
def get_btc_krw_data(sequence_size=21, validation_size=150, test_size=30, is_regression=True, use_next_open=True):
    """Builds the train/validation/test DataLoaders for the BTC_KRW data.

    The batch size of the train and validation loaders is read from
    ``wandb.config.batch_size``, so a wandb run whose config provides
    ``batch_size`` must exist before this is called (``main`` does this via
    ``wandb.init``). The test loader yields the whole test set as one batch.

    Args:
        sequence_size: number of rows in each input window.
        validation_size: number of validation samples.
        test_size: number of test samples.
        is_regression: regression targets when True, class labels otherwise.
        use_next_open: whether the 'Next_Open' feature column is added.

    Returns:
        ``(train_data_loader, validation_data_loader, test_data_loader)``.
    """
    X_train, X_validation, X_test, y_train, y_validation, y_test, y_train_date, y_validation_date, y_test_date \
        = get_cryptocurrency_data(
            sequence_size=sequence_size,
            validation_size=validation_size,
            test_size=test_size,
            target_column='Close',
            y_normalizer=1.0e7,
            is_regression=is_regression,
            use_next_open=use_next_open  # whether to use the Next_Open feature
        )

    # Wrap the tensors in PyTorch Dataset objects.
    train_crypto_currency_dataset = CryptoCurrencyDataset(X=X_train, y=y_train)
    validation_crypto_currency_dataset = CryptoCurrencyDataset(X=X_validation, y=y_validation)
    test_crypto_currency_dataset = CryptoCurrencyDataset(X=X_test, y=y_test)

    # Only the training loader shuffles.
    train_data_loader = DataLoader(
        dataset=train_crypto_currency_dataset,
        batch_size=wandb.config.batch_size,
        shuffle=True
    )

    # Evaluation loaders keep chronological order: batch composition is then the
    # same at every epoch, so validation metrics are reproducible for a given model
    # and test predictions stay aligned with the order of the target dates.
    validation_data_loader = DataLoader(
        dataset=validation_crypto_currency_dataset,
        batch_size=wandb.config.batch_size,
        shuffle=False
    )

    test_data_loader = DataLoader(
        dataset=test_crypto_currency_dataset,
        batch_size=len(test_crypto_currency_dataset),
        shuffle=False
    )

    return train_data_loader, validation_data_loader, test_data_loader

In [6]:
def get_model(n_input=6, n_output=2, hidden_size=1024, num_layers=3, dropout=0.1):
    """Builds the bidirectional-LSTM classifier.

    Called without arguments it returns the same architecture as before; the
    keyword arguments allow other feature counts (e.g. 5 columns when the
    'Next_Open' feature is disabled) or smaller networks.

    Args:
        n_input: number of features per time step.
        n_output: number of output logits (2 for binary classification).
        hidden_size: hidden size of each LSTM direction.
        num_layers: number of stacked LSTM layers.
        dropout: dropout probability used between LSTM layers and in the FC head.

    Returns:
        An ``nn.Module`` mapping a (batch, sequence, n_input) tensor to
        (batch, n_output) logits.
    """
    # Forward and backward directions are concatenated in the LSTM output.
    lstm_output_size = 2 * hidden_size

    class MyModel(nn.Module):
        def __init__(self, n_input=6, n_output=2):
            super().__init__()

            # Main LSTM stack.
            self.lstm = nn.LSTM(
                input_size=n_input,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout=dropout,
                batch_first=True,
                bidirectional=True
            )

            # Fully connected classification head.
            self.fc_layers = nn.Sequential(
                nn.LayerNorm(lstm_output_size),
                nn.Linear(lstm_output_size, 512),
                nn.GELU(),
                nn.Dropout(dropout),

                nn.LayerNorm(512),
                nn.Linear(512, 128),
                nn.GELU(),
                nn.Dropout(dropout),

                nn.LayerNorm(128),
                nn.Linear(128, n_output),
            )

        def forward(self, x):
            self.lstm.flatten_parameters()  # CUDA performance optimization
            x, _ = self.lstm(x)
            x = x[:, -1, :]  # keep only the output at the last time step
            x = self.fc_layers(x)
            # Raw logits are returned: CrossEntropyLoss is used downstream, so no softmax here.
            return x

    my_model = MyModel(n_input=n_input, n_output=n_output)
    return my_model

# The Args class is also adjusted for the classification task.
class Args:
    """Default hyperparameters used when the notebook is run without argparse.

    Provides the attributes that ``main`` reads: ``wandb``, ``batch_size``,
    ``epochs``, ``learning_rate``, ``weight_decay``, ``validation_intervals``,
    ``early_stop_patience`` and ``early_stop_delta``.
    """

    def __init__(self):
        self.wandb = True
        self.batch_size = 32       # a larger batch size than for regression may work better for classification
        self.epochs = 300
        self.learning_rate = 1e-3  # classification usually uses a larger learning rate
        self.weight_decay = 1e-4
        self.validation_intervals = 1
        self.early_stop_patience = 30
        self.early_stop_delta = 1e-4

    def __repr__(self):
        """Shows every setting, so ``print(args)`` is informative instead of an object address."""
        fields = ", ".join(f"{name}={value!r}" for name, value in vars(self).items())
        return f"{type(self).__name__}({fields})"

In [7]:
def main(args):
  """Trains the LSTM classifier on the BTC_KRW data and logs the run to wandb.

  Args:
    args: object exposing ``wandb``, ``epochs``, ``batch_size``,
      ``validation_intervals``, ``learning_rate``, ``early_stop_patience``,
      ``early_stop_delta`` and ``weight_decay``.
  """
  run_time_str = datetime.now().astimezone().strftime('%Y-%m-%d_%H-%M-%S')

  config = {
    'epochs': args.epochs,
    'batch_size': args.batch_size,
    'validation_intervals': args.validation_intervals,
    'learning_rate': args.learning_rate,
    'early_stop_patience': args.early_stop_patience,
    'early_stop_delta': args.early_stop_delta,
    'weight_decay': args.weight_decay
  }

  project_name = "lstm_classification_btc_krw_next_open"
  wandb.init(
    mode="online" if args.wandb else "disabled",
    project=project_name,
    notes="btc_krw experiment with lstm",
    tags=["lstm", "classification", "btc_krw"],
    name=run_time_str,
    config=config
  )

  try:
    print(args)
    print(wandb.config)

    # The test loader is not needed for training, so it is discarded here.
    train_data_loader, validation_data_loader, _ = get_btc_krw_data(is_regression=False)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Training on device {device}.")

    model = get_model()
    model.to(device)

    optimizer = optim.Adam(
      model.parameters(), lr=wandb.config.learning_rate, weight_decay=wandb.config.weight_decay
    )

    classification_trainer = ClassificationTrainer(
      project_name, model, optimizer, train_data_loader, validation_data_loader, None,
      run_time_str, wandb, device, CHECKPOINT_FILE_PATH
    )
    classification_trainer.train_loop()
  except BaseException:
    # Close the run even when training fails or is interrupted (e.g. the kernel's
    # "interrupt" button), so it is not left open in the kernel; a non-zero exit
    # code marks the run as failed. The original exception is re-raised.
    wandb.finish(exit_code=1)
    raise

  wandb.finish()

In [8]:
if __name__ == "__main__":
    import sys

    # `ipykernel` is present in sys.modules when this runs inside a Jupyter kernel.
    if 'ipykernel' not in sys.modules:
        # Plain Python script: read the settings from the command line.
        parser = get_parser()
        args = parser.parse_args()
    else:
        # Jupyter: fall back to the default settings.
        args = Args()

    main(args)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


[34m[1mwandb[0m: Currently logged in as: [33mjaeminyu2356[0m ([33mjaeminyu2356-korea-university-of-technology-and-education[0m). Use [1m`wandb login --relogin`[0m to force relogin


<__main__.Args object at 0x7453d6110050>
{'epochs': 300, 'batch_size': 32, 'validation_intervals': 1, 'learning_rate': 0.001, 'early_stop_patience': 30, 'early_stop_delta': 0.0001, 'weight_decay': 0.0001}
Training on device cuda:0.
[Epoch   1] T_loss: 0.72054, T_accuracy: 51.6166 | V_loss: 0.69097, V_accuracy: 53.3333 | Early stopping is stated! | T_time: 00:00:07, T_speed: 0.143
[Epoch   2] T_loss: 0.69753, T_accuracy: 50.7941 | V_loss: 0.69073, V_accuracy: 53.3333 | V_loss decreased (0.69097 --> 0.69073). Saving model... | T_time: 00:00:14, T_speed: 0.143
[Epoch   3] T_loss: 0.69445, T_accuracy: 52.1838 | V_loss: 0.69206, V_accuracy: 53.3333 | Early stopping counter: 1 out of 30 | T_time: 00:00:23, T_speed: 0.130
[Epoch   4] T_loss: 0.69486, T_accuracy: 50.7374 | V_loss: 0.69307, V_accuracy: 53.3333 | Early stopping counter: 2 out of 30 | T_time: 00:00:30, T_speed: 0.133
[Epoch   5] T_loss: 0.69352, T_accuracy: 51.4464 | V_loss: 0.69168, V_accuracy: 53.3333 | Early stopping counter: 

0,1
Epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
Training accuracy (%),▃▁▁▃▆▂▁▅▆▇▆▅▆▇▇▇▆▇█▇▇▅▅▇▇▆▇▅▇▇▇▇█▇▇▆▄▇▅▇
Training loss,█▃▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▁▂▂▂▂▂▂▁▁▁▂▂▂▂▁▁▁▂▁▂▁▁▁
Training speed (epochs/sec.),██▁▃▄▃▄▄▅▆▆▄▅▅▅▄▄▂▃▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅
Validation accuracy (%),████████▁██████▁████████████████████████
Validation loss,▃▃▄▄▃▄▅▄▅▄▂▃▂▂▁▅▁█▂▃▄▅▄▄▃▃▄▃▃▃▂▄▃▃▄▂▃▄▂▃

0,1
Epoch,50.0
Training accuracy (%),52.92116
Training loss,0.69217
Training speed (epochs/sec.),0.13812
Validation accuracy (%),53.33333
Validation loss,0.69121
