In [None]:
import copy
import torch
import numpy as np
import warnings

from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
from data.dataloader import dataloader_info
from utils.utils import load_yaml_config, instantiate_from_config
from models.predictor import GRU
from data.dataloader import dataloader_info

from utils.experiments import train_model, evaluate_model_stationary, print_score

# Suppress every warning for the rest of the session, then select the compute
# device: the GPU when CUDA is usable, otherwise the CPU.
warnings.filterwarnings(action="ignore")
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


In [None]:
# Load the experiment configuration from its YAML file.
configs = load_yaml_config("configs/exp1_stock_st.yaml")

# Build the Diffusion_TS model described by the 'model' section of the config
# and move it to the selected device.
diffusion_ts = instantiate_from_config(configs['model']).to(device)
load_model_path = "check_points/exp1_stock_st/DiffusionTS_10000.pth"
# map_location remaps the stored tensors onto `device`, so a checkpoint that was
# saved from GPU tensors can still be restored when the notebook runs on CPU.
diffusion_ts.load_state_dict(torch.load(load_model_path, map_location=device))



In [None]:
# Training split: fetch the info dict once, then unpack the loader and the
# underlying dataset from it.
dl_info_train = dataloader_info(configs, train=True)
dl_train, ds_train = dl_info_train["dataloader"], dl_info_train["dataset"]

In [None]:
# Test split: same lookup as for training, requested with train=False.
dl_info_test = dataloader_info(configs, train=False)
dl_test, ds_test = dl_info_test["dataloader"], dl_info_test["dataset"]

In [None]:
# Baseline: a 2-layer GRU predictor trained only on the training dataloader.
predictor_base = GRU(
    input_dim=5,
    hidden_dim=64,
    output_dim=1,
    num_layers=2,
    dropout=0,
).to(device)
lossfn = nn.MSELoss()
optimizer_base = Adam(predictor_base.parameters(), lr=1e-3)

train_model(
    predictor_base, dl_train, lossfn, optimizer_base,
    num_epochs=2000, description="Baseline", device=device,
)

# Evaluate the trained baseline on both splits. The test-split metrics
# (base_mae / base_rmse) are what the later synthetic-data runs are compared to.
mae, rmse, pred_y_train, true_y_train = evaluate_model_stationary(
    predictor_base, dl_train, device, "Train"
)
base_mae, base_rmse, pred_y, true_y = evaluate_model_stationary(
    predictor_base, dl_test, device, "Test"
)


In [None]:
# Sweep over synthetic-sample ratios: for each ratio, continue training a copy of
# the baseline predictor on data generated by the diffusion model, evaluate it on
# the test split, and record the scores relative to the baseline.
param = [0.1, 0.3, 0.5, 0.7, 1, 2, 5, 10]
# Upper bound on the number of series requested from generate_mts in one call.
# NOTE(review): presumably chosen to bound generation memory — confirm.
GEN_CHUNK_SIZE = 3000
scores = []
for sample_ratio in param:
    # Total number of synthetic series, as a multiple of the training-set size.
    n_sample = int(len(ds_train.data) * sample_ratio)
    count, mod = divmod(n_sample, GEN_CHUNK_SIZE)
    sample_sizes = [GEN_CHUNK_SIZE] * count
    # Only add the remainder chunk when it is non-empty: a zero-size request
    # would ask the generator for an empty batch and wrap an empty dataset in a
    # shuffling DataLoader.
    if mod > 0:
        sample_sizes.append(mod)
    syn_score = []
    # Single repetition per ratio; widen the range to collect several runs.
    for e in range(1):
        # Start every run from the trained baseline weights with a fresh optimizer.
        predictor_base_copy = copy.deepcopy(predictor_base)
        optimizer_base_copy = Adam(predictor_base_copy.parameters(), lr=1e-3)

        # additional training on synthetic data, one generated chunk at a time
        for sample_size in sample_sizes:
            synthetic_data = diffusion_ts.generate_mts(batch_size=sample_size)
            synthetic_data = TensorDataset(torch.from_numpy(synthetic_data))
            dl_synthetic = DataLoader(synthetic_data, batch_size=dl_train.batch_size, shuffle=True)
            train_model(predictor_base_copy,
                        dl_synthetic,
                        lossfn,
                        optimizer_base_copy,
                        num_epochs=2000,
                        description="Synthetic",
                        device=device)
        mae, rmse, pred_y, true_y = evaluate_model_stationary(predictor_base_copy, dl_test, device, "Synthetic")
        syn_score.append([mae, rmse])
    # print_score receives this ratio's runs together with the baseline metrics
    # and returns the summary values stored below.
    mae_mean, mae_std, change_mae, rmse_mean, rmse_std, change_rmse = print_score(syn_score, base_mae, base_rmse)
    scores.append([sample_ratio, mae_mean, mae_std, change_mae, rmse_mean, rmse_std, change_rmse])


In [None]:
# Print one summary block per sample ratio collected in `scores`.
for sc in scores:
    sample_ratio, mae_mean, mae_std, change_mae, rmse_mean, rmse_std, change_rmse = sc
    print("----------------------------------------------------")
    print(f"sample_ratio : {sample_ratio}")
    # The second line reports the RMSE statistics, so it is labelled RMSE
    # (it was previously mislabelled as MSE).
    print(f"MAE : {mae_mean:0.4f}({mae_std:0.4f})({change_mae:0.4f}%) \nRMSE : {rmse_mean:0.4f}({rmse_std:0.4f})({change_rmse:0.4f}%)")
    print("----------------------------------------------------")
