In [None]:
import copy
import torch
import numpy as np
import warnings

from torch import nn
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
from data.dataloader import dataloader_info
from utils.utils import load_yaml_config, instantiate_from_config
from models.predictor import GRU
from data.dataloader import dataloader_info

from utils.experiments import train_model, evaluate_model_stationary, print_score

# Silence all warnings for the rest of the session so cell output stays readable.
warnings.filterwarnings("ignore")

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


In [None]:
# Load the experiment configuration used to build the model and the dataloaders.
configs = load_yaml_config("configs/exp1_stock_st.yaml")

# Build the Diffusion_TS model from the config and restore its trained weights.
diffusion_ts = instantiate_from_config(configs['model']).to(device)
load_model_path = "check_points/exp1_stock_st/DiffusionTS_10000.pth"
# map_location remaps the stored tensors onto the device selected above.
# Without it, torch.load tries to restore tensors to the device they were
# saved from and raises on a CPU-only host when the checkpoint came from CUDA.
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
diffusion_ts.load_state_dict(torch.load(load_model_path, map_location=device))

# Sample budget for the fine-tuning runs below: passed (in full or halved)
# as batch_size to diffusion_ts.generate_mts.
n_sample = 3000

In [None]:
# Build the train=True loader bundle once and keep both the DataLoader and
# the underlying Dataset (the Dataset is indexed directly later on).
dl_info_train = dataloader_info(configs, train=True)
dl_train, ds_train = dl_info_train["dataloader"], dl_info_train["dataset"]

In [None]:
# Build the train=False loader bundle; dl_test is what every model is scored on.
dl_info_test = dataloader_info(configs, train=False)
dl_test, ds_test = dl_info_test["dataloader"], dl_info_test["dataset"]

In [None]:
# Baseline: a GRU predictor fitted on the original training loader only.
# Every later experiment starts from a deep copy of this model.
lossfn = nn.MSELoss()
predictor_base = GRU(input_dim=5, hidden_dim=64, output_dim=1, num_layers=2, dropout=0).to(device)
optimizer_base = Adam(predictor_base.parameters(), lr=1e-3)

train_model(
    predictor_base, dl_train, lossfn, optimizer_base,
    num_epochs=2000, description="Baseline", device=device,
)

# Score the baseline on both loaders. The test-loader metrics
# (base_mae, base_rmse) are the reference values handed to print_score later.
mae, rmse, pred_y_train, true_y_train = evaluate_model_stationary(
    predictor_base, dl_train, device, "Train"
)
base_mae, base_rmse, pred_y, true_y = evaluate_model_stationary(
    predictor_base, dl_test, device, "Test"
)


In [None]:
# Experiment: continue training the baseline on synthetic data only.
# Five independent runs; each appends [mae, rmse] on the test loader.
syn_score = []
for e in range(5):
    # Every run restarts from the same baseline weights with a fresh optimizer.
    predictor_base_copy = copy.deepcopy(predictor_base)
    optimizer_base_copy = Adam(predictor_base_copy.parameters(), lr=1e-3)

    # Generate a new synthetic set for this run and serve it through a
    # shuffled loader with the same batch size as the training loader.
    synthetic_data = TensorDataset(
        torch.from_numpy(diffusion_ts.generate_mts(batch_size=n_sample))
    )
    dl_synthetic = DataLoader(synthetic_data, shuffle=True, batch_size=dl_train.batch_size)
    train_model(
        predictor_base_copy, dl_synthetic, lossfn, optimizer_base_copy,
        num_epochs=2000, description="Synthetic", device=device,
    )

    # Evaluate the fine-tuned copy on the test loader and record its errors.
    mae, rmse, pred_y, true_y = evaluate_model_stationary(
        predictor_base_copy, dl_test, device, "Synthetic"
    )
    syn_score.append([mae, rmse])


In [None]:
# Report the synthetic-only runs against the baseline test metrics.
# The returned values are collected into the throwaway name `_`
# (presumably so the cell does not echo them as output).
_ = list(print_score(syn_score, base_mae, base_rmse))

In [None]:
# Experiment: continue training the baseline on the original training loader.
# Five independent runs; each appends [mae, rmse] on the test loader.
origin_score = []
for e in range(5):
    # Every run restarts from the same baseline weights with a fresh optimizer.
    predictor_base_copy = copy.deepcopy(predictor_base)
    optimizer_base_copy = Adam(predictor_base_copy.parameters(), lr=1e-3)

    # Additional training on the original data, same epoch budget as the other experiments.
    train_model(
        predictor_base_copy, dl_train, lossfn, optimizer_base_copy,
        num_epochs=2000, description="Origin", device=device,
    )

    # Evaluate the fine-tuned copy on the test loader and record its errors.
    mae, rmse, pred_y, true_y = evaluate_model_stationary(
        predictor_base_copy, dl_test, device, "Origin"
    )
    origin_score.append([mae, rmse])

In [None]:
# Report the origin-only runs against the baseline test metrics.
# The returned values are collected into the throwaway name `_`
# (presumably so the cell does not echo them as output).
_ = list(print_score(origin_score, base_mae, base_rmse))

In [None]:
# Experiment: continue training the baseline on a mix of original and synthetic data.
# Five independent runs; each appends [mae, rmse] on the test loader.
ori_syn_score = []
n_half = int(n_sample / 2)  # each source contributes up to half of the sample budget
for e in range(5):
    # Every run restarts from the same baseline weights with a fresh optimizer.
    predictor_base_copy = copy.deepcopy(predictor_base)
    optimizer_base_copy = Adam(predictor_base_copy.parameters(), lr=1e-3)

    # Original half: a random subset of the training dataset's data_st_norm rows.
    # The slice yields fewer than n_half rows if the dataset is smaller than that.
    idx = np.random.permutation(len(ds_train))[:n_half]
    origin_data = ds_train.data_st_norm[idx]
    # Synthetic half: freshly generated for this run.
    synthetic_data = diffusion_ts.generate_mts(batch_size=n_half)

    # Stack both halves and serve them through a shuffled loader with the
    # same batch size as the training loader.
    ori_syn_data = TensorDataset(
        torch.from_numpy(np.concatenate([origin_data, synthetic_data]))
    )
    dl_ori_syn = DataLoader(ori_syn_data, shuffle=True, batch_size=dl_train.batch_size)
    train_model(
        predictor_base_copy, dl_ori_syn, lossfn, optimizer_base_copy,
        num_epochs=2000, description="Ori+Syn", device=device,
    )

    # Evaluate the fine-tuned copy on the test loader and record its errors.
    mae, rmse, pred_y, true_y = evaluate_model_stationary(
        predictor_base_copy, dl_test, device, "Ori+Syn"
    )
    ori_syn_score.append([mae, rmse])


In [None]:
# Report the origin+synthetic runs against the baseline test metrics.
# The returned values are collected into the throwaway name `_`
# (presumably so the cell does not echo them as output).
_ = list(print_score(ori_syn_score, base_mae, base_rmse))