## Experiment objective
Is the differencing technique effective?

## Experiment setting
Compare the forecasting results obtained with differencing and without differencing.
### Dataset
train : AAPL, MSFT, NVDA, AMZN, COST stock close price (2000-01-01~2013-12-31)  
test : AAPL, MSFT, NVDA, AMZN, COST stock close price (2014-01-01~2023-12-31)

### Scenario1
Forecast the one-step-ahead AAPL closing price from the past 23 steps of its own closing price and the closing prices of the 4 other stocks.


In [1]:
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.optim import Adam

import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from torchmetrics import MeanAbsolutePercentageError

from data.dataloader import dataloader_info
from utils.utils import load_yaml_config, instantiate_from_config

from models.predictor import GRU
from data.dataloader import dataloader_info


  from .autonotebook import tqdm as notebook_tqdm


# With Differencing

In [None]:
def train_model(model, dataloader, criterion, optimizer, num_epochs, description, device):
    """Train ``model`` on differenced windows for ``num_epochs`` passes over ``dataloader``.

    Only the first element of every batch (the differenced window) is used; any
    further elements are ignored. The window is indexed as a 3-D tensor
    (assumed batch, time, feature layout): every step but the last, with all
    features, is the input, and feature 0 of the last step is the target.

    Args:
        model: module called as ``model(x)``; its output is passed to ``criterion``.
        dataloader: re-iterable source of batches (iterated once per epoch).
        criterion: loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: optimizer that updates the parameters of ``model``.
        num_epochs: number of passes over ``dataloader``.
        description: label shown in front of the loss in the progress bar.
        device: device the input and target tensors are moved to.

    Raises:
        ValueError: if an epoch yields no batches, so there is no loss to report.
    """
    model.train()
    with tqdm(range(num_epochs), total=num_epochs) as pbar:
        for _epoch in pbar:
            # Reset per epoch so an empty pass is detected instead of hitting an
            # unbound (or stale) ``loss`` when the progress bar is updated.
            loss = None
            for data_diff, *_ in dataloader:
                x_train = data_diff[:, :-1, :].float().to(device)
                y_train = data_diff[:, -1:, 0].float().to(device)
                optimizer.zero_grad()
                outputs = model(x_train)
                loss = criterion(outputs, y_train)
                loss.backward()
                optimizer.step()
            if loss is None:
                raise ValueError("dataloader yielded no batches; nothing to train on")
            # The progress bar shows the loss of the last batch of the epoch.
            pbar.set_description(f"{description} loss: {loss.item():.6f}")

In [None]:
def evaluate_model(model, dataloader, device):
    """Score one-step forecasts of ``model`` on de-normalised values.

    The model predicts the last differenced step of feature 0. The prediction is
    added to the second-to-last normalised value to obtain a normalised
    forecast, which is rescaled with the batch's std and mean before the errors
    are measured. The true target is rebuilt the same way from the true
    difference.

    Args:
        model: predictor called as ``model(x)``; its output is reshaped to
            ``(-1, 1, 1)``. It is expected to already live on ``device``.
        dataloader: yields ``(data_diff, data_norm, data_mean, data_std)``
            batches, each indexed as a 3-D tensor (assumed batch, time,
            feature layout).
        device: device the batch tensors are moved to before the forward pass.

    Returns:
        ``(l1, l2, mape, predictions, ground_truth)``: mean absolute error,
        mean squared error and MAPE as Python floats, followed by the
        de-normalised predictions and targets as NumPy arrays (at least 1-D).

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    model.eval()
    # Predictions are gathered on the CPU below, so the metric stays on the CPU too.
    mape_metric = MeanAbsolutePercentageError()

    abs_error_sum = 0.0
    sq_error_sum = 0.0
    n_values = 0
    predictions, ground_truth = [], []
    with torch.no_grad():
        for data_diff, data_norm, data_mean, data_std in dataloader:
            data_norm = data_norm.to(device)
            data_diff = data_diff.to(device)
            data_mean = data_mean.to(device)
            data_std = data_std.to(device)

            x_diff = data_diff[:, :-1, :].float()
            y_true_diff = data_diff[:, -1:, :1].float()
            y_pred_diff = model(x_diff).reshape(-1, 1, 1)

            # Undo the differencing: last observed normalised value + difference.
            last_norm = data_norm[:, -2:-1, :1]
            y_pred_norm = last_norm + y_pred_diff
            y_true_norm = last_norm + y_true_diff

            # Undo the normalisation using the statistics of feature 0.
            scale = data_std[:, :, :1]
            shift = data_mean[:, :, :1]
            y_pred_unnorm = y_pred_norm * scale + shift
            y_test_unnorm = y_true_norm * scale + shift

            # Accumulate sums and the number of values actually evaluated, so the
            # averages stay correct with drop_last or a sampler that does not
            # visit the whole dataset.
            error = y_pred_unnorm - y_test_unnorm
            abs_error_sum += error.abs().sum().item()
            sq_error_sum += error.pow(2).sum().item()
            n_values += error.numel()

            predictions.append(y_pred_unnorm.cpu().numpy())
            ground_truth.append(y_test_unnorm.cpu().numpy())

    if n_values == 0:
        raise ValueError("dataloader yielded no batches; nothing to evaluate")

    total_l1 = abs_error_sum / n_values
    total_l2 = sq_error_sum / n_values
    # atleast_1d keeps a single-sample result as a length-1 array instead of 0-d.
    predictions = np.atleast_1d(np.concatenate(predictions).squeeze())
    ground_truth = np.atleast_1d(np.concatenate(ground_truth).squeeze())
    mape_loss = mape_metric(torch.from_numpy(predictions), torch.from_numpy(ground_truth)).item()

    return total_l1, total_l2, mape_loss, predictions, ground_truth

In [None]:
# Load the configuration of the experiment that uses differencing.
configs = load_yaml_config("configs/experiments1_w_diff.yaml")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the Diffusion-TS model from the config and restore its saved weights.
# map_location remaps the stored tensors onto `device`, so the checkpoint still
# loads when the notebook falls back to the CPU.
diffusion_ts = instantiate_from_config(configs['model']).to(device)
diffusion_ts.load_state_dict(
    torch.load("check_points/experiments1_w_diff/DiffusionTS_5000.pth", map_location=device)
)

# Batch size of the DataLoaders created for the additional training sets below.
batch_size = 128

In [None]:
# Build the train and test splits from the same experiment config.
dl_info_train = dataloader_info(configs, train=True)
dl_info_test = dataloader_info(configs, train=False)

# Unpack each info dict into its DataLoader and the Dataset behind it.
dl_train, ds_train = dl_info_train["dataloader"], dl_info_train["dataset"]
dl_test, ds_test = dl_info_test["dataloader"], dl_info_test["dataset"]


In [None]:
# Step 1: fit the baseline GRU predictor on the training loader.
predictor_base = GRU(input_dim=5, hidden_dim=50, output_dim=1, num_layers=2).to(device)
optimizer_base = Adam(predictor_base.parameters(), lr=0.001)
lossfn = nn.L1Loss()

train_model(
    model=predictor_base,
    dataloader=dl_train,
    criterion=lossfn,
    optimizer=optimizer_base,
    num_epochs=3000,
    description="Baseline",
    device=device,
)

# Score the trained baseline on the test loader and report the three metrics.
l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
print(f"Baseline : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")



In [None]:
# Experiment "only synthetic": train on the real loader, then continue training
# on generated data only. Repeated 5 times; each run's final scores are kept.
syn_score = []
for e in range(5):
    # Fresh predictor, optimizer and loss for every repetition.
    predictor_base = GRU(
        input_dim=5,
        hidden_dim=50,
        output_dim=1,
        num_layers=2,
    ).to(device)
    optimizer_base = Adam(predictor_base.parameters(), lr=0.001)
    lossfn = nn.L1Loss()

    # Stage 1: baseline fit on the training loader, scored on the test loader.
    train_model(
        model=predictor_base,
        dataloader=dl_train,
        criterion=lossfn,
        optimizer=optimizer_base,
        num_epochs=5000,
        description="Baseline",
        device=device,
    )
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    print(f"Baseline : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")

    # Stage 2: request a batch of 3000 generated series from the diffusion model
    # and keep training the same predictor (same optimizer) on them.
    synthetic_data = diffusion_ts.generate_mts(batch_size=3000)
    synthetic_data = TensorDataset(torch.from_numpy(synthetic_data))
    dl_synthetic = DataLoader(synthetic_data, batch_size=batch_size, shuffle=True)
    train_model(
        model=predictor_base,
        dataloader=dl_synthetic,
        criterion=lossfn,
        optimizer=optimizer_base,
        num_epochs=5000,
        description="Synthetic",
        device=device,
    )
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    syn_score.append([l1, l2, mape])
    print(f"Synthetic : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


In [None]:
# Display the [L1, L2, MAPE] triple recorded for each completed run of the loop above.
syn_score

In [None]:
# Experiment "only origin": train on the real loader, then continue training on
# a random subset of the real differenced windows. Repeated 5 times.
origin_score = []
for e in range(5):
    # Fresh predictor, optimizer and loss for every repetition.
    predictor_base = GRU(
        input_dim=5,
        hidden_dim=50,
        output_dim=1,
        num_layers=2,
    ).to(device)
    optimizer_base = Adam(predictor_base.parameters(), lr=0.001)
    lossfn = nn.L1Loss()

    # Stage 1: baseline fit on the training loader, scored on the test loader.
    train_model(
        model=predictor_base,
        dataloader=dl_train,
        criterion=lossfn,
        optimizer=optimizer_base,
        num_epochs=5000,
        description="Baseline",
        device=device,
    )
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    print(f"Baseline : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")

    # Stage 2: pick up to 3000 distinct random windows from the training set's
    # differenced data and keep training the same predictor on them.
    idx = np.random.permutation(len(ds_train))[:3000]
    origin_data = ds_train.data_diff[idx]
    origin_data = TensorDataset(torch.from_numpy(origin_data))
    dl_origin = DataLoader(origin_data, batch_size=batch_size, shuffle=True)
    train_model(
        model=predictor_base,
        dataloader=dl_origin,
        criterion=lossfn,
        optimizer=optimizer_base,
        num_epochs=5000,
        description="Origin",
        device=device,
    )
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    origin_score.append([l1, l2, mape])
    print(f"Origin : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


In [None]:
# Display the [L1, L2, MAPE] triple recorded for each completed run of the loop above.
origin_score

In [None]:
# Experiment "origin+synthetic": train on the real loader, then continue
# training on a half-real, half-generated set. Repeated 5 times.
ori_syn_score = []
for e in range(5):
    # Fresh predictor, optimizer and loss for every repetition.
    predictor_base = GRU(
        input_dim=5,
        hidden_dim=50,
        output_dim=1,
        num_layers=2,
    ).to(device)
    optimizer_base = Adam(predictor_base.parameters(), lr=0.001)
    lossfn = nn.L1Loss()

    # Stage 1: baseline fit on the training loader, scored on the test loader.
    train_model(
        model=predictor_base,
        dataloader=dl_train,
        criterion=lossfn,
        optimizer=optimizer_base,
        num_epochs=5000,
        description="Baseline",
        device=device,
    )
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    print(f"Baseline : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")

    # Stage 2: stack up to 1500 random real differenced windows on top of a
    # generated batch of 1500 and keep training the same predictor on the mix.
    idx = np.random.permutation(len(ds_train))[:1500]
    origin_data = ds_train.data_diff[idx]
    synthetic_data = diffusion_ts.generate_mts(batch_size=1500)
    ori_syn_data = np.concatenate([origin_data, synthetic_data])
    ori_syn_data = TensorDataset(torch.from_numpy(ori_syn_data))
    dl_ori_syn = DataLoader(ori_syn_data, batch_size=batch_size, shuffle=True)
    train_model(
        model=predictor_base,
        dataloader=dl_ori_syn,
        criterion=lossfn,
        optimizer=optimizer_base,
        num_epochs=5000,
        description="Ori+Syn",
        device=device,
    )
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    ori_syn_score.append([l1, l2, mape])
    print(f"Ori+Syn : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


In [None]:
# Display the [L1, L2, MAPE] triple recorded for each completed run of the loop above.
ori_syn_score

# Without Differencing

In [3]:
def train_model(model, dataloader, criterion, optimizer, num_epochs, description, device):
    """Train ``model`` on normalised windows for ``num_epochs`` passes over ``dataloader``.

    Only the first element of every batch (the normalised window) is used; any
    further elements are ignored. The window is indexed as a 3-D tensor
    (assumed batch, time, feature layout): every step but the last, with all
    features, is the input, and feature 0 of the last step is the target.

    Args:
        model: module called as ``model(x)``; its output is passed to ``criterion``.
        dataloader: re-iterable source of batches (iterated once per epoch).
        criterion: loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: optimizer that updates the parameters of ``model``.
        num_epochs: number of passes over ``dataloader``.
        description: label shown in front of the loss in the progress bar.
        device: device the input and target tensors are moved to.

    Raises:
        ValueError: if an epoch yields no batches, so there is no loss to report.
    """
    model.train()
    with tqdm(range(num_epochs), total=num_epochs) as pbar:
        for _epoch in pbar:
            # Reset per epoch so an empty pass is detected instead of hitting an
            # unbound (or stale) ``loss`` when the progress bar is updated.
            loss = None
            for data_norm, *_ in dataloader:
                x_train = data_norm[:, :-1, :].float().to(device)
                y_train = data_norm[:, -1:, 0].float().to(device)
                optimizer.zero_grad()
                outputs = model(x_train)
                loss = criterion(outputs, y_train)
                loss.backward()
                optimizer.step()
            if loss is None:
                raise ValueError("dataloader yielded no batches; nothing to train on")
            # The progress bar shows the loss of the last batch of the epoch.
            pbar.set_description(f"{description} loss: {loss.item():.6f}")

In [4]:
def evaluate_model(model, dataloader, device):
    """Score one-step forecasts of ``model`` on de-normalised values.

    The model predicts the last normalised step of feature 0 from all earlier
    steps. Prediction and target are rescaled with the batch's std and mean
    before the errors are measured.

    Args:
        model: predictor called as ``model(x)``; its output is reshaped to
            ``(-1, 1, 1)``. It is expected to already live on ``device``.
        dataloader: yields ``(data_norm, data_mean, data_std)`` batches, each
            indexed as a 3-D tensor (assumed batch, time, feature layout).
        device: device the batch tensors are moved to before the forward pass.

    Returns:
        ``(l1, l2, mape, predictions, ground_truth)``: mean absolute error,
        mean squared error and MAPE as Python floats, followed by the
        de-normalised predictions and targets as NumPy arrays (at least 1-D).

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    model.eval()
    # Predictions are gathered on the CPU below, so the metric stays on the CPU too.
    mape_metric = MeanAbsolutePercentageError()

    abs_error_sum = 0.0
    sq_error_sum = 0.0
    n_values = 0
    predictions, ground_truth = [], []
    with torch.no_grad():
        for data_norm, data_mean, data_std in dataloader:
            data_norm = data_norm.to(device)
            data_mean = data_mean.to(device)
            data_std = data_std.to(device)

            x_test = data_norm[:, :-1, :].float()
            y_true_norm = data_norm[:, -1:, :1].float()
            y_pred_norm = model(x_test).reshape(-1, 1, 1)

            # Undo the normalisation using the statistics of feature 0.
            scale = data_std[:, :, :1]
            shift = data_mean[:, :, :1]
            y_pred_unnorm = y_pred_norm * scale + shift
            y_true_unnorm = y_true_norm * scale + shift

            # Accumulate sums and the number of values actually evaluated, so the
            # averages stay correct with drop_last or a sampler that does not
            # visit the whole dataset.
            error = y_pred_unnorm - y_true_unnorm
            abs_error_sum += error.abs().sum().item()
            sq_error_sum += error.pow(2).sum().item()
            n_values += error.numel()

            predictions.append(y_pred_unnorm.cpu().numpy())
            ground_truth.append(y_true_unnorm.cpu().numpy())

    if n_values == 0:
        raise ValueError("dataloader yielded no batches; nothing to evaluate")

    total_l1 = abs_error_sum / n_values
    total_l2 = sq_error_sum / n_values
    # atleast_1d keeps a single-sample result as a length-1 array instead of 0-d.
    predictions = np.atleast_1d(np.concatenate(predictions).squeeze())
    ground_truth = np.atleast_1d(np.concatenate(ground_truth).squeeze())
    mape_loss = mape_metric(torch.from_numpy(predictions), torch.from_numpy(ground_truth)).item()

    return total_l1, total_l2, mape_loss, predictions, ground_truth

In [5]:
# Load the configuration of the experiment that does not use differencing.
configs = load_yaml_config("configs/experiments1_wo_diff.yaml")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the Diffusion-TS model from the config and restore its saved weights.
# map_location remaps the stored tensors onto `device`, so the checkpoint still
# loads when the notebook falls back to the CPU.
diffusion_ts = instantiate_from_config(configs['model']).to(device)
diffusion_ts.load_state_dict(
    torch.load("check_points/experiments1_wo_diff/DiffusionTS_5000.pth", map_location=device)
)

# Batch size of the DataLoaders created for the additional training sets below.
batch_size = 128

In [6]:
# Build the train and test splits from the same experiment config.
dl_info_train = dataloader_info(configs, train=True)
dl_info_test = dataloader_info(configs, train=False)

# Unpack each info dict into its DataLoader and the Dataset behind it.
dl_train, ds_train = dl_info_train["dataloader"], dl_info_train["dataset"]
dl_test, ds_test = dl_info_test["dataloader"], dl_info_test["dataset"]


In [None]:
# Step 1: fit the baseline GRU predictor on the training loader.
predictor_base = GRU(input_dim=5, hidden_dim=50, output_dim=1, num_layers=2).to(device)
optimizer_base = Adam(predictor_base.parameters(), lr=0.001)
lossfn = nn.L1Loss()

train_model(
    model=predictor_base,
    dataloader=dl_train,
    criterion=lossfn,
    optimizer=optimizer_base,
    num_epochs=3000,
    description="Baseline",
    device=device,
)

# Score the trained baseline on the test loader and report the three metrics.
l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
print(f"Baseline : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")



In [7]:
# Experiment "only synthetic": train on the real loader, then continue training
# on generated data only. Repeated 5 times; each run's final scores are kept.
syn_score = []
for e in range(5):
    # Fresh predictor, optimizer and loss for every repetition.
    predictor_base = GRU(
        input_dim=5,
        hidden_dim=50,
        output_dim=1,
        num_layers=2,
    ).to(device)
    optimizer_base = Adam(predictor_base.parameters(), lr=0.001)
    lossfn = nn.L1Loss()

    # Stage 1: baseline fit on the training loader, scored on the test loader.
    train_model(
        model=predictor_base,
        dataloader=dl_train,
        criterion=lossfn,
        optimizer=optimizer_base,
        num_epochs=5000,
        description="Baseline",
        device=device,
    )
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    print(f"Baseline : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")

    # Stage 2: request a batch of 3000 generated series from the diffusion model
    # and keep training the same predictor (same optimizer) on them.
    synthetic_data = diffusion_ts.generate_mts(batch_size=3000)
    synthetic_data = TensorDataset(torch.from_numpy(synthetic_data))
    dl_synthetic = DataLoader(synthetic_data, batch_size=batch_size, shuffle=True)
    train_model(
        model=predictor_base,
        dataloader=dl_synthetic,
        criterion=lossfn,
        optimizer=optimizer_base,
        num_epochs=5000,
        description="Synthetic",
        device=device,
    )
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    syn_score.append([l1, l2, mape])
    print(f"Synthetic : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


Baseline loss: 0.017479: 100%|██████████| 5000/5000 [02:28<00:00, 33.65it/s]


Baseline : L1 loss: 1.33639 	 L2 Loss : 4.95665 	 MAPE loss : 0.01685 


  return F.conv1d(input, weight, bias, self.stride,
reverse step from x_T to x_0: 100%|██████████| 100/100 [01:05<00:00,  1.53it/s]
Synthetic loss: 0.019662: 100%|██████████| 5000/5000 [01:49<00:00, 45.73it/s]


Synthetic : L1 loss: 1.36002 	 L2 Loss : 5.15284 	 MAPE loss : 0.01709 


Baseline loss: 0.180678:   3%|▎         | 152/5000 [00:04<02:23, 33.87it/s]


KeyboardInterrupt: 

In [None]:
# Display the [L1, L2, MAPE] triple recorded for each completed run of the loop above.
syn_score

In [8]:
# Experiment "only origin": train on the real loader, then continue training on
# a random subset of the real normalised windows. Repeated 5 times.
origin_score = []
for e in range(5):
    # train a baseline predictor
    predictor_base = GRU(input_dim=5, hidden_dim=50, output_dim=1, num_layers=2).to(device)
    optimizer_base = Adam(predictor_base.parameters(), lr=0.001)
    lossfn = nn.L1Loss()
    train_model(predictor_base, 
                dl_train, 
                lossfn, 
                optimizer_base, 
                num_epochs=5000, 
                description="Baseline",
                device=device)
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    print(f"Baseline : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")

    # additional training on original data
    # This section trains and evaluates on normalised (non-differenced) windows,
    # so the subset must not come from `ds_train.data_diff`. Drawing it through
    # the dataset itself gives exactly what dl_train feeds train_model.
    # NOTE(review): assumes ds_train[i] returns a tuple whose first item is the
    # normalised window, as the batches unpacked from dl_train suggest - confirm.
    idx = np.random.permutation(len(ds_train))[:3000]
    origin_data = np.stack([np.asarray(ds_train[int(i)][0]) for i in idx])
    origin_data = TensorDataset(torch.from_numpy(origin_data))
    dl_origin = DataLoader(origin_data, batch_size=batch_size, shuffle=True)
    train_model(predictor_base, 
                dl_origin, 
                lossfn, 
                optimizer_base, 
                num_epochs=5000, 
                description="Origin",
                device=device)
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    origin_score.append([l1, l2, mape])
    print(f"Origin : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


Baseline loss: 0.021086:  90%|█████████ | 4516/5000 [02:12<00:14, 34.05it/s]

In [None]:
# Display the [L1, L2, MAPE] triple recorded for each completed run of the loop above.
origin_score

In [None]:
# Experiment "origin+synthetic": train on the real loader, then continue
# training on a half-real, half-generated set. Repeated 5 times.
ori_syn_score = []
for e in range(5):
    # train a baseline predictor
    predictor_base = GRU(input_dim=5, hidden_dim=50, output_dim=1, num_layers=2).to(device)
    optimizer_base = Adam(predictor_base.parameters(), lr=0.001)
    lossfn = nn.L1Loss()
    train_model(predictor_base, 
                dl_train, 
                lossfn, 
                optimizer_base, 
                num_epochs=5000, 
                description="Baseline",
                device=device)
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    print(f"Baseline : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")

    # additional training on ori+syn data
    # This section works on normalised (non-differenced) windows, so the real
    # half must not come from `ds_train.data_diff`: that would mix differenced
    # real data with non-differenced generated data. Drawing it through the
    # dataset itself gives exactly what dl_train feeds train_model.
    # NOTE(review): assumes ds_train[i] returns a tuple whose first item is the
    # normalised window, as the batches unpacked from dl_train suggest - confirm.
    idx = np.random.permutation(len(ds_train))[:1500]
    origin_data = np.stack([np.asarray(ds_train[int(i)][0]) for i in idx])
    synthetic_data = diffusion_ts.generate_mts(batch_size=1500)
    ori_syn_data = np.concatenate([origin_data, synthetic_data])
    ori_syn_data = TensorDataset(torch.from_numpy(ori_syn_data))
    dl_ori_syn = DataLoader(ori_syn_data, batch_size=batch_size, shuffle=True)
    train_model(predictor_base, 
                dl_ori_syn, 
                lossfn, 
                optimizer_base, 
                num_epochs=5000, 
                description="Ori+Syn",
                device=device)
    l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device=device)
    ori_syn_score.append([l1, l2, mape])
    print(f"Ori+Syn : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


In [None]:
# Display the [L1, L2, MAPE] triple recorded for each completed run of the loop above.
ori_syn_score