## Experiment objective
Is the differencing technique effective?

## Experiment setting
Compare the forecasting results obtained with and without differencing.
### Dataset
train : AAPL, MSFT, NVDA, AMZN, COST stock close price (2000-01-01~2013-12-31)  
test : AAPL, MSFT, NVDA, AMZN, COST stock close price (2014-01-01~2023-12-31)

### Scenario1
Forecast the one-step-ahead AAPL close price from the past 23 steps of its own close price and the close prices of the 4 other stocks.


In [42]:
import copy
import torch
import numpy as np
import pandas as pd
from torch import nn
from tqdm import tqdm
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
from torchmetrics import MeanAbsolutePercentageError
from data.dataloader import dataloader_info
from utils.utils import load_yaml_config, instantiate_from_config
from models.predictor import GRU
from data.dataloader import dataloader_info



# With Differencing

In [3]:
def train_model(model, dataloader, criterion, optimizer, num_epochs, description, device):
    """Fit ``model`` to predict the last step of feature 0 from the earlier steps.

    Only the first element of every batch is used. It is sliced along its
    second axis: all steps but the last (every feature) form the input, and
    feature 0 of the last step is the target.

    Args:
        model: module called on the input slice; switched to train mode here.
        dataloader: iterable of batches whose first element is the data tensor.
        criterion: loss callable invoked as ``criterion(outputs, target)``.
        optimizer: optimizer stepping ``model``'s parameters.
        num_epochs: number of full passes over ``dataloader``.
        description: prefix of the progress-bar text.
        device: device the input/target slices are moved to.
    """
    model.train()
    with tqdm(range(num_epochs), total=num_epochs) as pbar:
        for _ in pbar:
            # Reset per epoch so an empty dataloader cannot leave `loss` unbound.
            loss = None
            for data_diff, *_ in dataloader:
                x_train = data_diff[:, :-1, :].float().to(device)
                y_train = data_diff[:, -1:, 0].float().to(device)
                optimizer.zero_grad()
                outputs = model(x_train)
                loss = criterion(outputs, y_train)
                loss.backward()
                optimizer.step()
            # The bar shows the loss of the last batch of the epoch; with no
            # batch there is nothing to report, so the text is left as is.
            if loss is not None:
                pbar.set_description(f"{description} loss: {loss.item():.6f}")

In [4]:
def evaluate_model(model, dataloader, device):
    """Score a predictor trained on differenced data, in un-normalized units.

    For every batch the model predicts the last differenced value of feature 0
    from all earlier steps. Prediction and target are turned back into
    normalized levels by adding the second-to-last normalized value, then
    rescaled with the batch's mean/std before the errors are computed.

    Args:
        model: module called on ``data_diff[:, :-1, :]``; expected to return
            one value per sample (its output is viewed as ``(-1, 1, 1)``).
        dataloader: iterable yielding ``(data_diff, data_norm, data_mean, data_std)``.
        device: device the batches and the MAPE metric are placed on.

    Returns:
        ``(mae, mse, mape, predictions, ground_truth)``: three floats followed
        by the squeezed NumPy arrays of un-normalized predictions and targets.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    model.eval()
    l1loss = nn.L1Loss()
    l2loss = nn.MSELoss()
    mapeloss = MeanAbsolutePercentageError().to(device)

    total_l1 = 0.0
    total_l2 = 0.0
    n_seen = 0
    predictions, ground_truth = [], []
    with torch.no_grad():
        for data_diff, data_norm, data_mean, data_std in dataloader:
            data_norm = data_norm.to(device)
            data_diff = data_diff.to(device)
            data_mean = data_mean.to(device)
            data_std = data_std.to(device)
            batch_size = len(data_diff)
            n_seen += batch_size

            x_diff = data_diff[:, :-1, :].float()
            y_true_diff = data_diff[:, -1:, :1].float()
            y_pred_diff = model(x_diff).view(-1, 1, 1)

            # Undo the differencing: previous normalized level + (predicted / true) step.
            prev_norm = data_norm[:, -2:-1, :1]
            y_pred_norm = prev_norm + y_pred_diff
            y_true_norm = prev_norm + y_true_diff

            # Undo the normalization for feature 0.
            y_pred_unnorm = y_pred_norm * data_std[:, :, :1] + data_mean[:, :, :1]
            y_true_unnorm = y_true_norm * data_std[:, :, :1] + data_mean[:, :, :1]

            # The losses are batch means; weight them by batch size so the
            # final average is per sample even when batches differ in size.
            total_l1 += l1loss(y_pred_unnorm, y_true_unnorm).item() * batch_size
            total_l2 += l2loss(y_pred_unnorm, y_true_unnorm).item() * batch_size

            predictions.append(y_pred_unnorm.cpu().numpy())
            ground_truth.append(y_true_unnorm.cpu().numpy())

    if n_seen == 0:
        raise ValueError("evaluate_model: dataloader yielded no samples")

    # Average over the samples actually evaluated; len(dataloader.dataset)
    # over-counts when the loader drops or sub-samples data.
    total_l1 /= n_seen
    total_l2 /= n_seen
    predictions = np.concatenate(predictions).squeeze()
    ground_truth = np.concatenate(ground_truth).squeeze()
    # Feed the metric tensors on the device it was moved to.
    mape_loss = mapeloss(
        torch.as_tensor(predictions, device=device),
        torch.as_tensor(ground_truth, device=device),
    ).item()

    return total_l1, total_l2, mape_loss, predictions, ground_truth

In [45]:
def print_score(score):
    """Print the per-run metrics and their mean with the standard deviation in parentheses.

    Args:
        score: sequence of ``[MAE, MSE, MAPE]`` rows, one per run. An empty
            sequence (e.g. when the run loop was interrupted before the first
            evaluation) prints a short notice instead of raising.
    """
    score_arr = np.asarray(score, dtype=float)
    if score_arr.size == 0:
        print("No scores recorded.")
        return

    # Column-wise statistics; std is the population std (NumPy default, ddof=0).
    l1_mean, l2_mean, mape_mean = score_arr.mean(0)
    l1_std, l2_std, mape_std = score_arr.std(0)

    score_df = pd.DataFrame(score, columns=["MAE", "MSE", "MAPE"])
    print(score_df)
    print("----------------------------------------------------")
    print(f"MAE : {l1_mean:0.4f}({l1_std:0.4f}) \nMSE : {l2_mean:0.4f}({l2_std:0.4f}) \nMAPE : {mape_mean:0.4f}({mape_std:0.4f})")
    print("----------------------------------------------------")
    
    

In [5]:
# Load configurations
configs = load_yaml_config("configs/experiments1_w_diff.yaml")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Seed NumPy and PyTorch so sampling, shuffling and weight initialisation
# are repeatable on a fresh "Restart & Run All".
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Initialize Diffusion_TS Model
diffusion_ts = instantiate_from_config(configs['model']).to(device)
# map_location keeps the load working when the checkpoint was saved on a GPU
# but this session fell back to the CPU.
diffusion_ts.load_state_dict(
    torch.load("check_points/experiments1_w_diff/DiffusionTS_5000.pth", map_location=device)
)

# Batch size of the fine-tuning DataLoaders built in the cells below.
batch_size = 128

In [6]:
# load dataloader, dataset
# One info object per split; each exposes a "dataloader" and a "dataset" entry.
dl_info_train, dl_info_test = (
    dataloader_info(configs, train=True),
    dataloader_info(configs, train=False),
)

dl_train, ds_train = dl_info_train["dataloader"], dl_info_train["dataset"]
dl_test, ds_test = dl_info_test["dataloader"], dl_info_test["dataset"]


In [34]:
# train a baseline predictor
# The baseline sees only the real training split; later cells fine-tune copies of it.
lossfn = nn.L1Loss()
predictor_base = GRU(num_layers=2, output_dim=1, hidden_dim=50, input_dim=5).to(device)
optimizer_base = Adam(predictor_base.parameters(), lr=0.001)

train_model(model=predictor_base, dataloader=dl_train, criterion=lossfn,
            optimizer=optimizer_base, num_epochs=5000,
            description="Baseline", device=device)

# Metrics are reported on the test split in un-normalized units.
l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device)
print(f"Baseline : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")

  0%|          | 0/5000 [00:00<?, ?it/s]

Baseline loss: 0.013744: 100%|██████████| 5000/5000 [03:05<00:00, 27.01it/s]

Baseline : L1 loss: 1.33756 	 L2 Loss : 4.90620 	 MAPE loss : 0.01667 





In [35]:
# only synthetic
syn_score = []
for e in range(5):
    # Each repetition fine-tunes its own copy, so the baseline weights stay untouched.
    predictor_base_copy = copy.deepcopy(predictor_base)
    optimizer_base_copy = Adam(predictor_base_copy.parameters(), lr=0.001)

    # additional training on synthetic data
    # generate_mts output goes straight into torch.from_numpy, so it is presumably a NumPy array.
    synthetic_data = TensorDataset(torch.from_numpy(diffusion_ts.generate_mts(batch_size=3000)))
    dl_synthetic = DataLoader(synthetic_data, shuffle=True, batch_size=batch_size)

    train_model(model=predictor_base_copy, dataloader=dl_synthetic, criterion=lossfn,
                optimizer=optimizer_base_copy, num_epochs=5000,
                description="Synthetic", device=device)

    l1, l2, mape, pred_y_syn, _ = evaluate_model(predictor_base_copy, dl_test, device)
    syn_score += [[l1, l2, mape]]
    print(f"Synthetic : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


reverse step from x_T to x_0: 100%|██████████| 100/100 [01:01<00:00,  1.62it/s]
  result = _VF.gru(input, hx, self._flat_weights, self.bias, self.num_layers,
Synthetic loss: 0.009064: 100%|██████████| 5000/5000 [01:55<00:00, 43.21it/s]


Synthetic : L1 loss: 1.18296 	 L2 Loss : 3.90295 	 MAPE loss : 0.01479 


reverse step from x_T to x_0: 100%|██████████| 100/100 [01:03<00:00,  1.57it/s]
Synthetic loss: 0.010328: 100%|██████████| 5000/5000 [01:56<00:00, 42.88it/s]


Synthetic : L1 loss: 1.12814 	 L2 Loss : 3.71478 	 MAPE loss : 0.01408 


reverse step from x_T to x_0:   3%|▎         | 3/100 [00:02<01:05,  1.48it/s]


KeyboardInterrupt: 

In [46]:
# Per-run table plus mean(std) of MAE / MSE / MAPE for the synthetic-only runs.
print_score(score=syn_score)

        MAE       MSE      MAPE
0  1.182964  3.902951  0.014791
1  1.128135  3.714777  0.014083
----------------------------------------------------
MAE : 1.1555(0.0274) 
MSE : 3.8089(0.0941) 
MAPE : 0.0144(0.0004)
----------------------------------------------------


In [39]:
# only origin
origin_score = []
for e in range(5):
    # Fresh copy of the baseline and a fresh optimizer for every repetition.
    predictor_base_copy = copy.deepcopy(predictor_base)
    optimizer_base_copy = Adam(predictor_base_copy.parameters(), lr=0.001)

    # additional training on original data
    # Random subset of at most 3000 differenced training windows.
    idx = np.random.permutation(len(ds_train))[:3000]
    origin_data = TensorDataset(torch.from_numpy(ds_train.data_diff[idx]))
    dl_origin = DataLoader(origin_data, shuffle=True, batch_size=batch_size)

    train_model(model=predictor_base_copy, dataloader=dl_origin, criterion=lossfn,
                optimizer=optimizer_base_copy, num_epochs=5000,
                description="Origin", device=device)

    l1, l2, mape, pred_y_ori, _ = evaluate_model(predictor_base_copy, dl_test, device)
    origin_score += [[l1, l2, mape]]
    print(f"Origin : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


  result = _VF.gru(input, hx, self._flat_weights, self.bias, self.num_layers,
Origin loss: 0.013514: 100%|██████████| 5000/5000 [01:57<00:00, 42.65it/s]


Origin : L1 loss: 1.28813 	 L2 Loss : 4.53590 	 MAPE loss : 0.01604 


Origin loss: 0.011560: 100%|██████████| 5000/5000 [01:56<00:00, 42.81it/s]


Origin : L1 loss: 1.28446 	 L2 Loss : 4.47585 	 MAPE loss : 0.01611 


Origin loss: 0.015524:   2%|▏         | 102/5000 [00:02<01:56, 42.14it/s]


KeyboardInterrupt: 

In [40]:
# Per-run table plus mean(std) of MAE / MSE / MAPE for the origin-only runs.
print_score(score=origin_score)

MAE : 1.2863(0.0018) 
 MSE : 4.5059(0.0300) 
 MAPE : 0.0161(0.0000)


In [47]:
# origin+synthetic
ori_syn_score = []
for e in range(5):
    # Fresh copy of the baseline and a fresh optimizer for every repetition.
    predictor_base_copy = copy.deepcopy(predictor_base)
    optimizer_base_copy = Adam(predictor_base_copy.parameters(), lr=0.001)

    # additional training on ori+syn data
    # Half real (up to 1500 differenced windows), half generated (1500 windows).
    idx = np.random.permutation(len(ds_train))[:1500]
    origin_data = ds_train.data_diff[idx]
    synthetic_data = diffusion_ts.generate_mts(batch_size=1500)
    ori_syn_data = TensorDataset(torch.from_numpy(np.concatenate([origin_data, synthetic_data])))
    dl_ori_syn = DataLoader(ori_syn_data, shuffle=True, batch_size=batch_size)

    train_model(model=predictor_base_copy, dataloader=dl_ori_syn, criterion=lossfn,
                optimizer=optimizer_base_copy, num_epochs=5000,
                description="Ori+Syn", device=device)

    l1, l2, mape, pred_y_ori_syn, _ = evaluate_model(predictor_base_copy, dl_test, device)
    ori_syn_score += [[l1, l2, mape]]
    print(f"Ori+Syn : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


  return F.conv1d(input, weight, bias, self.stride,
reverse step from x_T to x_0: 100%|██████████| 100/100 [00:30<00:00,  3.24it/s]
  result = _VF.gru(input, hx, self._flat_weights, self.bias, self.num_layers,
Ori+Syn loss: 0.010988: 100%|██████████| 5000/5000 [01:56<00:00, 42.77it/s]


Ori+Syn : L1 loss: 1.21880 	 L2 Loss : 4.07234 	 MAPE loss : 0.01513 


reverse step from x_T to x_0: 100%|██████████| 100/100 [00:31<00:00,  3.22it/s]
Ori+Syn loss: 0.019224:  10%|▉         | 482/5000 [00:11<01:45, 42.84it/s]


KeyboardInterrupt: 

In [48]:
# Per-run table plus mean(std) of MAE / MSE / MAPE for the origin+synthetic runs.
print_score(score=ori_syn_score)

        MAE       MSE      MAPE
0  1.218801  4.072343  0.015134
----------------------------------------------------
MAE : 1.2188(0.0000) 
MSE : 4.0723(0.0000) 
MAPE : 0.0151(0.0000)
----------------------------------------------------


# Without Differencing

In [49]:
def train_model(model, dataloader, criterion, optimizer, num_epochs, description, device):
    """Fit ``model`` to predict the last step of feature 0 from the earlier steps.

    This redefinition is used for the "without differencing" runs. Only the
    first element of every batch is used: all steps but the last (every
    feature) form the input, and feature 0 of the last step is the target.

    Args:
        model: module called on the input slice; switched to train mode here.
        dataloader: iterable of batches whose first element is the data tensor.
        criterion: loss callable invoked as ``criterion(outputs, target)``.
        optimizer: optimizer stepping ``model``'s parameters.
        num_epochs: number of full passes over ``dataloader``.
        description: prefix of the progress-bar text.
        device: device the input/target slices are moved to.
    """
    model.train()
    with tqdm(range(num_epochs), total=num_epochs) as pbar:
        for _ in pbar:
            # Reset per epoch so an empty dataloader cannot leave `loss` unbound.
            loss = None
            for data_norm, *_ in dataloader:
                x_train = data_norm[:, :-1, :].float().to(device)
                y_train = data_norm[:, -1:, 0].float().to(device)
                optimizer.zero_grad()
                outputs = model(x_train)
                loss = criterion(outputs, y_train)
                loss.backward()
                optimizer.step()
            # The bar shows the loss of the last batch of the epoch; with no
            # batch there is nothing to report, so the text is left as is.
            if loss is not None:
                pbar.set_description(f"{description} loss: {loss.item():.6f}")

In [50]:
def evaluate_model(model, dataloader, device):
    """Score a predictor trained on normalized (non-differenced) data, in un-normalized units.

    For every batch the model predicts the last normalized value of feature 0
    from all earlier steps. Prediction and target are rescaled with the
    batch's mean/std before the errors are computed.

    Args:
        model: module called on ``data_norm[:, :-1, :]``; expected to return
            one value per sample (its output is viewed as ``(-1, 1, 1)``).
        dataloader: iterable yielding ``(data_norm, data_mean, data_std)``.
        device: device the batches and the MAPE metric are placed on.

    Returns:
        ``(mae, mse, mape, predictions, ground_truth)``: three floats followed
        by the squeezed NumPy arrays of un-normalized predictions and targets.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    model.eval()
    l1loss = nn.L1Loss()
    l2loss = nn.MSELoss()
    mapeloss = MeanAbsolutePercentageError().to(device)

    total_l1 = 0.0
    total_l2 = 0.0
    n_seen = 0
    predictions, ground_truth = [], []
    with torch.no_grad():
        for data_norm, data_mean, data_std in dataloader:
            data_norm = data_norm.to(device)
            data_mean = data_mean.to(device)
            data_std = data_std.to(device)
            batch_size = len(data_norm)
            n_seen += batch_size

            x_test = data_norm[:, :-1, :].float()
            y_true_norm = data_norm[:, -1:, :1].float()
            y_pred_norm = model(x_test).view(-1, 1, 1)

            # Undo the normalization for feature 0.
            y_pred_unnorm = y_pred_norm * data_std[:, :, :1] + data_mean[:, :, :1]
            y_true_unnorm = y_true_norm * data_std[:, :, :1] + data_mean[:, :, :1]

            # The losses are batch means; weight them by batch size so the
            # final average is per sample even when batches differ in size.
            total_l1 += l1loss(y_pred_unnorm, y_true_unnorm).item() * batch_size
            total_l2 += l2loss(y_pred_unnorm, y_true_unnorm).item() * batch_size

            predictions.append(y_pred_unnorm.cpu().numpy())
            ground_truth.append(y_true_unnorm.cpu().numpy())

    if n_seen == 0:
        raise ValueError("evaluate_model: dataloader yielded no samples")

    # Average over the samples actually evaluated; len(dataloader.dataset)
    # over-counts when the loader drops or sub-samples data.
    total_l1 /= n_seen
    total_l2 /= n_seen
    predictions = np.concatenate(predictions).squeeze()
    ground_truth = np.concatenate(ground_truth).squeeze()
    # Feed the metric tensors on the device it was moved to.
    mape_loss = mapeloss(
        torch.as_tensor(predictions, device=device),
        torch.as_tensor(ground_truth, device=device),
    ).item()

    return total_l1, total_l2, mape_loss, predictions, ground_truth

In [51]:
# Load configurations
configs = load_yaml_config("configs/experiments1_wo_diff.yaml")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Re-seed NumPy and PyTorch so this half of the experiment is repeatable
# regardless of how many random draws the cells above consumed.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Initialize Diffusion_TS Model
diffusion_ts = instantiate_from_config(configs['model']).to(device)
# map_location keeps the load working when the checkpoint was saved on a GPU
# but this session fell back to the CPU.
diffusion_ts.load_state_dict(
    torch.load("check_points/experiments1_wo_diff/DiffusionTS_5000.pth", map_location=device)
)

# Batch size of the fine-tuning DataLoaders built in the cells below.
batch_size = 128

In [52]:
# load dataloader, dataset
# One info object per split; each exposes a "dataloader" and a "dataset" entry.
dl_info_train, dl_info_test = (
    dataloader_info(configs, train=True),
    dataloader_info(configs, train=False),
)

dl_train, ds_train = dl_info_train["dataloader"], dl_info_train["dataset"]
dl_test, ds_test = dl_info_test["dataloader"], dl_info_test["dataset"]

In [53]:
# train a baseline predictor
# The baseline sees only the real training split; later cells fine-tune copies of it.
lossfn = nn.L1Loss()
predictor_base = GRU(num_layers=2, output_dim=1, hidden_dim=50, input_dim=5).to(device)
optimizer_base = Adam(predictor_base.parameters(), lr=0.001)

train_model(model=predictor_base, dataloader=dl_train, criterion=lossfn,
            optimizer=optimizer_base, num_epochs=5000,
            description="Baseline", device=device)

# Metrics are reported on the test split in un-normalized units.
l1, l2, mape, pred_y, true_y = evaluate_model(predictor_base, dl_test, device)
print(f"Baseline : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


Baseline loss: 0.016426: 100%|██████████| 5000/5000 [02:49<00:00, 29.48it/s]

Baseline : L1 loss: 1.34253 	 L2 Loss : 4.91878 	 MAPE loss : 0.01708 





In [54]:
# only synthetic
syn_score = []
for e in range(5):
    # Each repetition fine-tunes its own copy, so the baseline weights stay untouched.
    predictor_base_copy = copy.deepcopy(predictor_base)
    optimizer_base_copy = Adam(predictor_base_copy.parameters(), lr=0.001)

    # additional training on synthetic data
    # generate_mts output goes straight into torch.from_numpy, so it is presumably a NumPy array.
    synthetic_data = TensorDataset(torch.from_numpy(diffusion_ts.generate_mts(batch_size=3000)))
    dl_synthetic = DataLoader(synthetic_data, shuffle=True, batch_size=batch_size)

    train_model(model=predictor_base_copy, dataloader=dl_synthetic, criterion=lossfn,
                optimizer=optimizer_base_copy, num_epochs=5000,
                description="Synthetic", device=device)

    l1, l2, mape, pred_y_syn, _ = evaluate_model(predictor_base_copy, dl_test, device)
    syn_score += [[l1, l2, mape]]
    print(f"Synthetic : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


  return F.conv1d(input, weight, bias, self.stride,
reverse step from x_T to x_0: 100%|██████████| 100/100 [01:04<00:00,  1.56it/s]
  result = _VF.gru(input, hx, self._flat_weights, self.bias, self.num_layers,
Synthetic loss: 0.015080: 100%|██████████| 5000/5000 [01:57<00:00, 42.46it/s]


Synthetic : L1 loss: 1.36891 	 L2 Loss : 5.25759 	 MAPE loss : 0.01717 


reverse step from x_T to x_0:   6%|▌         | 6/100 [00:04<01:04,  1.45it/s]


KeyboardInterrupt: 

In [55]:
# Per-run table plus mean(std) of MAE / MSE / MAPE for the synthetic-only runs.
print_score(score=syn_score)

        MAE       MSE      MAPE
0  1.368905  5.257589  0.017174
----------------------------------------------------
MAE : 1.3689(0.0000) 
MSE : 5.2576(0.0000) 
MAPE : 0.0172(0.0000)
----------------------------------------------------


In [56]:
# only origin
origin_score = []
for e in range(5):
    # Fresh copy of the baseline and a fresh optimizer for every repetition.
    predictor_base_copy = copy.deepcopy(predictor_base)
    optimizer_base_copy = Adam(predictor_base_copy.parameters(), lr=0.001)

    # additional training on original data
    # Random subset of at most 3000 training windows.
    idx = np.random.permutation(len(ds_train))[:3000]
    origin_data = TensorDataset(torch.from_numpy(ds_train.data[idx]))
    dl_origin = DataLoader(origin_data, shuffle=True, batch_size=batch_size)

    train_model(model=predictor_base_copy, dataloader=dl_origin, criterion=lossfn,
                optimizer=optimizer_base_copy, num_epochs=5000,
                description="Origin", device=device)

    l1, l2, mape, pred_y_ori, _ = evaluate_model(predictor_base_copy, dl_test, device)
    origin_score += [[l1, l2, mape]]
    print(f"Origin : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


  result = _VF.gru(input, hx, self._flat_weights, self.bias, self.num_layers,
Origin loss: 0.014746: 100%|██████████| 5000/5000 [01:57<00:00, 42.67it/s]


Origin : L1 loss: 1.30351 	 L2 Loss : 4.61478 	 MAPE loss : 0.01660 


Origin loss: 0.016947:   4%|▍         | 205/5000 [00:04<01:52, 42.76it/s]


KeyboardInterrupt: 

In [57]:
# Per-run table plus mean(std) of MAE / MSE / MAPE for the origin-only runs.
print_score(score=origin_score)

        MAE       MSE      MAPE
0  1.303512  4.614779  0.016602
----------------------------------------------------
MAE : 1.3035(0.0000) 
MSE : 4.6148(0.0000) 
MAPE : 0.0166(0.0000)
----------------------------------------------------


In [58]:
# origin+synthetic
ori_syn_score = []
for e in range(5):
    # Fresh copy of the baseline and a fresh optimizer for every repetition.
    predictor_base_copy = copy.deepcopy(predictor_base)
    optimizer_base_copy = Adam(predictor_base_copy.parameters(), lr=0.001)

    # additional training on ori+syn data
    # Half real (up to 1500 windows), half generated (1500 windows).
    idx = np.random.permutation(len(ds_train))[:1500]
    origin_data = ds_train.data[idx]
    synthetic_data = diffusion_ts.generate_mts(batch_size=1500)
    ori_syn_data = TensorDataset(torch.from_numpy(np.concatenate([origin_data, synthetic_data])))
    dl_ori_syn = DataLoader(ori_syn_data, shuffle=True, batch_size=batch_size)

    train_model(model=predictor_base_copy, dataloader=dl_ori_syn, criterion=lossfn,
                optimizer=optimizer_base_copy, num_epochs=5000,
                description="Ori+Syn", device=device)

    l1, l2, mape, pred_y_ori_syn, _ = evaluate_model(predictor_base_copy, dl_test, device)
    ori_syn_score += [[l1, l2, mape]]
    print(f"Ori+Syn : L1 loss: {l1:0.5f} \t L2 Loss : {l2:0.5f} \t MAPE loss : {mape:0.5f} ")


  return F.conv1d(input, weight, bias, self.stride,
reverse step from x_T to x_0: 100%|██████████| 100/100 [00:31<00:00,  3.13it/s]
  result = _VF.gru(input, hx, self._flat_weights, self.bias, self.num_layers,
Ori+Syn loss: 0.015565: 100%|██████████| 5000/5000 [01:55<00:00, 43.15it/s]


Ori+Syn : L1 loss: 1.31100 	 L2 Loss : 4.76241 	 MAPE loss : 0.01665 


reverse step from x_T to x_0:   4%|▍         | 4/100 [00:01<00:34,  2.74it/s]


KeyboardInterrupt: 

In [59]:
# Per-run table plus mean(std) of MAE / MSE / MAPE for the origin+synthetic runs.
print_score(score=ori_syn_score)

        MAE       MSE      MAPE
0  1.311001  4.762411  0.016654
----------------------------------------------------
MAE : 1.3110(0.0000) 
MSE : 4.7624(0.0000) 
MAPE : 0.0167(0.0000)
----------------------------------------------------
