In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from time import time
from types import SimpleNamespace
from statsmodels.tsa.stattools import adfuller, kpss

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset, Subset

from utils.metrics import metric
from data_provider.data_factory import data_provider
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from data_provider.data_loader import Dataset_Custom, Dataset_ETT_hour, Dataset_ETT_minute
from models import DLinear, iTransformer, Autoformer, FEDformer, Linear, LSTM
from models.trend import exp_1, exp_2, exp_3, exp_4
from normalizers import SAN, DDN, TP, RevIN, NoNorm, TP2, LightTrend, SAN_exp, DDN_exp
from layers.decomposition import series_decomp, fft_topk_decomp

# Experiment configuration. Every backbone, normalizer and data loader in this
# notebook reads its hyper-parameters from this single namespace.
configs = SimpleNamespace(**{
    'task_name': "long_term_forecast",
    'model_name': "iTransformer",

    # ---- Task ----
    'seq_len': 720,
    'label_len': 168,
    'pred_len': 96,
    'enc_in': 321,
    'dec_in': 321,
    'c_out': 321,
    'features': 'M',
    'freq': 'h',

    # ---- Data ----
    'root_path': 'datasets',
    'data': 'custom',
    'data_path': 'electricity.csv',
    'target': 'OT',
    'batch_size': 16,

    # ---- Basic model settings ----
    'checkpoints': './manual_exp/checkpoints/',
    'dropout': 0.1,
    'embed': 'timeF',
    'output_attention': False,
    'activation': 'gelu',
    'moving_avg': 25,
    'num_kernels': 6,
    'individual': False,
    'learning_rate': 0.0005,
    'n_heads': 8,
    'patience': 3,
    'train_epochs': 10,
    'use_amp': False,
    'lradj': 'type1',
    # Alternative width settings (disabled): d_model=512, d_ff=2048

    # ---- iTransformer ----
    'd_model': 128,
    'd_ff': 128,
    'factor': 1,
    'e_layers': 2,
    'class_strategy': 'projection',

    # ---- Autoformer (disabled): factor=3, e_layers=2, d_layers=1 ----
    # ---- FEDformer  (disabled): factor=3, e_layers=2, d_layers=1 ----

    # ---- Normalizer ----
    'use_norm': 'lt',
    'norm_variant': 'original',
    'station_type': 'adaptive',
    'station_lr': 0.001,
    'alpha': 0.3,
    's_norm': True,
    't_norm': True,
    'use_mlp': True,
    'affine': True,
    'period_len': 24,
    'pre_epoch': 5,
    'twice_epoch': 2,
    'j': 1,
    'learnable': False,
    'wavelet': 'coif3',
    'dr': 0.05,
    'kernel_len': 7,
    'hkernel_len': 5,
    'pd_ff': 128,
    'pd_model': 128,
    'pe_layers': 0,
    'kernel_size': 25,
    'reinit_mean_pretrain': False,
    'reinit_std_pretrain': False,
})
# Backbone registry: configs.model_name -> module whose `Model` class is instantiated.
model_dict = dict(
    DLinear=DLinear,
    iTransformer=iTransformer,
    Autoformer=Autoformer,
    FEDformer=FEDformer,
    Linear=Linear,
    LSTM=LSTM,
    exp_1=exp_1,
    exp_2=exp_2,
    exp_3=exp_3,
    exp_4=exp_4,
)

# Backbones that are invoked as model(batch_x) only, i.e. without time marks
# or a decoder input.
linear_models = ['DLinear', 'Linear', 'LSTM'] + ['exp_{}'.format(k) for k in range(1, 5)]

# Normalizer registry: configs.use_norm -> module whose `Model` class is instantiated.
norm_dict = dict(
    none=NoNorm,
    revin=RevIN,
    san=SAN,
    san_exp=SAN_exp,
    ddn=DDN,
    ddn_exp=DDN_exp,
    tp=TP,
    tp2=TP2,
    lt=LightTrend,
)

# Series decomposition used by the trend losses and the seasonal/trend toggles
# in the training and evaluation loops.
decomp = series_decomp(25)
# decomp = fft_topk_decomp(5)

# SAN
def san_loss(y, statistics_pred):
    """Station loss for SAN: compare predicted statistics with per-period ones.

    `y` is split along its second axis into windows of `configs.period_len`
    steps; the mean and standard deviation of every window are concatenated on
    the last axis and compared with `statistics_pred` through the global
    `criterion`.
    """
    batch, _, channels = y.shape
    windows = y.reshape(batch, -1, configs.period_len, channels)
    station_true = torch.cat([windows.mean(dim=2), windows.std(dim=2)], dim=-1)
    return criterion(statistics_pred, station_true)

# DDN
def ddn_loss(y, statistics_pred):
    """Station loss for DDN: compare predicted statistics with those of `y`.

    The target statistics come from `norm.norm`, called on `y` with its last
    two axes swapped. The returned mean and std are stacked along axis 1,
    swapped back, and scored against `statistics_pred` with the global
    `criterion`.
    """
    _, (target_mean, target_std) = norm.norm(y.transpose(-1, -2), False)
    stacked = torch.cat([target_mean, target_std], dim=1)
    station_true = stacked.transpose(-1, -2)
    return criterion(statistics_pred, station_true)

# TREAD
def tread_loss(y, statistics_pred):
    """Trend loss for the 'tp' / 'tp2' normalizers.

    The last element of `statistics_pred` is taken as the predicted trend and
    scored against the trend component that `decomp` extracts from `y`.
    """
    predicted_trend = statistics_pred[-1]
    _seasonal, target_trend = decomp(y)
    return criterion(predicted_trend, target_trend)

# LightTrend
def lt_loss(y, statistics_pred):
    """Trend loss for LightTrend.

    `statistics_pred` is used directly as the predicted trend and scored
    against the trend component that `decomp` extracts from `y`.
    """
    _seasonal, target_trend = decomp(y)
    return criterion(statistics_pred, target_trend)

# Station (normalizer) loss per normalizer key. `None` marks normalizers that
# have no station pre-training loss.
station_loss_dict = dict(
    none=None,
    revin=None,
    san=san_loss,
    ddn=ddn_loss,
    san_exp=san_loss,
    ddn_exp=ddn_loss,
    tp=tread_loss,
    tp2=tread_loss,
    lt=lt_loss,
)
# Loss used during station pre-training for the configured normalizer.
station_loss = station_loss_dict[configs.use_norm]

# Training schedule per normalizer:
#   [pre-train flag, pre-train epochs, joint-train flag, joint-train start epoch]
# The joint-train start epoch is counted from the end of pre-training.
station_setting_dict = dict(
    none=[0, 0, 0, 0],
    revin=[0, 0, 0, 0],
    san=[1, configs.pre_epoch, 0, 0],
    ddn=[1, configs.pre_epoch, 1, configs.twice_epoch],
    tp=[1, configs.pre_epoch, 1, configs.twice_epoch],
    tp2=[1, configs.pre_epoch, 1, configs.twice_epoch],
    lt=[1, configs.pre_epoch, 1, configs.twice_epoch],
    san_exp=[1, configs.pre_epoch, 0, 0],
    ddn_exp=[1, configs.pre_epoch, 1, configs.twice_epoch],
)
# Schedule for the configured normalizer.
station_setting = station_setting_dict[configs.use_norm]

# Forecasting loss shared by training, validation and the station losses.
criterion = nn.MSELoss()
# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Backbone and normalizer are placed on `device` (rather than calling .cuda()
# unconditionally) so they live on the same device as the batches, which are
# moved with .to(device), and so the CPU fallback above actually works.
model = model_dict[configs.model_name].Model(configs).float().to(device)
norm = norm_dict[configs.use_norm].Model(configs).float().to(device)
# Separate optimizers: `norm_optim` drives station pre-training, `model_optim`
# drives backbone (and later joint) training.
model_optim = optim.Adam(model.parameters(), lr=configs.learning_rate)
norm_optim = optim.Adam(norm.parameters(), lr=configs.station_lr)

In [2]:
# Count the trainable parameters of the normalizer (`norm`).
trainable_params = sum(
    map(lambda p: p.numel(), filter(lambda p: p.requires_grad, norm.parameters()))
)

print(f"모델의 총 학습 가능 파라미터 수: {trainable_params:,} 개")

# Estimated memory needed to store those parameters, assuming float32
# (1 float32 = 4 bytes).
param_size_bytes = 4 * trainable_params
param_size_mb = param_size_bytes / (1 << 20)

print(f"모델 파라미터의 예상 메모리 크기: {param_size_mb:.3f} MB")

모델의 총 학습 가능 파라미터 수: 14,433 개
모델 파라미터의 예상 메모리 크기: 0.055 MB


In [3]:
def vali(vali_data, vali_loader, criterion, epoch):
    """Compute the average loss of the current normalizer/backbone on one split.

    During station pre-training (epoch + 1 <= station_setting[1]) the station
    loss of the normalizer is reported; afterwards the forecasting loss of the
    de-normalized backbone output is reported.

    Args:
        vali_data: dataset object of the split (not used inside the function).
        vali_loader: iterable yielding (batch_x, batch_y, batch_x_mark, batch_y_mark).
        criterion: loss applied to (prediction, target) in the backbone phase.
        epoch: zero-based global epoch index; selects the phase.

    Returns:
        The mean of the per-batch losses.
    """
    def run_backbone(x, x_mark, dec, y_mark):
        # Linear-style backbones take the input window only.
        if configs.model_name in linear_models:
            return model(x)
        result = model(x, x_mark, dec, y_mark)
        # With output_attention the model returns a tuple; element 0 is the forecast.
        return result[0] if configs.output_attention else result

    uses_ddn = configs.use_norm in ('ddn', 'ddn_exp')
    in_pretrain = epoch + 1 <= station_setting[1]
    single_target = configs.features == 'MS'
    f_dim = -1 if single_target else 0

    batch_losses = []
    model.eval()
    norm.eval()
    with torch.no_grad():
        for batch_x, batch_y, batch_x_mark, batch_y_mark in vali_loader:
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)

            # Seasonal or Trend: uncomment one pair below to evaluate on a
            # single component instead of the raw series.
            seasonal_x, trend_x = decomp(batch_x)
            seasonal_y, trend_y = decomp(batch_y)
            # batch_x = trend_x
            # batch_y = trend_y
            # batch_x = seasonal_x
            # batch_y = seasonal_y

            batch_x_mark = batch_x_mark.float().to(device)
            batch_y_mark = batch_y_mark.float().to(device)

            # normalize (the DDN variants return an extra value and take
            # p_value=False while the station is being pre-trained)
            if uses_ddn:
                if in_pretrain:
                    batch_x, statistics_pred, statistics_seq = norm.normalize(batch_x, p_value=False)
                else:
                    batch_x, statistics_pred, statistics_seq = norm.normalize(batch_x)
            else:
                batch_x, statistics_pred = norm.normalize(batch_x)

            if in_pretrain:
                # station pretrain: score the predicted statistics directly
                batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)
                if single_target:
                    statistics_pred = statistics_pred[:, :, [configs.enc_in - 1, -1]]
                loss = station_loss(batch_y, statistics_pred)
            else:
                # decoder input: label window followed by zeros for the horizon
                horizon_zeros = torch.zeros_like(batch_y[:, -configs.pred_len:, :]).float()
                label_window = batch_x[:, -configs.label_len:, :]
                dec_inp = torch.cat([label_window, horizon_zeros], dim=1).float()

                # encoder - decoder
                if configs.use_amp:
                    with torch.cuda.amp.autocast():
                        outputs = run_backbone(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    outputs = run_backbone(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                if single_target:
                    statistics_pred = statistics_pred[:, :, [configs.enc_in - 1, -1]]
                outputs = outputs[:, -configs.pred_len:, f_dim:]

                # de-normalize
                outputs = norm.de_normalize(outputs, statistics_pred)

                batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)
                loss = criterion(outputs.detach().cpu(), batch_y.detach().cpu())

            batch_losses.append(loss.cpu().item())

    mean_loss = np.average(batch_losses)
    # Restore training mode for the caller.
    model.train()
    norm.train()
    return mean_loss

def _get_data(flag):
    """Build the dataset and data loader of one split via `data_provider`.

    Args:
        flag: split name forwarded to `data_provider` (this notebook uses
            'train', 'val' and 'test').

    Returns:
        A (dataset, loader) tuple.
    """
    dataset, loader = data_provider(configs, flag)
    return dataset, loader

def train(setting):
    """Run station (normalizer) pre-training followed by backbone / joint training.

    The schedule comes from the global `station_setting`
    ([pre-train flag, pre-train epochs, joint-train flag, joint-train start]):

    1. For the first `station_setting[1]` epochs only the normalizer is
       optimised (with `norm_optim`) on the station loss.
    2. Afterwards the backbone is optimised (with `model_optim`) on the
       forecasting loss of the de-normalized output. When joint training is
       enabled, the normalizer parameters are added to `model_optim` once
       `station_setting[3]` backbone epochs have passed.

    Args:
        setting: run name; backbone checkpoints live under
            os.path.join(configs.checkpoints, setting).

    Returns:
        The global `model` with the checkpoint stored under that directory loaded.
    """
    train_data, train_loader = _get_data(flag='train')
    vali_data, vali_loader = _get_data(flag='val')
    test_data, test_loader = _get_data(flag='test')

    path = os.path.join(configs.checkpoints, setting)
    os.makedirs(path, exist_ok=True)

    # Four values need four placeholders. The previous '{}_s{}_p{}' template
    # silently dropped pred_len, so runs with different horizons shared (and
    # overwrote) the same station checkpoint directory.
    path_station = './station/' + '{}_{}_s{}_p{}'.format(
        configs.use_norm, configs.data, configs.seq_len, configs.pred_len)
    os.makedirs(path_station, exist_ok=True)

    time_now = time()

    train_steps = len(train_loader)
    early_stopping = EarlyStopping(patience=configs.patience, verbose=True)
    early_stopping_station_model = EarlyStopping(patience=configs.patience, verbose=True)

    if configs.use_amp:
        scaler = torch.cuda.amp.GradScaler()

    def reinit_weights(m):
        """Reset an nn.Linear layer to PyTorch's default initialisation."""
        if isinstance(m, nn.Linear):
            m.reset_parameters()

    def run_backbone(x, x_mark, dec, y_mark):
        """Forward pass of the backbone, returning only the forecast tensor."""
        if configs.model_name in linear_models:
            return model(x)
        result = model(x, x_mark, dec, y_mark)
        return result[0] if configs.output_attention else result

    pretrain_epochs = station_setting[1]
    uses_ddn = configs.use_norm in ('ddn', 'ddn_exp')
    single_target = configs.features == 'MS'
    f_dim = -1 if single_target else 0

    time_per_epoch = []
    for epoch in range(configs.train_epochs + pretrain_epochs):
        in_pretrain = epoch + 1 <= pretrain_epochs
        iter_count = 0
        train_loss = []
        epoch_start = time()

        # Load best station model after pretraining
        if station_setting[0] > 0 and epoch == pretrain_epochs:
            best_model_path = path_station + '/' + 'checkpoint.pth'
            norm.load_state_dict(torch.load(best_model_path, map_location=device))
            print('loading pretrained adaptive station model')

            # --- Motivating Experiment 3.2: effect of decoupled training ---
            # 'DDN + w/o trend pretrain': discard the pre-trained mean predictor.
            if getattr(configs, 'reinit_mean_pretrain', False):
                print("!!! EXPERIMENT: Re-initializing MEAN predictor weights !!!")
                norm.mlp.m_project.apply(reinit_weights)
                norm.mlp.mean_proj.apply(reinit_weights)
                norm.mlp.m_concat.apply(reinit_weights)
                norm.mlp.mean_ffn.apply(reinit_weights)
                norm.mlp.mean_pred.apply(reinit_weights)
                print("!!! Mean predictor re-initialization complete. !!!")

            # 'DDN + w/o seasonal pretrain' (std): discard the pre-trained std predictor.
            if getattr(configs, 'reinit_std_pretrain', False):
                print("!!! EXPERIMENT: Re-initializing STD predictor weights !!!")
                norm.mlp.s_project.apply(reinit_weights)
                norm.mlp.std_proj.apply(reinit_weights)
                norm.mlp.s_concat.apply(reinit_weights)
                norm.mlp.std_ffn.apply(reinit_weights)
                norm.mlp.std_pred.apply(reinit_weights)
                print("!!! Std predictor re-initialization complete. !!!")

            # NOTE(review): the 'DDN + w/o pretrain' experiment was described as
            # "set station_setting[1] = 0 so this block is skipped". With
            # station_setting[1] == 0 this block still runs at epoch 0 whenever
            # station_setting[0] > 0, so station_setting[0] must be 0 as well
            # to really skip the checkpoint load. Confirm the intended setup.

        # Add station parameters to model optim after pretraining and delay epochs for joint training
        if station_setting[2] > 0 and station_setting[3] == epoch - pretrain_epochs:
            lr = model_optim.param_groups[0]['lr']
            model_optim.add_param_group({'params': norm.parameters(), 'lr': lr})

        model.train()
        norm.train()
        epoch_time = time()
        for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
            iter_count += 1
            model_optim.zero_grad()
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)

            # Seasonal or Trend: uncomment one pair below to train on a single
            # component instead of the raw series.
            seasonal_x, trend_x = decomp(batch_x)
            seasonal_y, trend_y = decomp(batch_y)
            # batch_x = trend_x
            # batch_y = trend_y
            # batch_x = seasonal_x
            # batch_y = seasonal_y

            # normalize (the DDN variants return an extra value and take
            # p_value=False while the station is being pre-trained)
            if uses_ddn:
                if in_pretrain:
                    batch_x, statistics_pred, statistics_seq = norm.normalize(batch_x, p_value=False)
                else:
                    batch_x, statistics_pred, statistics_seq = norm.normalize(batch_x)
            else:
                batch_x, statistics_pred = norm.normalize(batch_x)

            if in_pretrain:
                # station pretrain
                batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)
                if single_target:
                    statistics_pred = statistics_pred[:, :, [configs.enc_in - 1, -1]]
                loss = station_loss(batch_y, statistics_pred)
            else:
                # model train
                batch_x_mark = batch_x_mark.float().to(device)
                batch_y_mark = batch_y_mark.float().to(device)

                # decoder x: label window followed by zeros for the horizon
                dec_inp = torch.zeros_like(batch_y[:, -configs.pred_len:, :]).float()
                dec_label = batch_x[:, -configs.label_len:, :]
                dec_inp = torch.cat([dec_label, dec_inp], dim=1).float().to(device)

                # encoder - decoder. Only the forward pass differs between the
                # AMP and the full-precision path; everything after it is
                # shared, so the loss is computed and logged exactly once and
                # always on the de-normalized output.
                if configs.use_amp:
                    with torch.cuda.amp.autocast():
                        outputs = run_backbone(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                    # De-normalization and the loss run in full precision.
                    outputs = outputs.float()
                else:
                    outputs = run_backbone(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                outputs = outputs[:, -configs.pred_len:, f_dim:]
                if single_target:
                    statistics_pred = statistics_pred[:, :, [configs.enc_in - 1, -1]]

                # de-normalize
                outputs = norm.de_normalize(outputs, statistics_pred)

                batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)
                loss = criterion(outputs, batch_y)

            train_loss.append(loss.item())

            if (i + 1) % 100 == 0:
                print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                speed = (time() - time_now) / iter_count
                left_time = speed * (
                        (configs.train_epochs + pretrain_epochs - epoch) * train_steps - i)
                print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                iter_count = 0
                time_now = time()

            # two-stage training schema: the station optimizer during
            # pre-training, the backbone optimizer afterwards. This holds for
            # the AMP path too, which previously always stepped model_optim.
            active_optim = norm_optim if in_pretrain else model_optim
            if configs.use_amp:
                scaler.scale(loss).backward()
                scaler.step(active_optim)
                scaler.update()
            else:
                loss.backward()
                active_optim.step()
            model_optim.zero_grad()
            norm_optim.zero_grad()

        print("Epoch: {} cost time: {}".format(epoch + 1, time() - epoch_time))
        train_loss = np.average(train_loss)
        vali_loss = vali(vali_data, vali_loader, criterion, epoch)
        test_loss = vali(test_data, test_loader, criterion, epoch)

        if in_pretrain:
            print(
                "Station Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                    epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            # Presumably stores the best normalizer under path_station; the
            # checkpoint is read back from there once pre-training ends.
            early_stopping_station_model(vali_loss, norm, path_station)
            adjust_learning_rate(norm_optim, epoch + 1, configs, configs.station_lr)
        else:
            print(
                "Backbone Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                    epoch + 1 - pretrain_epochs, train_steps, train_loss, vali_loss, test_loss))
            # if: joint training, else: only model training
            if station_setting[2] > 0 and station_setting[3] <= epoch - pretrain_epochs:
                early_stopping(vali_loss, model, path, norm, path_station)
            else:
                early_stopping(vali_loss, model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            adjust_learning_rate(model_optim, epoch + 1 - pretrain_epochs, configs,
                                 configs.learning_rate)
            adjust_learning_rate(norm_optim, epoch + 1 - pretrain_epochs, configs,
                                 configs.station_lr)
        time_per_epoch.append(time() - epoch_start)

    # Split the timings at the real number of pre-train epochs (it was
    # hard-coded to 5) and skip a phase that recorded no epoch, so np.mean is
    # never applied to an empty list.
    pretrain_times = time_per_epoch[:pretrain_epochs]
    backbone_times = time_per_epoch[pretrain_epochs:]
    if pretrain_times:
        print("Average time per pretrain epoch: {:.5f}s".format(np.mean(pretrain_times)))
    if backbone_times:
        print("Average time per joint train epoch: {:.5f}s".format(np.mean(backbone_times)))

    best_model_path = path + '/' + 'checkpoint.pth'
    model.load_state_dict(torch.load(best_model_path, map_location=device))
    if station_setting[2] > 0:
        norm.load_state_dict(torch.load(path_station + '/' + 'checkpoint.pth', map_location=device))
    return model

def check_stationarity(batch_x, test='adf'):
    """Score the stationarity of one series of a batch (higher = more stationary).

    The series tested is `batch_x[0, :, -1]`, i.e. the first sample and the
    last channel.

    Args:
        batch_x: 3-D array or torch tensor indexed as [sample, time, channel].
        test: 'adf' (augmented Dickey-Fuller) or 'kpss'.

    Returns:
        For 'adf': 1 - p-value (a low p-value means more stationary).
        For 'kpss': the p-value (a high p-value means more stationary).
        The p-value is capped at 1.0 in both cases.

    Raises:
        ValueError: if `test` is neither 'adf' nor 'kpss'. Previously this
            fell through and failed with an unbound local variable.
    """
    if test not in ('adf', 'kpss'):
        raise ValueError("test must be 'adf' or 'kpss', got {!r}".format(test))

    ts = batch_x[0, :, -1]
    # statsmodels works on NumPy data; torch tensors (possibly on the GPU or
    # attached to a graph) are converted first.
    if hasattr(ts, 'detach'):
        ts = ts.detach().cpu().numpy()

    if test == 'adf':
        _stat, pvalue, *_ = adfuller(ts)
        # Lower p-value -> more stationary, so the score is 1 - pvalue.
        return 1 - min(pvalue, 1.0)

    _stat, pvalue, *_ = kpss(ts, nlags="auto")
    # Higher p-value -> more stationary.
    return min(pvalue, 1.0)

def test(setting, test=0):
    """Evaluate the backbone + normalizer on the test split and log the metrics.

    Side effects:
        * writes forecast plots for every 20th batch, plus two normalization
          diagnostics for the first batch, to './manual_exp/<setting>/';
        * appends the metrics to 'result.txt' in the working directory.

    Args:
        setting: run name; selects the checkpoint and the output directory.
        test: when truthy, the backbone weights are first loaded from
            os.path.join(configs.checkpoints, setting, 'checkpoint.pth').

    Returns:
        (mse, mae) as computed by `metric` over the whole test split.
    """
    def run_backbone(x, x_mark, dec, y_mark):
        """Forward pass of the backbone, returning only the forecast tensor."""
        if configs.model_name in linear_models:
            return model(x)
        result = model(x, x_mark, dec, y_mark)
        return result[0] if configs.output_attention else result

    test_data, test_loader = _get_data(flag='test')

    if test:
        print('loading model')
        # Same directory train() uses for the backbone checkpoint.
        checkpoint_path = os.path.join(configs.checkpoints, setting, 'checkpoint.pth')
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    preds = []
    trues = []
    folder_path = './manual_exp/' + setting + '/'
    os.makedirs(folder_path, exist_ok=True)

    uses_ddn = configs.use_norm in ('ddn', 'ddn_exp')
    single_target = configs.features == 'MS'
    f_dim = -1 if single_target else 0

    model.eval()
    norm.eval()
    with torch.no_grad():
        for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)
            input_x = batch_x

            # Seasonal or Trend: uncomment one pair below to evaluate on a
            # single component instead of the raw series.
            seasonal_x, trend_x = decomp(batch_x)
            seasonal_y, trend_y = decomp(batch_y)
            # batch_x = trend_x
            # batch_y = trend_y
            # batch_x = seasonal_x
            # batch_y = seasonal_y

            # normalize (the DDN variants return an extra value)
            input_noNorm = batch_x
            if uses_ddn:
                batch_x, statistics_pred, statistics_seq = norm.normalize(batch_x)
            else:
                batch_x, statistics_pred = norm.normalize(batch_x)
            input_withNorm = batch_x

            batch_x_mark = batch_x_mark.float().to(device)
            batch_y_mark = batch_y_mark.float().to(device)

            # decoder x: label window followed by zeros for the horizon
            dec_inp = torch.zeros_like(batch_y[:, -configs.pred_len:, :]).float()
            dec_label = batch_x[:, -configs.label_len:, :]
            dec_inp = torch.cat([dec_label, dec_inp], dim=1).float().to(device)

            # encoder - decoder
            if configs.use_amp:
                with torch.cuda.amp.autocast():
                    outputs = run_backbone(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            else:
                outputs = run_backbone(batch_x, batch_x_mark, dec_inp, batch_y_mark)

            outputs = outputs[:, -configs.pred_len:, f_dim:]
            if single_target:
                statistics_pred = statistics_pred[:, :, [configs.enc_in - 1, -1]]

            # de-normalize
            output_noDenorm = outputs
            outputs = norm.de_normalize(outputs, statistics_pred)
            output_withDenorm = outputs

            batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)
            pred = outputs.detach().cpu().numpy()
            true = batch_y.detach().cpu().numpy()

            # Only predictions and targets are kept. The input windows used to
            # be accumulated as well but were never read afterwards.
            preds.append(pred)
            trues.append(true)

            if i % 20 == 0:
                x = input_x.detach().cpu().numpy()
                # History followed by target / forecast, for the first sample
                # and the last channel.
                true_curve = np.concatenate((x[0, :, -1], true[0, :, -1]), axis=0)
                pred_curve = np.concatenate((x[0, :, -1], pred[0, :, -1]), axis=0)
                visual(true_curve, pred_curve, os.path.join(folder_path, str(i) + '.pdf'))

            # visualize input and output with/without normalization/denormalization
            if i == 0:
                i_n = input_noNorm.detach().cpu().numpy()
                i_w = input_withNorm.detach().cpu().numpy()
                o_n = output_noDenorm.detach().cpu().numpy()
                o_w = output_withDenorm.detach().cpu().numpy()

                plt.close('all')
                plt.figure(figsize=(6, 4))
                plt.subplot(2, 1, 1)
                plt.plot(i_n[0, :, -1], label='Original')
                plt.legend()
                plt.grid()

                plt.subplot(2, 1, 2)
                plt.plot(i_w[0, :, -1], label='Normalized')
                plt.legend()
                plt.grid()
                plt.savefig(os.path.join(folder_path, 'normalized_input.pdf'))

                plt.close('all')
                plt.figure(figsize=(8, 4))
                plt.subplot(2, 1, 1)
                plt.plot(true[0, :, -1], label='Ground Truth', color='black', linewidth=2)
                plt.plot(o_w[0, :, -1], label='After Norm(Final Output)')
                # Single dummy point: it only adds the entry to the shared
                # legend, the actual curve is drawn in the lower panel.
                plt.plot(0, label='Before Norm(Model Output)')
                # legend outside of plot
                plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.3), ncol=3)

                plt.grid()
                plt.subplot(2, 1, 2)
                plt.plot(o_w[0, :, -1], label='After Norm(Final Output)')
                plt.plot(o_n[0, :, -1], label='Before Norm(Model Output)')
                plt.grid()
                plt.savefig(os.path.join(folder_path, 'normalized_output.pdf'))

    # Concatenate the per-batch arrays directly. This keeps their numeric
    # dtype (wrapping them in an object array first did not) and also handles
    # a smaller final batch.
    preds = np.concatenate(preds, axis=0)
    trues = np.concatenate(trues, axis=0)

    # result save
    mae, mse, rmse, mape, mspe, rse, corr = metric(preds, trues)
    print('mse:{}, mae:{}'.format(mse, mae))
    # Context manager so the handle is closed even if a write fails.
    with open("result.txt", 'a') as f:
        f.write(setting + "  \n")
        f.write('mse:{}, mae:{}, rse:{}, corr:{}'.format(mse, mae, rse, corr))
        f.write('\n')
        f.write('\n')

    # np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe,rse, corr]))
    # np.save(folder_path + 'pred.npy', preds)
    # np.save(folder_path + 'true.npy', trues)
    return mse, mae

In [4]:
# Run name: train() and test() use it as the checkpoint / output sub-directory.
setting = 'test'
# Station pre-training followed by backbone training, per `station_setting`.
train(setting)
# Evaluate on the test split; being the last expression of the cell, the
# returned (mse, mae) tuple is shown as the cell output.
test(setting)

train 17597
val 2537
test 5165
	iters: 100, epoch: 1 | loss: 0.0584336
	speed: 0.0429s/iter; left time: 703.0683s
	iters: 200, epoch: 1 | loss: 0.0720515
	speed: 0.0221s/iter; left time: 359.9234s
	iters: 300, epoch: 1 | loss: 0.0823734
	speed: 0.0219s/iter; left time: 354.1596s
	iters: 400, epoch: 1 | loss: 0.0529639
	speed: 0.0215s/iter; left time: 346.3670s
	iters: 500, epoch: 1 | loss: 0.0551698
	speed: 0.0217s/iter; left time: 347.1853s
	iters: 600, epoch: 1 | loss: 0.0649555
	speed: 0.0212s/iter; left time: 337.1318s
	iters: 700, epoch: 1 | loss: 0.0664762
	speed: 0.0224s/iter; left time: 352.9576s
	iters: 800, epoch: 1 | loss: 0.0574967
	speed: 0.0224s/iter; left time: 350.6515s
	iters: 900, epoch: 1 | loss: 0.0643788
	speed: 0.0219s/iter; left time: 341.6106s
	iters: 1000, epoch: 1 | loss: 0.0658016
	speed: 0.0223s/iter; left time: 345.2110s
Epoch: 1 cost time: 26.215226888656616
Station Epoch: 1, Steps: 1099 | Train Loss: 0.0645538 Vali Loss: 0.0441553 Test Loss: 0.0640515
Val

(0.13067123, 0.22774436)

In [31]:
# Reload the saved backbone checkpoint and evaluate it. test() only accepts
# (setting, test); the `stationarity_test` keyword passed here before is not
# part of its signature and raised a TypeError.
test(setting, test=1)

test 2689
loading model
mse:0.4212721288204193, mae:0.4345196485519409


In [7]:
import pandas as pd


def _parse_setting(setting_name):
    """Extract the hyper-parameter fields encoded in an underscore-separated run name.

    The positions and prefix lengths below follow the naming scheme of the
    runs listed in 'result.csv'.
    NOTE(review): that scheme is not defined in this notebook - confirm the
    indices against the code that writes the 'Setting' column.
    """
    parts = setting_name.split('_')
    return [
        parts[1],         # dataset
        parts[4],         # model name
        parts[7][2:],     # seq_len, 2-character prefix stripped
        parts[9][2:],     # pred_len, 2-character prefix stripped
        parts[-4][2:],    # trend_norm, 2-character prefix stripped
        parts[-3][3:],    # station_lr, 3-character prefix stripped
        parts[-2][2:],    # use_mlp, 2-character prefix stripped
    ]


df = pd.read_csv('result.csv')

# Iterate over the columns directly instead of indexing rows by label inside
# a range(len(df)) loop. This does not depend on the frame having a default
# integer index and leaves no per-row temporaries in the notebook namespace.
result_list = [
    _parse_setting(setting_name) + [mse_value, mae_value]
    for setting_name, mse_value, mae_value in zip(df['Setting'], df['MSE'], df['MAE'])
]

df_result = pd.DataFrame(
    result_list,
    columns=['Dataset', 'Model', 'Seq_Len', 'Pred_Len', 'Trend_Norm', 'Station_LR', 'Use_MLP', 'MSE', 'MAE'],
)
df_result.to_csv('summary_results.csv', index=False)