In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from time import time
from types import SimpleNamespace
from statsmodels.tsa.stattools import adfuller, kpss

import torch
import torch.nn as nn
from torch import optim

from utils.metrics import metric
from data_provider.data_factory import data_provider
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from models import DLinear, iTransformer, Autoformer, FEDformer, Linear
from normalizers import SAN, DDN, TP, RevIN, LightTrend, NoNorm
from layers.decomposition import series_decomp, ema_decomp, envelope_decomp

configs = SimpleNamespace(
    task_name = "long_term_forecast",
    model_name = "iTransformer", 
    
    # Task
    seq_len=720,
    label_len=168,
    pred_len=720,
    enc_in=7,
    dec_in=7,
    c_out=7,
    features='M',
    freq='h',
    
    # Data
    root_path='datasets/ETT-small',
    data='ETTh1',
    data_path='ETTh1.csv',
    target='OT',
    batch_size=32,
    
    # Basic Model Settings
    checkpoints='./manual_exp/checkpoints/',
    dropout=0.1,
    embed='timeF',
    output_attention=False,
    activation='gelu',
    moving_avg=25,
    num_kernels=6,
    individual=False,
    learning_rate=0.0001,
    num_workers=5,
    n_heads=8,
    patience=3,
    train_epochs=10,
    use_amp=False,
    lradj='type1',
    embed_type=0,
    # e_layers=4,
    factor=3,
    # d_model=512,
    # d_ff=2048,
    
    # iTransformer
    d_model=128,
    d_ff=128,
    class_strategy='projection',
    
    # Autoformer & FEDformer
    # factor=3,
    e_layers=2,
    d_layers=1,
    
    # Normalizer
    use_norm='lt',
    decomp_type='sma',
    norm_variant='original',
    station_type='adaptive',
    
    station_joint_lr=0.0005,
    station_pre_lr=0.00005,
    t_ff=64,
    alpha=0.04,
    kernel_size=25,
    twice_epoch=3,
    use_mlp=False,
    t_norm=True,
    s_norm=True,
    affine=True,
    
    period_len=24,
    pre_epoch=5,
    j=1,
    learnable=False,
    wavelet='coif3',
    dr=0.05,
    kernel_len=7,
    hkernel_len=5,
    pd_ff=128,
    pd_model=128,
    pe_layers=0,
    reinit_mean_pretrain=False,
    reinit_std_pretrain=False,
)
model_dict={
    'DLinear': DLinear,
    'iTransformer': iTransformer,
    'Autoformer': Autoformer,
    'FEDformer': FEDformer,
    'Linear': Linear,
}
linear_models = ['DLinear', 'Linear']
norm_dict = {
    'none': NoNorm,
    'revin': RevIN,
    'san': SAN,
    'ddn': DDN,
    'tp': TP,
    'lt': LightTrend,
}

if configs.decomp_type == 'sma':
    decomp = series_decomp(configs.kernel_size)
elif configs.decomp_type == 'ema':
    decomp = ema_decomp(configs.alpha)
elif configs.decomp_type == 'envelope':
    decomp = envelope_decomp(configs.kernel_size)

criterion = nn.MSELoss()
norm_criterion = nn.MSELoss()
# norm_criterion = nn.L1Loss()

# SAN
def san_loss(y, statistics_pred):
    bs, len, dim = y.shape
    y = y.reshape(bs, -1, configs.period_len, dim)
    mean = torch.mean(y, dim=2)
    std = torch.std(y, dim=2)
    station_ture = torch.cat([mean, std], dim=-1)
    loss = criterion(statistics_pred, station_ture)
    return loss

# DDN
def ddn_loss(y, statistics_pred):
    _, (mean, std) = norm.norm(y.transpose(-1, -2), False)
    station_ture = torch.cat([mean, std], dim=1).transpose(-1, -2)
    loss = criterion(statistics_pred, station_ture)
    return loss

# TREAD
def tread_loss(y, statistics_pred):
    trend_pred = statistics_pred[-1]
    _, trend_true = decomp(y)
    loss = criterion(trend_pred, trend_true)
    return loss

# LightTrend
def lt_loss(y, statistics_pred):
    trend_pred = statistics_pred
    _, trend_true = decomp(y)
    loss = norm_criterion(trend_pred, trend_true)
    return loss

station_loss_dict = {
    'none': None,
    'revin': None,
    'san': san_loss,
    'ddn': ddn_loss,
    'san_exp': san_loss,
    'ddn_exp': ddn_loss,
    'tp': tread_loss,
    'tp2': tread_loss,
    'lt': lt_loss,
}
station_loss = station_loss_dict[configs.use_norm]

# [pre train, pre epoch, joint train, join epoch]
station_setting_dict = {
    'none': [0, 0, 0, 0],
    'revin': [0, 0, 0, 0],
    'san': [1, configs.pre_epoch, 0, 0],
    'ddn': [1, configs.pre_epoch, 1, configs.twice_epoch],
    'tp': [1, configs.pre_epoch, 1, configs.twice_epoch],
    'tp2': [1, configs.pre_epoch, 1, configs.twice_epoch],
    # 'lt': [1, configs.pre_epoch, 0, 0],
    'lt': [1, configs.pre_epoch, 1, configs.twice_epoch],
    'san_exp': [1, configs.pre_epoch, 0, 0],
    'ddn_exp': [1, configs.pre_epoch, 1, configs.twice_epoch],
}
station_setting = station_setting_dict[configs.use_norm]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model_dict[configs.model_name].Model(configs).float().cuda()
norm = norm_dict[configs.use_norm].Model(configs).float().cuda()
model_optim = optim.Adam(model.parameters(), lr=configs.learning_rate)
norm_optim = optim.Adam(norm.parameters(), lr=configs.station_pre_lr)

RuntimeError: The NVIDIA driver on your system is too old (found version 11080). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver.

In [380]:
def vali(vali_data, vali_loader, criterion, epoch):
    total_loss = []
    model.eval()
    norm.eval()
    with torch.no_grad():
        for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)

            batch_x_mark = batch_x_mark.float().to(device)
            batch_y_mark = batch_y_mark.float().to(device)

            # normalize
            if configs.use_norm == 'ddn' or configs.use_norm == 'ddn_exp':
                if epoch + 1 <= station_setting[1]:
                    batch_x, statistics_pred, statistics_seq = norm.normalize(batch_x, p_value=False)
                else:
                    batch_x, statistics_pred, statistics_seq = norm.normalize(batch_x)
            else:
                batch_x, statistics_pred = norm.normalize(batch_x)

            # station pretrain
            if epoch + 1 <= station_setting[1]:
                f_dim = -1 if configs.features == 'MS' else 0
                batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)
                if configs.features == 'MS':
                    statistics_pred = statistics_pred[:, :, [configs.enc_in - 1, -1]]
                loss = station_loss(batch_y, statistics_pred)
            
            # model train
            else:
                # decoder x
                dec_inp = torch.zeros_like(batch_y[:, -configs.pred_len:, :]).float()
                dec_label = batch_x[:, -configs.label_len:, :]
                dec_inp = torch.cat([dec_label, dec_inp], dim=1).float()
                # encoder - decoder
                if configs.use_amp:
                    with torch.cuda.amp.autocast():
                        if configs.model_name in linear_models:
                            outputs = model(batch_x)
                        else:
                            if configs.output_attention:
                                outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                            else:
                                outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    if configs.model_name in linear_models:
                        outputs = model(batch_x)
                    else:
                        if configs.output_attention:
                            outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                f_dim = -1 if configs.features == 'MS' else 0
                if configs.features == 'MS':
                    statistics_pred = statistics_pred[:, :, [configs.enc_in - 1, -1]]
                outputs = outputs[:, -configs.pred_len:, f_dim:]
                
                # de-normalize
                outputs = norm.de_normalize(outputs, statistics_pred)
                
                batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)

                pred = outputs.detach().cpu()
                true = batch_y.detach().cpu()

                loss = criterion(pred, true)

            total_loss.append(loss.cpu().item())
    total_loss = np.average(total_loss)
    model.train()
    norm.train()
    return total_loss

def _get_data(flag):
    data_set, data_loader = data_provider(configs, flag)
    return data_set, data_loader

def train(setting):
    train_data, train_loader = _get_data(flag='train')
    vali_data, vali_loader = _get_data(flag='val')
    test_data, test_loader = _get_data(flag='test')

    path = os.path.join(configs.checkpoints, setting)
    if not os.path.exists(path):
        os.makedirs(path)

    path_station = './station/' + '{}_s{}_p{}'.format(configs.use_norm, configs.data,
                                                        configs.seq_len, configs.pred_len)
    if not os.path.exists(path_station):
        os.makedirs(path_station)

    time_now = time()

    train_steps = len(train_loader)
    early_stopping = EarlyStopping(patience=configs.patience, verbose=True)
    early_stopping_station_model = EarlyStopping(patience=configs.patience, verbose=True)

    if configs.use_amp:
        scaler = torch.cuda.amp.GradScaler()
        
    ### NEW ###
    # 재초기화를 위한 헬퍼 함수 정의
    def reinit_weights(m):
        if isinstance(m, nn.Linear):
            m.reset_parameters() # PyTorch 기본값으로 가중치 초기화
    ### END NEW ###

    time_per_epoch = []
    for epoch in range(configs.train_epochs + station_setting[1]):
        iter_count = 0
        train_loss = []
        epoch_start = time()
        # Load best station model after pretraining
        if station_setting[0] > 0 and epoch == station_setting[1]:
            best_model_path = path_station + '/' + 'checkpoint.pth'
            norm.load_state_dict(torch.load(best_model_path))
            print('loading pretrained adaptive station model')
            
            ### NEW ###
            # --- Motivating Experiment 3.2: 학습 분리 효과 검증 ---
            # 'DDN + w/o trend pretrain' 실험
            if getattr(configs, 'reinit_mean_pretrain', False):
                print("!!! EXPERIMENT: Re-initializing MEAN predictor weights !!!")
                norm.mlp.m_project.apply(reinit_weights)
                norm.mlp.mean_proj.apply(reinit_weights)
                norm.mlp.m_concat.apply(reinit_weights)
                norm.mlp.mean_ffn.apply(reinit_weights)
                norm.mlp.mean_pred.apply(reinit_weights)
                print("!!! Mean predictor re-initialization complete. !!!")

            # 'DDN + w/o seasonal pretrain' (std) 실험
            if getattr(configs, 'reinit_std_pretrain', False):
                print("!!! EXPERIMENT: Re-initializing STD predictor weights !!!")
                norm.mlp.s_project.apply(reinit_weights)
                norm.mlp.std_proj.apply(reinit_weights)
                norm.mlp.s_concat.apply(reinit_weights)
                norm.mlp.std_ffn.apply(reinit_weights)
                norm.mlp.std_pred.apply(reinit_weights)
                print("!!! Std predictor re-initialization complete. !!!")
            
            # 'DDN + w/o pretrain' 실험은 station_setting[1] = 0 으로 설정하고
            # 이 if 블록에 들어오지 않게 함으로써 자연스럽게 구현됩니다.
            ### END NEW ###
        
        # Add station parameters to model optim after pretraining and delay epochs for joint training
        if station_setting[2] > 0 and station_setting[3] == epoch - station_setting[1] + 1:
            # lr = model_optim.param_groups[0]['lr']
            lr = configs.station_joint_lr
            # lr = norm_optim.param_groups[0]['lr']
            model_optim.add_param_group({'params': norm.parameters(), 'lr': lr})
        
        model.train()
        norm.train()
        epoch_time = time()
        for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
            iter_count += 1
            model_optim.zero_grad()
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)
            
            # normalize
            if configs.use_norm == 'ddn' or configs.use_norm == 'ddn_exp':
                if epoch + 1 <= station_setting[1]:
                    batch_x, statistics_pred, statistics_seq = norm.normalize(batch_x, p_value=False)
                else:
                    batch_x, statistics_pred, statistics_seq = norm.normalize(batch_x)
            else:
                batch_x, statistics_pred = norm.normalize(batch_x)
            
            # station pretrain
            if epoch + 1 <= station_setting[1]:
                f_dim = -1 if configs.features == 'MS' else 0
                batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)
                if configs.features == 'MS':
                    statistics_pred = statistics_pred[:, :, [configs.enc_in - 1, -1]]
                loss = station_loss(batch_y, statistics_pred)
                train_loss.append(loss.item())
            
            # model train
            else:
                batch_x_mark = batch_x_mark.float().to(device)
                batch_y_mark = batch_y_mark.float().to(device)

                # decoder x
                dec_inp = torch.zeros_like(batch_y[:, -configs.pred_len:, :]).float()
                dec_label = batch_x[:, -configs.label_len:, :]
                dec_inp = torch.cat([dec_label, dec_inp], dim=1).float().to(device)

                # encoder - decoder
                if configs.use_amp:
                    with torch.cuda.amp.autocast():
                        if configs.model_name in linear_models:
                            outputs = model(batch_x)
                        else:
                            if configs.output_attention:
                                outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                            else:
                                outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                        f_dim = -1 if configs.features == 'MS' else 0
                        outputs = outputs[:, -configs.pred_len:, f_dim:]
                        batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)
                        loss = criterion(outputs, batch_y)
                        train_loss.append(loss.item())
                else:
                    if configs.model_name in linear_models:
                        outputs = model(batch_x)
                    else:
                        if configs.output_attention:
                            outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                    f_dim = -1 if configs.features == 'MS' else 0
                    outputs = outputs[:, -configs.pred_len:, f_dim:]
                    if configs.features == 'MS':
                        statistics_pred = statistics_pred[:, :, [configs.enc_in - 1, -1]]
                        
                # de-normalize
                outputs = norm.de_normalize(outputs, statistics_pred)
                
                batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)
                loss = criterion(outputs, batch_y)
                train_loss.append(loss.item())

            if (i + 1) % 100 == 0:
                print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                speed = (time() - time_now) / iter_count
                left_time = speed * (
                        (configs.train_epochs + station_setting[1] - epoch) * train_steps - i)
                print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                iter_count = 0
                time_now = time()
            if configs.use_amp:
                scaler.scale(loss).backward()
                scaler.step(model_optim)
                scaler.update()
            else:
                loss.backward()
                # two-stage training schema
                if epoch + 1 <= station_setting[1]:
                    norm_optim.step()
                else:
                    model_optim.step()
                model_optim.zero_grad()
                norm_optim.zero_grad()

        print("Epoch: {} cost time: {}".format(epoch + 1, time() - epoch_time))
        train_loss = np.average(train_loss)
        vali_loss = vali(vali_data, vali_loader, criterion, epoch)
        test_loss = vali(test_data, test_loader, criterion, epoch)

        if epoch + 1 <= station_setting[1]:
            print(
                "Station Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                    epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping_station_model(vali_loss, norm, path_station)
            adjust_learning_rate(norm_optim, epoch + 1, configs, configs.station_pre_lr)
        else:
            print(
                "Backbone Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                    epoch + 1 - station_setting[1], train_steps, train_loss, vali_loss, test_loss))
            # if: joint training, else: only model training
            if station_setting[2] > 0 and station_setting[3] <= epoch - station_setting[1]:
                early_stopping(vali_loss, model, path, norm, path_station)
            else:
                early_stopping(vali_loss, model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break
            adjust_learning_rate(model_optim, epoch + 1 - station_setting[1], configs,
                                    configs.learning_rate)
            # adjust_learning_rate(norm_optim, epoch + 1 - station_setting[1], configs,
            #                         configs.station_pre_lr)
        time_per_epoch.append(time() - epoch_start)
    print("Average time per pretrain epoch: {:.5f}s".format(np.mean(time_per_epoch[:5])))
    print("Average time per joint train epoch: {:.5f}s".format(np.mean(time_per_epoch[5:])))

    best_model_path = path + '/' + 'checkpoint.pth'
    model.load_state_dict(torch.load(best_model_path))
    if station_setting[2] > 0:
        norm.load_state_dict(torch.load(path_station + '/' + 'checkpoint.pth'))
    return model

def check_stationarity(batch_x, test='adf'):
    ts = batch_x[0, :, -1]
    if test == 'adf':
        stat, pvalue, *_ = adfuller(ts)
        # 낮은 p-value일수록 정상성 ↑ → 점수는 1 - pvalue
        score = 1 - min(pvalue, 1.0)
    elif test == 'kpss':
        stat, pvalue, *_ = kpss(ts, nlags="auto")
        # 높은 p-value일수록 정상성 ↑
        score = min(pvalue, 1.0)
    return score

def test(setting, test=0):
    test_data, test_loader = _get_data(flag='test')

    if test:
        print('loading model')
        model.load_state_dict(torch.load(os.path.join('./manual_exp/checkpoints/' + setting, 'checkpoint.pth')))

    preds = []
    trues = []
    inputx = []
    folder_path = './manual_exp/' + setting + '/'
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    model.eval()
    norm.eval()
    with torch.no_grad():
        for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)
            input_x = batch_x

            # normalize
            input_noNorm = batch_x.clone()
            _, trend_noNorm = decomp(input_noNorm)
            if configs.use_norm == 'ddn' or configs.use_norm == 'ddn_exp':
                batch_x, statistics_pred, statistics_seq = norm.normalize(batch_x)
            else:
                batch_x, statistics_pred = norm.normalize(batch_x)
            input_withNorm = batch_x

            batch_x_mark = batch_x_mark.float().to(device)
            batch_y_mark = batch_y_mark.float().to(device)

            # decoder x
            dec_inp = torch.zeros_like(batch_y[:, -configs.pred_len:, :]).float()
            dec_label = batch_x[:, -configs.label_len:, :]
            dec_inp = torch.cat([dec_label, dec_inp], dim=1).float().to(device)
            # encoder - decoder
            if configs.use_amp:
                with torch.cuda.amp.autocast():
                    if configs.model_name in linear_models:
                        outputs = model(batch_x)
                    else:
                        if configs.output_attention:
                            outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            else:
                if configs.model_name in linear_models:
                    outputs = model(batch_x)
                else:
                    if configs.output_attention:
                        outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                    else:
                        outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

            f_dim = -1 if configs.features == 'MS' else 0
            outputs = outputs[:, -configs.pred_len:, f_dim:]
            if configs.features == 'MS':
                statistics_pred = statistics_pred[:, :, [configs.enc_in - 1, -1]]
                
            # de-normalize
            output_noDenorm = outputs.clone()
            outputs = norm.de_normalize(outputs, statistics_pred)
            output_withDenorm = outputs
            
            batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)
            _, trend_true = decomp(batch_y)
            trend_true = trend_true.detach().cpu().numpy()
            outputs = outputs.detach().cpu().numpy()
            batch_y = batch_y.detach().cpu().numpy()

            pred = outputs  # outputs.detach().cpu().numpy()  # .squeeze()
            true = batch_y  # batch_y.detach().cpu().numpy()  # .squeeze()
            
            # LightTrend가 예측한 추세 시각화
            output_station_noDenorm = statistics_pred.clone() if configs.use_norm == 'lt' else None

            preds.append(pred)
            trues.append(true)
            inputx.append(batch_x.detach().cpu().numpy())
            if i % 20 == 0:
                x = input_x.detach().cpu().numpy()
                gt = np.concatenate((x[0, :, -1], true[0, :, -1]), axis=0)
                pd = np.concatenate((x[0, :, -1], pred[0, :, -1]), axis=0)
                visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))
            
            # visualize input and output with/without normalization/denormalization
            if i == 0:
                i_n = input_noNorm.detach().cpu().numpy()
                t_n = trend_noNorm.detach().cpu().numpy()
                i_w = input_withNorm.detach().cpu().numpy()
                o_n = output_noDenorm.detach().cpu().numpy()
                o_w = output_withDenorm.detach().cpu().numpy()
                os_n = output_station_noDenorm.detach().cpu().numpy() if configs.use_norm == 'lt' else None
                
                plt.close('all')
                plt.figure(figsize=(10, 8))
                plt.subplot(2, 1, 1)
                plt.plot(i_n[0, :, -1], label='Original')
                plt.plot(t_n[0, :, -1], label='Trend', color='black', linestyle='--')
                plt.legend()
                plt.grid()
                plt.subplot(2, 1, 2)
                plt.plot(i_w[0, :, -1], label='Normalized')
                plt.legend()
                plt.grid()
                plt.savefig(os.path.join(folder_path, 'normalized_input.pdf'))
                
                plt.close('all')
                plt.figure(figsize=(8, 4))
                plt.subplot(2, 1, 1)
                plt.plot(true[0, :, -1], label='Ground Truth', color='black', linewidth=2)
                plt.plot(o_w[0, :, -1], label='After Norm(Final Output)')
                plt.plot(0, label='Before Norm(Model Output)')
                plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.3), ncol=3)
                plt.grid()
                plt.subplot(2, 1, 2)
                plt.plot(o_w[0, :, -1], label='After Norm(Final Output)')
                plt.plot(o_n[0, :, -1], label='Before Norm(Model Output)')
                # plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.2), ncol=2)
                plt.grid()
                plt.savefig(os.path.join(folder_path, 'normalized_output.pdf'))
                
                plt.close('all')
                plt.figure(figsize=(12, 8))
                plt.subplot(2, 1, 1)
                plt.plot(true[0, :, -1], label='True')
                plt.plot(o_w[0, :, -1], label='Model Output')
                if configs.use_norm == 'lt':
                    plt.plot(os_n[0, :, -1], label='Station Output')
                plt.legend()
                plt.subplot(2, 1, 2)
                plt.plot(trend_true[0, :, -1], label='Trend True')
                if configs.use_norm == 'lt':
                    plt.plot(os_n[0, :, -1], label='Trend Predicted')
                plt.legend()
                plt.tight_layout()
                plt.savefig(os.path.join(folder_path, 'station_output.pdf'))

    # if configs.test_flop:
    #     test_params_flop((batch_x.shape[1], batch_x.shape[2]))
    #     exit()
    preds = np.array(preds, dtype=object)
    trues = np.array(trues, dtype=object)
    # inputx = np.array(inputx)

    preds = np.concatenate(preds, axis=0)
    trues = np.concatenate(trues, axis=0)
    # inputx = inputx.reshape(-1, inputx.shape[-2], inputx.shape[-1])

    # result save
    folder_path = './manual_exp/' + setting + '/'
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    mae, mse, rmse, mape, mspe, rse, corr = metric(preds, trues)
    print('mse:{}, mae:{}'.format(mse, mae))
    f = open("result.txt", 'a')
    f.write(setting + "  \n")
    f.write('mse:{}, mae:{}, rse:{}, corr:{}'.format(mse, mae, rse, corr))
    f.write('\n')
    f.write('\n')
    f.close()
    return mse, mae

In [381]:
def make_setting_name(configs):
    setting = '{}_{}_pl{}_un{}_dc{}_lr({})_splr({})_sjlr({})_tff{}_dset({})_tepoch{}_tnorm{}_snorm{}'.format(
        configs.data_path[:-4],
        configs.model_name,
        configs.pred_len,
        configs.use_norm,
        configs.decomp_type,
        configs.learning_rate,
        configs.station_pre_lr,
        configs.station_joint_lr,
        configs.t_ff,
        configs.alpha if configs.decomp_type == 'ema' else configs.kernel_size,
        configs.twice_epoch,
        configs.t_norm,
        configs.s_norm,
    )
    return setting

setting = make_setting_name(configs)
train(setting)
test(setting)

train 4640
val 425
test 1182
	iters: 100, epoch: 1 | loss: 0.3048674
	speed: 0.0121s/iter; left time: 25.2205s
Epoch: 1 cost time: 1.777104377746582
Station Epoch: 1, Steps: 145 | Train Loss: 0.2863332 Vali Loss: 0.2366721 Test Loss: 0.1857928
Validation loss decreased (inf --> 0.236672).  Saving model ...
Updating learning rate to 5e-05
	iters: 100, epoch: 2 | loss: 0.2407931
	speed: 0.0216s/iter; left time: 41.6699s
Epoch: 2 cost time: 1.8142235279083252
Station Epoch: 2, Steps: 145 | Train Loss: 0.2358221 Vali Loss: 0.2094506 Test Loss: 0.1679239
Validation loss decreased (0.236672 --> 0.209451).  Saving model ...
Updating learning rate to 2.5e-05
	iters: 100, epoch: 3 | loss: 0.2146927
	speed: 0.0216s/iter; left time: 38.6036s
Epoch: 3 cost time: 1.818938970565796
Station Epoch: 3, Steps: 145 | Train Loss: 0.2285734 Vali Loss: 0.2081208 Test Loss: 0.1676325
Validation loss decreased (0.209451 --> 0.208121).  Saving model ...
Updating learning rate to 1.25e-05
	iters: 100, epoch: 4 

(0.33036685, 0.4165866)

: 

In [4]:
setting

'ETTh1_iTransformer_pl96_unlt_dcenvelope_lr(0.0001)_splr(1e-05)_sjlr(0.0001)_tff64_dset(25)_tepoch3_tnormTrue_snormFalse'

In [14]:
# 학습 가능한 총 파라미터 수 계산
trainable_params = sum(
    p.numel() for p in norm.parameters() if p.requires_grad
)

print(f"모델의 총 학습 가능 파라미터 수: {trainable_params:,} 개")

# 파라미터 저장을 위한 메모리 크기 계산 (기본 float32 기준)
# 1 float32 = 4 bytes
param_size_bytes = trainable_params * 4
param_size_mb = param_size_bytes / (1024 ** 2)

print(f"모델 파라미터의 예상 메모리 크기: {param_size_mb:.3f} MB")

test 2785
mse:0.3804953098297119, mae:0.4029838442802429


(0.3804953, 0.40298384)

In [29]:
configs.root_path='datasets/ETT-small'
configs.data='ETTh2'
configs.data_path='ETTh2.csv'

configs.model_name="iTransformer"
if configs.model_name=="iTransformer":
    configs.d_model=128
    configs.d_ff=128
    configs.factor=1
elif configs.model_name in ["Autoformer", "FEDformer"]:
    configs.d_model=512
    configs.d_ff=2048
    configs.factor=3
    
configs.learning_rate=0.0001
configs.seq_len=720
configs.label_len=168
configs.pred_len=720
configs.enc_in=7
configs.dec_in=7
configs.c_out=7

configs.use_norm='lt'
configs.station_pre_lr=0.00001
configs.station_joint_lr=0.00001
configs.t_ff=64
configs.alpha=0.2
configs.twice_epoch=3
configs.use_mlp=True
configs.t_norm=True
configs.s_norm=False
configs.affine=True

decomp = ema_decomp(configs.alpha)
station_loss = station_loss_dict[configs.use_norm]
station_setting = station_setting_dict[configs.use_norm]

model = model_dict[configs.model_name].Model(configs).float().cuda()
norm = norm_dict[configs.use_norm].Model(configs).float().cuda()
model_optim = optim.Adam(model.parameters(), lr=configs.learning_rate)
norm_optim = optim.Adam(norm.parameters(), lr=configs.station_pre_lr)

setting = 'test'
train(setting)
test(setting)

train 7201
val 2161
test 2161
	iters: 100, epoch: 1 | loss: 0.7581769
	speed: 0.0053s/iter; left time: 17.3979s
	iters: 200, epoch: 1 | loss: 0.9436355
	speed: 0.0053s/iter; left time: 16.8870s
Epoch: 1 cost time: 1.1876552104949951
Station Epoch: 1, Steps: 225 | Train Loss: 0.8619922 Vali Loss: 0.7346204 Test Loss: 0.3525207
Validation loss decreased (inf --> 0.734620).  Saving model ...
Updating learning rate to 1e-05
	iters: 100, epoch: 2 | loss: 0.6560441
	speed: 0.0111s/iter; left time: 33.9457s
	iters: 200, epoch: 2 | loss: 0.6961247
	speed: 0.0054s/iter; left time: 15.8585s
Epoch: 2 cost time: 1.2024035453796387
Station Epoch: 2, Steps: 225 | Train Loss: 0.8526279 Vali Loss: 0.7229651 Test Loss: 0.3511476
Validation loss decreased (0.734620 --> 0.722965).  Saving model ...
Updating learning rate to 5e-06
	iters: 100, epoch: 3 | loss: 0.7075656
	speed: 0.0114s/iter; left time: 32.1812s
	iters: 200, epoch: 3 | loss: 0.8973257
	speed: 0.0052s/iter; left time: 14.1843s
Epoch: 3 cost

(0.37135556, 0.43030044)

In [28]:
test(setting)

test 2161
mse:0.5507868528366089, mae:0.5359645485877991


(0.55078685, 0.53596455)

In [7]:
# 전체 결과 포맷팅

import pandas as pd

df = pd.read_csv('result.csv')
result_list = []
for i in range(len(df)):
    settings = df.Setting[i].split('_')
    dataset = settings[1]
    model_name = settings[4]
    seq_len = settings[7][2:]
    pred_len = settings[9][2:]
    trend_norm = settings[-4][2:]
    station_pre_lr = settings[-3][3:]
    use_mlp = settings[-2][2:]
    mse = df.MSE[i]
    mae = df.MAE[i]
    
    result_list.append([dataset, model_name, seq_len, pred_len, trend_norm, station_pre_lr, use_mlp, mse, mae])
df_result = pd.DataFrame(result_list, columns=['Dataset', 'Model', 'Seq_Len', 'Pred_Len', 'Trend_Norm', 'Station_LR', 'Use_MLP', 'MSE', 'MAE'])
df_result.to_csv('summary_results.csv', index=False)

In [8]:
data_set, data_loader = data_provider(configs, flag='train')

batch_x, batch_y, batch_x_mark, batch_y_mark = next(iter(data_loader))

decomp = envelope_decomp(25).to(device)
seasonal_x, trend_x = decomp(batch_y)

plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1)
plt.plot(batch_y[0, :, -1].cpu().numpy(), label='Original Series')
plt.plot(trend_x[0, :, -1].cpu().numpy(), label='Extracted Trend', linestyle='--')
plt.legend()
plt.subplot(2, 1, 2)
plt.plot(seasonal_x[0, :, -1].cpu().numpy(), label='Extracted Seasonal')
plt.axhline(0, color='black', linestyle='--')
plt.savefig('decomposition_example.pdf')

train 7585
