In [1]:
# TODO: decide how iTransformer is provided -- a fresh clone of the upstream
# repository (commented out below) or the local checkout added to sys.path.
#git clone https://github.com/lucidrains/iTransformer.git
#import iTransformer
import sys
# NOTE(review): absolute, machine-specific path; the import below only resolves
# on a machine where this checkout exists.
sys.path.append('/vol/fob-vol7/nebenf21/reinbene/bene/MA/iTransformer') 
from iTransformer import iTransformer

import torch
import torch.optim as optim
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
from pathlib import Path


# presumably project-local modules (helpers and path configuration) -- verify
from utils import data_handling, training_functions
import config 

# Printed only when every import above succeeded.
print("Import succesfull")

Import succesfull


# Sanity checking our iTransformer implementation

We use the same parameters as presented in the paper for a first evaluation of whether our model is actually able
to reproduce the results shown in the original paper.

We take a window size of 96 hours as input and predict different horizons from 96h to 720h. 

The parameters used are in the range of the optimal parameters evaluated in the original paper.

In [2]:
# Forecasting setup: 96 input time steps and four prediction horizons.
window_size = 96
pred_length = (96, 192, 336, 720)

# Load the electricity dataset and build the train/validation/test loaders.
data_dict = data_handling.load_electricity()
dataloader_train, dataloader_validation, dataloader_test = data_handling.convert_data(
    data_dict, window_size, pred_length
)

# Number of batches in the training loader (displayed as the cell output).
len(dataloader_train)

Feature batch shape: torch.Size([32, 96, 348])


131

# Train model on electricity dataset

In [4]:
# Normalization variants to compare. Each value is the pair
# [use_reversible_instance_norm, reversible_instance_norm_affine]
# that the training cells forward to the iTransformer constructor.
normalization_strategies = dict(
    base=[False, False],
    revin=[True, True],
    stationary=[True, False],
)

In [4]:
# Run the experiment once per normalization strategy and save the trained
# model together with its test metrics.


def _as_python_scalar(value):
    """Return ``value.item()`` for tensor-like objects, else ``value`` unchanged.

    The metrics dictionary logged during evaluation mixes tensors with plain
    ints (horizons without a value are reported as ``0``), and a plain int has
    no ``.item()`` method.
    """
    return value.item() if hasattr(value, "item") else value


# Hyperparameters shared by all strategies (loop-invariant, so defined once).
best_parameters = {'depth': 2, 'dim': 256, 'dim_head': 56, 'heads': 4, 'attn_dropout': 0.2, 'ff_mult': 4, 'ff_dropout': 0.1,
                   'num_mem_tokens': 4, 'learning_rate': 0.0005}

# Number of training epochs per strategy. Kept separate from the loop variable
# `epoch` so the count is never overwritten by the loop itself.
num_epochs = 15

# select the available device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for key, value in normalization_strategies.items():

    # `value` is [use_reversible_instance_norm, reversible_instance_norm_affine]
    model_config = {
        'num_variates': data_dict["train"].size(1),
        'lookback_len': window_size,
        'depth': best_parameters["depth"],
        'dim': best_parameters["dim"],
        'num_tokens_per_variate': 1,
        'pred_length': pred_length,
        'dim_head': best_parameters["dim_head"],
        'heads': best_parameters["heads"],
        'attn_dropout': best_parameters["attn_dropout"],
        'ff_mult': best_parameters["ff_mult"],
        'ff_dropout': best_parameters["ff_dropout"],
        'num_mem_tokens': best_parameters["num_mem_tokens"],
        'use_reversible_instance_norm': value[0],
        'reversible_instance_norm_affine': value[1],
        'flash_attn': True
    }

    print(f"Using device: {device}")

    # fresh model, optimizer, scheduler and TensorBoard writer per strategy
    model = iTransformer(**model_config).to(device)
    optimizer = optim.Adam(model.parameters(), lr=best_parameters["learning_rate"])
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    writer = SummaryWriter(log_dir=config.CONFIG_LOGS_PATH[key])

    # Train for the configured number of epochs; always close the writer so
    # pending events are flushed even if training is interrupted.
    try:
        for epoch in range(1, num_epochs + 1):
            training_functions.train_one_epoch(epoch, model, device, dataloader_train, dataloader_validation, optimizer, scheduler, writer)
    finally:
        writer.close()

    # Evaluate on the test split and convert every entry to a plain Python
    # number. A new dict is built instead of mutating the one being iterated.
    metrics = training_functions.fast_eval(model, dataloader_test)
    metrics = {
        horizon: {name: _as_python_scalar(score) for name, score in scores.items()}
        for horizon, scores in metrics.items()
    }

    # save model
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'epoch': epoch,
        'loss': metrics[96]["mse"],
        'global_step_writer': 0,
    }

    # make sure the target directories exist before writing into them
    Path(config.CONFIG_MODEL_LOCATION[key]).mkdir(parents=True, exist_ok=True)
    Path(config.CONFIG_OUTPUT_PATH[key]).mkdir(parents=True, exist_ok=True)

    torch.save(checkpoint, f'{config.CONFIG_MODEL_LOCATION[key]}/electricity_{key}_epoch_{epoch}_loss_{checkpoint["loss"]}.pt')

    print(f"Checkpointing succesfull after epoch {epoch} for {key}")

    # one row per forecasting horizon, one column per metric
    metrics_df = pd.DataFrame.from_dict(metrics, orient='index')

    metrics_df.to_csv(f"{config.CONFIG_OUTPUT_PATH[key]}/metrics_{key}_epochs{epoch}.csv")



Using device: cuda
Non-A100 GPU detected, using math or mem efficient attention if input tensor is on cuda


Epoch: 1: 100%|██████████| 151/151 [02:23<00:00,  1.05it/s]


Epoch 1, MSE-Loss: 0.0684398826680436, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:18<00:00,  1.16it/s]


Validation MAE is {96: {'mse': tensor(0.2269, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 2: 100%|██████████| 151/151 [02:24<00:00,  1.05it/s]


Epoch 2, MSE-Loss: 0.047383620471551716, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:15<00:00,  1.34it/s]


Validation MAE is {96: {'mse': tensor(0.2194, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 3: 100%|██████████| 151/151 [02:23<00:00,  1.05it/s]


Epoch 3, MSE-Loss: 0.04356631458989832, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:18<00:00,  1.14it/s]


Validation MAE is {96: {'mse': tensor(0.2076, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 4:  83%|████████▎ | 125/151 [01:58<00:24,  1.06it/s]

# Train electricity model with 1 forecasting horizon for transfer learning

Only the 96h horizon is used, because smaller datasets do not provide enough data for multiple long prediction horizons.

In [6]:
# Train one single-horizon model per normalization strategy and save each
# checkpoint as a base model for transfer learning.

# Single forecasting horizon. Written as a plain int: the former `(96)` had no
# trailing comma and therefore was the int 96 as well, not a tuple.
pred_length = 96
window_size = 96

# use electricity dataset
data_dict = data_handling.load_electricity()

dataloader_train, dataloader_validation, dataloader_test = data_handling.convert_data(data_dict, window_size, pred_length)

# Hyperparameters shared by all strategies (loop-invariant, so defined once).
best_parameters = {'depth': 2, 'dim': 256, 'dim_head': 56, 'heads': 4, 'attn_dropout': 0.2, 'ff_mult': 4, 'ff_dropout': 0.1,
                   'num_mem_tokens': 4, 'learning_rate': 0.0005}

# Number of training epochs per strategy. Kept separate from the loop variable
# `epoch` so the count is never overwritten by the loop itself.
num_epochs = 15

# select the available device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for key, value in normalization_strategies.items():

    # `value` is [use_reversible_instance_norm, reversible_instance_norm_affine]
    model_config = {
        'num_variates': data_dict["train"].size(1),
        'lookback_len': window_size,
        'depth': best_parameters["depth"],
        'dim': best_parameters["dim"],
        'num_tokens_per_variate': 1,
        'pred_length': pred_length,
        'dim_head': best_parameters["dim_head"],
        'heads': best_parameters["heads"],
        'attn_dropout': best_parameters["attn_dropout"],
        'ff_mult': best_parameters["ff_mult"],
        'ff_dropout': best_parameters["ff_dropout"],
        'num_mem_tokens': best_parameters["num_mem_tokens"],
        'use_reversible_instance_norm': value[0],
        'reversible_instance_norm_affine': value[1],
        'flash_attn': True
    }

    print(f"Using device: {device}")

    # fresh model, optimizer, scheduler and TensorBoard writer per strategy
    model = iTransformer(**model_config).to(device)
    optimizer = optim.Adam(model.parameters(), lr=best_parameters["learning_rate"])
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    # NOTE(review): this is the same log directory the multi-horizon experiment
    # uses for this strategy, so both runs write their events to one place --
    # confirm that this is intended.
    writer = SummaryWriter(log_dir=config.CONFIG_LOGS_PATH[key])

    # Train for the configured number of epochs; always close the writer so
    # pending events are flushed even if training is interrupted.
    try:
        for epoch in range(1, num_epochs + 1):
            training_functions.train_one_epoch(epoch, model, device, dataloader_train, dataloader_validation, optimizer, scheduler, writer)
    finally:
        writer.close()

    # Test-set evaluation; the result is not stored in the checkpoint here.
    metrics = training_functions.fast_eval(model, dataloader_test)

    # save model
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'epoch': epoch,
    }

    # make sure the target directory exists before writing into it
    Path(config.CONFIG_MODEL_LOCATION[key]).mkdir(parents=True, exist_ok=True)

    # define path for transfer learning base models
    torch.save(checkpoint, f'{config.CONFIG_MODEL_LOCATION[key]}/electricity_{key}_epoch_{epoch}_transfer_learning.pt')

    print(f"Checkpointing succesfull after epoch {epoch} for {key}")



Feature batch shape: torch.Size([32, 96, 348])
Using device: cuda


Epoch: 1:   0%|          | 0/151 [00:00<?, ?it/s]

Epoch: 1: 100%|██████████| 151/151 [00:06<00:00, 24.67it/s]


Epoch 1, MSE-Loss: 0.06796556735867697, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 75.83it/s]


Validation MAE is {96: {'mse': tensor(0.2376, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 2: 100%|██████████| 151/151 [00:06<00:00, 24.84it/s]


Epoch 2, MSE-Loss: 0.04713020784570681, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 77.92it/s]


Validation MAE is {96: {'mse': tensor(0.2253, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 3: 100%|██████████| 151/151 [00:06<00:00, 24.73it/s]


Epoch 3, MSE-Loss: 0.043905113657184, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 75.72it/s]


Validation MAE is {96: {'mse': tensor(0.2188, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 4: 100%|██████████| 151/151 [00:06<00:00, 24.46it/s]


Epoch 4, MSE-Loss: 0.04228381013238667, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 68.03it/s]


Validation MAE is {96: {'mse': tensor(0.2091, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 5: 100%|██████████| 151/151 [00:06<00:00, 24.49it/s]


Epoch 5, MSE-Loss: 0.040424894640185186, LR: 0.0005
Checkpointing succesfull after epoch 5


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 82.93it/s]


Validation MAE is {96: {'mse': tensor(0.2050, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 6: 100%|██████████| 151/151 [00:06<00:00, 24.58it/s]


Epoch 6, MSE-Loss: 0.03899529583702814, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 89.71it/s]


Validation MAE is {96: {'mse': tensor(0.2015, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 7: 100%|██████████| 151/151 [00:06<00:00, 24.92it/s]


Epoch 7, MSE-Loss: 0.038167722214827476, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 87.46it/s]


Validation MAE is {96: {'mse': tensor(0.1992, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 8: 100%|██████████| 151/151 [00:06<00:00, 24.56it/s]


Epoch 8, MSE-Loss: 0.03698696926334836, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 88.77it/s]


Validation MAE is {96: {'mse': tensor(0.1989, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 9: 100%|██████████| 151/151 [00:06<00:00, 24.62it/s]


Epoch 9, MSE-Loss: 0.036343022958924436, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 86.16it/s]


Validation MAE is {96: {'mse': tensor(0.1982, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 10: 100%|██████████| 151/151 [00:06<00:00, 24.69it/s]


Epoch 10, MSE-Loss: 0.03592635767645394, LR: 0.0005
Checkpointing succesfull after epoch 10


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 89.94it/s]


Validation MAE is {96: {'mse': tensor(0.2098, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 11: 100%|██████████| 151/151 [00:06<00:00, 24.51it/s]


Epoch 11, MSE-Loss: 0.034505016016249626, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 85.02it/s]


Validation MAE is {96: {'mse': tensor(0.1933, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 12: 100%|██████████| 151/151 [00:06<00:00, 24.54it/s]


Epoch 12, MSE-Loss: 0.033850243771510406, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 86.25it/s]


Validation MAE is {96: {'mse': tensor(0.1924, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 13: 100%|██████████| 151/151 [00:06<00:00, 24.72it/s]


Epoch 13, MSE-Loss: 0.03372379994352922, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 85.82it/s]


Validation MAE is {96: {'mse': tensor(0.1927, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 14: 100%|██████████| 151/151 [00:06<00:00, 24.43it/s]


Epoch 14, MSE-Loss: 0.03358537797501545, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 82.38it/s]


Validation MAE is {96: {'mse': tensor(0.1928, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 15: 100%|██████████| 151/151 [00:06<00:00, 24.29it/s]


Epoch 15, MSE-Loss: 0.033532980200371994, LR: 5e-05
Checkpointing succesfull after epoch 15


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 90.46it/s]


Validation MAE is {96: {'mse': tensor(0.1922, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: Validating: 100%|██████████| 86/86 [00:01<00:00, 79.54it/s]


Validation MAE is {96: {'mse': tensor(0.1709, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}
Checkpointing succesfull after epoch 15 for base
Using device: cuda


Epoch: 1: 100%|██████████| 151/151 [00:06<00:00, 23.95it/s]


Epoch 1, MSE-Loss: 0.06687690428649352, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 83.83it/s]


Validation MAE is {96: {'mse': tensor(0.2368, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 2: 100%|██████████| 151/151 [00:06<00:00, 23.70it/s]


Epoch 2, MSE-Loss: 0.04636096939543225, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 84.42it/s]


Validation MAE is {96: {'mse': tensor(0.2179, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 3: 100%|██████████| 151/151 [00:06<00:00, 23.61it/s]


Epoch 3, MSE-Loss: 0.04171483497449894, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 84.88it/s]


Validation MAE is {96: {'mse': tensor(0.2090, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 4: 100%|██████████| 151/151 [00:06<00:00, 23.78it/s]


Epoch 4, MSE-Loss: 0.03922603938062459, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 81.53it/s]


Validation MAE is {96: {'mse': tensor(0.2082, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 5: 100%|██████████| 151/151 [00:06<00:00, 23.85it/s]


Epoch 5, MSE-Loss: 0.03765107116458432, LR: 0.0005
Checkpointing succesfull after epoch 5


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 74.52it/s]


Validation MAE is {96: {'mse': tensor(0.2038, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 6: 100%|██████████| 151/151 [00:06<00:00, 23.47it/s]


Epoch 6, MSE-Loss: 0.03679176935593024, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 77.40it/s]


Validation MAE is {96: {'mse': tensor(0.2042, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 7: 100%|██████████| 151/151 [00:06<00:00, 23.75it/s]


Epoch 7, MSE-Loss: 0.03603706960350472, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 84.32it/s]


Validation MAE is {96: {'mse': tensor(0.2073, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 8: 100%|██████████| 151/151 [00:06<00:00, 23.53it/s]


Epoch 8, MSE-Loss: 0.03557245420995137, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 82.08it/s]


Validation MAE is {96: {'mse': tensor(0.2026, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 9: 100%|██████████| 151/151 [00:06<00:00, 23.63it/s]


Epoch 9, MSE-Loss: 0.03481913966108237, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 84.94it/s]


Validation MAE is {96: {'mse': tensor(0.2032, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 10: 100%|██████████| 151/151 [00:06<00:00, 23.68it/s]


Epoch 10, MSE-Loss: 0.03433901304687491, LR: 0.0005
Checkpointing succesfull after epoch 10


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 84.65it/s]


Validation MAE is {96: {'mse': tensor(0.2052, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 11: 100%|██████████| 151/151 [00:06<00:00, 23.64it/s]


Epoch 11, MSE-Loss: 0.03287655913227836, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 85.59it/s]


Validation MAE is {96: {'mse': tensor(0.2028, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 12: 100%|██████████| 151/151 [00:06<00:00, 23.59it/s]


Epoch 12, MSE-Loss: 0.03281403083773638, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 84.90it/s]


Validation MAE is {96: {'mse': tensor(0.2035, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 13: 100%|██████████| 151/151 [00:06<00:00, 23.66it/s]


Epoch 13, MSE-Loss: 0.03258917905044872, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 84.79it/s]


Validation MAE is {96: {'mse': tensor(0.2032, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 14: 100%|██████████| 151/151 [00:06<00:00, 23.77it/s]


Epoch 14, MSE-Loss: 0.03249114929445532, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 83.43it/s]


Validation MAE is {96: {'mse': tensor(0.2031, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 15: 100%|██████████| 151/151 [00:06<00:00, 23.58it/s]


Epoch 15, MSE-Loss: 0.03247441451330453, LR: 5e-05
Checkpointing succesfull after epoch 15


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 85.32it/s]


Validation MAE is {96: {'mse': tensor(0.2049, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: Validating: 100%|██████████| 86/86 [00:01<00:00, 77.46it/s]


Validation MAE is {96: {'mse': tensor(0.1707, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}
Checkpointing succesfull after epoch 15 for revin
Using device: cuda


Epoch: 1: 100%|██████████| 151/151 [00:06<00:00, 24.55it/s]


Epoch 1, MSE-Loss: 0.06587604383956518, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 82.53it/s]


Validation MAE is {96: {'mse': tensor(0.2361, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 2: 100%|██████████| 151/151 [00:06<00:00, 24.38it/s]


Epoch 2, MSE-Loss: 0.04742282411910051, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 80.06it/s]


Validation MAE is {96: {'mse': tensor(0.2222, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 3: 100%|██████████| 151/151 [00:06<00:00, 24.77it/s]


Epoch 3, MSE-Loss: 0.04433667563543414, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 82.63it/s]


Validation MAE is {96: {'mse': tensor(0.2188, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 4: 100%|██████████| 151/151 [00:06<00:00, 24.77it/s]


Epoch 4, MSE-Loss: 0.04243250497129579, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 82.96it/s]


Validation MAE is {96: {'mse': tensor(0.2145, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 5: 100%|██████████| 151/151 [00:06<00:00, 24.47it/s]


Epoch 5, MSE-Loss: 0.04112462741353654, LR: 0.0005
Checkpointing succesfull after epoch 5


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 89.27it/s]


Validation MAE is {96: {'mse': tensor(0.2095, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 6: 100%|██████████| 151/151 [00:06<00:00, 24.61it/s]


Epoch 6, MSE-Loss: 0.04002750366432777, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 81.50it/s]


Validation MAE is {96: {'mse': tensor(0.2084, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 7: 100%|██████████| 151/151 [00:06<00:00, 24.61it/s]


Epoch 7, MSE-Loss: 0.03894131605988307, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 80.95it/s]


Validation MAE is {96: {'mse': tensor(0.2081, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 8: 100%|██████████| 151/151 [00:06<00:00, 24.66it/s]


Epoch 8, MSE-Loss: 0.03825317779617594, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 82.79it/s]


Validation MAE is {96: {'mse': tensor(0.2109, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 9: 100%|██████████| 151/151 [00:06<00:00, 24.86it/s]


Epoch 9, MSE-Loss: 0.038109272891125145, LR: 0.0005


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 82.18it/s]


Validation MAE is {96: {'mse': tensor(0.2264, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 10: 100%|██████████| 151/151 [00:06<00:00, 24.76it/s]


Epoch 10, MSE-Loss: 0.03761766484587003, LR: 0.0005
Checkpointing succesfull after epoch 10


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 88.91it/s]


Validation MAE is {96: {'mse': tensor(0.2068, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 11: 100%|██████████| 151/151 [00:06<00:00, 24.71it/s]


Epoch 11, MSE-Loss: 0.03569170384859013, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 87.11it/s]


Validation MAE is {96: {'mse': tensor(0.2022, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 12: 100%|██████████| 151/151 [00:06<00:00, 24.79it/s]


Epoch 12, MSE-Loss: 0.035360332356383466, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 88.20it/s]


Validation MAE is {96: {'mse': tensor(0.2021, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 13: 100%|██████████| 151/151 [00:06<00:00, 24.65it/s]


Epoch 13, MSE-Loss: 0.03534292047288244, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 84.34it/s]


Validation MAE is {96: {'mse': tensor(0.2023, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 14: 100%|██████████| 151/151 [00:06<00:00, 24.78it/s]


Epoch 14, MSE-Loss: 0.035098239298391815, LR: 5e-05


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 86.15it/s]


Validation MAE is {96: {'mse': tensor(0.2023, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: 15: 100%|██████████| 151/151 [00:06<00:00, 24.47it/s]


Epoch 15, MSE-Loss: 0.035040249246240454, LR: 5e-05
Checkpointing succesfull after epoch 15


Epoch: Validating: 100%|██████████| 21/21 [00:00<00:00, 86.10it/s]


Validation MAE is {96: {'mse': tensor(0.2022, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}


Epoch: Validating: 100%|██████████| 86/86 [00:01<00:00, 80.02it/s]


Validation MAE is {96: {'mse': tensor(0.1744, device='cuda:0')}, 192: {'mse': 0}, 336: {'mse': 0}, 720: {'mse': 0}}
Checkpointing succesfull after epoch 15 for stationary
