In [8]:
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
import argparse
from config import *
import pickle
from torch.utils.tensorboard import SummaryWriter

from config import ModelConfig, CONFIG_DATA

from utils.data_handling import *
import numpy as np
from torch.utils.data import DataLoader
from iTransformer import iTransformer
import torch
from pathlib import Path
import optuna
import tqdm
# Notebook stand-in for the command line of the future .py script: the three
# positional arguments are declared as usual, then parsed from a fixed argv.
parser = argparse.ArgumentParser()
for _positional in ("model_config", "dataset", "normalization"):
    parser.add_argument(_positional)

args = parser.parse_args(
    ["iTransformer", "electricity", "series_stationarization"]
)

In [9]:
# Location of the pickled cache holding the pre-converted electricity splits.
# NOTE(review): absolute, machine-specific path — consider deriving it from a
# configurable data directory so the notebook runs elsewhere.
file_path = '/vol/fob-vol7/nebenf21/reinbene/bene/MA/data/electricity/electricity_dict.pkl'

try:
    # NOTE(review): pickle.load can execute arbitrary code embedded in the file;
    # only load caches that this notebook produced itself.
    with open(file_path, 'rb') as file:
        data_dict = pickle.load(file)
except FileNotFoundError:
    # Cache miss: rebuild the splits with electricity_loader() and pass every
    # entry through df_to_tensor before caching.
    data_dict = {key: df_to_tensor(value) for key, value in electricity_loader().items()}

    # Create the cache directory first; otherwise the dump below would raise
    # FileNotFoundError after the expensive rebuild has already been done.
    Path(file_path).parent.mkdir(parents=True, exist_ok=True)
    with open(file_path, 'wb') as file:
        pickle.dump(data_dict, file)


train_tensor = data_dict["electricity_train"]
val_tensor = data_dict["electricity_val"]

In [10]:
# Forecasting geometry: each sample has `window_size` input steps and
# `pred_length` target steps (presumably hourly data, i.e. one week in and one
# day out — confirm against the dataset).
window_size = 24*7
pred_length = 24
epoch = 5
BATCH_SIZE = 32

# NOTE(review): the recorded run of this notebook failed while batching with
# "stack expects each tensor to be equal size, but got [24, 348] ... and
# [23, 348]", which suggests SlidingWindowTimeSeriesDataset yields a truncated
# target for at least one index — check its __len__/__getitem__ bounds.
train_window = SlidingWindowTimeSeriesDataset(train_tensor, window_size, pred_length)
val_window = SlidingWindowTimeSeriesDataset(val_tensor, window_size, pred_length)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Shuffle only the training data; validation is iterated in a fixed order so
# repeated evaluations see identical batches.
train_dataloader = DataLoader(train_window, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_window, batch_size=BATCH_SIZE, shuffle=False)

# Pull one batch as a quick shape sanity check.
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

Using device: cuda
Feature batch shape: torch.Size([32, 168, 348])
Labels batch shape: torch.Size([32, 24, 348])


In [7]:
# Define the objective function to optimize
def objective(trial):
    """Train an iTransformer for one Optuna trial and return its validation loss.

    Only ``depth`` is actually searched (2..16). Every other hyperparameter is
    pinned through a degenerate ``suggest_*`` range so that it is still recorded
    in ``trial.params`` / ``study.best_params``.

    Relies on the notebook-level names ``window_size``, ``pred_length``,
    ``device``, ``train_dataloader`` and ``val_dataloader``.

    Args:
        trial: the Optuna trial used to sample hyperparameters.

    Returns:
        float: sum of the per-batch MSE losses over ``val_dataloader``.

    Raises:
        ValueError: if ``val_dataloader`` yields no batches, since a loss of 0.0
            would otherwise be reported as a perfect trial.
    """
    # Build the model config directly from the suggested values so each
    # hyperparameter has a single source of truth.
    config = {
        'num_variates': trial.suggest_int('num_variates', 348, 348),
        'lookback_len': window_size,
        'depth': trial.suggest_int('depth', 2, 16),
        'dim': trial.suggest_int('dim', 256, 256),
        'num_tokens_per_variate': trial.suggest_int('num_tokens_per_variate', 1, 1),
        'pred_length': pred_length,
        'dim_head': trial.suggest_int('dim_head', 32, 32),
        'heads': trial.suggest_int('heads', 4, 4),
        'attn_dropout': trial.suggest_float('attn_dropout', 0.1, 0.1),
        'ff_mult': trial.suggest_int('ff_mult', 4, 4),
        'ff_dropout': trial.suggest_float('ff_dropout', 0.1, 0.1),
        'num_mem_tokens': trial.suggest_int('num_mem_tokens', 4, 4),
        'use_reversible_instance_norm': trial.suggest_categorical('use_reversible_instance_norm', [True]),
        'reversible_instance_norm_affine': trial.suggest_categorical('reversible_instance_norm_affine', [True]),
        'flash_attn': trial.suggest_categorical('flash_attn', [True]),
    }
    learning_rate = trial.suggest_float('learning_rate', 0.0001, 0.0001)

    model = iTransformer(**config).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # Keep the criterion under its own name: rebinding it to a loss tensor made
    # the validation loop call a tensor and raise TypeError.
    criterion = torch.nn.MSELoss()

    n_epochs = 2
    for _ in range(n_epochs):
        model.train()
        for inputs, targets in train_dataloader:
            optimizer.zero_grad()
            predictions = model(inputs.to(device))
            # The model output is indexed by the forecast horizon (presumably a
            # dict keyed by prediction length — confirm against the installed
            # iTransformer version). The loss covers all variates.
            batch_loss = criterion(predictions[pred_length], targets.to(device))
            batch_loss.backward()
            optimizer.step()

    # Switch off dropout for evaluation so the validation loss is deterministic
    # for a given set of weights.
    model.eval()
    validation_loss = 0.0
    n_val_batches = 0
    with torch.no_grad():
        for inputs, targets in val_dataloader:
            predictions = model(inputs.to(device))
            validation_loss += criterion(predictions[pred_length], targets.to(device)).item()
            n_val_batches += 1

    if n_val_batches == 0:
        raise ValueError("val_dataloader yielded no batches; cannot score this trial")

    return validation_loss

# Search budget: how many trials Optuna is allowed to run
n_trials = 5

# The objective returns a validation loss, so the study minimizes it
study = optuna.create_study(direction="minimize")
study.optimize(func=objective, n_trials=n_trials)

# Report the parameter set of the best-scoring trial
best_params = study.best_params
print("Best Hyperparameters:", best_params)

[I 2023-11-27 13:35:36,045] A new study created in memory with name: no-name-589c5977-6b6a-431f-8ff6-da9e903dc8f9


Non-A100 GPU detected, using math or mem efficient attention if input tensor is on cuda


[W 2023-11-27 13:35:56,200] Trial 0 failed with parameters: {'num_variates': 348, 'depth': 16, 'dim': 256, 'num_tokens_per_variate': 1, 'dim_head': 32, 'heads': 4, 'attn_dropout': 0.1, 'ff_mult': 4, 'ff_dropout': 0.1, 'num_mem_tokens': 4, 'use_reversible_instance_norm': True, 'reversible_instance_norm_affine': True, 'flash_attn': True, 'learning_rate': 0.0001} because of the following error: RuntimeError('stack expects each tensor to be equal size, but got [24, 348] at entry 0 and [23, 348] at entry 11').
Traceback (most recent call last):
  File "/vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_37190/1631907017.py", line 55, in objective
    for input, target in train_dataloader:
  File "/vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 634, in __next__
    data = self._next_data()
  File "

RuntimeError: stack expects each tensor to be equal size, but got [24, 348] at entry 0 and [23, 348] at entry 11