# Libraries

In [2]:
# standard
import pandas as pd
import numpy as np
from tqdm import tqdm
import math
from math import sqrt
import time

# reading data
import os
import json
from collections import defaultdict

# machine learning
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.fft import rfft, irfft, fftn, ifftn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW

# visuals
import matplotlib.pyplot as plt
import seaborn as sns

# eFormer
from eFormer.embeddings import Encoding, ProbEncoding, PositionalEncoding
from eFormer.sparse_attention import ProbSparseAttentionModule, DetSparseAttentionModule
from eFormer.loss_function import CRPS
from eFormer.sparse_decoder import DetSparseDecoder, ProbSparseDecoder
from eFormer.Dataloader import TimeSeriesDataProcessor

# transformer Benchmarks
from Benchmarks.models.Informer import Model as Informer 
from Benchmarks.models.Autoformer import Model as Autoformer
from Benchmarks.models.Transformer import Model as VanillaTransformer
from Benchmarks.data_provider.data_loader import Dataset_Custom


%store -r Kelmarsh_df Penmanshiel_df

# Hyperparameters

In [31]:
# set global parameters
hyperparameters = {
    'n_heads': 4,
    'ProbabilisticModel': False,
    # embeddings
    'LenEmbeddingVector': 64,
    'batch_size': 64,
    # eFormer
    'forecast': 1,
    'LookBack': 72,
    'EarlyStoppingEpochs': 7,
    'dropout': 0.05,
    # models general
    'learning_rate': 6e-4,
    'WeightDecay': 1e-1,
    'NumberEpochs': 3,
    # benchmarks
    'Frequency': '10min',
    'pred_len': 1,
    'seq_len': 72,
    'label_len': 36,
    'root_path': '../data/Windturbinen/Kelmarsh/',
    'data_path': 'Kelmarsh_1.csv',
    'freq': 't',
    'activation': 'relu',
    'train_epochs': 3,
    'patience': 7,
    'learning_rate': 6e-4,
    'loss': 'CRPS',
    'use_gpu': True,  # Adjusted based on torch.cuda.is_available()
    'gpu': 0,
    'use_multi_gpu': False,  # Since store_true is used, the default is False
    'devices': '0,1,2,3'
    }

hyperparameters.update({
    'is_training': 1,
    'model_id': 'test',
    'features': 'M',
    'target': 'OT',
    'checkpoints': './checkpoints/',
    'bucket_size': 4,
    'n_hashes': 4,
    'enc_in': 7,
    'dec_in': 7,
    'c_out': 7,
    'd_model': 512,
    'e_layers': 2,
    'd_layers': 1,
    'd_ff': 2048,
    'moving_avg': 25,
    'factor': 1,
    'distil': True,  # Note: This reverses the default argparse behavior because store_false is used
    'embed': 'timeF',
    'output_attention': False,  # Since store_true is used, the default is False
    'do_predict': False,  # Since store_true is used, the default is False
    'num_workers': 10,
    'itr': 2,
    'des': 'test',
    'lradj': 'type1',
    'use_amp': False,  # Since store_true is used, the default is False
})

# Adjusting use_gpu based on torch.cuda.is_available()
hyperparameters['use_gpu'] = torch.cuda.is_available() and hyperparameters['use_gpu']

# Parsing device IDs for multi-GPU setup
if hyperparameters['use_gpu'] and hyperparameters['use_multi_gpu']:
    device_ids = hyperparameters['devices'].replace(' ', '').split(',')
    hyperparameters['device_ids'] = [int(id_) for id_ in device_ids]
    hyperparameters['gpu'] = hyperparameters['device_ids'][0]


# Load Data

In [3]:
def shifted_data(data, forecast, look_back):
    data = data.set_index('# Date and time')
    data.index.names = [None]
    data = data.drop(['Long Term Wind (m/s)'], axis=1)
    shifts = range(forecast, look_back + forecast)
    variables = data.columns
        
    shifted_columns = []
    for column in variables:
        for i in shifts:
            shifted_df = data[[column]].shift(i)
            shifted_df.rename(columns={column: f"{column} (lag {i})"}, inplace=True)
            shifted_columns.append(shifted_df)
        
    data = data.drop(['Wind speed (m/s)'], axis=1)
    data_shifted = pd.concat([data] + shifted_columns, axis=1)
    data_shifted.dropna(inplace=True)
        
    return data_shifted

In [4]:
data = Kelmarsh_df['1']
data = data.set_index('# Date and time')
data.index.names = [None]
data = data.drop(['Long Term Wind (m/s)'], axis=1)

# Transformer Model

In [6]:
# Assuming `df` is your initial DataFrame
processor = TimeSeriesDataProcessor(
    dataframe=data,
    forecast=hyperparameters['forecast'],
    look_back=hyperparameters['LookBack'],
    batch_size=hyperparameters['BatchSize'])
    
train_loader, test_loader, eval_loader = processor.create_dataloaders()

In [7]:
class eFormer(nn.Module):
    def __init__(self, in_features, len_embedding_vector, n_heads_global, probabilistic_model=False):
        super(eFormer, self).__init__()
        self.probabilistic_model = probabilistic_model
        self.n_heads_global = n_heads_global
        self.len_embedding_vector = len_embedding_vector

        # Initialize encoding model
        if probabilistic_model:
            self.encoding_model = ProbEncoding(in_features=in_features, out_features=len_embedding_vector)
        else:
            self.encoding_model = Encoding(in_features=in_features, out_features=len_embedding_vector)

        # Initialize attention module
        if probabilistic_model:
            self.attention_module = ProbSparseAttentionModule(d_model=len_embedding_vector, n_heads=n_heads_global, prob_sparse_factor=5)
        else:
            self.attention_module = DetSparseAttentionModule(d_model=len_embedding_vector, n_heads=n_heads_global, prob_sparse_factor=5)

        # Initialize decoder
        # Assuming the decoder initialization does not actually require the output shape directly but parameters that depend on the model configuration
        if probabilistic_model:
            self.decoder = ProbSparseDecoder(d_model=len_embedding_vector, n_heads=n_heads_global, forecast_horizon=1, encoder_output_dim=len_embedding_vector)
        else:
            self.decoder = DetSparseDecoder(d_model=len_embedding_vector, n_heads=n_heads_global, forecast_horizon=1, encoder_output_dim=len_embedding_vector)

    def forward(self, features_matrix):
        if torch.isnan(features_matrix).any():
            raise ValueError('NaN values detected in Input')

        embeddings = self.encoding_model(features_matrix)
        if torch.isnan(embeddings).any():
            raise ValueError('NaN values detected in Embeddings')

        encoder_output = self.attention_module(embeddings, embeddings, embeddings)
        if torch.isnan(encoder_output).any():
            raise ValueError('NaN values detected in Sparse Attention Output')

        forecasts, crps_weights = self.decoder(encoder_output)
        return forecasts, crps_weights

In [8]:
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, val_loss):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.counter = 0
            if self.verbose:
                print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f})')
            self.val_loss_min = val_loss

early_stopping = EarlyStopping(
    patience=hyperparameters['EarlyStoppingEpochs'],
    verbose=True
    )

model = eFormer(
    in_features=(hyperparameters['LookBack'] * 2),
    len_embedding_vector=hyperparameters['LenEmbeddingVector'],
    n_heads_global=hyperparameters['nHeads'],
    probabilistic_model=hyperparameters['ProbabilisticModel'])
optimizer = AdamW(
    params = model.parameters(),
    lr=hyperparameters['LossRate'],
    weight_decay=hyperparameters['WeightDecay']
    )

loss_fn = CRPS()

num_epochs = hyperparameters['NumberEpochs']

for epoch in range(num_epochs):
    epoch_start_time = time.time()
    model.train()
    train_losses = []
    for features, labels in train_loader:
        optimizer.zero_grad()
        predictions, crps_weights = model(features)
        loss = loss_fn(predictions, labels, crps_weights)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())
    
    train_loss_avg = np.mean(train_losses)
    print(f"Epoch {epoch + 1} / {num_epochs} with Loss: {train_loss_avg}")

    # Validation phase
    model.eval()
    validation_losses = []
    with torch.no_grad():
        for features, labels in eval_loader:
            predictions, crps_weights = model(features)
            val_loss = loss_fn(predictions, labels, crps_weights)
            validation_losses.append(val_loss.item())

    val_loss_avg = np.mean(validation_losses)
    print(f"Validation Loss: {val_loss_avg}")

    epoch_end_time = time.time()
    epoch_duration = epoch_end_time - epoch_start_time
    print(f"Epoch Duration: {epoch_duration}s")

    early_stopping(val_loss_avg)
    if early_stopping.early_stop:
        print("Early stopping")
        break

Epoch 1 / 3 with Loss: -647390.7945996479
Validation Loss: -3186469.480078125
Epoch Duration: 41.97282648086548s
Epoch 2 / 3 with Loss: -13735284.939655172
Validation Loss: -29110255.20625
Epoch Duration: 25.689845323562622s
Validation loss decreased (inf --> -29110255.206250)
Epoch 3 / 3 with Loss: -53044122.4874456
Validation Loss: -80461121.5125
Epoch Duration: 42.36861443519592s
Validation loss decreased (-29110255.206250 --> -80461121.512500)


# Ressource Measurement

In [47]:
def check_system_conditions():
    # Get CPU usage for each core
    cpu_percent = round(psutil.cpu_percent(), 4)

    # Get memory information
    memory_info = psutil.virtual_memory()
    memory_used_gb = round(memory_info.used / (1024 ** 3), 4)

    # Get GPU information
    try:
      gpu_info = GPUtil.getGPUs()[0]
      gpu_memory_used_gb = round(gpu_info.memoryUsed / 1024, 4)
    except IndexError:
      # If no GPU is found, set variables to None
      gpu_memory_used_gb = None

    # Collect data in a dictionary
    comp_usage = {
        'CPU Usage': cpu_percent,
        'Memory Usage (GB)': memory_used_gb,
        'GPU Usage (GB)': gpu_memory_used_gb
    }

    return comp_usage

In [None]:
import time
import psutil
import GPUtil

# Assuming the check_system_conditions function is defined as previously mentioned

# Define a function to run monitoring in a separate thread
def monitor_system_usage(every_n_seconds=10, keep_running=lambda: True, results_list=[]):
    while keep_running():
        comp_usage = check_system_conditions()
        results_list.append(comp_usage)
        time.sleep(every_n_seconds)

# Initialize a list to store the results
system_usage_results = []

# Define a lambda function to control the monitoring loop
# It will return False to stop the thread once training is done
keep_monitoring = lambda: keep_monitoring_flag

# Initialize the flag to True before starting training
keep_monitoring_flag = True

# Start the monitoring thread
monitor_thread = threading.Thread(target=monitor_system_usage, args=(5, keep_monitoring, system_usage_results))
monitor_thread.start()

# Training loop here
# Insert your existing training loop code

# After training is done, set the flag to False to stop the monitoring thread
keep_monitoring_flag = False
monitor_thread.join()  # Wait for the monitoring thread to finish

# Convert the results list to a DataFrame
system_usage_df = pd.DataFrame(system_usage_results)

print(system_usage_df)


# Benchmark Models

## DataLoader

seperate inputs for Time stamps and values

In [13]:
Kelmarsh_df['1'].columns

Index(['# Date and time', 'Wind speed (m/s)', 'Long Term Wind (m/s)',
       'Energy Export (kWh)'],
      dtype='object')

In [5]:
Kelmarsh_df['1'].rename(columns={'# Date and time':'date'}, inplace=True)

Kelmarsh_df['1'].to_csv('../data/Windturbinen/Kelmarsh/Kelmarsh_1.csv', encoding='utf-8', index=False)

# Path to the directory containing your dataset
root_path = '../data/Windturbinen/Kelmarsh/'
data_path = 'Kelmarsh_1.csv'  # Ensure this is the name of your CSV file

# Initialize your custom dataset
# Note: Adjust 'size', 'freq', and other parameters as needed for your specific use case
dataset = Dataset_Custom(
    root_path=root_path,
    data_path=data_path,
    target='Energy Export (kWh)',
    features='M',
    size=[hyperparameters['LookBack'], hyperparameters['LookBack'], hyperparameters['forecast']],
    freq=hyperparameters['Frequency']
    )

# Create a DataLoader
benchmark_dataloader = DataLoader(
    dataset,
    batch_size=hyperparameters['BatchSize'],
    shuffle=True
    )

# Example of iterating over your DataLoader
for batch in dataloader:
    x_enc, x_dec, seq_x_enc, seq_x_dec = batch
    # Your training code here
    break

tensor([[[ 7.7585e-01,  1.3414e+00,  7.1067e-01],
         [ 1.2136e+00,  1.3414e+00,  9.6010e-01],
         [ 1.1106e+00,  1.3414e+00,  2.0576e+00],
         ...,
         [ 3.2268e-02,  1.3414e+00, -1.5733e-01],
         [-2.7554e-01,  1.3414e+00, -6.6616e-01],
         [ 7.5954e-02,  1.3414e+00, -1.6731e-01]],

        [[-5.6902e-01,  5.9580e-01, -6.4621e-01],
         [-4.7084e-01,  5.9580e-01, -5.4644e-01],
         [-3.3983e-01,  5.9580e-01, -5.9633e-01],
         ...,
         [-1.1465e+00,  5.9580e-01, -8.8566e-01],
         [-1.1098e+00,  5.9580e-01, -8.8566e-01],
         [-9.6209e-01,  5.9580e-01, -8.5573e-01]],

        [[-1.6097e+00, -5.9711e-01, -9.5550e-01],
         [-1.5683e+00, -5.9711e-01, -9.5217e-01],
         [-1.3801e+00, -5.9711e-01, -9.5217e-01],
         ...,
         [-4.4152e-01, -5.9711e-01, -6.0630e-01],
         [-3.3553e-01, -5.9711e-01, -5.3646e-01],
         [-2.8744e-01, -5.9711e-01, -2.8704e-01]],

        ...,

        [[-9.4751e-04, -5.9711e-01, -1

## vanilla Transformer

In [32]:
class Config:
    def __init__(self, dictionary):
        for key, value in dictionary.items():
            setattr(self, key, value)

# Convert the hyperparameters dictionary to a Config object
hyperparameters_class = Config(hyperparameters)

# Initialize model, optimizer, and loss function here
model = VanillaTransformer(configs=hyperparameters_class)  # Adjust this line based on actual model initialization
optimizer = AdamW(model.parameters(), lr=hyperparameters['learning_rate'], weight_decay=hyperparameters['WeightDecay'])
loss_fn = CRPS()  # Ensure CRPS is correctly implemented

num_epochs = hyperparameters['NumberEpochs']

for epoch in range(num_epochs):
    epoch_start_time = time.time()
    model.train()
    train_losses = []
    for batch in train_loader:
        features, labels = batch['x_enc'], batch['y']  # Adjust these keys based on your DataLoader output
        optimizer.zero_grad()
        predictions = model(features)  # Adjust model call based on actual inputs/outputs
        loss = loss_fn(predictions, labels)  # Adjust based on how CRPS is implemented
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

NameError: name 'train_loader' is not defined

# Test Area

In [40]:
%run Benchmarks/run.py \
  --is_training 1 \
  --root_path ../data/Windturbinen/Kelmarsh/ \
  --data_path Kelmarsh_1.csv \
  --model_id VanillaTransformer_quick \
  --model Transformer \
  --data custom \
  --features M \
  --seq_len 96 \
  --label_len 48 \
  --pred_len 96 \
  --e_layers 2 \
  --d_layers 1 \
  --factor 3 \
  --enc_in 1 \
  --dec_in 1 \
  --c_out 1 \
  --des 'Exp' \
  --itr 1


Args in experiment:
Namespace(is_training=1, model_id='VanillaTransformer_quick', model='Transformer', data='custom', root_path='../data/Windturbinen/Kelmarsh/', data_path='Kelmarsh_1.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', seq_len=96, label_len=48, pred_len=96, bucket_size=4, n_hashes=4, enc_in=1, dec_in=1, c_out=1, d_model=512, n_heads=8, e_layers=2, d_layers=1, d_ff=2048, moving_avg=25, factor=3, distil=True, dropout=0.05, embed='timeF', activation='gelu', output_attention=False, do_predict=False, num_workers=10, itr=1, train_epochs=10, batch_size=32, patience=3, learning_rate=0.0001, des='Exp', loss='mse', lradj='type1', use_amp=False, use_gpu=False, gpu=0, use_multi_gpu=False, devices='0,1,2,3')
Use CPU
>>>>>>>start training : VanillaTransformer_quick_Transformer_custom_ftM_sl96_ll48_pl96_dm512_nh8_el2_dl1_df2048_fc3_ebtimeF_dtTrue_Exp_0>>>>>>>>>>>>>>>>>>>>>>>>>>


ValueError: list.remove(x): x not in list

# To-Do's

- ground truth and forecasted values in graph as time series
- seperate model for wind and company, one model can't output 2 different results for same equation

- label_length = look_back
- sequence_length -> window size