# Libraries

In [39]:
# standard
import pandas as pd
import numpy as np
from tqdm import tqdm
import math
from math import sqrt

# reading data
import os
import json
from collections import defaultdict

# machine learning
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.fft import rfft, irfft, fftn, ifftn

# visuals
import matplotlib.pyplot as plt
import seaborn as sns

# eFormer
from eFormer.embeddings import Encoding, ProbEncoding, PositionalEncoding
from eFormer.sparse_attention import ProbSparseAttentionModule, DetSparseAttentionModule
from eFormer.loss_function import crps

%store -r Kelmarsh_df Penmanshiel_df

# Architektur

## Hyperparameters

In [40]:
# set global parameters

n_heads_global = 4
probabilistic_model = True
len_embedding_vector = 64

## Embedding

probabilistic embedding & positional encoding

In [41]:
test_df = Kelmarsh_df['1'][['# Date and time', 'Energy Export (kWh)']][-1024:]

# First, ensure that the column is in datetime format
test_df['# Date and time'] = pd.to_datetime(test_df['# Date and time'])

# Then convert it to timestamps
test_df['Timestamp'] = test_df['# Date and time'].apply(lambda x: x.timestamp())

# interpolate NaN values
test_df = test_df.interpolate(method='linear')

features_matrix = test_df[['Energy Export (kWh)', 'Timestamp']].values

In [42]:
# Forward pass through the model
feature_tensor = torch.tensor(features_matrix, dtype=torch.float32).unsqueeze(0)
# check for NaN values early
if torch.isnan(feature_tensor).any():
    raise ValueError('NaN values detected in Input')

# decide which model to use
if probabilistic_model == True:
    encoding_model = ProbEncoding(in_features=feature_tensor.shape[-1], out_features=len_embedding_vector)
else:
    encoding_model = Encoding(in_features=feature_tensor.shape[-1], out_features=len_embedding_vector)

# create embeddings
embeddings = encoding_model(feature_tensor)

# Check for NaN values after computation
if torch.isnan(embeddings).any():
    raise ValueError('NaN values detected in Embeddings')
else:
    print(f"Embedding shape: {embeddings.shape}")

Embedding shape: torch.Size([2, 1, 1024, 64])


## Attention Mechanism

In [43]:
# determine which model to use
if probabilistic_model == True:
    model = ProbSparseAttentionModule(
        d_model=embeddings.shape[-1],
        n_heads=n_heads_global,
        prob_sparse_factor=5
        )
else:
    model = DetSparseAttentionModule(
        d_model=embeddings.shape[-1],
        n_heads=n_heads_global,
        prob_sparse_factor=5
        )

output = model(embeddings)

# check for NaN values early
if torch.isnan(output).any():
    raise ValueError('NaN values detected in ProbSparse Output')
else:
    print(f"Sparse Attention shape: {output.shape}")

Sparse Attention shape: torch.Size([2, 1, 1024, 64])


In [44]:
class DetSparseDecoder(nn.Module):
    def __init__(self, d_model, n_heads, encoder_output_dim, forecast_horizon=22, max_len=5000, d_ff=None, dropout=0.1, activation="relu"):
        super(DetSparseDecoder, self).__init__()
        self.d_model = d_model
        self.n_heads = n_heads
        self.forecast_horizon = forecast_horizon
        d_ff = d_ff or 4*d_model
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

        # Initialize PositionalEncoding
        self.pos_encoder = PositionalEncoding(d_model, max_len)

        # Sparse Attention Module for encoder-decoder (cross) attention
        self.encoder_decoder_attention = DetSparseAttentionModule(d_model, n_heads, prob_sparse_factor=5)

        # Feed-forward network components
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        
        # Normalization layers
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

        # Output layer
        self.output_layer = nn.Linear(d_model, 1)

    def forward(self, encoder_output):
        # Generate positional encodings
        dummy_input = torch.zeros(self.forecast_horizon, self.d_model).unsqueeze(0)
        pos_encodings = self.pos_encoder(dummy_input)

        # Apply encoder-decoder attention using positional encodings as queries and encoder outputs as keys and values
        attn_output, _ = self.encoder_decoder_attention(pos_encodings, encoder_output, encoder_output, None)
        attn_output = self.norm1(attn_output + self.dropout(attn_output))

        # Feed-forward network
        ff_output = attn_output.transpose(-1, 1)  # Prepare for conv1d
        ff_output = self.dropout(self.activation(self.conv1(ff_output)))
        ff_output = self.dropout(self.conv2(ff_output))
        ff_output = ff_output.transpose(-1, 1)  # Back to original dims
        ff_output = self.norm2(attn_output + self.dropout(ff_output))

        # Generate forecasts based on the attention output
        forecasts = self.output_layer(ff_output).squeeze(-1)
        
        return forecasts

In [45]:
model = DetSparseDecoder(
    d_model = output.shape[-1],
    n_heads = n_heads_global,
    forecast_horizon = 44,
    encoder_output_dim = output[0].shape
)

model(output[0])

TypeError: DetSparseAttentionModule.forward() takes 2 positional arguments but 5 were given

# Transformer Model

# Test Area

In [13]:
test_df.tail()

Unnamed: 0,# Date and time,Energy Export (kWh),Timestamp
26059,2021-06-30 23:10:00,5.0,1625095000.0
26060,2021-06-30 23:20:00,19.0,1625095000.0
26061,2021-06-30 23:30:00,12.0,1625096000.0
26062,2021-06-30 23:40:00,1.0,1625096000.0
26063,2021-06-30 23:50:00,0.0,1625097000.0


In [10]:
class DetSparseDecoder(nn.Module):
    def __init__(self, d_model, n_heads, encoder_output_dim, forecast_horizon=12, max_len=5000):
        super(DetSparseDecoder, self).__init__()
        self.d_model = d_model
        self.n_heads = n_heads
        self.forecast_horizon = forecast_horizon
        # Initialize PositionalEncoding with a max_len that covers your forecast_horizon
        self.pos_encoder = PositionalEncoding(d_model, max_len)
        # Assuming SparseAttention is already defined and can handle the operations
        self.encoder_decoder_attention = DetSparseAttentionModule(d_model, n_heads, prob_sparse_factor=5)  # Attention over encoder outputs
        self.output_layer = nn.Linear(d_model, 1)  # Assuming a univariate forecast
    
    def forward(self, encoder_output):
        # encoder_output: Output from the encoder phase

        # Generate positional encodings for the forecast horizon
        # Create a dummy input tensor for positional encoding generation
        dummy_input = torch.zeros(self.forecast_horizon, self.d_model).unsqueeze(0)
        pos_encodings = self.pos_encoder(dummy_input)
        
        # Apply encoder-decoder attention using positional encodings as queries and encoder outputs as keys and values
        attn_output, _ = self.encoder_decoder_attention(pos_encodings, encoder_output, encoder_output, None)
        print(f"attention output: {attn_output.shape}")
        
        # Generate forecasts based on the attention output
        # Assuming the last dimension of attn_output corresponds to the forecast horizon
        forecasts = self.output_layer(attn_output).squeeze(-1)  # Adjust dimensions as necessary
        print(f"forecast shape: {forecasts.shape}")
        
        return forecasts