In [None]:
import numpy as np
import pandas as pd
import datetime 

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the hourly dataset and index it chronologically by its timestamp column.
# NOTE(review): absolute local path — consider a configurable, relative data directory.
df = pd.read_excel('/Users/evenbakke/Documents/Master Thesis/MasterThesis-/Data 2.0/Final Data with 2024.xlsx')
# sort_index returns a NEW frame; the result must be assigned, otherwise the sort is
# silently discarded and later label-based date slices run on unsorted data.
df = df.set_index("DateTime").sort_index(ascending=True)
df

In [None]:
# Inspect the available column names before any filtering.
df.columns

In [None]:
# Keep only rows labelled 2023 through 2024 (label-based slice on the index;
# assumes the "DateTime" index is a DatetimeIndex — TODO confirm).
df = df.loc['2023':'2024']
df

In [None]:
# Drop the lagged system-price columns so they are absent from every later step.
# Note: df.drop raises KeyError if any listed column is missing, so this cell
# cannot be re-run on the already-reduced frame.
columns_to_remove = [
    'System Price Lag 1', 'System Price Lag 2', 'System Price Lag 3',
    'System Price Lag 24', 'System Price Lag 48', 'System Price Lag 168'
]
df = df.drop(columns=columns_to_remove)
df

# Feature Selection 

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
from sklearn.preprocessing import MinMaxScaler

# Chronological split: rows up to 2023-12-31 are used for fitting, rows from
# 2024-01-01 onwards are held out as the test set.
y = df['System Price']
X = df.drop(columns='System Price')

X_train, X_test = X.loc[:'2023-12-31'], X.loc['2024-01-01':]
y_train, y_test = y.loc[:'2023-12-31'], y.loc['2024-01-01':]

# Indicator columns are passed through unscaled; all other features are min-max
# scaled with a scaler fitted on the training rows only.
dummy_columns = ['Weekend', 'Christmas vacation', 'Public holiday', 'Winter Time']
non_dummy_columns = [col for col in X_train.columns if col not in dummy_columns]

scaler_tanh = MinMaxScaler(feature_range=(-1, 1))
scaler_target_tanh = MinMaxScaler(feature_range=(-1, 1))

# fit_transform on the training rows both fits the scaler and scales them;
# the test rows are only transformed.
X_train_tanh = pd.concat(
    [
        pd.DataFrame(
            scaler_tanh.fit_transform(X_train[non_dummy_columns]),
            columns=non_dummy_columns,
            index=X_train.index,
        ),
        X_train[dummy_columns],
    ],
    axis=1,
)
X_test_tanh = pd.concat(
    [
        pd.DataFrame(
            scaler_tanh.transform(X_test[non_dummy_columns]),
            columns=non_dummy_columns,
            index=X_test.index,
        ),
        X_test[dummy_columns],
    ],
    axis=1,
)

# The target gets its own scaler, again fitted on the training rows only.
y_train_tanh = pd.Series(
    scaler_target_tanh.fit_transform(y_train.values.reshape(-1, 1)).flatten(),
    index=y_train.index,
)
y_test_tanh = pd.Series(
    scaler_target_tanh.transform(y_test.values.reshape(-1, 1)).flatten(),
    index=y_test.index,
)

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import mutual_info_regression
import matplotlib.pyplot as plt
import seaborn as sns


# Compute mutual information between each feature and the target variable.
# The estimator adds small random noise to continuous variables, so the seed is
# pinned to make the ranking reproducible across kernel restarts.
mutual_info = mutual_info_regression(X_train_tanh, y_train_tanh, random_state=42)

# Create a DataFrame to display the scores, highest first
mi_scores = pd.DataFrame(mutual_info, index=X_train_tanh.columns, columns=['Mutual Information'])
mi_scores = mi_scores.sort_values(by='Mutual Information', ascending=False)


print(mi_scores)


# Bar chart of the scores, one bar per feature in ranked order
plt.figure(figsize=(12, 6))
sns.barplot(x=mi_scores.index, y=mi_scores['Mutual Information'], palette='viridis')
plt.xticks(rotation=90)
plt.title('Mutual Information Scores')
plt.show()


In [None]:
import pandas as pd


# Absolute pairwise Pearson correlations between the scaled training features
feature_correlation_matrix = X_train_tanh.corr().abs()

# Pairs whose absolute correlation exceeds this value are reported
threshold = 0.8

# Flatten the square matrix into one row per (Feature1, Feature2) pair
high_correlation_pairs = feature_correlation_matrix.unstack().reset_index()
high_correlation_pairs.columns = ['Feature1', 'Feature2', 'Correlation']

# Drop self-pairs. The comparison must be column-vs-column; comparing against the
# literal string 'Feature2' is true for every row and filters nothing.
high_correlation_pairs = high_correlation_pairs[
    high_correlation_pairs['Feature1'] != high_correlation_pairs['Feature2']
]
high_correlation_pairs = high_correlation_pairs[high_correlation_pairs['Correlation'] > threshold]

# The matrix is symmetric: keep each unordered pair once (alphabetical order)
high_correlation_pairs = high_correlation_pairs[high_correlation_pairs['Feature1'] < high_correlation_pairs['Feature2']]


print(f"High Correlation Pairs (threshold > {threshold}):")
print(high_correlation_pairs)


In [None]:
# Re-check the remaining column names before removing correlated features.
df.columns

In [None]:

# Features removed after the mutual-information / correlation analysis.
# 'System Price Lag 48' is intentionally NOT listed: it was already dropped together
# with the other lag columns earlier in the notebook, and df.drop raises KeyError
# for labels that are no longer present.
features_to_remove = [
    'NO2 Price', 'NO5 Price',
    'NO4 Price', 'SE1 Price', 'SE4 Price',
    'DK2 Price',
    'Coal',
    'Settled wind production SE', 'Settled wind production DK',
    'Settled wind production FI', 'Settled wind production NO',
    'Temp NO'
]

df = df.drop(columns=features_to_remove)



In [None]:

# Rebuild the train/test split and the tanh-range scaling on the reduced feature set.
y = df['System Price']
X = df.drop(columns='System Price')

X_train, X_test = X.loc[:'2023-12-31'], X.loc['2024-01-01':]
y_train, y_test = y.loc[:'2023-12-31'], y.loc['2024-01-01':]

# Indicator columns stay unscaled; the remaining features are min-max scaled
dummy_columns = ['Weekend', 'Christmas vacation', 'Public holiday', 'Winter Time']
non_dummy_columns = [col for col in X_train.columns if col not in dummy_columns]

scaler_tanh = MinMaxScaler(feature_range=(-1, 1))
scaler_target_tanh = MinMaxScaler(feature_range=(-1, 1))

# Both scalers are fitted on the training rows only
scaler_tanh.fit(X_train[non_dummy_columns])
scaler_target_tanh.fit(y_train.values.reshape(-1, 1))

# Scaled continuous features, re-joined with the untouched indicator columns
X_train_tanh = pd.concat(
    [
        pd.DataFrame(
            scaler_tanh.transform(X_train[non_dummy_columns]),
            columns=non_dummy_columns,
            index=X_train.index,
        ),
        X_train[dummy_columns],
    ],
    axis=1,
)
X_test_tanh = pd.concat(
    [
        pd.DataFrame(
            scaler_tanh.transform(X_test[non_dummy_columns]),
            columns=non_dummy_columns,
            index=X_test.index,
        ),
        X_test[dummy_columns],
    ],
    axis=1,
)

# Scaled targets as Series carrying the original index
y_train_tanh = pd.Series(
    scaler_target_tanh.transform(y_train.values.reshape(-1, 1)).flatten(),
    index=y_train.index,
)
y_test_tanh = pd.Series(
    scaler_target_tanh.transform(y_test.values.reshape(-1, 1)).flatten(),
    index=y_test.index,
)

# Train/test split, 6-fold rolling cross-validation, and normalization for sigmoid & tanh

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Split the original data into training and test sets
# Separate the target from the features, then split chronologically:
# everything up to 2023-12-31 is training data, everything from 2024-01-01 is test data.
y = df['System Price']
X = df.drop(columns='System Price')

X_train, X_test = X.loc[:'2023-12-31'], X.loc['2024-01-01':]
y_train, y_test = y.loc[:'2023-12-31'], y.loc['2024-01-01':]

In [None]:
# Function to create rolling folds within the training set
def create_rolling_folds(X, y, train_months, val_months):
    """Build rolling-window (train, validation) folds by row position.

    A "month" is approximated as one twelfth of the rows in ``X``, i.e. the input
    is treated as spanning one year regardless of its actual sampling frequency.
    Each fold trains on ``train_months`` such blocks and validates on the
    ``val_months`` blocks that immediately follow; successive folds start one
    block later.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature rows in chronological order.
    y : pandas.Series
        Target values aligned row-for-row with ``X``.
    train_months : int
        Number of month-blocks in each training window.
    val_months : int
        Number of month-blocks in each validation window.

    Returns
    -------
    list of tuple
        ``(X_train_fold, y_train_fold, X_val_fold, y_val_fold)`` per fold.
        Empty when ``X`` has fewer than 12 rows or is too short for one window.
    """
    total_rows = X.shape[0]
    rows_per_month = total_rows // 12

    # With fewer than 12 rows the block size is 0; range() would reject a zero step.
    if rows_per_month == 0:
        return []

    window_rows = (train_months + val_months) * rows_per_month

    folds = []
    for fold_start in range(0, total_rows - window_rows + 1, rows_per_month):
        train_end = fold_start + train_months * rows_per_month
        val_end = train_end + val_months * rows_per_month

        folds.append((
            X.iloc[fold_start:train_end],
            y.iloc[fold_start:train_end],
            X.iloc[train_end:val_end],
            y.iloc[train_end:val_end],
        ))

    return folds


# Rolling folds over the training data: 6 month-blocks to train, 1 to validate
folds = create_rolling_folds(X_train, y_train, train_months=6, val_months=1)

# Function to scale the data within each fold for sigmoid
def scale_fold_sigmoid(X_train_fold, X_val_fold, y_train_fold, y_val_fold):
    """Min-max scale one fold to the (0, 1) range.

    Both scalers are fitted on the training part of the fold only and then
    applied to the training and validation parts. The indicator columns are
    appended unscaled after the scaled columns.

    Returns
    -------
    tuple
        ``(X_train, X_val, y_train, y_val)`` as DataFrames / Series carrying the
        original indices.
    """
    dummy_columns = ['Weekend', 'Christmas vacation', 'Public holiday', 'Winter Time']
    non_dummy_columns = [col for col in X_train_fold.columns if col not in dummy_columns]

    # .fit returns the scaler itself, so construction and fitting can be chained
    feature_scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train_fold[non_dummy_columns])
    target_scaler = MinMaxScaler(feature_range=(0, 1)).fit(y_train_fold.values.reshape(-1, 1))

    def _scale_features(frame):
        # scaled continuous columns first, untouched indicator columns last
        scaled = pd.DataFrame(
            feature_scaler.transform(frame[non_dummy_columns]),
            columns=non_dummy_columns,
            index=frame.index,
        )
        return pd.concat([scaled, frame[dummy_columns]], axis=1)

    def _scale_target(series):
        column = series.values.reshape(-1, 1)
        return pd.Series(target_scaler.transform(column).flatten(), index=series.index)

    return (
        _scale_features(X_train_fold),
        _scale_features(X_val_fold),
        _scale_target(y_train_fold),
        _scale_target(y_val_fold),
    )

# Function to scale the data within each fold for tanh
def scale_fold_tanh(X_train_fold, X_val_fold, y_train_fold, y_val_fold):
    """Min-max scale one fold to the (-1, 1) range.

    Both scalers are fitted on the training part of the fold only and then
    applied to the training and validation parts. The indicator columns are
    appended unscaled after the scaled columns.

    Returns
    -------
    tuple
        ``(X_train, X_val, y_train, y_val)`` as DataFrames / Series carrying the
        original indices.
    """
    dummy_columns = ['Weekend', 'Christmas vacation', 'Public holiday', 'Winter Time']
    non_dummy_columns = [col for col in X_train_fold.columns if col not in dummy_columns]

    # .fit returns the scaler itself, so construction and fitting can be chained
    feature_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X_train_fold[non_dummy_columns])
    target_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(y_train_fold.values.reshape(-1, 1))

    def _scale_features(frame):
        # scaled continuous columns first, untouched indicator columns last
        scaled = pd.DataFrame(
            feature_scaler.transform(frame[non_dummy_columns]),
            columns=non_dummy_columns,
            index=frame.index,
        )
        return pd.concat([scaled, frame[dummy_columns]], axis=1)

    def _scale_target(series):
        column = series.values.reshape(-1, 1)
        return pd.Series(target_scaler.transform(column).flatten(), index=series.index)

    return (
        _scale_features(X_train_fold),
        _scale_features(X_val_fold),
        _scale_target(y_train_fold),
        _scale_target(y_val_fold),
    )

# Apply scaling within each fold and store results for sigmoid
scaled_folds_sigmoid = []
for X_train_fold, y_train_fold, X_val_fold, y_val_fold in folds:
    X_train_sigmoid, X_val_sigmoid, y_train_sigmoid, y_val_sigmoid = scale_fold_sigmoid(X_train_fold, X_val_fold, y_train_fold, y_val_fold)
    scaled_folds_sigmoid.append((X_train_sigmoid, y_train_sigmoid, X_val_sigmoid, y_val_sigmoid))

# Apply scaling within each fold and store results for tanh
scaled_folds_tanh = []
for X_train_fold, y_train_fold, X_val_fold, y_val_fold in folds:
    X_train_tanh, X_val_tanh, y_train_tanh, y_val_tanh = scale_fold_tanh(X_train_fold, X_val_fold, y_train_fold, y_val_fold)
    scaled_folds_tanh.append((X_train_tanh, y_train_tanh, X_val_tanh, y_val_tanh))

# Function to scale the test set for sigmoid
def scale_test_set_sigmoid(X_train, y_train, X_test, y_test):
    """Scale the test set to the (0, 1) range using statistics from the training set.

    The feature and target scalers are fitted on ``X_train`` / ``y_train`` and only
    applied to ``X_test`` / ``y_test``. Indicator columns are appended unscaled.

    Returns
    -------
    tuple
        ``(X_test_scaled, y_test_scaled)`` carrying the test set's index.
    """
    dummy_columns = ['Weekend', 'Christmas vacation', 'Public holiday', 'Winter Time']
    non_dummy_columns = [col for col in X_train.columns if col not in dummy_columns]

    feature_scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train[non_dummy_columns])
    target_scaler = MinMaxScaler(feature_range=(0, 1)).fit(y_train.values.reshape(-1, 1))

    scaled_features = pd.DataFrame(
        feature_scaler.transform(X_test[non_dummy_columns]),
        columns=non_dummy_columns,
        index=X_test.index,
    )
    scaled_target = target_scaler.transform(y_test.values.reshape(-1, 1)).flatten()

    return (
        pd.concat([scaled_features, X_test[dummy_columns]], axis=1),
        pd.Series(scaled_target, index=y_test.index),
    )

# Function to scale the test set for tanh
def scale_test_set_tanh(X_train, y_train, X_test, y_test):
    """Scale the test set to the (-1, 1) range using statistics from the training set.

    The feature and target scalers are fitted on ``X_train`` / ``y_train`` and only
    applied to ``X_test`` / ``y_test``. Indicator columns are appended unscaled.

    Returns
    -------
    tuple
        ``(X_test_scaled, y_test_scaled)`` carrying the test set's index.
    """
    dummy_columns = ['Weekend', 'Christmas vacation', 'Public holiday', 'Winter Time']
    non_dummy_columns = [col for col in X_train.columns if col not in dummy_columns]

    feature_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X_train[non_dummy_columns])
    target_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(y_train.values.reshape(-1, 1))

    scaled_features = pd.DataFrame(
        feature_scaler.transform(X_test[non_dummy_columns]),
        columns=non_dummy_columns,
        index=X_test.index,
    )
    scaled_target = target_scaler.transform(y_test.values.reshape(-1, 1)).flatten()

    return (
        pd.concat([scaled_features, X_test[dummy_columns]], axis=1),
        pd.Series(scaled_target, index=y_test.index),
    )

# Normalize the test set for sigmoid
# Scale the test set to (0, 1), with scalers fitted on the full training set
X_test_sigmoid, y_test_sigmoid = scale_test_set_sigmoid(X_train, y_train, X_test, y_test)

# Scale the test set to (-1, 1), with scalers fitted on the full training set
X_test_tanh, y_test_tanh = scale_test_set_tanh(X_train, y_train, X_test, y_test)


def visualize_folds(folds, title_suffix=""):
    """Plot the training and validation targets of every fold, one subplot per fold.

    Parameters
    ----------
    folds : sequence of tuple
        ``(X_train, y_train, X_val, y_val)`` per fold; only the two target
        series are plotted, against their own index.
    title_suffix : str, optional
        Text appended to each subplot title.
    """
    plt.figure(figsize=(14, 15))
    n_folds = len(folds)

    for fold_number, fold in enumerate(folds, start=1):
        _, train_target, _, val_target = fold

        # stacked vertically: one row per fold
        plt.subplot(n_folds, 1, fold_number)
        plt.plot(train_target.index, train_target, 'blue', label='Training Data')
        plt.plot(val_target.index, val_target, 'red', label='Validation Data')
        plt.title(f'Fold {fold_number} {title_suffix}')
        plt.legend()

    plt.tight_layout()
    plt.show()


# Plot the scaled target of each fold (training vs. validation) for the (0, 1) scaling
visualize_folds(scaled_folds_sigmoid, title_suffix="(Scaled for Sigmoid)")


# Same plot for the (-1, 1) scaling
visualize_folds(scaled_folds_tanh, title_suffix="(Scaled for Tanh)")

# MLP - tanh activation function in output layer

## Initialization of model & hyperparameter tuning 

In [None]:
import torch
import numpy as np
import random

def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and, if present, CUDA).

    Parameters
    ----------
    seed : int
        Value passed to every random number generator.
    """
    # CPU-side generators, seeded in a fixed order
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # CUDA generators are only seeded when a GPU is actually available
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# Seed all random number generators once, before any model is created
set_seed(42)

In [None]:
import torch
import torch.nn as nn

class MLP(nn.Module):
    """Fully connected regressor with ReLU hidden units and a tanh output.

    Architecture: ``input_size -> hidden_size`` (input layer), followed by
    ``num_hidden_layers`` further ``hidden_size -> hidden_size`` layers, then a
    single-unit output layer. Every layer before the output uses ReLU; the
    output passes through tanh, so predictions lie in [-1, 1].

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden_size : int
        Width of every hidden layer.
    num_hidden_layers : int
        Number of hidden-to-hidden layers placed after the input layer.
    """

    def __init__(self, input_size, hidden_size, num_hidden_layers):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers

        # Layers are created in input -> hidden -> output order
        self.input_layer = nn.Linear(input_size, hidden_size)

        hidden_stack = []
        for _ in range(num_hidden_layers):
            hidden_stack.append(nn.Linear(hidden_size, hidden_size))
        self.hidden_layers = nn.ModuleList(hidden_stack)

        self.output_layer = nn.Linear(hidden_size, 1)

        # Shared activation for the input layer and all hidden layers
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return tanh-squashed predictions with one output unit per input row."""
        hidden = self.input_layer(x)
        hidden = self.activation(hidden)

        for hidden_layer in self.hidden_layers:
            hidden = self.activation(hidden_layer(hidden))

        return torch.tanh(self.output_layer(hidden))

### Random search CV

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import random


def calculate_loss(predictions, targets, criterion):
    """Evaluate ``criterion(predictions, targets)`` and return it as a Python number."""
    loss_tensor = criterion(predictions, targets)
    return loss_tensor.item()

def train_one_epoch(model, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader``.

    Puts the model in training mode and performs one optimizer step per batch.

    Returns
    -------
    list of float
        The loss of every batch, in iteration order.
    """
    model.train()

    def _optimise_batch(features, targets):
        # gradients accumulate by default, so clear them before each batch
        optimizer.zero_grad()
        batch_loss = criterion(model(features), targets)
        batch_loss.backward()
        optimizer.step()
        return batch_loss.item()

    return [_optimise_batch(features, targets) for features, targets in train_loader]

def validate(model, test_loader, criterion):
    """Evaluate the model on every batch of ``test_loader`` without tracking gradients.

    Puts the model in evaluation mode.

    Returns
    -------
    list of float
        The loss of every batch, in iteration order.
    """
    model.eval()
    with torch.no_grad():
        return [
            criterion(model(features), targets).item()
            for features, targets in test_loader
        ]

def initialize_tensorboard(path):
    """Create a TensorBoard ``SummaryWriter`` that logs into ``path``."""
    writer = SummaryWriter(log_dir=path)
    return writer

# Candidate values for the random search; one value per key is drawn with
# random.choice for every trial.
hyperparam_space = {
    'learning_rate': [0.01, 0.005, 0.001, 0.0005, 0.0001],
    'batch_size': [16, 32, 64, 128],
    'num_hidden_layers': [1, 2, 3, 4],
    'hidden_size': [64, 128, 256, 512],
    'momentum': [0.0, 0.5, 0.9]  
}

# Main function to run training with random search
def run_random_search(folds, input_size, num_trials):
    """Random search over ``hyperparam_space`` using cross-validation.

    For every trial one value per hyperparameter is sampled, a fresh model is
    trained on every fold for a fixed number of epochs, and the trial is scored
    by the mean validation loss over all folds (each fold contributing its mean
    validation loss over the epochs). The best trial is chosen only after ALL of
    its folds have finished, so a single favourable fold cannot win on its own.

    Parameters
    ----------
    folds : sequence of tuple
        ``(X_train, y_train, X_val, y_val)`` per fold, already scaled.
    input_size : int
        Number of model input features.
    num_trials : int
        Number of hyperparameter combinations to sample.

    Returns
    -------
    tuple
        ``(best_hyperparams, best_val_loss)``; ``best_hyperparams`` is an empty
        dict and ``best_val_loss`` is ``inf`` when no trial could be scored.
    """
    writer = initialize_tensorboard('runs/electricity_price_hyperparameter_search_tanh')
    best_val_loss = float('inf')
    best_hyperparams = {}
    num_epochs = 30

    try:
        for trial in range(num_trials):
            # Sampling order is kept fixed so a seeded run draws the same combinations
            lr = random.choice(hyperparam_space['learning_rate'])
            batch_size = random.choice(hyperparam_space['batch_size'])
            num_hidden_layers = random.choice(hyperparam_space['num_hidden_layers'])
            hidden_size = random.choice(hyperparam_space['hidden_size'])
            momentum = random.choice(hyperparam_space['momentum'])

            # One entry per fold: that fold's mean validation loss over its epochs
            trial_fold_losses = []

            for i, (X_train_fold, y_train_fold, X_test_fold, y_test_fold) in enumerate(folds):
                model = MLP(input_size, hidden_size, num_hidden_layers)
                criterion = nn.MSELoss()
                optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
                scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

                train_dataset = TensorDataset(torch.tensor(X_train_fold.values, dtype=torch.float32),
                                              torch.tensor(y_train_fold.values, dtype=torch.float32).view(-1, 1))
                # shuffle=False keeps the chronological order of the rows
                train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

                test_dataset = TensorDataset(torch.tensor(X_test_fold.values, dtype=torch.float32),
                                             torch.tensor(y_test_fold.values, dtype=torch.float32).view(-1, 1))
                test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

                # Reset per fold so one fold's losses do not leak into the next fold's average
                fold_val_losses = []

                for epoch in range(num_epochs):
                    epoch_train_losses = train_one_epoch(model, train_loader, optimizer, criterion)
                    avg_train_loss = np.mean(epoch_train_losses)

                    epoch_val_losses = validate(model, test_loader, criterion)
                    avg_val_loss = np.mean(epoch_val_losses)
                    fold_val_losses.append(avg_val_loss)

                    writer.add_scalar(f'Trial_{trial}_Fold_{i+1}/Loss/Train', avg_train_loss, epoch)
                    writer.add_scalar(f'Trial_{trial}_Fold_{i+1}/Loss/Validation', avg_val_loss, epoch)

                    scheduler.step()

                avg_fold_val_loss = np.mean(fold_val_losses)
                trial_fold_losses.append(avg_fold_val_loss)
                # avg_train_loss is the final epoch's mean batch loss for this fold
                print(f"Trial {trial} | Fold {i+1} completed | Average Train Loss: {avg_train_loss:.6f} | Average Validation Loss: {avg_fold_val_loss:.6f}")

            # Nothing to score when there are no folds
            if not trial_fold_losses:
                continue

            trial_val_loss = np.mean(trial_fold_losses)
            if trial_val_loss < best_val_loss:
                best_val_loss = trial_val_loss
                best_hyperparams = {
                    'learning_rate': lr,
                    'batch_size': batch_size,
                    'num_hidden_layers': num_hidden_layers,
                    'hidden_size': hidden_size,
                    'momentum': momentum,
                }
    finally:
        # Close the event file even if a trial fails part-way through
        writer.close()

    print(f"Best Hyperparameters: {best_hyperparams}")
    print(f"Best Validation Loss: {best_val_loss:.6f}")
    return best_hyperparams, best_val_loss


# Run the random search on the tanh-scaled folds.
# NOTE(review): X_train_tanh is reassigned by several earlier cells (including the
# fold-scaling loop); only its column count is used here.
input_size = X_train_tanh.shape[1]  
num_trials = 20  
best_hyperparams, best_val_loss = run_random_search(scaled_folds_tanh, input_size, num_trials)

### Hyperparameter tuning using CV/Monitoring validation loss 

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter

def calculate_loss(predictions, targets, criterion):
    """Return the scalar value of ``criterion`` applied to predictions and targets.

    Redefines the helper of the same name from the random-search cell.
    """
    loss = criterion(predictions, targets)
    scalar_loss = loss.item()
    return scalar_loss

def train_one_epoch(model, train_loader, optimizer, criterion):
    """Train the model for a single pass over ``train_loader``.

    Redefines the helper of the same name from the random-search cell. The model
    is switched to training mode and one optimizer step is taken per batch.

    Returns
    -------
    list of float
        Per-batch training losses in iteration order.
    """
    model.train()
    batch_losses = []
    for features, targets in train_loader:
        # start every batch from zeroed gradients
        optimizer.zero_grad()
        predictions = model(features)
        batch_loss = criterion(predictions, targets)
        batch_loss.backward()
        optimizer.step()
        batch_losses += [batch_loss.item()]
    return batch_losses

def validate(model, val_loader, criterion):
    """Compute the loss of every batch in ``val_loader`` with gradients disabled.

    Redefines the helper of the same name from the random-search cell. The model
    is switched to evaluation mode.

    Returns
    -------
    list of float
        Per-batch validation losses in iteration order.
    """
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for features, targets in val_loader:
            batch_loss = criterion(model(features), targets)
            batch_losses += [batch_loss.item()]
    return batch_losses

def initialize_tensorboard(path):
    """Return a new TensorBoard ``SummaryWriter`` logging to ``path``.

    Redefines the helper of the same name from the random-search cell.
    """
    tensorboard_writer = SummaryWriter(log_dir=path)
    return tensorboard_writer

# Main function to run training with cross-validation
def run_training(scaled_folds_tanh, input_size, hidden_size, num_hidden_layers, lr, batch_size, num_epochs, momentum, patience):
    """Train a fresh MLP on every fold and record per-epoch losses.

    For each fold a new model, SGD optimizer and exponential learning-rate
    scheduler (gamma 0.99, stepped once per epoch) are created. Training stops
    early for a fold when the validation loss has not improved for ``patience``
    consecutive epochs. Losses are logged to TensorBoard and printed.

    Parameters
    ----------
    scaled_folds_tanh : sequence of tuple
        ``(X_train, y_train, X_val, y_val)`` per fold; this parameter shadows the
        module-level variable of the same name.
    input_size, hidden_size, num_hidden_layers : int
        Passed to ``MLP``.
    lr, momentum : float
        SGD learning rate and momentum.
    batch_size : int
        Batch size for both the training and the validation loader.
    num_epochs : int
        Maximum number of epochs per fold.
    patience : int
        Number of consecutive non-improving epochs that triggers early stopping.

    Returns
    -------
    tuple of list
        ``(all_fold_train_losses, all_fold_val_losses)``: one list per fold with
        the mean batch loss of every epoch that was run.
    """
    writer = initialize_tensorboard('runs/electricity_price_forecasting_tanh')
    all_fold_train_losses = []
    all_fold_val_losses = []

    avg_train_losses_per_fold = []
    avg_val_losses_per_fold = []

    for i, (X_train_fold, y_train_fold, X_val_fold, y_val_fold) in enumerate(scaled_folds_tanh):
        # Fresh model and optimizer state for every fold
        model = MLP(input_size, hidden_size, num_hidden_layers)
        criterion = nn.MSELoss()
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

        # Targets are reshaped to a column so they match the model's (batch, 1) output
        train_dataset = TensorDataset(torch.tensor(X_train_fold.values, dtype=torch.float32),
                                      torch.tensor(y_train_fold.values, dtype=torch.float32).view(-1, 1))
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

        val_dataset = TensorDataset(torch.tensor(X_val_fold.values, dtype=torch.float32),
                                    torch.tensor(y_val_fold.values, dtype=torch.float32).view(-1, 1))
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        fold_train_losses = []
        fold_val_losses = []

        # Early-stopping state for this fold
        best_val_loss = float('inf')
        epochs_no_improve = 0

        for epoch in range(num_epochs):
            epoch_train_losses = train_one_epoch(model, train_loader, optimizer, criterion)
            avg_train_loss = np.mean(epoch_train_losses)

            epoch_val_losses = validate(model, val_loader, criterion)
            avg_val_loss = np.mean(epoch_val_losses)

            fold_train_losses.append(avg_train_loss)
            fold_val_losses.append(avg_val_loss)

            writer.add_scalar(f'Fold_{i+1}/Loss/Train', avg_train_loss, epoch)
            writer.add_scalar(f'Fold_{i+1}/Loss/Validation', avg_val_loss, epoch)

            print(f"Fold {i+1} | Epoch: {epoch+1:03d}/{num_epochs} | Train Loss: {avg_train_loss:.6f} | Validation Loss: {avg_val_loss:.6f}")

            # Step the scheduler
            scheduler.step()

            # Early stopping check (the best weights are not saved or restored)
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1

            if epochs_no_improve >= patience:
                print(f"Early stopping at epoch {epoch+1} for fold {i+1}")
                break

        all_fold_train_losses.append(fold_train_losses)
        all_fold_val_losses.append(fold_val_losses)

        # Fold summaries are means over ALL epochs run, not the final-epoch loss
        avg_train_losses_per_fold.append(np.mean(fold_train_losses))
        avg_val_losses_per_fold.append(np.mean(fold_val_losses))

        print(f"Fold {i+1} completed | Average Train Loss: {np.mean(fold_train_losses):.6f} | Average Validation Loss: {np.mean(fold_val_losses):.6f}")

    writer.close()

    print("Average Train Loss per Fold:", avg_train_losses_per_fold)
    print("Average Validation Loss per Fold:", avg_val_losses_per_fold)
    print(f"Overall Average Train Loss: {np.mean(avg_train_losses_per_fold):.6f}")
    print(f"Overall Average Validation Loss: {np.mean(avg_val_losses_per_fold):.6f}")

    return all_fold_train_losses, all_fold_val_losses

# Define hyperparameters 
# Hyperparameters used for the cross-validated training run
input_size = X_train_tanh.shape[1]
hidden_size = 64
num_hidden_layers = 2
learning_rate = 0.01
batch_size = 32
num_epochs = 100
momentum = 0.9  

# Early stopping parameter
patience = 100  # equal to num_epochs, so early stopping is effectively disabled

# Train on every fold and keep the per-epoch loss histories for plotting
all_fold_train_losses, all_fold_val_losses = run_training(scaled_folds_tanh, input_size, hidden_size, num_hidden_layers, learning_rate, batch_size, num_epochs, momentum, patience)

In [None]:
import matplotlib.pyplot as plt


# Plot the train and validation loss over epochs for each fold.
# The loop runs over the recorded loss histories themselves (not over `folds`), so
# the number of figures always matches what run_training actually produced.
for i, (fold_train_losses, fold_val_losses) in enumerate(zip(all_fold_train_losses, all_fold_val_losses)):
    # A fold may have fewer epochs than requested if early stopping triggered
    num_epochs_fold = len(fold_train_losses)
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, num_epochs_fold + 1), fold_train_losses, label='Train Loss', color='blue', linewidth=2)
    plt.plot(range(1, num_epochs_fold + 1), fold_val_losses, label='Validation Loss', color='orange', linestyle='--', linewidth=2)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(f'Fold {i+1} Train and Validation Loss')
    plt.legend()
    plt.grid(True)
    plt.show()
 

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Folds can have different numbers of epochs (early stopping), so each epoch is
# averaged over the folds that actually reached it.
max_epochs = max(len(fold_losses) for fold_losses in all_fold_train_losses)

train_losses_accum = np.zeros(max_epochs)
val_losses_accum = np.zeros(max_epochs)
count_per_epoch = np.zeros(max_epochs)

for i, fold_train_losses in enumerate(all_fold_train_losses):
    num_epochs_fold = len(fold_train_losses)
    # add this fold's history to the first num_epochs_fold slots
    train_losses_accum[:num_epochs_fold] += fold_train_losses
    val_losses_accum[:num_epochs_fold] += all_fold_val_losses[i][:num_epochs_fold]
    count_per_epoch[:num_epochs_fold] += 1

avg_train_losses = train_losses_accum / count_per_epoch
avg_val_losses = val_losses_accum / count_per_epoch

plt.figure(figsize=(10, 6))
plt.plot(range(1, max_epochs + 1), avg_train_losses, label='Average Train Loss', color='blue', linewidth=2)
plt.plot(range(1, max_epochs + 1), avg_val_losses, label='Average Validation Loss', color='orange', linestyle='--', linewidth=2)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Average Train and Validation Loss Across All Folds')
plt.legend()
plt.grid(True)
plt.show()


## Training the model on all training data with the optimal hyperparameters found in CV 

### Step 1: Normalize all of the training data together to ensure consistency 

In [None]:
# Fit the scaler on the entire training set
# Indicator columns are kept as-is; every other feature is min-max scaled
dummy_columns = ['Weekend', 'Christmas vacation', 'Public holiday', 'Winter Time']
non_dummy_columns = [col for col in X_train.columns if col not in dummy_columns]

# Scalers for the final model, fitted on the complete training set only
scaler_tanh = MinMaxScaler(feature_range=(-1, 1))
scaler_target_tanh = MinMaxScaler(feature_range=(-1, 1))

X_train_tanh = pd.concat(
    [
        pd.DataFrame(
            scaler_tanh.fit_transform(X_train[non_dummy_columns]),
            columns=non_dummy_columns,
            index=X_train.index,
        ),
        X_train[dummy_columns],
    ],
    axis=1,
)
X_test_tanh = pd.concat(
    [
        pd.DataFrame(
            scaler_tanh.transform(X_test[non_dummy_columns]),
            columns=non_dummy_columns,
            index=X_test.index,
        ),
        X_test[dummy_columns],
    ],
    axis=1,
)

# The target scaler is kept as a separate object from the feature scaler
y_train_tanh = pd.Series(
    scaler_target_tanh.fit_transform(y_train.values.reshape(-1, 1)).flatten(),
    index=y_train.index,
)
y_test_tanh = pd.Series(
    scaler_target_tanh.transform(y_test.values.reshape(-1, 1)).flatten(),
    index=y_test.index,
)

In [None]:
# Number of feature columns, i.e. the input size of the network
X_train_tanh.shape[1]

### Step 2: Training Loop (training on all of 2023)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# Function to train the model
def train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=80):
    """Train ``model`` in place for ``num_epochs`` epochs and return it.

    Every epoch takes one optimizer step per batch, then steps the learning-rate
    scheduler once and prints the mean batch loss of that epoch.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise; modified in place.
    train_loader : iterable
        Yields ``(features, targets)`` batches.
    criterion : callable
        Loss function applied to ``(outputs, targets)``.
    optimizer : torch.optim.Optimizer
        Optimizer bound to the model's parameters.
    scheduler : learning-rate scheduler
        Stepped once at the end of every epoch.
    num_epochs : int, optional
        Number of passes over ``train_loader``.

    Returns
    -------
    torch.nn.Module
        The same ``model`` object that was passed in.
    """
    model.train()
    # kept locally for inspection only; not part of the return value
    epoch_mean_losses = []

    for epoch_number in range(1, num_epochs + 1):
        batch_losses = []
        for features, targets in train_loader:
            optimizer.zero_grad()
            batch_loss = criterion(model(features), targets)
            batch_loss.backward()
            optimizer.step()
            batch_losses.append(batch_loss.item())

        # decay the learning rate once per epoch, after all batches
        scheduler.step()

        avg_loss = np.mean(batch_losses)
        epoch_mean_losses.append(avg_loss)
        print(f'Epoch: {epoch_number}, Average Training Loss: {avg_loss:.6f}')

    return model


# Hyperparameters for the final model. They are defined once, BEFORE use, so the
# training below and any later cell that reads these globals share the same values
# instead of relying on duplicated literals.
input_size = X_train_tanh.shape[1]
hidden_size = 64
num_hidden_layers = 2
learning_rate = 0.01
batch_size = 32
num_epochs = 100
momentum = 0.9

# Full (scaled) training set; targets reshaped to a column to match the model output
train_dataset = TensorDataset(torch.tensor(X_train_tanh.values, dtype=torch.float32),
                              torch.tensor(y_train_tanh.values, dtype=torch.float32).view(-1, 1))
# shuffle=False keeps the chronological order of the rows
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

model = MLP(input_size, hidden_size=hidden_size, num_hidden_layers=num_hidden_layers)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
criterion = nn.MSELoss()

trained_model = train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=num_epochs)

# Persist only the weights; loading requires re-creating MLP with the same sizes
torch.save(trained_model.state_dict(), 'final_trained_model.pth')
print("Training complete and model saved.")

### Step 3: Testing loop, recalibration loop, storing predictions & actuals, error metrics

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from datetime import timedelta
import calendar

def rescale_values(scaler, values):
    """Map scaled ``values`` back to the original scale via ``scaler.inverse_transform``."""
    restored = scaler.inverse_transform(values)
    return restored

def calculate_metrics(predictions, actuals, epsilon=1e-10, small_value=1e-5):
    """Compute MSE, RMSE, MAE and MAPE (in percent) for a set of predictions.

    For MAPE, actual values whose magnitude is below ``small_value`` are replaced
    by ``small_value`` in the denominator to avoid division by (near) zero.

    Parameters
    ----------
    predictions, actuals : array-like
        Predicted and observed values of equal shape.
    epsilon : float, optional
        Accepted but not used by this function.
    small_value : float, optional
        Lower bound applied to ``|actuals|`` in the MAPE denominator.

    Returns
    -------
    tuple
        ``(mse, rmse, mae, mape)``.
    """
    mse = mean_squared_error(actuals, predictions)
    mae = mean_absolute_error(actuals, predictions)
    rmse = np.sqrt(mse)

    abs_actuals = np.abs(actuals)
    denominators = np.where(abs_actuals < small_value, small_value, abs_actuals)
    percentage_errors = np.abs((actuals - predictions) / denominators)
    mape = np.mean(percentage_errors) * 100

    return mse, rmse, mae, mape

def calculate_smape(predictions, actuals, epsilon=1e-10):
    """Return the symmetric mean absolute percentage error, in percent.

    ``epsilon`` is added to the denominator so that a pair where prediction and
    actual are both zero contributes 0 instead of a division by zero.
    """
    absolute_error = np.abs(predictions - actuals)
    scale = np.abs(actuals) + np.abs(predictions) + epsilon
    return 100 * np.mean(2 * absolute_error / scale)

def test_model(model, X_test, y_test):
    model.eval()
    test_days = len(y_test) // 24  
    all_predictions = []
    all_actuals = []
    all_dates = []

    for day in range(test_days):
        start_idx = day * 24
        end_idx = (day + 1) * 24
        X_day = torch.tensor(X_test.iloc[start_idx:end_idx].values, dtype=torch.float32)
        y_day = y_test.iloc[start_idx:end_idx].values
        y_dates = y_test.iloc[start_idx:end_idx].index

        # Forecast the next 24 hours
        with torch.no_grad():
            y_pred_day = model(X_day).numpy()

        # Store normalized predictions and actual values with their dates
        all_predictions.extend(y_pred_day.flatten())
        all_actuals.extend(y_day.flatten())
        all_dates.extend(y_dates)

    # Convert the lists to numpy arrays 
    all_predictions = np.array(all_predictions)
    all_actuals = np.array(all_actuals)
    all_dates = pd.to_datetime(all_dates)

    return all_predictions, all_actuals, all_dates


# Function to update training and testing data
def update_training_data(X, y, start_date, end_date):
    """Return the rows of ``X`` and ``y`` between ``start_date`` and ``end_date``.

    The same slice is applied to both objects.
    """
    window = slice(start_date, end_date)
    return X[window], y[window]

# Function to get the first day of the next month
def next_month(date):
    """Return a Timestamp for the first day (midnight) of the month after ``date``."""
    # divmod(month, 12) yields (1, 0) only for December, which rolls the year over
    year_carry, month_index = divmod(date.month, 12)
    return pd.Timestamp(year=date.year + year_carry, month=month_index + 1, day=1)

# Function to get the last day of the month
def last_day_of_month(date):
    """Return midnight of the last day of ``date``'s month.

    Computed as one day before the first day of the following month.
    """
    return next_month(date) - timedelta(days=1)

# Recalibration loop
def recalibration_loop(X, y, start_date, num_months, model, input_size, hidden_size, num_hidden_layers, lr, batch_size, num_epochs, momentum, scaler_X, scaler_y):
    """Retrain an MLP on a rolling window and test it on the following month.

    For each of ``num_months`` iterations a fresh ``MLP`` is trained on the
    window that starts at ``start_date`` and ends on the last day of the month
    containing ``start_date + 364 days``; it is then evaluated on the calendar
    month that follows. The window is shifted forward by one month after each
    iteration.

    Args:
        X: Feature DataFrame with a datetime index (hourly rows are assumed by
            the 23-hour offsets and the 24-row blocks used in testing).
        y: Target Series aligned with ``X``.
        start_date: Timestamp of the first training day.
        num_months: Maximum number of monthly recalibrations to run.
        model: Unused; a new ``MLP`` is instantiated in every iteration. Kept
            so existing calls keep working.
        input_size, hidden_size, num_hidden_layers: Passed to ``MLP``.
        lr, momentum: SGD settings.
        batch_size: Mini-batch size of the (unshuffled) training loader.
        num_epochs: Passed to ``train_model``.
        scaler_X, scaler_y: Already-fitted scalers; only ``transform`` is
            called on them here. ``scaler_X`` is applied to every column not
            listed in the module-level ``dummy_columns``.

    Returns:
        ``(all_normalized_predictions, all_normalized_actuals,
        all_performance_metrics, all_dates)`` where the first two are flat
        lists on the scaled target range, the third holds one
        ``(mse, rmse, mae, mape, smape)`` tuple per tested month (computed on
        the scaled values), and the last holds the matching timestamps.
    """
    # Offset from midnight to the last hourly row of a day.
    last_hour_of_day = timedelta(hours=23)

    initial_end_date = last_day_of_month(start_date + timedelta(days=364))
    test_start_date = initial_end_date + timedelta(days=1)

    # Store normalized predictions and actual values for each month
    all_normalized_predictions = []
    all_normalized_actuals = []
    all_dates = []
    all_performance_metrics = []

    for month in range(num_months):
        test_end_date = last_day_of_month(test_start_date) + last_hour_of_day

        # Slicing a DatetimeIndex with Timestamp objects is exact, so a
        # midnight end bound would drop hours 01:00-23:00 of the last training
        # day. Extend the bound to that day's final hour, as is already done
        # for the test window.
        train_end_date = initial_end_date + last_hour_of_day

        print(f'\nRecalibration month {month + 1}...')
        print(f'Training period: {start_date.date()} to {initial_end_date.date()}')
        print(f'Testing period: {test_start_date.date()} to {test_end_date.date()}')

        # Stop as soon as the data no longer covers the requested test month.
        if test_start_date >= test_end_date or len(X[test_start_date:test_end_date]) == 0:
            print(f'Skipping month {month + 1} due to invalid test period.')
            break

        X_train, y_train = update_training_data(X, y, start_date, train_end_date)

        # Scale continuous features only; dummy columns are appended unscaled.
        non_dummy_columns = [col for col in X_train.columns if col not in dummy_columns]
        X_train_tanh = scaler_X.transform(X_train[non_dummy_columns])
        y_train_tanh = scaler_y.transform(y_train.values.reshape(-1, 1))

        X_train_tanh = pd.DataFrame(X_train_tanh, columns=non_dummy_columns, index=X_train.index)
        X_train_tanh = pd.concat([X_train_tanh, X_train[dummy_columns]], axis=1)
        y_train_tanh = pd.Series(y_train_tanh.flatten(), index=y_train.index)

        train_dataset = TensorDataset(torch.tensor(X_train_tanh.values, dtype=torch.float32),
                                      torch.tensor(y_train_tanh.values, dtype=torch.float32).view(-1, 1))
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

        # A fresh network is trained from scratch for every window.
        model = MLP(input_size, hidden_size, num_hidden_layers)
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
        criterion = nn.MSELoss()

        trained_model = train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs)

        X_test = X[test_start_date:test_end_date]
        y_test = y[test_start_date:test_end_date]

        X_test_tanh = scaler_X.transform(X_test[non_dummy_columns])
        y_test_tanh = scaler_y.transform(y_test.values.reshape(-1, 1))

        X_test_tanh = pd.DataFrame(X_test_tanh, columns=non_dummy_columns, index=X_test.index)
        X_test_tanh = pd.concat([X_test_tanh, X_test[dummy_columns]], axis=1)
        y_test_tanh = pd.Series(y_test_tanh.flatten(), index=y_test.index)

        normalized_predictions, normalized_actuals, dates = test_model(trained_model, X_test_tanh, y_test_tanh)
        all_normalized_predictions.extend(normalized_predictions)
        all_normalized_actuals.extend(normalized_actuals)
        all_dates.extend(dates)

        # Calculate performance metrics for the test month (scaled values)
        mse, rmse, mae, mape = calculate_metrics(normalized_predictions, normalized_actuals)
        smape = calculate_smape(normalized_predictions, normalized_actuals)
        all_performance_metrics.append((mse, rmse, mae, mape, smape))

        print(f'Month {month + 1} - Test Period: {test_start_date.date()} to {test_end_date.date()}')
        print(f'Overall MSE: {mse:.6f}')
        print(f'Overall RMSE: {rmse:.6f}')
        print(f'Overall MAE: {mae:.6f}')
        print(f'Overall MAPE: {mape:.6f}%')
        print(f'Overall SMAPE: {smape:.6f}%')

        # Slide the whole window forward by one month.
        start_date = next_month(start_date)
        initial_end_date = last_day_of_month(start_date + timedelta(days=364))
        test_start_date = initial_end_date + timedelta(days=1)

    return all_normalized_predictions, all_normalized_actuals, all_performance_metrics, all_dates


# First day of the initial training window.
start_date = pd.to_datetime('2023-01-01')


# Number of monthly recalibrations (one test month each).
num_months = 5  


# Run the rolling retrain/test procedure; results are on the scaled target range.
normalized_predictions, normalized_actuals, all_performance_metrics, dates = recalibration_loop(X, y, start_date, num_months, model, input_size, hidden_size, num_hidden_layers, learning_rate, batch_size, num_epochs, momentum, scaler_tanh, scaler_target_tanh)


# Map predictions and actuals back to the original target scale.
all_predictions_rescaled = rescale_values(scaler_target_tanh, np.array(normalized_predictions).reshape(-1, 1))
all_actuals_rescaled = rescale_values(scaler_target_tanh, np.array(normalized_actuals).reshape(-1, 1))

# Create DataFrames for predictions and actuals with datetime index
predictions_df = pd.DataFrame(all_predictions_rescaled, index=dates, columns=["Prediction"])
actuals_df = pd.DataFrame(all_actuals_rescaled, index=dates, columns=["Actual"])

# Combine predictions and actuals into a single DataFrame
Predictions_actuals_MLP_tanh = pd.concat([predictions_df, actuals_df], axis=1)

# Per-month metrics recomputed on the rescaled values.
performance_metrics_per_month = []

# Running position into the concatenated, rescaled result arrays.
start_idx = 0
total_length = len(normalized_predictions)

# NOTE(review): the month count is hard-coded here (1..5) rather than derived
# from `num_months`, and the test months are assumed to start at 2024-01-01.
# NOTE(review): `calendar` is not among the imports visible at the top of the
# notebook - confirm it is imported in an earlier cell.
for month in range(1, 6):  
    test_start_date = pd.to_datetime('2024-01-01') + pd.DateOffset(months=month - 1)
    num_days_in_month = calendar.monthrange(test_start_date.year, test_start_date.month)[1]
    end_idx = start_idx + num_days_in_month * 24
    
    # Clamp the slice when fewer predictions exist than the calendar month implies.
    if end_idx > total_length:
        end_idx = total_length

    monthly_predictions = all_predictions_rescaled[start_idx:end_idx]
    monthly_actuals = all_actuals_rescaled[start_idx:end_idx]

    print(f'Month {month}:')
    print(f'Test period: {test_start_date.strftime("%Y-%m-%d 00:00:00")} to {(test_start_date + pd.DateOffset(days=num_days_in_month - 1)).strftime("%Y-%m-%d 23:00:00")}')
    print(f'Start index: {start_idx}, End index: {end_idx - 1}')
    print(f'Length of monthly predictions: {len(monthly_predictions)}')
    print(f'Length of monthly actuals: {len(monthly_actuals)}')

    # Only reported; the metrics below are still computed on the short slice.
    if len(monthly_predictions) != num_days_in_month * 24:
        print(f"Error: Expected {num_days_in_month * 24} predictions, but got {len(monthly_predictions)}")

    mse, rmse, mae, mape = calculate_metrics(monthly_predictions, monthly_actuals)
    smape = calculate_smape(monthly_predictions, monthly_actuals)
    performance_metrics_per_month.append({
        'Month': month,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'SMAPE': smape
    })

    # The next month starts where this one ended.
    start_idx = end_idx

# One summary line per month.
for metrics in performance_metrics_per_month:
    print(f'Month {metrics["Month"]}: MSE={metrics["MSE"]:.6f}, RMSE={metrics["RMSE"]:.6f}, MAE={metrics["MAE"]:.6f}, MAPE={metrics["MAPE"]:.6f}%, SMAPE={metrics["SMAPE"]:.6f}%')

# Spot-check the first ten prediction/actual pairs.
for record in Predictions_actuals_MLP_tanh.head(10).itertuples():
    print(f'Datetime: {record.Index}, Prediction: {record.Prediction:.6f}, Actual: {record.Actual:.6f}')

### MLP predictions to dataframe 

In [None]:
# Combine the prediction and actual DataFrames column-wise on their shared datetime index.
# NOTE(review): this name differs from the earlier `Predictions_actuals_MLP_tanh`
# only in the case of its first letter; both hold the same concatenation.
predictions_actuals_MLP_tanh = pd.concat([predictions_df, actuals_df], axis=1)
predictions_actuals_MLP_tanh

In [None]:
# Save predictions and actuals (with their datetime index) to an Excel workbook.
# NOTE(review): the destination is an absolute, machine-specific path; the
# directory name also contains a trailing space before the final slash.

excel_file_path = '/Users/evenbakke/Documents/Master Thesis/Predictions all models /MLP predictions v2.xlsx'

predictions_actuals_MLP_tanh.to_excel(excel_file_path, index=True)

print(f'DataFrame successfully saved to {excel_file_path}')

# LSTM (tanh output layer, one LSTM layer, one hidden layer)

In [None]:
import torch
import numpy as np
import random

def set_seed(seed):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Integer seed handed to ``random``, ``numpy.random``, and the
            PyTorch CPU and CUDA generators.
    """
    # Prefer reproducible cuDNN kernels over auto-tuned ones.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_rng in seeders:
        seed_rng(seed)

set_seed(42)  


In [None]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    """LSTM encoder followed by one ReLU hidden layer and a tanh output unit.

    Args:
        input_size: Number of features per time step.
        lstm_hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        fc_hidden_size: Width of the fully connected hidden layer.
    """

    def __init__(self, input_size, lstm_hidden_size, num_layers, fc_hidden_size):
        super().__init__()
        # Kept on the instance because forward() sizes the initial state from them.
        self.num_layers = num_layers
        self.lstm_hidden_size = lstm_hidden_size

        # Recurrent encoder; inputs are laid out (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, lstm_hidden_size, num_layers, batch_first=True)

        # Fully connected head: hidden layer, then a single output unit.
        self.hidden_layer = nn.Linear(lstm_hidden_size, fc_hidden_size)
        self.output_layer = nn.Linear(fc_hidden_size, 1)

        # ReLU after the hidden layer, tanh on the output.
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Map a ``(batch, seq, feature)`` tensor to a ``(batch, 1)`` output in [-1, 1]."""
        # Explicit zero hidden and cell states on the input's device.
        state_shape = (self.num_layers, x.size(0), self.lstm_hidden_size)
        initial_state = (
            torch.zeros(state_shape, device=x.device),
            torch.zeros(state_shape, device=x.device),
        )

        sequence_out, _ = self.lstm(x, initial_state)

        # Only the last time step feeds the dense head.
        last_step = sequence_out[:, -1, :]
        hidden = self.relu(self.hidden_layer(last_step))
        return self.tanh(self.output_layer(hidden))


### Random search CV

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import random

def calculate_loss(predictions, targets, criterion):
    """Evaluate ``criterion(predictions, targets)`` and return it as a Python number."""
    loss = criterion(predictions, targets)
    return loss.item()

def train_one_epoch(model, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module to train (switched to training mode).
        train_loader: Iterable of ``(features, targets)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss function called as ``criterion(outputs, targets)``.

    Returns:
        List with the loss value of every batch, in iteration order.
    """
    def _fit_batch(features, targets):
        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        batch_loss = criterion(model(features), targets)
        batch_loss.backward()
        optimizer.step()
        return batch_loss.item()

    model.train()
    return [_fit_batch(features, targets) for features, targets in train_loader]

def validate(model, test_loader, criterion):
    """Compute the loss of every batch in ``test_loader`` without gradients.

    The model is switched to eval mode and left there.

    Returns:
        List with one loss value per batch, in iteration order.
    """
    model.eval()
    with torch.no_grad():
        return [
            criterion(model(features), targets).item()
            for features, targets in test_loader
        ]

def initialize_tensorboard(path):
    """Create a TensorBoard ``SummaryWriter`` that logs under ``path``."""
    writer = SummaryWriter(path)
    return writer

# Discrete search space for the random search below; one value per key is
# drawn uniformly with `random.choice` for every trial.
hyperparam_space = {
    'learning_rate': [0.01, 0.005, 0.001, 0.0005, 0.0001],  # SGD step size
    'batch_size': [16, 32, 64, 128],  # DataLoader mini-batch size
    'num_layers': [1, 2, 3, 4],  # stacked LSTM layers
    'hidden_size': [64, 128, 256, 512],  # LSTM hidden width
    'momentum': [0.0, 0.5, 0.9]  # SGD momentum
}

# Main function to run training with random search
def run_random_search(folds, input_size, sequence_length, num_trials, fc_hidden_size=None):
    """Random hyperparameter search for ``LSTMModel`` scored by cross-validation.

    Each trial draws one value per key of the module-level ``hyperparam_space``,
    trains a fresh model on every fold for 30 epochs, and is scored by the mean
    of all per-epoch validation losses collected over all of its folds.

    Args:
        folds: Sequence of ``(X_train, y_train, X_test, y_test)`` tuples of
            pandas objects. It is iterated once per trial, so it must be
            re-iterable (for example a list).
        input_size: Number of features per time step.
        sequence_length: Currently unused; every row is fed as a sequence of
            length 1 (``unsqueeze(1)``). Kept for interface compatibility.
        num_trials: Number of random configurations to evaluate.
        fc_hidden_size: Width of the model's fully connected hidden layer.
            ``None`` (default) reuses the sampled ``hidden_size``.

    Returns:
        ``(best_hyperparams, best_val_loss)``: the sampled values of the best
        trial and its score.
    """
    writer = initialize_tensorboard('runs/electricity_price_hyperparameter_search')
    best_val_loss = float('inf')
    best_hyperparams = {}
    num_epochs = 30

    for trial in range(num_trials):
        lr = random.choice(hyperparam_space['learning_rate'])
        batch_size = random.choice(hyperparam_space['batch_size'])
        num_layers = random.choice(hyperparam_space['num_layers'])
        hidden_size = random.choice(hyperparam_space['hidden_size'])
        momentum = random.choice(hyperparam_space['momentum'])

        # LSTMModel requires a dense-layer width; omitting it raised TypeError.
        trial_fc_hidden_size = hidden_size if fc_hidden_size is None else fc_hidden_size

        # Per-epoch validation losses of this trial, pooled over all folds.
        fold_val_losses = []

        for i, (X_train_fold, y_train_fold, X_test_fold, y_test_fold) in enumerate(folds):
            model = LSTMModel(input_size, hidden_size, num_layers, trial_fc_hidden_size)
            criterion = nn.MSELoss()
            optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
            scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

            # unsqueeze(1) turns each row into a length-1 sequence.
            train_dataset = TensorDataset(torch.tensor(X_train_fold.values, dtype=torch.float32).unsqueeze(1),
                                          torch.tensor(y_train_fold.values, dtype=torch.float32).view(-1, 1))
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

            test_dataset = TensorDataset(torch.tensor(X_test_fold.values, dtype=torch.float32).unsqueeze(1),
                                         torch.tensor(y_test_fold.values, dtype=torch.float32).view(-1, 1))
            test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

            for epoch in range(num_epochs):
                epoch_train_losses = train_one_epoch(model, train_loader, optimizer, criterion)
                avg_train_loss = np.mean(epoch_train_losses)

                epoch_val_losses = validate(model, test_loader, criterion)
                avg_val_loss = np.mean(epoch_val_losses)
                fold_val_losses.append(avg_val_loss)

                writer.add_scalar(f'Trial_{trial}_Fold_{i+1}/Loss/Train', avg_train_loss, epoch)
                writer.add_scalar(f'Trial_{trial}_Fold_{i+1}/Loss/Validation', avg_val_loss, epoch)

                scheduler.step()

            # Running mean over the folds finished so far (progress report only).
            avg_fold_val_loss = np.mean(fold_val_losses)
            print(f"Trial {trial} | Fold {i+1} completed | Average Train Loss: {avg_train_loss:.6f} | Average Validation Loss: {avg_fold_val_loss:.6f}")

        # Rank the trial only after every fold has contributed, so that a
        # partial average from the first folds can never be recorded as "best".
        if fold_val_losses:
            trial_val_loss = np.mean(fold_val_losses)
            if trial_val_loss < best_val_loss:
                best_val_loss = trial_val_loss
                best_hyperparams = {
                    'learning_rate': lr,
                    'batch_size': batch_size,
                    'num_layers': num_layers,
                    'hidden_size': hidden_size,
                    'momentum': momentum,
                }

    writer.close()
    print(f"Best Hyperparameters: {best_hyperparams}")
    print(f"Best Validation Loss: {best_val_loss:.6f}")
    return best_hyperparams, best_val_loss

# Number of model inputs = number of columns of the scaled training frame.
input_size = X_train_tanh.shape[1]  
# Passed through to run_random_search, which does not currently use it.
sequence_length = 24 
# Number of random hyperparameter configurations to evaluate.
num_trials = 20  
best_hyperparams, best_val_loss = run_random_search(scaled_folds_tanh, input_size, sequence_length, num_trials)


### Hyperparameter tuning using CV (SGD optimizer)

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter

def calculate_loss(predictions, targets, criterion):
    """Return ``criterion(predictions, targets)`` converted to a Python scalar."""
    loss_tensor = criterion(predictions, targets)
    loss_value = loss_tensor.item()
    return loss_value

def train_one_epoch(model, train_loader, optimizer, criterion, device):
    """Train ``model`` for one pass over ``train_loader`` on ``device``.

    Args:
        model: Module to train (switched to training mode).
        train_loader: Iterable of ``(features, targets)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        List of per-batch loss values in iteration order.
    """
    model.train()
    batch_losses = []

    for features, targets in train_loader:
        features = features.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(features), targets)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())

    return batch_losses

def validate(model, test_loader, criterion, device):
    """Evaluate ``model`` on every batch of ``test_loader`` without gradients.

    Batches are moved to ``device`` first. The model is left in eval mode.

    Returns:
        List of per-batch loss values in iteration order.
    """
    def _batch_loss(features, targets):
        features, targets = features.to(device), targets.to(device)
        return criterion(model(features), targets).item()

    model.eval()
    with torch.no_grad():
        return [_batch_loss(features, targets) for features, targets in test_loader]

def create_sequences(data, targets, seq_length):
    """Build sliding windows from pandas inputs.

    Window ``i`` holds rows ``i .. i + seq_length - 1`` of ``data`` and is
    paired with ``targets`` at position ``i + seq_length`` (the row right
    after the window).

    Args:
        data: DataFrame (or Series) of features, indexed positionally.
        targets: Series of target values aligned with ``data``.
        seq_length: Number of rows per window.

    Returns:
        ``(windows, labels)`` as NumPy arrays; both are empty when ``data``
        has no more than ``seq_length`` rows.
    """
    window_starts = range(len(data) - seq_length)
    windows = [data.iloc[start:start + seq_length].values for start in window_starts]
    labels = [targets.iloc[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(labels)

def initialize_tensorboard(path):
    """Return a new ``SummaryWriter`` whose event files are written to ``path``."""
    summary_writer = SummaryWriter(path)
    return summary_writer

# Main function to run training with cross-validation
def run_training(folds, input_size, lstm_hidden_size, num_layers, fc_hidden_size, lr, batch_size, num_epochs, patience, sequence_length, momentum):
    """Train and validate an ``LSTMModel`` on every cross-validation fold.

    For each fold a fresh model is trained with SGD and an exponentially
    decaying learning rate on sliding windows of ``sequence_length`` rows.
    Training stops early when the validation loss has not improved for
    ``patience`` consecutive epochs, and the best weights are then restored.

    Args:
        folds: Iterable of ``(X_train, y_train, X_test, y_test)`` pandas tuples.
        input_size: Number of features per time step.
        lstm_hidden_size, num_layers, fc_hidden_size: ``LSTMModel`` sizes.
        lr, momentum: SGD settings.
        batch_size: Mini-batch size of the (unshuffled) loaders.
        num_epochs: Maximum number of epochs per fold.
        patience: Epochs without validation improvement before stopping.
        sequence_length: Window length passed to ``create_sequences``.

    Returns:
        ``(all_fold_train_losses, all_fold_val_losses)``: per fold, the list of
        mean training and validation losses of every epoch that was run.
    """
    import copy  # local import: only needed to snapshot the best weights

    writer = initialize_tensorboard('runs/electricity_price_forecasting_lstm')
    all_fold_train_losses = []
    all_fold_val_losses = []

    avg_train_losses_per_fold = []
    avg_val_losses_per_fold = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for i, (X_train_fold, y_train_fold, X_test_fold, y_test_fold) in enumerate(folds):
        model = LSTMModel(input_size, lstm_hidden_size, num_layers, fc_hidden_size).to(device)
        criterion = nn.MSELoss()
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

        # Create sequences for the training set
        X_train_seq, y_train_seq = create_sequences(X_train_fold, y_train_fold, sequence_length)
        train_dataset = TensorDataset(torch.tensor(X_train_seq, dtype=torch.float32),
                                      torch.tensor(y_train_seq, dtype=torch.float32).view(-1, 1))
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

        # Create sequences for the test set
        X_test_seq, y_test_seq = create_sequences(X_test_fold, y_test_fold, sequence_length)
        test_dataset = TensorDataset(torch.tensor(X_test_seq, dtype=torch.float32),
                                     torch.tensor(y_test_seq, dtype=torch.float32).view(-1, 1))
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        fold_train_losses = []
        fold_val_losses = []

        best_val_loss = float('inf')
        epochs_no_improve = 0
        best_model_state = None

        for epoch in range(num_epochs):
            epoch_train_losses = train_one_epoch(model, train_loader, optimizer, criterion, device)
            avg_train_loss = np.mean(epoch_train_losses)

            epoch_val_losses = validate(model, test_loader, criterion, device)
            avg_val_loss = np.mean(epoch_val_losses)

            fold_train_losses.append(avg_train_loss)
            fold_val_losses.append(avg_val_loss)

            writer.add_scalar(f'Fold_{i+1}/Loss/Train', avg_train_loss, epoch)
            writer.add_scalar(f'Fold_{i+1}/Loss/Validation', avg_val_loss, epoch)

            print(f"Fold {i+1} | Epoch: {epoch+1:03d}/{num_epochs} | Train Loss: {avg_train_loss:.6f} | Validation Loss: {avg_val_loss:.6f}")

            scheduler.step()

            # Early stopping check
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                epochs_no_improve = 0
                # state_dict() returns references to the live parameters, which
                # later optimizer steps overwrite. Deep-copy so the snapshot
                # really holds the best weights.
                best_model_state = copy.deepcopy(model.state_dict())
            else:
                epochs_no_improve += 1

            if epochs_no_improve >= patience:
                print(f"Early stopping at epoch {epoch+1} for fold {i+1}")
                break

        # Restore the weights of the best validation epoch.
        if best_model_state is not None:
            model.load_state_dict(best_model_state)

        all_fold_train_losses.append(fold_train_losses)
        all_fold_val_losses.append(fold_val_losses)

        avg_train_losses_per_fold.append(np.mean(fold_train_losses))
        avg_val_losses_per_fold.append(np.mean(fold_val_losses))

        print(f"Fold {i+1} completed | Average Train Loss: {np.mean(fold_train_losses):.6f} | Average Validation Loss: {np.mean(fold_val_losses):.6f}")

    writer.close()

    print("Average Train Loss per Fold:", avg_train_losses_per_fold)
    print("Average Validation Loss per Fold:", avg_val_losses_per_fold)
    print(f"Overall Average Train Loss: {np.mean(avg_train_losses_per_fold):.6f}")
    print(f"Overall Average Validation Loss: {np.mean(avg_val_losses_per_fold):.6f}")

    return all_fold_train_losses, all_fold_val_losses


# Hyperparameters for the cross-validated LSTM run.
input_size = X_train_tanh.shape[1]  # one model input per column of the scaled training frame
lstm_hidden_size = 64
num_layers = 1
fc_hidden_size = 256
learning_rate = 0.01
batch_size = 32
num_epochs = 100
sequence_length = 24  
momentum = 0.9

# Early stopping parameters
patience = 100  # equal to num_epochs, so early stopping can never trigger before the last epoch

all_fold_train_losses, all_fold_val_losses = run_training(scaled_folds_tanh, input_size, lstm_hidden_size, num_layers, fc_hidden_size, learning_rate, batch_size, num_epochs, patience, sequence_length, momentum)



In [None]:
import matplotlib.pyplot as plt


# One figure per fold: training vs. validation loss over the epochs that were run.
# NOTE(review): the loop is sized by `folds`, a name not defined in this part of
# the notebook, while the losses come from the run on `scaled_folds_tanh` -
# confirm both have the same length.
for i in range(len(folds)):
    num_epochs_fold = len(all_fold_train_losses[i])  # may be shorter than num_epochs after early stopping
    plt.figure(figsize=(10, 6))  
    plt.plot(range(1, num_epochs_fold + 1), all_fold_train_losses[i], label='Train Loss', color='blue', linewidth=2)
    plt.plot(range(1, num_epochs_fold + 1), all_fold_val_losses[i], label='Validation Loss', color='orange', linestyle='--', linewidth=2)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(f'Fold {i+1} Train and Validation Loss')
    plt.legend()
    plt.grid(True) 
    plt.show()
    plt.close()  # release the figure before drawing the next one

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Longest epoch history across folds (folds may differ after early stopping).
max_epochs = max([len(fold_losses) for fold_losses in all_fold_train_losses])

# Per-epoch sums and the number of folds that reached each epoch.
train_losses_accum = np.zeros(max_epochs)
val_losses_accum = np.zeros(max_epochs)
count_per_epoch = np.zeros(max_epochs)  

for i in range(len(all_fold_train_losses)):
    num_epochs_fold = len(all_fold_train_losses[i])
    for epoch in range(num_epochs_fold):
        train_losses_accum[epoch] += all_fold_train_losses[i][epoch]
        val_losses_accum[epoch] += all_fold_val_losses[i][epoch]
        count_per_epoch[epoch] += 1

# Average each epoch only over the folds that actually ran it.
avg_train_losses = train_losses_accum / count_per_epoch
avg_val_losses = val_losses_accum / count_per_epoch

plt.figure(figsize=(10, 6))
plt.plot(range(1, max_epochs + 1), avg_train_losses, label='Average Train Loss', color='blue', linewidth=2)
plt.plot(range(1, max_epochs + 1), avg_val_losses, label='Average Validation Loss', color='orange', linestyle='--', linewidth=2)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Average Train and Validation Loss Across All Folds')
plt.legend()
plt.grid(True)
plt.show()


## Training the model (normalize all training data, testing loop, recalibration loop)

In [None]:
# Fit the scalers on the entire training set only, then apply them to train and test.
# Dummy (indicator) columns are left unscaled and re-attached after scaling.
dummy_columns = ['Weekend', 'Christmas vacation', 'Public holiday', 'Winter Time']
non_dummy_columns = [col for col in X_train.columns if col not in dummy_columns]

# Scale to [-1, 1] on the training data; test values can fall outside this range.
scaler_tanh = MinMaxScaler(feature_range=(-1, 1))
scaler_target_tanh = MinMaxScaler(feature_range=(-1, 1))

scaler_tanh.fit(X_train[non_dummy_columns])
scaler_target_tanh.fit(y_train.values.reshape(-1, 1))

X_train_tanh = scaler_tanh.transform(X_train[non_dummy_columns])
X_test_tanh = scaler_tanh.transform(X_test[non_dummy_columns])
y_train_tanh = scaler_target_tanh.transform(y_train.values.reshape(-1, 1))
y_test_tanh = scaler_target_tanh.transform(y_test.values.reshape(-1, 1))

# Rebuild labelled frames; the dummy columns end up as the last columns.
X_train_tanh = pd.DataFrame(X_train_tanh, columns=non_dummy_columns, index=X_train.index)
X_test_tanh = pd.DataFrame(X_test_tanh, columns=non_dummy_columns, index=X_test.index)
X_train_tanh = pd.concat([X_train_tanh, X_train[dummy_columns]], axis=1)
X_test_tanh = pd.concat([X_test_tanh, X_test[dummy_columns]], axis=1)

# Scaled targets as Series on the original datetime index.
y_train_tanh = pd.Series(y_train_tanh.flatten(), index=y_train.index)
y_test_tanh = pd.Series(y_test_tanh.flatten(), index=y_test.index)

In [None]:

# Hyperparameters for training the LSTM on the full training set.
input_size = X_train_tanh.shape[1]  # number of feature columns (scaled + dummy)
lstm_hidden_size = 64
num_layers = 1
fc_hidden_size = 256
learning_rate = 0.001  # note: the cross-validation cell above uses 0.01
batch_size = 32
num_epochs = 100
sequence_length = 24  # rows per input window
momentum = 0.9

def create_sequences(data, target, seq_length):
    """Build sliding windows over positionally indexable arrays.

    Each window holds ``seq_length`` consecutive rows of ``data`` and is
    paired with the ``target`` entry immediately after the window.

    Args:
        data: Array-like of features supporting slice indexing.
        target: Array-like of targets supporting integer indexing.
        seq_length: Number of rows per window.

    Returns:
        ``(windows, labels)`` as NumPy arrays.
    """
    windows, labels = [], []
    # `end` is the position of the label, i.e. one past the window's last row.
    for end in range(seq_length, len(data)):
        windows.append(data[end - seq_length:end])
        labels.append(target[end])
    return np.array(windows), np.array(labels)

# Turn the scaled frames into (window, next-step target) pairs.
X_train_seq, y_train_seq = create_sequences(X_train_tanh.values, y_train_tanh.values, sequence_length)
X_test_seq, y_test_seq = create_sequences(X_test_tanh.values, y_test_tanh.values, sequence_length)

# Convert to float32 tensors; targets become column vectors.
X_train_seq = torch.tensor(X_train_seq, dtype=torch.float32)
y_train_seq = torch.tensor(y_train_seq, dtype=torch.float32).view(-1, 1)
X_test_seq = torch.tensor(X_test_seq, dtype=torch.float32)
y_test_seq = torch.tensor(y_test_seq, dtype=torch.float32).view(-1, 1)

# Windows are shuffled here, unlike the unshuffled loaders used during cross-validation.
train_dataset = TensorDataset(X_train_seq, y_train_seq)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Model, SGD optimizer with momentum, 1%-per-epoch learning-rate decay, MSE loss.
model = LSTMModel(input_size=input_size, lstm_hidden_size=lstm_hidden_size, num_layers=num_layers, fc_hidden_size=fc_hidden_size)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
criterion = nn.MSELoss()

def train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=100):
    """Train ``model`` for ``num_epochs`` epochs and return it.

    The scheduler is stepped once per epoch and the mean batch loss of every
    epoch is printed.

    Args:
        model: Module to train (switched to training mode).
        train_loader: Iterable of ``(features, targets)`` batches.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object, trained in place.
    """
    model.train()
    epoch_means = []
    for epoch_number in range(1, num_epochs + 1):
        batch_losses = []
        for features, targets in train_loader:
            optimizer.zero_grad()
            batch_loss = criterion(model(features), targets)
            batch_loss.backward()
            optimizer.step()
            batch_losses.append(batch_loss.item())
        scheduler.step()
        epoch_mean = np.mean(batch_losses)
        epoch_means.append(epoch_mean)
        print(f'Epoch: {epoch_number}, Average Training Loss: {epoch_mean:.6f}')
    return model

# Train on the full training set, then persist only the weights (state dict).
trained_model = train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=num_epochs)

# NOTE(review): the file is written to the current working directory. Its keys
# match the LSTMModel class in scope when this cell runs; a later cell redefines
# LSTMModel with different submodule names - confirm which class loads this file.
torch.save(trained_model.state_dict(), 'final_trained_model_lstm.pth')

## Final model, training and recalibration loop, testing loop

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
import torch.nn as nn

class LSTMModel(nn.Module):
    """LSTM encoder with a two-layer dense head and a linear (unbounded) output.

    Args:
        input_size: Number of features per time step.
        lstm_hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        fc_hidden_size: Width of the hidden layer in the dense head.
    """

    def __init__(self, input_size, lstm_hidden_size, num_layers, fc_hidden_size):
        super().__init__()
        # Inputs are laid out (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, lstm_hidden_size, num_layers, batch_first=True)
        # Dense head: Linear -> ReLU -> Linear, no activation on the output.
        head_layers = [
            nn.Linear(lstm_hidden_size, fc_hidden_size),
            nn.ReLU(),
            nn.Linear(fc_hidden_size, 1),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a ``(batch, seq, feature)`` tensor to a ``(batch, 1)`` prediction."""
        per_step_output, _ = self.lstm(x)
        # Only the final time step is passed to the dense head.
        return self.fc(per_step_output[:, -1, :])

def rescale_values(scaler, values):
    """Undo ``scaler``'s transformation by delegating to ``inverse_transform``."""
    original_scale = scaler.inverse_transform(values)
    return original_scale

def calculate_metrics(predictions, actuals, epsilon=1e-10, small_value=1e-5):
    """Compute MSE, RMSE, MAE and MAPE (in percent) for a set of forecasts.

    Args:
        predictions: Array-like of forecast values.
        actuals: Array-like of observed values.
        epsilon: Accepted but not used by this function.
        small_value: Floor applied to ``|actuals|`` in the MAPE denominator so
            that near-zero actuals do not divide by (almost) zero.

    Returns:
        Tuple ``(mse, rmse, mae, mape)``.
    """
    mae = mean_absolute_error(actuals, predictions)
    mse = mean_squared_error(actuals, predictions)
    rmse = np.sqrt(mse)

    # Clip the magnitude of the actuals from below before dividing.
    abs_actuals = np.abs(actuals)
    denominators = np.where(abs_actuals < small_value, small_value, abs_actuals)
    relative_errors = np.abs((actuals - predictions) / denominators)
    mape = np.mean(relative_errors) * 100

    return mse, rmse, mae, mape

def calculate_smape(predictions, actuals, epsilon=1e-10):
    """Symmetric mean absolute percentage error of ``predictions``, in percent.

    ``epsilon`` is added to the denominator ``|actuals| + |predictions|`` to
    keep it non-zero when both values are zero.
    """
    numerator = 2 * np.abs(predictions - actuals)
    denominator = np.abs(actuals) + np.abs(predictions) + epsilon
    return 100 * np.mean(numerator / denominator)

# Testing loop
def test_model(model, X_test, y_test, sequence_length, scaler_target_tanh):
    model.eval()
    test_hours = len(y_test)  
    all_predictions = []
    all_actuals = []
    all_dates = []

    # Initial sequence
    X_seq = torch.tensor(X_test.iloc[:sequence_length].values, dtype=torch.float32).unsqueeze(0)

    # Predictions for the first 24 hours
    for hour in range(sequence_length):
        y_actual = y_test.iloc[hour]
        y_date = y_test.index[hour]

        with torch.no_grad():
            y_pred = model(X_seq).numpy()

        all_predictions.append(y_pred.flatten()[0])
        all_actuals.append(y_actual)
        all_dates.append(y_date)

        new_input = torch.tensor(X_test.iloc[hour].values, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
        X_seq = torch.cat((X_seq[:, 1:, :], new_input), dim=1)

    for hour in range(sequence_length, test_hours):
        y_actual = y_test.iloc[hour]
        y_date = y_test.index[hour]

        with torch.no_grad():
            y_pred = model(X_seq).numpy()

        all_predictions.append(y_pred.flatten()[0])
        all_actuals.append(y_actual)
        all_dates.append(y_date)

        new_input = torch.tensor(X_test.iloc[hour].values, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
        X_seq = torch.cat((X_seq[:, 1:, :], new_input), dim=1)

        if (hour + 1) % 24 == 0:
            actuals_24h = y_test.iloc[hour - 23:hour + 1].values
            X_test.iloc[hour - 23:hour + 1, -1] = scaler_target_tanh.transform(actuals_24h.reshape(-1, 1)).flatten().astype(X_test.iloc[:, -1].dtype)

    all_predictions = np.array(all_predictions)
    all_actuals = np.array(all_actuals)
    all_dates = pd.to_datetime(all_dates)

    return all_predictions, all_actuals, all_dates

def create_sequences(data, target, seq_length):
    """Pair every run of ``seq_length`` consecutive rows with the next target.

    Args:
        data: Array-like of features supporting slice indexing.
        target: Array-like of targets supporting integer indexing.
        seq_length: Number of rows per window.

    Returns:
        ``(windows, labels)`` as NumPy arrays; window ``i`` is
        ``data[i:i + seq_length]`` and its label is ``target[i + seq_length]``.
    """
    window_starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in window_starts]
    labels = [target[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(labels)

def train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=100):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Each batch does a standard zero-grad / forward / backward / step cycle.
    The scheduler is stepped once per epoch, after all batches, and the mean
    batch loss of the epoch is printed.

    Returns the same ``model`` object that was passed in. The per-epoch mean
    losses are collected locally but are not returned.
    """
    model.train()
    loss_history = []
    for epoch in range(num_epochs):
        batch_losses = []
        for features, targets in train_loader:
            optimizer.zero_grad()
            batch_loss = criterion(model(features), targets)
            batch_loss.backward()
            optimizer.step()
            batch_losses.append(batch_loss.item())

        # Learning-rate decay is applied per epoch, not per batch.
        scheduler.step()

        avg_loss = np.mean(batch_losses)
        loss_history.append(avg_loss)
        print(f'Epoch: {epoch+1}, Average Training Loss: {avg_loss:.6f}')
    return model

# --- Hyperparameters ---------------------------------------------------------
# The model input is the scaled continuous features followed by the unscaled
# dummy columns (see the concat inside the loop), so derive the width from those
# column lists instead of from whatever `X_train_tanh` an earlier cell left
# behind in the kernel.
input_size = len(non_dummy_columns) + len(dummy_columns)
lstm_hidden_size = 64
num_layers = 1
fc_hidden_size = 256
learning_rate = 0.01
batch_size = 32
num_epochs = 100
sequence_length = 24
momentum = 0.9

all_predictions_actuals = pd.DataFrame()
monthly_frames = []
monthly_metrics = []

# Full chronological history used to cut the rolling training windows.
# Slicing the window from `X_train` alone would stop at the end of the original
# training split, so later recalibrations would train on a shrinking window
# that never contains the most recent months. Duplicated timestamps are dropped
# in case the two splits overlap.
X_history = pd.concat([X_train, X_test])
X_history = X_history[~X_history.index.duplicated(keep='first')].sort_index()
y_history = pd.concat([y_train, y_test])
y_history = y_history[~y_history.index.duplicated(keep='first')].sort_index()

# Recalibration loop for each month from January to May 2024
for month in range(1, 6):
    # Test period: the whole calendar month, hourly.
    test_start_date = pd.to_datetime(f'2024-{month:02d}-01')
    test_end_date = test_start_date + pd.DateOffset(months=1) - pd.DateOffset(hours=1)

    # Training period: the one year ending on the last hour before the test
    # month. Ending at midnight of the previous day would drop that day's
    # hours 01:00-23:00.
    train_start_date = test_start_date - pd.DateOffset(years=1)
    train_end_date = test_start_date - pd.DateOffset(hours=1)

    X_train_window = X_history.loc[train_start_date:train_end_date]
    y_train_window = y_history.loc[train_start_date:train_end_date]

    # Scale the data (scalers are refit on each training window only)
    scaler_tanh.fit(X_train_window[non_dummy_columns])
    scaler_target_tanh.fit(y_train_window.values.reshape(-1, 1))

    X_train_tanh = scaler_tanh.transform(X_train_window[non_dummy_columns])
    y_train_tanh = scaler_target_tanh.transform(y_train_window.values.reshape(-1, 1))

    # Re-attach the index and append the dummy columns unscaled.
    X_train_tanh = pd.DataFrame(X_train_tanh, columns=non_dummy_columns, index=X_train_window.index)
    X_train_tanh = pd.concat([X_train_tanh, X_train_window[dummy_columns]], axis=1)
    y_train_tanh = pd.Series(y_train_tanh.flatten(), index=y_train_window.index)

    # Create sequences
    X_train_seq, y_train_seq = create_sequences(X_train_tanh.values, y_train_tanh.values, seq_length=sequence_length)
    X_train_seq = torch.tensor(X_train_seq, dtype=torch.float32)
    y_train_seq = torch.tensor(y_train_seq, dtype=torch.float32).view(-1, 1)

    train_dataset = TensorDataset(X_train_seq, y_train_seq)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Reinitialize the model and optimizer for each month's training
    model = LSTMModel(input_size=input_size, lstm_hidden_size=lstm_hidden_size, num_layers=num_layers, fc_hidden_size=fc_hidden_size)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
    criterion = nn.MSELoss()

    # Train the model
    trained_model = train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=num_epochs)

    # Prepare the test data, scaled with the scalers fitted on the training window
    X_test_window = X_test[test_start_date:test_end_date]
    y_test_window = y_test[test_start_date:test_end_date]

    X_test_tanh = scaler_tanh.transform(X_test_window[non_dummy_columns])
    y_test_tanh = scaler_target_tanh.transform(y_test_window.values.reshape(-1, 1))

    X_test_tanh = pd.DataFrame(X_test_tanh, columns=non_dummy_columns, index=X_test_window.index)
    X_test_tanh = pd.concat([X_test_tanh, X_test_window[dummy_columns]], axis=1)
    y_test_tanh = pd.Series(y_test_tanh.flatten(), index=y_test_window.index)

    # Test the model
    normalized_predictions, normalized_actuals, dates = test_model(trained_model, X_test_tanh, y_test_tanh, sequence_length, scaler_target_tanh)

    # Rescale predictions and actuals
    predictions_rescaled = rescale_values(scaler_target_tanh, np.array(normalized_predictions).reshape(-1, 1))
    actuals_rescaled = rescale_values(scaler_target_tanh, np.array(normalized_actuals).reshape(-1, 1))

    # Create DataFrames for predictions and actuals with datetime index
    predictions_df = pd.DataFrame(predictions_rescaled, index=dates, columns=["Prediction"])
    actuals_df = pd.DataFrame(actuals_rescaled, index=dates, columns=["Actual"])

    # Combine predictions and actuals into a single DataFrame
    predictions_actuals = pd.concat([predictions_df, actuals_df], axis=1)

    # Rebuild the combined frame from the per-month frames so that partial
    # results remain available if a later month's training is interrupted.
    monthly_frames.append(predictions_actuals)
    all_predictions_actuals = pd.concat(monthly_frames)

    # Calculate and print performance metrics
    mse, rmse, mae, mape = calculate_metrics(predictions_rescaled, actuals_rescaled)
    smape = calculate_smape(predictions_rescaled, actuals_rescaled)
    monthly_metrics.append((mse, rmse, mae, mape, smape))
    print(f'{test_start_date.strftime("%B %Y")} - Test Period: {test_start_date.date()} to {test_end_date.date()}')
    print(f'MSE: {mse:.6f}')
    print(f'RMSE: {rmse:.6f}')
    print(f'MAE: {mae:.6f}')
    print(f'MAPE: {mape:.6f}%')
    print(f'SMAPE: {smape:.6f}%')

# Summary of all months once the loop has finished.
for i, (mse, rmse, mae, mape, smape) in enumerate(monthly_metrics, 1):
    print(f'Month {i}: MSE={mse:.6f}, RMSE={rmse:.6f}, MAE={mae:.6f}, MAPE={mape:.6f}%, SMAPE={smape:.6f}%')

all_predictions_actuals.to_csv('all_predictions_actuals.csv', index=True)


### LSTM predictions to dataframe

In [None]:
# Show the combined prediction/actual frame as the cell output.
all_predictions_actuals

In [None]:

# Destination workbook for the LSTM predictions.
# NOTE(review): this is an absolute, machine-specific path, and the folder name
# contains a space right before the final slash -- confirm it matches the
# directory on disk before running on another machine.
excel_file_path = '/Users/evenbakke/Documents/Master Thesis/Predictions all models /LSTM predictions.xlsx'

# Write the frame to Excel, keeping its index as the first column.
all_predictions_actuals.to_excel(excel_file_path, index=True)

print(f'DataFrame successfully saved to {excel_file_path}')