# Import libraries

In [1]:
import numpy as np
import pandas as pd
import os

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split, TensorDataset
import torch.nn.functional as F
import torch.nn.init as init

import scipy
import pickle
import random
import sys
from os.path import dirname, join as pjoin
import scipy.io as sio

# os.path.dirname('point_forecast') is '', so this appends the absolute path of the
# parent of the current working directory to sys.path (presumably where the `utils`
# package lives -- TODO confirm).
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('point_forecast'), '..')))
# NOTE(review): star imports hide where names come from. Later cells use `plt`
# without importing it in this notebook; presumably it arrives through one of
# these modules -- TODO confirm and import it explicitly.
from utils.networks import *
from utils.trainer import *
from utils.formulations import *

# Folder the input pickle is read from, and folder saved results are read from;
# both are relative paths.
datafolderpath = '../data'
saveresultfolderpath = './result'

# Import data

In [2]:
# Read the dataset dictionary from disk.
# NOTE: pickle.load can execute arbitrary code; only open files from a trusted source.
dict_path = os.path.join(datafolderpath, 'data_central_train_nonan.pkl')
with open(dict_path, 'rb') as pickle_file:
    data = pickle.load(pickle_file)
print(data.keys())

# Inputs and targets of the train / validation / test splits
X_train, y_train = data['X_train'], data['y_train']
X_val, y_val = data['X_val'], data['y_val']
X_test, y_test = data['X_test'], data['y_test']

# Column descriptions
target_col, features_list, future_regressor = (
    data['target_col'],
    data['features_list'],
    data['future_regressor'],
)

# Date information stored alongside each split
df_train_date, df_val_date, df_test_date = (
    data['df_train_date'],
    data['df_val_date'],
    data['df_test_date'],
)

num_step_ahead, resolution = data['num_step_ahead'], data['resolution']

df_test_features = data['df_test_nonan'] # For feature plotting

dict_keys(['X_train', 'y_train', 'X_val', 'y_val', 'X_test', 'y_test', 'target_col', 'features_list', 'future_regressor', 'df_train_nonan', 'df_val_nonan', 'df_test_nonan', 'df_train_date', 'df_val_date', 'df_test_date', 'timerange', 'num_step_ahead', 'resolution'])


# Train point forecast

## Define loss

In [3]:
class Quantileloss(nn.Module): # Quantile regression for multiple-quantiles
    """Pinball (quantile) loss averaged over samples, quantiles and steps.

    Args:
        quantiles: a single quantile level (int/float) or a sequence of levels.
    """
    def __init__(self, quantiles = [0.05, 0.95]):
        super(Quantileloss, self).__init__()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        if isinstance(quantiles, (int, float)):
            quantiles = [quantiles]  # Convert number to list
        self.quantiles = torch.as_tensor(quantiles, dtype=torch.float32, device=self.device)

    def forward(self, y, y_pred):
        """Return the mean pinball loss.

        Args:
            y: targets of shape (num_samples, num_steps).
            y_pred: predictions of shape (num_samples, num_quantiles, num_steps),
                or (num_samples, num_steps) for a point forecast; in the latter
                case the same prediction is scored against every quantile level.

        Returns:
            Scalar tensor: the pinball loss averaged over all elements.
        """
        if y_pred.dim() == y.dim():
            # A 2-D prediction has no quantile axis. Without this, y (N, 1, S)
            # would broadcast against y_pred (N, S) into an (N, N, S) tensor of
            # errors between every pair of samples.
            y_pred = y_pred.unsqueeze(1)

        # Follow the prediction's device instead of the one guessed at construction.
        quantiles_expand = self.quantiles.to(y_pred.device).view(1, -1, 1) # (1, num_quantiles, 1)

        # (num_samples, 1, num_steps): broadcasts along the quantile axis.
        # Detached so no gradient flows into the targets.
        y_expand = y.detach().unsqueeze(1)
        error = y_expand - y_pred
        pinball_i = torch.maximum(quantiles_expand*error, (quantiles_expand - 1)*error)

        return torch.mean(pinball_i) # Averaging over samples, quantiles and steps

## Define model

In [4]:
class SolarkstepaheadNet_point_exoinput(nn.Module):
    """Feed-forward network producing one point forecast per step ahead.

    The first `lag_input_window_size` input columns go through two shared
    Linear + BatchNorm + ReLU layers. Each step then has its own branch: the
    shared features are concatenated with that step's slice of the remaining
    (exogenous) columns, passed through two Linear + BatchNorm + ReLU layers and
    a final Linear layer with a single output.

    Args:
        lag_input_window_size: number of leading input columns fed to the shared layers.
        exo_input_window_size: total number of exogenous columns; each step uses
            `exo_input_window_size // predicted_step` of them.
        hidden_size: width of every hidden layer.
        predicted_step: number of steps ahead (one branch and one output each).
    """
    def __init__(self, lag_input_window_size=24, exo_input_window_size=12, hidden_size=100, predicted_step=1):
        super().__init__()

        self.lag_input_window_size = lag_input_window_size
        self.exo_input_window_size = exo_input_window_size
        self.hidden_size = hidden_size
        self.predicted_step = predicted_step
        self.num_exo_input = exo_input_window_size // predicted_step

        width = hidden_size
        branch_in = width + self.num_exo_input

        # Shared trunk applied to the lag inputs
        self.fc_common1 = nn.Linear(lag_input_window_size, width)
        self.bn_common1 = nn.BatchNorm1d(width)
        self.fc_common2 = nn.Linear(width, width)
        self.bn_common2 = nn.BatchNorm1d(width)

        # One branch per step: first hidden layer takes trunk features + exogenous slice
        self.exo_layers1 = nn.ModuleList([nn.Linear(branch_in, width) for _ in range(predicted_step)])
        self.bn_exo_layers1 = nn.ModuleList([nn.BatchNorm1d(width) for _ in range(predicted_step)])

        # Second hidden layer of each branch
        self.exo_layers2 = nn.ModuleList([nn.Linear(width, width) for _ in range(predicted_step)])
        self.bn_exo_layers2 = nn.ModuleList([nn.BatchNorm1d(width) for _ in range(predicted_step)])

        # Single-output head per step
        self.output_layers = nn.ModuleList([nn.Linear(width, 1) for _ in range(predicted_step)])

        self.relu = nn.ReLU()

    def forward(self, x):
        """Map x of shape (N, lag + exogenous columns) to forecasts of shape (N, predicted_step)."""
        n_lag = self.lag_input_window_size
        n_exo = self.num_exo_input
        lag_part, exo_part = x[:, :n_lag], x[:, n_lag:]

        # Shared trunk: Linear -> BatchNorm -> ReLU, twice
        shared = self.relu(self.bn_common1(self.fc_common1(lag_part)))
        shared = self.relu(self.bn_common2(self.fc_common2(shared)))

        branches = zip(self.exo_layers1, self.bn_exo_layers1,
                       self.exo_layers2, self.bn_exo_layers2,
                       self.output_layers)

        per_step = []
        for k, (fc1, bn1, fc2, bn2, head) in enumerate(branches):
            # Exogenous columns that belong to step k
            exo_k = exo_part[:, k*n_exo:(k + 1)*n_exo]
            hidden = torch.cat((shared, exo_k), dim=1)
            hidden = self.relu(bn1(fc1(hidden)))
            hidden = self.relu(bn2(fc2(hidden)))
            # The head is a plain Linear layer: no BatchNorm or ReLU on the output
            per_step.append(head(hidden))

        # (N, predicted_step): one column per step ahead
        return torch.cat(per_step, dim=1)

## Define trainer

In [5]:
class trainer_point_multistep():
    """Training, prediction and scoring helper for multi-step point-forecast models.

    Args:
        num_epochs: maximum number of training epochs.
        batch_size: mini-batch size of the training DataLoader.
        patience: number of non-improving epochs tolerated before early stopping.
        datanorm: 'maxmin' or 'quantile'; selects the range used by `MAE` to
            normalise the error when `ytarget` is given.
        fig_folder_path: prefix prepended to the file name when saving the loss plot.
        epoch_showloss: print the losses every `epoch_showloss` epochs.
    """
    def __init__(self, num_epochs = 100, batch_size = 10, patience = 1000, datanorm = 'quantile', fig_folder_path = './', epoch_showloss = 10):
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.patience = patience
        self.datanorm = datanorm
        self.fig_folder_path = fig_folder_path
        self.epoch_showloss = epoch_showloss

    @staticmethod
    def _as_float_tensor(data):
        """Convert a NumPy array to a float32 tensor; return anything else unchanged."""
        if isinstance(data, np.ndarray):
            return torch.tensor(data, dtype=torch.float)
        return data

    def train_test_split(self, X, y, val_ratio = 0.2, require_dataset = False):
        """Randomly split (X, y) into training and validation parts.

        Args:
            X, y: tensors or NumPy arrays with the same first dimension.
            val_ratio: fraction of samples assigned to the validation part.
            require_dataset: also return the two `random_split` subsets.

        Returns:
            X_train, y_train, X_val, y_val (plus train_dataset, val_dataset when
            `require_dataset` is True).
        """
        X = self._as_float_tensor(X)
        y = self._as_float_tensor(y)
        dataset = TensorDataset(X, y)
        val_size = int(val_ratio*len(dataset))
        train_size = len(dataset) - val_size
        train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

        # Gather each subset directly by its indices. This also works when a
        # subset is empty, where a DataLoader with batch_size = 0 would raise.
        train_idx = torch.as_tensor(list(train_dataset.indices), dtype=torch.long)
        val_idx = torch.as_tensor(list(val_dataset.indices), dtype=torch.long)
        X_train, y_train = X[train_idx], y[train_idx]
        X_val, y_val = X[val_idx], y[val_idx]

        if require_dataset:
            return X_train, y_train, X_val, y_val, train_dataset, val_dataset
        return X_train, y_train, X_val, y_val

    def training(self, X_train, y_train, X_val, y_val, criterion, optimizer, model):
        """Train `model` with early stopping on the validation loss.

        Targets are standardised with the mean / std of `y_train` inside the
        training step; the per-epoch losses are computed on de-standardised
        outputs against the raw targets.

        Args:
            X_train, y_train, X_val, y_val: tensors (NumPy arrays are converted).
            criterion: callable invoked as `criterion(target, prediction)`.
            optimizer: optimizer already bound to `model`'s parameters.
            model: the network to train.

        Returns:
            (train_loss_list, val_loss_list, model), where `model` carries the
            weights of the epoch with the lowest validation loss, if any.
        """
        # Local import: `copy` is not imported at the top of the notebook.
        import copy

        # Check for GPU availability and move model to GPU
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print('----------Training using: '+str(device)+ '----------')

        model = model.to(device)
        X_train = self._as_float_tensor(X_train).to(device)
        y_train = self._as_float_tensor(y_train).to(device)
        X_val = self._as_float_tensor(X_val).to(device)
        y_val = self._as_float_tensor(y_val).to(device)
        ymean = torch.mean(y_train)
        ystd = torch.std(y_train)

        # The tensors already live on `device`, so batches need no further transfer.
        dataloader_train = DataLoader(TensorDataset(X_train, y_train), batch_size = self.batch_size, shuffle = True)

        train_loss_list = []
        val_loss_list = []

        ## For early stopping ##
        best_val_loss = float('inf')
        best_train_loss = float('inf')
        best_epoch = None
        best_model_weights = None
        patience = self.patience
        ######################

        for epoch in range(self.num_epochs):
            # Train a model
            model.train()
            for X_batch_train, y_batch_train in dataloader_train:
                ## Data normalization ##
                y_batch_train = (y_batch_train - ymean)/ystd

                optimizer.zero_grad()
                outputs = model(X_batch_train)
                loss = criterion(y_batch_train, outputs)
                loss.backward()
                optimizer.step()

            # Evaluate a model on the full sets, in the original target scale
            model.eval()
            with torch.no_grad():
                outputs_train = model(X_train)*ystd + ymean
                train_epoch_loss = criterion(y_train, outputs_train).item()
                train_loss_list.append(train_epoch_loss)

                outputs_val = model(X_val)*ystd + ymean
                val_epoch_loss = criterion(y_val, outputs_val).item()
                val_loss_list.append(val_epoch_loss)

            ## For early stopping: applied once per epoch on the full validation set ##
            if val_epoch_loss < best_val_loss:
                best_val_loss = val_epoch_loss
                best_train_loss = train_epoch_loss
                best_model_weights = copy.deepcopy(model.state_dict())
                best_epoch = epoch
                patience = self.patience  # Reset patience counter
            else:
                patience -= 1
                # `<=` so a non-positive starting patience still stops training
                if patience <= 0:
                    print(f'Early stopping occurs within {epoch + 1} Epochs.')
                    break
            #########################

            if (epoch + 1) % self.epoch_showloss == 0:
                print(f'Epoch [{epoch+1}/{self.num_epochs}], Training Loss: {train_epoch_loss:.4f}, Val Loss: {val_epoch_loss:.4f}')

        if best_model_weights is None:
            # No epoch ran, or the validation loss never dropped below infinity
            # (e.g. it was NaN): there is no snapshot to restore.
            print('No epoch improved the validation loss; returning the model with its current weights.')
        else:
            # Load the best model weights before return to user
            print(f'The best model occurs in {best_epoch + 1} Epoch with the training Loss: {best_train_loss:.4f}, the val. Loss: {best_val_loss:.4f}.')
            model.load_state_dict(best_model_weights)

        return train_loss_list, val_loss_list, model

    def plotloss(self, train_loss_list, val_loss_list, returnplot = False, plotname = None):
        """Plot training and validation loss per epoch on a log-scaled y axis.

        Args:
            train_loss_list, val_loss_list: per-epoch loss values.
            returnplot: when True, also save the figure as a PDF.
            plotname: suffix of the saved file name; must be a string when
                `returnplot` is True.
        """
        # NOTE(review): `plt` is not imported in the visible notebook cells;
        # presumably it is provided by one of the star imports -- confirm.
        fig, ax = plt.subplots()
        fig.set_size_inches(5,3)
        ax.plot(train_loss_list, color = 'blue', label = 'Training loss', alpha = 0.5)
        ax.plot(val_loss_list, color = 'red', label = 'Validation loss', alpha = 0.5)
        ax.set_yscale('log')
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Loss')
        ax.set_title('Training loss vs epoch')
        ax.grid()
        ax.legend()
        plt.show()

        if returnplot:
            fig.savefig(self.fig_folder_path+'allloss_'+plotname+".pdf",format='pdf',bbox_inches='tight',pad_inches=0,transparent=True)

    def predict(self, X, model, ymean = 0, ystd = 1):
        """Run `model` on X in evaluation mode and rescale the output.

        Args:
            X: input tensor (a NumPy array is converted).
            model: trained network.
            ymean, ystd: the output is returned as `model(X)*ystd + ymean`.

        Returns:
            Prediction tensor on the device used for inference.
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        X = self._as_float_tensor(X).to(device)
        model = model.to(device)

        # Inference must not use batch statistics or update BatchNorm running
        # stats; the caller's train/eval mode is restored afterwards.
        was_training = model.training
        model.eval()
        try:
            with torch.no_grad():
                y_pred = model(X)*ystd + ymean
        finally:
            model.train(was_training)
        return y_pred

    def MAE(self, y, y_pred, ytarget = None, alongaxis = 0):
        """Mean absolute error along `alongaxis`, optionally normalised.

        Args:
            y, y_pred: target and prediction tensors.
            ytarget: when given, the MAE is divided by the range of `ytarget`
                (max - min for 'maxmin', 95% - 5% quantile for 'quantile').
            alongaxis: dimension to average over.

        Raises:
            ValueError: if `ytarget` is given and `self.datanorm` is neither
                'maxmin' nor 'quantile'.
        """
        # Predictions may live on the GPU while the targets are still on the CPU.
        y = y.to(y_pred.device)
        mae = torch.mean(torch.abs(y - y_pred), dim = alongaxis).detach()
        if ytarget is None:
            return mae

        if self.datanorm == 'maxmin':
            y_range = ytarget.max() - ytarget.min()
        elif self.datanorm == 'quantile':
            # np.quantile needs host memory, so tensors are copied to the CPU first
            if torch.is_tensor(ytarget):
                target_np = ytarget.detach().cpu().numpy()
            else:
                target_np = np.asarray(ytarget)
            y_range = float(np.quantile(target_np, 0.95) - np.quantile(target_np, 0.05))
        else:
            raise ValueError("Input must be maxmin or quantile")
        return mae/y_range

## Training the point forecast

In [None]:
# Seed torch's RNG before the model is built so the run is repeatable
torch.manual_seed(21)
model = SolarkstepaheadNet_point_exoinput(lag_input_window_size = len(features_list) - len(target_col) - len(future_regressor)
                       , exo_input_window_size = len(future_regressor)
                       , hidden_size = 100, predicted_step = len(target_col))

optimizer = torch.optim.Adam(model.parameters(), lr = 0.0002)
# Pinball loss at the 0.5 quantile, i.e. half the absolute error
criterion = Quantileloss(quantiles = 0.5)

train_point = trainer_point_multistep(num_epochs = 10, batch_size = int(0.3*X_train.shape[0]), patience = 100)
train_loss_list, val_loss_list, model = train_point.training(X_train, y_train, X_val, y_val, criterion, optimizer, model)
# Fixed: the trainer instance is `train_point`; `train` is not defined in this notebook
train_point.plotloss(train_loss_list, val_loss_list, returnplot = False, plotname = None)

# Rescale the predictions with the same training-set statistics for every split
ymean_train = torch.mean(y_train)
ystd_train = torch.std(y_train)

outputs_train_point = train_point.predict(X_train, model, ymean = ymean_train, ystd = ystd_train)
outputs_val_point = train_point.predict(X_val, model, ymean = ymean_train, ystd = ystd_train)
outputs_test_point = train_point.predict(X_test, model, ymean = ymean_train, ystd = ystd_train)

outputs_val_eval = outputs_val_point
outputs_test_eval = outputs_test_point

# MAE averaged over samples: one value per step ahead
MAE_val = train_point.MAE(y_val, outputs_val_point)
MAE_test = train_point.MAE(y_test, outputs_test_point)

print('MAE of validation set: 15, 30, 45, 60 min ahead')
print(MAE_val)
print('MAE of test set: 15, 30, 45, 60 min ahead')
print(MAE_test)

# saved_result = {'outputs_train_point':outputs_train_point, 'outputs_val_point':outputs_val_point,
#                     'outputs_test_point':outputs_test_point}

# filename = f'point_solarcentral_4step.pkl'
# print(f'Save as: {filename}')
# # # Save
# dict_path = os.path.join('./result', filename)
# with open(dict_path, 'wb') as pickle_file:
#     pickle.dump(saved_result, pickle_file)

----------Training using: cpu----------


## Load the point forecast result

In [3]:
# Reload the stored point forecasts from the result folder.
# NOTE: pickle.load can execute arbitrary code; only open files from a trusted source.
dict_path = os.path.join(saveresultfolderpath, 'point_solarcentral_4step.pkl')
with open(dict_path, 'rb') as pickle_file:
    result_point = pickle.load(pickle_file)
print(result_point.keys())

# Unpack the forecasts of the three splits
outputs_train_point, outputs_val_point, outputs_test_point = (
    result_point['outputs_train_point'],
    result_point['outputs_val_point'],
    result_point['outputs_test_point'],
)

dict_keys(['outputs_train_point', 'outputs_val_point', 'outputs_test_point'])


In [7]:
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = 'notebook_connected'  # or 'notebook'

# Column (step_ahead - 1) of the training targets and of the point forecasts
step_ahead = 2
y_plot = y_train[:, step_ahead - 1]
y_pred = outputs_train_point[:, step_ahead - 1]

# Plain sample index used as the x axis
x = np.arange(len(y_plot))

fig = go.Figure()

# Ground truth: solid black line
fig.add_trace(go.Scatter(x=x, y=y_plot, mode='lines', name='Ground Truth',
                         line={'color': 'black'}))

# Point forecast: dashed blue line
fig.add_trace(go.Scatter(x=x, y=y_pred, mode='lines', name='Point forecast',
                         line={'color': '#1f77b4', 'dash': 'dash'}))

# Function to add PI bands
def add_pi_band(fig, x, pi, name, fillcolor):
    """Add a filled band between the two columns of `pi` to `fig`.

    Column 1 of `pi` is drawn first as an invisible line without a legend entry
    (presumably the upper bound -- confirm against the caller); column 0 is then
    drawn with fill='tonexty', so the area between the two traces is shaded with
    `fillcolor` and labelled `name`.
    """
    # First edge: zero-width line, hidden from the legend
    first_edge = go.Scatter(x=x, y=pi[:, 1], mode='lines',
                            line=dict(width=0), showlegend=False)
    # Second edge: fills towards the trace added just before it
    second_edge = go.Scatter(x=x, y=pi[:, 0], mode='lines',
                             line=dict(width=0), fill='tonexty',
                             fillcolor=fillcolor, name=name)

    for edge in (first_edge, second_edge):
        fig.add_trace(edge)

# Figure title, axis labels and theme
layout_options = {
    'title': f'Prediction Intervals for Step Ahead {step_ahead}',
    'xaxis_title': 'Time Index',
    'yaxis_title': 'Value',
    'template': 'plotly_white',
}
fig.update_layout(**layout_options)

fig.show()