In [1]:
from collections.abc import Callable

import pandas as pd
import torch
from scipy import stats

from core.util.get_datasets import cross_validation
from core.util.hyperparameter_configuration import get_hyperparameter_configuration
from core.util.metrics import mae, rmse, smape

In [2]:
def tensor_to_list(tensor: torch.Tensor) -> tuple[list, list]:
    """Split a tensor into positional indices and the first value of each row.

    Args:
        tensor: Tensor that is indexed as ``tensor[i][0]``; every such element
            must hold exactly one value so ``.item()`` can convert it.

    Returns:
        A tuple ``(x, y)`` where ``x`` is ``[0, 1, ..., len(tensor) - 1]`` and
        ``y`` contains the Python scalar read from the first entry of each row.
    """
    # detach() drops any autograd history before the value is turned into a scalar.
    y = [row[0].detach().item() for row in tensor]
    x = list(range(len(y)))
    return (x, y)


def fix_offset(x: list, offset: float) -> list:
    """Shift every entry of ``x`` by ``offset`` so it continues another list.

    The list is updated in place and that same list object is returned.
    """
    # Slice assignment rewrites the contents of the caller's list object.
    x[:] = [value + offset for value in x]
    return x

# Naive baseline
We start by defining a linear regression naive baseline.

The regression is fitted on the train set only, whereas the loss is calculated on the test set.

In [3]:
class NBE:
    """Naive baseline: a straight line fitted with ``scipy.stats.linregress``.

    After construction, ``slope`` and ``intercept`` hold the coefficients of
    the fitted line as plain Python floats.
    """

    def __init__(
        self,
        x_train: list,
        y_train: list,
    ) -> None:
        """Fit the line on the train set.

        Args:
            x_train: Independent values (here: positional indices).
            y_train: Observed values, one per entry of ``x_train``.
        """
        # Only the first two fields of the result are needed; star-unpacking
        # keeps this independent of how many further fields are returned.
        slope, intercept, *_ = stats.linregress(x_train, y_train)
        # float() accepts both NumPy scalars and plain Python numbers,
        # whereas .item() exists only on the former.
        self.slope = float(slope)
        self.intercept = float(intercept)

    def print_formula(self) -> None:
        """Print the formula of the regression."""
        print(f"nbe(x) = {self.slope} * x + {self.intercept}")

    def _predict(self, x_values: list) -> list:
        """Evaluate the fitted line at every entry of ``x_values``."""
        return [self.slope * x + self.intercept for x in x_values]

    def get_loss(
        self,
        loss_function: Callable[[list, list], float],
        x_test: list,
        y_test: list,
    ) -> float:
        """Calculate the loss of the baseline.

        Args:
            loss_function: Called as ``loss_function(y_test, predictions)``.
            x_test: Positions at which the fitted line is evaluated.
            y_test: Target values handed to ``loss_function`` unchanged.

        Returns:
            Whatever ``loss_function`` returns for the targets and predictions.
        """
        return loss_function(y_test, self._predict(x_test))

Then we define our 24-hour lag baseline, which uses the value observed 24 steps earlier as the prediction.

In [4]:
def _24hlnbe(y_test: list) -> tuple[list, list]:
    """Create two lists with a 24 hour shift."""
    target_24h = []
    predicted_24h = []
    for i in range(len(y_test) - 24):
        predicted_24h.append(y_test[i])
        target_24h.append(y_test[i + 24])
    return (target_24h, predicted_24h)

We load the train and test sets in order to calculate the loss of the baselines.

In [5]:
# Read the values passed to cross_validation from the shared configuration.
hyperparameters = get_hyperparameter_configuration()
horizon = hyperparameters["horizon"]
train_days = hyperparameters["train_days"]
val_days = hyperparameters["val_days"]
test_days = hyperparameters["test_days"]
LOOKBACK = 36

# Only the second and sixth values returned by cross_validation are used here.
# The tensors keep their own names so that y_train / y_test always refer to
# plain lists and are never rebound from one kind of object to another.
_, y_train_tensor, _, _, _, y_test_tensor = cross_validation(
    LOOKBACK, horizon, train_days, val_days, test_days
)

x_train, y_train = tensor_to_list(y_train_tensor)
x_test, y_test = tensor_to_list(y_test_tensor)
# Shift the test indices so they continue where the train indices stop.
x_test = fix_offset(x_test, len(x_train))

Using the train set, we initialise the NBE and print its formula.

In [6]:
# Fit the regression on the train indices/values only, then print the fitted line.
nbe = NBE(x_train=x_train, y_train=y_train)
nbe.print_formula()

nbe(x) = -3.914709153808514e-07 * x + 0.13106607374791684


Create the two lists needed for the 24h lag baseline

In [7]:
# Pair each test value (used as the prediction) with the value 24 positions later (the target).
target_24h, predicted_24h = _24hlnbe(y_test)

Finally, we calculate the metrics for both baselines.

In [8]:
# Keep each label next to its function so the two cannot drift apart.
# The second label is "RMSE", matching the rmse function; it was misspelled
# as "RSME" before.
metric_functions = {"MAE": mae, "RMSE": rmse, "sMAPE": smape}

loss = {
    "Metric": list(metric_functions),
    # The regression baseline is evaluated at the test positions.
    "NBE": [
        nbe.get_loss(metric, x_test, y_test)
        for metric in metric_functions.values()
    ],
    # The lag baseline compares each target with its lagged counterpart.
    "24hLNBE": [
        metric(target_24h, predicted_24h)
        for metric in metric_functions.values()
    ],
}
pd.DataFrame(loss)

Unnamed: 0,Metric,NBE,24hLNBE
0,MAE,0.123194,0.057574
1,RSME,0.157893,0.09411
2,sMAPE,108.373413,60.343154
