In [157]:
import pmdarima as Arima
from utils import data_handling, training_functions, helpers

from sklearn.metrics import mean_squared_error as mse
import pandas as pd
import config
import pickle
import torch


In [11]:
# Load the electricity dataset. `df` is just a second name for the same
# dict object, so the in-place updates below are visible through both names.
data_dict = data_handling.format_electricity()
df = data_dict

# Replace every entry of the dict with its tensor conversion.
for key in list(df.keys()):
    value = df[key]
    df[key] = data_handling.df_to_tensor(value)

# Standardize the training split first and keep the second return value
# (presumably the fitted statistics -- TODO confirm in helpers).
train_standardize_dict = None
df["train"], train_standardize_dict = helpers.custom_standardizer(df["train"])

# Reuse the training metrics for validation and test so that the held-out
# splits are not standardized with values derived from themselves.
for split_name in ("validation", "test"):
    df[split_name], _ = helpers.custom_standardizer(df[split_name], train_standardize_dict)


In [None]:
# Alternative data source: load the Bavaria electricity tensor and split it
# with standardization enabled.
data_tensor = data_handling.load_bavaria_electricity()
split_result = data_handling.train_test_split_eu_elec(data_tensor, standardize=True)

# NOTE(review): this rebinds `data_dict` only; a name that was bound to the
# previous dict earlier in the notebook (e.g. `df`) keeps pointing at the old
# object -- confirm which dataset the later cells are meant to use.
data_dict, standadizer = split_result

In [133]:
def create_lagged(df, lags=(24, 48, 72, 96), period=24):
    """Build lagged copies of a 2-D tensor plus a cyclic time index.

    Args:
        df: 2-D ``torch.Tensor``. Rows (dim 0) are the axis that gets shifted;
            columns (dim 1) are shifted independently of each other.
        lags: Row offsets to shift by. Each positive lag yields one channel in
            which the first ``lag`` rows are zeros and the remaining rows are
            the input delayed by ``lag`` rows. A lag <= 0 yields the input
            unchanged.
        period: Length of the cycle for the index channel; row ``t`` gets the
            value ``t % period`` (24 gives an hour-of-day index if rows are
            hourly and start at hour 0 -- TODO confirm against the data).

    Returns:
        Tensor of shape ``(rows, cols, len(lags) + 1)``: one channel per lag,
        in the order given, followed by the cyclic index channel.
    """
    n_rows, n_cols = df.size(0), df.size(1)

    def lag_tensor(series, lag):
        if lag > 0:
            # Never pad more rows than the series has; otherwise a lag longer
            # than the series would produce a channel with the wrong length.
            pad_rows = min(lag, n_rows)
            # Create the padding on the same device as the input.
            padding = torch.zeros(pad_rows, n_cols, device=series.device)
            return torch.cat((padding, series[: n_rows - pad_rows]), dim=0)
        return series

    lagged_channels = [lag_tensor(df, lag) for lag in lags]

    # The index must be derived from the number of ROWS. Tiling the pattern by
    # the number of columns only covers all rows when rows <= period * cols.
    time_of_day = torch.arange(n_rows, device=df.device) % period
    time_of_day_ids = time_of_day.unsqueeze(1).expand(n_rows, n_cols)

    return torch.stack((*lagged_channels, time_of_day_ids), dim=2)

torch.Size([4993, 348, 5])

In [167]:
# Evaluate one auto-ARIMA model per series (column) on consecutive,
# non-overlapping test windows and store the mean MSE per series.
HORIZON = 96          # rows per forecast window
TRAIN_WINDOW = 2000   # number of most recent training rows used for fitting

num_96_horizons = df["test"].size(0) // HORIZON
if num_96_horizons == 0:
    # Fail before any (expensive) model fit instead of dividing by zero later.
    raise ValueError(
        f"test split has {df['test'].size(0)} rows, fewer than one {HORIZON}-step window"
    )

lagged_covariates_train = create_lagged(df["train"])
lagged_covariates_test = create_lagged(df["test"])

filename = config.CONFIG_OUTPUT_PATH["arima"] / 'arima_electricity_predictions.pkl'

# Resume from earlier results if present. Only "file missing" and "file is not
# a readable pickle" mean "start from scratch"; anything else (permissions,
# KeyboardInterrupt, ...) must surface rather than silently reset the results.
# NOTE: pickle.load can execute arbitrary code -- only load files written by
# this notebook.
try:
    with open(filename, 'rb') as file:
        prediction_list = pickle.load(file)
except (FileNotFoundError, EOFError, pickle.UnpicklingError):
    print("no predictions available.")
    prediction_list = []

# One entry per already-processed series, so continue with the next column.
for series_idx in range(len(prediction_list), df["train"].size(1)):
    # `X=` is the pmdarima (>= 1.8) name for exogenous regressors; the older
    # `exogenous=` keyword is deprecated.
    model = Arima.auto_arima(
        df["train"][-TRAIN_WINDOW:, series_idx],
        X=lagged_covariates_train[-TRAIN_WINDOW:, series_idx, :],
        stepwise=True,
        seasonal=True,
        m=24,
        maxiter=5,
    )

    # NOTE(review): the model is not updated between windows, so each
    # predict() call appears to forecast the HORIZON steps directly after the
    # training data, differing only through the covariates. If a rolling-origin
    # evaluation is intended, consider `model.update(...)` after each window.
    sum_mse = 0
    for i in range(num_96_horizons):
        time_step = i * HORIZON
        window = slice(time_step, time_step + HORIZON)
        target = df["test"][window, series_idx]

        lagged_window_test = lagged_covariates_test[window, series_idx, :]
        forecasts = model.predict(n_periods=HORIZON, X=lagged_window_test, return_conf_int=False)

        sum_mse = sum_mse + mse(target, forecasts)

    mean_mse = sum_mse / num_96_horizons
    prediction_list.append(mean_mse)
    print(mean_mse)

    # Checkpoint after every series so an interrupted run can be resumed.
    with open(filename, 'wb') as file:
        pickle.dump(prediction_list, file)

no predictions available.
