# Imports

In [None]:
from estimator import HopfieldEstimator

from gluonts.torch.distributions import StudentTOutput, NegativeBinomialOutput
from datasets import load_dataset
from gluonts.dataset.split import split
from gluonts.evaluation import make_evaluation_predictions, Evaluator
from gluonts.dataset.common import ListDataset
from gluonts.dataset.util import to_pandas
from gluonts.dataset.repository.datasets import get_dataset

import numpy as np
import pandas as pd
import json
import pickle
import torch
import time

import warnings
warnings.filterwarnings('ignore')

# Datasets

In [None]:
# Names of the benchmark datasets fetched through `get_dataset`.
dta_sets = [
    "exchange_rate",
    "solar-energy",
    "electricity_hourly",
    "traffic",
    "taxi_30min",
    "wiki-rolling_nips",
]

# One summary line per dataset:
# "<name>: <cardinality of first static categorical feature>/<freq>/<prediction length>"
for dta in dta_sets:
    dataset = get_dataset(dta, regenerate=False)
    meta = dataset.metadata
    # presumably the cardinality equals the number of series in the dataset -- TODO confirm
    cardinality = int(meta.feat_static_cat[0].cardinality)
    print(f"{dta}: {cardinality}/{meta.freq}/{meta.prediction_length}")

# Train model

In [None]:
# Parameter Settings
params = {
    # General settings
    "model": "hopfield",
    "context_length_factor": 2,  # context_length = factor * prediction_length
    "batch_size": 32,
    "max_epochs": 100,

    # Model-specific settings
    "nhead": 4,
    "num_encoder_layers": 8,
    "num_decoder_layers": 8,
    "dim_feedforward": 32,
    "embedding_dimension": 4,
}

# Number of independent train/evaluate repetitions per dataset
n_runs = 5

# Collects one single-row DataFrame of aggregate metrics per (dataset, run).
metrics = []
for dta in dta_sets:

    # Dataset and the quantities derived from its metadata
    dataset = get_dataset(dta, regenerate=False)
    freq = dataset.metadata.freq
    prediction_length = dataset.metadata.prediction_length
    cardinality = int(dataset.metadata.feat_static_cat[0].cardinality)
    context_length = int(params["context_length_factor"]) * prediction_length

    # Distribution: negative binomial for the two listed datasets,
    # Student-t for every other dataset.
    dist_mapping = {
        "taxi_30min": NegativeBinomialOutput(),
        "wiki-rolling_nips": NegativeBinomialOutput(),
    }
    dist = dist_mapping.get(dta, StudentTOutput())

    # Scaling: "mean" for the two listed datasets, "std" otherwise.
    # The value is forwarded unchanged to the estimator's `scaling` argument.
    scaling_mapping = {
        "taxi_30min": "mean",
        "wiki-rolling_nips": "mean",
    }
    scaling = scaling_mapping.get(dta, "std")

    for i in range(n_runs):

        print(f"\n\nEstimation of {params['model'].upper()} on {dta} dataset: {i+1}/{n_runs}\n\n")

        # Estimator
        start = time.perf_counter()
        estimator = HopfieldEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_feat_static_cat=1,
            cardinality=[cardinality],
            num_batches_per_epoch=100,

            distr_output=dist,
            scaling=scaling,
            nhead=params["nhead"],
            num_encoder_layers=params["num_encoder_layers"],
            num_decoder_layers=params["num_decoder_layers"],
            dim_feedforward=params["dim_feedforward"],
            embedding_dimension=[params["embedding_dimension"]],
            batch_size=params["batch_size"],
            trainer_kwargs=dict(max_epochs=params["max_epochs"], accelerator="auto"),
        )

        # Train
        predictor = estimator.train(dataset.train, shuffle_buffer_length=1024, cache_data=True)

        # Stop the clock only after training has finished, so that `runtime`
        # covers estimator construction plus the fit (in seconds) rather than
        # just the near-instant constructor call.
        runtime = time.perf_counter() - start

        # Forecast
        forecast_it, ts_it = make_evaluation_predictions(dataset=dataset.test, predictor=predictor)
        forecasts = list(forecast_it)
        tss = list(ts_it)

        # Evaluate
        evaluator = Evaluator()
        agg_metrics, _ = evaluator(tss, forecasts)

        # Metrics: one row whose columns are the aggregate metric names.
        # Building the frame directly from the mapping keeps numeric dtypes,
        # which the later group-by mean/min/max relies on.
        metrics_df = pd.DataFrame([dict(agg_metrics)])
        metrics_df["dataset"] = dta
        metrics_df["model"] = params["model"]
        metrics_df["runtime"] = runtime
        metrics_df["run"] = i + 1
        metrics.append(metrics_df)

# Output Metrics

In [None]:
# Stack the per-run rows into a single table and write it to
# "<MODEL>_experiments.csv" in the working directory (row index omitted).
output_path = f"{params['model'].upper()}_experiments.csv"
metrics_df = pd.concat(metrics, axis=0)
metrics_df.to_csv(output_path, index=False)

# Calculate Metrics for Table

In [None]:
def grouped_loss(df, group_by_cols, target_cols, digits=3):
    """
    Summarise target columns per group as "mean (min, max)" strings.

    Parameters:
    - df (pd.DataFrame): Table holding both the grouping and the target columns.
    - group_by_cols (list): Column names that define the groups.
    - target_cols (list): Column names to summarise within each group.
    - digits (int): Decimal places used when formatting each number. Default is 3.

    Returns:
    - pd.DataFrame: Indexed by the groups, one column per target column; each
      cell is the string "<mean> (<min>, <max>)" with fixed-point formatting.
    """
    selected = df.groupby(group_by_cols)[target_cols]

    # Per-group statistics, each shaped (groups x target columns)
    averages = selected.mean()
    lows = selected.min()
    highs = selected.max()

    # Fixed-point formatter, e.g. '{:.3f}'.format
    fmt = f"{{:.{digits}f}}".format

    # Assemble the "mean (min, max)" text column by column.
    summary = {
        name: averages[name].map(fmt) + ' (' + lows[name].map(fmt) + ', ' + highs[name].map(fmt) + ')'
        for name in averages.columns
    }
    return pd.DataFrame(summary, index=averages.index, columns=averages.columns)


# Metric columns of the results table that go into the summary
metric_cols = [
    "mean_wQuantileLoss",
    "wQuantileLoss[0.5]",
    "wQuantileLoss[0.9]",
    "MSIS",
    "NRMSE",
    "sMAPE",
    "MASE",
]

# One summary row per (dataset, model) pair
group_cols = ["dataset", "model"]

# Decimal places shown in the summary
digits = 4

loss_metrics = grouped_loss(
    df=metrics_df,
    group_by_cols=group_cols,
    target_cols=metric_cols,
    digits=digits,
)
loss_metrics

In [None]:
# Render the summary as LaTeX source. With index=False the row index is not
# written; float_format is applied to floating-point cells.
format_str = f"{{:.{digits}f}}"
loss_metrics.to_latex(float_format=format_str.format, index=False)