In [1]:
import argparse
import time
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from utils_benchmark import (
    compute_all_metrics,
    get_seasonality,
    list_available_datasets,
    load_dataset,
    print_available_datasets,
)

warnings.filterwarnings('ignore')
from smooth.adam_general.core.adam import ADAM

In [2]:
# Root folder of the benchmark data; each dataset lives in its own sub-directory
# (e.g. DEFAULT_DATA_DIR + "/m1_monthly").
# NOTE(review): this is an absolute, machine-specific path stored as a plain str
# (not a pathlib.Path) — it must be adjusted before running on another machine.
DEFAULT_DATA_DIR = "/home/filtheo/smooth/python/tests/speed_tests/benchmark_data"

In [3]:
def list_available_datasets(data_dir=None):
    """List all available datasets in the data directory.

    A dataset is any immediate sub-directory of ``data_dir`` that contains a
    ``metadata.csv`` file.

    Note: this definition shadows the ``list_available_datasets`` name that is
    imported from ``utils_benchmark`` in the notebook's import cell.

    Args:
        data_dir (str | Path | None): Directory to scan. Falls back to
            ``DEFAULT_DATA_DIR`` when omitted, ``None`` or empty.

    Returns:
        list[str]: Names of the dataset directories, sorted alphabetically.
        An empty list is returned when ``data_dir`` does not exist or is not
        a directory.
    """
    # DEFAULT_DATA_DIR is a plain string, so the fallback has to be wrapped in
    # Path too; otherwise the no-argument call fails on ``str.exists``.
    data_dir = Path(data_dir) if data_dir else Path(DEFAULT_DATA_DIR)

    # is_dir() also covers the "path is a regular file" case, where iterdir()
    # would raise NotADirectoryError.
    if not data_dir.is_dir():
        return []

    return sorted(
        entry.name
        for entry in data_dir.iterdir()
        if entry.is_dir() and (entry / "metadata.csv").exists()
    )

# Display the dataset folders found under the default data directory.
list_available_datasets(DEFAULT_DATA_DIR)

['m1_monthly',
 'm1_quarterly',
 'm1_yearly',
 'm3_monthly',
 'm3_other',
 'm3_quarterly',
 'm3_yearly',
 'tourism_monthly',
 'tourism_quarterly',
 'tourism_yearly']

In [4]:
def load_and_datetime_index_series(dataset_path,
                                   series, h,
                                   train_start_date='2023-01-01'):
    """
    Load the train and test frames of one series and give both a synthetic
    month-end datetime index.

    The CSV files carry no real date information, so a placeholder calendar of
    consecutive month-ends is generated. It starts at the first month-end on
    or after ``train_start_date``; the test index continues directly after the
    last training period.

    Args:
        dataset_path (str | Path): Path to dataset directory.
        series (str): Series ID; ``<series>_train.csv`` and
            ``<series>_test.csv`` are read from ``dataset_path``.
        h (int): Forecast horizon (length of test set).
        train_start_date (str): Date string for first train index (default '2023-01-01').

    Returns:
        tuple (train_df, test_df)
            train_df: Pandas DataFrame with datetime index named 't'.
            test_df: Pandas DataFrame with datetime index named 't'.

    Raises:
        ValueError: If the test file does not contain exactly ``h`` rows
            (raised by pandas when the index column is assigned).
    """
    # Path handles both str and Path inputs; plain string concatenation
    # would fail for a Path argument.
    dataset_dir = Path(dataset_path)
    train_df = pd.read_csv(dataset_dir / f"{series}_train.csv")
    test_df = pd.read_csv(dataset_dir / f"{series}_test.csv")

    n_train = len(train_df)
    # One continuous month-end calendar covering train + test. An offset
    # object is used instead of the 'M' string alias, which newer pandas
    # versions deprecate.
    calendar = pd.date_range(
        start=train_start_date,
        periods=n_train + h,
        freq=pd.offsets.MonthEnd(),
    )

    # assign + set_index returns new frames instead of mutating in place.
    train_df = train_df.assign(t=calendar[:n_train]).set_index('t')
    test_df = test_df.assign(t=calendar[n_train:]).set_index('t')
    return train_df, test_df

In [5]:
# Benchmark dataset to work with and the folder that holds its files.
dataset = 'm1_monthly'
dataset_path = f"{DEFAULT_DATA_DIR}/{dataset}"

# metadata.csv lists the series of the dataset together with their forecast
# horizon and frequency.
metadata = pd.read_csv(f"{dataset_path}/metadata.csv")
series_ids = metadata['series_id'].tolist()

# Horizon and frequency are taken from the first metadata row.
# NOTE(review): assumes both are identical for every series in the dataset — confirm.
h = metadata['horizon'].iloc[0]
freq = metadata['frequency'].iloc[0]

In [6]:
# For every series
# Fit-and-forecast loop over the series of the selected dataset.
# NOTE(review): the unconditional `break` at the end of the body stops the loop
# after the first series, so only series_ids[0] is actually processed.
for series in series_ids:
    # Load the train/test frames of this series with a synthetic datetime index
    train_df, test_df = load_and_datetime_index_series(dataset_path, series, h)

    # Define and fit the model; 'ZZZ' presumably asks ADAM to select the ETS
    # components automatically (the printed summary reports the chosen model) — TODO confirm
    model_optimal = ADAM(model='ZZZ', lags=[freq], initial='optimal')
    model_optimal.fit(train_df)
    forecast_result = model_optimal.predict(h=h)
    # Reuse the test index so the actual values assigned next line up with the forecasts
    # (assumes predict returns a DataFrame with h rows — TODO confirm)
    forecast_result.index = test_df.index
    forecast_result['True'] = test_df['y']
    forecast_result['series_id'] = series
    # forecast_result is not stored or accumulated anywhere; it is rebound on each iteration
    print("Series: ", series)
    #print('--'*30)
    print(model_optimal)
    print('-'*100)
    break

Series:  series_0001
Time elapsed: 6.00 seconds
Model estimated using ADAM() function: ETS(MMN)
With optimal initialisation
Distribution assumed in the model: Gamma
Loss function type: likelihood; Loss function value: 595.9333
Persistence vector g:
 alpha   beta
0.0004 0.0002
Sample size: 42
Number of estimated parameters: 5
Number of degrees of freedom: 37
Information criteria:
      AIC      AICc       BIC      BICc
1201.8666 1203.5332 1210.5549 1213.6696
----------------------------------------------------------------------------------------------------


In [7]:
# (rows, columns) of the training frame left in scope by the loop cell.
train_df.shape

(42, 1)

In [8]:
# Refit with the explicit 'MMN' specification instead of 'ZZZ'. This rebinds
# model_optimal and relies on train_df and freq left in scope by earlier cells.
# NOTE(review): presumably a timing comparison against the 'ZZZ' fit — confirm intent.
model_optimal = ADAM(model='MMN', lags=[freq], initial='optimal')
model_optimal.fit(train_df)
print(model_optimal)

Time elapsed: 0.08 seconds
Model estimated using ADAM() function: ETS(MMN)
With optimal initialisation
Distribution assumed in the model: Gamma
Loss function type: likelihood; Loss function value: 595.9333
Persistence vector g:
 alpha   beta
0.0004 0.0002
Sample size: 42
Number of estimated parameters: 5
Number of degrees of freedom: 37
Information criteria:
      AIC      AICc       BIC      BICc
1201.8666 1203.5332 1210.5549 1213.6696
