In [7]:
import os
import pickle
import time
import argparse

import numpy as np
import pandas as pd

from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

from statsforecast import StatsForecast
from statsforecast.models import (
    naive,
    seasonal_naive, 
    auto_arima, 
    auto_arima_f, 
    forecast_arima
)

from src.losses.numpy import mae, mse
from src.experiments.utils import hyperopt_tunning

import matplotlib.pyplot as plt

In [8]:
# Split lengths per supported dataset, stored as (len_val, len_test).
# Only len_test is consumed by main(); len_val is kept alongside it for reference.
_SPLIT_SIZES = {
    'ETTm2': (11520, 11520),
    'Exchange': (760, 1517),
    'ECL': (2632, 5260),
    'traffic': (1756, 3508),
    'weather': (5270, 10539),
    'ili': (97, 193),
}


def main(args):
    """Evaluate a seasonal-naive baseline on the test split of one dataset.

    Reads ``./data/{args.dataset}/M/df_y.csv``, runs StatsForecast
    cross-validation with the ``seasonal_naive`` model over the test split and
    returns the MSE and MAE of the resulting forecasts.

    Args:
        args: Any object exposing the attributes ``dataset`` (a key of
            ``_SPLIT_SIZES``), ``horizon`` (forecast length passed as ``h``),
            ``seasonality`` (season length handed to ``seasonal_naive``) and
            ``freq`` (frequency string handed to ``StatsForecast``).

    Returns:
        dict: ``{'dataset', 'horizon', 'mse', 'mae'}`` for this run.

    Raises:
        ValueError: If ``args.dataset`` has no entry in ``_SPLIT_SIZES``.
    """
    # Fail fast on an unknown dataset instead of hitting an unbound `len_test`
    # after the data has been loaded and the output directory created.
    if args.dataset not in _SPLIT_SIZES:
        raise ValueError(
            f'Unknown dataset {args.dataset!r}; '
            f'expected one of {sorted(_SPLIT_SIZES)}'
        )
    _, len_test = _SPLIT_SIZES[args.dataset]

    #----------------------------------------------- Load Data -----------------------------------------------#
    Y_df = pd.read_csv(f'./data/{args.dataset}/M/df_y.csv')
    print('Y_df: ', Y_df.head())

    #---------------------------------------------- Directories ----------------------------------------------#
    # NOTE(review): the path segment says NHITS although this function runs a
    # seasonal-naive baseline; the path is kept unchanged so the on-disk layout
    # stays the same. This function does not write anything into the directory.
    output_dir = f'./results/multivariate/{args.dataset}_{args.horizon}/NHITS/'
    os.makedirs(output_dir, exist_ok=True)  # raises OSError if it cannot be created

    n_series = len(Y_df['unique_id'].unique())
    Y_df['ds'] = pd.to_datetime(Y_df['ds'])

    # Test set predictions
    model = StatsForecast(Y_df[['unique_id', 'ds', 'y']].set_index('unique_id'),
                          models=[(seasonal_naive, args.seasonality)],
                          freq=args.freq, n_jobs=-1)
    forecasts = model.cross_validation(h=args.horizon,
                                       n_windows=None, step_size=1,
                                       test_size=len_test)
    # Move the series identifier from the index into a regular column.
    forecasts['unique_id'] = forecasts.index
    forecasts = forecasts.reset_index(drop=True)

    y_true = forecasts['y'].values
    y_hat = forecasts[f'seasonal_naive_season_length-{args.seasonality}'].values

    # Reshape to (n_series, n_windows, horizon).
    # Assumes the cross-validation rows are grouped by series and that every
    # series yields the same number of horizon-length windows -- TODO confirm.
    y_true = y_true.reshape(n_series, -1, args.horizon)
    y_hat = y_hat.reshape(n_series, -1, args.horizon)

    losses = {'dataset': args.dataset,
              'horizon': args.horizon,
              'mse': mse(y=y_true, y_hat=y_hat),
              'mae': mae(y=y_true, y_hat=y_hat)}

    return losses

# source ~/anaconda3/etc/profile.d/conda.sh
# conda activate nixtla
# CUDA_VISIBLE_DEVICES=0 python nhits_multivariate.py --hyperopt_max_evals 10 --experiment_id "eval_train"

In [9]:
# Run configuration handed to main(). A plain namespace is used because the
# per-run fields (dataset, horizon, seasonality, freq) are attached below by
# attribute assignment, which a pandas Series does not store as real entries.
args = argparse.Namespace(experiment_id='seasonal_naive')

if __name__ == '__main__':

    horizons = [96, 192, 336, 720]
    ILI_horizons = [24, 36, 48, 60]

    # Parallel lists: position i of each list describes one dataset
    # (name, season length passed to seasonal_naive, frequency string).
    # Currently disabled entries: ('ECL', 96, '15min') and ('traffic', 168, 'H').
    datasets = ['ETTm2', 'Exchange', 'weather', 'ili']
    seasonalities = [96, 7, 144, 52]
    freqs = ['15min', 'D', '10min', 'W']
    # zip() silently truncates to the shortest list, so guard against drift.
    assert len(datasets) == len(seasonalities) == len(freqs), \
        'datasets, seasonalities and freqs must have the same length'

    losses_list = []
    for dataset, seasonality, freq in zip(datasets, seasonalities, freqs):
        # 'ili' is evaluated on its own, shorter set of horizons.
        horizons_dataset = ILI_horizons if dataset == 'ili' else horizons
        for horizon in horizons_dataset:
            print(50*'-', dataset, 50*'-')
            print(50*'-', horizon, 50*'-')
            # perf_counter is monotonic, so the elapsed time cannot be skewed
            # by wall-clock adjustments.
            start = time.perf_counter()
            args.dataset = dataset
            args.horizon = horizon
            args.seasonality = seasonality
            args.freq = freq
            losses = main(args)
            losses_list.append(losses)
            print('Time: ', time.perf_counter() - start)

-------------------------------------------------- ETTm2 --------------------------------------------------
-------------------------------------------------- 96 --------------------------------------------------
Y_df:                      ds unique_id         y
0  2016-07-01 00:00:00      HUFL -0.041413
1  2016-07-01 00:00:00      HULL  0.040104
2  2016-07-01 00:00:00      MUFL -0.599211
3  2016-07-01 00:00:00      MULL -0.393536
4  2016-07-01 00:00:00      LUFL  0.695804
Time:  6.612730979919434
-------------------------------------------------- ETTm2 --------------------------------------------------
-------------------------------------------------- 192 --------------------------------------------------
Y_df:                      ds unique_id         y
0  2016-07-01 00:00:00      HUFL -0.041413
1  2016-07-01 00:00:00      HULL  0.040104
2  2016-07-01 00:00:00      MUFL -0.599211
3  2016-07-01 00:00:00      MULL -0.393536
4  2016-07-01 00:00:00      LUFL  0.695804
Time:  9.822413206

In [10]:
# Build one table from the loss dicts collected in losses_list (one row per run).
eval_df = pd.DataFrame(losses_list)

In [11]:
# Show the loss table; a bare trailing expression is rendered as the cell output.
eval_df

Unnamed: 0,dataset,horizon,mse,mae
0,ETTm2,96,0.263149,0.300543
1,ETTm2,192,0.320563,0.336559
2,ETTm2,336,0.376372,0.370016
3,ETTm2,720,0.470845,0.421547
4,Exchange,96,0.086209,0.204812
5,Exchange,192,0.172805,0.295294
6,Exchange,336,0.311953,0.40308
7,Exchange,720,0.819348,0.681318
8,weather,96,0.316707,0.287956
9,weather,192,0.342557,0.304936
