In [5]:
import pandas as pd

from ray import tune
from neuralforecast.auto import AutoNHITS
from neuralforecast.core import NeuralForecast

In [2]:
def create_date_format(
        df: pd.DataFrame, original_date_column: str
    ) -> pd.DataFrame:
        """
        Creates a pandas datetime object from a date string.
        Very specifiy to the output of the mdb notebook yrmo column.
        """
        df["yyyymm"] = df[original_date_column].apply(
            lambda x: str(x)[:4] + "-" + str(x)[4:] + "-01"
        )
        df["yyyymm"] = pd.to_datetime(df.yyyymm)
        return df

In [4]:
df = pd.read_csv('s3://dsaa-cph-ai-s3-dev/jan_rathfelder/impact_data/fra_eylea_20230215.csv')
df = create_date_format(df, 'yrmo')

df_nixtla = df[['cstmr_1_id', 'yyyymm', 'sales_unit']]
df_nixtla.rename(columns={'cstmr_1_id': 'unique_id', 'yyyymm': 'ds', 'sales_unit': 'y'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nixtla.rename(columns={'cstmr_1_id': 'unique_id', 'yyyymm': 'ds', 'sales_unit': 'y'}, inplace=True)


The AutoNHITS class will automatically perform hyperparamter tunning using Tune library, exploring a user-defined or default search space. Models are selected based on the error on a validation set and the best model is then stored and used during inference.

In [7]:
horizon = 6 

# Use your own config or AutoNHITS.default_config
nhits_config = {
       "learning_rate": tune.choice([1e-3]),                                     # Initial Learning rate
       "max_steps": tune.choice([1000]),                                         # Number of SGD steps
       "input_size": tune.choice([5 * horizon]),                                 # input_size = multiplier * horizon
       "batch_size": tune.choice([7]),                                           # Number of series in windows
       "windows_batch_size": tune.choice([256]),                                 # Number of windows in batch
       "n_pool_kernel_size": tune.choice([[2, 2, 2], [16, 8, 1]]),               # MaxPool's Kernelsize
       "n_freq_downsample": tune.choice([[168, 24, 1], [24, 12, 1], [1, 1, 1]]), # Interpolation expressivity ratios
       "activation": tune.choice(['ReLU']),                                      # Type of non-linear activation
       "n_blocks":  tune.choice([[1, 1, 1]]),                                    # Blocks per each 3 stacks
       "mlp_units":  tune.choice([[[512, 512], [512, 512], [512, 512]]]),        # 2 512-Layers per block for each stack
       "interpolation_mode": tune.choice(['linear']),                            # Type of multi-step interpolation
       "val_check_steps": tune.choice([100]),                                    # Compute validation every 100 epochs
       "random_seed": tune.randint(1, 10),
    }

In [8]:
models = [AutoNHITS(h=horizon,
                    config=nhits_config, 
                    num_samples=5) # number of configurations explored
         ]

In [11]:
nf = NeuralForecast(
    models=models,
    freq='MS')

val_size  = 18 # 3 x 6 months
test_size = 6 # 1 x 6 months

Y_hat_df = nf.cross_validation(df=df_nixtla, 
                               val_size=val_size,
                               test_size=test_size, 
                               n_windows=None)

[2m[36m(_train_tune pid=22496)[0m Seed set to 4


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]
Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 13.69it/s]


2023-10-23 13:29:23,283	ERROR tune_controller.py:1502 -- Trial task failed for trial _train_tune_d2c57_00000
Traceback (most recent call last):
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/worker.py", line 2547, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError: [36mray::ImplicitFunc.train()[39m (pid=22496, ip=10.123.138.232, actor_id=4521b00fcb3d937d4106fec401000000, repr=_train_tune)
  File "/home/j

Epoch 0:   0%|          | 0/109 [00:00<?, ?it/s]                           


[2m[36m(_train_tune pid=22636)[0m Seed set to 7


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]
Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 14.05it/s]


2023-10-23 13:29:28,464	ERROR tune_controller.py:1502 -- Trial task failed for trial _train_tune_d2c57_00001
Traceback (most recent call last):
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/worker.py", line 2547, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError: [36mray::ImplicitFunc.train()[39m (pid=22636, ip=10.123.138.232, actor_id=84846ec61f0d41011a8a4b5601000000, repr=_train_tune)
  File "/home/j

Epoch 0:   0%|          | 0/109 [00:00<?, ?it/s]                           


[2m[36m(_train_tune pid=22779)[0m Seed set to 4


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]
Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 13.67it/s]


2023-10-23 13:29:34,435	ERROR tune_controller.py:1502 -- Trial task failed for trial _train_tune_d2c57_00002
Traceback (most recent call last):
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/worker.py", line 2547, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError: [36mray::ImplicitFunc.train()[39m (pid=22779, ip=10.123.138.232, actor_id=1f60e19b68a3397866f1082601000000, repr=_train_tune)
  File "/home/j

Epoch 0:   0%|          | 0/109 [00:00<?, ?it/s]                           


[2m[36m(_train_tune pid=22920)[0m Seed set to 7


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]
Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 14.38it/s]


2023-10-23 13:29:40,440	ERROR tune_controller.py:1502 -- Trial task failed for trial _train_tune_d2c57_00003
Traceback (most recent call last):
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/worker.py", line 2547, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError: [36mray::ImplicitFunc.train()[39m (pid=22920, ip=10.123.138.232, actor_id=c752b5157b931e8d41fb3e4601000000, repr=_train_tune)
  File "/home/j

Epoch 0:   0%|          | 0/109 [00:00<?, ?it/s]                           


[2m[36m(_train_tune pid=23063)[0m Seed set to 8


Sanity Checking: |          | 0/? [00:00<?, ?it/s]
Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]


2023-10-23 13:29:46,484	ERROR tune_controller.py:1502 -- Trial task failed for trial _train_tune_d2c57_00004
Traceback (most recent call last):
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "/home/jan_rathfelder/Development/env_basic/lib/python3.9/site-packages/ray/_private/worker.py", line 2547, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError: [36mray::ImplicitFunc.train()[39m (pid=23063, ip=10.123.138.232, actor_id=7505d54f1e0c87633f37782801000000, repr=_train_tune)
  File "/home/j

Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 14.48it/s]
Epoch 0:   0%|          | 0/109 [00:00<?, ?it/s]                           


RuntimeError: No best trial found for the given metric: loss. This means that no trial has reported this metric, or all values reported for this metric are NaN. To not ignore NaN values, you can set the `filter_nan_and_inf` arg to False.