In [23]:
# ==========================================
# 1. Installation & Imports
# ==========================================
!pip install pandas numpy matplotlib seaborn statsforecast mlforecast neuralforecast lightgbm nixtla

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error

# StatsForecast
from statsforecast import StatsForecast
from statsforecast.models import Naive, SeasonalNaive, AutoETS, AutoARIMA

# MLForecast
from mlforecast import MLForecast
from mlforecast.target_transforms import Differences
from lightgbm import LGBMRegressor

# NeuralForecast
from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoNBEATS, AutoNHITS

# TimeGPT
from nixtla import NixtlaClient



In [24]:
# ==========================================
# 2. Data Loading & Preprocessing (Fixed Date Frequency)
# ==========================================
import pandas as pd
import numpy as np

def load_and_prep_data():
    print("Loading data...")

    # 1. Load CSVs
    try:
        train = pd.read_csv('train.csv')
        features = pd.read_csv('features.csv')
        test = pd.read_csv('test.csv')
        stores = pd.read_csv('stores.csv')
    except FileNotFoundError as e:
        print(f"Error finding file: {e}")
        return None, None

    # 2. Merge Dataframes
    # Merge Store metadata and Features into Train
    df = train.merge(stores, on='Store', how='left')
    df = df.merge(features, on=['Store', 'Date', 'IsHoliday'], how='left')

    # Merge Test data as well
    test_df = test.merge(stores, on='Store', how='left')
    test_df = test_df.merge(features, on=['Store', 'Date', 'IsHoliday'], how='left')

    # 3. Handle Missing Values
    # Markdowns: Fill with 0 (missing markdown = no promotion)
    md_cols = ['MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5']
    df[md_cols] = df[md_cols].fillna(0)
    test_df[md_cols] = test_df[md_cols].fillna(0)

    # Economic Indicators: Interpolate (Fill forward, then backward)
    exog_cols_to_fill = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']
    for col in exog_cols_to_fill:
        df[col] = df.groupby('Store')[col].transform(lambda x: x.ffill().bfill())
        test_df[col] = test_df.groupby('Store')[col].transform(lambda x: x.ffill().bfill())

    # 4. Formatting for Time Series Models
    df['ds'] = pd.to_datetime(df['Date'])

    # DIAGNOSTIC 1: Check target before renaming
    if 'Weekly_Sales' not in df.columns:
        print(f"CRITICAL ERROR: 'Weekly_Sales' column not found. Available columns: {df.columns.tolist()}")
        return None, None
    # --------------------------------------------------

    df = df.rename(columns={'Weekly_Sales': 'y'})
    df['unique_id'] = df['Store'].astype(str) + '_' + df['Dept'].astype(str)

    test_df['ds'] = pd.to_datetime(test_df['Date'])
    test_df['unique_id'] = test_df['Store'].astype(str) + '_' + test_df['Dept'].astype(str)

    # 5. Downsample: Select Top 20 series by volume
    top_series = df.groupby('unique_id')['y'].sum().nlargest(20).index
    df_subset = df[df['unique_id'].isin(top_series)].reset_index(drop=True)
    test_subset = test_df[test_df['unique_id'].isin(top_series)].reset_index(drop=True)

    # 6. Feature Engineering (Encodings)
    df_subset['IsHoliday'] = df_subset['IsHoliday'].astype(int)
    df_subset['Type'] = df_subset['Type'].map({'A': 1, 'B': 2, 'C': 3})
    test_subset['IsHoliday'] = test_subset['IsHoliday'].astype(int)
    test_subset['Type'] = test_subset['Type'].map({'A': 1, 'B': 2, 'C': 3})

    # Ensure continuous date ranges for each unique_id in training data
    all_series_dfs_train = []
    for uid in df_subset['unique_id'].unique():
        series_df = df_subset[df_subset['unique_id'] == uid].copy()
        min_ds = series_df['ds'].min()
        max_ds = series_df['ds'].max()

        # This aligns the generated timeline with the Walmart Friday dates
        full_date_range = pd.date_range(start=min_ds, end=max_ds, freq='W-FRI')
        # ----------------------------------------------------

        full_series_df = pd.DataFrame({'ds': full_date_range, 'unique_id': uid})

        # Merge back with original series data
        series_df = full_series_df.merge(series_df, on=['unique_id', 'ds'], how='left')
        all_series_dfs_train.append(series_df)

    df_subset = pd.concat(all_series_dfs_train).reset_index(drop=True)

    # Final Safety Check: Ensure no NaNs remain in features used for ML
    exogenous_ml_cols = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment',
                         'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5',
                         'IsHoliday', 'Type', 'Size']

    # Fill any remaining NaNs (e.g., from reindexing or initial missing data) with 0
    df_subset['y'] = df_subset['y'].fillna(0)
    df_subset[exogenous_ml_cols] = df_subset[exogenous_ml_cols].fillna(0)
    test_subset[exogenous_ml_cols] = test_subset[exogenous_ml_cols].fillna(0)

    # Drop the original 'Date' column
    df_subset = df_subset.drop(columns=['Date'], errors='ignore')
    test_subset = test_subset.drop(columns=['Date'], errors='ignore')

    #  DIAGNOSTIC 2: Verify Sales Sum
    print("\n--- DIAGNOSTIC CHECK ---")
    if 'y' in df_subset.columns:
        total_y = df_subset['y'].sum()
        print(f"Total Sum of 'y' (Sales): {total_y:,.2f}")

        if total_y == 0:
            print("CRITICAL ERROR: 'y' still contains only 0s.")
            return None, None
        else:
            print("STATUS: Data verified. Sales column 'y' is populated.")
    else:
        print("CRITICAL ERROR: 'y' column disappeared.")
        return None, None
    print("------------------------\n")
    # ---------------------------------------

    print(f"Data Prepared. Modeling {df_subset['unique_id'].nunique()} series.")
    return df_subset, test_subset

# EXECUTE THE FUNCTION
train_subset, test_subset = load_and_prep_data()

# Verify results
if train_subset is not None and test_subset is not None:
    print("\nSuccess! First 5 rows of testing data:")
    print(test_subset.head())
else:
    print("Error: Data loading and preparation failed.")

Loading data...

--- DIAGNOSTIC CHECK ---
Total Sum of 'y' (Sales): 402,568,823.37
STATUS: Data verified. Sales column 'y' is populated.
------------------------

Data Prepared. Modeling 20 series.

Success! First 5 rows of testing data:
   Store  Dept  IsHoliday  Type    Size  Temperature  Fuel_Price  MarkDown1  MarkDown2  MarkDown3  MarkDown4  MarkDown5         CPI  Unemployment         ds unique_id
0      1    92          0     1  151315        55.32       3.386    6766.44    5147.70      50.82    3639.90    2737.42  223.462779         6.573 2012-11-02      1_92
1      1    92          0     1  151315        61.24       3.314   11421.32    3370.89      40.28    4646.79    6154.16  223.481307         6.573 2012-11-09      1_92
2      1    92          0     1  151315        52.92       3.252    9696.28     292.10     103.78    1133.15    6612.69  223.512911         6.573 2012-11-16      1_92
3      1    92          1     1  151315        56.23       3.211     883.59       4.17   74910

In [33]:
# ==========================================
# 3. TimeGPT Pipeline
# ==========================================
from nixtla import NixtlaClient
import pandas as pd

def run_timegpt_pipeline(df_train, df_test, horizon, n_windows):
    print("\n--- Running TimeGPT ---")

    # 1. PASTE YOUR API KEY HERE
    # ---------------------------------------------------------
    my_api_key = 'nixak-eX92lSIatoWxvaVDNG4v7IFJQ5cva8FrMRaNuADzsiAlt007Tm8ejGc7VUU0MJEt2YZtsa2CmsfcbYPg'
    # ---------------------------------------------------------

    try:
        # Initialize the client
        nixtla_client = NixtlaClient(api_key=my_api_key)

        # Validates the key before running heavy tasks
        nixtla_client.validate_api_key()

        # A. Cross-Validation
        # We use 'W-FRI' because the data ends on Fridays
        print("  Running TimeGPT Cross-Validation...")
        timegpt_cv = nixtla_client.cross_validation(
            df=df_train,
            h=horizon,
            n_windows=n_windows,
            step_size=horizon,
            freq='W-FRI'
        )

        # B. Future Forecast
        # Prepare future exogenous variables (Test set without the target 'y')
        future_exog = df_test.drop(columns=['Date', 'Weekly_Sales', 'y'], errors='ignore')

        # Calculate horizon based on test set length (should be 39 weeks for this dataset)
        test_horizon = df_test.groupby('unique_id')['ds'].count().max()

        print(f"  Running TimeGPT Future Forecast ({test_horizon} weeks)...")
        timegpt_fcst = nixtla_client.forecast(
            df=df_train,
            h=test_horizon,
            X_df=future_exog,
            freq='W-FRI'
        )

        return timegpt_cv, timegpt_fcst

    except Exception as e:
        print(f"Error running TimeGPT: {e}")
        print("TIP: Check if your API key is pasted correctly inside the quotes.")
        return pd.DataFrame(), pd.DataFrame()

# EXECUTE THE FUNCTION
if 'train_subset' in globals() and 'test_subset' in globals():
    # Run pipeline
    tgpt_cv, tgpt_fcst = run_timegpt_pipeline(train_subset, test_subset, horizon=4, n_windows=5)

    # Print results if successful
    if not tgpt_cv.empty:
        print("\nSuccess! TimeGPT Cross-Validation Head:")
        print(tgpt_cv.head())


--- Running TimeGPT ---
  Running TimeGPT Cross-Validation...




  Running TimeGPT Future Forecast (39 weeks)...

Success! TimeGPT Cross-Validation Head:
  unique_id         ds     cutoff          y     TimeGPT
0     10_72 2012-06-15 2012-06-08  105499.39  115114.920
1     10_72 2012-06-22 2012-06-08  107949.41  181358.220
2     10_72 2012-06-29 2012-06-08   96579.10  133881.170
3     10_72 2012-07-06 2012-06-08  100464.25  148005.270
4     10_72 2012-07-13 2012-07-06   92923.05  113516.984


In [26]:
# ==========================================
# 4. Standard Models Pipeline
# ==========================================
import pandas as pd
import numpy as np
from statsforecast import StatsForecast
from statsforecast.models import Naive, SeasonalNaive, AutoETS, AutoARIMA
from mlforecast import MLForecast
from mlforecast.target_transforms import Differences
from lightgbm import LGBMRegressor
from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoNBEATS, AutoNHITS

# Set pandas to show all columns (so you can see all models)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

def run_standard_pipeline(df):
    HORIZON = 4
    N_WINDOWS = 5
    FREQ = 'W-FRI'

    # A. StatsForecast (Naive, ARIMA, ETS)
    print(f"[{'StatsForecast':<15}] Training: Naive, SeasonalNaive, AutoETS, AutoARIMA")
    sf = StatsForecast(
        models=[
            Naive(),
            SeasonalNaive(season_length=52),
            AutoETS(season_length=52),
            AutoARIMA(season_length=52)
        ],
        freq=FREQ,
        n_jobs=-1
    )
    stats_cv = sf.cross_validation(
        df=df[['unique_id', 'ds', 'y']],
        h=HORIZON, step_size=HORIZON, n_windows=N_WINDOWS
    )

    # B. MLForecast (LightGBM)
    print(f"[{'MLForecast':<15}] Training: LightGBM")
    lgbm = LGBMRegressor(verbosity=-1, random_state=42)

    # Prepare features
    mlforecast_cols = [
        'unique_id', 'ds', 'y', 'Type', 'Size',
        'Temperature', 'Fuel_Price', 'CPI', 'Unemployment',
        'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5',
        'IsHoliday'
    ]
    df_for_mlforecast = df[mlforecast_cols].copy()

    mlf = MLForecast(
        models=[lgbm], freq=FREQ, lags=[1, 4, 52],
        target_transforms=[Differences([1])],
        date_features=['month', 'week'], num_threads=4
    )

    ml_cv = mlf.cross_validation(
        df=df_for_mlforecast,
        h=HORIZON, step_size=HORIZON, n_windows=N_WINDOWS,
        static_features=['Type', 'Size']
    )
    ml_cv = ml_cv.rename(columns={'LGBMRegressor': 'LightGBM'})

    # C. NeuralForecast (AutoNBEATS, AutoNHITS)
    print(f"[{'NeuralForecast':<15}] Training: AutoNBEATS, AutoNHITS")

    nf = NeuralForecast(
        models=[
            AutoNBEATS(h=HORIZON, num_samples=2),
            AutoNHITS(h=HORIZON, num_samples=2)
        ],
        freq=FREQ
    )

    neural_cv = nf.cross_validation(
        df=df[['unique_id', 'ds', 'y']],
        val_size=HORIZON,
        n_windows=N_WINDOWS,
        step_size=HORIZON
    )

    # D. MERGING ALL RESULTS
    print("Merging all model predictions...")

    # Start with StatsForecast
    all_results = stats_cv.copy()

    # Merge MLForecast (dropping 'y' to avoid duplication)
    if ml_cv is not None:
        all_results = all_results.merge(
            ml_cv.drop(columns=['y'], errors='ignore'),
            on=['unique_id', 'ds', 'cutoff'],
            how='left'
        )

    # Merge NeuralForecast (dropping 'y' to avoid duplication)
    if neural_cv is not None:
        all_results = all_results.merge(
            neural_cv.drop(columns=['y'], errors='ignore'),
            on=['unique_id', 'ds', 'cutoff'],
            how='left'
        )

    return all_results

# EXECUTE
if 'train_subset' in globals():
    # Filter for active data
    active_ids = train_subset.groupby('unique_id')['y'].sum()
    active_ids = active_ids[active_ids > 0].index.tolist()

    if len(active_ids) > 0:
        train_subset_clean = train_subset[train_subset['unique_id'].isin(active_ids)].copy()

        # Run Pipeline
        combined_results = run_standard_pipeline(train_subset_clean)

        print("\n" + "="*50)
        print("SUCCESS! PREDICTIONS FROM ALL MODELS:")
        print("="*50)
        print(combined_results.head())

[StatsForecast  ] Training: Naive, SeasonalNaive, AutoETS, AutoARIMA
[MLForecast     ] Training: LightGBM
[NeuralForecast ] Training: AutoNBEATS, AutoNHITS
+--------------------------------------------------------------------+
| Configuration for experiment     _train_tune_2025-12-04_16-35-42   |
+--------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator             |
| Scheduler                        FIFOScheduler                     |
| Number of trials                 2                                 |
+--------------------------------------------------------------------+

View detailed results here: /root/ray_results/_train_tune_2025-12-04_16-35-42
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2025-12-04_15-19-14_190539_731/artifacts/2025-12-04_16-35-42/_train_tune_2025-12-04_16-35-42/driver_artifacts`


[36m(_train_tune pid=26945)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=26945)[0m Seed set to 16
[36m(_train_tune pid=26945)[0m GPU available: False, used: False
[36m(_train_tune pid=26945)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=26945)[0m 2025-12-04 16:36:03.585952: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=26945)[0m E0000 00:00:1764866163.636017   27060 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_train_tune pid=26945)[0m E0000 00:00:1764866163.649070   27060 cuda_blas.c

[36m(_train_tune pid=26945)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=26945)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=26945)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=26945)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=26945)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=26945)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=26945)[0m │ 3 │ blocks       │ ModuleList    │  2.4 M │ train │     0 │
[36m(_train_tune pid=26945)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=26945)[0m Trainable params: 2.4 M                                                         
[36m(_train_tune pid=26945)[0m Non-trainable params: 216                                                       
[36m(_train

[36m(_train_tune pid=26945)[0m [2025-12-04 16:36:20,930 E 26945 26991] core_worker_process.cc:837: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[36m(_train_tune pid=26945)[0m `Trainer.fit` stopped: `max_steps=1000` reached.


[36m(_train_tune pid=26945)[0m Epoch 999/-2 ━━━━━━━━━━━━━━━━━ 1/1 0:00:00 • 0:00:00 0.00it/s v_num: 0.000      
[36m(_train_tune pid=26945)[0m                                                               train_loss_step:  
[36m(_train_tune pid=26945)[0m                                                               0.079             
[36m(_train_tune pid=26945)[0m                                                               train_loss_epoch: 
[36m(_train_tune pid=26945)[0m                                                               0.079 valid_loss: 
[36m(_train_tune pid=26945)[0m                                                               6234.723          


[36m(_train_tune pid=27458)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=27458)[0m Seed set to 8
[36m(_train_tune pid=27458)[0m GPU available: False, used: False
[36m(_train_tune pid=27458)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=27458)[0m 2025-12-04 16:37:58.167212: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=27458)[0m E0000 00:00:1764866278.203734   27602 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_train_tune pid=27458)[0m E0000 00:00:1764866278.223518   27602 cuda_blas.cc

[36m(_train_tune pid=27458)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=27458)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=27458)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=27458)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=27458)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=27458)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=27458)[0m │ 3 │ blocks       │ ModuleList    │  2.4 M │ train │     0 │
[36m(_train_tune pid=27458)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=27458)[0m Trainable params: 2.4 M                                                         
[36m(_train_tune pid=27458)[0m Non-trainable params: 180                                                       
[36m(_train

[36m(_train_tune pid=27458)[0m [2025-12-04 16:38:10,369 E 27458 27513] core_worker_process.cc:837: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
2025-12-04 16:39:00,798	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/_train_tune_2025-12-04_16-35-42' in 0.0066s.
INFO:lightning_fabric.utilities.seed:Seed set to 16
[36m(_train_tune pid=27458)[0m `Trainer.fit` stopped: `max_steps=500` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores



[36m(_train_tune pid=27458)[0m Epoch 499/-2 ━━━━━━━━━━━━━━━━━ 1/1 0:00:00 • 0:00:00 0.00it/s v_num: 0.000      
[36m(_train_tune pid=27458)[0m                                                               train_loss_step:  
[36m(_train_tune pid=27458)[0m                                                               2497.830          
[36m(_train_tune pid=27458)[0m                                                               train_loss_epoch: 
[36m(_train_tune pid=27458)[0m                                                               2497.830          
[36m(_train_tune pid=27458)[0m                                                               valid_loss:       
[36m(_train_tune pid=27458)[0m                                                               9802.795          


Output()

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=1000` reached.


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores


Output()

+--------------------------------------------------------------------+
| Configuration for experiment     _train_tune_2025-12-04_16-40-46   |
+--------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator             |
| Scheduler                        FIFOScheduler                     |
| Number of trials                 2                                 |
+--------------------------------------------------------------------+

View detailed results here: /root/ray_results/_train_tune_2025-12-04_16-40-46
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2025-12-04_15-19-14_190539_731/artifacts/2025-12-04_16-40-46/_train_tune_2025-12-04_16-40-46/driver_artifacts`


[36m(_train_tune pid=28331)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=28331)[0m Seed set to 6
[36m(_train_tune pid=28331)[0m GPU available: False, used: False
[36m(_train_tune pid=28331)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=28331)[0m 2025-12-04 16:41:07.850422: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=28331)[0m E0000 00:00:1764866467.919661   28442 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_train_tune pid=28331)[0m E0000 00:00:1764866467.939537   28442 cuda_blas.cc

[36m(_train_tune pid=28331)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=28331)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=28331)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=28331)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=28331)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=28331)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=28331)[0m │ 3 │ blocks       │ ModuleList    │  2.4 M │ train │     0 │
[36m(_train_tune pid=28331)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=28331)[0m Trainable params: 2.4 M                                                         
[36m(_train_tune pid=28331)[0m Non-trainable params: 0                                                         
[36m(_train

[36m(_train_tune pid=28331)[0m [2025-12-04 16:41:24,690 E 28331 28368] core_worker_process.cc:837: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(_train_tune pid=28331)[0m Epoch 1399/-2 ━━━━━━━━━━━━━━━━━ 1/1 0:00:00 • 0:00:00 0.00it/s v_num: 0.000     
[36m(_train_tune pid=28331)[0m                                                                train_loss_step: 
[36m(_train_tune pid=28331)[0m                                                                14999268352.000  
[36m(_train_tune pid=28331)[0m                                                                train_loss_epoch:
[36m(_train_tune pid=28331)[0m                                                                14999268352.000  
[36m(_train_tune pid=28331)[0m                                                                valid_loss:      
[36m(_train_tune pid=28331)[0m                                                                5894118400.000   


[36m(_train_tune pid=28331)[0m `Trainer.fit` stopped: `max_steps=1400.0` reached.
[36m(_train_tune pid=29029)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=29029)[0m Seed set to 15
[36m(_train_tune pid=29029)[0m GPU available: False, used: False
[36m(_train_tune pid=29029)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=29029)[0m 2025-12-04 16:43:41.839218: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=29029)[0m E0000 00:00:1764866621.866948   29143 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registere

[36m(_train_tune pid=29029)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=29029)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=29029)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=29029)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=29029)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=29029)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=29029)[0m │ 3 │ blocks       │ ModuleList    │  2.4 M │ train │     0 │
[36m(_train_tune pid=29029)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=29029)[0m Trainable params: 2.4 M                                                         
[36m(_train_tune pid=29029)[0m Non-trainable params: 0                                                         
[36m(_train

[36m(_train_tune pid=29029)[0m [2025-12-04 16:43:59,266 E 29029 29075] core_worker_process.cc:837: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
2025-12-04 16:46:27,223	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/_train_tune_2025-12-04_16-40-46' in 0.0083s.
INFO:lightning_fabric.utilities.seed:Seed set to 15
[36m(_train_tune pid=29029)[0m `Trainer.fit` stopped: `max_steps=500.0` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores



[36m(_train_tune pid=29029)[0m Epoch 499/-2 ━━━━━━━━━━━━━━━━━ 1/1 0:00:00 • 0:00:00 0.00it/s v_num: 0.000      
[36m(_train_tune pid=29029)[0m                                                               train_loss_step:  
[36m(_train_tune pid=29029)[0m                                                               0.024             
[36m(_train_tune pid=29029)[0m                                                               train_loss_epoch: 
[36m(_train_tune pid=29029)[0m                                                               0.024 valid_loss: 
[36m(_train_tune pid=29029)[0m                                                               11275.909         


Output()

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=500.0` reached.


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores


Output()

Merging all model predictions...

SUCCESS! PREDICTIONS FROM ALL MODELS:
  unique_id         ds     cutoff          y      Naive  SeasonalNaive        AutoETS      AutoARIMA       LightGBM     AutoNBEATS      AutoNHITS
0     10_72 2012-06-15 2012-06-08  105499.39  125434.23      127450.66  125453.077037  135381.194651  145607.035537  144165.359375  128642.929688
1     10_72 2012-06-22 2012-06-08  107949.41  125434.23      117948.54  114657.507531  117357.420039  141443.112113  113317.765625  111508.296875
2     10_72 2012-06-29 2012-06-08   96579.10  125434.23      114398.47  116075.626160  109978.895935  148534.664866  102078.171875  103085.296875
3     10_72 2012-07-06 2012-06-08  100464.25  125434.23      108519.93  109074.835739  102380.377658  149283.028227  116065.226562  113007.320312
4     10_72 2012-07-13 2012-07-06   92923.05  100464.25      115004.83  110645.290441  107966.684059  105233.901058  105550.414062  102123.453125


In [27]:
# ==========================================
# 5. Generate Testing Outputs (Final Corrected Version)
# ==========================================
import pandas as pd
from statsforecast import StatsForecast
from statsforecast.models import Naive, SeasonalNaive, AutoETS, AutoARIMA
from mlforecast import MLForecast
from mlforecast.target_transforms import Differences
from lightgbm import LGBMRegressor
from neuralforecast import NeuralForecast
from neuralforecast.auto import AutoNBEATS, AutoNHITS
from neuralforecast.losses.pytorch import MAE  # Import MAE for stability

def generate_future_forecasts(df_train, df_test):
    print("\n--- Generating Future Forecasts (Testing Outputs) ---")

    # 1. Configuration
    FREQ = 'W-FRI'
    # Calculate horizon based on the test set
    HORIZON = df_test.groupby('unique_id')['ds'].count().max()
    print(f"Forecasting horizon: {HORIZON} weeks")

    # 2. StatsForecast
    print("1. Generating StatsForecast predictions...")
    sf = StatsForecast(
        models=[
            Naive(),
            SeasonalNaive(season_length=52),
            AutoETS(season_length=52),
            AutoARIMA(season_length=52)
        ],
        freq=FREQ,
        n_jobs=-1
    )
    sf_fut = sf.forecast(df=df_train[['unique_id', 'ds', 'y']], h=HORIZON)
    sf_fut = sf_fut.reset_index()

    # 3. MLForecast (LightGBM)
    print("2. Generating MLForecast (LightGBM) predictions...")
    lgbm = LGBMRegressor(verbosity=-1, random_state=42)

    mlf = MLForecast(
        models=[lgbm],
        freq=FREQ,
        lags=[1, 4, 52],
        target_transforms=[Differences([1])],
        date_features=['month', 'week'],
        num_threads=4
    )

    # Define columns
    ml_train_cols = ['unique_id', 'ds', 'y', 'Type', 'Size', 'Temperature',
                     'Fuel_Price', 'CPI', 'Unemployment', 'IsHoliday',
                     'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5']

    # Fit (Defining Type and Size as static)
    mlf.fit(df=df_train[ml_train_cols], static_features=['Type', 'Size'])

    # The model already knows 'Type' and 'Size' from training.
    cols_to_drop = ['y', 'Weekly_Sales', 'Type', 'Size']
    X_df_future = df_test.drop(columns=cols_to_drop, errors='ignore')

    ml_fut = mlf.predict(h=HORIZON, X_df=X_df_future)
    ml_fut = ml_fut.rename(columns={'LGBMRegressor': 'LightGBM'})

    # 4. NeuralForecast (Auto Models Stabilized)
    print("3. Generating NeuralForecast predictions...")

    # STABILITY SETTINGS:
    nf = NeuralForecast(
        models=[
            AutoNBEATS(
                h=HORIZON,
                loss=MAE(),
                num_samples=5
            ),
            AutoNHITS(
                h=HORIZON,
                loss=MAE(),
                num_samples=5
            )
        ],
        freq=FREQ
    )

    nf.fit(df=df_train[['unique_id', 'ds', 'y']])
    nf_fut = nf.predict(futr_df=df_test[['unique_id', 'ds']])

    # 5. Merge All Results

    print("Merging future forecasts...")
    final_fut = sf_fut.copy()
    final_fut = final_fut.merge(ml_fut, on=['unique_id', 'ds'], how='left')
    final_fut = final_fut.merge(nf_fut, on=['unique_id', 'ds'], how='left')

    return final_fut

# --- EXECUTE ---
if 'train_subset' in globals() and 'test_subset' in globals():

    # Filter for active data to ensure stability
    active_ids = train_subset.groupby('unique_id')['y'].sum()
    active_ids = active_ids[active_ids > 0].index.tolist()
    train_clean = train_subset[train_subset['unique_id'].isin(active_ids)].copy()
    test_clean = test_subset[test_subset['unique_id'].isin(active_ids)].copy()

    # RUN FUNCTION
    future_forecasts = generate_future_forecasts(train_clean, test_clean)

    # MERGE TIMEGPT IF AVAILABLE
    # Checks if TimeGPT was run in Step 3 and merges it if it exists
    if 'tgpt_fcst' in globals() and not tgpt_fcst.empty:
        print("Merging TimeGPT results...")
        cols_to_merge = ['unique_id', 'ds', 'TimeGPT']
        if all(col in tgpt_fcst.columns for col in cols_to_merge):
             future_forecasts = future_forecasts.merge(
                 tgpt_fcst[cols_to_merge],
                 on=['unique_id', 'ds'],
                 how='left'
             )

    print("\nSuccess! Future forecasts generated.")
    print(future_forecasts.head())

else:
    print("Error: 'train_subset' or 'test_subset' not found. Please run Step 2 (Data Loading) first.")


--- Generating Future Forecasts (Testing Outputs) ---
Forecasting horizon: 39 weeks
1. Generating StatsForecast predictions...
2. Generating MLForecast (LightGBM) predictions...
3. Generating NeuralForecast predictions...
+--------------------------------------------------------------------+
| Configuration for experiment     _train_tune_2025-12-04_16-50-15   |
+--------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator             |
| Scheduler                        FIFOScheduler                     |
| Number of trials                 5                                 |
+--------------------------------------------------------------------+

View detailed results here: /root/ray_results/_train_tune_2025-12-04_16-50-15
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2025-12-04_15-19-14_190539_731/artifacts/2025-12-04_16-50-15/_train_tune_2025-12-04_16-50-15/driver_artifacts`


[36m(_train_tune pid=31273)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=31273)[0m Seed set to 16
[36m(_train_tune pid=31273)[0m GPU available: False, used: False
[36m(_train_tune pid=31273)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=31273)[0m 2025-12-04 16:50:38.901610: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=31273)[0m E0000 00:00:1764867038.967443   31393 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_train_tune pid=31273)[0m E0000 00:00:1764867038.989036   31393 cuda_blas.c

[36m(_train_tune pid=31273)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=31273)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=31273)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=31273)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=31273)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=31273)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=31273)[0m │ 3 │ blocks       │ ModuleList    │  2.9 M │ train │     0 │
[36m(_train_tune pid=31273)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=31273)[0m Trainable params: 2.9 M                                                         
[36m(_train_tune pid=31273)[0m Non-trainable params: 18.5 K                                                    
[36m(_train

2025-12-04 16:50:47,028	ERROR tune_controller.py:1331 -- Trial task failed for trial _train_tune_05703_00000
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 2972, in get
    values, debugger_breakpoint = worker.get_objects(
                                  ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 1031, in get_objects
    raise value.as_instanceof_caus

[36m(_train_tune pid=31273)[0m Epoch 0/-2                    0/1 0:00:00 • -:--:-- 0.00it/s v_num: 0.000       
[36m(_train_tune pid=31273)[0m                                                              valid_loss:        
[36m(_train_tune pid=31273)[0m                                                              16859.066          

Trial _train_tune_05703_00000 errored after 0 iterations at 2025-12-04 16:50:47. Total running time: 31s
Error file: /tmp/ray/session_2025-12-04_15-19-14_190539_731/artifacts/2025-12-04_16-50-15/_train_tune_2025-12-04_16-50-15/driver_artifacts/_train_tune_05703_00000_0_batch_size=32,input_size=195,learning_rate=0.0034,max_steps=1000,random_seed=16,scaler_type=standard,ste_2025-12-04_16-50-15/error.txt


[36m(_train_tune pid=31483)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=31483)[0m Seed set to 8
[36m(_train_tune pid=31483)[0m GPU available: False, used: False
[36m(_train_tune pid=31483)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=31483)[0m 2025-12-04 16:51:09.562055: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=31483)[0m E0000 00:00:1764867069.590458   31594 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_train_tune pid=31483)[0m E0000 00:00:1764867069.598758   31594 cuda_blas.cc

[36m(_train_tune pid=31483)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=31483)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=31483)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=31483)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=31483)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=31483)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=31483)[0m │ 3 │ blocks       │ ModuleList    │  2.8 M │ train │     0 │
[36m(_train_tune pid=31483)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=31483)[0m Trainable params: 2.8 M                                                         
[36m(_train_tune pid=31483)[0m Non-trainable params: 15.4 K                                                    
[36m(_train

2025-12-04 16:51:17,086	ERROR tune_controller.py:1331 -- Trial task failed for trial _train_tune_05703_00001
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 2972, in get
    values, debugger_breakpoint = worker.get_objects(
                                  ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 1031, in get_objects
    raise value.as_instanceof_caus

[36m(_train_tune pid=31483)[0m Epoch 0/-2                    0/1 0:00:00 • -:--:-- 0.00it/s v_num: 0.000       
[36m(_train_tune pid=31483)[0m                                                              valid_loss:        
[36m(_train_tune pid=31483)[0m                                                              23484.732          

Trial _train_tune_05703_00001 errored after 0 iterations at 2025-12-04 16:51:17. Total running time: 1min 1s
Error file: /tmp/ray/session_2025-12-04_15-19-14_190539_731/artifacts/2025-12-04_16-50-15/_train_tune_2025-12-04_16-50-15/driver_artifacts/_train_tune_05703_00001_1_batch_size=256,input_size=156,learning_rate=0.0019,max_steps=500,random_seed=8,scaler_type=None,step_siz_2025-12-04_16-50-15/error.txt


[36m(_train_tune pid=31677)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=31677)[0m Seed set to 16
[36m(_train_tune pid=31677)[0m GPU available: False, used: False
[36m(_train_tune pid=31677)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=31677)[0m 2025-12-04 16:51:40.076641: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=31677)[0m E0000 00:00:1764867100.120026   31791 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_train_tune pid=31677)[0m E0000 00:00:1764867100.133777   31791 cuda_blas.c

[36m(_train_tune pid=31677)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=31677)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=31677)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=31677)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=31677)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=31677)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=31677)[0m │ 3 │ blocks       │ ModuleList    │  2.6 M │ train │     0 │
[36m(_train_tune pid=31677)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=31677)[0m Trainable params: 2.6 M                                                         
[36m(_train_tune pid=31677)[0m Non-trainable params: 9.2 K                                                     
[36m(_train

[36m(_train_tune pid=31677)[0m [2025-12-04 16:51:57,771 E 31677 31723] core_worker_process.cc:837: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(_train_tune pid=31677)[0m Epoch 999/-2 ━━━━━━━━━━━━━━━━━ 1/1 0:00:00 • 0:00:00 0.00it/s v_num: 0.000      
[36m(_train_tune pid=31677)[0m                                                               train_loss_step:  
[36m(_train_tune pid=31677)[0m                                                               0.014             
[36m(_train_tune pid=31677)[0m                                                               train_loss_epoch: 
[36m(_train_tune pid=31677)[0m                                                               0.014 valid_loss: 
[36m(_train_tune pid=31677)[0m                                                               16254.303         


[36m(_train_tune pid=31677)[0m `Trainer.fit` stopped: `max_steps=1000` reached.
[36m(_train_tune pid=32637)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=32637)[0m Seed set to 3
[36m(_train_tune pid=32637)[0m GPU available: False, used: False
[36m(_train_tune pid=32637)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=32637)[0m 2025-12-04 16:55:17.597228: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=32637)[0m E0000 00:00:1764867317.641974   32751 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered


[36m(_train_tune pid=32637)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=32637)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=32637)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=32637)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=32637)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=32637)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=32637)[0m │ 3 │ blocks       │ ModuleList    │  2.6 M │ train │     0 │
[36m(_train_tune pid=32637)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=32637)[0m Trainable params: 2.6 M                                                         
[36m(_train_tune pid=32637)[0m Non-trainable params: 9.2 K                                                     
[36m(_train

[36m(_train_tune pid=32637)[0m [2025-12-04 16:55:35,216 E 32637 32683] core_worker_process.cc:837: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(_train_tune pid=32637)[0m Epoch 499/-2 ━━━━━━━━━━━━━━━━━ 1/1 0:00:00 • 0:00:00 0.00it/s v_num: 0.000      
[36m(_train_tune pid=32637)[0m                                                               train_loss_step:  
[36m(_train_tune pid=32637)[0m                                                               0.011             
[36m(_train_tune pid=32637)[0m                                                               train_loss_epoch: 
[36m(_train_tune pid=32637)[0m                                                               0.011 valid_loss: 
[36m(_train_tune pid=32637)[0m                                                               16326.287         


[36m(_train_tune pid=32637)[0m `Trainer.fit` stopped: `max_steps=500` reached.
[36m(_train_tune pid=33548)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=33548)[0m Seed set to 2
[36m(_train_tune pid=33548)[0m GPU available: False, used: False
[36m(_train_tune pid=33548)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=33548)[0m 2025-12-04 16:58:45.281448: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=33548)[0m E0000 00:00:1764867525.327893   33667 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[

[36m(_train_tune pid=33548)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=33548)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=33548)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=33548)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=33548)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=33548)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=33548)[0m │ 3 │ blocks       │ ModuleList    │  2.6 M │ train │     0 │
[36m(_train_tune pid=33548)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=33548)[0m Trainable params: 2.6 M                                                         
[36m(_train_tune pid=33548)[0m Non-trainable params: 9.2 K                                                     
[36m(_train

[36m(_train_tune pid=33548)[0m [2025-12-04 16:58:59,516 E 33548 33590] core_worker_process.cc:837: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[36m(_train_tune pid=33548)[0m `Trainer.fit` stopped: `max_steps=500` reached.
2025-12-04 17:01:48,986	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/_train_tune_2025-12-04_16-50-15' in 0.0157s.
2025-12-04 17:01:48,994	ERROR tune.py:1037 -- Trials did not complete: [_train_tune_05703_00000, _train_tune_05703_00001]
INFO:lightning_fabric.utilities.seed:Seed set to 16
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores


[36m(_train_tune pid=33548)[0m Epoch 499/-2 ━━━━━━━━━━━━━━━━━ 1/1 0:00:00 • 0:00:00 0.00it/s v_num: 0.000      
[36m(_train_tune pid=33548)[0m                                                               train_loss_step:  
[36m(_train_tune pid=33548)[0m                                                               0.025             
[36m(_train_tune pid=33548)[0m                                                               train_loss_epoch: 
[36m(_train_tune pid=33548)[0m                                                               0.025 valid_loss: 
[36m(_train_tune pid=33548)[0m                                                               18515.104         



Output()

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=1000` reached.


+--------------------------------------------------------------------+
| Configuration for experiment     _train_tune_2025-12-04_17-05-25   |
+--------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator             |
| Scheduler                        FIFOScheduler                     |
| Number of trials                 5                                 |
+--------------------------------------------------------------------+

View detailed results here: /root/ray_results/_train_tune_2025-12-04_17-05-25
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2025-12-04_15-19-14_190539_731/artifacts/2025-12-04_17-05-25/_train_tune_2025-12-04_17-05-25/driver_artifacts`


[36m(_train_tune pid=35329)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=35329)[0m Seed set to 6
[36m(_train_tune pid=35329)[0m GPU available: False, used: False
[36m(_train_tune pid=35329)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=35329)[0m 2025-12-04 17:05:46.973250: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=35329)[0m E0000 00:00:1764867947.001811   35440 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_train_tune pid=35329)[0m E0000 00:00:1764867947.012480   35440 cuda_blas.cc

[36m(_train_tune pid=35329)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=35329)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=35329)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=35329)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=35329)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=35329)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=35329)[0m │ 3 │ blocks       │ ModuleList    │  2.8 M │ train │     0 │
[36m(_train_tune pid=35329)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=35329)[0m Trainable params: 2.8 M                                                         
[36m(_train_tune pid=35329)[0m Non-trainable params: 0                                                         
[36m(_train

2025-12-04 17:05:52,894	ERROR tune_controller.py:1331 -- Trial task failed for trial _train_tune_05706_00000
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 2972, in get
    values, debugger_breakpoint = worker.get_objects(
                                  ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 1031, in get_objects
    raise value.as_instanceof_caus

[36m(_train_tune pid=35329)[0m Epoch 0/-2                    0/1 0:00:00 • -:--:-- 0.00it/s v_num: 0.000       
[36m(_train_tune pid=35329)[0m                                                              valid_loss:        
[36m(_train_tune pid=35329)[0m                                                              17604.809          

Trial _train_tune_05706_00000 errored after 0 iterations at 2025-12-04 17:05:52. Total running time: 27s
Error file: /tmp/ray/session_2025-12-04_15-19-14_190539_731/artifacts/2025-12-04_17-05-25/_train_tune_2025-12-04_17-05-25/driver_artifacts/_train_tune_05706_00000_0_batch_size=32,input_size=195,learning_rate=0.0710,max_steps=1400.0000,n_freq_downsample=60_8_1,n_pool_ke_2025-12-04_17-05-25/error.txt


[36m(_train_tune pid=35523)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=35523)[0m Seed set to 15
[36m(_train_tune pid=35523)[0m GPU available: False, used: False
[36m(_train_tune pid=35523)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=35523)[0m 2025-12-04 17:06:15.912533: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=35523)[0m E0000 00:00:1764867975.943548   35633 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_train_tune pid=35523)[0m E0000 00:00:1764867975.955924   35633 cuda_blas.c

[36m(_train_tune pid=35523)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=35523)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=35523)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=35523)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=35523)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=35523)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=35523)[0m │ 3 │ blocks       │ ModuleList    │  3.0 M │ train │     0 │
[36m(_train_tune pid=35523)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=35523)[0m Trainable params: 3.0 M                                                         
[36m(_train_tune pid=35523)[0m Non-trainable params: 0                                                         
[36m(_train

2025-12-04 17:06:21,846	ERROR tune_controller.py:1331 -- Trial task failed for trial _train_tune_05706_00001
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 2972, in get
    values, debugger_breakpoint = worker.get_objects(
                                  ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 1031, in get_objects
    raise value.as_instanceof_caus

[36m(_train_tune pid=35523)[0m Epoch 0/-2                    0/1 0:00:00 • -:--:-- 0.00it/s v_num: 0.000       
[36m(_train_tune pid=35523)[0m                                                              valid_loss:        
[36m(_train_tune pid=35523)[0m                                                              16888.574          

Trial _train_tune_05706_00001 errored after 0 iterations at 2025-12-04 17:06:21. Total running time: 56s
Error file: /tmp/ray/session_2025-12-04_15-19-14_190539_731/artifacts/2025-12-04_17-05-25/_train_tune_2025-12-04_17-05-25/driver_artifacts/_train_tune_05706_00001_1_batch_size=128,input_size=195,learning_rate=0.0017,max_steps=500.0000,n_freq_downsample=40_20_1,n_pool_k_2025-12-04_17-05-25/error.txt


[36m(_train_tune pid=35712)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=35712)[0m Seed set to 3
[36m(_train_tune pid=35712)[0m GPU available: False, used: False
[36m(_train_tune pid=35712)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=35712)[0m 2025-12-04 17:06:44.598457: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=35712)[0m E0000 00:00:1764868004.625478   35821 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_train_tune pid=35712)[0m E0000 00:00:1764868004.633492   35821 cuda_blas.cc

[36m(_train_tune pid=35712)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=35712)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=35712)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=35712)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=35712)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=35712)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=35712)[0m │ 3 │ blocks       │ ModuleList    │  2.6 M │ train │     0 │
[36m(_train_tune pid=35712)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=35712)[0m Trainable params: 2.6 M                                                         
[36m(_train_tune pid=35712)[0m Non-trainable params: 0                                                         
[36m(_train

[36m(_train_tune pid=35712)[0m [2025-12-04 17:07:01,871 E 35712 35753] core_worker_process.cc:837: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(_train_tune pid=35712)[0m Epoch 999/-2 ━━━━━━━━━━━━━━━━━ 1/1 0:00:00 • 0:00:00 0.00it/s v_num: 0.000      
[36m(_train_tune pid=35712)[0m                                                               train_loss_step:  
[36m(_train_tune pid=35712)[0m                                                               51007.672         
[36m(_train_tune pid=35712)[0m                                                               train_loss_epoch: 
[36m(_train_tune pid=35712)[0m                                                               51007.672         
[36m(_train_tune pid=35712)[0m                                                               valid_loss:       
[36m(_train_tune pid=35712)[0m                                                               116955.898        


[36m(_train_tune pid=35712)[0m `Trainer.fit` stopped: `max_steps=1000.0` reached.
[36m(_train_tune pid=37331)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=37331)[0m Seed set to 10
[36m(_train_tune pid=37331)[0m GPU available: False, used: False
[36m(_train_tune pid=37331)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=37331)[0m 2025-12-04 17:13:06.118442: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=37331)[0m E0000 00:00:1764868386.152277   37450 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registere

[36m(_train_tune pid=37331)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=37331)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=37331)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=37331)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=37331)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=37331)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=37331)[0m │ 3 │ blocks       │ ModuleList    │  2.5 M │ train │     0 │
[36m(_train_tune pid=37331)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=37331)[0m Trainable params: 2.5 M                                                         
[36m(_train_tune pid=37331)[0m Non-trainable params: 0                                                         
[36m(_train

[36m(_train_tune pid=37331)[0m [2025-12-04 17:13:22,781 E 37331 37377] core_worker_process.cc:837: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[36m(_train_tune pid=37331)[0m `Trainer.fit` stopped: `max_steps=800.0` reached.


[36m(_train_tune pid=37331)[0m Epoch 799/-2 ━━━━━━━━━━━━━━━━━ 1/1 0:00:00 • 0:00:00 0.00it/s v_num: 0.000      
[36m(_train_tune pid=37331)[0m                                                               train_loss_step:  
[36m(_train_tune pid=37331)[0m                                                               1019.198          
[36m(_train_tune pid=37331)[0m                                                               train_loss_epoch: 
[36m(_train_tune pid=37331)[0m                                                               1019.198          
[36m(_train_tune pid=37331)[0m                                                               valid_loss:       
[36m(_train_tune pid=37331)[0m                                                               18786.342         


[36m(_train_tune pid=37927)[0m /usr/local/lib/python3.12/dist-packages/ray/tune/integration/pytorch_lightning.py:198: `ray.tune.integration.pytorch_lightning.TuneReportCallback` is deprecated. Use `ray.tune.integration.pytorch_lightning.TuneReportCheckpointCallback` instead.
[36m(_train_tune pid=37927)[0m Seed set to 4
[36m(_train_tune pid=37927)[0m GPU available: False, used: False
[36m(_train_tune pid=37927)[0m TPU available: False, using: 0 TPU cores
[36m(_train_tune pid=37927)[0m 2025-12-04 17:15:12.409118: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(_train_tune pid=37927)[0m E0000 00:00:1764868512.438631   38038 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(_train_tune pid=37927)[0m E0000 00:00:1764868512.446849   38038 cuda_blas.cc

[36m(_train_tune pid=37927)[0m ┏━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┓
[36m(_train_tune pid=37927)[0m ┃   ┃ Name         ┃ Type          ┃ Params ┃ Mode  ┃ FLOPs ┃
[36m(_train_tune pid=37927)[0m ┡━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━┩
[36m(_train_tune pid=37927)[0m │ 0 │ loss         │ MAE           │      0 │ train │     0 │
[36m(_train_tune pid=37927)[0m │ 1 │ padder_train │ ConstantPad1d │      0 │ train │     0 │
[36m(_train_tune pid=37927)[0m │ 2 │ scaler       │ TemporalNorm  │      0 │ train │     0 │
[36m(_train_tune pid=37927)[0m │ 3 │ blocks       │ ModuleList    │  2.6 M │ train │     0 │
[36m(_train_tune pid=37927)[0m └───┴──────────────┴───────────────┴────────┴───────┴───────┘
[36m(_train_tune pid=37927)[0m Trainable params: 2.6 M                                                         
[36m(_train_tune pid=37927)[0m Non-trainable params: 0                                                         
[36m(_train

[36m(_train_tune pid=37927)[0m [2025-12-04 17:15:29,369 E 37927 37969] core_worker_process.cc:837: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
2025-12-04 17:17:56,387	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/_train_tune_2025-12-04_17-05-25' in 0.0116s.
2025-12-04 17:17:56,396	ERROR tune.py:1037 -- Trials did not complete: [_train_tune_05706_00000, _train_tune_05706_00001]
INFO:lightning_fabric.utilities.seed:Seed set to 10
[36m(_train_tune pid=37927)[0m `Trainer.fit` stopped: `max_steps=800.0` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores



[36m(_train_tune pid=37927)[0m Epoch 799/-2 ━━━━━━━━━━━━━━━━━ 1/1 0:00:00 • 0:00:00 0.00it/s v_num: 0.000      
[36m(_train_tune pid=37927)[0m                                                               train_loss_step:  
[36m(_train_tune pid=37927)[0m                                                               16240.426         
[36m(_train_tune pid=37927)[0m                                                               train_loss_epoch: 
[36m(_train_tune pid=37927)[0m                                                               16240.426         
[36m(_train_tune pid=37927)[0m                                                               valid_loss:       
[36m(_train_tune pid=37927)[0m                                                               36866.355         


Output()

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=800.0` reached.


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores


Output()

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores


Output()

Merging future forecasts...
Merging TimeGPT results...

Success! Future forecasts generated.
   index unique_id         ds      Naive  SeasonalNaive        AutoETS      AutoARIMA       LightGBM     AutoNBEATS      AutoNHITS     TimeGPT
0      0     10_72 2012-11-02  121126.83      164085.50  121731.372285  146823.630044  119293.290232  145975.578125  139059.859375  101126.766
1      1     10_72 2012-11-09  121126.83      165484.28  154154.779805  152522.285140  120565.613327  137477.546875  165292.718750  137822.900
2      2     10_72 2012-11-16  121126.83      142730.01  125783.764293  131582.211216  113009.388415   91980.015625  135245.031250  172166.800
3      3     10_72 2012-11-23  121126.83      630999.19  721370.335147  620616.833776  253823.819253  401640.437500  547326.375000  269151.940
4      4     10_72 2012-11-30  121126.83      156039.04  189763.215386  145979.638015  142908.368103  121521.179688  217144.625000  212760.480


In [29]:
# ==========================================
# 6. Evaluation Metrics
# ==========================================
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

def calculate_metrics(cv_df):
    # Identify model columns (exclude ID/Date/Target columns)
    models = [c for c in cv_df.columns if c not in ['unique_id', 'ds', 'cutoff', 'y']]
    results = []

    for model in models:
        y_true = cv_df['y']
        y_pred = cv_df[model]

        # Drop NaNs for metric calculation (in case of alignment issues)
        valid_mask = ~np.isnan(y_pred)
        y_true_clean = y_true[valid_mask]
        y_pred_clean = y_pred[valid_mask]

        if len(y_true_clean) == 0:
            continue

        # Rubric Metrics
        me = np.mean(y_true_clean - y_pred_clean)
        mae = mean_absolute_error(y_true_clean, y_pred_clean)
        rmse = np.sqrt(mean_squared_error(y_true_clean, y_pred_clean))

        # MAPE (handling zeros)
        mask = y_true_clean != 0
        if mask.any():
            mape = np.mean(np.abs((y_true_clean[mask] - y_pred_clean[mask]) / y_true_clean[mask])) * 100
        else:
            mape = np.nan

        results.append({'Model': model, 'ME': me, 'MAE': mae, 'RMSE': rmse, 'MAPE': mape})

    return pd.DataFrame(results).sort_values(by='RMSE')

def count_winners(cv_df):
    models = [c for c in cv_df.columns if c not in ['unique_id', 'ds', 'cutoff', 'y']]
    errors = cv_df.copy()

    # Calculate absolute error for every row
    for m in models:
        errors[m] = (errors['y'] - errors[m]).abs()

    # Find model with lowest MAE per series
    series_mae = errors.groupby('unique_id')[models].mean()
    series_mae['Winner'] = series_mae.idxmin(axis=1)

    return series_mae['Winner'].value_counts().reset_index()

# EXECUTE THE FUNCTIONS
# We look for 'combined_results' which comes from Step 4 (The Cross-Validation Step)
if 'combined_results' in globals():
    print("\n--- Calculating Global Accuracy Metrics ---")
    metrics_df = calculate_metrics(combined_results)
    print(metrics_df)

    print("\n--- Model Leaderboard (Wins per Series) ---")
    winners_df = count_winners(combined_results)
    print(winners_df)


--- Calculating Global Accuracy Metrics ---
           Model           ME           MAE          RMSE      MAPE
5     AutoNBEATS  -850.619566   7107.947170   9782.697450  5.175090
2        AutoETS  -857.830317   7845.232619  10662.438821  5.672372
3      AutoARIMA -2206.756063   7418.543313  11411.103296  5.473035
6      AutoNHITS -1811.165523   9425.748159  12897.903357  6.778289
4       LightGBM -5034.945115  10399.368105  14645.473281  7.725298
1  SeasonalNaive  1012.752825  11298.365025  15861.602483  8.184313
0          Naive -2025.840950  12065.408550  15934.427679  8.689676

--- Model Leaderboard (Wins per Series) ---
          Winner  count
0     AutoNBEATS      9
1      AutoARIMA      4
2        AutoETS      3
3  SeasonalNaive      3
4      AutoNHITS      1


In [32]:
# ==========================================
# 7. Main Execution (Corrected for Unified Pipeline)
# ==========================================
import matplotlib.pyplot as plt
import pandas as pd

# Check if functions from previous cells are defined
required_funcs = ['load_and_prep_data', 'run_standard_pipeline', 'run_timegpt_pipeline',
                  'generate_future_forecasts', 'calculate_metrics', 'count_winners']

if not all(func in globals() for func in required_funcs):
    print("Error: Required functions are missing.")
    print("Please make sure you have run ALL previous cells (Steps 2-6) before running this one.")
else:
    print("Starting Main Execution Pipeline...\n")

    # 1. Load Data
    train_subset, test_subset = load_and_prep_data()

    if train_subset is not None:
        # 2. Run Standard Model
        final_eval = run_standard_pipeline(train_subset)

        # 3. Run TimeGPT (Remote)
        tgpt_cv, tgpt_fcst = run_timegpt_pipeline(train_subset, test_subset, horizon=4, n_windows=5)

        # 4. Merge TimeGPT into Evaluation Results
        print("Merging TimeGPT results...")
        merge_keys = ['unique_id', 'ds', 'cutoff']

        if not tgpt_cv.empty and 'TimeGPT' in tgpt_cv.columns:
            # Drop 'y' from TimeGPT CV to avoid duplicates if it exists
            tgpt_clean = tgpt_cv.drop(columns=['y'], errors='ignore')
            final_eval = final_eval.merge(tgpt_clean, on=merge_keys, how='left')

        # 5. Generate Future Forecasts
        future_forecasts = generate_future_forecasts(train_subset, test_subset)

        # Merge TimeGPT Future Forecasts if available
        if not tgpt_fcst.empty and 'TimeGPT' in tgpt_fcst.columns:
             future_forecasts = future_forecasts.merge(
                 tgpt_fcst[['unique_id', 'ds', 'TimeGPT']],
                 on=['unique_id', 'ds'],
                 how='left'
             )

        # 6. Metrics & Winners
        metrics_df = calculate_metrics(final_eval)
        winners_df = count_winners(final_eval)

        print("\n--- Final Metrics ---")
        print(metrics_df)
        print("\n--- Model Winners (Count by Series) ---")
        print(winners_df)

        # 7. Save CSVs
        final_eval.to_csv('final_evaluation_output.csv', index=False)
        metrics_df.to_csv('final_metrics_summary.csv', index=False)
        future_forecasts.to_csv('testing_outputs.csv', index=False)
        print("\n Files Saved: final_evaluation_output.csv, final_metrics_summary.csv, testing_outputs.csv")

        # 8. Plotting
        # Check if we have data to plot
        if not final_eval.empty:
            u_id = final_eval['unique_id'].unique()[0]
            subset = final_eval[final_eval['unique_id'] == u_id]

            plt.figure(figsize=(14, 6))

            # Plot Actuals
            if 'y' in subset.columns:
                plt.plot(subset['ds'], subset['y'], label='Actual', color='black', linewidth=2)

            # Plot models dynamically
            plot_models = [c for c in subset.columns if c not in ['unique_id', 'ds', 'cutoff', 'y']]
            for m in plot_models:
                # Plot only if column is numeric
                if pd.api.types.is_numeric_dtype(subset[m]):
                    plt.plot(subset['ds'], subset[m], label=m, alpha=0.7)

            plt.title(f"Forecast Models vs Actual: {u_id}")
            plt.legend()
            plt.show()

Starting Main Execution Pipeline...

Loading data...

--- DIAGNOSTIC CHECK ---
Total Sum of 'y' (Sales): 402,568,823.37
STATUS: Data verified. Sales column 'y' is populated.
------------------------

Data Prepared. Modeling 20 series.
[StatsForecast  ] Training: Naive, SeasonalNaive, AutoETS, AutoARIMA


KeyboardInterrupt: 