In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [38]:
# Install required packages
!pip install -q pytorch_lightning wandb torch torchvision pandas numpy matplotlib seaborn scikit-learn mlflow wand dagshub neuralforecast

# Set up Kaggle API
!pip install -q kaggle ray[tune]

In [3]:
# Upload your kaggle.json to Colab and run:
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json

In [4]:
# Download the dataset
!kaggle competitions download -c walmart-recruiting-store-sales-forecasting
!unzip -q walmart-recruiting-store-sales-forecasting.zip

Downloading walmart-recruiting-store-sales-forecasting.zip to /content
  0% 0.00/2.70M [00:00<?, ?B/s]
100% 2.70M/2.70M [00:00<00:00, 926MB/s]


In [5]:
!unzip -q train.csv.zip
!unzip -q stores.csv.zip
!unzip -q test.csv.zip
!unzip -q features.csv.zip

unzip:  cannot find or open stores.csv.zip, stores.csv.zip.zip or stores.csv.zip.ZIP.


In [27]:
import mlflow
from dagshub import dagshub_logger
import os

# Set tracking URI manually
mlflow.set_tracking_uri("https://dagshub.com/ekvirika/WalmartRecruiting.mlflow")

# Use your DagsHub credentials
os.environ["MLFLOW_TRACKING_USERNAME"] = "ekvirika"
os.environ["MLFLOW_TRACKING_PASSWORD"] = "0adb1004ddd4221395353efea2d8ead625e26197"

# Optional: set registry if you're using model registry
mlflow.set_registry_uri("https://dagshub.com/ekvirika/WalmartRecruiting.mlflow")
mlflow.set_experiment("NBeats_Training")

# Detect GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# W&B setup
wandb_project = 'WalmartRecruiting'
wandb_entity = None  # Replace with your W&B entity if using teams


In [21]:
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mellekvirikashvili[0m ([33mellekvirikashvili-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


# Import Libraries

In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.base import BaseEstimator, TransformerMixin
import torch
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS
from neuralforecast.losses.pytorch import MSE


In [29]:
import os

BASE_DIR = os.getcwd()
DATA_DIR = os.path.join(BASE_DIR, '')

STORES_PATH = os.path.join(DATA_DIR, 'stores.csv')
FEATURES_PATH = os.path.join(DATA_DIR, 'features.csv')
TRAIN_PATH = os.path.join(DATA_DIR, 'train.csv')
TEST_PATH = os.path.join(DATA_DIR, 'test.csv')


import pandas as pd

def load_data():
    return {
        'stores': pd.read_csv(STORES_PATH),
        'features': pd.read_csv(FEATURES_PATH),
        'train': pd.read_csv(TRAIN_PATH),
        'test': pd.read_csv(TEST_PATH)
    }


In [36]:
def compute_wmae(y_true, y_pred, weights):
    return np.sum(np.abs(y_true - y_pred) * weights) / np.sum(weights)


In [37]:
class TweakedNBEATS(NBEATS):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.optimizer = torch.optim.AdamW(self.parameters(), lr=kwargs.get('learning_rate', 1e-3))
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.9)

    def configure_optimizers(self):
        return {
            'optimizer': self.optimizer,
            'lr_scheduler': {
                'scheduler': self.scheduler,
                'interval': 'epoch',
                'frequency': 1
            }
        }

In [31]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

class RetailDataPreprocessor:
    def __init__(self, scale_features=True):
        self.scale_features = scale_features
        self.scaler = StandardScaler() if scale_features else None

    def load_and_merge(self, data_path):
        train = pd.read_csv(f"train.csv")
        features = pd.read_csv(f"features.csv")
        stores = pd.read_csv(f"stores.csv")

        data = train.merge(features, on=["Store", "Date", "IsHoliday"], how="left")
        data = data.merge(stores, on="Store", how="left")
        data["Date"] = pd.to_datetime(data["Date"])
        return data

    def preprocess(self, data):
        data = data.sort_values(["Store", "Dept", "Date"])
        data["Year"] = data["Date"].dt.year
        data["Month"] = data["Date"].dt.month
        data["Week"] = data["Date"].dt.isocalendar().week
        data["Day"] = data["Date"].dt.day

        features = ["Temperature", "Fuel_Price", "CPI", "Unemployment", "Year", "Month", "Week", "Day"]
        if self.scale_features:
            data[features] = self.scaler.fit_transform(data[features])

        return data


In [None]:
import pandas as pd
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS
from neuralforecast.losses.pytorch import MSE
from ray import tune
from ray.tune.schedulers import ASHAScheduler

def prepare_neuralforecast_input(data):
    df = data[["Store", "Dept", "Date", "Weekly_Sales"]].copy()
    df["unique_id"] = df["Store"].astype(str) + "_" + df["Dept"].astype(str)
    df.rename(columns={"Date": "ds", "Weekly_Sales": "y"}, inplace=True)
    return df

def train_model(config, forecast_data, h):
    with mlflow.start_run():
        model = NeuralForecast(
            models=[
                NBEATS(
                    h=h,
                    input_size=config["input_size"],
                    max_steps=config["steps"],
                    learning_rate=config["lr"],
                    loss="MAE",
                    scaler_type="robust",
                    valid_loss="MAE",
                    early_stop_patience_steps=10,
                    random_seed=42,
                    device="cuda" if torch.cuda.is_available() else "cpu"
                )
            ],
            freq="D"
        )

        model.fit(forecast_data)
        forecast = model.predict().reset_index()
        y_true = forecast_data.df[forecast_data.df["ds"] > forecast_data.df["ds"].max() - pd.Timedelta(days=h)]["y"].values
        y_pred = forecast["NBEATS"].values

        mse = np.mean((y_true - y_pred) ** 2)

        # Logging to MLflow
        mlflow.log_params(config)
        mlflow.log_metric("mse", mse)

        # Logging to W&B
        wandb.log({"mse": mse, **config})

        tune.report(mse=mse)

def tune_hyperparameters(df, h):
    def trainable(config):
        model = NeuralForecast(
            models=[
                NBEATS(
                    h=h,
                    input_size=config["input_size"],
                    loss=MSE(),
                    learning_rate=config["lr"],
                    max_steps=config["steps"]
                )
            ],
            freq="W"
        )
        model.fit(df=df)
        forecast_df = model.predict()
        last = df.groupby("unique_id").tail(h)
        mse = ((forecast_df["NBEATS"] - last["y"].values) ** 2).mean()
        tune.report(mse=mse)

    analysis = tune.run(
        trainable,
        config={
            "input_size": tune.choice([2*h, 3*h, 4*h]),
            "lr": tune.loguniform(1e-4, 1e-2),
            "steps": tune.choice([200, 500, 1000])
        },
        num_samples=5,
        scheduler=ASHAScheduler(metric="mse", mode="min"),
        resources_per_trial={"cpu": 2},
        storage_path="/content/"
    )


    print("Best config: ", analysis.best_config)
    return analysis.best_config


In [33]:
import numpy as np

def weighted_mae(true, pred, is_holiday):
    weights = np.where(is_holiday, 5, 1)
    return np.sum(weights * np.abs(true - pred)) / np.sum(weights)


In [34]:
import torch
from neuralforecast.models import NBEATS

class TweakedNBEATS(NBEATS):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.optimizer = torch.optim.AdamW(self.parameters(), lr=kwargs.get('learning_rate', 1e-3))
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=10, gamma=0.9)

    def configure_optimizers(self):
        return {
            'optimizer': self.optimizer,
            'lr_scheduler': {
                'scheduler': self.scheduler,
                'interval': 'epoch',
                'frequency': 1
            }
        }


In [39]:

import pandas as pd

# 1. Load and preprocess
preprocessor = RetailDataPreprocessor()
raw_data = preprocessor.load_and_merge("")
processed_data = preprocessor.preprocess(raw_data)

# 2. Prepare data for NeuralForecast
forecast_data = prepare_neuralforecast_input(processed_data)
horizon = 12  # number of weeks to forecast

# 3. Optional hyperparameter tuning
best_config = tune_hyperparameters(forecast_data, h=horizon)

# 4. Train final model
final_forecast = train_model(forecast_data, h=horizon)
final_forecast.to_csv("/mnt/data/WalmartForecastCustom/final_forecast.csv", index=False)

print("Forecast complete. Output saved.")


+------------------------------------------------------------------+
| Configuration for experiment     trainable_2025-08-03_02-57-00   |
+------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator           |
| Scheduler                        AsyncHyperBandScheduler         |
| Number of trials                 5                               |
+------------------------------------------------------------------+

View detailed results here: /content/trainable_2025-08-03_02-57-00
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2025-08-03_02-33-24_172762_386/artifacts/2025-08-03_02-57-00/trainable_2025-08-03_02-57-00/driver_artifacts`

Trial status: 5 PENDING
Current time: 2025-08-03 02:57:01. Total running time: 1s
Logical resource usage: 0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-------------------------------------------------------------------------+
| Trial name        

[36m(trainable pid=9696)[0m Seed set to 1
[36m(trainable pid=9696)[0m GPU available: False, used: False
[36m(trainable pid=9696)[0m TPU available: False, using: 0 TPU cores
[36m(trainable pid=9696)[0m HPU available: False, using: 0 HPUs
[36m(trainable pid=9696)[0m 2025-08-03 02:57:13.659701: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(trainable pid=9696)[0m E0000 00:00:1754189833.676817    9786 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(trainable pid=9696)[0m E0000 00:00:1754189833.681737    9786 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(trainable pid=9696)[0m 2025-08-03 02:57:13.701764: I tensorflow/core/platform/cpu_feature_guard

Epoch 0:   0%|          | 0/105 [00:00<?, ?it/s] 
Epoch 0:  19%|█▉        | 20/105 [00:03<00:15,  5.65it/s, v_num=0, train_loss_step=1.53e+7]
Epoch 0:  38%|███▊      | 40/105 [00:07<00:12,  5.31it/s, v_num=0, train_loss_step=1.14e+8]
Epoch 0:  57%|█████▋    | 60/105 [00:12<00:09,  4.97it/s, v_num=0, train_loss_step=1.97e+7]

Trial status: 1 RUNNING | 4 PENDING
Current time: 2025-08-03 02:57:31. Total running time: 31s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-------------------------------------------------------------------------+
| Trial name              status       input_size            lr     steps |
+-------------------------------------------------------------------------+
| trainable_86a28_00000   RUNNING              24   0.000234596       200 |
| trainable_86a28_00001   PENDING              36   0.000129884       500 |
| trainable_86a28_00002   PENDING              48   0.00995402        200 |
| trainable_86a28_00003   PENDING              

[36m(trainable pid=9696)[0m `Trainer.fit` stopped: `max_steps=200` reached.
[36m(trainable pid=9696)[0m GPU available: False, used: False
[36m(trainable pid=9696)[0m TPU available: False, using: 0 TPU cores
[36m(trainable pid=9696)[0m HPU available: False, using: 0 HPUs


[36m(trainable pid=9696)[0m 
[36m(trainable pid=9696)[0m Validation DataLoader 0:  76%|███████▌  | 80/105 [00:00<00:00, 666.57it/s][A
[36m(trainable pid=9696)[0m Validation DataLoader 0:  95%|█████████▌| 100/105 [00:00<00:00, 723.28it/s][A
[36m(trainable pid=9696)[0m Validation DataLoader 0: 100%|██████████| 105/105 [00:00<00:00, 736.37it/s][A
[36m(trainable pid=9696)[0m                                                                            [AEpoch 1: 100%|██████████| 95/95 [00:22<00:00,  4.30it/s, v_num=0, train_loss_step=9.46e+7, train_loss_epoch=6.29e+7]Epoch 1: 100%|██████████| 95/95 [00:22<00:00,  4.30it/s, v_num=0, train_loss_step=9.46e+7, train_loss_epoch=5.82e+7]Epoch 1: 100%|██████████| 95/95 [00:22<00:00,  4.30it/s, v_num=0, train_loss_step=9.46e+7, train_loss_epoch=5.82e+7]
Predicting DataLoader 0:   0%|          | 0/105 [00:00<?, ?it/s]
Predicting DataLoader 0:  19%|█▉        | 20/105 [00:00<00:00, 192.28it/s]
Predicting DataLoader 0:  38%|███▊     

2025-08-03 02:57:59,357	ERROR tune_controller.py:1331 -- Trial task failed for trial trainable_86a28_00000
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py", line 2858, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py", line


Trial trainable_86a28_00000 errored after 0 iterations at 2025-08-03 02:57:59. Total running time: 58s
Error file: /tmp/ray/session_2025-08-03_02-33-24_172762_386/artifacts/2025-08-03_02-57-00/trainable_2025-08-03_02-57-00/driver_artifacts/trainable_86a28_00000_0_input_size=24,lr=0.0002,steps=200_2025-08-03_02-57-01/error.txt

Trial status: 1 ERROR | 4 PENDING
Current time: 2025-08-03 02:58:01. Total running time: 1min 1s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-------------------------------------------------------------------------+
| Trial name              status       input_size            lr     steps |
+-------------------------------------------------------------------------+
| trainable_86a28_00001   PENDING              36   0.000129884       500 |
| trainable_86a28_00002   PENDING              48   0.00995402        200 |
| trainable_86a28_00003   PENDING              48   0.00135525        200 |
| trainable_86a28_00004   PENDING         

[36m(trainable pid=10027)[0m Seed set to 1
[36m(trainable pid=10027)[0m GPU available: False, used: False
[36m(trainable pid=10027)[0m TPU available: False, using: 0 TPU cores
[36m(trainable pid=10027)[0m HPU available: False, using: 0 HPUs
[36m(trainable pid=10027)[0m 2025-08-03 02:58:14.829802: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(trainable pid=10027)[0m E0000 00:00:1754189894.847399   10136 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(trainable pid=10027)[0m E0000 00:00:1754189894.852367   10136 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[36m(trainable pid=10027)[0m 2025-08-03 02:58:14.869541: I tensorflow/core/platform/cpu_featu

Epoch 0:   0%|          | 0/105 [00:00<?, ?it/s] 
Epoch 0:  19%|█▉        | 20/105 [00:03<00:14,  5.71it/s, v_num=0, train_loss_step=1.43e+7]
Epoch 0:  38%|███▊      | 40/105 [00:07<00:12,  5.28it/s, v_num=0, train_loss_step=1.12e+8]
Epoch 0:  57%|█████▋    | 60/105 [00:12<00:09,  4.95it/s, v_num=0, train_loss_step=2.47e+7]

Trial status: 1 ERROR | 1 RUNNING | 3 PENDING
Current time: 2025-08-03 02:58:32. Total running time: 1min 31s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-------------------------------------------------------------------------+
| Trial name              status       input_size            lr     steps |
+-------------------------------------------------------------------------+
| trainable_86a28_00001   RUNNING              36   0.000129884       500 |
| trainable_86a28_00002   PENDING              48   0.00995402        200 |
| trainable_86a28_00003   PENDING              48   0.00135525        200 |
| trainable_86a28_00004   PENDIN

[36m(trainable pid=10027)[0m `Trainer.fit` stopped: `max_steps=500` reached.
[36m(trainable pid=10027)[0m GPU available: False, used: False
[36m(trainable pid=10027)[0m TPU available: False, using: 0 TPU cores
[36m(trainable pid=10027)[0m HPU available: False, using: 0 HPUs


[36m(trainable pid=10027)[0m Predicting: |          | 0/? [00:00<?, ?it/s]Predicting:   0%|          | 0/105 [00:00<?, ?it/s]Predicting DataLoader 0:   0%|          | 0/105 [00:00<?, ?it/s]
Predicting DataLoader 0:  19%|█▉        | 20/105 [00:00<00:00, 203.58it/s]
Predicting DataLoader 0:  38%|███▊      | 40/105 [00:00<00:00, 203.73it/s]
Predicting DataLoader 0:  57%|█████▋    | 60/105 [00:00<00:00, 172.06it/s]
Predicting DataLoader 0:  76%|███████▌  | 80/105 [00:00<00:00, 156.54it/s]
Predicting DataLoader 0:  95%|█████████▌| 100/105 [00:00<00:00, 150.94it/s]
Predicting DataLoader 0: 100%|██████████| 105/105 [00:00<00:00, 151.17it/s]


2025-08-03 02:59:56,657	ERROR tune_controller.py:1331 -- Trial task failed for trial trainable_86a28_00001
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py", line 2858, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/ray/_private/worker.py", line


Trial trainable_86a28_00001 errored after 0 iterations at 2025-08-03 02:59:56. Total running time: 2min 56s
Error file: /tmp/ray/session_2025-08-03_02-33-24_172762_386/artifacts/2025-08-03_02-57-00/trainable_2025-08-03_02-57-00/driver_artifacts/trainable_86a28_00001_1_input_size=36,lr=0.0001,steps=500_2025-08-03_02-57-01/error.txt

Trial status: 2 ERROR | 3 PENDING
Current time: 2025-08-03 03:00:02. Total running time: 3min 1s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-------------------------------------------------------------------------+
| Trial name              status       input_size            lr     steps |
+-------------------------------------------------------------------------+
| trainable_86a28_00002   PENDING              48   0.00995402        200 |
| trainable_86a28_00003   PENDING              48   0.00135525        200 |
| trainable_86a28_00004   PENDING              36   0.00237632       1000 |
| trainable_86a28_00000   ERROR      

2025-08-03 03:00:07,785	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/content/trainable_2025-08-03_02-57-00' in 0.0048s.


Trial status: 2 ERROR | 3 PENDING
Current time: 2025-08-03 03:00:07. Total running time: 3min 7s
Logical resource usage: 2.0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-------------------------------------------------------------------------+
| Trial name              status       input_size            lr     steps |
+-------------------------------------------------------------------------+
| trainable_86a28_00002   PENDING              48   0.00995402        200 |
| trainable_86a28_00003   PENDING              48   0.00135525        200 |
| trainable_86a28_00004   PENDING              36   0.00237632       1000 |
| trainable_86a28_00000   ERROR                24   0.000234596       200 |
| trainable_86a28_00001   ERROR                36   0.000129884       500 |
+-------------------------------------------------------------------------+

Number of errored trials: 2
+------------------------------------------------------------------------------------------------------------------

2025-08-03 03:00:09,106	ERROR tune.py:1037 -- Trials did not complete: [trainable_86a28_00000, trainable_86a28_00001]
Resume experiment with: tune.run(..., resume=True)
- trainable_86a28_00002: FileNotFoundError('Could not fetch metrics for trainable_86a28_00002: both result.json and progress.csv were not found at /content/trainable_2025-08-03_02-57-00/trainable_86a28_00002_2_input_size=48,lr=0.0100,steps=200_2025-08-03_02-57-01')
- trainable_86a28_00003: FileNotFoundError('Could not fetch metrics for trainable_86a28_00003: both result.json and progress.csv were not found at /content/trainable_2025-08-03_02-57-00/trainable_86a28_00003_3_input_size=48,lr=0.0014,steps=200_2025-08-03_02-57-01')
- trainable_86a28_00004: FileNotFoundError('Could not fetch metrics for trainable_86a28_00004: both result.json and progress.csv were not found at /content/trainable_2025-08-03_02-57-00/trainable_86a28_00004_4_input_size=36,lr=0.0024,steps=1000_2025-08-03_02-57-01')





ValueError: To fetch the `best_config`, pass a `metric` and `mode` parameter to `tune.run()`. Alternatively, use the `get_best_config(metric, mode)` method to set the metric and mode explicitly.

# Load Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import mlflow
import mlflow.pytorch
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, PatchTST
from itertools import product
import joblib
import logging
import os
# Configure logging
logging.getLogger().setLevel(logging.WARNING)
logging.getLogger("neuralforecast").setLevel(logging.WARNING)
logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)
logging.getLogger("lightning_fabric").setLevel(logging.WARNING)

# Set random seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)


def prepare_data_for_nbeats():
    train = pd.read_csv('train.csv')
    test = pd.read_csv('test.csv')

    train['Date'] = pd.to_datetime(train['Date'])
    test['Date'] = pd.to_datetime(test['Date'])

    train['unique_id'] = train['Store'].astype(str) + '_' + train['Dept'].astype(str)
    test['unique_id'] = test['Store'].astype(str) + '_' + test['Dept'].astype(str)

    train_nbeats = train[['unique_id', 'Date', 'Weekly_Sales']].copy()
    train_nbeats.columns = ['unique_id', 'ds', 'y']
    train_nbeats = train_nbeats.sort_values(['unique_id', 'ds'])

    # Updated fillna warning fix
    train_nbeats['y'] = train_nbeats.groupby('unique_id')['y'].ffill()
    train_nbeats['y'] = train_nbeats.groupby('unique_id')['y'].transform(lambda x: x.fillna(x.median()))

    min_length = 104
    series_lengths = train_nbeats.groupby('unique_id').size()
    valid_series = series_lengths[series_lengths >= min_length].index
    train_nbeats = train_nbeats[train_nbeats['unique_id'].isin(valid_series)]

    # Split into training and validation (last 12 weeks for validation)
    horizon = 12
    X_train = train_nbeats.groupby('unique_id').apply(lambda g: g.iloc[:-horizon]).reset_index(drop=True)
    X_valid = train_nbeats.groupby('unique_id').apply(lambda g: g.iloc[-horizon:]).reset_index(drop=True)

    y_train = X_train['y'].values
    y_valid = X_valid['y'].values

    test_nbeats = test[['unique_id', 'Date']].copy()
    test_nbeats.columns = ['unique_id', 'ds']
    test_nbeats = test_nbeats[test_nbeats['unique_id'].isin(valid_series)]

    return X_train, y_train, X_valid, y_valid, test_nbeats


# Load data
X_train, y_train, X_valid, y_valid, test_data = prepare_data_for_nbeats()

print(f"X_train shape: {X_train.shape}")
print(f"X_valid shape: {X_valid.shape}")
print(f"Test data shape: {test_data.shape}")

  X_train = train_nbeats.groupby('unique_id').apply(lambda g: g.iloc[:-horizon]).reset_index(drop=True)


X_train shape: (372569, 3)
X_valid shape: (34416, 3)
Test data shape: (110613, 2)


  X_valid = train_nbeats.groupby('unique_id').apply(lambda g: g.iloc[-horizon:]).reset_index(drop=True)


In [None]:
def compute_wmae(y_true, y_pred, is_holiday):
    """
    Compute the Weighted Mean Absolute Error (WMAE).

    Parameters:
    - y_true: array-like or pandas Series of true values
    - y_pred: array-like or pandas Series of predicted values
    - is_holiday: array-like or Series of booleans indicating if the observation is a holiday

    Returns:
    - WMAE (float)
    """
    import numpy as np
    import pandas as pd

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    is_holiday = np.array(is_holiday)

    weights = np.where(is_holiday, 5, 1)
    absolute_errors = np.abs(y_true - y_pred)
    wmae = np.sum(weights * absolute_errors) / np.sum(weights)

    return wmae


In [None]:
from itertools import product
from neuralforecast.models import PatchTST
import logging

logging.getLogger().setLevel(logging.WARNING)
logging.getLogger("neuralforecast").setLevel(logging.WARNING)
logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)
logging.getLogger("lightning_fabric").setLevel(logging.WARNING)

def run_nbeats_cv(X_train, y_train, X_valid, y_valid,
                            param_grid,
                            fixed_params,
                            return_all=False):
    results = []

    keys, values = zip(*param_grid.items())
    for vals in product(*values):
        params = dict(zip(keys, vals))
        params.update(fixed_params)

        params['enable_progress_bar'] = False
        params['enable_model_summary'] = False

        model = NBEATS(**params)

        nf_model = NeuralForecastModels(models=[model], model_names=['NBEATS'], freq='W-FRI', one_model=True)
        nf_model.fit(X_train, y_train)
        y_pred = nf_model.predict(X_valid)
        score = compute_wmae(y_valid, y_pred, X_valid['IsHoliday'])

        result = {'wmae': score, 'preds': y_pred}
        result.update(params)

        results.append(result)
        print(" → ".join(f"{k}={v}" for k,v in params.items() if k not in ['enable_progress_bar','enable_model_summary']) + f" → WMAE={score:.4f}")

    if return_all:
        return results
    else:
        return min(results, key=lambda r: r['wmae'])


param_grid = {
    'input_size' : [40,52,60,72]
}

fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'batch_size' : 64,
}

best_result = run_nbeats_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")


KeyError: 'Date'

In [None]:
import mlflow
import mlflow.pytorch
import mlflow.sklearn
import pandas as pd
import numpy as np
import torch
import json
import tempfile
import os
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS
from itertools import product
import logging

# Add the compute_wmae function if not already defined
def compute_wmae(y_true, y_pred, is_holiday):
    """
    Compute the Weighted Mean Absolute Error (WMAE).

    Parameters:
    - y_true: array-like or pandas Series of true values
    - y_pred: array-like or pandas Series of predicted values
    - is_holiday: array-like or Series of booleans indicating if the observation is a holiday

    Returns:
    - WMAE (float)
    """
    try:
        y_true = np.array(y_true)
        y_pred = np.array(y_pred)
        is_holiday = np.array(is_holiday)

        # Handle case where arrays have different lengths
        min_len = min(len(y_true), len(y_pred), len(is_holiday))
        y_true = y_true[:min_len]
        y_pred = y_pred[:min_len]
        is_holiday = is_holiday[:min_len]

        weights = np.where(is_holiday.astype(bool), 5, 1)
        absolute_errors = np.abs(y_true - y_pred)

        # Handle edge case where all weights are zero
        total_weights = np.sum(weights)
        if total_weights == 0:
            return np.mean(absolute_errors)

        wmae = np.sum(weights * absolute_errors) / total_weights
        return wmae
    except Exception as e:
        print(f"Error computing WMAE: {e}")
        return np.mean(np.abs(y_true - y_pred)) if len(y_true) > 0 else float('inf')

# Configure MLflow
def setup_mlflow(experiment_name="walmart-nbeats-forecasting", tracking_uri=None):
    """Setup MLflow experiment and tracking"""
    if tracking_uri:
        mlflow.set_tracking_uri(tracking_uri)

    # Set or create experiment
    try:
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is None:
            experiment_id = mlflow.create_experiment(experiment_name)
        else:
            experiment_id = experiment.experiment_id
        mlflow.set_experiment(experiment_name)
        print(f"Using MLflow experiment: {experiment_name}")
        return experiment_id
    except Exception as e:
        print(f"Error setting up MLflow: {e}")
        return None

class MLflowNeuralForecastLogger:
    """Enhanced MLflow logger for Neural Forecasting experiments"""

    def __init__(self, experiment_name="walmart-nbeats-forecasting", auto_log=True):
        self.experiment_name = experiment_name
        self.auto_log = auto_log
        self.experiment_id = setup_mlflow(experiment_name)

    def log_hyperparameters(self, params_dict):
        """Log hyperparameters to MLflow"""
        for key, value in params_dict.items():
            # Handle special types that MLflow can't serialize directly
            if isinstance(value, torch.optim.Optimizer):
                mlflow.log_param(key, value.__class__.__name__)
            elif callable(value):
                mlflow.log_param(key, value.__name__ if hasattr(value, '__name__') else str(value))
            elif isinstance(value, (list, dict)):
                mlflow.log_param(key, json.dumps(value))
            else:
                mlflow.log_param(key, value)

    def log_metrics(self, metrics_dict, step=None):
        """Log metrics to MLflow"""
        for key, value in metrics_dict.items():
            try:
                # Ensure value is a valid number
                if pd.isna(value) or not np.isfinite(value):
                    print(f"Warning: Invalid metric value for {key}: {value}")
                    continue
                mlflow.log_metric(key, float(value), step=step)
            except Exception as e:
                print(f"Warning: Could not log metric {key}: {e}")

    def log_data_info(self, X_train=None, y_train=None, X_valid=None, y_valid=None, test_data=None):
        """Log dataset information with robust date handling"""
        data_info = {}

        if X_train is not None:
            try:
                data_info["train_samples"] = len(X_train)
                data_info["train_features"] = X_train.shape[1] if hasattr(X_train, 'shape') else len(X_train.columns) if hasattr(X_train, 'columns') else 0

                # Handle Date column more robustly
                date_columns = []
                if hasattr(X_train, 'columns'):
                    # Look for common date column names
                    possible_date_cols = ['Date', 'date', 'DATE', 'ds', 'timestamp', 'time']
                    date_columns = [col for col in possible_date_cols if col in X_train.columns]

                if date_columns:
                    date_col = date_columns[0]  # Use the first found date column
                    data_info["train_date_range"] = safe_date_range(X_train[date_col])
                    data_info["date_column_used"] = date_col
                else:
                    data_info["train_date_range"] = "No date column found"
                    data_info["available_columns"] = list(X_train.columns) if hasattr(X_train, 'columns') else "Non-DataFrame input"

                # Handle Store column
                if hasattr(X_train, 'columns') and 'Store' in X_train.columns:
                    data_info["unique_stores"] = X_train['Store'].nunique()
                elif hasattr(X_train, 'columns') and 'store' in X_train.columns:
                    data_info["unique_stores"] = X_train['store'].nunique()

                # Handle Dept column
                if hasattr(X_train, 'columns') and 'Dept' in X_train.columns:
                    data_info["unique_depts"] = X_train['Dept'].nunique()
                elif hasattr(X_train, 'columns') and 'dept' in X_train.columns:
                    data_info["unique_depts"] = X_train['dept'].nunique()

                # Handle unique series calculation
                if hasattr(X_train, 'columns'):
                    store_cols = [col for col in X_train.columns if col.lower() in ['store', 'Store']]
                    dept_cols = [col for col in X_train.columns if col.lower() in ['dept', 'Dept', 'department']]

                    if store_cols and dept_cols:
                        data_info["unique_series"] = len(X_train.groupby([store_cols[0], dept_cols[0]]))

            except Exception as e:
                print(f"Warning: Error processing X_train info: {e}")
                data_info["train_processing_error"] = str(e)

        if y_train is not None:
            try:
                # Convert to numpy array for safe processing
                y_train_array = np.array(y_train)
                # Remove any infinite values for statistics
                y_train_finite = y_train_array[np.isfinite(y_train_array)]

                if len(y_train_finite) > 0:
                    data_info["target_mean"] = float(np.mean(y_train_finite))
                    data_info["target_std"] = float(np.std(y_train_finite))
                    data_info["target_min"] = float(np.min(y_train_finite))
                    data_info["target_max"] = float(np.max(y_train_finite))
                    data_info["target_finite_count"] = len(y_train_finite)
                    data_info["target_total_count"] = len(y_train_array)
                else:
                    data_info["target_processing_error"] = "No finite values in target"
            except Exception as e:
                print(f"Warning: Error processing y_train info: {e}")
                data_info["target_processing_error"] = str(e)

        if X_valid is not None:
            try:
                data_info["valid_samples"] = len(X_valid)

                # Handle Date column for validation set
                if hasattr(X_valid, 'columns'):
                    possible_date_cols = ['Date', 'date', 'DATE', 'ds', 'timestamp', 'time']
                    date_columns = [col for col in possible_date_cols if col in X_valid.columns]

                    if date_columns:
                        date_col = date_columns[0]
                        data_info["valid_date_range"] = safe_date_range(X_valid[date_col])
                    else:
                        data_info["valid_date_range"] = "No date column found"
            except Exception as e:
                print(f"Warning: Error processing X_valid info: {e}")
                data_info["valid_processing_error"] = str(e)

        if test_data is not None:
            try:
                data_info["test_samples"] = len(test_data)

                # Handle Date column for test set
                if hasattr(test_data, 'columns'):
                    possible_date_cols = ['Date', 'date', 'DATE', 'ds', 'timestamp', 'time']
                    date_columns = [col for col in possible_date_cols if col in test_data.columns]

                    if date_columns:
                        date_col = date_columns[0]
                        data_info["test_date_range"] = safe_date_range(test_data[date_col])
                    else:
                        data_info["test_date_range"] = "No date column found"
            except Exception as e:
                print(f"Warning: Error processing test_data info: {e}")
                data_info["test_processing_error"] = str(e)

        # Log all collected data info
        self.log_hyperparameters(data_info)

        # Create and log data summary plots (only if targets are present)
        if y_train is not None and y_valid is not None:
            self._log_data_plots(y_train, y_valid)


    def _log_data_plots(self, y_train, y_valid):
        """Create and log data visualization plots"""
        try:
            fig, axes = plt.subplots(2, 2, figsize=(15, 10))

            # Target distribution - handle edge cases
            y_train_clean = y_train[np.isfinite(y_train)]
            y_valid_clean = y_valid[np.isfinite(y_valid)]

            if len(y_train_clean) > 0 and len(y_valid_clean) > 0:
                axes[0, 0].hist(y_train_clean, bins=50, alpha=0.7, label='Train')
                axes[0, 0].hist(y_valid_clean, bins=50, alpha=0.7, label='Valid')
                axes[0, 0].set_title('Target Distribution')
                axes[0, 0].legend()
                axes[0, 0].set_xlabel('Weekly Sales')
                axes[0, 0].set_ylabel('Frequency')

            # Log scale distribution - handle negative values and zeros
            # Use log1p only for positive values, clip negatives to small positive value
            y_train_for_log = np.maximum(y_train_clean, 0.01)  # Clip to avoid log(0)
            y_valid_for_log = np.maximum(y_valid_clean, 0.01)

            log_train = np.log1p(y_train_for_log)
            log_valid = np.log1p(y_valid_for_log)

            # Remove any remaining infinite values
            log_train = log_train[np.isfinite(log_train)]
            log_valid = log_valid[np.isfinite(log_valid)]

            if len(log_train) > 0 and len(log_valid) > 0:
                axes[0, 1].hist(log_train, bins=50, alpha=0.7, label='Train (log)')
                axes[0, 1].hist(log_valid, bins=50, alpha=0.7, label='Valid (log)')
                axes[0, 1].set_title('Log Target Distribution')
                axes[0, 1].legend()
                axes[0, 1].set_xlabel('Log(Weekly Sales + 1)')

            # Box plots - use cleaned data
            if len(y_train_clean) > 0 and len(y_valid_clean) > 0:
                # Sample data if too large for visualization
                max_samples = 10000
                if len(y_train_clean) > max_samples:
                    y_train_sample = np.random.choice(y_train_clean, max_samples, replace=False)
                else:
                    y_train_sample = y_train_clean

                if len(y_valid_clean) > max_samples:
                    y_valid_sample = np.random.choice(y_valid_clean, max_samples, replace=False)
                else:
                    y_valid_sample = y_valid_clean

                data_for_box = pd.DataFrame({
                    'Sales': np.concatenate([y_train_sample, y_valid_sample]),
                    'Split': ['Train'] * len(y_train_sample) + ['Valid'] * len(y_valid_sample)
                })
                sns.boxplot(data=data_for_box, x='Split', y='Sales', ax=axes[1, 0])
                axes[1, 0].set_title('Sales Distribution by Split')

            # Summary stats - use original data but handle infinites
            train_stats = {
                'mean': np.mean(y_train_clean) if len(y_train_clean) > 0 else 0,
                'std': np.std(y_train_clean) if len(y_train_clean) > 0 else 0,
                'min': np.min(y_train_clean) if len(y_train_clean) > 0 else 0,
                'max': np.max(y_train_clean) if len(y_train_clean) > 0 else 0,
                'negative_count': np.sum(y_train < 0),
                'zero_count': np.sum(y_train == 0),
                'total_count': len(y_train)
            }

            valid_stats = {
                'mean': np.mean(y_valid_clean) if len(y_valid_clean) > 0 else 0,
                'std': np.std(y_valid_clean) if len(y_valid_clean) > 0 else 0,
                'min': np.min(y_valid_clean) if len(y_valid_clean) > 0 else 0,
                'max': np.max(y_valid_clean) if len(y_valid_clean) > 0 else 0,
                'negative_count': np.sum(y_valid < 0),
                'zero_count': np.sum(y_valid == 0),
                'total_count': len(y_valid)
            }

            stats_text = f"""
            Train Stats:
            Mean: {train_stats['mean']:.2f}
            Std: {train_stats['std']:.2f}
            Min: {train_stats['min']:.2f}
            Max: {train_stats['max']:.2f}
            Negatives: {train_stats['negative_count']}
            Zeros: {train_stats['zero_count']}
            Total: {train_stats['total_count']}

            Valid Stats:
            Mean: {valid_stats['mean']:.2f}
            Std: {valid_stats['std']:.2f}
            Min: {valid_stats['min']:.2f}
            Max: {valid_stats['max']:.2f}
            Negatives: {valid_stats['negative_count']}
            Zeros: {valid_stats['zero_count']}
            Total: {valid_stats['total_count']}
            """
            axes[1, 1].text(0.1, 0.1, stats_text, transform=axes[1, 1].transAxes,
                            fontsize=9, verticalalignment='bottom', fontfamily='monospace')
            axes[1, 1].set_title('Summary Statistics')
            axes[1, 1].axis('off')

            plt.tight_layout()
            mlflow.log_figure(fig, "data_summary.png")
            plt.close()

        except Exception as e:
            print(f"Warning: Could not create data plots: {e}")
            plt.close('all')  # Clean up any open figures

    def log_predictions(self, y_true, y_pred, X_valid, prefix="validation"):
        """Log prediction results and visualizations"""
        # Clean data - remove any infinite values
        valid_mask = np.isfinite(y_true) & np.isfinite(y_pred)
        y_true_clean = y_true[valid_mask]
        y_pred_clean = y_pred[valid_mask]

        if len(y_true_clean) == 0:
            print("Warning: No valid predictions to evaluate")
            return {}

        # Calculate metrics
        mae = np.mean(np.abs(y_true_clean - y_pred_clean))
        mse = np.mean((y_true_clean - y_pred_clean) ** 2)
        rmse = np.sqrt(mse)

        # Handle MAPE calculation carefully
        mape_denominator = np.abs(y_true_clean) + 1e-8
        mape = np.mean(np.abs((y_true_clean - y_pred_clean) / mape_denominator)) * 100

        # Calculate WMAE
        is_holiday = X_valid['IsHoliday'].values[valid_mask] if len(X_valid) == len(y_true) else X_valid['IsHoliday'].values
        wmae = compute_wmae(y_true_clean, y_pred_clean, is_holiday[:len(y_true_clean)])

        # Calculate R2 safely
        r2 = 0
        if len(set(y_pred_clean)) > 1 and len(set(y_true_clean)) > 1:
            try:
                correlation_matrix = np.corrcoef(y_true_clean, y_pred_clean)
                if correlation_matrix.shape == (2, 2) and np.isfinite(correlation_matrix[0, 1]):
                    r2 = correlation_matrix[0, 1] ** 2
            except:
                r2 = 0

        metrics = {
            f"{prefix}_mae": mae,
            f"{prefix}_mse": mse,
            f"{prefix}_rmse": rmse,
            f"{prefix}_mape": mape,
            f"{prefix}_wmae": wmae,
            f"{prefix}_r2": r2,
            f"{prefix}_valid_predictions": len(y_true_clean),
            f"{prefix}_total_predictions": len(y_true)
        }

        self.log_metrics(metrics)

        # Create prediction plots
        self._log_prediction_plots(y_true_clean, y_pred_clean, is_holiday[:len(y_true_clean)], prefix)

        return metrics

    def _log_prediction_plots(self, y_true, y_pred, is_holiday, prefix):
        """Create and log prediction visualization plots"""
        try:
            fig, axes = plt.subplots(2, 3, figsize=(18, 12))

            # Scatter plot: Actual vs Predicted
            # Sample data if too large for visualization
            max_points = 5000
            if len(y_true) > max_points:
                indices = np.random.choice(len(y_true), max_points, replace=False)
                y_true_sample = y_true[indices]
                y_pred_sample = y_pred[indices]
            else:
                y_true_sample = y_true
                y_pred_sample = y_pred

            axes[0, 0].scatter(y_true_sample, y_pred_sample, alpha=0.5, s=1)

            # Add perfect prediction line
            min_val = min(y_true_sample.min(), y_pred_sample.min())
            max_val = max(y_true_sample.max(), y_pred_sample.max())
            axes[0, 0].plot([min_val, max_val], [min_val, max_val], 'r--', lw=2)
            axes[0, 0].set_xlabel('Actual')
            axes[0, 0].set_ylabel('Predicted')
            axes[0, 0].set_title('Actual vs Predicted')

            # Residuals plot
            residuals = y_true - y_pred
            residuals_sample = residuals[indices] if len(y_true) > max_points else residuals
            y_pred_sample_res = y_pred[indices] if len(y_true) > max_points else y_pred

            axes[0, 1].scatter(y_pred_sample_res, residuals_sample, alpha=0.5, s=1)
            axes[0, 1].axhline(y=0, color='r', linestyle='--')
            axes[0, 1].set_xlabel('Predicted')
            axes[0, 1].set_ylabel('Residuals')
            axes[0, 1].set_title('Residuals vs Predicted')

            # Residuals histogram - clean data
            residuals_clean = residuals[np.isfinite(residuals)]
            if len(residuals_clean) > 0:
                axes[0, 2].hist(residuals_clean, bins=50, alpha=0.7)
                axes[0, 2].set_xlabel('Residuals')
                axes[0, 2].set_ylabel('Frequency')
                axes[0, 2].set_title('Residuals Distribution')

            # Holiday vs Non-holiday performance
            holiday_mask = is_holiday.astype(bool)
            if holiday_mask.sum() > 0 and (~holiday_mask).sum() > 0:
                holiday_mae = np.mean(np.abs(residuals[holiday_mask]))
                non_holiday_mae = np.mean(np.abs(residuals[~holiday_mask]))

                axes[1, 0].bar(['Non-Holiday', 'Holiday'], [non_holiday_mae, holiday_mae])
                axes[1, 0].set_ylabel('MAE')
                axes[1, 0].set_title('MAE: Holiday vs Non-Holiday')

                # Log holiday metrics
                mlflow.log_metric(f"{prefix}_holiday_mae", holiday_mae)
                mlflow.log_metric(f"{prefix}_non_holiday_mae", non_holiday_mae)
            else:
                axes[1, 0].text(0.5, 0.5, 'No holiday data\navailable',
                               ha='center', va='center', transform=axes[1, 0].transAxes)
                axes[1, 0].set_title('Holiday Analysis')

            # Error distribution by prediction magnitude
            if len(y_pred) > 0:
                pred_quantiles = np.quantile(y_pred, [0.25, 0.5, 0.75])
                low_mask = y_pred <= pred_quantiles[0]
                mid_mask = (y_pred > pred_quantiles[0]) & (y_pred <= pred_quantiles[2])
                high_mask = y_pred > pred_quantiles[2]

                error_by_magnitude = []
                labels = []

                if low_mask.sum() > 0:
                    error_by_magnitude.append(np.mean(np.abs(residuals[low_mask])))
                    labels.append('Low')
                if mid_mask.sum() > 0:
                    error_by_magnitude.append(np.mean(np.abs(residuals[mid_mask])))
                    labels.append('Medium')
                if high_mask.sum() > 0:
                    error_by_magnitude.append(np.mean(np.abs(residuals[high_mask])))
                    labels.append('High')

                if error_by_magnitude:
                    axes[1, 1].bar(labels, error_by_magnitude)
                    axes[1, 1].set_ylabel('MAE')
                    axes[1, 1].set_title('MAE by Prediction Magnitude')

            # Time series sample (first 1000 points)
            sample_size = min(1000, len(y_true))
            indices = np.arange(sample_size)
            axes[1, 2].plot(indices, y_true[:sample_size], label='Actual', alpha=0.7, linewidth=1)
            axes[1, 2].plot(indices, y_pred[:sample_size], label='Predicted', alpha=0.7, linewidth=1)
            axes[1, 2].legend()
            axes[1, 2].set_xlabel('Sample Index')
            axes[1, 2].set_ylabel('Sales')
            axes[1, 2].set_title(f'Sample Predictions (First {sample_size} points)')

            plt.tight_layout()
            mlflow.log_figure(fig, f"{prefix}_predictions.png")
            plt.close()

        except Exception as e:
            print(f"Warning: Could not create prediction plots: {e}")
            plt.close('all')  # Clean up any open figures

    def log_model(self, model_wrapper, model_name="nbeats_model"):
        """Log the trained model"""
        try:
            # Save model to temporary file
            with tempfile.TemporaryDirectory() as tmp_dir:
                model_path = os.path.join(tmp_dir, f"{model_name}.pkl")

                # Save using joblib (more reliable for sklearn-like objects)
                import joblib
                joblib.dump(model_wrapper, model_path)

                # Log as artifact
                mlflow.log_artifact(model_path, "models")

                # Also try to log as MLflow model if possible
                try:
                    mlflow.sklearn.log_model(
                        model_wrapper,
                        f"models/{model_name}",
                        registered_model_name=model_name
                    )
                except Exception as e:
                    print(f"Could not log as MLflow model: {e}")

        except Exception as e:
            print(f"Error logging model: {e}")

def run_nbeats_cv_with_mlflow(X_train, y_train, X_valid, y_valid,
                              param_grid, fixed_params,
                              experiment_name="walmart-nbeats-cv",
                              return_all=False):
    """Enhanced cross-validation with comprehensive MLflow logging"""

    # Setup MLflow
    logger = MLflowNeuralForecastLogger(experiment_name)

    results = []
    keys, values = zip(*param_grid.items())

    for i, vals in enumerate(product(*values)):
        params = dict(zip(keys, vals))
        params.update(fixed_params)

        # Start MLflow run
        with mlflow.start_run(run_name=f"nbeats_run_{i+1}"):
            print(f"\n=== Run {i+1}/{len(list(product(*values)))} ===")

            # Log data info (only once)
            if i == 0:
                logger.log_data_info(X_train, y_train, X_valid, y_valid)

            # Log hyperparameters
            logger.log_hyperparameters(params)

            # Prepare model parameters
            model_params = params.copy()
            model_params['enable_progress_bar'] = False
            model_params['enable_model_summary'] = False

            try:
                # Train model
                model = NBEATS(**model_params)
                nf_model = NeuralForecastModels(
                    models=[model],
                    model_names=['NBEATS'],
                    freq='W-FRI',
                    one_model=True
                )

                # Fit and predict
                nf_model.fit(X_train, y_train)
                y_pred = nf_model.predict(X_valid)

                # Log predictions and metrics
                metrics = logger.log_predictions(y_valid, y_pred, X_valid, "validation")

                # Log model
                logger.log_model(nf_model, f"nbeats_model_run_{i+1}")

                # Store results
                result = {'run_id': mlflow.active_run().info.run_id}
                result.update(metrics)
                result.update(params)
                result['preds'] = y_pred

                results.append(result)

                print(f"WMAE: {metrics['validation_wmae']:.4f}")

            except Exception as e:
                print(f"Error in run {i+1}: {e}")
                mlflow.log_param("error", str(e))
                continue

    # Log summary of all runs
    if results:
        with mlflow.start_run(run_name="cv_summary"):
            best_run = min(results, key=lambda r: r['validation_wmae'])
            worst_run = max(results, key=lambda r: r['validation_wmae'])

            summary_metrics = {
                "cv_runs_total": len(results),
                "cv_best_wmae": best_run['validation_wmae'],
                "cv_worst_wmae": worst_run['validation_wmae'],
                "cv_mean_wmae": np.mean([r['validation_wmae'] for r in results]),
                "cv_std_wmae": np.std([r['validation_wmae'] for r in results])
            }

            logger.log_metrics(summary_metrics)
            logger.log_hyperparameters({"best_params": json.dumps({k: v for k, v in best_run.items()
                                                                  if k in param_grid.keys()})})

    if return_all:
        return results
    else:
        return min(results, key=lambda r: r['validation_wmae']) if results else None

# Example usage with your existing code:
def main_experiment():
    """Main experiment function with MLflow logging"""

    # Setup MLflow (replace with your tracking server if needed)
    experiment_id = setup_mlflow("NBeats_Training")

    # Your existing parameter grids
    param_grids = [
        {
            'input_size': [40, 52, 60, 72],
            'fixed': {
                'max_steps': 25 * 104,
                'h': 53,
                'random_seed': 42,
                'batch_size': 64,
            },
            'name': 'tuning'
        },
        {
            'batch_size': [32, 64, 128, 256, 512],
            'fixed': {
                'max_steps': 25 * 104,
                'h': 53,
                'random_seed': 42,
                'input_size': 60,  # Use best from previous search
            },
            'name': 'batch_size_search'
        },
        {
            'learning_rate': [1e-3, 2e-3, 4e-3],
            'fixed': {
                'max_steps': 25 * 104,
                'h': 53,
                'random_seed': 42,
                'input_size': 60,
                'batch_size': 64,  # Use best from previous search
            },
            'name': 'learning_rate_search'
        }
    ]

    all_results = []

    for param_config in param_grids:
        print(f"\n{'='*50}")
        print(f"Running {param_config['name']}")
        print(f"{'='*50}")

        results = run_nbeats_cv_with_mlflow(
            X_train, y_train, X_valid, y_valid,
            param_grid={k: v for k, v in param_config.items() if k not in ['fixed', 'name']},
            fixed_params=param_config['fixed'],
            experiment_name=f"walmart-nbeats-{param_config['name']}",
            return_all=True
        )

        all_results.extend(results)

        # Print best result for this grid
        if results:
            best = min(results, key=lambda r: r['validation_wmae'])
            print(f"Best {param_config['name']}: WMAE = {best['validation_wmae']:.4f}")

    # Final summary
    if all_results:
        overall_best = min(all_results, key=lambda r: r['validation_wmae'])
        print(f"\n{'='*50}")
        print(f"OVERALL BEST RESULT:")
        print(f"WMAE: {overall_best['validation_wmae']:.4f}")
        print(f"Run ID: {overall_best['run_id']}")
        # print(f"Parameters: {json.dumps({k: v for k, v in overall_best.items()
        #                                if k in ['input_size', 'batch_size', 'learning_rate', 'max_steps']},
        #                               indent=2)}")

if __name__ == "__main__":
    # Run the comprehensive experiment
    main_experiment()

Using MLflow experiment: NBeats_Training

Running tuning
Using MLflow experiment: walmart-nbeats-tuning

=== Run 1/4 ===
Error in run 1: 'Date'
🏃 View run nbeats_run_1 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/5/runs/2caf69930cfa46e5906c52459ba870ce
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/5

=== Run 2/4 ===
🏃 View run nbeats_run_2 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/5/runs/d9f8ffc19a6c4b3f9633c75bfa846fe7
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/5


KeyboardInterrupt: 

In [None]:
import pandas as pd
from neuralforecast import NeuralForecast
from sklearn.base import BaseEstimator, RegressorMixin


class NeuralForecastModels(BaseEstimator, RegressorMixin):
    def __init__(
        self,
        models,
        model_names,
        freq='W-FRI',
        group_cols=['Store', 'Dept'],
        one_model=False,
        date_col='Date'
    ):
        """
        Scikit-learn-style wrapper for NeuralForecast models.

        Args:
            models (list): List of NeuralForecast model instances (e.g., [NBEATS(...), RNN(...)])
            model_names (list): List of names for the models (should match column names in forecast output)
            freq (str): Frequency of the time series data (e.g., 'D', 'W-FRI')
            group_cols (list): List of columns to create unique_id for each series
            one_model (bool): Whether only one model is used (simplifies prediction output)
            date_col (str): Name of the datetime column
        """
        assert len(models) == len(model_names), "Each model must have a corresponding name."
        self.models = models
        self.model_names = model_names
        self.freq = freq
        self.group_cols = group_cols
        self.date_col = date_col
        self.one_model = one_model
        self.nf = None
        self.fitted = False

    def _prepare_df(self, X, y=None):
        df = X.copy()
        df['ds'] = df[self.date_col]
        df['unique_id'] = df[self.group_cols].astype(str).agg('-'.join, axis=1)
        if y is not None:
            df['y'] = y.values if isinstance(y, pd.Series) else y
            return df[['unique_id', 'ds', 'y']]
        else:
            return df[['unique_id', 'ds']]

    def fit(self, X, y):
        df = self._prepare_df(X, y)
        self.nf = NeuralForecast(models=self.models, freq=self.freq)
        self.nf.fit(df)
        self.fitted = True
        return self

    def predict(self, X_test):
        if not self.fitted:
            raise ValueError("Model is not fitted. Call fit() first.")

        test_df = self._prepare_df(X_test)
        forecast_df = self.nf.predict()

        # Merge predictions
        predictions = {}
        for name in self.model_names:
            merged = test_df.merge(
                forecast_df[['unique_id', 'ds', name]],
                on=['unique_id', 'ds'],
                how='left'
            )
            merged[name].fillna(0, inplace=True)
            predictions[name] = merged[name]

        if self.one_model:
            return predictions[self.model_names[0]]
        return predictions

    def forecast(self):
        if not self.fitted:
            raise ValueError("Model is not fitted. Call fit() first.")
        return self.nf.predict()


In [None]:
param_grid = {
    'batch_size' : [32,64,128,256,512]
}

fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'input_size':52
    # 'batch_size' : 64,
}

best_result = run_nbeats_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

batch_size=32 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=1342.0787
batch_size=64 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=1339.4898
batch_size=128 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=1347.3169
batch_size=256 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=1357.4192
batch_size=512 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=1364.6991

Best hyperparameters found:
  batch_size: 64
Best WMAE: 1339.4898


In [None]:
param_grid = {
    'learning_rate' : [1e-3,2e-3,4e-3]
}

fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'input_size':52,
    'batch_size' : 256,
}

best_result = run_nbeats_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

learning_rate=0.001 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → WMAE=1357.4192


NameError: name 'exit' is not defined

In [None]:
param_grid = {
    'learning_rate' : [1e-3,2e-3,4e-3]
}

fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'input_size':52,
    'batch_size' : 256,
}

best_result = run_nbeats_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

In [None]:
param_grid = {
    'activation': ['LeakyReLU','ReLU', 'Tanh','PReLU'],
    # 'stride': [1, 2, 4],
    # 'input_size
}

fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'input_size':52,
    'batch_size' : 256,
    'learning_rate': 1e-3,
    'optimizer' : torch.optim.AdamW
}

best_result = run_nbeats_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid,
    fixed_params=fixed_params,
    return_all=False
)

print("\nBest hyperparameters found:")
for param in param_grid.keys():
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

In [None]:
model = NBEATS(
    max_steps= 25 * 104,
    h= 53,
    random_seed= 42,
    input_size=52,
    batch_size= 256,
    learning_rate= 1e-3,
    shared_weights=True,
    optimizer= torch.optim.AdamW,
    activation = 'ReLU'
)
nf_model = NeuralForecastModels(models=[model], model_names=['NBEATS'], freq='W-FRI', one_model=True)

nf_model.fit(X_train, y_train)
y_pred = nf_model.predict(X_valid)
wmae = compute_wmae(y_valid, y_pred, X_valid['IsHoliday'])

print(wmae)

In [None]:
model = NBEATS(
    max_steps= 25 * 104,
    h= 53,
    random_seed= 42,
    input_size=52,
    batch_size= 256,
    learning_rate= 1e-3,
    shared_weights=True,
    optimizer= torch.optim.AdamW,
    activation = 'ReLU'
)
nf_model = NeuralForecastModels(models=[model], model_names=['NBEATS'], freq='W-FRI', one_model=True)

nf_model.fit(df.drop(columns='Weekly_Sales'), df['Weekly_Sales'])


In [None]:
import wandb
import joblib

def log_nbeats_to_wandb(model, config_dict, val_wmae_score, run_name="nbeats_run", model_filename="nbeats_model.pkl"):
    """
    Logs the N-BEATS model run to Weights & Biases (wandb).

    Parameters:
    - model: trained model to be saved
    - config_dict: dictionary of hyperparameters and configurations
    - val_wmae_score: validation WMAE score (float)
    - run_name: name of the wandb run
    - model_filename: filename to save the model as a joblib file
    """
    # Save model
    joblib.dump(model, model_filename)

    # Initialize wandb
    wandb.init(project="Walmart Recruiting - Store Sales Forecasting", name=run_name)

    # Add scoring policy if not already included
    config_dict.setdefault('score_metric', 'WMAE')
    config_dict.setdefault('score_policy', {'weight on holidays': 5, 'weight on non_holidays': 1})

    # Log config
    wandb.config.update(config_dict)

    # Log metric
    wandb.log({'val_wmae': val_wmae_score})

    # Log model artifact
    artifact = wandb.Artifact(name=run_name, type="model")
    artifact.add_file(model_filename)
    wandb.log_artifact(artifact)

    # Finish run
    wandb.finish()
