In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Install required packages
!pip install -q  wandb torch torchvision pandas numpy matplotlib seaborn scikit-learn mlflow wand dagshub neuralforecast
!pip install pytorch-lightning==2.5.1.post0
# Set up Kaggle API
!pip install -q kaggle ray[tune]



In [5]:
# Upload your kaggle.json to Colab and run:
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_API_credentials/kaggle.json ~/.kaggle/kaggle.json
! chmod 600 ~/.kaggle/kaggle.json

In [45]:
# Download the dataset
!kaggle competitions download -c walmart-recruiting-store-sales-forecasting
!unzip -q walmart-recruiting-store-sales-forecasting.zip
!unzip -q sampleSubmission.csv.zip

walmart-recruiting-store-sales-forecasting.zip: Skipping, found more recently modified local copy (use --force to force download)
replace features.csv.zip? [y]es, [n]o, [A]ll, [N]one, [r]ename: A


In [7]:
!unzip -q train.csv.zip
!unzip -q stores.csv.zip
!unzip -q test.csv.zip
!unzip -q features.csv.zip

replace train.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
unzip:  cannot find or open stores.csv.zip, stores.csv.zip.zip or stores.csv.zip.ZIP.
replace test.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
replace features.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A


In [8]:
import mlflow
from dagshub import dagshub_logger
import os
import mlflow.pytorch
mlflow.pytorch.autolog()
import torch


# Set tracking URI manually
mlflow.set_tracking_uri("https://dagshub.com/ekvirika/WalmartRecruiting.mlflow")

# Use your DagsHub credentials
os.environ["MLFLOW_TRACKING_USERNAME"] = "ekvirika"
os.environ["MLFLOW_TRACKING_PASSWORD"] = "0adb1004ddd4221395353efea2d8ead625e26197"

# Optional: set registry if you're using model registry
mlflow.set_registry_uri("https://dagshub.com/ekvirika/WalmartRecruiting.mlflow")
mlflow.set_experiment("NBeats_Training")

# Detect GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
torch.manual_seed(42)

# W&B setup
wandb_project = 'WalmartRecruiting'
wandb_entity = None  # Replace with your W&B entity if using teams


cuda


In [9]:
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33mellekvirikashvili[0m ([33mellekvirikashvili-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


# Import Libraries

In [21]:
import pandas as pd
import numpy as np
import torch
import wandb
import joblib
import mlflow
import os
from itertools import product
from neuralforecast.models import NBEATS
from neuralforecast import NeuralForecast

import logging
logging.basicConfig(level=logging.WARNING)
for lib in ["neuralforecast", "pytorch_lightning", "lightning_fabric"]:
    logging.getLogger(lib).setLevel(logging.WARNING)

# --- Data loading ---
# Replace these with your actual CSV file paths
STORES_PATH = "stores.csv"
FEATURES_PATH = "features.csv"
TRAIN_PATH = "train.csv"
TEST_PATH = "test.csv"

stores = pd.read_csv(STORES_PATH)
features = pd.read_csv(FEATURES_PATH)
train = pd.read_csv(TRAIN_PATH)
test = pd.read_csv(TEST_PATH)

class NeuralForecastModels:
    def __init__(self, models, model_names=None, freq='W-FRI', one_model=False):
        self.freq = freq
        self.one_model = one_model
        self.models = models
        self.model_names = model_names if model_names else [f"model_{i}" for i in range(len(models))]
        self.nf = NeuralForecast(models=self.models, freq=self.freq)
        self.fitted_df = None

    def fit(self, df):
        """
        Fit the model with a DataFrame containing unique_id, ds, y columns
        """
        self.fitted_df = df.copy()
        self.nf.fit(df=df)

    def predict(self, h=None):
        """
        Generate predictions
        """
        if h is None:
            # Use the horizon from the model
            h = self.models[0].h
        return self.nf.predict(h=h)

    def cross_validation(self, df, n_windows=1):
        """
        Perform cross-validation
        """
        return self.nf.cross_validation(df=df, n_windows=n_windows)

def preprocess(df):
    """Preprocess the data for NeuralForecast"""
    df = df.copy()
    df['unique_id'] = df['Store'].astype(str) + "_" + df['Dept'].astype(str)
    df.rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'}, inplace=True)
    df['ds'] = pd.to_datetime(df['ds'])

    # Sort by unique_id and ds for proper time series format
    df = df.sort_values(['unique_id', 'ds']).reset_index(drop=True)

    return df

def prepare_data_for_cv(df, n_windows=1, h=53):
    """
    Prepare data for cross-validation by ensuring each time series has enough data
    """
    min_length = h * (n_windows + 1) + 10  # minimum length needed for CV

    # Filter time series that are long enough
    series_lengths = df.groupby('unique_id').size()
    valid_series = series_lengths[series_lengths >= min_length].index

    filtered_df = df[df['unique_id'].isin(valid_series)].copy()

    print(f"Original series: {len(series_lengths)}, Valid series for CV: {len(valid_series)}")
    print(f"Original data points: {len(df)}, Filtered data points: {len(filtered_df)}")

    return filtered_df

# Preprocess the data
df = preprocess(train)

# Prepare data for cross-validation
df_cv = prepare_data_for_cv(df, n_windows=1, h=53)

# --- WMAE metric function ---
def compute_wmae(y_true, y_pred, is_holiday=None, holiday_weight=5, non_holiday_weight=1):
    """
    Compute Weighted Mean Absolute Error
    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Ensure arrays have the same shape
    if y_true.shape != y_pred.shape:
        min_len = min(len(y_true), len(y_pred))
        y_true = y_true[:min_len]
        y_pred = y_pred[:min_len]
        if is_holiday is not None:
            is_holiday = np.array(is_holiday)[:min_len]

    if is_holiday is not None:
        weights = np.where(np.array(is_holiday), holiday_weight, non_holiday_weight)
    else:
        weights = np.ones_like(y_true)

    abs_errors = np.abs(y_true - y_pred)
    weighted_errors = weights * abs_errors

    return weighted_errors.sum() / weights.sum()

# --- Cross-validation function ---
def run_nbeats_cv(df, param_grid, fixed_params, experiment_name="NBEATS_Hyperparam_Tuning"):
    """
    Run cross-validation for NBEATS hyperparameter tuning
    """
    mlflow.set_experiment(experiment_name)
    results = []
    keys, values = zip(*param_grid.items())

    for vals in product(*values):
        params = dict(zip(keys, vals))
        params.update(fixed_params)
        params.update({'enable_progress_bar': False, 'enable_model_summary': False})

        try:
            with mlflow.start_run(nested=True):
                # Log hyperparams
                for k, v in params.items():
                    if not isinstance(v, (list, dict)):
                        mlflow.log_param(k, v)
                    else:
                        mlflow.log_param(k, str(v))

                model = NBEATS(**params)
                nf_model = NeuralForecastModels(
                    models=[model],
                    model_names=['NBEATS'],
                    freq='W-FRI',
                    one_model=True
                )

                # Perform cross-validation - FIXED: removed h parameter
                cv_df = nf_model.cross_validation(df, n_windows=1)

                # Extract predictions and actual values
                y_true = cv_df['y'].values
                y_pred = cv_df['NBEATS'].values

                # Get holiday information if available
                is_holiday = None
                if 'IsHoliday' in df.columns:
                    # Map holiday information to CV results
                    cv_df_with_holiday = cv_df.merge(
                        df[['unique_id', 'ds', 'IsHoliday']],
                        on=['unique_id', 'ds'],
                        how='left'
                    )
                    is_holiday = cv_df_with_holiday['IsHoliday'].fillna(False).values

                score = compute_wmae(y_true, y_pred, is_holiday)

                mlflow.log_metric("val_wmae", score)

                param_str = " → ".join(f"{k}={v}" for k, v in params.items()
                                     if k not in ['enable_progress_bar', 'enable_model_summary'])
                print(f"{param_str} → WMAE={score:.4f}")

                results.append({'wmae': score, **params})

        except Exception as e:
            print(f"Error with params {params}: {str(e)}")
            continue

    if not results:
        raise ValueError("No successful runs completed")

    return min(results, key=lambda r: r['wmae']) if len(results) > 1 else results[0]

# --- Hyperparameter tuning steps ---
print("Starting hyperparameter tuning...")

print("Tuning input_size...")
param_grid = {'input_size': [40, 52, 60, 72]}
fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'batch_size': 64,
}

best_result = run_nbeats_cv(df_cv, param_grid, fixed_params)
print(f"\nBest input_size: {best_result['input_size']} with WMAE: {best_result['wmae']:.4f}")

print("\nTuning batch_size...")
param_grid = {'batch_size': [32, 64, 128, 256]}
fixed_params.update({'input_size': best_result['input_size']})
best_result = run_nbeats_cv(df_cv, param_grid, fixed_params)
print(f"\nBest batch_size: {best_result['batch_size']} with WMAE: {best_result['wmae']:.4f}")

print("\nTuning learning_rate...")
param_grid = {'learning_rate': [1e-3, 2e-3, 4e-3]}
fixed_params.update({'batch_size': best_result['batch_size']})
best_result = run_nbeats_cv(df_cv, param_grid, fixed_params)
print(f"\nBest learning_rate: {best_result['learning_rate']} with WMAE: {best_result['wmae']:.4f}")

print("\nTuning n_blocks and optimizer weight_decay...")
param_grid = {
    'n_blocks': [[1,1,1], [2,2,2], [3,3,3]],
    'optimizer_kwargs': [
        {'weight_decay': 1e-4},
        {'weight_decay': 1e-3},
        {'weight_decay': 1e-2}
    ]
}
fixed_params.update({'learning_rate': best_result['learning_rate'], 'optimizer': torch.optim.AdamW})
best_result = run_nbeats_cv(df_cv, param_grid, fixed_params)
print(f"\nBest n_blocks: {best_result['n_blocks']} and optimizer_kwargs: {best_result['optimizer_kwargs']} with WMAE: {best_result['wmae']:.4f}")

print("\nTuning activation functions...")
param_grid = {'activation': ['LeakyReLU', 'ReLU', 'Tanh']}
fixed_params.update({
    'n_blocks': best_result['n_blocks'],
    'optimizer_kwargs': best_result['optimizer_kwargs']
})
best_result = run_nbeats_cv(df_cv, param_grid, fixed_params)
print(f"\nBest activation: {best_result['activation']} with WMAE: {best_result['wmae']:.4f}")


Original series: 3331, Valid series for CV: 2827
Original data points: 421570, Filtered data points: 402458
Starting hyperparameter tuning...
Tuning input_size...




input_size=40 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=2879.1875
🏃 View run unruly-eel-19 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/4e4989b063ac40c490c0131a6b7f43b4
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




input_size=52 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=2815.2565
🏃 View run industrious-ape-329 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/e3a209d7c8cb486a98179094142f6d44
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




input_size=60 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=2837.5293
🏃 View run inquisitive-colt-973 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/6678dbda455a408c85321f44d157b861
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




input_size=72 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=3005.6063
🏃 View run righteous-jay-901 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/4adb80a10aab41698d8b058a1d236364
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11

Best input_size: 52 with WMAE: 2815.2565

Tuning batch_size...




batch_size=64 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=2815.2565
🏃 View run enchanting-colt-115 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/da98f6ad941544c1bc4f6a7a6773796c
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




batch_size=64 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=2815.2565
🏃 View run vaunted-crane-822 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/401ffaceca58463c899c2a6a8620940d
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




batch_size=64 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=2815.2565
🏃 View run selective-mink-84 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/97ff440f74924f5f944da7abab47d922
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




batch_size=64 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=2815.2565
🏃 View run angry-chimp-445 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/ce0df4b314194800a770a90abc5b4575
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11

Best batch_size: 64 with WMAE: 2815.2565

Tuning learning_rate...




learning_rate=0.001 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → WMAE=2815.2565
🏃 View run legendary-cod-371 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/3764b4195d2840caaf12f53138f2b961
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




learning_rate=0.002 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → WMAE=2681.5312
🏃 View run blushing-shrew-420 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/1dd35cbadf5b49a89124b23825a86dd2
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




learning_rate=0.004 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → WMAE=2980.6578
🏃 View run overjoyed-hound-785 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/be9da4c7e671457bb21b5a53834fec05
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11

Best learning_rate: 0.002 with WMAE: 2681.5312

Tuning n_blocks and optimizer weight_decay...




n_blocks=[1, 1, 1] → optimizer_kwargs={'weight_decay': 0.0001} → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=2818.4835
🏃 View run auspicious-hare-887 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/1091a1247a384751a2115738eea7197b
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




n_blocks=[1, 1, 1] → optimizer_kwargs={'weight_decay': 0.001} → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=2780.8924
🏃 View run big-koi-206 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/49ae74b3790843f1b2eacfa653fb1447
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




n_blocks=[1, 1, 1] → optimizer_kwargs={'weight_decay': 0.01} → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=2749.2158
🏃 View run intrigued-lark-247 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/ee86f0046f0546a3b179629f4e194f96
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




n_blocks=[2, 2, 2] → optimizer_kwargs={'weight_decay': 0.0001} → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=2765.0085
🏃 View run able-midge-592 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/759485f2b8544f00a173ef61bacb758c
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




n_blocks=[2, 2, 2] → optimizer_kwargs={'weight_decay': 0.001} → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=2751.6410
🏃 View run learned-bass-232 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/26173db3527a4b0ba71fdd68a7bcb494
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




n_blocks=[2, 2, 2] → optimizer_kwargs={'weight_decay': 0.01} → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=2740.0934
🏃 View run caring-stoat-309 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/42bdf24f16bc4234a211e8ee9fa4868b
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




n_blocks=[3, 3, 3] → optimizer_kwargs={'weight_decay': 0.0001} → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=2682.8072
🏃 View run painted-panda-862 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/127451d9d19948429b7e0aa960630e64
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




n_blocks=[3, 3, 3] → optimizer_kwargs={'weight_decay': 0.001} → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=2738.3392
🏃 View run bald-deer-344 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/2aa06ec363b3470fb5b7a423d25b5955
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




n_blocks=[3, 3, 3] → optimizer_kwargs={'weight_decay': 0.01} → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=2527.9716
🏃 View run enchanting-whale-381 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/fba23695df67443a9e6715bc8c84aac0
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11

Best n_blocks: [3, 3, 3] and optimizer_kwargs: {'weight_decay': 0.01} with WMAE: 2527.9716

Tuning activation functions...




activation=LeakyReLU → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → n_blocks=[3, 3, 3] → optimizer_kwargs={'weight_decay': 0.01} → WMAE=2721.0448
🏃 View run dashing-bee-504 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/73a43fb029bf4322845971fea33c5518
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




activation=ReLU → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → n_blocks=[3, 3, 3] → optimizer_kwargs={'weight_decay': 0.01} → WMAE=2527.9716
🏃 View run stately-midge-124 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/21f2134d2cd84c899775766b7a90eef3
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11




activation=Tanh → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → input_size=52 → learning_rate=0.002 → optimizer=<class 'torch.optim.adamw.AdamW'> → n_blocks=[3, 3, 3] → optimizer_kwargs={'weight_decay': 0.01} → WMAE=3471.1916
🏃 View run rebellious-skunk-485 at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/3edcfc63a8c84929aee47b0fc8d055b0
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11

Best activation: ReLU with WMAE: 2527.9716

Training final model with best params...




Final model WMAE: 2607.1690
Model saved and logged to MLflow.


[34m[1mwandb[0m: Currently logged in as: [33mellekvirikashvili[0m ([33mellekvirikashvili-free-university-of-tbilisi-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
val_wmae,▁

0,1
val_wmae,2607.16898


Model saved and logged to W&B.
🏃 View run Final NBEATS Model Training at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/1dcafbea3ad04ef095a5ac4acda485d7
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11

Hyperparameter tuning completed!
Best parameters: {'wmae': np.float64(2527.9716201180972), 'activation': 'ReLU', 'max_steps': 2600, 'h': 53, 'random_seed': 42, 'batch_size': 64, 'input_size': 52, 'learning_rate': 0.002, 'optimizer': <class 'torch.optim.adamw.AdamW'>, 'n_blocks': [3, 3, 3], 'optimizer_kwargs': {'weight_decay': 0.01}, 'enable_progress_bar': False, 'enable_model_summary': False}


In [70]:
# --- Train final model ---
print("\nTraining final model with best params...")
with mlflow.start_run(run_name="Final NBEATS Model Training"):
    final_params = {
        'max_steps': fixed_params['max_steps'],
        'h': fixed_params['h'],
        'random_seed': fixed_params['random_seed'],
        'input_size': best_result.get('input_size', fixed_params['input_size']),
        'batch_size': best_result.get('batch_size', fixed_params['batch_size']),
        'learning_rate': best_result.get('learning_rate', 1e-3),
        'optimizer': torch.optim.AdamW,
        'activation': best_result.get('activation', 'ReLU'),
        'n_blocks': best_result.get('n_blocks', [1,1,1]),
        'optimizer_kwargs': best_result.get('optimizer_kwargs', {'weight_decay': 1e-4}),
        'enable_progress_bar': False,
        'enable_model_summary': False
    }

    final_model = NBEATS(**final_params)
    nf_model = NeuralForecastModels(
        models=[final_model],
        model_names=['NBEATS'],
        freq='W-FRI',
        one_model=True
    )

    # Fit on full training data
    nf_model.fit(df_cv)

    # Perform final cross-validation to get score - FIXED: removed h parameter
    final_cv_df = nf_model.cross_validation(df_cv, n_windows=1)

    y_true = final_cv_df['y'].values
    y_pred = final_cv_df['NBEATS'].values

    # Get holiday information if available
    is_holiday = None
    if 'IsHoliday' in df.columns:
        final_cv_df_with_holiday = final_cv_df.merge(
            df[['unique_id', 'ds', 'IsHoliday']],
            on=['unique_id', 'ds'],
            how='left'
        )
        is_holiday = final_cv_df_with_holiday['IsHoliday'].fillna(False).values

    wmae_score = compute_wmae(y_true, y_pred, is_holiday)

    mlflow.log_metric("val_wmae_final", wmae_score)

    # Log final parameters
    for k, v in final_params.items():
        if not isinstance(v, (list, dict, type)):
            mlflow.log_param(f"final_{k}", v)
        else:
            mlflow.log_param(f"final_{k}", str(v))

    # Save and log model artifact
    model_file = "nbeats_final_model.pkl"
    joblib.dump(nf_model, model_file)
    mlflow.log_artifact(model_file)

    print(f"Final model WMAE: {wmae_score:.4f}")
    print("Model saved and logged to MLflow.")

    # --- Save and log with W&B ---
    wandb.init(project="Walmart Recruiting - Store Sales Forecasting", name="nbeats_final_run")
    wandb.config.update({
        'score_metric': 'WMAE',
        'score_policy': {'weight_on_holidays': 5, 'weight_on_non_holidays': 1},
        'model': 'NBEATS',
        'learning_rate': best_result.get('learning_rate', 1e-3),
        'weight_decay': best_result.get('optimizer_kwargs', {}).get('weight_decay', 1e-4),
        'batch_size': best_result.get('batch_size', 64),
        'max_steps': fixed_params['max_steps'],
        'input_size': best_result.get('input_size', 52),
        'horizon': fixed_params['h'],
        'architecture': ['identity', 'trend', 'seasonality'],
        'n_blocks': best_result.get('n_blocks', [1,1,1]),
        'random_seed': fixed_params['random_seed'],
        'activation': best_result.get('activation', 'ReLU'),
        'optimizer': 'torch.optim.AdamW'
    })

    wandb.log({'val_wmae': wmae_score})

    artifact = wandb.Artifact(name="nbeats_final_model", type="model")
    artifact.add_file(model_file)
    wandb.log_artifact(artifact)
    wandb.finish()

    # Clean up
    if os.path.exists(model_file):
        os.remove(model_file)

    print("Model saved and logged to W&B.")

print("\nHyperparameter tuning completed!")
print(f"Best parameters: {best_result}")


Training final model with best params...
🏃 View run Final NBEATS Model Training at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11/runs/7cc00c699cab498dafcb0c58de0e9196
🧪 View experiment at: https://dagshub.com/ekvirika/WalmartRecruiting.mlflow/#/experiments/11


KeyError: 'input_size'

In [63]:
def preprocess(df):
    df = df.copy()
    if 'ds' in df.columns and 'Date' in df.columns:
        df = df.drop(columns=['ds'])  # Drop conflicting 'ds'

    df['unique_id'] = df['Store'].astype(str) + "_" + df['Dept'].astype(str)
    df.rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'}, inplace=True)

    df['ds'] = pd.to_datetime(df['ds'])
    df = df.sort_values(['unique_id', 'ds'])
    return df


def shift_ds_by_days(df: pd.DataFrame, days: int) -> pd.DataFrame:
    """
    Returns a copy of `df` where the 'ds' column has been shifted by `days`.
    Positive `days` moves dates forward; negative moves them backward.
    """
    df_shifted = df.copy()
    df_shifted['ds'] = df_shifted['ds'] + pd.Timedelta(days=days)
    return df_shifted

In [58]:
def create_final_cv_df(train, test):
    """
    Concatenates train and test to form the input to prediction,
    ensuring 'unique_id', 'ds', and 'y' are present.
    """
    df_train = train.copy()
    df_test = test.copy()

    df_train['unique_id'] = df_train['Store'].astype(str) + "_" + df_train['Dept'].astype(str)
    df_train.rename(columns={'Date': 'ds', 'Weekly_Sales': 'y'}, inplace=True)
    df_train['ds'] = pd.to_datetime(df_train['ds'])

    df_test['unique_id'] = df_test['Store'].astype(str) + "_" + df_test['Dept'].astype(str)
    df_test.rename(columns={'Date': 'ds'}, inplace=True)
    df_test['ds'] = pd.to_datetime(df_test['ds'])

    # Include y only for training, not test
    df_test['y'] = np.nan  # Needed by model

    final_df = pd.concat([df_train[['unique_id', 'ds', 'y']], df_test[['unique_id', 'ds', 'y']]])
    final_df = final_df.sort_values(by=['unique_id', 'ds']).reset_index(drop=True)

    return final_df


In [71]:
# 1. Remove 'h' from final_params safely
model_params = {k: v for k, v in final_params.items() if k != 'h'}
horizon = final_params['h']

# 2. Initialize PatchTST model
from neuralforecast.models import PatchTST
from neuralforecast.core import NeuralForecast

model = PatchTST(h=horizon, **model_params)

# 3. Wrap in NeuralForecast
nf_model = NeuralForecast(models=[model], freq='W-FRI')
# Fit the model
nf_model.fit(df=df_cv)


TypeError: Trainer.__init__() got an unexpected keyword argument 'n_blocks'

In [65]:
# 1. Predict using trained model
final_cv_df = final_cv_df[['unique_id', 'ds']]  # Drop anything else
final_preds = nf_model.predict(df=final_cv_df)

# Optional: Shift the timestamps if needed (e.g. 5 days forward)
# If your forecast needs to align with test dates
final_preds['ds'] = final_preds['ds'] + pd.Timedelta(days=5)


Exception: You must fit the model before predicting.