<a href="https://colab.research.google.com/github/azhgh22/Walmart-Recruiting-Store-Sales-Forecasting/blob/main/notebooks/n_beats.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
from google.colab import drive
drive.mount('/content/drive')

from google.colab import userdata
token = userdata.get('GITHUB_TOKEN')
user_name = userdata.get('GITHUB_USERNAME')
mail = userdata.get('GITHUB_MAIL')

!git config --global user.name "{user_name}"
!git config --global user.email "{mail}"
!git clone https://{token}@github.com/azhgh22/Walmart-Recruiting-Store-Sales-Forecasting.git

%cd Walmart-Recruiting-Store-Sales-Forecasting

from google.colab import userdata
! pip install -r ./requirements.txt
kaggle_json_path = userdata.get('KAGGLE_JSON_PATH')
! ./src/data_loader.sh -f {kaggle_json_path}

# **Read Data**

In [None]:
# **Torch**

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.base import BaseEstimator, TransformerMixin
import torch
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS
from neuralforecast.losses.pytorch import MSE

from src.config import *

# Raw competition tables, read from the paths exported by `src.config`.
# NOTE(review): none of these four frames is used by the statements below —
# the modelling frame comes from data_loader.load_raw_data(). Confirm whether
# these reads are still needed elsewhere in the notebook.
stores = pd.read_csv(STORES_PATH)
features = pd.read_csv(FEATURES_PATH)
train = pd.read_csv(TRAIN_PATH)
test = pd.read_csv(TEST_PATH)

from src import data_loader, processing
import importlib
# Re-import `processing` so edits to the module are picked up without
# restarting the kernel.
importlib.reload(processing)

dataframes = data_loader.load_raw_data()
# Only the 'train' entry of the preprocessing result is kept; the test-set
# processing and the features/stores merges are switched off via the flags.
df = processing.run_preprocessing(dataframes, process_test=False, merge_features=False, merge_stores=False)['train']
# Split into train / validation parts with the target returned separately.
# NOTE(review): the split ratio is the helper's default (the recorded output
# shows 337256 vs 84314 rows) — confirm against src/processing.py.
X_train, y_train, X_valid, y_valid = processing.split_data_by_ratio(df, separate_target=True)

print(f"Shapes of train_df and valid_df: {X_train.shape}, {X_valid.shape}")

Data loading complete.
Shapes of train_df and valid_df: (337256, 4), (84314, 4)


# **Custom NBEATS**

In [None]:
class CustomNBEATS(NBEATS):
    """NBEATS variant that holds its optimizer and LR scheduler as instances.

    After construction the model owns an ``AdamW`` optimizer (lr=1e-3) and a
    ``StepLR`` scheduler (step_size=10, gamma=0.9). Both can be replaced with
    :meth:`set_optim` / :meth:`set_scheduler`; :meth:`configure_optimizers`
    returns whatever is currently set.
    """

    def __init__(self, *args, **kwargs):
        """Build the underlying NBEATS model, then attach the default optimizer/scheduler."""
        super().__init__(*args, **kwargs)
        self.optimizer = torch.optim.AdamW(self.parameters(), lr=1e-3)
        self.scheduler = self._make_default_scheduler(self.optimizer)
        # Tracks whether `self.scheduler` is still the built-in StepLR, so it
        # can be re-bound when the optimizer is swapped.
        self._default_scheduler_in_use = True

    @staticmethod
    def _make_default_scheduler(optimizer):
        """Return the default StepLR schedule bound to ``optimizer``."""
        return torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)

    def set_optim(self, optimizer):
        """Replace the optimizer instance and return ``self`` for chaining.

        If the scheduler is still the default StepLR it is rebuilt on the new
        optimizer; otherwise it would keep stepping the discarded optimizer.
        A scheduler supplied through :meth:`set_scheduler` is left untouched —
        the caller is responsible for binding it to the right optimizer.
        """
        self.optimizer = optimizer
        if optimizer is not None and getattr(self, '_default_scheduler_in_use', False):
            self.scheduler = self._make_default_scheduler(optimizer)
        return self

    def set_scheduler(self, scheduler):
        """Replace the LR scheduler (``None`` disables scheduling) and return ``self``."""
        self.scheduler = scheduler
        self._default_scheduler_in_use = False
        return self

    def configure_optimizers(self):
        """Return the optimizer configuration consumed by the training loop.

        Returns:
            dict: always contains ``'optimizer'``; contains ``'lr_scheduler'``
            (stepped once per epoch) only when a scheduler is set.
        """
        if self.scheduler is None:
            # No scheduler configured: do not emit an 'lr_scheduler' entry
            # wrapping None.
            return {'optimizer': self.optimizer}

        return {
            'optimizer': self.optimizer,
            'lr_scheduler': {
                'scheduler': self.scheduler,
                'interval': 'epoch',
                'frequency': 1
            }
        }

In [None]:
from itertools import product
from neuralforecast.models import PatchTST
from models.neural_forecast_models import NeuralForecastModels
from src.utils import wmae as compute_wmae
import logging

# Raise the root logger and the training-library loggers to WARNING so that
# repeated fits inside the sweeps do not flood the cell output.
for logger_name in (None, "neuralforecast", "pytorch_lightning", "lightning_fabric"):
    logging.getLogger(logger_name).setLevel(logging.WARNING)

def run_nbeats_cv(X_train, y_train, X_valid, y_valid,
                            param_grid,
                            fixed_params,
                            return_all=False):
    """Grid-search NBEATS hyperparameters against a single hold-out split.

    For every combination in the Cartesian product of ``param_grid`` an NBEATS
    model is built, wrapped in ``NeuralForecastModels``, fitted on the training
    data and scored with WMAE on the validation data. One summary line is
    printed per combination.

    Args:
        X_train, y_train: training features / target passed to ``fit``.
        X_valid, y_valid: validation features / target; ``X_valid['IsHoliday']``
            is passed to the WMAE function as its third argument.
        param_grid: mapping ``name -> list of candidate values``. An empty
            mapping (or ``None``) evaluates ``fixed_params`` exactly once.
        fixed_params: mapping of NBEATS keyword arguments shared by every run.
            If a key also appears in ``param_grid`` the grid value is used and
            a warning is emitted (previously the fixed value silently replaced
            the swept one, so every run trained the same configuration).
        return_all: when true return every result, otherwise only the best.

    Returns:
        A list of result dicts (``return_all=True``) or the single dict with
        the lowest ``'wmae'``. Each dict holds ``'wmae'``, ``'preds'`` and the
        parameters used for that run.

    Raises:
        ValueError: if no combination was evaluated (some candidate list is
            empty) and ``return_all`` is false.
    """
    import warnings

    param_grid = param_grid or {}
    fixed_params = dict(fixed_params or {})
    grid_keys = list(param_grid)

    clashing = [key for key in grid_keys if key in fixed_params]
    if clashing:
        warnings.warn(
            f"Parameters {clashing} appear in both param_grid and fixed_params; "
            "the param_grid values are used."
        )

    hidden_in_log = ('enable_progress_bar', 'enable_model_summary')
    results = []

    # product() with no iterables yields one empty tuple, so an empty grid
    # still produces a single run of the fixed configuration.
    for vals in product(*(param_grid[key] for key in grid_keys)):
        params = dict(zip(grid_keys, vals))
        for key, value in fixed_params.items():
            # setdefault keeps the swept value when a key is present in both.
            params.setdefault(key, value)

        # Trainer output is always silenced during the sweep.
        params['enable_progress_bar'] = False
        params['enable_model_summary'] = False

        model = NBEATS(**params)

        nf_model = NeuralForecastModels(models=[model], model_names=['NBEATS'], freq='W-FRI', one_model=True)
        nf_model.fit(X_train, y_train)
        y_pred = nf_model.predict(X_valid)
        score = compute_wmae(y_valid, y_pred, X_valid['IsHoliday'])

        result = {'wmae': score, 'preds': y_pred}
        result.update(params)

        results.append(result)
        print(" → ".join(f"{k}={v}" for k,v in params.items() if k not in hidden_in_log) + f" → WMAE={score:.4f}")

    if return_all:
        return results
    if not results:
        raise ValueError("No parameter combination was evaluated: a param_grid entry has no candidate values.")
    return min(results, key=lambda r: r['wmae'])

# **Tune input_size**

In [None]:
# Candidate look-back window lengths to sweep.
param_grid = {'input_size': [40, 52, 60, 72]}

# Settings held constant for every candidate in this sweep.
fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'batch_size': 64,
}

# return_all is left at its default (False): only the best run is returned.
best_result = run_nbeats_cv(X_train, y_train, X_valid, y_valid, param_grid, fixed_params)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

input_size=40 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=1697.6167
input_size=52 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=1593.9089
input_size=60 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=1619.0920
input_size=72 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=1698.7422

Best hyperparameters found:
  input_size: 52
Best WMAE: 1593.9089


# **Tune batch size**

In [None]:
# Candidate mini-batch sizes to sweep.
param_grid = {'batch_size': [32, 64, 128, 256, 512]}

# input_size is pinned to the winner of the previous sweep; batch_size is
# deliberately absent here because it is the swept parameter.
fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'input_size': 52,
}

# return_all is left at its default (False): only the best run is returned.
best_result = run_nbeats_cv(X_train, y_train, X_valid, y_valid, param_grid, fixed_params)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

batch_size=32 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=1619.6334
batch_size=64 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=1593.9089
batch_size=128 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=1586.4884
batch_size=256 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=1547.6261
batch_size=512 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → WMAE=1581.5538

Best hyperparameters found:
  batch_size: 256
Best WMAE: 1547.6261


# **Tune learning rate**

In [None]:
# Candidate learning rates to sweep.
param_grid = {'learning_rate': [1e-3, 2e-3, 4e-3]}

# input_size and batch_size are pinned to the winners of the earlier sweeps.
fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'input_size': 52,
    'batch_size': 256,
}

# return_all is left at its default (False): only the best run is returned.
best_result = run_nbeats_cv(X_train, y_train, X_valid, y_valid, param_grid, fixed_params)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

learning_rate=0.001 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → WMAE=1547.6261
learning_rate=0.002 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → WMAE=1578.8466
learning_rate=0.004 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → WMAE=1590.2231

Best hyperparameters found:
  learning_rate: 0.001
Best WMAE: 1547.6261


# **Tune number of blocks per stack together with weight decay**

In [None]:
# Joint sweep: blocks per stack (outer loop) x AdamW weight decay (inner loop).
param_grid = {
    'n_blocks': [[1, 1, 1], [2, 2, 2], [3, 3, 3]],
    'optimizer_kwargs': [
        {'weight_decay': 1e-3},
        {'weight_decay': 1e-4},
        {'weight_decay': 1e-2},
    ],
}

# Earlier winners are pinned; the optimizer class is switched to AdamW so the
# weight_decay entries above take effect.
fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'input_size': 52,
    'batch_size': 256,
    'learning_rate': 1e-3,
    'optimizer': torch.optim.AdamW,
}

# return_all is left at its default (False): only the best run is returned.
best_result = run_nbeats_cv(X_train, y_train, X_valid, y_valid, param_grid, fixed_params)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

n_blocks=[1, 1, 1] → optimizer_kwargs={'weight_decay': 0.001} → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=1570.4595
n_blocks=[1, 1, 1] → optimizer_kwargs={'weight_decay': 0.0001} → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=1563.2994
n_blocks=[1, 1, 1] → optimizer_kwargs={'weight_decay': 0.01} → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=1587.5859
n_blocks=[2, 2, 2] → optimizer_kwargs={'weight_decay': 0.001} → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=1619.3125
n_blocks=[2, 2, 2] → optimizer_kwargs={'weight_decay': 0.0001} → max_steps=2600 → h=53 → random_seed=42 → input_size

# **Tune activation function**

In [None]:
# Candidate activation functions to sweep.
param_grid = {'activation': ['LeakyReLU', 'ReLU', 'Tanh', 'PReLU']}

# Earlier winners are pinned; AdamW runs with its default keyword arguments
# because no optimizer_kwargs are supplied.
fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'input_size': 52,
    'batch_size': 256,
    'learning_rate': 1e-3,
    'optimizer': torch.optim.AdamW,
}

# return_all is left at its default (False): only the best run is returned.
best_result = run_nbeats_cv(X_train, y_train, X_valid, y_valid, param_grid, fixed_params)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

activation=LeakyReLU → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=1579.8415
activation=ReLU → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=1587.5859
activation=Tanh → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=2986.5779
activation=PReLU → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → WMAE=1620.2018

Best hyperparameters found:
  activation: LeakyReLU
Best WMAE: 1579.8415


# **Tune polynomial order**

In [None]:
# Candidate polynomial orders to sweep.
# NOTE(review): the recorded output of this cell shows the same WMAE
# (1587.5859) for every value, so the setting appears to have no effect in
# this configuration — verify that it actually reaches the model.
param_grid = {'n_polynomials': [1, 2, 3, 4, 5]}

# Earlier winners are pinned, with the activation fixed to ReLU.
fixed_params = {
    'max_steps': 25 * 104,
    'h': 53,
    'random_seed': 42,
    'input_size': 52,
    'batch_size': 256,
    'learning_rate': 1e-3,
    'optimizer': torch.optim.AdamW,
    'activation': 'ReLU',
}

# return_all is left at its default (False): only the best run is returned.
best_result = run_nbeats_cv(X_train, y_train, X_valid, y_valid, param_grid, fixed_params)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

n_polynomials=1 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → activation=ReLU → WMAE=1587.5859
n_polynomials=2 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → activation=ReLU → WMAE=1587.5859
n_polynomials=3 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → activation=ReLU → WMAE=1587.5859
n_polynomials=4 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → activation=ReLU → WMAE=1587.5859
n_polynomials=5 → max_steps=2600 → h=53 → random_seed=42 → input_size=52 → batch_size=256 → learning_rate=0.001 → optimizer=<class 'torch.optim.adamw.AdamW'> → activation=ReLU → WMAE=1587.5859

Best hyperparameters found:
  n_po

**Cross Validation**

In [None]:
# Authenticate this runtime with Weights & Biases; the CLI prompts for an API key.
! wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mazhgh22[0m ([33mMLBeasts[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


# **Model**

In [None]:
from neuralforecast.models import NBEATS
from models.neural_forecast_models import NeuralForecastModels
from src.utils import wmae as compute_wmae

# Configuration chosen for the tracked run, evaluated on the hold-out split.
# NOTE(review): the recorded sweeps above report lower WMAE for the run without
# the AdamW override (1547.6261) and for LeakyReLU (1579.8415) than for this
# AdamW + ReLU setup (1587.5859) — confirm this choice is intentional.
model = NBEATS(
    max_steps= 25 * 104,
    h= 53,
    random_seed= 42,
    input_size=52,
    batch_size= 256,
    learning_rate= 1e-3,
    shared_weights=True,
    # No optimizer_kwargs are passed, so AdamW runs with its own defaults.
    optimizer= torch.optim.AdamW,
    activation = 'ReLU'
)
# Project wrapper around the model; 'W-FRI' is the weekly frequency alias.
nf_model = NeuralForecastModels(models=[model], model_names=['NBEATS'], freq='W-FRI', one_model=True)

# Fit on the training part only, then score the validation part.
nf_model.fit(X_train, y_train)
y_pred = nf_model.predict(X_valid)
# The IsHoliday column is passed to the WMAE helper as its third argument.
wmae = compute_wmae(y_valid, y_pred, X_valid['IsHoliday'])

print(wmae)

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Predicting: |          | 0/? [00:00<?, ?it/s]

1587.5858753093955


In [None]:
from neuralforecast.models import PatchTST
from models.neural_forecast_models import NeuralForecastModels
from src.utils import wmae as compute_wmae

# Same configuration as the hold-out evaluation cell, rebuilt as a fresh model
# so it can be refitted on all labelled data.
model = NBEATS(
    max_steps= 25 * 104,
    h= 53,
    random_seed= 42,
    input_size=52,
    batch_size= 256,
    learning_rate= 1e-3,
    shared_weights=True,
    # No optimizer_kwargs are passed, so AdamW runs with its own defaults.
    optimizer= torch.optim.AdamW,
    activation = 'ReLU'
)
nf_model = NeuralForecastModels(models=[model], model_names=['NBEATS'], freq='W-FRI', one_model=True)

# Refit on the complete preprocessed training frame (train + validation rows);
# `nf_model` now refers to this refitted model.
nf_model.fit(df.drop(columns='Weekly_Sales'), df['Weekly_Sales'])

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [None]:
import wandb
import joblib

# Persist the model refitted on the full training frame and record the run in
# Weights & Biases. The logged configuration must mirror the NBEATS(...) call
# that produced `nf_model`.
joblib.dump(nf_model, "nbeats_run4.pkl")
wandb.init(project="Walmart Recruiting - Store Sales Forecasting", name="nbeats:run4")

wandb.config.update({
    'score_metric' : 'WMAE',
    'score_policy' : {
        'weight on holidays' : 5,
        'weight on non_holidays' : 1
    },
    'model' : 'nbeats',
    'learning_rate' : 0.001,
    # AdamW was passed without optimizer_kwargs, so its default weight decay
    # (0.01) applies; the sweep run with weight_decay=0.01 reproduces the same
    # validation score as this configuration.
    'weight_decay' : 0.01,
    'batch_size' : 256,
    'max_steps' : 25 * 104,
    'input_size' : 52,
    'horizon': 53,
    'architecture' : ['identity', 'trend', 'seasonality'],
    'n_blocks' : [1,1,1],
    'shared_weights' : True,
    'random_state': 42,
    # The model was built without a `loss` argument, so the library default is
    # in effect. NOTE(review): MAE per the neuralforecast documentation —
    # confirm for the installed version. The previous value
    # ('reg:squarederror') is an XGBoost objective name.
    'objective' : 'MAE',
    'activation': 'ReLU',
    'optimizer' : 'torch.optim.AdamW'
})

# Log the score computed in the hold-out evaluation cell rather than a
# hand-copied literal, so the logged value cannot drift from the actual run.
wandb.log({
    'val_wmae': float(wmae)
})


artifact = wandb.Artifact(
    name="nbeats_run4",
    type="model",
)

artifact.add_file("nbeats_run4.pkl")
wandb.log_artifact(artifact)

wandb.finish()

[34m[1mwandb[0m: Currently logged in as: [33mazhgh22[0m ([33mMLBeasts[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
val_wmae,▁

0,1
val_wmae,1587.58588
