<a href="https://colab.research.google.com/github/azhgh22/Walmart-Recruiting-Store-Sales-Forecasting/blob/main/notebooks/d_linear.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
# Colab-only bootstrap: mount Google Drive, clone the project repository,
# install its requirements and run the project's data-loader script.
# The %%capture magic on the first line suppresses all output of this cell.
from google.colab import drive
drive.mount('/content/drive')

# Credentials come from Colab's user secrets instead of being hard-coded here.
from google.colab import userdata
token = userdata.get('GITHUB_TOKEN')
user_name = userdata.get('GITHUB_USERNAME')
mail = userdata.get('GITHUB_MAIL')

!git config --global user.name "{user_name}"
!git config --global user.email "{mail}"
# NOTE(review): the token is interpolated into the clone URL, so git will
# presumably persist it in the clone's remote configuration — confirm that is acceptable.
!git clone https://{token}@github.com/azhgh22/Walmart-Recruiting-Store-Sales-Forecasting.git

# The relative paths below (./requirements.txt, ./src/...) and the `src`
# imports in later cells are resolved from the repository root.
%cd Walmart-Recruiting-Store-Sales-Forecasting

# NOTE(review): `userdata` is already imported above; this re-import is redundant.
from google.colab import userdata
! pip install -r ./requirements.txt
kaggle_json_path = userdata.get('KAGGLE_JSON_PATH')
# Project script, called with the kaggle.json path; presumably downloads the
# competition data — verify in src/data_loader.sh.
! ./src/data_loader.sh -f {kaggle_json_path}

# **Read Data**

In [2]:
# **Torch**

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.base import BaseEstimator, TransformerMixin
import torch
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS
from neuralforecast.losses.pytorch import MSE

from src.config import *

# Raw tables, read straight from the paths exported by src.config.
# NOTE(review): none of these four frames is referenced again in the visible
# cells; data_loader.load_raw_data() below presumably reads the same files.
stores, features, train, test = (
    pd.read_csv(csv_path)
    for csv_path in (STORES_PATH, FEATURES_PATH, TRAIN_PATH, TEST_PATH)
)

from src import data_loader, processing
import importlib

# Reload so that edits made to the processing module during the session are
# picked up without restarting the kernel.
importlib.reload(processing)

dataframes = data_loader.load_raw_data()

# Keep only the preprocessed training frame; the test set is not processed and
# neither the feature table nor the store table is merged in.
df = processing.run_preprocessing(
    dataframes,
    process_test=False,
    merge_features=False,
    merge_stores=False,
)['train']

# Features and target come back separately for both partitions.
X_train, y_train, X_valid, y_valid = processing.split_data_by_ratio(
    df, separate_target=True
)

print(f"Shapes of train_df and valid_df: {X_train.shape}, {X_valid.shape}")

Data loading complete.
Shapes of train_df and valid_df: (337256, 4), (84314, 4)


In [3]:
from itertools import product
from neuralforecast.models import DLinear
from models.neural_forecast_models import NeuralForecastModels
from src.utils import wmae as compute_wmae
import logging

# Raise the threshold of the root logger (None) and of the training libraries
# to WARNING so that informational messages do not flood the sweep outputs.
for _logger_name in (None, "neuralforecast", "pytorch_lightning", "lightning_fabric"):
    logging.getLogger(_logger_name).setLevel(logging.WARNING)

def run_dlinear_cv(X_train, y_train, X_valid, y_valid,
                   param_grid,
                   fixed_params,
                   return_all=False):
    """Grid-search DLinear hyperparameters against one fixed validation split.

    Every combination of the candidate values in ``param_grid`` is merged with
    ``fixed_params``, used to build a ``DLinear`` model, fitted on the training
    split through the project's ``NeuralForecastModels`` wrapper and scored
    with WMAE on the validation split. One summary line is printed per run.

    Args:
        X_train, y_train: Training features and target, passed to ``fit``.
        X_valid, y_valid: Validation features and target. ``X_valid`` must
            support ``X_valid['IsHoliday']``, which is handed to the WMAE
            function as its third argument.
        param_grid: Mapping of parameter name -> iterable of candidate values.
            May be empty, in which case a single run driven only by
            ``fixed_params`` is performed.
        fixed_params: Mapping of parameters shared by every run (``None`` is
            treated as empty). On a key collision these values take
            precedence over the grid values.
        return_all: When true, return the list of all run results instead of
            only the best one.

    Returns:
        A result dict holding ``'wmae'``, ``'preds'`` and every model
        parameter of the run with the lowest WMAE, or the list of all such
        dicts (in grid order) when ``return_all`` is true. Note that each
        result keeps its full prediction object.

    Raises:
        ValueError: If no combination could be evaluated (some grid entry has
            no candidate values) and ``return_all`` is false.
    """
    # Trainer flags forced off for every run and hidden from the summary line.
    silenced = ('enable_progress_bar', 'enable_model_summary')

    # Reading keys/values directly (rather than unpacking zip(*items())) keeps
    # an empty grid legal: product() of no iterables yields exactly one empty
    # combination.
    keys = list(param_grid)
    results = []
    for vals in product(*param_grid.values()):
        params = dict(zip(keys, vals))
        params.update(fixed_params or {})
        for flag in silenced:
            params[flag] = False

        model = DLinear(**params)

        nf_model = NeuralForecastModels(models=[model], model_names=['DLinear'], freq='W-FRI', one_model=True)
        nf_model.fit(X_train, y_train)
        y_pred = nf_model.predict(X_valid)
        score = compute_wmae(y_valid, y_pred, X_valid['IsHoliday'])

        result = {'wmae': score, 'preds': y_pred}
        result.update(params)
        results.append(result)

        print(" → ".join(f"{k}={v}" for k, v in params.items() if k not in silenced) + f" → WMAE={score:.4f}")

    if return_all:
        return results
    if not results:
        raise ValueError(
            "param_grid produced no combinations: every entry needs at least one candidate value"
        )
    return min(results, key=lambda r: r['wmae'])

# **Tune input size**

In [None]:
# Sweep the input window length; everything else stays fixed.
param_grid = dict(input_size=[40, 52, 60, 72])

# Shared by every run of this sweep: 2600 training steps, h=53,
# a fixed seed and a batch size of 64.
fixed_params = dict(
    max_steps=25 * 104,
    h=53,
    random_seed=42,
    batch_size=64,
)

# return_all=False: only the run with the lowest WMAE is returned.
best_result = run_dlinear_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid, fixed_params=fixed_params, return_all=False,
)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

input_size=40 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=1986.8671
input_size=52 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=1697.3507
input_size=60 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=1647.6388
input_size=72 → max_steps=2600 → h=53 → random_seed=42 → batch_size=64 → WMAE=1991.4705

Best hyperparameters found:
  input_size: 60
Best WMAE: 1647.6388


# **Tune batch size**

In [None]:
# Sweep the batch size only.
param_grid = dict(batch_size=[32, 64, 128, 256, 512, 1024])

# input_size is pinned to 60, the value the input-size sweep reported as best.
fixed_params = dict(
    max_steps=25 * 104,
    h=53,
    random_seed=42,
    input_size=60,
)

# return_all=False: only the run with the lowest WMAE is returned.
best_result = run_dlinear_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid, fixed_params=fixed_params, return_all=False,
)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

batch_size=32 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → WMAE=1658.2520
batch_size=64 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → WMAE=1647.6388
batch_size=128 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → WMAE=1642.6096
batch_size=256 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → WMAE=1642.8322
batch_size=512 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → WMAE=1641.7895
batch_size=1024 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → WMAE=1643.3484

Best hyperparameters found:
  batch_size: 512
Best WMAE: 1641.7895


# **Tune learning rate**

In [None]:
# Sweep the learning rate over four orders of magnitude.
param_grid = dict(learning_rate=[1e-2, 1e-3, 1e-4, 1e-5])

# input_size and batch_size are pinned to the winners of the two earlier sweeps.
fixed_params = dict(
    max_steps=25 * 104,
    h=53,
    random_seed=42,
    input_size=60,
    batch_size=512,
)

# return_all=False: only the run with the lowest WMAE is returned.
best_result = run_dlinear_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid, fixed_params=fixed_params, return_all=False,
)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

learning_rate=0.01 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → WMAE=1661.9293
learning_rate=0.001 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → WMAE=1660.3139
learning_rate=0.0001 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → WMAE=1641.7895
learning_rate=1e-05 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → WMAE=3234.4342

Best hyperparameters found:
  learning_rate: 0.0001
Best WMAE: 1641.7895


# **Tune scaler function**

In [7]:
# Compare the available scaler types with all other settings held fixed.
param_grid = dict(scaler_type=['robust', 'minmax', 'standard', 'identity'])

# Window length, batch size and learning rate are pinned to the winners of the
# earlier sweeps.
fixed_params = dict(
    max_steps=25 * 104,
    h=53,
    random_seed=42,
    input_size=60,
    batch_size=512,
    learning_rate=1e-4,
)

# return_all=False: only the run with the lowest WMAE is returned.
best_result = run_dlinear_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid, fixed_params=fixed_params, return_all=False,
)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

scaler_type=robust → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → learning_rate=0.0001 → WMAE=1640.2401
scaler_type=minmax → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → learning_rate=0.0001 → WMAE=1817.3580
scaler_type=standard → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → learning_rate=0.0001 → WMAE=1631.8249
scaler_type=identity → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → learning_rate=0.0001 → WMAE=1641.7895

Best hyperparameters found:
  scaler_type: standard
Best WMAE: 1631.8249


In [9]:
# Joint sweep: every optimizer class is tried with every learning rate
# (5 x 4 = 20 runs), because a good learning rate depends on the optimizer.
param_grid = dict(
    optimizer=[
        torch.optim.Adam,
        torch.optim.Adagrad,
        torch.optim.SGD,
        torch.optim.RMSprop,
        torch.optim.AdamW,
    ],
    learning_rate=[1e-2, 1e-3, 1e-4, 1e-5],
)

# NOTE(review): scaler_type is pinned to 'robust' although the scaler sweep
# reported 'standard' as the best option (WMAE 1631.8249 vs 1640.2401) —
# confirm whether this choice is intentional.
fixed_params = dict(
    max_steps=25 * 104,
    h=53,
    random_seed=42,
    input_size=60,
    batch_size=512,
    scaler_type='robust',
)

# return_all=False: only the run with the lowest WMAE is returned.
best_result = run_dlinear_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid, fixed_params=fixed_params, return_all=False,
)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

optimizer=<class 'torch.optim.adam.Adam'> → learning_rate=0.01 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → scaler_type=robust → WMAE=1656.5471
optimizer=<class 'torch.optim.adam.Adam'> → learning_rate=0.001 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → scaler_type=robust → WMAE=1624.3064
optimizer=<class 'torch.optim.adam.Adam'> → learning_rate=0.0001 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → scaler_type=robust → WMAE=1640.2401
optimizer=<class 'torch.optim.adam.Adam'> → learning_rate=1e-05 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → scaler_type=robust → WMAE=2535.2774
optimizer=<class 'torch.optim.adagrad.Adagrad'> → learning_rate=0.01 → max_steps=2600 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → scaler_type=robust → WMAE=1592.9379
optimizer=<class 'torch.optim.adagrad.Adagrad'> → learning_rate=0.001 → max_steps=2600 → h=53 → random_seed=42 → in

In [12]:
# Re-run the learning-rate sweep with three times the training steps
# (75 * 104 = 7800 instead of 2600). max_steps has a single candidate, so it is
# in the grid only to be listed in the "best hyperparameters" printout.
param_grid = dict(
    max_steps=[75 * 104],
    learning_rate=[1e-2, 1e-3, 1e-4, 1e-5],
)

# The optimizer is pinned to Adagrad and the scaler to 'robust'; the remaining
# values are carried over from the earlier sweeps.
fixed_params = dict(
    h=53,
    random_seed=42,
    input_size=60,
    batch_size=512,
    scaler_type='robust',
    optimizer=torch.optim.Adagrad,
)

# return_all=False: only the run with the lowest WMAE is returned.
best_result = run_dlinear_cv(
    X_train, y_train, X_valid, y_valid,
    param_grid=param_grid, fixed_params=fixed_params, return_all=False,
)

print("\nBest hyperparameters found:")
for param in param_grid:
    print(f"  {param}: {best_result[param]}")
print(f"Best WMAE: {best_result['wmae']:.4f}")

max_steps=7800 → learning_rate=0.01 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → scaler_type=robust → optimizer=<class 'torch.optim.adagrad.Adagrad'> → WMAE=1598.1066
max_steps=7800 → learning_rate=0.001 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → scaler_type=robust → optimizer=<class 'torch.optim.adagrad.Adagrad'> → WMAE=1725.5933
max_steps=7800 → learning_rate=0.0001 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → scaler_type=robust → optimizer=<class 'torch.optim.adagrad.Adagrad'> → WMAE=2652.5868
max_steps=7800 → learning_rate=1e-05 → h=53 → random_seed=42 → input_size=60 → batch_size=512 → scaler_type=robust → optimizer=<class 'torch.optim.adagrad.Adagrad'> → WMAE=2944.5461

Best hyperparameters found:
  max_steps: 7800
  learning_rate: 0.01
Best WMAE: 1598.1066
